[Committed] S/390: New option -mpic-data-is-text-relative

2017-06-28 Thread Andreas Krebbel
For hotpatching it might be required to introduce new .text parts
while keep using the existing .data/.bss sections.  To make this work
the backend needs to be prevented from using relative addressing
between code and data.
This only works when already building PIC
since the addressing will then be handling via GOT.

Bootstrapped and regression tested on s390x.

Committed to mainline.

Bye,

-Andreas-

gcc/testsuite/ChangeLog:

2017-06-28  Andreas Krebbel  

* gcc.target/s390/nodatarel-1.c: New test.

gcc/ChangeLog:

2017-06-28  Andreas Krebbel  

* config/s390/predicates.md: Use s390_rel_address_ok_p.
* config/s390/s390-protos.h: Add prototype of
s390_rel_address_ok_p.
* config/s390/s390.c (s390_got_symbol): New function.
(s390_rel_address_ok_p): New function.
(legitimize_pic_address): Use s390_rel_address_ok_p.
(s390_load_got): Use s390_got_symbol.
(s390_option_override): Issue error if
-mno-pic-data-is-text-relative is used without -fpic/-fPIC.
* config/s390/s390.h (TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE):
New macro.
* config/s390/s390.opt: New option mpic-data-is-text-relative.
---
 gcc/config/s390/predicates.md   |  9 ++--
 gcc/config/s390/s390-protos.h   |  1 +
 gcc/config/s390/s390.c  | 53 ++
 gcc/config/s390/s390.h  |  4 ++
 gcc/config/s390/s390.opt|  4 ++
 gcc/testsuite/gcc.target/s390/nodatarel-1.c | 83 +
 6 files changed, 140 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/nodatarel-1.c

diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index 34a7ea2..fc151ac 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -131,10 +131,10 @@
   /* Allow labels and local symbols.  */
   if (GET_CODE (op) == LABEL_REF)
 return true;
-  if (GET_CODE (op) == SYMBOL_REF)
+  if (SYMBOL_REF_P (op))
 return (!SYMBOL_FLAG_NOTALIGN2_P (op)
&& SYMBOL_REF_TLS_MODEL (op) == 0
-   && (!flag_pic || SYMBOL_REF_LOCAL_P (op)));
+   && s390_rel_address_ok_p (op));
 
   /* Everything else must have a CONST, so strip it.  */
   if (GET_CODE (op) != CONST)
@@ -156,10 +156,11 @@
   /* Labels and local symbols allowed here as well.  */
   if (GET_CODE (op) == LABEL_REF)
 return true;
-  if (GET_CODE (op) == SYMBOL_REF)
+  if (SYMBOL_REF_P (op))
 return (!SYMBOL_FLAG_NOTALIGN2_P (op)
&& SYMBOL_REF_TLS_MODEL (op) == 0
-   && (!flag_pic || SYMBOL_REF_LOCAL_P (op)));
+   && s390_rel_address_ok_p (op));
+
 
   /* Now we must have a @GOTENT offset or @PLT stub
  or an @INDNTPOFF TLS offset.  */
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 3fdb320..6df37ef 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -79,6 +79,7 @@ extern bool s390_bytemask_vector_p (rtx, unsigned *);
 extern bool s390_split_ok_p (rtx, rtx, machine_mode, int);
 extern bool s390_overlap_p (rtx, rtx, HOST_WIDE_INT);
 extern bool s390_offset_p (rtx, rtx, rtx);
+extern bool s390_rel_address_ok_p (rtx);
 extern int tls_symbolic_operand (rtx);
 
 extern bool s390_match_ccmode (rtx_insn *, machine_mode);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index eb94237..bfc38db 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -1179,6 +1179,23 @@ s390_label_align (rtx_insn *label)
   return align_labels_log;
 }
 
+static GTY(()) rtx got_symbol;
+
+/* Return the GOT table symbol.  The symbol will be created when the
+   function is invoked for the first time.  */
+
+static rtx
+s390_got_symbol (void)
+{
+  if (!got_symbol)
+{
+  got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+  SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
+}
+
+  return got_symbol;
+}
+
 static machine_mode
 s390_libgcc_cmp_return_mode (void)
 {
@@ -4496,6 +4513,26 @@ s390_load_address (rtx dst, rtx src)
 emit_insn (gen_force_la_31 (dst, src));
 }
 
+/* Return true if it ok to use SYMBOL_REF in a relative address.  */
+
+bool
+s390_rel_address_ok_p (rtx symbol_ref)
+{
+  tree decl;
+
+  if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
+return true;
+
+  decl = SYMBOL_REF_DECL (symbol_ref);
+
+  if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
+return (s390_pic_data_is_text_relative
+   || (decl
+   && TREE_CODE (decl) == FUNCTION_DECL));
+
+  return false;
+}
+
 /* Return a legitimate reference for ORIG (an address) using the
register REG.  If REG is 0, a new pseudo is generated.
 
@@ -4533,7 +4570,7 @@ legitimize_pic_address (rtx orig, rtx reg)
 }
 
   if ((GET_CODE (addr) == LABEL_REF
-   || (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (addr))
+   || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (a

Re: [Doc, AArch64] Fix/Update AArch64 options.

2017-06-28 Thread Yvan Roux
Hi Sandra,

On 27 June 2017 at 18:05, Sandra Loosemore  wrote:
> On 06/27/2017 06:19 AM, Yvan Roux wrote:
>
>> diff --git a/gcc/config/aarch64/aarch64.opt
>> b/gcc/config/aarch64/aarch64.opt
>> index 942a7d5..0fd1bfa 100644
>> --- a/gcc/config/aarch64/aarch64.opt
>> +++ b/gcc/config/aarch64/aarch64.opt
>> @@ -146,7 +146,7 @@ EnumValue
>>  Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64)
>>
>>  mpc-relative-literal-loads
>> -Target Report Save Var(pcrelative_literal_loads) Init(2) Save
>> +Target Report Var(pcrelative_literal_loads) Init(2) Save
>>  PC relative literal loads.
>>
>>  msign-return-address=
>
>
> I think this qualifies as an obvious fix.  I can't approve it if it isn't,
> anyway  ;-)

Ok, I'll commit it separately unless there is an objection to its obviousness.

>> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
>> index d1e097b..6e0e776 100644
>> --- a/gcc/doc/invoke.texi
>> +++ b/gcc/doc/invoke.texi
>> @@ -595,7 +595,9 @@ Objective-C and Objective-C++ Dialects}.
>>  -mlow-precision-recip-sqrt  -mno-low-precision-recip-sqrt@gol
>>  -mlow-precision-sqrt  -mno-low-precision-sqrt@gol
>>  -mlow-precision-div  -mno-low-precision-div @gol
>> --march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}}
>> +-mpc-relative-literal-loads -mno-pc-relative-literal-loads @gol
>
>
> For options that have both positive and negative variants, we should only be
> listing the one that is not the default in the Option Summary table.  Can
> you please remove the existing redundant options listed for AArch64, instead
> of adding a new one?
>
>> +-msign-return-address=@var{scope} @gol
>> +-march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}
>> -moverride=@var{string}}
>>
>>  @emph{Adapteva Epiphany Options}
>>  @gccoptlist{-mhalf-reg-file  -mprefer-short-insn-regs @gol
>> @@ -14158,8 +14160,10 @@ across releases.
>>  This option is only intended to be useful when developing GCC.
>>
>>  @item -mpc-relative-literal-loads
>> +@item -mno-pc-relative-literal-loads
>
>
> It is OK to list both the positive and negative forms in the full
> description, but in a table with multiple items in the same entry, the
> second and subsequent ones should use @itemx markup instead of @item.
>
>>  @opindex mpc-relative-literal-loads
>> -Enable PC-relative literal loads.  With this option literal pools are
>> +@opindex mno-pc-relative-literal-loads
>> +Enable or disable PC-relative literal loads.  With this option literal
>> pools are
>>  accessed using a single instruction and emitted after each function.
>> This
>>  limits the maximum size of functions to 1MB.  This is enabled by default
>> for
>>  @option{-mcmodel=tiny}.

OK, here is the new patch with the comments addressed.  I've spotted
that there is also some m / -mno  options at least in the ARM section,
I'll make another patch to fix that.

Thanks
Yvan


>
> -Sandra
>
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index d1e097b..e1bb8a8 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -587,15 +587,14 @@ Objective-C and Objective-C++ Dialects}.
 -mgeneral-regs-only @gol
 -mcmodel=tiny  -mcmodel=small  -mcmodel=large @gol
 -mstrict-align @gol
--momit-leaf-frame-pointer  -mno-omit-leaf-frame-pointer @gol
+-momit-leaf-frame-pointer @gol
 -mtls-dialect=desc  -mtls-dialect=traditional @gol
 -mtls-size=@var{size} @gol
--mfix-cortex-a53-835769  -mno-fix-cortex-a53-835769 @gol
--mfix-cortex-a53-843419  -mno-fix-cortex-a53-843419 @gol
--mlow-precision-recip-sqrt  -mno-low-precision-recip-sqrt@gol
--mlow-precision-sqrt  -mno-low-precision-sqrt@gol
--mlow-precision-div  -mno-low-precision-div @gol
--march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}}
+-mfix-cortex-a53-835769  -mfix-cortex-a53-843419 @gol
+-mlow-precision-recip-sqrt  -mlow-precision-sqrt  -mlow-precision-div @gol
+-mpc-relative-literal-loads @gol
+-msign-return-address=@var{scope} @gol
+-march=@var{name}  -mcpu=@var{name}  -mtune=@var{name}  -moverride=@var{string}}
 
 @emph{Adapteva Epiphany Options}
 @gccoptlist{-mhalf-reg-file  -mprefer-short-insn-regs @gol
@@ -14158,8 +14157,10 @@ across releases.
 This option is only intended to be useful when developing GCC.
 
 @item -mpc-relative-literal-loads
+@itemx -mno-pc-relative-literal-loads
 @opindex mpc-relative-literal-loads
-Enable PC-relative literal loads.  With this option literal pools are
+@opindex mno-pc-relative-literal-loads
+Enable or disable PC-relative literal loads.  With this option literal pools are
 accessed using a single instruction and emitted after each function.  This
 limits the maximum size of functions to 1MB.  This is enabled by default for
 @option{-mcmodel=tiny}.


Re: [PATCH GCC][01/13]Introduce internal function IFN_LOOP_DIST_ALIAS

2017-06-28 Thread Richard Biener
On Tue, Jun 27, 2017 at 6:46 PM, Bin.Cheng  wrote:
> On Tue, Jun 27, 2017 at 3:59 PM, Richard Biener
>  wrote:
>> On June 27, 2017 4:27:17 PM GMT+02:00, "Bin.Cheng"  
>> wrote:
>>>On Tue, Jun 27, 2017 at 1:58 PM, Richard Biener
>>> wrote:
 On Fri, Jun 23, 2017 at 12:10 PM, Bin.Cheng 
>>>wrote:
> On Mon, Jun 12, 2017 at 6:02 PM, Bin Cheng 
>>>wrote:
>> Hi,
>> I was asked by upstream to split the loop distribution patch into
>>>small ones.
>> It is hard because data structure and algorithm are closely coupled
>>>together.
>> Anyway, this is the patch series with smaller patches.  Basically I
>>>tried to
>> separate data structure and bug-fix changes apart with one as the
>>>main patch.
>> Note I only made necessary code refactoring in order to separate
>>>patch, apart
>> from that, there is no change against the last version.
>>
>> This is the first patch introducing new internal function
>>>IFN_LOOP_DIST_ALIAS.
>> GCC will distribute loops under condition of this function call.
>>
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
> Hi,
> I need to update this patch fixing an issue in
> vect_loop_dist_alias_call.  The previous patch fails to find some
> IFN_LOOP_DIST_ALIAS calls.
>
> Bootstrap and test in series.  Is it OK?

 So I wonder if we really need to track ldist_alias_id or if we can do
>>>sth
>>>Yes, it is needed because otherwise we probably falsely trying to
>>>search for IFN_LOOP_DIST_ALIAS for a normal (not from distribution)
>>>loop.
>>>
 more "general", like tracking a copy_of or origin and then directly
 go to nearest_common_dominator (loop->header, copy_of->header)
 to find the controlling condition?
>>>I tend to not record any pointer in loop structure, it can easily go
>>>dangling for a across passes data structure.
>>
>> I didn't mean to record a pointer, just rename your field and make it more 
>> general.  The common dominator thing shod still work, no?
> I might not be following.  If we record the original loop->num in the
> renamed field, nearest_common_dominator can't work because we don't
> have basic blocks to start the call?  The original loop could be
> eliminated at several points, for example, instantly after
> distribution, or folded in vectorizer for other loops distributed from
> the original loop.
> BTW, setting the copy_of/origin field in loop_version is not enough
> for this use case, all generated loops (actually, except the versioned
> loop) from distribution need to be set.

Of course it would need to be set for all distributed loops.

I'm not sure "loop vanishes" is the case to optimize for though.  If the loop
is still available then origin->header should work as BB.  If not then can't
we conclude, for the purpose of IFN_LOOP_DIST_ALIAS, that the whole
region must be dead?  We still have to identify it of course, but it means
we can fold stray IFN_LOOP_DIST_ALIAS calls left over after vectorization
to whatever we like?

>>
>> As far as memory usage
>>>is concerned.  I actually don't need a whole integer to record the
>>>loop num.  I can simply restrict number of distributions in one
>>>function to at most 256, and record such id in a char field in struct
>>>loop?  Does this sounds better?
>>
>> As said, tracking loop origin sounds useful anyway so I'd rather add and use 
>> that somehow.
> To be honest, I don't know.  the current field works like a unique
> index of distribution operation.  The original loop could be destroyed
> at different points thus no longer exists, this makes the recorded
> copy_of/origin less meaningful?

I think we talked about prologue and epilogue loops to be easier identifiable
as so (and as to what "main" loop).  So lets say we have one "origin" field
and accompaning flags "origin_is_loop_dist_alias_version",
"origin_is_main_loop_of_prologue", etc.?  I can't think of the case where
origin would be two things at the same time (you can always walk up
the origin tree).

Vanishing origins could also be cleared btw, or we can make the new origin
the origin of the vanishing origin...

Richard.

> Thanks,
> bin
>>
>>>Thanks,
>>>bin

 That said "ldist_alias_id" is a bit too narrow of purpose to "waste"
 an int inside struct loop?  I'd set copy_of/origi in loop_version for
>>>example.
 'origin' would probably be better given the ldist cases aren't really
 full "copies".

 fold_loop_dist_alias_call should re-use / rename
>>>fold_loop_vectorized_call
 by just passing folded_value to it.

 Richard.

> Thanks,
> bin
>>
>> Thanks,
>> bin
>> 2017-06-07  Bin Cheng  
>>
>> * cfgloop.h (struct loop): New field ldist_alias_id.
>> * cfgloopmanip.c (lv_adjust_loop_entry_edge): Comment
>>>change.
>> * internal-fn.c (expand_LOOP_DIST_ALIAS): New function.
>> * internal-fn.def (LOOP_DIST_ALIAS): New.
>> * tree-vectorizer.c (vect

Re: Use ucontext_t not struct ucontext in linux-unwind.h files

2017-06-28 Thread Richard Biener
On Tue, Jun 27, 2017 at 7:54 PM, Joseph Myers  wrote:
> On Tue, 27 Jun 2017, Joseph Myers wrote:
>
>> Current glibc no longer gives the ucontext_t type the tag struct
>> ucontext, to conform with POSIX namespace rules.  This requires
>> various linux-unwind.h files in libgcc, that were previously using
>> struct ucontext, to be fixed to use ucontext_t instead.  This is
>> similar to the removal of the struct siginfo tag from siginfo_t some
>> years ago.
>>
>> This patch changes those files to use ucontext_t instead.  As the
>> standard name that should be unconditionally safe, so this is not
>> restricted to architectures supported by glibc, or conditioned on the
>> glibc version.
>>
>> Testing compilation together with current glibc with glibc's
>> build-many-glibcs.py.  OK to commit (mainline and active release
>> branches) if that passes?

Ok to commit to trunk.  Please wait for a while before backporting to catch
non-glibc and ancient glibc issues.

Thanks,
Richard.

> That compilation testing has now passed (together with a couple of glibc
> patches, now committed, to fix the build with -Wmultistatement-macros).
>
> --
> Joseph S. Myers
> jos...@codesourcery.com


Re: [PATCH] Fold (a > 0 ? 1.0 : -1.0) into copysign (1.0, a) and a * copysign (1.0, a) into abs(a)

2017-06-28 Thread Jakub Jelinek
On Tue, Jun 27, 2017 at 10:52:47AM -0700, Andrew Pinski wrote:
> On Tue, Jun 27, 2017 at 7:56 AM, Richard Biener
>  wrote:
> > On June 27, 2017 4:52:28 PM GMT+02:00, Tamar Christina 
> >  wrote:
> >>> >> +(for cmp (gt ge lt le)
> >>> >> + outp (convert convert negate negate)
> >>> >> + outn (negate negate convert convert)
> >>> >> + /* Transform (X > 0.0 ? 1.0 : -1.0) into copysign(1, X). */
> >>> >> + /* Transform (X >= 0.0 ? 1.0 : -1.0) into copysign(1, X). */
> >>> >> + /* Transform (X < 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
> >>> >> + /* Transform (X <= 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
> >>> >> +(simplify
> >>> >> +  (cond (cmp @0 real_zerop) real_onep real_minus_onep)
> >>> >> +  (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type)
> >>> >> +   && types_match (type, TREE_TYPE (@0)))
> >>> >> +   (switch
> >>> >> +(if (types_match (type, float_type_node))
> >>> >> + (BUILT_IN_COPYSIGNF { build_one_cst (type); } (outp @0)))
> >>> >> +(if (types_match (type, double_type_node))
> >>> >> + (BUILT_IN_COPYSIGN { build_one_cst (type); } (outp @0)))
> >>> >> +(if (types_match (type, long_double_type_node))
> >>> >> + (BUILT_IN_COPYSIGNL { build_one_cst (type); } (outp @0))

The patch regressed the gcc.target/i386/cmov7.c testcase.
>From the description in the testcase it was testing the combiner, so I've
transformed it into something that also tests the combiner the same way,
ok for trunk?  That said, for copysign if we match it we don't emit a fcmov
while it might be a good idea.

2017-06-28  Jakub Jelinek  

* gcc.target/i386/cmov7.c (sgn): Renamed to ...
(foo): ... this.  Change constants such that it isn't matched
as __builtin_copysign, yet tests the combiner the same.

--- gcc/testsuite/gcc.target/i386/cmov7.c.jj2016-05-22 12:20:23.0 
+0200
+++ gcc/testsuite/gcc.target/i386/cmov7.c   2017-06-28 09:20:24.0 
+0200
@@ -10,7 +10,7 @@
(set (reg:DF) (float_extend:DF (mem:SF (symbol_ref....  */
 
 double
-sgn (double __x)
+foo (double __x)
 {
-  return __x >= 0.0 ? 1.0 : -1.0;
+  return __x >= 1.0 ? 0.0 : -1.0;
 }


Jakub


[RFC][PR 67336][PING^3] Verify pointers during stack unwind

2017-06-28 Thread Yuri Gribov
Hi all,

I've rebased the previous patch to trunk per Andrew's suggestion.
Original patch description/motivation/questions are in
https://gcc.gnu.org/ml/gcc-patches/2017-06/msg01869.html

-Y


safe-unwind-2.patch
Description: Binary data
#include 
#include 

struct _Unwind_Context;

typedef int (*_Unwind_Trace_Fn)(struct _Unwind_Context *, void *vdata);

extern int _Unwind_Backtrace(_Unwind_Trace_Fn trace, void * trace_argument);
extern int _Unwind_Backtrace_Checked(_Unwind_Trace_Fn trace, void * trace_argument);

#ifdef CHECK_UNWIND
#define _Unwind_Backtrace _Unwind_Backtrace_Checked
#endif

extern void *_Unwind_GetIP (struct _Unwind_Context *context);

int simple_unwind (struct _Unwind_Context *context, void *vdata) {
  printf("Next frame: ");
  void *pc = _Unwind_GetIP(context);
  printf("%p\n", pc);
  return 0;
}

#define noinline __attribute__((noinline))

noinline int foo() {
  // Clobber stack to provoke errors in unwinder
  int x;
  void *p = &x;
  asm("" :: "r"(p));
  memset(p, 0xa, 128);

  printf("After clobbering stack\n");

  int ret = _Unwind_Backtrace(simple_unwind, 0);
  printf("After unwind: %d\n", ret);
  printf("We're going to fail now\n");

  return 0;
}

noinline int bar() {
  int x = foo();
  return x + 1;
}

int main() {
  bar();
  return 0;
}


[PATCH] Disentangle reduction initial value compute from vect_get_vec_defs

2017-06-28 Thread Richard Biener

This creates a SLP variant for get_initial_def_for_reduction thereby
cleaning up the get_defs interfaces to not pass in reduc_index.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2017-06-28  Richard Biener  

* tree-vectorizer.h (vect_get_vec_defs): Remove.
(vect_get_slp_defs): Adjust.
* tree-vect-loop.c (get_initial_defs_for_reduction): Split
out from ...
* tree-vect-slp.c (vect_get_constant_vectors): ... here and
simplify.
* tree-vect-loop.c (vect_create_epilog_for_reduction): Use
get_initial_defs_for_reduction instead of vect_get_vec_defs.
(vectorizable_reduction): Adjust.
* tree-vect-slp.c (vect_get_constant_vectors): Remove reduction
handling.
(vect_get_slp_defs): Likewise.
* tree-vect-stmts.c (vect_get_vec_defs): Make static and adjust.
(vectorizable_bswap): Adjust.
(vectorizable_call): Likewise.
(vectorizable_conversion): Likewise.
(vectorizable_assignment): Likewise.
(vectorizable_shift): Likewise.
(vectorizable_operation): Likewise.
(vectorizable_store): Likewise.
(vectorizable_condition): Likewise.
(vectorizable_comparison): Likewise.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 249686)
+++ gcc/tree-vect-loop.c(working copy)
@@ -4045,6 +4045,203 @@ get_initial_def_for_reduction (gimple *s
   return init_def;
 }
 
+/* Get at the initial defs for OP in the reduction SLP_NODE.
+   NUMBER_OF_VECTORS is the number of vector defs to create.
+   REDUC_INDEX is the index of the reduction operand in the statements.  */
+
+static void
+get_initial_defs_for_reduction (slp_tree slp_node,
+   vec *vec_oprnds,
+   unsigned int number_of_vectors,
+   int reduc_index, enum tree_code code)
+{
+  vec stmts = SLP_TREE_SCALAR_STMTS (slp_node);
+  gimple *stmt = stmts[0];
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
+  unsigned nunits;
+  tree vec_cst;
+  tree *elts;
+  unsigned j, number_of_places_left_in_vector;
+  tree vector_type, scalar_type;
+  tree vop;
+  int group_size = stmts.length ();
+  unsigned int vec_num, i;
+  unsigned number_of_copies = 1;
+  vec voprnds;
+  voprnds.create (number_of_vectors);
+  bool constant_p;
+  tree neutral_op = NULL;
+  gimple *def_stmt;
+  struct loop *loop;
+  gimple_seq ctor_seq = NULL;
+
+  vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
+  scalar_type = TREE_TYPE (vector_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vector_type);
+
+  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
+ && reduc_index != -1);
+
+  /* op is the reduction operand of the first stmt already.  */
+  /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
+ we need either neutral operands or the original operands.  See
+ get_initial_def_for_reduction() for details.  */
+  switch (code)
+{
+case WIDEN_SUM_EXPR:
+case DOT_PROD_EXPR:
+case SAD_EXPR:
+case PLUS_EXPR:
+case MINUS_EXPR:
+case BIT_IOR_EXPR:
+case BIT_XOR_EXPR:
+  neutral_op = build_zero_cst (scalar_type);
+  break;
+
+case MULT_EXPR:
+  neutral_op = build_one_cst (scalar_type);
+  break;
+
+case BIT_AND_EXPR:
+  neutral_op = build_all_ones_cst (scalar_type);
+  break;
+
+/* For MIN/MAX we don't have an easy neutral operand but
+   the initial values can be used fine here.  Only for
+   a reduction chain we have to force a neutral element.  */
+case MAX_EXPR:
+case MIN_EXPR:
+  if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
+   neutral_op = NULL;
+  else
+   {
+ tree op = get_reduction_op (stmts[0], reduc_index);
+ def_stmt = SSA_NAME_DEF_STMT (op);
+ loop = (gimple_bb (stmt))->loop_father;
+ neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+ loop_preheader_edge (loop));
+   }
+  break;
+
+default:
+  gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
+  neutral_op = NULL;
+}
+
+  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
+ created vectors. It is greater than 1 if unrolling is performed.
+
+ For example, we have two scalar operands, s1 and s2 (e.g., group of
+ strided accesses of size two), while NUNITS is four (i.e., four scalars
+ of this type can be packed in a vector).  The output vector will contain
+ two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
+ will be 2).
+
+ If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
+ containing the operands.
+
+ For example, NUNITS is four as before, and the group size is 8
+ (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
+ {s

Re: Simple reassoc transforms in match.pd

2017-06-28 Thread Marc Glisse

On Mon, 26 Jun 2017, Richard Biener wrote:


On Fri, Jun 23, 2017 at 3:12 PM, Marc Glisse  wrote:

Hello,

here are a few simple transformations, mostly useful for types with
undefined overflow where we do not have reassoc.

I did not name the testcase reassoc-* to leave that namespace to the realloc
pass, and -fno-tree-reassoc is just in case someone ever enhances that
pass...


You probably saw

 /* (T)(P + A) - (T)(P + B) -> (T)A - (T)B */
 (for add (plus pointer_plus)
  (simplify
   (minus (convert (add @@0 @1))
(convert (add @0 @2)))


And

/* X + Z < Y + Z is the same as X < Y when there is no overflow.  */
[...]
/* For equality and subtraction, this is also true with wrapping overflow.  */
(for op (eq ne minus)
 (simplify
  (op (plus:c @0 @2) (plus:c @1 @2))
  (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
   && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
   || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0
   (op @0 @1


as you didn't duplicate its functionality.  It misses a :c in one of
the adds for the


For some other transform, I was tempted to write
(for op (plus:c minus) ...
i.e. use a different commutativity for different operations, but that's
not supported.


PLUS_EXPR case though so it might be worth splitting that out near to your
added cases?  Which then raises the question of handling conversions around
the inner ops in your patterns?


It is probably possible to merge / generalize some of those transforms 
(although we sometimes do strange assumptions about overflow with pointers 
that may not match with the usual integer case). In this case, I was 
interested in the undefined-overflow case (more precisely the sum of 2 
pointer_diff), and as soon as one operation is done in a wrapping type, 
the result has to be as well (which could still be handled in the same 
transformation).


--
Marc Glisse


[testsuite] Fix gcc.dg/tree-prof/val-profiler-threads-1.c

2017-06-28 Thread Eric Botcazou
The manual says that the type of the 2nd argument of pthread_join is void **.
Now gcc.dg/tree-prof/val-profiler-threads-1.c does this:

int retval;
for(t=0;t

* gcc.dg/tree-prof/val-profiler-threads-1.c (main): Fix 2nd argument
passed to pthread_join.

-- 
Eric BotcazouIndex: gcc.dg/tree-prof/val-profiler-threads-1.c
===
--- gcc.dg/tree-prof/val-profiler-threads-1.c	(revision 249619)
+++ gcc.dg/tree-prof/val-profiler-threads-1.c	(working copy)
@@ -35,9 +35,9 @@ int main(int argc, char *argv[])
}
  }
 
-   int retval;
+   void *retval;
for(t=0;t

GCC 6 branch now frozen for upcoming GCC 6.4 release

2017-06-28 Thread Richard Biener

All changes require release manager approval.

Thanks,
Richard.


Re: Avoid generating useless range info

2017-06-28 Thread Aldy Hernandez



On 06/27/2017 06:38 AM, Jakub Jelinek wrote:

On Tue, Jun 27, 2017 at 06:26:46AM -0400, Aldy Hernandez wrote:

How about this?


@@ -360,6 +363,22 @@ set_range_info (tree name, enum value_range_type 
range_type,
  }
  }
  
+/* Store range information RANGE_TYPE, MIN, and MAX to tree ssa_name

+   NAME while making sure we don't store useless range info.  */
+
+void
+set_range_info (tree name, enum value_range_type range_type,
+   const wide_int_ref &min, const wide_int_ref &max)
+{
+  /* A range of the entire domain is really no range at all.  */
+  tree type = TREE_TYPE (name);
+  if (min == wi::min_value (TYPE_PRECISION (type), TYPE_SIGN (type))
+  && max == wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type)))
+return;
+
+  set_range_info_raw (name, range_type, min, max);
+}
+

Won't this misbehave if we have a narrower range on some SSA_NAME and
call set_range_info to make it VARYING?
In that case (i.e. SSA_NAME_RANGE_INFO (name) != NULL), we should either
set_range_info_raw too (if nonzero_bits is not all ones) or clear
SSA_NAME_RANGE_INFO (otherwise).


Good point.  Fixed.

  
  /* Gets range information MIN, MAX and returns enum value_range_type

 corresponding to tree ssa_name NAME.  enum value_range_type returned
@@ -419,9 +438,13 @@ set_nonzero_bits (tree name, const wide_int_ref &mask)
  {
gcc_assert (!POINTER_TYPE_P (TREE_TYPE (name)));
if (SSA_NAME_RANGE_INFO (name) == NULL)
-set_range_info (name, VR_RANGE,
-   TYPE_MIN_VALUE (TREE_TYPE (name)),
-   TYPE_MAX_VALUE (TREE_TYPE (name)));
+{
+  if (mask == -1)
+   return;
+  set_range_info_raw (name, VR_RANGE,
+ TYPE_MIN_VALUE (TREE_TYPE (name)),
+ TYPE_MAX_VALUE (TREE_TYPE (name)));
+}
range_info_def *ri = SSA_NAME_RANGE_INFO (name);
ri->set_nonzero_bits (mask);

Similarly, if SSA_NAME_RANGE_INFO is previously non-NULL, but min/max
are VARYING and the new mask is -1, shouldn't we free it rather than
set it to the default?


Here, if SSA_NAME_RANGE_INFO is previously non-NULL then we proceed as 
always-- just set the nonzero bits to whatever was specified (without 
clearning SSA_NAME_RANGE_INFO).  A mask of -1 and an SSA_NAME_RANGE_INFO 
of non-NULL can coexist just fine.


How about this?

Aldy
gcc/

* tree-ssanames.c (set_range_info_raw): Abstract from ...
(set_range_info): ...here.  Only call set_range_info_raw if domain
is useful.
(set_nonzero_bits): Call set_range_info_raw.
* tree-ssanames.h (set_range_info_raw): New.

gcc/testsuite/

* gcc.dg/Walloca-14.c: Adapt test to recognize new complaint of
unbounded use.

diff --git a/gcc/testsuite/gcc.dg/Walloca-14.c 
b/gcc/testsuite/gcc.dg/Walloca-14.c
index 723dbe5..f3e3f57 100644
--- a/gcc/testsuite/gcc.dg/Walloca-14.c
+++ b/gcc/testsuite/gcc.dg/Walloca-14.c
@@ -9,5 +9,6 @@ g (int *p)
   extern void f (void *);
 
   void *q = __builtin_alloca (p); /* { dg-warning "passing argument 1" } */
+  /* { dg-warning "unbounded use of 'alloca'" "unbounded" { target *-*-* } 11 
} */
   f (q);
 }
diff --git a/gcc/tree-ssanames.c b/gcc/tree-ssanames.c
index 353c7b1..0053b01 100644
--- a/gcc/tree-ssanames.c
+++ b/gcc/tree-ssanames.c
@@ -320,11 +320,14 @@ make_ssa_name_fn (struct function *fn, tree var, gimple 
*stmt,
   return t;
 }
 
-/* Store range information RANGE_TYPE, MIN, and MAX to tree ssa_name NAME.  */
+/* Helper function for set_range_info.
+
+   Store range information RANGE_TYPE, MIN, and MAX to tree ssa_name
+   NAME.  */
 
 void
-set_range_info (tree name, enum value_range_type range_type,
-   const wide_int_ref &min, const wide_int_ref &max)
+set_range_info_raw (tree name, enum value_range_type range_type,
+   const wide_int_ref &min, const wide_int_ref &max)
 {
   gcc_assert (!POINTER_TYPE_P (TREE_TYPE (name)));
   gcc_assert (range_type == VR_RANGE || range_type == VR_ANTI_RANGE);
@@ -360,6 +363,34 @@ set_range_info (tree name, enum value_range_type 
range_type,
 }
 }
 
+/* Store range information RANGE_TYPE, MIN, and MAX to tree ssa_name
+   NAME while making sure we don't store useless range info.  */
+
+void
+set_range_info (tree name, enum value_range_type range_type,
+   const wide_int_ref &min, const wide_int_ref &max)
+{
+  gcc_assert (!POINTER_TYPE_P (TREE_TYPE (name)));
+
+  /* A range of the entire domain is really no range at all.  */
+  tree type = TREE_TYPE (name);
+  if (min == wi::min_value (TYPE_PRECISION (type), TYPE_SIGN (type))
+  && max == wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type)))
+{
+  range_info_def *ri = SSA_NAME_RANGE_INFO (name);
+  if (ri == NULL)
+   return;
+  if (ri->get_nonzero_bits () == -1)
+   {
+ ggc_free (ri);
+ SSA_NAME_RANGE_INFO (name) = NULL;
+ return;
+   }
+}
+
+  set_range_info_raw (name, range_type, min, max);
+}
+
 
 /* Gets r

Re: [PATCH] Fold (a > 0 ? 1.0 : -1.0) into copysign (1.0, a) and a * copysign (1.0, a) into abs(a)

2017-06-28 Thread Uros Bizjak
On Wed, Jun 28, 2017 at 9:37 AM, Jakub Jelinek  wrote:
> On Tue, Jun 27, 2017 at 10:52:47AM -0700, Andrew Pinski wrote:
>> On Tue, Jun 27, 2017 at 7:56 AM, Richard Biener
>>  wrote:
>> > On June 27, 2017 4:52:28 PM GMT+02:00, Tamar Christina 
>> >  wrote:
>> >>> >> +(for cmp (gt ge lt le)
>> >>> >> + outp (convert convert negate negate)
>> >>> >> + outn (negate negate convert convert)
>> >>> >> + /* Transform (X > 0.0 ? 1.0 : -1.0) into copysign(1, X). */
>> >>> >> + /* Transform (X >= 0.0 ? 1.0 : -1.0) into copysign(1, X). */
>> >>> >> + /* Transform (X < 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
>> >>> >> + /* Transform (X <= 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
>> >>> >> +(simplify
>> >>> >> +  (cond (cmp @0 real_zerop) real_onep real_minus_onep)
>> >>> >> +  (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type)
>> >>> >> +   && types_match (type, TREE_TYPE (@0)))
>> >>> >> +   (switch
>> >>> >> +(if (types_match (type, float_type_node))
>> >>> >> + (BUILT_IN_COPYSIGNF { build_one_cst (type); } (outp @0)))
>> >>> >> +(if (types_match (type, double_type_node))
>> >>> >> + (BUILT_IN_COPYSIGN { build_one_cst (type); } (outp @0)))
>> >>> >> +(if (types_match (type, long_double_type_node))
>> >>> >> + (BUILT_IN_COPYSIGNL { build_one_cst (type); } (outp @0))
>
> The patch regressed the gcc.target/i386/cmov7.c testcase.
> From the description in the testcase it was testing the combiner, so I've
> transformed it into something that also tests the combiner the same way,
> ok for trunk?  That said, for copysign if we match it we don't emit a fcmov
> while it might be a good idea.
>
> 2017-06-28  Jakub Jelinek  
>
> * gcc.target/i386/cmov7.c (sgn): Renamed to ...
> (foo): ... this.  Change constants such that it isn't matched
> as __builtin_copysign, yet tests the combiner the same.

OK.

Thanks,
Uros.

> --- gcc/testsuite/gcc.target/i386/cmov7.c.jj2016-05-22 12:20:23.0 
> +0200
> +++ gcc/testsuite/gcc.target/i386/cmov7.c   2017-06-28 09:20:24.0 
> +0200
> @@ -10,7 +10,7 @@
> (set (reg:DF) (float_extend:DF (mem:SF (symbol_ref....  */
>
>  double
> -sgn (double __x)
> +foo (double __x)
>  {
> -  return __x >= 0.0 ? 1.0 : -1.0;
> +  return __x >= 1.0 ? 0.0 : -1.0;
>  }
>
>
> Jakub


Re: Fix genmultilib reuse rule checks for large sets of option combinations

2017-06-28 Thread Christophe Lyon
Hi Joseph,

On 8 June 2017 at 22:28, Joseph Myers  wrote:
> genmultilib computes combination_space, a list of all combinations of
> options in MULTILIB_OPTIONS that might have multilibs built for them
> (some of which may end up not having multilibs built for them, and
> some of those may end up being mapped to other multilibs with
> MULTILIB_REUSE).  It is then used to validate the right hand part of
> MULTILIB_REUSE rules, checking with expr that combination_space
> matches a basic regular expression derived from that right hand part.
>
> There are two problems with this approach to validation:
>
> * It requires that right hand part to have options in the same order
>   as in MULTILIB_OPTIONS, in contradiction to the documentation of
>   MULTILIB_REUSE saying that order does not matter there.
>
> * combination_space can be so large that the expr call fails with an
>   E2BIG error.  I have a local ARM configuration with 40 multilibs but
>   3840 combinations of options from MULTILIB_OPTIONS (so 3839 listed
>   in combination_space, since it doesn't list the default multilib)
>   and 996 MULTILIB_REUSE rules.  This generates a combination_space
>   string longer than the Linux kernel's MAX_ARG_STRLEN (PAGE_SIZE *
>   32, the limit on the length of a single argv string), so that expr
>   cannot be run.
>
> This patch changes the validation approach to generate a much shorter
> extended regular expression for any sequence of multilib options in
> any order, and uses that for the validation instead.
>
> Tested with a built for arm-none-eabi --with-multilib-list=aprofile
> (as a configuration that uses MULTILIB_REUSE).
>
> 2017-06-08  Joseph Myers  
>
> * genmultilib (combination_space): Remove variable.
> Validate reuse rules against regular expression for any sequence
> of multilib options in any order.
>
> Index: gcc/genmultilib
> ===
> --- gcc/genmultilib (revision 249028)
> +++ gcc/genmultilib (working copy)
> @@ -186,8 +186,7 @@
>  EOF
>  chmod +x tmpmultilib
>
> -combination_space=`initial=/ ./tmpmultilib ${options}`
> -combinations="$combination_space"
> +combinations=`initial=/ ./tmpmultilib ${options}`
>
>  # If there exceptions, weed them out now
>  if [ -n "${exceptions}" ]; then
> @@ -460,6 +459,15 @@
>  echo "NULL"
>  echo "};"
>
> +# Generate a regular expression to validate option combinations.
> +options_re=
> +for set in ${options}; do
> +  for opt in `echo ${set} | sed -e 's_[/|]_ _g'`; do
> +options_re="${options_re}${options_re:+|}${opt}"
> +  done
> +done
> +options_re="^/((${options_re})/)*\$"
> +
>  # Output rules used for multilib reuse.
>  echo ""
>  echo "static const char *const multilib_reuse_raw[] = {"
> @@ -473,7 +481,7 @@
># in this variable, it means no multilib will be built for current reuse
># rule.  Thus the reuse purpose specified by current rule is meaningless.
>if expr "${combinations} " : ".*/${combo}/.*" > /dev/null; then
> -if expr "${combination_space} " : ".*/${copts}/.*" > /dev/null; then
> +if echo "/${copts}/" | grep -E "${options_re}" > /dev/null; then
>combo="/${combo}/"
>dirout=`./tmpmultilib3 "${combo}" "${todirnames}" "${toosdirnames}" 
> "${enable_multilib}"`
>copts="/${copts}/"
>

This broke my builds, where I do not use
--with-multilib-list=aprofile, and uses the default.
I suspect it would also fail for the aprofile multilibs, though.

I think there's a problem with options that have a '+' which confuses
the regexp in options_re.
For instance, I get this error message:
The rule mthumb=mthumb/mfpu.auto/march.armv5te+fp contains an option
absent from MULTILIB_OPTIONS.

A bit of manual debugging led me to:
$ echo /mthumb/mfpu=auto/march=armv5te+fp/ | grep -E
'^/((marm|mthumb|mfpu=auto|march=armv5te+fp|march=armv7+fp|mfloat-abi=hard)/)*$'
[empty result]

Replacing the '+' in armv5te+fp with either '\+' or '.' allows the
pattern to match:
/mthumb/mfpu=auto/march=armv5te+fp/

Is it a matter of adding sed -e 's/\+/./g' when building options_re ?
Or would this break something else?

Thanks,

Christophe

> --
> Joseph S. Myers
> jos...@codesourcery.com


Re: [PATCH GCC][3/6]New file computing regional register pressure on TREE SSA

2017-06-28 Thread Bin.Cheng
On Tue, Jun 27, 2017 at 6:40 PM, Jeff Law  wrote:
> On 05/12/2017 05:28 AM, Bin Cheng wrote:
>> Hi,
>> This patch computes register pressure information on TREE SSA by a backward 
>> live
>> range data flow problem.  The major motivation is to estimate register 
>> pressure
>> for inner-most loop on TREE SSA, then other optimizations can use it.  So 
>> far the
>> information is used only in predcom later, but it could be useful to 
>> implement a
>> tree level scheduler in order to shrink live ranges.  Unfortunately the 
>> example
>> live range shrink pass I implemented doesn't have obvious impact on 
>> performance.
>> I think one reason is TER which effectively undoes its effect.  Maybe it 
>> will be
>> useful once TER/expanding is replaced with a better instruction selector, it 
>> is
>> not included in this patch.
>> One fact I need to mention is David proposed a similar patch long time ago at
>> https://gcc.gnu.org/ml/gcc-patches/2008-12/msg01261.html.  It tries to 
>> compute
>> register pressure information on tree ssa and shrink live ranges based on 
>> that
>> information.  Unfortunately the patch wasn't merged in the end.  There has 
>> been
>> quite changes in GCC implementation, I didn't use its code directly.  
>> However,
>> I did read that patch and had it in mind when implementing this one.  If 
>> there
>> is any issue in this patch, it would be me that should be blamed.  I also 
>> sent
>> message to David about this patch and the possible relation with his.
>>
>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>
>> Thanks,
>> bin
>>
>> 2017-05-10  Xinliang David Li  
>>   Bin Cheng  
>>
>>   * Makefile.in (tree-ssa-regpressure.o): New object file.
>>   * tree-ssa-regpressure.c: New file.
>>   * tree-ssa-regpressure.h: New file.
> Any thoughts on tests, either end-to-end or unit testing?
>
> At a high level does this make more sense as a pass or as a function
> that is called by other passes?  I don't have a strong opinion here,
> just putting the question out there for discussion.
>
> You've got a live computation solver in here.  Is there some reason you
> don't use the existing life analysis code?   I'd prefer not have have
> another life analysis implementation if we can avoid it.  And if you
> were using that code, I think you can easily get the coalescing data
> you're using as well.
>
> I haven't gone through all the detail in the patch as I think we need to
> make sure we've got the design issues right first.  BUt there are a
> couple nits noted inline below.
>
>
Hi Jeff, Thanks very much for the review.  I plan to revisit this
after current tasks as well as study more benchmark cases for register
pressure.

Thanks,
bin
>
>
>
>
>>
>>
>> 0003-tree-ssa-regpressure-20170504.txt
>>
>>
>> From bf6e51ff68d87c372719de567d4de49d77744f77 Mon Sep 17 00:00:00 2001
>> From: Bin Cheng 
>> Date: Mon, 8 May 2017 15:20:27 +0100
>> Subject: [PATCH 3/6] tree-ssa-regpressure-20170504.txt
>>
>> ---
>>  gcc/Makefile.in|   1 +
>>  gcc/tree-ssa-regpressure.c | 829 
>> +
>>  gcc/tree-ssa-regpressure.h |  21 ++
>>  3 files changed, 851 insertions(+)
>>  create mode 100644 gcc/tree-ssa-regpressure.c
>>  create mode 100644 gcc/tree-ssa-regpressure.h
>>
>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>> index 97259ac..abfd4bc 100644
>> --- a/gcc/Makefile.in
>> +++ b/gcc/Makefile.in
>> @@ -1534,6 +1534,7 @@ OBJS = \
>>   tree-ssa-pre.o \
>>   tree-ssa-propagate.o \
>>   tree-ssa-reassoc.o \
>> + tree-ssa-regpressure.o \
>>   tree-ssa-sccvn.o \
>>   tree-ssa-scopedtables.o \
>>   tree-ssa-sink.o \
>> diff --git a/gcc/tree-ssa-regpressure.c b/gcc/tree-ssa-regpressure.c
>> new file mode 100644
>> index 000..ebc6576
>> --- /dev/null
>> +++ b/gcc/tree-ssa-regpressure.c
>> @@ -0,0 +1,829 @@
>> +/* Reg Pressure Model and Live Range Shrinking Optimization on TREE SSA.
>> +   Copyright (C) 2017 Free Software Foundation, Inc.
>> +
>> +This file is part of GCC.
>> +
>> +GCC is free software; you can redistribute it and/or modify it
>> +under the terms of the GNU General Public License as published by the
>> +Free Software Foundation; either version 3, or (at your option) any
>> +later version.
>> +
>> +GCC is distributed in the hope that it will be useful, but WITHOUT
>> +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
>> +for more details.
>> +
>> +You should have received a copy of the GNU General Public License
>> +along with GCC; see the file COPYING3.  If not see
>> +.  */
>> +
>> +
>> +#include "config.h"
>> +#include "system.h"
>> +#include "coretypes.h"
>> +#include "backend.h"
>> +#include "rtl.h"
>> +#include "memmodel.h"
>> +#include "ira.h"
> So I suspect what we need from ira.h is fairly narrow.  Would it be
> possible to pull the externalized interfaces we need for 

Re: Fix genmultilib reuse rule checks for large sets of option combinations

2017-06-28 Thread Christophe Lyon
On 28 June 2017 at 10:03, Christophe Lyon  wrote:
> Hi Joseph,
>
> On 8 June 2017 at 22:28, Joseph Myers  wrote:
>> genmultilib computes combination_space, a list of all combinations of
>> options in MULTILIB_OPTIONS that might have multilibs built for them
>> (some of which may end up not having multilibs built for them, and
>> some of those may end up being mapped to other multilibs with
>> MULTILIB_REUSE).  It is then used to validate the right hand part of
>> MULTILIB_REUSE rules, checking with expr that combination_space
>> matches a basic regular expression derived from that right hand part.
>>
>> There are two problems with this approach to validation:
>>
>> * It requires that right hand part to have options in the same order
>>   as in MULTILIB_OPTIONS, in contradiction to the documentation of
>>   MULTILIB_REUSE saying that order does not matter there.
>>
>> * combination_space can be so large that the expr call fails with an
>>   E2BIG error.  I have a local ARM configuration with 40 multilibs but
>>   3840 combinations of options from MULTILIB_OPTIONS (so 3839 listed
>>   in combination_space, since it doesn't list the default multilib)
>>   and 996 MULTILIB_REUSE rules.  This generates a combination_space
>>   string longer than the Linux kernel's MAX_ARG_STRLEN (PAGE_SIZE *
>>   32, the limit on the length of a single argv string), so that expr
>>   cannot be run.
>>
>> This patch changes the validation approach to generate a much shorter
>> extended regular expression for any sequence of multilib options in
>> any order, and uses that for the validation instead.
>>
>> Tested with a built for arm-none-eabi --with-multilib-list=aprofile
>> (as a configuration that uses MULTILIB_REUSE).
>>
>> 2017-06-08  Joseph Myers  
>>
>> * genmultilib (combination_space): Remove variable.
>> Validate reuse rules against regular expression for any sequence
>> of multilib options in any order.
>>
>> Index: gcc/genmultilib
>> ===
>> --- gcc/genmultilib (revision 249028)
>> +++ gcc/genmultilib (working copy)
>> @@ -186,8 +186,7 @@
>>  EOF
>>  chmod +x tmpmultilib
>>
>> -combination_space=`initial=/ ./tmpmultilib ${options}`
>> -combinations="$combination_space"
>> +combinations=`initial=/ ./tmpmultilib ${options}`
>>
>>  # If there exceptions, weed them out now
>>  if [ -n "${exceptions}" ]; then
>> @@ -460,6 +459,15 @@
>>  echo "NULL"
>>  echo "};"
>>
>> +# Generate a regular expression to validate option combinations.
>> +options_re=
>> +for set in ${options}; do
>> +  for opt in `echo ${set} | sed -e 's_[/|]_ _g'`; do
>> +options_re="${options_re}${options_re:+|}${opt}"
>> +  done
>> +done
>> +options_re="^/((${options_re})/)*\$"
>> +
>>  # Output rules used for multilib reuse.
>>  echo ""
>>  echo "static const char *const multilib_reuse_raw[] = {"
>> @@ -473,7 +481,7 @@
>># in this variable, it means no multilib will be built for current reuse
>># rule.  Thus the reuse purpose specified by current rule is meaningless.
>>if expr "${combinations} " : ".*/${combo}/.*" > /dev/null; then
>> -if expr "${combination_space} " : ".*/${copts}/.*" > /dev/null; then
>> +if echo "/${copts}/" | grep -E "${options_re}" > /dev/null; then
>>combo="/${combo}/"
>>dirout=`./tmpmultilib3 "${combo}" "${todirnames}" "${toosdirnames}" 
>> "${enable_multilib}"`
>>copts="/${copts}/"
>>
>
> This broke my builds, where I do not use
> --with-multilib-list=aprofile, and uses the default.
> I suspect it would also fail for the aprofile multilibs, though.
>
> I think there's a problem with options that have a '+' which confuses
> the regexp in options_re.
> For instance, I get this error message:
> The rule mthumb=mthumb/mfpu.auto/march.armv5te+fp contains an option
> absent from MULTILIB_OPTIONS.
>
> A bit of manual debugging led me to:
> $ echo /mthumb/mfpu=auto/march=armv5te+fp/ | grep -E
> '^/((marm|mthumb|mfpu=auto|march=armv5te+fp|march=armv7+fp|mfloat-abi=hard)/)*$'
> [empty result]
>
> Replacing the '+' in armv5te+fp with either '\+' or '.' allows the
> pattern to match:
> /mthumb/mfpu=auto/march=armv5te+fp/
>
> Is it a matter of adding sed -e 's/\+/./g' when building options_re ?
> Or would this break something else?
>
This small patch:
diff --git a/gcc/genmultilib b/gcc/genmultilib
index 0767e68..e65a0dd 100644
--- a/gcc/genmultilib
+++ b/gcc/genmultilib
@@ -462,7 +462,7 @@ echo "};"
 # Generate a regular expression to validate option combinations.
 options_re=
 for set in ${options}; do
-  for opt in `echo ${set} | sed -e 's_[/|]_ _g'`; do
+  for opt in `echo ${set} | sed -e 's_[/|]_ _g' | sed -e 's/\+/./g' `; do
 options_re="${options_re}${options_re:+|}${opt}"
   done
 done

works for me. If OK, I'll commit it with a suitable ChangeLog entry.

Thanks,

> Thanks,
>
> Christophe
>
>> --
>> Joseph S. Myers
>> jos...@codesourcery.com


Re: Fix genmultilib reuse rule checks for large sets of option combinations

2017-06-28 Thread Andreas Schwab
On Jun 28 2017, Christophe Lyon  wrote:

> diff --git a/gcc/genmultilib b/gcc/genmultilib
> index 0767e68..e65a0dd 100644
> --- a/gcc/genmultilib
> +++ b/gcc/genmultilib
> @@ -462,7 +462,7 @@ echo "};"
>  # Generate a regular expression to validate option combinations.
>  options_re=
>  for set in ${options}; do
> -  for opt in `echo ${set} | sed -e 's_[/|]_ _g'`; do
> +  for opt in `echo ${set} | sed -e 's_[/|]_ _g' | sed -e 's/\+/./g' `; do

No need to run two seds, just pass -e twice.  Also, + isn't special, so
no backslash.

Andreas.

-- 
Andreas Schwab, SUSE Labs, sch...@suse.de
GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
"And now for something completely different."


Re: [PATCH] Fold (a > 0 ? 1.0 : -1.0) into copysign (1.0, a) and a * copysign (1.0, a) into abs(a)

2017-06-28 Thread Christophe Lyon
On 25 June 2017 at 23:28, Andrew Pinski  wrote:
> On Sun, Jun 25, 2017 at 11:18 AM, Andrew Pinski  wrote:
>> On Sun, Jun 25, 2017 at 1:28 AM, Marc Glisse  wrote:
>>> +(for cmp (gt ge lt le)
>>> + outp (convert convert negate negate)
>>> + outn (negate negate convert convert)
>>> + /* Transform (X > 0.0 ? 1.0 : -1.0) into copysign(1, X). */
>>> + /* Transform (X >= 0.0 ? 1.0 : -1.0) into copysign(1, X). */
>>> + /* Transform (X < 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
>>> + /* Transform (X <= 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
>>> + (simplify
>>> +  (cond (cmp @0 real_zerop) real_onep real_minus_onep)
>>> +  (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type)
>>> +   && types_match (type, TREE_TYPE (@0)))
>>> +   (switch
>>> +(if (types_match (type, float_type_node))
>>> + (BUILT_IN_COPYSIGNF { build_one_cst (type); } (outp @0)))
>>> +(if (types_match (type, double_type_node))
>>> + (BUILT_IN_COPYSIGN { build_one_cst (type); } (outp @0)))
>>> +(if (types_match (type, long_double_type_node))
>>> + (BUILT_IN_COPYSIGNL { build_one_cst (type); } (outp @0))
>>>
>>> There is already a 1.0 of the right type in the input, it would be easier to
>>> reuse it in the output than build a new one.
>>
>> Right.  Fixed.
>>
>>>
>>> Non-generic builtins like copysign are such a pain... We also end up missing
>>> the 128-bit case that way (pre-existing problem, not your patch). We seem to
>>> have a corresponding internal function, but apparently it is not used until
>>> expansion (well, maybe during vectorization).
>>
>> Yes I noticed that while working on a different patch related to
>> copysign; The generic version of a*copysign(1.0, b) [see the other
>> thread where the ARM folks started a patch for it; yes it was by pure
>> accident that I was working on this and really did not notice that
>> thread until yesterday].
>> I was looking into a nice way of creating copysign without having to
>> do the switch but I could not find one.  In the end I copied was done
>> already in a different location in match.pd; this is also the reason
>> why I had the build_one_cst there.
>>
>>>
>>> + /* Transform (X > 0.0 ? -1.0 : 1.0) into copysign(1,-X). */
>>> + /* Transform (X >= 0.0 ? -1.0 : 1.0) into copysign(1,-X). */
>>> + /* Transform (X < 0.0 ? -1.0 : 1.0) into copysign(1,X). */
>>> + /* Transform (X <= 0.0 ? -1.0 : 1.0) into copysign(1,X). */
>>> + (simplify
>>> +  (cond (cmp @0 real_zerop) real_minus_onep real_onep)
>>> +  (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type)
>>> +   && types_match (type, TREE_TYPE (@0)))
>>> +   (switch
>>> +(if (types_match (type, float_type_node))
>>> + (BUILT_IN_COPYSIGNF { build_one_cst (type); } (outn @0)))
>>> +(if (types_match (type, double_type_node))
>>> + (BUILT_IN_COPYSIGN { build_one_cst (type); } (outn @0)))
>>> +(if (types_match (type, long_double_type_node))
>>> + (BUILT_IN_COPYSIGNL { build_one_cst (type); } (outn @0)))
>>> +
>>> +/* Transform X * copysign (1.0, X) into abs(X). */
>>> +(simplify
>>> + (mult:c @0 (COPYSIGN real_onep @0))
>>> + (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type))
>>> +  (abs @0)))
>>>
>>> I would have expected it do to the right thing for signed zero and qNaN. Can
>>> you describe a case where it would give the wrong result, or are the
>>> conditions just conservative?
>>
>> I was just being conservative; maybe too conservative but I was a bit
>> worried I could get it incorrect.
>>
>>>
>>> +/* Transform X * copysign (1.0, -X) into -abs(X). */
>>> +(simplify
>>> + (mult:c @0 (COPYSIGN real_onep (negate @0)))
>>> + (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type))
>>> +  (negate (abs @0
>>> +
>>> +/* Transform copysign (-1.0, X) into copysign (1.0, X). */
>>> +(simplify
>>> + (COPYSIGN real_minus_onep @0)
>>> + (COPYSIGN { build_one_cst (type); } @0))
>>>
>>> (simplify
>>>  (COPYSIGN REAL_CST@0 @1)
>>>  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@0)))
>>>   (COPYSIGN (negate @0) @1)))
>>> ? Or does that create trouble with sNaN and only the 1.0 case is worth
>>> the trouble?
>>
>> No that is the correct way; I Noticed the other thread about copysign
>> had something similar as what should be done too.
>>
>> I will send out a new patch after testing soon.
>
> New patch.
> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>
Hi Andrew,

2 of the new testcases fail on aarch64*-elf:
FAIL:gcc.dg/tree-ssa/copy-sign-1.c scan-tree-dump-times gimple "copysign" 8
FAIL:gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple "ABS" 8

The attached patch makes them unsupported by requiring c99_runtime
effective-target.

OK?


> Thanks,
> Andrew Pinski
>
> ChangeLog:
> * match.pd (X >/>=/ (X * copysign (1.0, X)): New pattern.
> (X * copysign (1.0, -X)): New pattern.
> (copysign (-1.0, CST)): New pattern.
>
> testsuite/ChangeLog:
> * gcc.dg/tree-ssa/copy-sign-1.c: New testcase.
> * gcc.dg/tree-ssa/copy-sign-2.c: New testcase.
> * gcc.dg/tree-ssa/

Re: Fix genmultilib reuse rule checks for large sets of option combinations

2017-06-28 Thread Christophe Lyon
On 28 June 2017 at 10:14, Andreas Schwab  wrote:
> On Jun 28 2017, Christophe Lyon  wrote:
>
>> diff --git a/gcc/genmultilib b/gcc/genmultilib
>> index 0767e68..e65a0dd 100644
>> --- a/gcc/genmultilib
>> +++ b/gcc/genmultilib
>> @@ -462,7 +462,7 @@ echo "};"
>>  # Generate a regular expression to validate option combinations.
>>  options_re=
>>  for set in ${options}; do
>> -  for opt in `echo ${set} | sed -e 's_[/|]_ _g'`; do
>> +  for opt in `echo ${set} | sed -e 's_[/|]_ _g' | sed -e 's/\+/./g' `; do
>
> No need to run two seds, just pass -e twice.  Also, + isn't special, so
> no backslash.
>
Indeed, thanks.

Here is what I have committed (r249730)

Christophe

> Andreas.
>
> --
> Andreas Schwab, SUSE Labs, sch...@suse.de
> GPG Key fingerprint = 0196 BAD8 1CE9 1970 F4BE  1748 E4D4 88E3 0EEA B9D7
> "And now for something completely different."
2017-06-28  Christophe Lyon  

* genmultilib (combination_space): Accept '+' in option names.
diff --git a/gcc/genmultilib b/gcc/genmultilib
index 0767e68..1da3a6e 100644
--- a/gcc/genmultilib
+++ b/gcc/genmultilib
@@ -462,7 +462,7 @@ echo "};"
 # Generate a regular expression to validate option combinations.
 options_re=
 for set in ${options}; do
-  for opt in `echo ${set} | sed -e 's_[/|]_ _g'`; do
+  for opt in `echo ${set} | sed -e 's_[/|]_ _g' -e 's/+/./g' `; do
 options_re="${options_re}${options_re:+|}${opt}"
   done
 done


Re: C/C++ PATCH to add __typeof_noqual (PR c/65455, c/39985)

2017-06-28 Thread Joseph Myers
On Wed, 28 Jun 2017, Martin Sebor wrote:

> > I.e., just having blocks to remove qualifiers of kind X is not sufficient
> > without "remove all qualifiers (possibly except these kinds)" as well.  I
> > suppose you could have __remove_quals (const volatile _Atomic, expr) and
> > __remove_quals_except (_Atomic, expr) or similar (with some keyword that
> > goes in there to mean "any address space").
> 
> Right.  My point isn't that the bigger features shouldn't exist,
> but that they can and should be built on top of the primitives
> and defined not in the compiler but in a header.  With
> __bultin_remove_const() and __builtin_remove_volatile()
> a __typeof_noqual(x) can be a macro that expands to these two,
> plus any others as/if necessary, with any other additional

My point is that users should not need to update their definitions every 
time another qualifier is invented.  Either you need the "remove 
qualifiers except" functionality in the language, or you need a 
*compiler-provided* header that defines "remove qualifiers except" 
operations for every combination of qualifiers in that version of the 
compiler (because "remove qualifiers except _Atomic" and "remove 
qualifiers except address spaces" cannot be composed into "remove 
qualifiers except _Atomic and address spaces, and "remove const, volatile, 
restrict and address spaces" is not something user code should need to 
hardcode when the intent is "remove qualifiers except _Atomic").

> adjustments, again if/when necessary.  This is the C++ approach;

C++ doesn't seem to add new qualifiers so much.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] Fold (a > 0 ? 1.0 : -1.0) into copysign (1.0, a) and a * copysign (1.0, a) into abs(a)

2017-06-28 Thread Richard Biener
On Wed, Jun 28, 2017 at 10:50 AM, Christophe Lyon
 wrote:
> On 25 June 2017 at 23:28, Andrew Pinski  wrote:
>> On Sun, Jun 25, 2017 at 11:18 AM, Andrew Pinski  wrote:
>>> On Sun, Jun 25, 2017 at 1:28 AM, Marc Glisse  wrote:
 +(for cmp (gt ge lt le)
 + outp (convert convert negate negate)
 + outn (negate negate convert convert)
 + /* Transform (X > 0.0 ? 1.0 : -1.0) into copysign(1, X). */
 + /* Transform (X >= 0.0 ? 1.0 : -1.0) into copysign(1, X). */
 + /* Transform (X < 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
 + /* Transform (X <= 0.0 ? 1.0 : -1.0) into copysign(1,-X). */
 + (simplify
 +  (cond (cmp @0 real_zerop) real_onep real_minus_onep)
 +  (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type)
 +   && types_match (type, TREE_TYPE (@0)))
 +   (switch
 +(if (types_match (type, float_type_node))
 + (BUILT_IN_COPYSIGNF { build_one_cst (type); } (outp @0)))
 +(if (types_match (type, double_type_node))
 + (BUILT_IN_COPYSIGN { build_one_cst (type); } (outp @0)))
 +(if (types_match (type, long_double_type_node))
 + (BUILT_IN_COPYSIGNL { build_one_cst (type); } (outp @0))

 There is already a 1.0 of the right type in the input, it would be easier 
 to
 reuse it in the output than build a new one.
>>>
>>> Right.  Fixed.
>>>

 Non-generic builtins like copysign are such a pain... We also end up 
 missing
 the 128-bit case that way (pre-existing problem, not your patch). We seem 
 to
 have a corresponding internal function, but apparently it is not used until
 expansion (well, maybe during vectorization).
>>>
>>> Yes I noticed that while working on a different patch related to
>>> copysign; The generic version of a*copysign(1.0, b) [see the other
>>> thread where the ARM folks started a patch for it; yes it was by pure
>>> accident that I was working on this and really did not notice that
>>> thread until yesterday].
>>> I was looking into a nice way of creating copysign without having to
>>> do the switch but I could not find one.  In the end I copied was done
>>> already in a different location in match.pd; this is also the reason
>>> why I had the build_one_cst there.
>>>

 + /* Transform (X > 0.0 ? -1.0 : 1.0) into copysign(1,-X). */
 + /* Transform (X >= 0.0 ? -1.0 : 1.0) into copysign(1,-X). */
 + /* Transform (X < 0.0 ? -1.0 : 1.0) into copysign(1,X). */
 + /* Transform (X <= 0.0 ? -1.0 : 1.0) into copysign(1,X). */
 + (simplify
 +  (cond (cmp @0 real_zerop) real_minus_onep real_onep)
 +  (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type)
 +   && types_match (type, TREE_TYPE (@0)))
 +   (switch
 +(if (types_match (type, float_type_node))
 + (BUILT_IN_COPYSIGNF { build_one_cst (type); } (outn @0)))
 +(if (types_match (type, double_type_node))
 + (BUILT_IN_COPYSIGN { build_one_cst (type); } (outn @0)))
 +(if (types_match (type, long_double_type_node))
 + (BUILT_IN_COPYSIGNL { build_one_cst (type); } (outn @0)))
 +
 +/* Transform X * copysign (1.0, X) into abs(X). */
 +(simplify
 + (mult:c @0 (COPYSIGN real_onep @0))
 + (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type))
 +  (abs @0)))

 I would have expected it do to the right thing for signed zero and qNaN. 
 Can
 you describe a case where it would give the wrong result, or are the
 conditions just conservative?
>>>
>>> I was just being conservative; maybe too conservative but I was a bit
>>> worried I could get it incorrect.
>>>

 +/* Transform X * copysign (1.0, -X) into -abs(X). */
 +(simplify
 + (mult:c @0 (COPYSIGN real_onep (negate @0)))
 + (if (!HONOR_NANS (type) && !HONOR_SIGNED_ZEROS (type))
 +  (negate (abs @0
 +
 +/* Transform copysign (-1.0, X) into copysign (1.0, X). */
 +(simplify
 + (COPYSIGN real_minus_onep @0)
 + (COPYSIGN { build_one_cst (type); } @0))

 (simplify
  (COPYSIGN REAL_CST@0 @1)
  (if (REAL_VALUE_NEGATIVE (TREE_REAL_CST (@0)))
   (COPYSIGN (negate @0) @1)))
 ? Or does that create trouble with sNaN and only the 1.0 case is worth
 the trouble?
>>>
>>> No that is the correct way; I Noticed the other thread about copysign
>>> had something similar as what should be done too.
>>>
>>> I will send out a new patch after testing soon.
>>
>> New patch.
>> OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.
>>
> Hi Andrew,
>
> 2 of the new testcases fail on aarch64*-elf:
> FAIL:gcc.dg/tree-ssa/copy-sign-1.c scan-tree-dump-times gimple "copysign" 
> 8
> FAIL:gcc.dg/tree-ssa/mult-abs-2.c scan-tree-dump-times gimple "ABS" 8
>
> The attached patch makes them unsupported by requiring c99_runtime
> effective-target.
>
> OK?

Ok.

>
>> Thanks,
>> Andrew Pinski
>>
>> ChangeLog:
>> * match.pd (X >/>=/> (X * copysign (1.0, X)): New pattern.
>> (X * 

[PATCH PR81196]Analyze ntiers for loop with exit condition comparing induction variables

2017-06-28 Thread Bin Cheng
Hi,
This patch picks up a missed-optimization case in loop niter analysis.  With 
this
patch, niters information for loop as in added test can be analyzed.  Bootstrap
and test on x86_64 and AArch64.  Is it OK?

Thanks,
bin
2017-06-27  Bin Cheng  

PR tree-optimization/81196
* tree-ssa-loop-niter.c (number_of_iterations_cond): Handle loop
exit condition comparing two IVs.

gcc/testsuite/ChangeLog
2017-06-27  Bin Cheng  

PR tree-optimization/81196
* gcc.dg/vect/pr81196.c: New.From e11856e20b64bacaa4c5ebc3ea08f875160161dc Mon Sep 17 00:00:00 2001
From: Bin Cheng 
Date: Mon, 26 Jun 2017 15:06:31 +0100
Subject: [PATCH] pr81196-20170626.txt

---
 gcc/testsuite/gcc.dg/vect/pr81196.c | 19 +++
 gcc/tree-ssa-loop-niter.c   | 30 +++---
 2 files changed, 42 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr81196.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr81196.c b/gcc/testsuite/gcc.dg/vect/pr81196.c
new file mode 100644
index 000..46d7a9e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr81196.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_perm_short } */
+
+void f(short*p){
+  p=(short*)__builtin_assume_aligned(p,64);
+  short*q=p+256;
+  for(;p!=q;++p,--q){
+short t=*p;*p=*q;*q=t;
+  }
+}
+void b(short*p){
+  p=(short*)__builtin_assume_aligned(p,64);
+  short*q=p+256;
+  for(;pstep) && !integer_zerop (iv1->step))
 {
   tree step_type = POINTER_TYPE_P (type) ? sizetype : type;
-  if (code != NE_EXPR)
+  tree step = fold_binary_to_constant (MINUS_EXPR, step_type,
+	   iv0->step, iv1->step);
+
+  /* No need to check sign of the new step since below code takes care
+	 of this well.  */
+  if (code != NE_EXPR && TREE_CODE (step) != INTEGER_CST)
 	return false;
 
-  iv0->step = fold_binary_to_constant (MINUS_EXPR, step_type,
-	   iv0->step, iv1->step);
-  iv0->no_overflow = false;
+  iv0->step = step;
+  if (!POINTER_TYPE_P (type))
+	iv0->no_overflow = false;
+
   iv1->step = build_int_cst (step_type, 0);
   iv1->no_overflow = true;
 }
-- 
1.9.1



Re: [PATCH] multiarch support for non-glibc linux systems

2017-06-28 Thread Matthias Klose
On 07.06.2017 19:22, Szabolcs Nagy wrote:
> Current multiarch directory name is always *-linux-gnu* on linux,
> this patch configures different names for uclibc and musl targets.
> (tested by the debian rebootstrap scripts for various *-linux-musl
> and *-linux-uclibc targets see debian bug #861588)
> 
> gcc/
> 2017-06-07  Szabolcs Nagy  
> 
>   * config.gcc (*-linux-musl*): Add t-musl tmake_file.
>   (*-linux-uclibc*): Add t-uclibc tmake_file.
>   * config/t-musl: New.
>   * config/t-uclibc: New.

I can't formally approve that, but this looks ok.

Matthias


[PATCH][x86] Add permutex[var]_epi[32,64] intrinsics

2017-06-28 Thread Peryt, Sebastian
Hi,

This patch adds missing intrinsics:
- _mm256_permutexvar_epi32
- _mm256_permutex_epi64
- _mm256_permutexvar_epi64

gcc/
* config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64, 
_mm256_permutexvar_epi32,
_mm256_permutex_epi64): New intrinsics.

gcc/tesuite/
* gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32): Test 
new intrinsic.
* gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64): 
Ditto.
* gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64): 
Ditto.
*gcc.target/i386/avx512f-vpermd-2.c: Removed define length constraint.
* gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
* gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.

Is it ok for trunk?

Thanks,
Sebastian


permutex.patch
Description: permutex.patch


Re: [PATCH GCC][13/13]Distribute loop with loop versioning under runtime alias check

2017-06-28 Thread Richard Biener
On Tue, Jun 27, 2017 at 4:07 PM, Bin.Cheng  wrote:
> On Tue, Jun 27, 2017 at 1:44 PM, Richard Biener
>  wrote:
>> On Fri, Jun 23, 2017 at 12:30 PM, Bin.Cheng  wrote:
>>> On Tue, Jun 20, 2017 at 10:22 AM, Bin.Cheng  wrote:
 On Mon, Jun 12, 2017 at 6:03 PM, Bin Cheng  wrote:
> Hi,
>>> Rebased V3 for changes in previous patches.  Bootstap and test on
>>> x86_64 and aarch64.
>>
>> why is ldist-12.c no longer distributed?  your comment says it doesn't expose
>> more "parallelism" but the point is to reduce memory bandwith requirements
>> which it clearly does.
>>
>> Likewise for -13.c, -14.c.  -4.c may be a questionable case but the wording
>> of "parallelism" still confuses me.
>>
>> Can you elaborate on that.  Now onto the patch:
> Given we don't model data locality or memory bandwidth, whether
> distribution enables loops that can be executed paralleled becomes the
> major criteria for distribution.  BTW, I think a good memory stream
> optimization model shouldn't consider small loops as in ldist-12.c,
> etc., appropriate for distribution.

True.  But what means "parallel" here?  ldist-13.c if partitioned into two loops
can be executed "in parallel"

>>
>> +   Loop distribution is the dual of loop fusion.  It separates statements
>> +   of a loop (or loop nest) into multiple loops (or loop nests) with the
>> +   same loop header.  The major goal is to separate statements which may
>> +   be vectorized from those that can't.  This pass implements distribution
>> +   in the following steps:
>>
>> misses the goal of being a memory stream optimization, not only a 
>> vectorization
>> enabler.  distributing a loop can also reduce register pressure.
> I will revise the comment, but as explained, enabling more
> vectorization is the major criteria for distribution to some extend
> now.

Yes, I agree -- originally it was written to optimize the stream benchmark IIRC.

>>
>> You introduce ldist_alias_id in struct loop (probably in 01/n which I
>> didn't look
>> into yet).  If you don't use that please introduce it separately.
> Hmm, yes it is introduced in patch [01/n] and set in this patch.
>
>>
>> + /* Be conservative.  If data references are not well analyzed,
>> +or the two data references have the same base address and
>> +offset, add dependence and consider it alias to each other.
>> +In other words, the dependence can not be resolved by
>> +runtime alias check.  */
>> + if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
>> + || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
>> + || !DR_INIT (dr1) || !DR_INIT (dr2)
>> + || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
>> + || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
>> + || res == 0)
>>
>> ISTR a helper that computes whether we can handle a runtime alias check for
>> a specific case?
> I guess you mean runtime_alias_check_p that I factored out previously?
>  Unfortunately, it's factored out vectorizer's usage and doesn't fit
> here straightforwardly.  Shall I try to further generalize the
> interface as independence patch to this one?

That would be nice.

>>
>> +  /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
>> +  if (flag_tree_loop_vectorize)
>> +{
>>
>> so at this point I'd condition the whole runtime-alias check generating
>> on flag_tree_loop_vectorize.  You seem to support versioning w/o
>> that here but in other places disable versioning w/o 
>> flag_tree_loop_vectorize.
>> That looks somewhat inconsistent...
> It is a bit complicated.  In function version_for_distribution_p, we have
> +
> +  /* Need to version loop if runtime alias check is necessary.  */
> +  if (alias_ddrs->length () > 0)
> +return true;
> +
> +  /* Don't version the loop with call to IFN_LOOP_DIST_ALIAS if
> + vectorizer is not enable because no other pass can fold it.  */
> +  if (!flag_tree_loop_vectorize)
> +return false;
> +
>
> It means we also versioning loops even if runtime alias check is
> unnecessary.  I did this because we lack cost model and current
> distribution may result in too many distribution?  If that's the case,
> at least vectorizer will remove distributed version loop and fall back
> to the original one.  Hmm, shall I change it into below code:

Hmm, ok - that really ties things to vectorization apart from the
builtin recognition.  So what happens if the vectorizer can vectorize
both the distributed and non-distributed loops?

> +
> +  /* Need to version loop if runtime alias check is necessary.  */
> +  if (alias_ddrs->length () == 0)
> +return false;
> +
> +  /* Don't version the loop with call to IFN_LOOP_DIST_ALIAS if
> + vectorizer is not enable because no other pass can fold it.  */
> +  if (!flag_tree_loop_vectorize)
> +return false;
> +
>
> Then I can remove the check you mentioned in function
> version_loop_by_alias_check?

Yeah, I

Re: [PATCH GCC][3/6]New file computing regional register pressure on TREE SSA

2017-06-28 Thread Richard Biener
On Wed, Jun 28, 2017 at 10:09 AM, Bin.Cheng  wrote:
> On Tue, Jun 27, 2017 at 6:40 PM, Jeff Law  wrote:
>> On 05/12/2017 05:28 AM, Bin Cheng wrote:
>>> Hi,
>>> This patch computes register pressure information on TREE SSA by a backward 
>>> live
>>> range data flow problem.  The major motivation is to estimate register 
>>> pressure
>>> for inner-most loop on TREE SSA, then other optimizations can use it.  So 
>>> far the
>>> information is used only in predcom later, but it could be useful to 
>>> implement a
>>> tree level scheduler in order to shrink live ranges.  Unfortunately the 
>>> example
>>> live range shrink pass I implemented doesn't have obvious impact on 
>>> performance.
>>> I think one reason is TER which effectively undoes its effect.  Maybe it 
>>> will be
>>> useful once TER/expanding is replaced with a better instruction selector, 
>>> it is
>>> not included in this patch.
>>> One fact I need to mention is David proposed a similar patch long time ago 
>>> at
>>> https://gcc.gnu.org/ml/gcc-patches/2008-12/msg01261.html.  It tries to 
>>> compute
>>> register pressure information on tree ssa and shrink live ranges based on 
>>> that
>>> information.  Unfortunately the patch wasn't merged in the end.  There has 
>>> been
>>> quite changes in GCC implementation, I didn't use its code directly.  
>>> However,
>>> I did read that patch and had it in mind when implementing this one.  If 
>>> there
>>> is any issue in this patch, it would be me that should be blamed.  I also 
>>> sent
>>> message to David about this patch and the possible relation with his.
>>>
>>> Bootstrap and test on x86_64 and AArch64.  Is it OK?
>>>
>>> Thanks,
>>> bin
>>>
>>> 2017-05-10  Xinliang David Li  
>>>   Bin Cheng  
>>>
>>>   * Makefile.in (tree-ssa-regpressure.o): New object file.
>>>   * tree-ssa-regpressure.c: New file.
>>>   * tree-ssa-regpressure.h: New file.
>> Any thoughts on tests, either end-to-end or unit testing?
>>
>> At a high level does this make more sense as a pass or as a function
>> that is called by other passes?  I don't have a strong opinion here,
>> just putting the question out there for discussion.
>>
>> You've got a live computation solver in here.  Is there some reason you
>> don't use the existing life analysis code?   I'd prefer not have have
>> another life analysis implementation if we can avoid it.  And if you
>> were using that code, I think you can easily get the coalescing data
>> you're using as well.
>>
>> I haven't gone through all the detail in the patch as I think we need to
>> make sure we've got the design issues right first.  BUt there are a
>> couple nits noted inline below.
>>
>>
> Hi Jeff, Thanks very much for the review.  I plan to revisit this
> after current tasks as well as study more benchmark cases for register
> pressure.

Please also rename the files to ssa-regpressure.h and ssa-prepressure.cc
(I think new files should get C++ extensions now).

Richard.

> Thanks,
> bin
>>
>>
>>
>>
>>>
>>>
>>> 0003-tree-ssa-regpressure-20170504.txt
>>>
>>>
>>> From bf6e51ff68d87c372719de567d4de49d77744f77 Mon Sep 17 00:00:00 2001
>>> From: Bin Cheng 
>>> Date: Mon, 8 May 2017 15:20:27 +0100
>>> Subject: [PATCH 3/6] tree-ssa-regpressure-20170504.txt
>>>
>>> ---
>>>  gcc/Makefile.in|   1 +
>>>  gcc/tree-ssa-regpressure.c | 829 
>>> +
>>>  gcc/tree-ssa-regpressure.h |  21 ++
>>>  3 files changed, 851 insertions(+)
>>>  create mode 100644 gcc/tree-ssa-regpressure.c
>>>  create mode 100644 gcc/tree-ssa-regpressure.h
>>>
>>> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
>>> index 97259ac..abfd4bc 100644
>>> --- a/gcc/Makefile.in
>>> +++ b/gcc/Makefile.in
>>> @@ -1534,6 +1534,7 @@ OBJS = \
>>>   tree-ssa-pre.o \
>>>   tree-ssa-propagate.o \
>>>   tree-ssa-reassoc.o \
>>> + tree-ssa-regpressure.o \
>>>   tree-ssa-sccvn.o \
>>>   tree-ssa-scopedtables.o \
>>>   tree-ssa-sink.o \
>>> diff --git a/gcc/tree-ssa-regpressure.c b/gcc/tree-ssa-regpressure.c
>>> new file mode 100644
>>> index 000..ebc6576
>>> --- /dev/null
>>> +++ b/gcc/tree-ssa-regpressure.c
>>> @@ -0,0 +1,829 @@
>>> +/* Reg Pressure Model and Live Range Shrinking Optimization on TREE SSA.
>>> +   Copyright (C) 2017 Free Software Foundation, Inc.
>>> +
>>> +This file is part of GCC.
>>> +
>>> +GCC is free software; you can redistribute it and/or modify it
>>> +under the terms of the GNU General Public License as published by the
>>> +Free Software Foundation; either version 3, or (at your option) any
>>> +later version.
>>> +
>>> +GCC is distributed in the hope that it will be useful, but WITHOUT
>>> +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>>> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
>>> +for more details.
>>> +
>>> +You should have received a copy of the GNU General Public License
>>> +along with GCC; see the file COPYING3.  If not see
>>> +

Re: [PATCH] Move static chain and non-local goto init after NOTE_INSN_FUNCTION_BEG (PR sanitize/81186).

2017-06-28 Thread Martin Liška
On 06/27/2017 05:29 PM, Michael Matz wrote:
> Hi,
> 
> On Tue, 27 Jun 2017, Martin Liška wrote:
> 
>> Following bug was for me very educative. I learned that we support 
>> non-local gotos that can be combined with nested functions. Real fun :) 
>> Well, the problem is that both cfun->nonlocal_goto_save_area and 
>> cfun->static_chain_decl (emitted in expand_function_start) are put 
>> before NOTE_INSN_FUNCTION_BEG. And so result of expand_used_vars is put 
>> after these instrumentations. That causes problems as it uses stack 
>> before we initialize it (use-after-return checking):
> 
> I don't think that's the right fix.  The purpose of 
> NOTE_INSN_FUNCTION_BEG is to mark the "real" beginning of the function, 
> i.e. without all the compiler generated stuff that's necessary to set up 
> parameters or local variables and so on.  The goto save area and the 
> static chain are also such compiler generated implementation details, and 
> hence are correctly put in front of the function begin note.

Hello.

Thanks for the review. I'm not so familiar with RTL, but hopefully new version
of the patch should do it properly. Idea is to come up with a new 
asan_stack_birth_insn
that points after place where stack is ready to use (when one uses 
use-after-return).
And then in function.c static_chain_decl and nonlocal_goto_save_area expansion 
is
generated to a new sequence and the sequence is put after the 
asan_stack_birth_insn.

> 
> Also if you put something in front of the static_chain_decl initialization 
> (as you do if you move the parm_birth_insn in front of it) you'd have to 
> make sure that the incoming hidding parameter containing the static chain 
> (r10 on x86_64) isn't clobbered from function start up to that point.  
> So that won't work either generally.
> 
> I don't know what exactly is the issue with calling 
> __asan_handle_no_return before the other instructions emitted by 
> expand_used_vars.  Either it shouldn't be called for the static chain 
> (i.e. not instrumented) or whatever setup asan needs needs to happen in 
> front of the static chain setup, but then without clobbering the incoming 
> static chain param (!).

Here I need to have FRAME variable to be sanitized as seen in pr78541.c
and pr78541-2.c test-cases.

Thoughts?
Thanks,
Martin

> 
> 
> Ciao,
> Michael.
>>
>> expanded cfun->static_chain_decl:
>>
>> (note 1 0 5 NOTE_INSN_DELETED)
>> (note 5 1 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
>> (insn 2 5 3 2 (set (reg/f:DI 88 [ CHAIN.1 ])
>> (reg:DI 39 r10 [ CHAIN.1 ])) "pr81186.c":5 -1
>>  (nil))
>> (insn 3 2 4 2 (set (mem/c:DI (plus:DI (reg/f:DI 82 virtual-stack-vars)
>> (const_int -8 [0xfff8])) [0  S8 A64])
>> (reg:DI 39 r10 [ CHAIN.1 ])) "pr81186.c":5 -1
>>  (nil))
>> (note 4 3 7 2 NOTE_INSN_FUNCTION_BEG)
>> (insn 7 4 8 2 (set (reg/f:DI 87 [ _2 ])
>> (reg/f:DI 88 [ CHAIN.1 ])) "pr81186.c":5 -1
>>  (nil))
>> (call_insn 8 7 9 2 (call (mem:QI (symbol_ref:DI ("__asan_handle_no_return") 
>> [flags 0x41]  > __builtin___asan_handle_no_return>) [0 __builtin___asan_handle_no_return S1 
>> A8])
>> (const_int 0 [0])) "pr81186.c":5 -1
>>  (expr_list:REG_EH_REGION (const_int 0 [0])
>> (nil))
>> (nil))
>>
>> expanded cfun->nonlocal_goto_save_area:
>>
>> (note 1 0 34 NOTE_INSN_DELETED)
>> (note 34 1 31 2 [bb 2] NOTE_INSN_BASIC_BLOCK)
>> (insn 31 34 32 2 (set (mem/f/c:DI (plus:DI (reg:DI 95)
>> (const_int -64 [0xffc0])) [4 
>> FRAME.0.__nl_goto_buf+0 S8 A64])
>> (reg/f:DI 82 virtual-stack-vars)) "pr81186.c":3 -1
>>  (nil))
>> (insn 32 31 2 2 (set (mem/f/c:DI (plus:DI (reg:DI 95)
>> (const_int -56 [0xffc8])) [4 
>> FRAME.0.__nl_goto_buf+8 S8 A64])
>> (reg/f:DI 7 sp)) "pr81186.c":3 -1
>>  (nil))
>> (insn 2 32 3 2 (parallel [
>> (set (reg:DI 96)
>> (plus:DI (reg/f:DI 82 virtual-stack-vars)
>> (const_int -96 [0xffa0])))
>> (clobber (reg:CC 17 flags))
>> ]) "pr81186.c":3 -1
>>  (nil))
>> (insn 3 2 4 2 (set (reg:DI 97)
>> (reg:DI 96)) "pr81186.c":3 -1
>>  (nil))
>> (insn 4 3 5 2 (set (reg:CCZ 17 flags)
>> (compare:CCZ (mem/c:SI (symbol_ref:DI 
>> ("__asan_option_detect_stack_use_after_return") [flags 0x40]  > 0x2ba005b5b750 __asan_option_detect_stack_use_after_return>) [5 
>> __asan_option_detect_stack_use_after_return+0 S4 A32])
>> (const_int 0 [0]))) "pr81186.c":3 -1
>>  (nil))
>>
>> And thus I suggest to move both these instrumentations after 
>> NOTE_INSN_FUNCTION_BEG.
>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>
>> Ready to be installed?
>> Martin
>>
>> gcc/ChangeLog:
>>
>> 2017-06-27  Martin Liska  
>>
>> PR sanitize/81186
>>  * function.c (expand_function_start): Move static chain and non-local
>>  goto init after NOTE_INSN_FUNCTION_BEG.
>>
>> gcc/testsuite/ChangeLog:
>>
>

Re: [PATCH, ARM] Implement __ARM_FEATURE_COPROC coprocessor intrinsic feature macro

2017-06-28 Thread Thomas Preudhomme



On 26/06/17 17:01, Thomas Preudhomme wrote:

On 26/06/17 15:16, Christophe Lyon wrote:




You mean the macro is expected not to be defined on ARMv8-A ?


Correct. Most instructions its value represent are not available on ARMv8-A and 
for those that are the intrinsics are deprecated.


I've just noticed that many such instructions not available on ARMv8-A are 
accepted by GNU as. I would like to enable/disable coprocessor intrinsics tests 
based on what GNU as returns regarding availability of these instructions so 
hold on a bit more.


Best regards,

Thomas


Re: [PATCH, GCC/ARM, gcc-5-branch, ping] Fix gcc.target/arm/fpscr.c

2017-06-28 Thread Thomas Preudhomme

Ping?

Best regards,

Thomas

On 26/06/17 12:32, Thomas Preudhomme wrote:

Hi,

As raised by Christophe Lyon, fpscr.c FAILs because arm_fp_ok and arm_fp
are not defined in GCC 5. This commit changes the test to use the same
recipe as gcc.target/arm/cmp-2.c

ChangeLog entry is as follows:


*** gcc/testsuite/ChangeLog ***

2017-06-26  Thomas Preud'homme  

 * gcc.target/arm/fpscr.c: Require arm_vfp_ok instead of arm_fp_ok and
 add -mfpu=vfp -mfloat-abi=softfp instead of fp_ok options.


Ok for GCC 5?

Best regards,

Thomas
diff --git a/gcc/testsuite/gcc.target/arm/fpscr.c b/gcc/testsuite/gcc.target/arm/fpscr.c
index 7b4d71d72d8964f6da0d0604bf59aeb4a895df43..cafba4e8d67545bd210477230b9682fe86620e23 100644
--- a/gcc/testsuite/gcc.target/arm/fpscr.c
+++ b/gcc/testsuite/gcc.target/arm/fpscr.c
@@ -1,9 +1,9 @@
 /* Test the fpscr builtins.  */
 
 /* { dg-do compile } */
-/* { dg-require-effective-target arm_fp_ok } */
+/* { dg-require-effective-target arm_vfp_ok } */
 /* { dg-skip-if "need fp instructions" { *-*-* } { "-mfloat-abi=soft" } { "" } } */
-/* { dg-add-options arm_fp } */
+/* { dg-options "-mfpu=vfp -mfloat-abi=softfp" } */
 
 void
 test_fpscr ()


Re: [PATCH GCC][13/13]Distribute loop with loop versioning under runtime alias check

2017-06-28 Thread Bin.Cheng
On Wed, Jun 28, 2017 at 11:58 AM, Richard Biener
 wrote:
> On Tue, Jun 27, 2017 at 4:07 PM, Bin.Cheng  wrote:
>> On Tue, Jun 27, 2017 at 1:44 PM, Richard Biener
>>  wrote:
>>> On Fri, Jun 23, 2017 at 12:30 PM, Bin.Cheng  wrote:
 On Tue, Jun 20, 2017 at 10:22 AM, Bin.Cheng  wrote:
> On Mon, Jun 12, 2017 at 6:03 PM, Bin Cheng  wrote:
>> Hi,
 Rebased V3 for changes in previous patches.  Bootstap and test on
 x86_64 and aarch64.
>>>
>>> why is ldist-12.c no longer distributed?  your comment says it doesn't 
>>> expose
>>> more "parallelism" but the point is to reduce memory bandwith requirements
>>> which it clearly does.
>>>
>>> Likewise for -13.c, -14.c.  -4.c may be a questionable case but the wording
>>> of "parallelism" still confuses me.
>>>
>>> Can you elaborate on that.  Now onto the patch:
>> Given we don't model data locality or memory bandwidth, whether
>> distribution enables loops that can be executed paralleled becomes the
>> major criteria for distribution.  BTW, I think a good memory stream
>> optimization model shouldn't consider small loops as in ldist-12.c,
>> etc., appropriate for distribution.
>
> True.  But what means "parallel" here?  ldist-13.c if partitioned into two 
> loops
> can be executed "in parallel"
So if a loop by itself can be vectorized (or so called can be executed
paralleled), we tend to no distribute it into small ones.  But there
is one exception here, if the distributed small loops are recognized
as builtin functions, we still distribute it.  I assume it's generally
better to call builtin memory functions than vectorize it by GCC?

>
>>>
>>> +   Loop distribution is the dual of loop fusion.  It separates statements
>>> +   of a loop (or loop nest) into multiple loops (or loop nests) with the
>>> +   same loop header.  The major goal is to separate statements which may
>>> +   be vectorized from those that can't.  This pass implements distribution
>>> +   in the following steps:
>>>
>>> misses the goal of being a memory stream optimization, not only a 
>>> vectorization
>>> enabler.  distributing a loop can also reduce register pressure.
>> I will revise the comment, but as explained, enabling more
>> vectorization is the major criteria for distribution to some extend
>> now.
>
> Yes, I agree -- originally it was written to optimize the stream benchmark 
> IIRC.
Let's see if any performance drop will be reported against this patch.
Let's see if we can create a cost model for it.

>
>>>
>>> You introduce ldist_alias_id in struct loop (probably in 01/n which I
>>> didn't look
>>> into yet).  If you don't use that please introduce it separately.
>> Hmm, yes it is introduced in patch [01/n] and set in this patch.
>>
>>>
>>> + /* Be conservative.  If data references are not well analyzed,
>>> +or the two data references have the same base address and
>>> +offset, add dependence and consider it alias to each other.
>>> +In other words, the dependence can not be resolved by
>>> +runtime alias check.  */
>>> + if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
>>> + || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
>>> + || !DR_INIT (dr1) || !DR_INIT (dr2)
>>> + || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
>>> + || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
>>> + || res == 0)
>>>
>>> ISTR a helper that computes whether we can handle a runtime alias check for
>>> a specific case?
>> I guess you mean runtime_alias_check_p that I factored out previously?
>>  Unfortunately, it's factored out vectorizer's usage and doesn't fit
>> here straightforwardly.  Shall I try to further generalize the
>> interface as independence patch to this one?
>
> That would be nice.
>
>>>
>>> +  /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
>>> +  if (flag_tree_loop_vectorize)
>>> +{
>>>
>>> so at this point I'd condition the whole runtime-alias check generating
>>> on flag_tree_loop_vectorize.  You seem to support versioning w/o
>>> that here but in other places disable versioning w/o 
>>> flag_tree_loop_vectorize.
>>> That looks somewhat inconsistent...
>> It is a bit complicated.  In function version_for_distribution_p, we have
>> +
>> +  /* Need to version loop if runtime alias check is necessary.  */
>> +  if (alias_ddrs->length () > 0)
>> +return true;
>> +
>> +  /* Don't version the loop with call to IFN_LOOP_DIST_ALIAS if
>> + vectorizer is not enable because no other pass can fold it.  */
>> +  if (!flag_tree_loop_vectorize)
>> +return false;
>> +
>>
>> It means we also versioning loops even if runtime alias check is
>> unnecessary.  I did this because we lack cost model and current
>> distribution may result in too many distribution?  If that's the case,
>> at least vectorizer will remove distributed version loop and fall back
>> to the original one.  Hmm, shall I change i

[PATCH] Fix PR81227

2017-06-28 Thread Richard Biener

The following avoids generating TREE_OVERFLOW constants from VRPs
avoidance (sic!) of them when generating MAX - 1 for symbolic 1-bit
ranges with -fwrapv.  The issue is that all constant folding is
agnostic of TREE_OVERFLOW_WRAPS but looks at TYPE_UNSIGNED only
and thus negation of signed -1 is overflowing even with -fwrapv but the 
negate_expr_p predicate says otherwise resulting in a + -1(OVF)
generated from a - -1.

I didn't feel like (had a dejavu with bad outcome) changing the
condition when to mark results with TREE_OVERFLOW and instead made
the predicates in negate_expr_p consistent with "overflow".

Bootstrapped and tested on x86_64-unknown-linux-gnu.

Richard.

2017-06-28  Richard Biener  

PR middle-end/81227
* fold-const.c (negate_expr_p): Use TYPE_UNSIGNED, not
TYPE_OVERFLOW_WRAPS.
* match.pd (negate_expr_p): Likewise.
* tree-ssa-reassoc.c (optimize_range_tests_diff): Use
fold_build2, not fold_binary.

* gcc.dg/pr81227.c: New testcase.

Index: gcc/fold-const.c
===
--- gcc/fold-const.c(revision 249729)
+++ gcc/fold-const.c(working copy)
@@ -383,7 +383,7 @@ negate_expr_p (tree t)
   switch (TREE_CODE (t))
 {
 case INTEGER_CST:
-  if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_WRAPS (type))
+  if (INTEGRAL_TYPE_P (type) && TYPE_UNSIGNED (type))
return true;
 
   /* Check that -CST will not overflow type.  */
Index: gcc/match.pd
===
--- gcc/match.pd(revision 249729)
+++ gcc/match.pd(working copy)
@@ -913,7 +913,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (match negate_expr_p
  INTEGER_CST
  (if ((INTEGRAL_TYPE_P (type)
-   && TYPE_OVERFLOW_WRAPS (type))
+   && TYPE_UNSIGNED (type))
   || (!TYPE_OVERFLOW_SANITIZED (type)
  && may_negate_without_overflow_p (t)
 (match negate_expr_p
Index: gcc/testsuite/gcc.dg/pr81227.c
===
--- gcc/testsuite/gcc.dg/pr81227.c  (revision 0)
+++ gcc/testsuite/gcc.dg/pr81227.c  (working copy)
@@ -0,0 +1,22 @@
+/* Copy of gcc.c-torture/compile/pr80443.c  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fwrapv" } */
+
+struct S { int a : 1; } b, c;
+signed char d, e, f;
+
+void
+foo ()
+{ 
+  while (f)
+{ 
+  signed char g = b.a;
+  if (g)
+   b.a = ~(1 + (d || c.a));
+  if (b.a < g && b.a)
+   g = 0;
+  if (b.a > c.a)
+   b.a = g;
+  c.a = e;
+}
+}
Index: gcc/tree-ssa-reassoc.c
===
--- gcc/tree-ssa-reassoc.c  (revision 249729)
+++ gcc/tree-ssa-reassoc.c  (working copy)
@@ -2561,7 +2561,7 @@ optimize_range_tests_diff (enum tree_cod
   tem2 = fold_convert (type, tem2);
   lowi = fold_convert (type, lowi);
   mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
-  tem1 = fold_binary (MINUS_EXPR, type,
+  tem1 = fold_build2 (MINUS_EXPR, type,
  fold_convert (type, rangei->exp), lowi);
   tem1 = fold_build2 (BIT_AND_EXPR, type, tem1, mask);
   lowj = build_int_cst (type, 0);


Re: [PATCH GCC][13/13]Distribute loop with loop versioning under runtime alias check

2017-06-28 Thread Richard Biener
On Wed, Jun 28, 2017 at 1:46 PM, Bin.Cheng  wrote:
> On Wed, Jun 28, 2017 at 11:58 AM, Richard Biener
>  wrote:
>> On Tue, Jun 27, 2017 at 4:07 PM, Bin.Cheng  wrote:
>>> On Tue, Jun 27, 2017 at 1:44 PM, Richard Biener
>>>  wrote:
 On Fri, Jun 23, 2017 at 12:30 PM, Bin.Cheng  wrote:
> On Tue, Jun 20, 2017 at 10:22 AM, Bin.Cheng  wrote:
>> On Mon, Jun 12, 2017 at 6:03 PM, Bin Cheng  wrote:
>>> Hi,
> Rebased V3 for changes in previous patches.  Bootstap and test on
> x86_64 and aarch64.

 why is ldist-12.c no longer distributed?  your comment says it doesn't 
 expose
 more "parallelism" but the point is to reduce memory bandwith requirements
 which it clearly does.

 Likewise for -13.c, -14.c.  -4.c may be a questionable case but the wording
 of "parallelism" still confuses me.

 Can you elaborate on that.  Now onto the patch:
>>> Given we don't model data locality or memory bandwidth, whether
>>> distribution enables loops that can be executed paralleled becomes the
>>> major criteria for distribution.  BTW, I think a good memory stream
>>> optimization model shouldn't consider small loops as in ldist-12.c,
>>> etc., appropriate for distribution.
>>
>> True.  But what means "parallel" here?  ldist-13.c if partitioned into two 
>> loops
>> can be executed "in parallel"
> So if a loop by itself can be vectorized (or so called can be executed
> paralleled), we tend to no distribute it into small ones.  But there
> is one exception here, if the distributed small loops are recognized
> as builtin functions, we still distribute it.  I assume it's generally
> better to call builtin memory functions than vectorize it by GCC?

Yes.

>>

 +   Loop distribution is the dual of loop fusion.  It separates statements
 +   of a loop (or loop nest) into multiple loops (or loop nests) with the
 +   same loop header.  The major goal is to separate statements which may
 +   be vectorized from those that can't.  This pass implements distribution
 +   in the following steps:

 misses the goal of being a memory stream optimization, not only a 
 vectorization
 enabler.  distributing a loop can also reduce register pressure.
>>> I will revise the comment, but as explained, enabling more
>>> vectorization is the major criteria for distribution to some extend
>>> now.
>>
>> Yes, I agree -- originally it was written to optimize the stream benchmark 
>> IIRC.
> Let's see if any performance drop will be reported against this patch.
> Let's see if we can create a cost model for it.

Fine.

>>

 You introduce ldist_alias_id in struct loop (probably in 01/n which I
 didn't look
 into yet).  If you don't use that please introduce it separately.
>>> Hmm, yes it is introduced in patch [01/n] and set in this patch.
>>>

 + /* Be conservative.  If data references are not well 
 analyzed,
 +or the two data references have the same base address and
 +offset, add dependence and consider it alias to each 
 other.
 +In other words, the dependence can not be resolved by
 +runtime alias check.  */
 + if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
 + || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
 + || !DR_INIT (dr1) || !DR_INIT (dr2)
 + || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
 + || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
 + || res == 0)

 ISTR a helper that computes whether we can handle a runtime alias check for
 a specific case?
>>> I guess you mean runtime_alias_check_p that I factored out previously?
>>>  Unfortunately, it's factored out vectorizer's usage and doesn't fit
>>> here straightforwardly.  Shall I try to further generalize the
>>> interface as independence patch to this one?
>>
>> That would be nice.
>>

 +  /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
 +  if (flag_tree_loop_vectorize)
 +{

 so at this point I'd condition the whole runtime-alias check generating
 on flag_tree_loop_vectorize.  You seem to support versioning w/o
 that here but in other places disable versioning w/o 
 flag_tree_loop_vectorize.
 That looks somewhat inconsistent...
>>> It is a bit complicated.  In function version_for_distribution_p, we have
>>> +
>>> +  /* Need to version loop if runtime alias check is necessary.  */
>>> +  if (alias_ddrs->length () > 0)
>>> +return true;
>>> +
>>> +  /* Don't version the loop with call to IFN_LOOP_DIST_ALIAS if
>>> + vectorizer is not enable because no other pass can fold it.  */
>>> +  if (!flag_tree_loop_vectorize)
>>> +return false;
>>> +
>>>
>>> It means we also versioning loops even if runtime alias check is
>>> unnecessary.  I did this because we lack cost model and current
>>

Re: [PATCH 3/3] Introduce IntegerRange for options (PR driver/79659).

2017-06-28 Thread Martin Liška
On 06/28/2017 06:52 AM, Jeff Law wrote:
> On 03/15/2017 03:58 AM, Martin Liška wrote:
>> Huh, I forgot to attach the patch.
>>
>> Martin
>>
>> 0001-Introduce-IntegerRange-for-options-PR-driver-79659.patch
>>
>>
>> From bb89456e6cecfa9497cf8e265d2083e762d5bc3e Mon Sep 17 00:00:00 2001
>> From: marxin 
>> Date: Mon, 27 Feb 2017 14:07:03 +0100
>> Subject: [PATCH] Introduce IntegerRange for options (PR driver/79659).
>>
>> gcc/ChangeLog:
>>
>> 2017-02-28  Martin Liska  
>>
>>  PR driver/79659
>>  * common.opt: Add IntegerRange to various options.
>>  * opt-functions.awk (integer_range_info): New function.
>>  * optc-gen.awk: Add integer_range_info to cl_options struct.
>>  * opts-common.c (decode_cmdline_option): Handle
>>  CL_ERR_INT_RANGE_ARG.
>>  (cmdline_handle_error): Likewise.
>>  * opts.c (print_filtered_help): Show valid interval in
>>  when --help is provided.
>>  * opts.h (struct cl_option): Add range_min and range_max fields.
>>  * config/i386/i386.opt: Add IntegerRange for -mbranch-cost.
>>
>> gcc/c-family/ChangeLog:
>>
>> 2017-02-28  Martin Liska  
>>
>>  PR driver/79659
>>  * c.opt: Add IntegerRange to various options.
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2017-02-28  Martin Liska  
>>
>>  PR driver/79659
>>  * g++.dg/opt/pr79659.C: New test.
> Presumably this never fully moved forward because it wasn't a regression?
> 
> This looks quite reasonable to me.  I'm not sure of the state of the
> prereqs and you may want/need to add IntegerRange checks on newly added
> options since this was first submitted.
> 
> If the prereqs are ack'd, then as far as I'm concerned this is good to
> go and you're free to add any new IntegerRange checks you deem
> necessary/desirable.
> 
> jeff
> 

Thank you Jeff for looking at the patch. I've just re-tested the patch and
I'm going to install it.

Martin


Re: [PATCH] Do not allow to inline ifunc resolvers (PR ipa/81128).

2017-06-28 Thread Martin Liška
On 06/27/2017 05:26 PM, Jan Hubicka wrote:
>> diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c
>> index d5a3ae56c46..69e6e295d55 100644
>> --- a/gcc/ipa-visibility.c
>> +++ b/gcc/ipa-visibility.c
>> @@ -97,7 +97,8 @@ non_local_p (struct cgraph_node *node, void *data 
>> ATTRIBUTE_UNUSED)
>>&& !DECL_EXTERNAL (node->decl)
>>&& !node->externally_visible
>>&& !node->used_from_other_partition
>> -  && !node->in_other_partition);
>> +  && !node->in_other_partition
>> +  && !lookup_attribute ("ifunc", DECL_ATTRIBUTES (node->decl)));
>>  }
>>  
>>  /* Return true when function can be marked local.  */
>>
>> It's questionable if local.local can be true for ifunc function? If can, one 
>> would need to
>> move check for ifunc aerly in:
>> /* Return function availability.  See cgraph.h for description of individual
>>return values.  */
>> enum availability
>> cgraph_node::get_availability (symtab_node *ref)
>> {
>>   if (ref)
>> {
>>   cgraph_node *cref = dyn_cast  (ref);
>>   if (cref)
>>  ref = cref->global.inlined_to;
>> }
>>   enum availability avail;
>>   if (!analyzed)
>> avail = AVAIL_NOT_AVAILABLE;
>>   else if (local.local)
>> avail = AVAIL_LOCAL;
>>   else if (global.inlined_to)
>> avail = AVAIL_AVAILABLE;
>>   else if (transparent_alias)
>> ultimate_alias_target (&avail, ref);
>>   else if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (decl)))
>> avail = AVAIL_INTERPOSABLE;
>>
>> ...
>>
>> What solution do you prefer Honza?
> 
> Probably just update non_local_p to also check that availability is at least 
> AVAIL_AVAILABLE.
> Then we will have one place to collect such a side cases where !EXTERNAL 
> function can be
> interposed.

Good, this is tested version of patch I'm going to install.

Martin

> 
> Honza
>>
>> Thanks,
>> Martin
>>
>>>
>>> Honza
 Martin

 gcc/ChangeLog:

 2017-06-22  Martin Liska  

* ipa-inline.c (can_inline_edge_p): Return false for ifunc fns.
* ipa-visibility.c (can_replace_by_local_alias): Likewise.

 gcc/c-family/ChangeLog:

 2017-06-22  Martin Liska  

* c-attribs.c (handle_alias_ifunc_attribute): Append ifunc alias
to a function declaration.

 gcc/testsuite/ChangeLog:

 2017-06-22  Martin Liska  

* gcc.target/i386/pr81128.c: New test.
 ---
  gcc/c-family/c-attribs.c| 11 --
  gcc/ipa-inline.c|  2 +
  gcc/ipa-visibility.c|  3 +-
  gcc/testsuite/gcc.target/i386/pr81128.c | 65 
 +
  4 files changed, 77 insertions(+), 4 deletions(-)
  create mode 100644 gcc/testsuite/gcc.target/i386/pr81128.c


>>>
 diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
 index 2b6845f2cbd..626ffa1cde7 100644
 --- a/gcc/c-family/c-attribs.c
 +++ b/gcc/c-family/c-attribs.c
 @@ -1846,9 +1846,14 @@ handle_alias_ifunc_attribute (bool is_alias, tree 
 *node, tree name, tree args,
TREE_STATIC (decl) = 1;
  
if (!is_alias)
 -  /* ifuncs are also aliases, so set that attribute too.  */
 -  DECL_ATTRIBUTES (decl)
 -= tree_cons (get_identifier ("alias"), args, DECL_ATTRIBUTES (decl));
 +  {
 +/* ifuncs are also aliases, so set that attribute too.  */
 +DECL_ATTRIBUTES (decl)
 +  = tree_cons (get_identifier ("alias"), args,
 +   DECL_ATTRIBUTES (decl));
 +DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("ifunc"),
 +NULL, DECL_ATTRIBUTES (decl));
 +  }
  }
else
  {
 diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
 index fb20d3723cc..588fa9c41e4 100644
 --- a/gcc/ipa-inline.c
 +++ b/gcc/ipa-inline.c
 @@ -370,6 +370,8 @@ can_inline_edge_p (struct cgraph_edge *e, bool report,
e->inline_failed = CIF_ATTRIBUTE_MISMATCH;
inlinable = false;
  }
 +  else if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (e->callee->decl)))
 +  inlinable = false;
/* Check if caller growth allows the inlining.  */
else if (!DECL_DISREGARD_INLINE_LIMITS (callee->decl)
   && !disregard_limits
 diff --git a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c
 index d5a3ae56c46..79d05b41085 100644
 --- a/gcc/ipa-visibility.c
 +++ b/gcc/ipa-visibility.c
 @@ -345,7 +345,8 @@ can_replace_by_local_alias (symtab_node *node)

return (node->get_availability () > AVAIL_INTERPOSABLE
  && !decl_binds_to_current_def_p (node->decl)
 -&& !node->can_be_discarded_p ());
 +&& !node->can_be_discarded_p ()
 +&& !lookup_attribute ("ifunc", DECL_ATTRIBUTES (node->decl)));
  }
  
  /* Return true if we can replace reference to NODE by local alias
 diff --git a

[PATCH v1] cxx: Make __func__, __FUNCTION__, and __PRETTY_FUNCTION__ constexpr.

2017-06-28 Thread Franklin “Snaipe” Mathieu
From: Franklin “Snaipe” Mathieu 

This patch makes the forementioned definitions `contexpr` when
compiling C++11 and above with GNU extensions.

gcc/cp/ChangeLog:
2017-06-27  Franklin “Snaipe” Mathieu  

PR c++/66639
* decl.c (cp_make_fname_decl): Make declaration constexpr.

gcc/testsuite/ChangeLog:
2017-06-27  Franklin “Snaipe” Mathieu  

PR c++/66639
* g++.dg/pr66639.c: New test.
---
 gcc/cp/decl.c  |  5 +
 gcc/testsuite/g++.dg/pr66639.C | 19 +++
 2 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr66639.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 8e9a466..740ab71 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4384,6 +4384,11 @@ cp_make_fname_decl (location_t loc, tree id, int 
type_dep)
   TREE_READONLY (decl) = 1;
   DECL_ARTIFICIAL (decl) = 1;
 
+  /* extension: declare __func__, __FUNCTION__, and __PRETTY_FUNCTION__ as
+ constexpr.  */
+  if (!flag_iso && cxx_dialect >= cxx11)
+DECL_DECLARED_CONSTEXPR_P (decl) = 1;
+
   TREE_USED (decl) = 1;
 
   if (current_function_decl)
diff --git a/gcc/testsuite/g++.dg/pr66639.C b/gcc/testsuite/g++.dg/pr66639.C
new file mode 100644
index 000..51a92f9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr66639.C
@@ -0,0 +1,19 @@
+// PR c++/66639
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+constexpr bool
+streq(char const *lhs, char const *rhs)
+{
+  return *lhs && *rhs
+ ? *lhs == *rhs && streq(lhs + 1, rhs + 1)
+ : !*lhs && !*rhs;
+}
+
+int
+main()
+{
+   static_assert (streq(__func__, "main"), "");
+   static_assert (streq(__FUNCTION__, "main"), "");
+   static_assert (streq(__PRETTY_FUNCTION__, "int main()"), "");
+}
-- 
Franklin "Snaipe" Mathieu
Arista Networks, Ltd



Re: [PATCH][AArch64] Improve Cortex-A53 shift bypass

2017-06-28 Thread Wilco Dijkstra
Ramana Radhakrishnan wrote:
>    
> I'm about to run home for the day but this came in from
> https://gcc.gnu.org/ml/gcc-patches/2013-09/msg02109.html and James
> said in that email that this was put in to ensure no segfaults on
> cortex-a15 / cortex-a7 tuning.

The code is historical - an older version didn't specifically look just for 
shifts. Given we can only return shift rtx's now it's completely redundant.
A bootstrap on armhf with --with-cpu=cortex-a15 passed.

Wilco


[PATCH v1] cxx: Make __func__, __FUNCTION__, and __PRETTY_FUNCTION__ constexpr.

2017-06-28 Thread Franklin Snaipe Mathieu
From: "Franklin \"Snaipe\" Mathieu" 

This patch makes the forementioned definitions `contexpr` when
compiling C++11 and above with GNU extensions.

gcc/cp/ChangeLog:
2017-06-27  Franklin “Snaipe” Mathieu  

PR c++/66639
* decl.c (cp_make_fname_decl): Make declaration constexpr.

gcc/testsuite/ChangeLog:
2017-06-27  Franklin “Snaipe” Mathieu  

PR c++/66639
* g++.dg/pr66639.c: New test.
---
 gcc/cp/decl.c  |  5 +
 gcc/testsuite/g++.dg/pr66639.C | 19 +++
 2 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr66639.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 8e9a466..740ab71 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4384,6 +4384,11 @@ cp_make_fname_decl (location_t loc, tree id, int 
type_dep)
   TREE_READONLY (decl) = 1;
   DECL_ARTIFICIAL (decl) = 1;
 
+  /* extension: declare __func__, __FUNCTION__, and __PRETTY_FUNCTION__ as
+ constexpr.  */
+  if (!flag_iso && cxx_dialect >= cxx11)
+DECL_DECLARED_CONSTEXPR_P (decl) = 1;
+
   TREE_USED (decl) = 1;
 
   if (current_function_decl)
diff --git a/gcc/testsuite/g++.dg/pr66639.C b/gcc/testsuite/g++.dg/pr66639.C
new file mode 100644
index 000..51a92f9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr66639.C
@@ -0,0 +1,19 @@
+// PR c++/66639
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+constexpr bool
+streq(char const *lhs, char const *rhs)
+{
+  return *lhs && *rhs
+ ? *lhs == *rhs && streq(lhs + 1, rhs + 1)
+ : !*lhs && !*rhs;
+}
+
+int
+main()
+{
+   static_assert (streq(__func__, "main"), "");
+   static_assert (streq(__FUNCTION__, "main"), "");
+   static_assert (streq(__PRETTY_FUNCTION__, "int main()"), "");
+}
-- 
Franklin "Snaipe" Mathieu
Arista Networks, Ltd



[PATCH v1] cxx: Make __func__, __FUNCTION__, and __PRETTY_FUNCTION__ constexpr.

2017-06-28 Thread Snaipe
From: Snaipe 

This patch makes the forementioned definitions `contexpr` when
compiling C++11 and above with GNU extensions.

gcc/cp/ChangeLog:
2017-06-27  Franklin “Snaipe” Mathieu  

PR c++/66639
* decl.c (cp_make_fname_decl): Make declaration constexpr.

gcc/testsuite/ChangeLog:
2017-06-27  Franklin “Snaipe” Mathieu  

PR c++/66639
* g++.dg/pr66639.c: New test.
---
 gcc/cp/decl.c  |  5 +
 gcc/testsuite/g++.dg/pr66639.C | 19 +++
 2 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr66639.C

diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 8e9a466..740ab71 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -4384,6 +4384,11 @@ cp_make_fname_decl (location_t loc, tree id, int 
type_dep)
   TREE_READONLY (decl) = 1;
   DECL_ARTIFICIAL (decl) = 1;
 
+  /* extension: declare __func__, __FUNCTION__, and __PRETTY_FUNCTION__ as
+ constexpr.  */
+  if (!flag_iso && cxx_dialect >= cxx11)
+DECL_DECLARED_CONSTEXPR_P (decl) = 1;
+
   TREE_USED (decl) = 1;
 
   if (current_function_decl)
diff --git a/gcc/testsuite/g++.dg/pr66639.C b/gcc/testsuite/g++.dg/pr66639.C
new file mode 100644
index 000..51a92f9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr66639.C
@@ -0,0 +1,19 @@
+// PR c++/66639
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+constexpr bool
+streq(char const *lhs, char const *rhs)
+{
+  return *lhs && *rhs
+ ? *lhs == *rhs && streq(lhs + 1, rhs + 1)
+ : !*lhs && !*rhs;
+}
+
+int
+main()
+{
+   static_assert (streq(__func__, "main"), "");
+   static_assert (streq(__FUNCTION__, "main"), "");
+   static_assert (streq(__PRETTY_FUNCTION__, "int main()"), "");
+}
-- 
Franklin "Snaipe" Mathieu
Arista Networks, Ltd



Re: [PATCH v1] cxx: Make __func__, __FUNCTION__, and __PRETTY_FUNCTION__ constexpr.

2017-06-28 Thread Franklin Mathieu
Sorry about that, please ignore.

On Wed, Jun 28, 2017 at 1:52 PM, Franklin Snaipe Mathieu
 wrote:
> From: "Franklin \"Snaipe\" Mathieu" 
>
> This patch makes the forementioned definitions `contexpr` when
> compiling C++11 and above with GNU extensions.
>
> gcc/cp/ChangeLog:
> 2017-06-27  Franklin “Snaipe” Mathieu  
>
> PR c++/66639
> * decl.c (cp_make_fname_decl): Make declaration constexpr.
>
> gcc/testsuite/ChangeLog:
> 2017-06-27  Franklin “Snaipe” Mathieu  
>
> PR c++/66639
> * g++.dg/pr66639.c: New test.
> ---
>  gcc/cp/decl.c  |  5 +
>  gcc/testsuite/g++.dg/pr66639.C | 19 +++
>  2 files changed, 24 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/pr66639.C
>
> diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
> index 8e9a466..740ab71 100644
> --- a/gcc/cp/decl.c
> +++ b/gcc/cp/decl.c
> @@ -4384,6 +4384,11 @@ cp_make_fname_decl (location_t loc, tree id, int 
> type_dep)
>TREE_READONLY (decl) = 1;
>DECL_ARTIFICIAL (decl) = 1;
>
> +  /* extension: declare __func__, __FUNCTION__, and __PRETTY_FUNCTION__ as
> + constexpr.  */
> +  if (!flag_iso && cxx_dialect >= cxx11)
> +DECL_DECLARED_CONSTEXPR_P (decl) = 1;
> +
>TREE_USED (decl) = 1;
>
>if (current_function_decl)
> diff --git a/gcc/testsuite/g++.dg/pr66639.C b/gcc/testsuite/g++.dg/pr66639.C
> new file mode 100644
> index 000..51a92f9
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr66639.C
> @@ -0,0 +1,19 @@
> +// PR c++/66639
> +// { dg-do compile { target c++11 } }
> +// { dg-options "" }
> +
> +constexpr bool
> +streq(char const *lhs, char const *rhs)
> +{
> +  return *lhs && *rhs
> + ? *lhs == *rhs && streq(lhs + 1, rhs + 1)
> + : !*lhs && !*rhs;
> +}
> +
> +int
> +main()
> +{
> +   static_assert (streq(__func__, "main"), "");
> +   static_assert (streq(__FUNCTION__, "main"), "");
> +   static_assert (streq(__PRETTY_FUNCTION__, "int main()"), "");
> +}
> --
> Franklin "Snaipe" Mathieu
> Arista Networks, Ltd
>



-- 
Franklin "Snaipe" Mathieu


Re: [PATCH v1] cxx: Make __func__, __FUNCTION__, and __PRETTY_FUNCTION__ constexpr.

2017-06-28 Thread Franklin Mathieu
Sorry about that (--dry-run fail), please ignore.

On Wed, Jun 28, 2017 at 1:53 PM, Snaipe  wrote:
> From: Snaipe 
>
> This patch makes the forementioned definitions `contexpr` when
> compiling C++11 and above with GNU extensions.
>
> gcc/cp/ChangeLog:
> 2017-06-27  Franklin “Snaipe” Mathieu  
>
> PR c++/66639
> * decl.c (cp_make_fname_decl): Make declaration constexpr.
>
> gcc/testsuite/ChangeLog:
> 2017-06-27  Franklin “Snaipe” Mathieu  
>
> PR c++/66639
> * g++.dg/pr66639.c: New test.
> ---
>  gcc/cp/decl.c  |  5 +
>  gcc/testsuite/g++.dg/pr66639.C | 19 +++
>  2 files changed, 24 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/pr66639.C
>
> diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
> index 8e9a466..740ab71 100644
> --- a/gcc/cp/decl.c
> +++ b/gcc/cp/decl.c
> @@ -4384,6 +4384,11 @@ cp_make_fname_decl (location_t loc, tree id, int 
> type_dep)
>TREE_READONLY (decl) = 1;
>DECL_ARTIFICIAL (decl) = 1;
>
> +  /* extension: declare __func__, __FUNCTION__, and __PRETTY_FUNCTION__ as
> + constexpr.  */
> +  if (!flag_iso && cxx_dialect >= cxx11)
> +DECL_DECLARED_CONSTEXPR_P (decl) = 1;
> +
>TREE_USED (decl) = 1;
>
>if (current_function_decl)
> diff --git a/gcc/testsuite/g++.dg/pr66639.C b/gcc/testsuite/g++.dg/pr66639.C
> new file mode 100644
> index 000..51a92f9
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr66639.C
> @@ -0,0 +1,19 @@
> +// PR c++/66639
> +// { dg-do compile { target c++11 } }
> +// { dg-options "" }
> +
> +constexpr bool
> +streq(char const *lhs, char const *rhs)
> +{
> +  return *lhs && *rhs
> + ? *lhs == *rhs && streq(lhs + 1, rhs + 1)
> + : !*lhs && !*rhs;
> +}
> +
> +int
> +main()
> +{
> +   static_assert (streq(__func__, "main"), "");
> +   static_assert (streq(__FUNCTION__, "main"), "");
> +   static_assert (streq(__PRETTY_FUNCTION__, "int main()"), "");
> +}
> --
> Franklin "Snaipe" Mathieu
> Arista Networks, Ltd
>



-- 
Franklin "Snaipe" Mathieu


[PATCH] More vectorizer reduction TLC

2017-06-28 Thread Richard Biener

Upcoming refactoring likes the cond reduction special IV more in the
epilogue generation code, thus moved there.  Some more TLC as well.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2017-06-28  Richard Biener  

* tree-vect-loop.c (vectorizable_reduction): Move special
cond reduction IV var creation ...
(vect_create_epilog_for_reduction): ... here.  Remove induction_index
parameter.  Use STMT_VINFO_VECTYPE.
* tree-vect-slp.c (vect_get_constant_vectors): Properly reset
constant_p.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 249735)
+++ gcc/tree-vect-loop.c(working copy)
@@ -4263,8 +4263,6 @@ get_initial_defs_for_reduction (slp_tree
DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
SLP_NODE is an SLP node containing a group of reduction statements. The 
  first one in this group is STMT.
-   INDUCTION_INDEX is the index of the loop for condition reductions.
- Otherwise it is undefined.
 
This function:
1. Creates the reduction def-use cycles: sets the arguments for 
@@ -4310,7 +4308,7 @@ vect_create_epilog_for_reduction (vec reduction_phis,
   int reduc_index, bool double_reduc, 
- slp_tree slp_node, tree induction_index)
+ slp_tree slp_node)
 {
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   stmt_vec_info prev_phi_info;
@@ -4331,7 +4329,7 @@ vect_create_epilog_for_reduction (vecheader);
+  set_vinfo_for_stmt (new_phi,
+ new_stmt_vec_info (new_phi, loop_vinfo));
+  add_phi_arg (as_a  (new_phi), vec_zero,
+  loop_preheader_edge (loop), UNKNOWN_LOCATION);
+
+  /* Now take the condition from the loops original cond_expr
+(VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
+every match uses values from the induction variable
+(INDEX_BEFORE_INCR) otherwise uses values from the phi node
+(NEW_PHI_TREE).
+Finally, we update the phi (NEW_PHI_TREE) to take the value of
+the new cond_expr (INDEX_COND_EXPR).  */
+
+  /* Duplicate the condition from vec_stmt.  */
+  tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
+
+  /* Create a conditional, where the condition is taken from vec_stmt
+(CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
+else is the phi (NEW_PHI_TREE).  */
+  tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
+ccompare, indx_before_incr,
+new_phi_tree);
+  induction_index = make_ssa_name (cr_index_vector_type);
+  gimple *index_condition = gimple_build_assign (induction_index,
+index_cond_expr);
+  gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
+  stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition,
+   loop_vinfo);
+  STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
+  set_vinfo_for_stmt (index_condition, index_vec_info);
+
+  /* Update the phi with the vec cond.  */
+  add_phi_arg (as_a  (new_phi), induction_index,
+  loop_latch_edge (loop), UNKNOWN_LOCATION);
+}
+
   /* 2. Create epilog code.
 The reduction epilog code operates across the elements of the vector
 of partial results computed by the vectorized loop.
@@ -6248,100 +6335,14 @@ vectorizable_reduction (gimple *stmt, gi
   prev_phi_info = vinfo_for_stmt (new_phi);
 }
 
-  tree indx_before_incr, indx_after_incr, cond_name = NULL;
-
   /* Finalize the reduction-phi (set its arguments) and create the
  epilog reduction code.  */
   if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
-{
-  new_temp = gimple_assign_lhs (*vec_stmt);
-  vect_defs[0] = new_temp;
-
-  /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
-which is updated with the current index of the loop for every match of
-the original loop's cond_expr (VEC_STMT).  This results in a vector
-containing the last time the condition passed for that vector lane.
-The first match will be a 1 to allow 0 to be used for non-matching
-indexes.  If there are no matches at all then the vector will be all
-zeroes.  */
-  if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
-   {
- int nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- int k;
-
- gcc_assert (gimple_assign_rhs_code (*vec_stmt) == VEC_COND_EXPR);
-
- /* First we create a simple vector induction variable which starts
-with the values {1,2,3,...} (SERIES_VECT) and increme

Re: [PATCH][AArch64] Improve Cortex-A53 shift bypass

2017-06-28 Thread Ramana Radhakrishnan

On 6/28/17 1:49 PM, Wilco Dijkstra wrote:

Ramana Radhakrishnan wrote:
 
I'm about to run home for the day but this came in from

https://gcc.gnu.org/ml/gcc-patches/2013-09/msg02109.html and James
said in that email that this was put in to ensure no segfaults on
cortex-a15 / cortex-a7 tuning.


The code is historical - an older version didn't specifically look just for
shifts. Given we can only return shift rtx's now it's completely redundant.
A bootstrap on armhf with --with-cpu=cortex-a15 passed.



In which case - ok . thanks,

regards
Ramana


Wilco
 





Re: [PATCH v1] cxx: Make __func__, __FUNCTION__, and __PRETTY_FUNCTION__ constexpr.

2017-06-28 Thread Franklin Mathieu
Sorry about the two other failed attempts. I got confused about the
output of send-email and ended up sending two follow-up bogus emails.

This is the right email chain.

On Wed, Jun 28, 2017 at 1:49 PM, Franklin “Snaipe” Mathieu
 wrote:
> From: Franklin “Snaipe” Mathieu 
>
> This patch makes the forementioned definitions `contexpr` when
> compiling C++11 and above with GNU extensions.
>
> gcc/cp/ChangeLog:
> 2017-06-27  Franklin “Snaipe” Mathieu  
>
> PR c++/66639
> * decl.c (cp_make_fname_decl): Make declaration constexpr.
>
> gcc/testsuite/ChangeLog:
> 2017-06-27  Franklin “Snaipe” Mathieu  
>
> PR c++/66639
> * g++.dg/pr66639.c: New test.
> ---
>  gcc/cp/decl.c  |  5 +
>  gcc/testsuite/g++.dg/pr66639.C | 19 +++
>  2 files changed, 24 insertions(+)
>  create mode 100644 gcc/testsuite/g++.dg/pr66639.C
>
> diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
> index 8e9a466..740ab71 100644
> --- a/gcc/cp/decl.c
> +++ b/gcc/cp/decl.c
> @@ -4384,6 +4384,11 @@ cp_make_fname_decl (location_t loc, tree id, int 
> type_dep)
>TREE_READONLY (decl) = 1;
>DECL_ARTIFICIAL (decl) = 1;
>
> +  /* extension: declare __func__, __FUNCTION__, and __PRETTY_FUNCTION__ as
> + constexpr.  */
> +  if (!flag_iso && cxx_dialect >= cxx11)
> +DECL_DECLARED_CONSTEXPR_P (decl) = 1;
> +
>TREE_USED (decl) = 1;
>
>if (current_function_decl)
> diff --git a/gcc/testsuite/g++.dg/pr66639.C b/gcc/testsuite/g++.dg/pr66639.C
> new file mode 100644
> index 000..51a92f9
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/pr66639.C
> @@ -0,0 +1,19 @@
> +// PR c++/66639
> +// { dg-do compile { target c++11 } }
> +// { dg-options "" }
> +
> +constexpr bool
> +streq(char const *lhs, char const *rhs)
> +{
> +  return *lhs && *rhs
> + ? *lhs == *rhs && streq(lhs + 1, rhs + 1)
> + : !*lhs && !*rhs;
> +}
> +
> +int
> +main()
> +{
> +   static_assert (streq(__func__, "main"), "");
> +   static_assert (streq(__FUNCTION__, "main"), "");
> +   static_assert (streq(__PRETTY_FUNCTION__, "int main()"), "");
> +}
> --
> Franklin "Snaipe" Mathieu
> Arista Networks, Ltd
>



-- 
Franklin "Snaipe" Mathieu


Re: [PATCH GCC][13/13]Distribute loop with loop versioning under runtime alias check

2017-06-28 Thread Bin.Cheng
On Wed, Jun 28, 2017 at 1:29 PM, Richard Biener
 wrote:
> On Wed, Jun 28, 2017 at 1:46 PM, Bin.Cheng  wrote:
>> On Wed, Jun 28, 2017 at 11:58 AM, Richard Biener
>>  wrote:
>>> On Tue, Jun 27, 2017 at 4:07 PM, Bin.Cheng  wrote:
 On Tue, Jun 27, 2017 at 1:44 PM, Richard Biener
  wrote:
> On Fri, Jun 23, 2017 at 12:30 PM, Bin.Cheng  wrote:
>> On Tue, Jun 20, 2017 at 10:22 AM, Bin.Cheng  
>> wrote:
>>> On Mon, Jun 12, 2017 at 6:03 PM, Bin Cheng  wrote:
 Hi,
>> Rebased V3 for changes in previous patches.  Bootstap and test on
>> x86_64 and aarch64.
>
> why is ldist-12.c no longer distributed?  your comment says it doesn't 
> expose
> more "parallelism" but the point is to reduce memory bandwith requirements
> which it clearly does.
>
> Likewise for -13.c, -14.c.  -4.c may be a questionable case but the 
> wording
> of "parallelism" still confuses me.
>
> Can you elaborate on that.  Now onto the patch:
 Given we don't model data locality or memory bandwidth, whether
 distribution enables loops that can be executed paralleled becomes the
 major criteria for distribution.  BTW, I think a good memory stream
 optimization model shouldn't consider small loops as in ldist-12.c,
 etc., appropriate for distribution.
>>>
>>> True.  But what means "parallel" here?  ldist-13.c if partitioned into two 
>>> loops
>>> can be executed "in parallel"
>> So if a loop by itself can be vectorized (or so called can be executed
>> paralleled), we tend to no distribute it into small ones.  But there
>> is one exception here, if the distributed small loops are recognized
>> as builtin functions, we still distribute it.  I assume it's generally
>> better to call builtin memory functions than vectorize it by GCC?
>
> Yes.
>
>>>
>
> +   Loop distribution is the dual of loop fusion.  It separates statements
> +   of a loop (or loop nest) into multiple loops (or loop nests) with the
> +   same loop header.  The major goal is to separate statements which may
> +   be vectorized from those that can't.  This pass implements 
> distribution
> +   in the following steps:
>
> misses the goal of being a memory stream optimization, not only a 
> vectorization
> enabler.  distributing a loop can also reduce register pressure.
 I will revise the comment, but as explained, enabling more
 vectorization is the major criteria for distribution to some extend
 now.
>>>
>>> Yes, I agree -- originally it was written to optimize the stream benchmark 
>>> IIRC.
>> Let's see if any performance drop will be reported against this patch.
>> Let's see if we can create a cost model for it.
>
> Fine.
I will run some benchmarks to see if there is breakage.
>
>>>
>
> You introduce ldist_alias_id in struct loop (probably in 01/n which I
> didn't look
> into yet).  If you don't use that please introduce it separately.
 Hmm, yes it is introduced in patch [01/n] and set in this patch.

>
> + /* Be conservative.  If data references are not well 
> analyzed,
> +or the two data references have the same base address and
> +offset, add dependence and consider it alias to each 
> other.
> +In other words, the dependence can not be resolved by
> +runtime alias check.  */
> + if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
> + || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
> + || !DR_INIT (dr1) || !DR_INIT (dr2)
> + || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
> + || !DR_STEP (dr2) || !tree_fits_uhwi_p (DR_STEP (dr2))
> + || res == 0)
>
> ISTR a helper that computes whether we can handle a runtime alias check 
> for
> a specific case?
 I guess you mean runtime_alias_check_p that I factored out previously?
  Unfortunately, it's factored out vectorizer's usage and doesn't fit
 here straightforwardly.  Shall I try to further generalize the
 interface as independence patch to this one?
>>>
>>> That would be nice.
>>>
>
> +  /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS.  */
> +  if (flag_tree_loop_vectorize)
> +{
>
> so at this point I'd condition the whole runtime-alias check generating
> on flag_tree_loop_vectorize.  You seem to support versioning w/o
> that here but in other places disable versioning w/o 
> flag_tree_loop_vectorize.
> That looks somewhat inconsistent...
 It is a bit complicated.  In function version_for_distribution_p, we have
 +
 +  /* Need to version loop if runtime alias check is necessary.  */
 +  if (alias_ddrs->length () > 0)
 +return true;
 +
 +  /* Don't version the loop with call to IFN_LOOP_DIST_ALIAS if
 + vectorizer is not en

[PATCH] Make inlining consistent in LTO and non-LTO mode (PR target/71991).

2017-06-28 Thread Martin Liška
Hi.

Following patch makes non-LTO and LTO mode to behave same.
Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
The test-case works on x86_64-linux-gnu.

Ready to be installed?
Martin

gcc/testsuite/ChangeLog:

2017-06-28  Martin Liska  

PR target/71991
* gcc.dg/torture/pr71991.c: New test.

gcc/ChangeLog:

2017-06-28  Martin Liska  

PR target/71991
* config/i386/i386.c (ix86_can_inline_p): Make inlining consistent
in LTO and non-LTO mode.
---
 gcc/config/i386/i386.c |  5 +++--
 gcc/testsuite/gcc.dg/torture/pr71991.c | 12 
 2 files changed, 15 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/torture/pr71991.c


diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 5e8d0ff0e3d..090b93aecfe 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -7438,8 +7438,9 @@ ix86_can_inline_p (tree caller, tree callee)
   tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
   tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
 
-  /* If callee has no option attributes, then it is ok to inline.  */
-  if (!callee_tree)
+  /* If callee has no option attributes (or default),
+ then it is ok to inline.  */
+  if (!callee_tree || callee_tree == target_option_default_node)
 ret = true;
 
   /* If caller has no option attributes, but callee does then it is not ok to
diff --git a/gcc/testsuite/gcc.dg/torture/pr71991.c b/gcc/testsuite/gcc.dg/torture/pr71991.c
new file mode 100644
index 000..79c927f6844
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr71991.c
@@ -0,0 +1,12 @@
+/* PR target/71991 */
+
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-std=c99" } */
+
+static inline int __attribute__ ((__always_inline__)) fn1 () { return 0; }
+static inline int __attribute__ ((target("inline-all-stringops"))) fn2 () { return fn1 (); }
+
+int main()
+{
+  return fn2();
+}



Re: [PATCH] Call BUILT_IN_ASAN_HANDLE_NO_RETURN before BUILT_IN_UNWIND_RESUME (PR sanitizer/81021).

2017-06-28 Thread Martin Liška
PING^2

On 06/20/2017 02:15 PM, Martin Liška wrote:
> PING^1
> 
> On 06/13/2017 10:09 AM, Martin Liška wrote:
>> Hi.
>>
>> For a function that does not handle an expection (and calls 
>> BUILT_IN_UNWIND_RESUME),
>> we need to emit call to BUILT_IN_ASAN_HANDLE_NO_RETURN. That will clean up 
>> stack
>> which can possibly contain poisoned shadow memory that will not be cleaned-up
>> in function prologue.
>>
>> Patch can bootstrap on ppc64le-redhat-linux and survives regression tests.
>>
>> Ready to be installed?
>> Martin
>>
>> gcc/testsuite/ChangeLog:
>>
>> 2017-06-12  Martin Liska  
>>
>>  PR sanitizer/81021
>>  * g++.dg/asan/pr81021.C: New test.
>>
>> gcc/ChangeLog:
>>
>> 2017-06-12  Martin Liska  
>>
>>  PR sanitizer/81021
>>  * tree-eh.c (lower_resx): Call BUILT_IN_ASAN_HANDLE_NO_RETURN
>>  before BUILT_IN_UNWIND_RESUME when ASAN is used.
>> ---
>>  gcc/testsuite/g++.dg/asan/pr81021.C | 33 +
>>  gcc/tree-eh.c   | 14 ++
>>  2 files changed, 47 insertions(+)
>>  create mode 100644 gcc/testsuite/g++.dg/asan/pr81021.C
>>
>>
> 



Re: [PATCH] ASAN: handle addressable params (PR sanitize/81040).

2017-06-28 Thread Martin Liška
PING^1

On 06/20/2017 03:06 PM, Martin Liška wrote:
> On 06/20/2017 11:32 AM, Jakub Jelinek wrote:
>> On Tue, Jun 20, 2017 at 11:23:36AM +0200, Martin Liška wrote:
 Then something needs to be done for debugging too.  If it is without VTA,
 then probably just having DECL_VALUE_EXPR is good enough, otherwise
 (VTA) you probably don't want that (or can reset it at that point), but
 instead emit after the initialization stmt a debug stmt that the variable
 value now lives in a different var.  Though ideally we want the debugger
 to be able to also change the value of the var, that might be harder.
 With DECL_VALUE_EXPR on the other side the debug info will be incorrect in
 the prologue until it is assigned to the slot.
>>>
>>> Here I'm not sure about how to distinguish whether to build or not to build
>>> the debug statement. According to flag_var_tracking?
>>
>> More like if (target_for_debug_bind (arg))
>> And if that is false, just make sure DECL_VALUE_EXPR is set to var.
>>
>>> You mean something like:
>>> g = gimple_build_debug_bind (arg, var, g);
>>> ?
>>
>> Well, there is no stmt, so the last argument would be just NULL.
>>
 I don't understand the distinction.  If you turn the original parm
 for complex/vector DECL_GIMPLE_REG_P, you should need the exact same code
 (but I think it would be better to use the default SSA_NAME of the 
 PARM_DECL
 if it is a gimple reg type, rather than use the PARM_DECL itself
 and wait for update_ssa).
>>>
>>> Yes, the test-case /gcc/testsuite/g++.dg/asan/function-argument-3.C fails 
>>> for me
>>> as one needs to have a temporary SSA name, otherwise:
>>>
>>> /home/marxin/Programming/gcc/gcc/testsuite/g++.dg/asan/function-argument-3.C:13:1:
>>>  error: invalid rhs for gimple memory store
>>>  foo (v4si arg)
>>>  ^~~
>>> arg
>>>
>>> arg
>>>
>>> # .MEM_4 = VDEF <.MEM_1(D)>
>>> arg = arg;
>>> during GIMPLE pass: sanopt
>>>
>>> If I see correctly the function in my test-case does not have default def 
>>> SSA name for the parameter.
>>> Thus I guess I need to create a SSA name?
>>
>> I'd expect if you have DECL_GIMPLE_REG_P set on the PARM_DECL and
>> use the default def, you shouldn't run into this.
>>
>>  Jakub
>>
> 
> Good I fixed that in v2, that passes regression tests.
> Ale objections should be resolved in the version.
> 
> Ready for trunk?
> Martin
> 



Re: [PATCH][AArch64] Improve Cortex-A53 shift bypass

2017-06-28 Thread James Greenhalgh
On Wed, Jun 28, 2017 at 01:49:26PM +0100, Wilco Dijkstra wrote:
> Ramana Radhakrishnan wrote:
> >    
> > I'm about to run home for the day but this came in from
> > https://gcc.gnu.org/ml/gcc-patches/2013-09/msg02109.html and James
> > said in that email that this was put in to ensure no segfaults on
> > cortex-a15 / cortex-a7 tuning.
> 
> The code is historical - an older version didn't specifically look just for 
> shifts. Given we can only return shift rtx's now it's completely redundant.
> A bootstrap on armhf with --with-cpu=cortex-a15 passed.

The final commit was:

  https://gcc.gnu.org/ml/gcc-patches/2013-11/msg00533.html

That ends up being a more involved rewrite of the code in
arm_no_early_alu_shift_dep .

The check Wilco deletes was in their before the clean-up. Previosuly we
were manually walking the RTX for the consumer, assuming that after
stripping COND_EXEC and PARALLEL we'd have either:

  OP = (some_shift (register) (*))

or

  OP = (some_arithmetic_code (some_shift (register) (*)) (*))

That is, we're either looking at a shift, or an arithmetic operation that
contains a shift.

The early_op = XEXP (op, 0); would give you either:

  EARLY_OP = (regiser)

or

  EARLY_OP = (some_shift (register) (*))

But, we're interested in getting to the whole shift. So, we check if we
are now looking at a register, and if we are, use op instead:

 if (REG_P (early_op))
   early_op = op;

So we end up with either:

  EARLY_OP = (some_shift (register) (*))

or

  EARLY_OP = (some_shift (register) (*))

Which is exactly what we wanted.

After the refactoring, arm_find_shift_sub_rtx will always return you
(some_shift (*) (*)) - that's good we're now more resilient, and always
operate on a full shift, but it does mean the check for REG_P can never
match. I wrote this a fair while ago, but it looks like a simple oversight.

So, this code is safely dead and can be cleaned up as Wilco suggests with
no issue.

Hope that helps!

Thanks,
James



[v2] PR81136: ICE from inconsistent DR_MISALIGNMENTs

2017-06-28 Thread Richard Sandiford
Richard Biener  writes:
> On Mon, Jun 26, 2017 at 1:50 PM, Richard Sandiford
>  wrote:
>> Richard Biener  writes:
>>> On Mon, Jun 26, 2017 at 1:14 PM, Richard Sandiford
>>>  wrote:
 I don't think the problem is the lack of a cap.  In the test case we
 see that:

 1. B is known at compile time to be X * vecsize + Y when considered in
isolation, because the base alignment derived from its DR_REF >= 
 vecsize.
So DR_MISALIGNMENT (B) == Y.

 2. A's misalignment wrt vecsize is not known at compile time when
considered in isolation, because no useful base alignment can be
derived from its DR_REF.  (The DR_REF is to a plain int rather than
to a structure with a high alignment.)  So DR_MISALIGNMENT (A) == -1.

 3. A and B when considered as a pair trivially have the same misalignment
wrt vecsize, for the reasons above.

 Each of these results is individually correct.  The problem is that the
 assert is conflating two things: it's saying that if we know two datarefs
 have the same misaligment, we must either be able to calculate a
 compile-time misalignment for both datarefs in isolation, or we must
 fail to calculate a compile-time misalignment for both datarefs in
 isolation.  That isn't true: it's valid to have situations in which the
 compile-time misalignment is known for one dataref in isolation but not
 for the other.
>>>
>>> True.  So the assert should then become
>>>
>>>   gcc_assert (! known_alignment_for_access_p (dr)
>>>   || DR_MISALIGNMENT (dr) / dr_size ==
>>> DR_MISALIGNMENT (dr_peel) / dr_peel_size);
>>>
>>> ?
>>
>> I think it would need to be:
>>
>>   gcc_assert (!known_alignment_for_access_p (dr)
>>   || !known_alignment_for_access_p (dr_peel)
>>   || (DR_MISALIGNMENT (dr) / dr_size
>>   == DR_MISALIGNMENT (dr_peel) / dr_peel_size));
>
> I think for !known_alignment_for_access_p (dr_peel) the assert doesn't make
> any sense (DR_MISALIGNMENT is -1), so yes, you are right.
>
>> But yeah, that would work too.  The idea with the assert in the patch was
>> that for unconditional references we probably *do* want to try to compute
>> the same compile-time misalignment, but for optimisation reasons rather
>> than correctness.  Maybe that's more properly a gcc_checking_assert
>> though, since nothing goes wrong if it fails.  So perhaps:
>
> We shouldn't have asserts for optimization reasons, even with checking
> IMHO.

OK.

>>  gcc_checking_assert (DR_IS_CONDITIONAL_IN_STMT (dr)
>>   || DR_IS_CONDITIONAL_IN_STMT (dr_peel)
>>   || (known_alignment_for_access_p (dr)
>>   == known_alignment_for_access_p (dr_peel)));
>>
>> as a follow-on assert.
>>
>> Should I split it into two patches, one to change the gcc_assert and
>> another to add the optimisation?
>
> Yes please.

Here's the patch to relax the assert.  I'll post the rest in a new thread.

Tested as before.  OK to install?

Thanks,
Richard


2017-06-28  Richard Sandiford  

gcc/
PR tree-optimization/81136
* tree-vect-data-refs.c (vect_update_misalignment_for_peel): Only
assert that two references with the same misalignment have the same
compile-time misalignment if those compile-time misalignments
are known.

gcc/testsuite/
PR tree-optimization/81136
* gcc.dg/vect/pr81136.c: New test.

Index: gcc/tree-vect-data-refs.c
===
--- gcc/tree-vect-data-refs.c   2017-06-26 19:41:19.549571836 +0100
+++ gcc/tree-vect-data-refs.c   2017-06-28 14:25:58.811888377 +0100
@@ -906,8 +906,10 @@ vect_update_misalignment_for_peel (struc
 {
   if (current_dr != dr)
 continue;
-  gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
-  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
+  gcc_assert (!known_alignment_for_access_p (dr)
+ || !known_alignment_for_access_p (dr_peel)
+ || (DR_MISALIGNMENT (dr) / dr_size
+ == DR_MISALIGNMENT (dr_peel) / dr_peel_size));
   SET_DR_MISALIGNMENT (dr, 0);
   return;
 }
Index: gcc/testsuite/gcc.dg/vect/pr81136.c
===
--- /dev/null   2017-06-28 07:28:02.991792729 +0100
+++ gcc/testsuite/gcc.dg/vect/pr81136.c 2017-06-28 14:25:58.810888422 +0100
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+
+struct __attribute__((aligned (32)))
+{
+  char misaligner;
+  int foo[100];
+  int bar[100];
+} *a;
+
+void
+fn1 (int n)
+{
+  int *b = a->foo;
+  for (int i = 0; i < n; i++)
+a->bar[i] = b[i];
+}


Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Richard Sandiford
dr_analyze_innermost had a "struct loop *nest" parameter that acted
like a boolean.  This was added in r179161, with the idea that a
null nest selected BB-level analysis rather than loop analysis.

The handling seemed strange though.  If the DR was part of a loop,
we still tried to express the base and offset values as IVs, potentially
giving a nonzero step.  If that failed for any reason, we'd revert to
using the original base and offset, just as we would if we hadn't asked
for an IV in the first place.

It seems more natural to use the !in_loop handling whenever nest is null
and always set the step to zero.  This actually enables one more SLP
opportunity in bb-slp-pr65935.c.

I checked out r179161 and tried the patch there.  The test case added
in that revision still passes, so I don't think there was any particular
need to check simple_iv.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2017-06-28  Richard Sandiford  

gcc/
* tree-data-ref.c (dr_analyze_innermost): Replace the "nest"
parameter with a "loop" parameter and use it instead of the
loop containing DR_STMT.  Don't check simple_iv when doing
BB analysis.  Describe the two analysis modes in the comment.

gcc/testsuite/
* gcc.dg/vect/bb-slp-pr65935.c: Expect SLP to be used in main
as well.

Index: gcc/tree-data-ref.c
===
--- gcc/tree-data-ref.c 2017-06-28 14:33:41.294720044 +0100
+++ gcc/tree-data-ref.c 2017-06-28 14:35:30.475155670 +0100
@@ -749,15 +749,29 @@ canonicalize_base_object_address (tree a
   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 }
 
-/* Analyzes the behavior of the memory reference DR in the innermost loop or
-   basic block that contains it.  Returns true if analysis succeed or false
-   otherwise.  */
+/* Analyze the behavior of memory reference DR.  There are two modes:
+
+   - BB analysis.  In this case we simply split the address into base,
+ init and offset components, without reference to any containing loop.
+ The resulting base and offset are general expressions and they can
+ vary arbitrarily from one iteration of the containing loop to the next.
+ The step is always zero.
+
+   - loop analysis.  In this case we analyze the reference both wrt LOOP
+ and on the basis that the reference occurs (is "used") in LOOP;
+ see the comment above analyze_scalar_evolution_in_loop for more
+ information about this distinction.  The base, init, offset and
+ step fields are all invariant in LOOP.
+
+   Perform BB analysis if LOOP is null, or if LOOP is the function's
+   dummy outermost loop.  In other cases perform loop analysis.
+
+   Return true if the analysis succeeded and store the results in DR if so.
+   BB analysis can only fail for bitfield or reversed-storage accesses.  */
 
 bool
-dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
+dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
 {
-  gimple *stmt = DR_STMT (dr);
-  struct loop *loop = loop_containing_stmt (stmt);
   tree ref = DR_REF (dr);
   HOST_WIDE_INT pbitsize, pbitpos;
   tree base, poffset;
@@ -806,22 +820,11 @@ dr_analyze_innermost (struct data_refere
 
   if (in_loop)
 {
-  if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv,
-  nest ? true : false))
+  if (!simple_iv (loop, loop, base, &base_iv, true))
 {
-  if (nest)
-{
-  if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "failed: evolution of base is not"
-" affine.\n");
-  return false;
-}
-  else
-{
-  base_iv.base = base;
-  base_iv.step = ssize_int (0);
-  base_iv.no_overflow = true;
-}
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "failed: evolution of base is not affine.\n");
+ return false;
 }
 }
   else
@@ -843,22 +846,11 @@ dr_analyze_innermost (struct data_refere
   offset_iv.base = poffset;
   offset_iv.step = ssize_int (0);
 }
-  else if (!simple_iv (loop, loop_containing_stmt (stmt),
-   poffset, &offset_iv,
-  nest ? true : false))
+  else if (!simple_iv (loop, loop, poffset, &offset_iv, true))
 {
-  if (nest)
-{
-  if (dump_file && (dump_flags & TDF_DETAILS))
-fprintf (dump_file, "failed: evolution of offset is not"
-" affine.\n");
-  return false;
-}
-  else
-{
-  offset_iv.base = poffset;
-  offset_iv.step = ssize_int (0);
-}
+ if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "failed: evolut

Add DR_BASE_ALIGNMENT

2017-06-28 Thread Richard Sandiford
This patch records the base alignment in data_reference, to avoid the
second-guessing that was previously done in vect_compute_data_ref_alignment.
It also makes vect_analyze_data_refs use dr_analyze_innermost, instead
of having an almost-copy of the same code.

I'd originally tried to do the second part as a standalone patch,
but on its own it caused us to miscompute the base alignment (due to
the second-guessing not quite working).  This was previously latent
because the old code set STMT_VINFO_DR_ALIGNED_TO to a byte value,
whereas it should have been bits.

After the previous patch, the only thing that dr_analyze_innermost
read from the dr was the DR_REF.  I thought it would be better to pass
that in and make data_reference write-only.  This means that callers
like vect_analyze_data_refs don't have to set any fields first
(and thus not worry about *which* fields they should set).

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2017-06-28  Richard Sandiford  

gcc/
* tree-data-ref.h (data_reference): Add a base_alignment field.
(DR_BASE_ALIGNMENT): New macro.
(dr_analyze_innermost): Add a tree argument.
* tree-data-ref.c: Include builtins.h.
(dr_analyze_innermost): Take the tree reference as argument.
Set up DR_BASE_ALIGNMENT.
(create_data_ref): Update call accordingly.
* tree-predcom.c (find_looparound_phi): Likewise.
* tree-vectorizer.h (_stmt_vec_info): Add dr_base_alignment.
(STMT_VINFO_DR_BASE_ALIGNMENT): New macro.
* tree-vect-data-refs.c: Include tree-cfg.h.
(vect_compute_data_ref_alignment): Use DR_BASE_ALIGNMENT instead
of calculating an alignment here.
(vect_analyze_data_refs): Use dr_analyze_innermost.  Record the
base alignment in STMT_VINFO_DR_BASE_ALIGNMENT.

Index: gcc/tree-data-ref.h
===
--- gcc/tree-data-ref.h 2017-06-26 19:41:19.549571836 +0100
+++ gcc/tree-data-ref.h 2017-06-28 14:26:19.651051322 +0100
@@ -119,6 +119,10 @@ struct data_reference
   /* True when the data reference is in RHS of a stmt.  */
   bool is_read;
 
+  /* The alignment of INNERMOST.base_address, in bits.  This is logically
+ part of INNERMOST, but is kept here to avoid unnecessary padding.  */
+  unsigned int base_alignment;
+
   /* Behavior of the memory reference in the innermost loop.  */
   struct innermost_loop_behavior innermost;
 
@@ -139,6 +143,7 @@ #define DR_NUM_DIMENSIONS(DR)  DR_AC
 #define DR_IS_READ(DR) (DR)->is_read
 #define DR_IS_WRITE(DR)(!DR_IS_READ (DR))
 #define DR_BASE_ADDRESS(DR)(DR)->innermost.base_address
+#define DR_BASE_ALIGNMENT(DR)  (DR)->base_alignment
 #define DR_OFFSET(DR)  (DR)->innermost.offset
 #define DR_INIT(DR)(DR)->innermost.init
 #define DR_STEP(DR)(DR)->innermost.step
@@ -322,7 +327,7 @@ #define DDR_DIST_VECT(DDR, I) \
 #define DDR_REVERSED_P(DDR) (DDR)->reversed_p
 
 
-bool dr_analyze_innermost (struct data_reference *, struct loop *);
+bool dr_analyze_innermost (struct data_reference *, tree, struct loop *);
 extern bool compute_data_dependences_for_loop (struct loop *, bool,
   vec *,
   vec *,
Index: gcc/tree-data-ref.c
===
--- gcc/tree-data-ref.c 2017-06-28 14:26:12.946306736 +0100
+++ gcc/tree-data-ref.c 2017-06-28 14:26:19.651051322 +0100
@@ -94,6 +94,7 @@ Software Foundation; either version 3, o
 #include "dumpfile.h"
 #include "tree-affine.h"
 #include "params.h"
+#include "builtins.h"
 
 static struct datadep_stats
 {
@@ -749,7 +750,7 @@ canonicalize_base_object_address (tree a
   return build_fold_addr_expr (TREE_OPERAND (addr, 0));
 }
 
-/* Analyze the behavior of memory reference DR.  There are two modes:
+/* Analyze the behavior of memory reference REF.  There are two modes:
 
- BB analysis.  In this case we simply split the address into base,
  init and offset components, without reference to any containing loop.
@@ -767,12 +768,13 @@ canonicalize_base_object_address (tree a
dummy outermost loop.  In other cases perform loop analysis.
 
Return true if the analysis succeeded and store the results in DR if so.
-   BB analysis can only fail for bitfield or reversed-storage accesses.  */
+   BB analysis can only fail for bitfield or reversed-storage accesses.
+
+   Note that DR is purely an output; the contents on input don't matter.  */
 
 bool
-dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
+dr_analyze_innermost (struct data_reference *dr, tree ref, struct loop *loop)
 {
-  tree ref = DR_REF (dr);
   HOST_WIDE_INT pbitsize, pbitpos;
   tree base, poffset;
   machine_mode pmode;
@@ -802,6 +804,7 @@ dr_analyze_innermost (struct data_refere
   return false;
 }
 
+  unsigned

Pool alignment information for common bases

2017-06-28 Thread Richard Sandiford
We know that if a vectorised loop is reached, all statements in that
loop execute at least once, so it should be safe to pool the alignment
information for all the statements we're vectorising.  The only catch is
that DR_REFs for masked loads and stores only occur if the mask value is
nonzero.  For example, in:

  struct s __attribute__((aligned(32))) {
int misaligner;
int array[N];
  };

  int *ptr;
  for (int i = 0; i < n; ++i)
ptr[i] = c[i] ? ((struct s *) (ptr - 1))->array[i] : 0;

we can only guarantee that ptr points to a "struct s" if at least
one c[i] is true.

This patch adds a DR_IS_CONDITIONAL_IN_STMT flag to record whether
the DR_REF is guaranteed to occur every time that the statement
executes to completion.  It then pools the alignment information
for references that aren't conditional in this sense.

Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Richard


2017-06-28  Richard Sandiford  

gcc/
* tree-vectorizer.h: Include tree-hash-traits.h.
(vec_base_alignments): New typedef.
(vec_info): Add a base_alignments field.
(vect_record_base_alignments: Declare.
* tree-data-ref.h (data_reference): Add an is_conditional_in_stmt
field.
(DR_IS_CONDITIONAL_IN_STMT): New macro.
(create_data_ref): Add an is_conditional_in_stmt argument.
* tree-data-ref.c (create_data_ref): Likewise.  Use it to initialize
the is_conditional_in_stmt field.
(data_ref_loc): Add an is_conditional_in_stmt field.
(get_references_in_stmt): Set the is_conditional_in_stmt field.
(find_data_references_in_stmt): Update call to create_data_ref.
(graphite_find_data_references_in_stmt): Likewise.
* tree-ssa-loop-prefetch.c (determine_loop_nest_reuse): Likewise.
* tree-vect-data-refs.c (vect_analyze_data_refs): Likewise.
(vect_record_base_alignment): New function.
(vect_record_base_alignments): Likewise.
(vect_compute_data_ref_alignment): Adjust base_addr and aligned_to
for nested statements even if we fail to compute a misalignment.
Use pooled base alignments for unconditional references.
(vect_find_same_alignment_drs): Compare base addresses instead
of base objects.
(vect_compute_data_ref_alignment): Call vect_record_base_alignments.
* tree-vect-slp.c (vect_slp_analyze_bb_1): Likewise.
(new_bb_vec_info): Initialize base_alignments.
* tree-vect-loop.c (new_loop_vec_info): Likewise.
* tree-vectorizer.c (vect_destroy_datarefs): Release base_alignments.

gcc/testsuite/
* gcc.dg/vect/pr81136.c: Add scan test.

Index: gcc/tree-vectorizer.h
===
--- gcc/tree-vectorizer.h   2017-06-28 14:26:19.653051245 +0100
+++ gcc/tree-vectorizer.h   2017-06-28 14:26:24.027879710 +0100
@@ -22,6 +22,7 @@ Software Foundation; either version 3, o
 #define GCC_TREE_VECTORIZER_H
 
 #include "tree-data-ref.h"
+#include "tree-hash-traits.h"
 #include "target.h"
 
 /* Used for naming of new temporaries.  */
@@ -84,6 +85,10 @@ struct stmt_info_for_cost {
 
 typedef vec stmt_vector_for_cost;
 
+/* Maps base addresses to the largest alignment that we've been able
+   to calculate for them.  */
+typedef hash_map vec_base_alignments;
+
 /
   SLP
  /
@@ -156,6 +161,10 @@ struct vec_info {
   /* All data references.  */
   vec datarefs;
 
+  /* Maps the base addresses of all data references in DATAREFS to the
+ largest alignment that we've been able to calculate for them.  */
+  vec_base_alignments base_alignments;
+
   /* All data dependences.  */
   vec ddrs;
 
@@ -1123,6 +1132,7 @@ extern bool vect_prune_runtime_alias_tes
 extern bool vect_check_gather_scatter (gimple *, loop_vec_info,
   gather_scatter_info *);
 extern bool vect_analyze_data_refs (vec_info *, int *);
+extern void vect_record_base_alignments (vec_info *);
 extern tree vect_create_data_ref_ptr (gimple *, tree, struct loop *, tree,
  tree *, gimple_stmt_iterator *,
  gimple **, bool, bool *,
Index: gcc/tree-data-ref.h
===
--- gcc/tree-data-ref.h 2017-06-28 14:26:19.651051322 +0100
+++ gcc/tree-data-ref.h 2017-06-28 14:26:24.025879789 +0100
@@ -119,6 +119,11 @@ struct data_reference
   /* True when the data reference is in RHS of a stmt.  */
   bool is_read;
 
+  /* True when the data reference is conditional within STMT,
+ i.e. if it might not occur even when the statement is executed
+ and runs to completion.  */
+  bool is_conditional_in_stmt;
+
   /* The alignment of INNERMOST.base_address, in bits.  This is logically
  part of INNERMOST, but is 

Re: Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Bin.Cheng
On Wed, Jun 28, 2017 at 2:36 PM, Richard Sandiford
 wrote:
> Index: gcc/tree-data-ref.c
> ===
> --- gcc/tree-data-ref.c 2017-06-28 14:33:41.294720044 +0100
> +++ gcc/tree-data-ref.c 2017-06-28 14:35:30.475155670 +0100
> @@ -749,15 +749,29 @@ canonicalize_base_object_address (tree a
>return build_fold_addr_expr (TREE_OPERAND (addr, 0));
>  }
>
> -/* Analyzes the behavior of the memory reference DR in the innermost loop or
> -   basic block that contains it.  Returns true if analysis succeed or false
> -   otherwise.  */
> +/* Analyze the behavior of memory reference DR.  There are two modes:
> +
> +   - BB analysis.  In this case we simply split the address into base,
> + init and offset components, without reference to any containing loop.
> + The resulting base and offset are general expressions and they can
> + vary arbitrarily from one iteration of the containing loop to the next.
> + The step is always zero.
> +
> +   - loop analysis.  In this case we analyze the reference both wrt LOOP
> + and on the basis that the reference occurs (is "used") in LOOP;
> + see the comment above analyze_scalar_evolution_in_loop for more
> + information about this distinction.  The base, init, offset and
> + step fields are all invariant in LOOP.
> +
> +   Perform BB analysis if LOOP is null, or if LOOP is the function's
> +   dummy outermost loop.  In other cases perform loop analysis.
> +
> +   Return true if the analysis succeeded and store the results in DR if so.
> +   BB analysis can only fail for bitfield or reversed-storage accesses.  */
>
>  bool
> -dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
> +dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
>  {
> -  gimple *stmt = DR_STMT (dr);
> -  struct loop *loop = loop_containing_stmt (stmt);
>tree ref = DR_REF (dr);
>HOST_WIDE_INT pbitsize, pbitpos;
>tree base, poffset;
> @@ -806,22 +820,11 @@ dr_analyze_innermost (struct data_refere
>
>if (in_loop)
A nit.  No need to use bool variable now?  We can check loop != NULL instead.

Thanks,
bin


[patch][Ping #3] PR80929: Realistic PARALLEL cost in seq_cost.

2017-06-28 Thread Georg-Johann Lay

Ping #3


https://gcc.gnu.org/ml/gcc-patches/2017-06/msg00096.html

On 02.06.2017 09:53, Georg-Johann Lay wrote:
> Hi,
>
> this small addition improves costs of PARALLELs in
> rtlanal.c:seq_cost().  Up to now, these costs are
> assumed to be 1 which gives gross inexact costs for,
> e.g. divmod which is represented as PARALLEL.
>
> The patch just forwards cost computation to insn_rtx_cost
> which uses the cost of the 1st SET (if any) and otherwise
> assign costs of 1 insn.
>
> Bootstrapped & regtested on x86_64.
>
> Moreover, it fixed the division by constant on avr where
> the problem popped up since PR79665.
>
> Ok to install?
>
> Johann
>
> gcc/
> PR middle-end/80929
> * rtlanal.c (seq_cost) [PARALLEL]: Get cost from insn_rtx_cost
> instead of assuming cost of 1.

Index: rtlanal.c
===
--- rtlanal.c   (revision 248745)
+++ rtlanal.c   (working copy)
@@ -5300,6 +5300,9 @@ seq_cost (const rtx_insn *seq, bool spee
   set = single_set (seq);
   if (set)
 cost += set_rtx_cost (set, speed);
+  else if (INSN_P (seq)
+  && PARALLEL == GET_CODE (PATTERN (seq)))
+   cost += insn_rtx_cost (PATTERN (seq), speed);
   else
 cost++;
 }


Re: Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Richard Sandiford
"Bin.Cheng"  writes:
> On Wed, Jun 28, 2017 at 2:36 PM, Richard Sandiford
>  wrote:
>> Index: gcc/tree-data-ref.c
>> ===
>> --- gcc/tree-data-ref.c 2017-06-28 14:33:41.294720044 +0100
>> +++ gcc/tree-data-ref.c 2017-06-28 14:35:30.475155670 +0100
>> @@ -749,15 +749,29 @@ canonicalize_base_object_address (tree a
>>return build_fold_addr_expr (TREE_OPERAND (addr, 0));
>>  }
>>
>> -/* Analyzes the behavior of the memory reference DR in the innermost loop or
>> -   basic block that contains it.  Returns true if analysis succeed or false
>> -   otherwise.  */
>> +/* Analyze the behavior of memory reference DR.  There are two modes:
>> +
>> +   - BB analysis.  In this case we simply split the address into base,
>> + init and offset components, without reference to any containing loop.
>> + The resulting base and offset are general expressions and they can
>> + vary arbitrarily from one iteration of the containing loop to the next.
>> + The step is always zero.
>> +
>> +   - loop analysis.  In this case we analyze the reference both wrt LOOP
>> + and on the basis that the reference occurs (is "used") in LOOP;
>> + see the comment above analyze_scalar_evolution_in_loop for more
>> + information about this distinction.  The base, init, offset and
>> + step fields are all invariant in LOOP.
>> +
>> +   Perform BB analysis if LOOP is null, or if LOOP is the function's
>> +   dummy outermost loop.  In other cases perform loop analysis.
>> +
>> +   Return true if the analysis succeeded and store the results in DR if so.
>> +   BB analysis can only fail for bitfield or reversed-storage accesses.  */
>>
>>  bool
>> -dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
>> +dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
>>  {
>> -  gimple *stmt = DR_STMT (dr);
>> -  struct loop *loop = loop_containing_stmt (stmt);
>>tree ref = DR_REF (dr);
>>HOST_WIDE_INT pbitsize, pbitpos;
>>tree base, poffset;
>> @@ -806,22 +820,11 @@ dr_analyze_innermost (struct data_refere
>>
>>if (in_loop)
> A nit.  No need to use bool variable now?  We can check loop != NULL instead.

As the comment says, we also want to do BB analysis if the loop is the
outermost dummy loop, so in_loop remains set to:

  bool in_loop = (loop && loop->num);

I think it's worth keeping that abstraction.

Thanks,
Richard



Re: [PATCH] Make inlining consistent in LTO and non-LTO mode (PR target/71991).

2017-06-28 Thread Jan Hubicka
> -  /* If callee has no option attributes, then it is ok to inline.  */
> -  if (!callee_tree)
> +  /* If callee has no option attributes (or default),
> + then it is ok to inline.  */
> +  if (!callee_tree || callee_tree == target_option_default_node)

I am not sure this actually makes sense, because target_option_default_node is 
not very
meaningful for LTO (it contains whatever was passed to LTO driver).  Perhaps 
one can check
for explicit optimization/machine attribute and whether caller and callee come 
from
same compilation unit, though this is quite hackish and will do unexpected 
things with COMDATs.

honza
>  ret = true;
>  
>/* If caller has no option attributes, but callee does then it is not ok to
> diff --git a/gcc/testsuite/gcc.dg/torture/pr71991.c 
> b/gcc/testsuite/gcc.dg/torture/pr71991.c
> new file mode 100644
> index 000..79c927f6844
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/torture/pr71991.c
> @@ -0,0 +1,12 @@
> +/* PR target/71991 */
> +
> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
> +/* { dg-options "-std=c99" } */
> +
> +static inline int __attribute__ ((__always_inline__)) fn1 () { return 0; }
> +static inline int __attribute__ ((target("inline-all-stringops"))) fn2 () { 
> return fn1 (); }
> +
> +int main()
> +{
> +  return fn2();
> +}
> 



Re: Backport [PATCH,rs6000] Handle conflicting target options -mno-power9-vector and -mcpu=power9

2017-06-28 Thread Kelvin Nilsen

I have bootstrapped and tested this patch on
powerpc64le-unkonwn-linux-gnu with no regressions.  Is this ok for
backporting to gcc 6?



On 03/22/2017 10:17 PM, Segher Boessenkool wrote:
> On Wed, Mar 22, 2017 at 05:55:53PM -0600, Kelvin Nilsen wrote:
>>> Or it could do -mpower9-dform-scalar but disable -mpower9-dform-vector?
>>> That seems more reasonable.
>>
>> The internal problem report sent to me said "-mno-power9-vector should
>> override power9-dform unless the latter has been deliberately specified
>> by the user."  I'm just following orders.
> 
> Heh :-)
> 
>> If you think it preferable to
>> only override -mpower-dform-vector, I'll make that modification.
> 
> It is more logical.  Or so I though.  But as it turns out,
> -mpower9-dform-scalar is about vector registers as well.
> 
> So the patch is approved for trunk as-is.  Thanks!
> 
* config/rs6000/rs6000.c (rs6000_option_override_internal): Change
handling of certain combinations of target options, including the
combinations -mpower8-vector vs. -mno-vsx, -mpower8-vector vs.
-mno-power8-vector, and -mpower9_dform vs. -mno-power9-vector.
>>>
>>> Those other changes are independent?
>>
>> Actually, these other changes are not independent.  My initial attempt
>> at a patch only changed the behavior of -mpower9_dform vs.
>> -mno-power9-vector.  But this actually resulted in a regression of an
>> existing test.  To "properly" handle the new case without impacting
>> existing "established" behavior (as represented in the existing dejagnu
>> testsuite), I had to make these other changes as well.
> 
> Too many options :-(
> 
> 
> Segher
> 
> 

-- 
Kelvin Nilsen, Ph.D.  kdnil...@linux.vnet.ibm.com
home office: 801-756-4821, cell: 520-991-6727
IBM Linux Technology Center - PPC Toolchain



Re: [PATCH 2/3] Simplify wrapped binops

2017-06-28 Thread Robin Dapp
> ideally you'd use a wide-int here and defer the tree allocation to the result

Did that in the attached version.

> So I guess we never run into the outer_op == minus case as the above is
> clearly wrong for that?

Right, damn, not only was the treatment for this missing but it was
bogus in the other pattern as well.  Since we are mostly dealing with
PLUS_EXPR anyways it's probably better to defer the MINUS_EXPR case for
now.  This will also slim down the patterns a bit.

> try to keep vertical spacing in patterns minimal -- I belive that patterns
> should be small enough to fit in a terminal window (24 lines).

I find using the expanded wrapped_range condition in the simplification
somewhat cumbersome, especially because I need the condition to evaluate
to true by default making the initialization unintuitive.  Yet, I guess
setting wrapped_range = true was not terribly intuitive either...

Regards
 Robin
diff --git a/gcc/match.pd b/gcc/match.pd
index 80a17ba..ed497cf 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1290,6 +1290,79 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (if (cst && !TREE_OVERFLOW (cst))
  (plus { cst; } @0
 
+/* ((T)(A + CST1)) + CST2 -> (T)(A) + CST  */
+#if GIMPLE
+  (simplify
+(plus (convert (plus@3 @0 INTEGER_CST@1)) INTEGER_CST@2)
+  (if (INTEGRAL_TYPE_P (type)
+   && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@3)))
+   /* Combine CST1 and CST2 to CST and convert to outer type if
+  (A + CST1)'s range does not wrap.  */
+   (with
+   {
+ tree inner_type = TREE_TYPE (@3);
+ wide_int wmin0, wmax0;
+ wide_int w1 = @1;
+ wide_int w2 = @2;
+ wide_int combined_cst;
+
+ bool ovf_undef = TYPE_OVERFLOW_UNDEFINED (inner_type);
+ bool min_ovf = true, max_ovf = false;
+
+ enum value_range_type vr0 =
+   get_range_info (@0, &wmin0, &wmax0);
+
+ if (ovf_undef || vr0 == VR_RANGE)
+   {
+ bool ovf = true;
+ if (!ovf_undef && vr0 == VR_RANGE)
+	   {
+		 wi::add (wmin0, w1, TYPE_SIGN (inner_type), &min_ovf);
+		 wi::add (wmax0, w1, TYPE_SIGN (inner_type), &max_ovf);
+		 ovf = min_ovf || max_ovf;
+	   }
+
+ /* Extend CST1 to TYPE. */
+ w1 = w1.from (w1, TYPE_PRECISION (type),
+			   ovf ? SIGNED : TYPE_SIGN (inner_type));
+   }
+   }
+   (if (ovf_undef || !((min_ovf && !max_ovf) || (!min_ovf && max_ovf)))
+(plus (convert @0) { wide_int_to_tree (type, wi::add (w1, w2)); }
+ )
+#endif
+
+/* ((T)(A)) + CST -> (T)(A + CST)  */
+#if GIMPLE
+  (simplify
+   (plus (convert SSA_NAME@0) INTEGER_CST@1)
+(if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && INTEGRAL_TYPE_P (type)
+ && TYPE_PRECISION (type) > TYPE_PRECISION (TREE_TYPE (@0))
+ && int_fits_type_p (@1, TREE_TYPE (@0)))
+ /* Perform binary operation inside the cast if the constant fits
+and (A + CST)'s range does not wrap.  */
+ (with
+  {
+bool min_ovf = true, max_ovf = false;
+tree inner_type = TREE_TYPE (@0);
+
+wide_int w1 = @1;
+w1 = w1.from (w1, TYPE_PRECISION (inner_type), TYPE_SIGN
+  		(inner_type));
+
+wide_int wmin0, wmax0;
+if (get_range_info (@0, &wmin0, &wmax0) == VR_RANGE)
+  {
+wi::add (wmin0, w1, TYPE_SIGN (inner_type), &min_ovf);
+wi::add (wmax0, w1, TYPE_SIGN (inner_type), &max_ovf);
+  }
+  }
+ (if (!((min_ovf && !max_ovf) || (!min_ovf && max_ovf)) )
+  (convert (plus @0 { {wide_int_to_tree (TREE_TYPE (@0), w1)}; })))
+ )))
+#endif
+
   /* ~A + A -> -1 */
   (simplify
(plus:c (bit_not @0) @0)


Re: [PATCH] Make inlining consistent in LTO and non-LTO mode (PR target/71991).

2017-06-28 Thread Martin Liška
On 06/28/2017 04:24 PM, Jan Hubicka wrote:
>> -  /* If callee has no option attributes, then it is ok to inline.  */
>> -  if (!callee_tree)
>> +  /* If callee has no option attributes (or default),
>> + then it is ok to inline.  */
>> +  if (!callee_tree || callee_tree == target_option_default_node)
> 
> I am not sure this actually makes sense, because target_option_default_node 
> is not very
> meaningful for LTO (it contains whatever was passed to LTO driver). 

I see!

 Perhaps one can check
> for explicit optimization/machine attribute and whether caller and callee 
> come from
> same compilation unit, though this is quite hackish and will do unexpected 
> things with COMDATs.

That's quite cumbersome. Any other idea than marking the PR as won't fix?

Martin

> 
> honza
>>  ret = true;
>>  
>>/* If caller has no option attributes, but callee does then it is not ok 
>> to
>> diff --git a/gcc/testsuite/gcc.dg/torture/pr71991.c 
>> b/gcc/testsuite/gcc.dg/torture/pr71991.c
>> new file mode 100644
>> index 000..79c927f6844
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.dg/torture/pr71991.c
>> @@ -0,0 +1,12 @@
>> +/* PR target/71991 */
>> +
>> +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
>> +/* { dg-options "-std=c99" } */
>> +
>> +static inline int __attribute__ ((__always_inline__)) fn1 () { return 0; }
>> +static inline int __attribute__ ((target("inline-all-stringops"))) fn2 () { 
>> return fn1 (); }
>> +
>> +int main()
>> +{
>> +  return fn2();
>> +}
>>
> 



Re: [PATCH][RFC] Canonize names of attributes.

2017-06-28 Thread Martin Liška
On 06/14/2017 06:40 PM, Joseph Myers wrote:
> On Wed, 14 Jun 2017, Richard Biener wrote:
> 
 are you sure this is needed?  This seems to be solely arguments to
 attributes.
>>>
>>> It's need for cases like:
>>>  __intN_t (8, __QI__);
>>
>> But __QI__ is not processed in lookup_attribute, is it?  So canonizing that
>> looks unrelated?  I didn't see similar handling in the C FE btw (but
>> maybe I missed it).
> 
> It's not clear to me that there is automatically a rule that where 
> identifiers are arguments to attributes, they must follow this rule about 
> foo and __foo__ being equivalent.
> 
> Specifically: c-attribs.c:attribute_takes_identifier_p says that the 
> cleanup attribute takes an identifier (a function name).  But it's 
> certainly the case that the exact function named there must be used; foo 
> and __foo__ as cleanup attribute arguments are not equivalent.  (You could 
> argue cleanup should take an expression, with an error then being given if 
> that doesn't evaluate to a function designator.)
> 

Hello.

This is obvious reason where original name must be preserved. Looks following 
patch
works and does not break test-suite:

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 8f638785e0e..08b4db5e5bd 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -24765,7 +24765,8 @@ cp_parser_gnu_attribute_list (cp_parser* parser)
  tree tv;
  if (arguments != NULL_TREE
  && ((tv = TREE_VALUE (arguments)) != NULL_TREE)
- && TREE_CODE (tv) == IDENTIFIER_NODE)
+ && TREE_CODE (tv) == IDENTIFIER_NODE
+ && !id_equal (TREE_PURPOSE (attribute), "cleanup"))
TREE_VALUE (arguments) = canonize_attr_name (tv);
  release_tree_vector (vec);
}
-- 

Well, the question is whether we want to in general canonicalize attribute 
names?
Special situations like "cleanup" can be handled.

Martin




Re: [PATCH][RFC] Canonize names of attributes.

2017-06-28 Thread Martin Liška
On 06/14/2017 07:24 PM, Jason Merrill wrote:
> On Tue, Jun 13, 2017 at 8:32 AM, Martin Liška  wrote:
>> (canonize_attr_name): New function.
> 
> I think this should be "canonicalize"; "canonize" means something else.
> 
> Jason
> 

Yes, I hope it's a cosmetic problem. In general, do you welcome the change
to canonicalize attribute names?

Martin


Re: [PATCH, GCC/testsuite/ARM] Consistently check for neon in vect effective targets

2017-06-28 Thread Thomas Preudhomme

On 20/06/17 13:44, Christophe Lyon wrote:




The results with a more recent trunk (r249356)) are here:
http://people.linaro.org/~christophe.lyon/cross-validation/gcc-test-patches/249356-consistent_neon_check.patch/report-build-info.html

They are slightly different, but still tedious to check ;-)


I've checked arm-none-linux-gnueabi and arm-none-linux-gnueabihf and found that:

* there's no new FAIL
* changes to UNSUPPORTED and NA are for the same files
* changes are only for tests in a vect directory
* changes for arm-none-linux-gnueabihf are only when targeting vfp without neon 
(tests are disabled because there is no vector unit)


Changes to arm-none-linux-gnueabi makes sense since this defaults to soft 
floating point and none of the test disabled adds any option to select another 
variant.


I believe this all makes sense.

Therefore, is this ok to commit?

Best regards,

Thomas
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index ded6383cc1f9a1489cd83e1dace0c2fc48e252c3..aa8550c9d2cf0ae7e157d9c67fa06ad811651421 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2916,7 +2916,7 @@ proc check_effective_target_vect_int { } {
 	 || [istarget alpha*-*-*]
 	 || [istarget ia64-*-*] 
 	 || [istarget aarch64*-*-*]
-	 || [check_effective_target_arm32]
+	 || [is-effective-target arm_neon]
 	 || ([istarget mips*-*-*]
 		 && ([et-is-effective-target mips_loongson]
 		 || [et-is-effective-target mips_msa])) } {
@@ -2944,8 +2944,7 @@ proc check_effective_target_vect_intfloat_cvt { } {
 if { [istarget i?86-*-*] || [istarget x86_64-*-*]
 	 || ([istarget powerpc*-*-*]
 		 && ![istarget powerpc-*-linux*paired*])
-	 || ([istarget arm*-*-*]
-		 && [check_effective_target_arm_neon_ok])
+	 || [is-effective-target arm_neon]
 	 || ([istarget mips*-*-*]
 		 && [et-is-effective-target mips_msa]) } {
 	set et_vect_intfloat_cvt_saved($et_index) 1
@@ -2987,8 +2986,7 @@ proc check_effective_target_vect_uintfloat_cvt { } {
 	 || ([istarget powerpc*-*-*]
 		 && ![istarget powerpc-*-linux*paired*])
 	 || [istarget aarch64*-*-*]
-	 || ([istarget arm*-*-*]
-		 && [check_effective_target_arm_neon_ok])
+	 || [is-effective-target arm_neon]
 	 || ([istarget mips*-*-*]
 		 && [et-is-effective-target mips_msa]) } {
 	set et_vect_uintfloat_cvt_saved($et_index) 1
@@ -3016,8 +3014,7 @@ proc check_effective_target_vect_floatint_cvt { } {
 if { [istarget i?86-*-*] || [istarget x86_64-*-*]
 	 || ([istarget powerpc*-*-*]
 		 && ![istarget powerpc-*-linux*paired*])
-	 || ([istarget arm*-*-*]
-		 && [check_effective_target_arm_neon_ok])
+	 || [is-effective-target arm_neon]
 	 || ([istarget mips*-*-*]
 		 && [et-is-effective-target mips_msa]) } {
 	set et_vect_floatint_cvt_saved($et_index) 1
@@ -3043,8 +3040,7 @@ proc check_effective_target_vect_floatuint_cvt { } {
 	set et_vect_floatuint_cvt_saved($et_index) 0
 if { ([istarget powerpc*-*-*]
 	  && ![istarget powerpc-*-linux*paired*])
-	|| ([istarget arm*-*-*]
-		&& [check_effective_target_arm_neon_ok])
+	|| [is-effective-target arm_neon]
 	|| ([istarget mips*-*-*]
 		&& [et-is-effective-target mips_msa]) } {
 	   set et_vect_floatuint_cvt_saved($et_index) 1
@@ -4903,7 +4899,7 @@ proc check_effective_target_vect_shift { } {
 	 || [istarget ia64-*-*]
 	 || [istarget i?86-*-*] || [istarget x86_64-*-*]
 	 || [istarget aarch64*-*-*]
-	 || [check_effective_target_arm32]
+	 || [is-effective-target arm_neon]
 	 || ([istarget mips*-*-*]
 		 && ([et-is-effective-target mips_msa]
 		 || [et-is-effective-target mips_loongson])) } {
@@ -4921,7 +4917,7 @@ proc check_effective_target_whole_vector_shift { } {
 	 || [istarget ia64-*-*]
 	 || [istarget aarch64*-*-*]
 	 || [istarget powerpc64*-*-*]
-	 || ([check_effective_target_arm32]
+	 || ([is-effective-target arm_neon]
 	 && [check_effective_target_arm_little_endian])
 	 || ([istarget mips*-*-*]
 	 && [et-is-effective-target mips_loongson]) } {
@@ -4945,8 +4941,7 @@ proc check_effective_target_vect_bswap { } {
 } else {
 	set et_vect_bswap_saved($et_index) 0
 	if { [istarget aarch64*-*-*]
- || ([istarget arm*-*-*]
-&& [check_effective_target_arm_neon])
+ || [is-effective-target arm_neon]
 	   } {
 	   set et_vect_bswap_saved($et_index) 1
 	}
@@ -4969,7 +4964,7 @@ proc check_effective_target_vect_shift_char { } {
 	set et_vect_shift_char_saved($et_index) 0
 	if { ([istarget powerpc*-*-*]
  && ![istarget powerpc-*-linux*paired*])
-	 || [check_effective_target_arm32]
+	 || [is-effective-target arm_neon]
 	 || ([istarget mips*-*-*]
 		 && [et-is-effective-target mips_msa]) } {
 	   set et_vect_shift_char_saved($et_index) 1
@@ -4987,10 +4982,10 @@ proc check_effective_target_vect_shift_char { } {
 
 proc check_effective_target_vect_long { } {
  

Re: [PATCH], Add check ppc_cpu_supports_hw to testsuite

2017-06-28 Thread Peter Bergner
On 6/27/17 6:53 PM, Michael Meissner wrote:
> This adds a target supports option in dejagnu so that future tests can use 
> this
> to determine whether or not to test target_clones.

I like the idea, but some comments...



> + #ifdef __MACH__
> +   asm volatile ("xxlor vs0,vs0,vs0");
> + #else
> +   asm volatile ("xxlor 0,0,0");
> + #endif

What is this hunk for?  We're only interested in the return value from
the builtin below, correct?



> +   if (!__builtin_cpu_supports ("vsx"))
> + return 1;
> +   return 0;

...and more importantly, why limit us to testing "vsx"?  Why not test
for "ppc32", which should be true for *all* kernels we'd ever run on,
including ppc32, ppc64 and ppc64le?

Peter



Re: [PATCH, GCC/testsuite/ARM] Consistently check for neon in vect effective targets

2017-06-28 Thread Kyrill Tkachov

Hi Thomas,

On 28/06/17 15:49, Thomas Preudhomme wrote:

On 20/06/17 13:44, Christophe Lyon wrote:




The results with a more recent trunk (r249356)) are here:
http://people.linaro.org/~christophe.lyon/cross-validation/gcc-test-patches/249356-consistent_neon_check.patch/report-build-info.html

They are slightly different, but still tedious to check ;-)


I've checked arm-none-linux-gnueabi and arm-none-linux-gnueabihf and found that:

* there's no new FAIL
* changes to UNSUPPORTED and NA are for the same files
* changes are only for tests in a vect directory
* changes for arm-none-linux-gnueabihf are only when targeting vfp without neon 
(tests are disabled because there is no vector unit)

Changes to arm-none-linux-gnueabi makes sense since this defaults to soft 
floating point and none of the test disabled adds any option to select another 
variant.

I believe this all makes sense.

Therefore, is this ok to commit?

Best regards,

Thomas


@@ -4987,10 +4982,10 @@ proc check_effective_target_vect_shift_char { } {
 
 proc check_effective_target_vect_long { } {

 if { [istarget i?86-*-*] || [istarget x86_64-*-*]
-|| (([istarget powerpc*-*-*]
-  && ![istarget powerpc-*-linux*paired*])
+|| (([istarget powerpc*-*-*]
+  && ![istarget powerpc-*-linux*paired*])
   && [check_effective_target_ilp32])


Is this just a whitespace change?
If it is intended then okay.

This is okay with a ChangeLog entry.

Thanks, this looks like a good change.
Kyrill


Re: [PATCH, GCC/testsuite/ARM] Consistently check for neon in vect effective targets

2017-06-28 Thread Thomas Preudhomme



On 28/06/17 15:59, Kyrill Tkachov wrote:

Hi Thomas,

On 28/06/17 15:49, Thomas Preudhomme wrote:

On 20/06/17 13:44, Christophe Lyon wrote:




The results with a more recent trunk (r249356)) are here:
http://people.linaro.org/~christophe.lyon/cross-validation/gcc-test-patches/249356-consistent_neon_check.patch/report-build-info.html 



They are slightly different, but still tedious to check ;-)


I've checked arm-none-linux-gnueabi and arm-none-linux-gnueabihf and found that:

* there's no new FAIL
* changes to UNSUPPORTED and NA are for the same files
* changes are only for tests in a vect directory
* changes for arm-none-linux-gnueabihf are only when targeting vfp without 
neon (tests are disabled because there is no vector unit)


Changes to arm-none-linux-gnueabi makes sense since this defaults to soft 
floating point and none of the test disabled adds any option to select another 
variant.


I believe this all makes sense.

Therefore, is this ok to commit?

Best regards,

Thomas


@@ -4987,10 +4982,10 @@ proc check_effective_target_vect_shift_char { } {

  proc check_effective_target_vect_long { } {
  if { [istarget i?86-*-*] || [istarget x86_64-*-*]
- || (([istarget powerpc*-*-*]
-  && ![istarget powerpc-*-linux*paired*])
+ || (([istarget powerpc*-*-*]
+  && ![istarget powerpc-*-linux*paired*])
&& [check_effective_target_ilp32])


Is this just a whitespace change?
If it is intended then okay.


It is yes, trailing whitespace. I took the liberty to fix it because I was 
changing some other issues in the same procedure.




This is okay with a ChangeLog entry.


Sorry, I should have pasted it again from the initial message.

2017-06-06  Thomas Preud'homme  

* lib/target-supports.exp (check_effective_target_vect_int): Replace
current ARM check by ARM NEON's availability check.
(check_effective_target_vect_intfloat_cvt): Likewise.
(check_effective_target_vect_uintfloat_cvt): Likewise.
(check_effective_target_vect_floatint_cvt): Likewise.
(check_effective_target_vect_floatuint_cvt): Likewise.
(check_effective_target_vect_shift): Likewise.
(check_effective_target_whole_vector_shift): Likewise.
(check_effective_target_vect_bswap): Likewise.
(check_effective_target_vect_shift_char): Likewise.
(check_effective_target_vect_long): Likewise.
(check_effective_target_vect_float): Likewise.
(check_effective_target_vect_perm): Likewise.
(check_effective_target_vect_perm_byte): Likewise.
(check_effective_target_vect_perm_short): Likewise.
(check_effective_target_vect_widen_sum_hi_to_si_pattern): Likewise.
(check_effective_target_vect_widen_sum_qi_to_hi): Likewise.
(check_effective_target_vect_widen_mult_qi_to_hi): Likewise.
(check_effective_target_vect_widen_mult_hi_to_si): Likewise.
(check_effective_target_vect_widen_mult_qi_to_hi_pattern): Likewise.
(check_effective_target_vect_widen_mult_hi_to_si_pattern): Likewise.
(check_effective_target_vect_widen_shift): Likewise.
(check_effective_target_vect_extract_even_odd): Likewise.
(check_effective_target_vect_interleave): Likewise.
(check_effective_target_vect_multiple_sizes): Likewise.
(check_effective_target_vect64): Likewise.
(check_effective_target_vect_max_reduc): Likewise.



Thanks, this looks like a good change.
Kyrill


Thanks!

Best regards,

Thomas


[arm] Fix incorrect __ARM_ARCH_PROFILE for -march=armv7

2017-06-28 Thread Richard Earnshaw (lists)
ACLE explicitly states that when targetting the common subset of
ARMv7-A, ARMv7-R and ARMv7-M, the __ARM_ARCH_PROFILE macro should not be
set.  We currently set it to 'M' which is clearly erroneous.

The logic for creating this is very convoluted and also somewhat
fragile, so I've taken the opportunity to use the new CPU and
architecture definition infrastructure to record the profile for each
architecture explicitly rather than try to reconstruct it from other
data.  I think this results in a much more robust solution.


2017-06-28  Richard Earnshaw  

* config/arm/parsecpu.awk (profile): Parse new keyword in an arch
context.
(gen_comm_data): Emit architectural setting of arch_prof.
* config/arm/arm-cpus.in (armv6-m, armv6s-m, armv7-a, armv7ve): Set the
profile.
(armv7-r, armv7-m, armv7e-m, armv8-a, armv8.1-a, armv8.2-a): Likewise.
(armv8-m.base, armv8-m.main): Likewise.
* arm-protos.h (arm_build_target): Add profile field.
(arch_option): Likewise.
* config/arm/arm.c (arm_configure_build_target): Copy the profile to
the active target.
* config/arm/arm.h (TARGET_ARM_ARCH_PROFILE): Use
arm_active_target.profile.


Committed.
diff --git a/gcc/config/arm/arm-cpu-cdata.h b/gcc/config/arm/arm-cpu-cdata.h
index 8406fa0..c5002b3 100644
--- a/gcc/config/arm/arm-cpu-cdata.h
+++ b/gcc/config/arm/arm-cpu-cdata.h
@@ -2106,6 +2106,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "2", BASE_ARCH_2,
+0,
 TARGET_CPU_arm2,
   },
   {
@@ -2116,6 +2117,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "2", BASE_ARCH_2,
+0,
 TARGET_CPU_arm2,
   },
   {
@@ -2126,6 +2128,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "3", BASE_ARCH_3,
+0,
 TARGET_CPU_arm6,
   },
   {
@@ -2136,6 +2139,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "3M", BASE_ARCH_3M,
+0,
 TARGET_CPU_arm7m,
   },
   {
@@ -2146,6 +2150,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "4", BASE_ARCH_4,
+0,
 TARGET_CPU_arm7tdmi,
   },
   {
@@ -2156,6 +2161,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "4T", BASE_ARCH_4T,
+0,
 TARGET_CPU_arm7tdmi,
   },
   {
@@ -2166,6 +2172,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "5", BASE_ARCH_5,
+0,
 TARGET_CPU_arm10tdmi,
   },
   {
@@ -2176,6 +2183,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "5T", BASE_ARCH_5T,
+0,
 TARGET_CPU_arm10tdmi,
   },
   {
@@ -2186,6 +2194,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "5E", BASE_ARCH_5E,
+0,
 TARGET_CPU_arm1026ejs,
   },
   {
@@ -2196,6 +2205,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "5TE", BASE_ARCH_5TE,
+0,
 TARGET_CPU_arm1026ejs,
   },
   {
@@ -2206,6 +2216,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "5TEJ", BASE_ARCH_5TEJ,
+0,
 TARGET_CPU_arm1026ejs,
   },
   {
@@ -2216,6 +2227,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6", BASE_ARCH_6,
+0,
 TARGET_CPU_arm1136js,
   },
   {
@@ -2226,6 +2238,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6J", BASE_ARCH_6J,
+0,
 TARGET_CPU_arm1136js,
   },
   {
@@ -2236,6 +2249,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6K", BASE_ARCH_6K,
+0,
 TARGET_CPU_mpcore,
   },
   {
@@ -2246,6 +2260,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6Z", BASE_ARCH_6Z,
+0,
 TARGET_CPU_arm1176jzs,
   },
   {
@@ -2256,6 +2271,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6KZ", BASE_ARCH_6KZ,
+0,
 TARGET_CPU_arm1176jzs,
   },
   {
@@ -2266,6 +2282,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6KZ", BASE_ARCH_6KZ,
+0,
 TARGET_CPU_arm1176jzs,
   },
   {
@@ -2276,6 +2293,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6T2", BASE_ARCH_6T2,
+0,
 TARGET_CPU_arm1156t2s,
   },
   {
@@ -2286,6 +2304,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6M", BASE_ARCH_6M,
+'M',
 TARGET_CPU_cortexm1,
   },
   {
@@ -2296,6 +2315,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "6M", BASE_ARCH_6M,
+'M',
 TARGET_CPU_cortexm1,
   },
   {
@@ -2306,6 +2326,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "7", BASE_ARCH_7,
+0,
 TARGET_CPU_cortexa8,
   },
   {
@@ -2316,6 +2337,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "7A", BASE_ARCH_7A,
+'A',
 TARGET_CPU_cortexa8,
   },
   {
@@ -2326,6 +2348,7 @@ const arch_option all_architectures[] =
   isa_nobit
 },
 "7A", BASE_ARCH_7A,
+

Re: [PATCH, GCC/ARM, ping] Remove ARMv8-M code for D17-D31

2017-06-28 Thread Thomas Preudhomme

Ping?

*** gcc/ChangeLog ***

2017-06-13  Thomas Preud'homme  

* config/arm/arm.c (arm_option_override): Forbid ARMv8-M Security
Extensions with more than 16 double VFP registers.
(cmse_nonsecure_entry_clear_before_return): Remove second entry of
to_clear_mask and all code related to it and make the remaining
entry a 64-bit scalar integer variable and adapt code accordingly.

Best regards,

Thomas

On 20/06/17 16:01, Thomas Preudhomme wrote:

Hi,

Function cmse_nonsecure_entry_clear_before_return has code to deal with
high VFP register (D16-D31) while ARMv8-M Baseline and Mainline both do
not support more than 16 double VFP registers (D0-D15). This makes this
security-sensitive code harder to read for not much benefit since
libcall for cmse_nonsecure_call functions do not deal with those high
VFP registers anyway.

This commit gets rid of this code for simplicity and fixes 2 issues in
the same function:

- stop the first loop when reaching maxregno to avoid dealing with VFP
   registers if targetting Thumb-1 or using -mfloat-abi=soft
- include maxregno in that loop

ChangeLog entry is as follows:

*** gcc/ChangeLog ***

2017-06-13  Thomas Preud'homme  

 * config/arm/arm.c (arm_option_override): Forbid ARMv8-M Security
 Extensions with more than 16 double VFP registers.
 (cmse_nonsecure_entry_clear_before_return): Remove second entry of
 to_clear_mask and all code related to it and make the remaining
 entry a 64-bit scalar integer variable and adapt code accordingly.

Testing: Testsuite shows no regression when run for ARMv8-M Baseline and
ARMv8-M Mainline.

Is this ok for trunk?

Best regards,

Thomas
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 259597d8890ee84c5bd92b12b6f9f6521c8dcd2e..60a4d1f46765d285de469f51fbb5a0ad76d56d9b 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3620,6 +3620,11 @@ arm_option_override (void)
   if (use_cmse && !arm_arch_cmse)
 error ("target CPU does not support ARMv8-M Security Extensions");
 
+  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
+ and ARMv8-M Baseline and Mainline do not allow such configuration.  */
+  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
+error ("ARMv8-M Security Extensions incompatible with selected FPU");
+
   /* Disable scheduling fusion by default if it's not armv7 processor
  or doesn't prefer ldrd/strd.  */
   if (flag_schedule_fusion == 2
@@ -24996,15 +25001,15 @@ thumb1_expand_prologue (void)
 void
 cmse_nonsecure_entry_clear_before_return (void)
 {
-  uint64_t to_clear_mask[2];
+  uint64_t to_clear_mask;
   uint32_t padding_bits_to_clear = 0;
   uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
   int regno, maxregno = IP_REGNUM;
   tree result_type;
   rtx result_rtl;
 
-  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
-  to_clear_mask[0] |= (1ULL << IP_REGNUM);
+  to_clear_mask = (1ULL << (NUM_ARG_REGS)) - 1;
+  to_clear_mask |= (1ULL << IP_REGNUM);
 
   /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
  registers.  We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
@@ -25015,23 +25020,22 @@ cmse_nonsecure_entry_clear_before_return (void)
   maxregno = LAST_VFP_REGNUM;
 
   float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
-  to_clear_mask[0] |= float_mask;
-
-  float_mask = (1ULL << (maxregno - 63)) - 1;
-  to_clear_mask[1] = float_mask;
+  to_clear_mask |= float_mask;
 
   /* Make sure we don't clear the two scratch registers used to clear the
 	 relevant FPSCR bits in output_return_instruction.  */
   emit_use (gen_rtx_REG (SImode, IP_REGNUM));
-  to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
+  to_clear_mask &= ~(1ULL << IP_REGNUM);
   emit_use (gen_rtx_REG (SImode, 4));
-  to_clear_mask[0] &= ~(1ULL << 4);
+  to_clear_mask &= ~(1ULL << 4);
 }
 
+  gcc_assert ((unsigned) maxregno <= sizeof (to_clear_mask) * __CHAR_BIT__);
+
   /* If the user has defined registers to be caller saved, these are no longer
  restored by the function before returning and must thus be cleared for
  security purposes.  */
-  for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
+  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
 {
   /* We do not touch registers that can be used to pass arguments as per
 	 the AAPCS, since these should never be made callee-saved by user
@@ -25041,7 +25045,7 @@ cmse_nonsecure_entry_clear_before_return (void)
   if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
 	continue;
   if (call_used_regs[regno])
-	to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
+	to_clear_mask |= (1ULL << regno);
 }
 
   /* Make sure we do not clear the registers used to return the result in.  */
@@ -25052,7 +25056,7 @@ cmse_nonsecure_entry_clear_before_return (void)
 
   /* No need to check that we return in registers, because we don't
 	 support returning on stac

Re: [PATCH] Make inlining consistent in LTO and non-LTO mode (PR target/71991).

2017-06-28 Thread Jan Hubicka
> On 06/28/2017 04:24 PM, Jan Hubicka wrote:
> >> -  /* If callee has no option attributes, then it is ok to inline.  */
> >> -  if (!callee_tree)
> >> +  /* If callee has no option attributes (or default),
> >> + then it is ok to inline.  */
> >> +  if (!callee_tree || callee_tree == target_option_default_node)
> > 
> > I am not sure this actually makes sense, because target_option_default_node 
> > is not very
> > meaningful for LTO (it contains whatever was passed to LTO driver). 
> 
> I see!
> 
>  Perhaps one can check
> > for explicit optimization/machine attribute and whether caller and callee 
> > come from
> > same compilation unit, though this is quite hackish and will do unexpected 
> > things with COMDATs.
> 
> That's quite cumbersome. Any other idea than marking the PR as won't fix?

Yep, it is not prettiest. The problem is that the concept that callee can 
change semantics
when no explicit attribute is present is sloppy.  I am not sure how many 
programs rely on it
(it is kind of surprising to see functions not being inlined into your target 
attribute annotated
function I guess).
Note that we check for original file in inliner already - this can be done by 
comparing lto_file_data
of corresponding cgraph nodes.

Honza


Re: C/C++ PATCH to add __typeof_noqual (PR c/65455, c/39985)

2017-06-28 Thread Martin Sebor

I don't know if that wouldn't be overkill.  Qualifiers on rvalues are
meaningless in C and that's why my __typeof_noqual strips them all.
We'd then need even e.g. __remove_restrict, not sure if there's need for
these.  Maybe it is.


Unless __typeof__ (p) q = p; declares a restrict-qualified q when
p is a restrict-qualified pointer I don't think __remove_restrict
is needed.


On second thought, I agree that if __remove_const and _volatile
were provided then __remove_restrict should also be, if only for
completeness.

Martin


[PATCH, rs6000] Signed builtin support

2017-06-28 Thread Carl Love
GCC Maintainers:

The following patch adds support for the vec_signed, vec_signede,
vec_signedo and vec_signed2 builtins.  The patch has been tested on
powerpc64le-unknown-linux-gnu (Power 8 LE) and
powerpc64-unknown-linux-gnu(Power 8 BE).

Is the fix OK for gcc mainline?

  Carl Love


--

gcc/ChangeLog:

2017-06-28  Carl Love  

* config/rs6000/rs6000-c.c: Add support for built-in functions
vector signed int vec_signed (vector float);
vector signed long long vec_signed (vector double);
vector signed int vec_signed2 (vector double, vector double);
vector signed int vec_signede (vector double);
vector signed int vec_signedo (vector double);
* config/rs6000/rs6000.c (rs6000_generate_vsigned2_code): Add
instruction generator.
* config/rs6000/vsx.md (UNSPEC_VSX_XVCVSPSXWS, UNSPEC_VSX_XVCVSPSXDS,
UNSPEC_VSX_VSIGNED2): Add UNSPECS
(vsx_xvcvspsxws, vsx_xvcvdpuxds_scale, vsx_xvcvspuxws, vsigned2_v2df):
Add define_insn.
(vsignedo_v2df, vsignede_v2df, vunsigned2_v2df, vunsignedo_v2df,
vunsignede_v2df): Add define_expands.
* config/rs6000/rs6000-builtin.def (VEC_SIGNED, VEC_UNSIGNED,
VEC_SIGNED2, VEC_UNSIGNED2, VEC_SIGNEDE, VEC_UNSIGNEDE, VEC_SIGNEDO,
VEC_UNSIGNEDO): Add definitions.
* config/vsx.md (UNSPEC_VSX_XVCVSPSXWS, UNSPEC_VSX_XVCVSPSXDS,
UNSPEC_VSX_VSIGNED2): Add UNSPECs.
(vsx_xvcvspsxws, vsx_xvcvspuxws): Add define_insn.
(vsigned2_v2df, vsigendo_v2df, vsignede_v2df,
vunsigned2_v2df, vunsignedo_v2df, vunsignede_v2df): Add define_expands.
* config/rs6000/altivec.h (vec_signed, vec_signed2,
vec_signede and vec_signedo, vec_unsigned, vec_unsigned2,
vec_unsignede, vec_unsignedo): Add builtin defines.
* config/rs6000-protos.h (rs6000_generate_vsigned2_code): Add extern
declaration.
* doc/extend.texi: Update the built-in documentation file for the
new built-in functions.

gcc/testsuite/ChangeLog:

2017-06-28  Carl Love  

* gcc.target/powerpc/builtins-3-runnable.c(test_int_result,
test_unsigned_int_result, test_ll_int_result,
test_ll_unsigned_int_result): Add result checking functions, add
debug support.
(main): Add builtin function tests, .

Signed-off-by: Carl Love 
---
 gcc/config/rs6000/altivec.h|   8 +
 gcc/config/rs6000/rs6000-builtin.def   |  23 +++
 gcc/config/rs6000/rs6000-c.c   |  23 +++
 gcc/config/rs6000/rs6000-protos.h  |   1 +
 gcc/config/rs6000/rs6000.c |  29 +++
 gcc/config/rs6000/vsx.md   | 194 +
 gcc/doc/extend.texi|  14 ++
 .../gcc.target/powerpc/builtins-3-runnable.c   | 229 +++--
 8 files changed, 502 insertions(+), 19 deletions(-)

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index d542315..417e143 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -181,6 +181,14 @@
 #define vec_rlnm(a,b,c) (__builtin_vec_rlnm((a),((b)<<8)|(c)))
 #define vec_rsqrt __builtin_vec_rsqrt
 #define vec_rsqrte __builtin_vec_rsqrte
+#define vec_signed __builtin_vec_vsigned
+#define vec_signed2 __builtin_vec_vsigned2
+#define vec_signede __builtin_vec_vsignede
+#define vec_signedo __builtin_vec_vsignedo
+#define vec_unsigned __builtin_vec_vunsigned
+#define vec_unsigned2 __builtin_vec_vunsigned2
+#define vec_unsignede __builtin_vec_vunsignede
+#define vec_unsignedo __builtin_vec_vunsignedo
 #define vec_vsubfp __builtin_vec_vsubfp
 #define vec_subc __builtin_vec_subc
 #define vec_vsubsws __builtin_vec_vsubsws
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index 963b9a8..9bdc2b4 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1602,6 +1602,9 @@ BU_VSX_2 (CMPLE_U2DI, "cmple_u2di", CONST,  
vector_ngtuv2di)
 BU_VSX_2 (FLOAT2_V2DI,"float2_v2di",CONST,  float2_v2di)
 BU_VSX_2 (UNS_FLOAT2_V2DI,"uns_float2_v2di",CONST,  uns_float2_v2di)
 
+BU_VSX_2 (VEC_VSIGNED2_V2DF,  "vsigned2_v2df",CONST,  vsigned2_v2df)
+BU_VSX_2 (VEC_VUNSIGNED2_V2DF,"vunsigned2_v2df",  CONST,  vunsigned2_v2df)
+
 /* VSX abs builtin functions.  */
 BU_VSX_A (XVABSDP,   "xvabsdp",CONST,  absv2df2)
 BU_VSX_A (XVNABSDP,  "xvnabsdp",   CONST,  vsx_nabsv2df2)
@@ -1693,6 +1696,16 @@ BU_VSX_1 (DOUBLEL_V4SI,  "doublel_v4si", CONST,  
doublelv4si2)
 BU_VSX_1 (DOUBLEL_V4SF,  "doublel_v4sf", CONST,doublelv4sf2)
 BU_VSX_1 (UNS_DOUBLEL_V4SI,  "uns_doublel_v4si", CONST,unsdoublelv4si2)
 
+BU_VSX_1 (VEC_VSIGNED_V4SF,  "vsigned_v4sf", CONST,  vsx_xvcvspsxws)
+BU_VSX_1 (VEC_V

Re: Backport [PATCH,rs6000] PR80103: Fix ICE with cross compiler

2017-06-28 Thread Kelvin Nilsen

Is the attached refinement of this patch previously applied to mainline
ok for backport to gcc 6?  I have bootstrapped and tested without
regressions on powerpc64le-unknown-linux-gnu.

This patch differs from the original mainline patch in the following
regards:

 1. Certain commentary changes are omitted because the context to which
they applied is missing from GCC 6.

 2. A typo in a test case has been corrected.  The typo was discovered
during scrutiny of the backport regression testing results.  I will
momentarily submit a patch to correct the same test case on main line.

On 03/24/2017 04:14 PM, Segher Boessenkool wrote:
> On Fri, Mar 24, 2017 at 04:04:33PM -0600, Kelvin Nilsen wrote:
>> PR 80103 provides a test case which results in an internal
>> compiler error when invoked with -mno-direct-move -mpower9-dform-
>> vector target options.  The internal compiler error results because
>> these two target options are incompatible with each other.
>>
>> The enclosed patch simply disables this particular combination of
>> target options, terminating gcc with an error message instead of
>> producing an internal compiler error.  Additionally, this patch
>> includes new comments to address omissions from a patch committed
>> on 2017/03/23 which deals with conflicts between the 
>> -mno-power9-vector and -mcpu=power9 target options.
>>
>> This patch has been bootstrapped and tested with no regressions on
>> both powerpc64-unknown-linux-gnu and powerpc64le-unknown-linux-gnu.
>> Is this ok for the trunk?
> 
> This looks good, please apply.  Thanks,
> 
> 
> Segher
> 
> 

gcc/ChangeLog:

2017-06-28  Kelvin Nilsen  

Backport from mainline
2017-03-27  Kelvin Nilsen  

PR target/80103
* config/rs6000/rs6000.c (rs6000_option_override_internal): Add
special handling for target option conflicts between dform options
(-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar) and
-mno-direct-move.

gcc/testsuite/ChangeLog:

2017-06-28  Kelvin Nilsen  

Backport from mainline
2017-03-27  Kelvin Nilsen  

PR target/80103
* gcc.target/powerpc/pr80103-1.c: New test.

Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 249572)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -4295,6 +4295,33 @@ rs6000_option_override_internal (bool global_init_
| OPTION_MASK_P9_DFORM_VECTOR);
 }
 
+  if ((TARGET_P9_DFORM_SCALAR || TARGET_P9_DFORM_VECTOR)
+  && !TARGET_DIRECT_MOVE)
+{
+  /* We prefer to not mention undocumented options in
+error messages.  However, if users have managed to select
+power9-dform without selecting power9-vector, they
+already know about undocumented flags.  */
+  if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
+ && ((rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_VECTOR) ||
+ (rs6000_isa_flags_explicit & OPTION_MASK_P9_DFORM_SCALAR) ||
+ (TARGET_P9_DFORM_BOTH == 1)))
+   error ("-mpower9-dform, -mpower9-dform-vector, -mpower9-dform-scalar"
+  " require -mdirect-move");
+  else if ((rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE) == 0)
+   {
+ rs6000_isa_flags |= OPTION_MASK_DIRECT_MOVE;
+ rs6000_isa_flags_explicit |= OPTION_MASK_DIRECT_MOVE;
+   }
+  else
+   {
+ rs6000_isa_flags &=
+   ~(OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
+ rs6000_isa_flags_explicit |=
+   (OPTION_MASK_P9_DFORM_SCALAR | OPTION_MASK_P9_DFORM_VECTOR);
+   }
+}
+
   if (TARGET_P9_DFORM_SCALAR && !TARGET_UPPER_REGS_DF)
 {
   /* We prefer to not mention undocumented options in
Index: gcc/testsuite/gcc.target/powerpc/pr80103-1.c
===
--- gcc/testsuite/gcc.target/powerpc/pr80103-1.c(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/pr80103-1.c(working copy)
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power9" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mpower9-dform-vector -mno-direct-move" } */
+/* { dg-excess-errors "expect error due to conflicting target options" } */
+/* Since the error message is not associated with a particular line
+   number, we cannot use the dg-error directive and cannot specify a
+   regexp to describe the expected error message.  The expected error
+   message is: "-mpower9-dform, -mpower9-dform-vector,
+-mpower9-dform-scalar require -mdirect-move" */
+
+int a;
+void b (__attribute__ ((__vector_size__ (16))) char c)
+{
+  a = ((__attribute__ ((__vector_size__ (2 * sizeof (long long) c)[0];
+}



Re: C/C++ PATCH to add __typeof_noqual (PR c/65455, c/39985)

2017-06-28 Thread Martin Sebor

On 06/28/2017 03:19 AM, Joseph Myers wrote:

On Wed, 28 Jun 2017, Martin Sebor wrote:


I.e., just having blocks to remove qualifiers of kind X is not sufficient
without "remove all qualifiers (possibly except these kinds)" as well.  I
suppose you could have __remove_quals (const volatile _Atomic, expr) and
__remove_quals_except (_Atomic, expr) or similar (with some keyword that
goes in there to mean "any address space").


Right.  My point isn't that the bigger features shouldn't exist,
but that they can and should be built on top of the primitives
and defined not in the compiler but in a header.  With
__bultin_remove_const() and __builtin_remove_volatile()
a __typeof_noqual(x) can be a macro that expands to these two,
plus any others as/if necessary, with any other additional


My point is that users should not need to update their definitions every
time another qualifier is invented.  Either you need the "remove
qualifiers except" functionality in the language, or you need a
*compiler-provided* header


Yes, that's what I'm suggesting.  It could also be a macro defined
by the driver on the command line, to avoid having to #include
a header.   But providing it as a built-in, like the lower-level
primitives, as a convenience wrapper around them, would be fine
too.


that defines "remove qualifiers except"
operations for every combination of qualifiers in that version of the
compiler (because "remove qualifiers except _Atomic" and "remove
qualifiers except address spaces" cannot be composed into "remove
qualifiers except _Atomic and address spaces, and "remove const, volatile,
restrict and address spaces" is not something user code should need to
hardcode when the intent is "remove qualifiers except _Atomic").


I'm not sure I understand what you're trying to say.  I thought
we agreed earlier that __typeof_noqual__ would remove all type
qualifiers.

If you are saying that there are qualifiers that may need to be
removed for some use cases but kept for others (I mentioned one:
removing one or the other qualifier from const _Atomic T*, but
not both) then that's exactly why I argue for providing lower-
level primitives targeted at each of the qualifiers separately.
Having __typeof_noqual__ remove some but not all qualifiers would
be surprising to say the least.

Martin



Re: [PATCH] multiarch support for non-glibc linux systems

2017-06-28 Thread Jeff Law
On 06/28/2017 03:33 AM, Matthias Klose wrote:
> On 07.06.2017 19:22, Szabolcs Nagy wrote:
>> Current multiarch directory name is always *-linux-gnu* on linux,
>> this patch configures different names for uclibc and musl targets.
>> (tested by the debian rebootstrap scripts for various *-linux-musl
>> and *-linux-uclibc targets see debian bug #861588)
>>
>> gcc/
>> 2017-06-07  Szabolcs Nagy  
>>
>>  * config.gcc (*-linux-musl*): Add t-musl tmake_file.
>>  (*-linux-uclibc*): Add t-uclibc tmake_file.
>>  * config/t-musl: New.
>>  * config/t-uclibc: New.
> 
> I can't formally approve that, but this looks ok.
The patch is OK for the trunk.

I must have inadvertently removed it when walking through my pending
patches last week -- the downside of ignoring everything for a month is
it's much easier for something to fall through the cracks.

jeff


Re: [PATCH, GCC/ARM] Remove ARMv8-M code for D17-D31

2017-06-28 Thread Richard Earnshaw (lists)
On 20/06/17 16:01, Thomas Preudhomme wrote:
> Hi,
> 
> Function cmse_nonsecure_entry_clear_before_return has code to deal with
> high VFP register (D16-D31) while ARMv8-M Baseline and Mainline both do
> not support more than 16 double VFP registers (D0-D15). This makes this
> security-sensitive code harder to read for not much benefit since
> libcall for cmse_nonsecure_call functions do not deal with those high
> VFP registers anyway.
> 
> This commit gets rid of this code for simplicity and fixes 2 issues in
> the same function:
> 
> - stop the first loop when reaching maxregno to avoid dealing with VFP
>   registers if targetting Thumb-1 or using -mfloat-abi=soft
> - include maxregno in that loop
> 

This is silently baking in dangerous assumptions about GCC's internal
numbering of the registers.  That's not a good idea from a long-term
portability perspective.

At the very least you need to assert that all the interesting registers
are numbered in the range 0..63; but ideally the code should just handle
pretty much any assignment of internal register numbers.

Did you consider using sbitmaps rather than doing all the multi-word
stuff by steam?

R.

> ChangeLog entry is as follows:
> 
> *** gcc/ChangeLog ***
> 
> 2017-06-13  Thomas Preud'homme  
> 
> * config/arm/arm.c (arm_option_override): Forbid ARMv8-M Security
> Extensions with more than 16 double VFP registers.
> (cmse_nonsecure_entry_clear_before_return): Remove second entry of
> to_clear_mask and all code related to it and make the remaining
> entry a 64-bit scalar integer variable and adapt code accordingly.
> 
> Testing: Testsuite shows no regression when run for ARMv8-M Baseline and
> ARMv8-M Mainline.
> 
> Is this ok for trunk?
> 
> Best regards,
> 
> Thomas
> 
> remove_d16-d31_armv8m_clearing_code.patch
> 
> 
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 
> 259597d8890ee84c5bd92b12b6f9f6521c8dcd2e..60a4d1f46765d285de469f51fbb5a0ad76d56d9b
>  100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -3620,6 +3620,11 @@ arm_option_override (void)
>if (use_cmse && !arm_arch_cmse)
>  error ("target CPU does not support ARMv8-M Security Extensions");
>  
> +  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
> + and ARMv8-M Baseline and Mainline do not allow such configuration.  */
> +  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
> +error ("ARMv8-M Security Extensions incompatible with selected FPU");
> +
>/* Disable scheduling fusion by default if it's not armv7 processor
>   or doesn't prefer ldrd/strd.  */
>if (flag_schedule_fusion == 2
> @@ -24996,15 +25001,15 @@ thumb1_expand_prologue (void)
>  void
>  cmse_nonsecure_entry_clear_before_return (void)
>  {
> -  uint64_t to_clear_mask[2];
> +  uint64_t to_clear_mask;
>uint32_t padding_bits_to_clear = 0;
>uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
>int regno, maxregno = IP_REGNUM;
>tree result_type;
>rtx result_rtl;
>  
> -  to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
> -  to_clear_mask[0] |= (1ULL << IP_REGNUM);
> +  to_clear_mask = (1ULL << (NUM_ARG_REGS)) - 1;
> +  to_clear_mask |= (1ULL << IP_REGNUM);
>  
>/* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
>   registers.  We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
> @@ -25015,23 +25020,22 @@ cmse_nonsecure_entry_clear_before_return (void)
>maxregno = LAST_VFP_REGNUM;
>  
>float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
> -  to_clear_mask[0] |= float_mask;
> -
> -  float_mask = (1ULL << (maxregno - 63)) - 1;
> -  to_clear_mask[1] = float_mask;
> +  to_clear_mask |= float_mask;
>  
>/* Make sure we don't clear the two scratch registers used to clear the
>relevant FPSCR bits in output_return_instruction.  */
>emit_use (gen_rtx_REG (SImode, IP_REGNUM));
> -  to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
> +  to_clear_mask &= ~(1ULL << IP_REGNUM);
>emit_use (gen_rtx_REG (SImode, 4));
> -  to_clear_mask[0] &= ~(1ULL << 4);
> +  to_clear_mask &= ~(1ULL << 4);
>  }
>  
> +  gcc_assert ((unsigned) maxregno <= sizeof (to_clear_mask) * __CHAR_BIT__);
> +
>/* If the user has defined registers to be caller saved, these are no 
> longer
>   restored by the function before returning and must thus be cleared for
>   security purposes.  */
> -  for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
> +  for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
>  {
>/* We do not touch registers that can be used to pass arguments as per
>the AAPCS, since these should never be made callee-saved by user
> @@ -25041,7 +25045,7 @@ cmse_nonsecure_entry_clear_before_return (void)
>if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
>   continue;
>if (call_used_regs[regno])
> - to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
> 

Re: Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Bin.Cheng
On Wed, Jun 28, 2017 at 3:04 PM, Richard Sandiford
 wrote:
> "Bin.Cheng"  writes:
>> On Wed, Jun 28, 2017 at 2:36 PM, Richard Sandiford
>>  wrote:
>>> Index: gcc/tree-data-ref.c
>>> ===
>>> --- gcc/tree-data-ref.c 2017-06-28 14:33:41.294720044 +0100
>>> +++ gcc/tree-data-ref.c 2017-06-28 14:35:30.475155670 +0100
>>> @@ -749,15 +749,29 @@ canonicalize_base_object_address (tree a
>>>return build_fold_addr_expr (TREE_OPERAND (addr, 0));
>>>  }
>>>
>>> -/* Analyzes the behavior of the memory reference DR in the innermost loop 
>>> or
>>> -   basic block that contains it.  Returns true if analysis succeed or false
>>> -   otherwise.  */
>>> +/* Analyze the behavior of memory reference DR.  There are two modes:
>>> +
>>> +   - BB analysis.  In this case we simply split the address into base,
>>> + init and offset components, without reference to any containing loop.
>>> + The resulting base and offset are general expressions and they can
>>> + vary arbitrarily from one iteration of the containing loop to the 
>>> next.
>>> + The step is always zero.
>>> +
>>> +   - loop analysis.  In this case we analyze the reference both wrt LOOP
>>> + and on the basis that the reference occurs (is "used") in LOOP;
>>> + see the comment above analyze_scalar_evolution_in_loop for more
>>> + information about this distinction.  The base, init, offset and
>>> + step fields are all invariant in LOOP.
>>> +
>>> +   Perform BB analysis if LOOP is null, or if LOOP is the function's
>>> +   dummy outermost loop.  In other cases perform loop analysis.
>>> +
>>> +   Return true if the analysis succeeded and store the results in DR if so.
>>> +   BB analysis can only fail for bitfield or reversed-storage accesses.  */
>>>
>>>  bool
>>> -dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
>>> +dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
>>>  {
>>> -  gimple *stmt = DR_STMT (dr);
>>> -  struct loop *loop = loop_containing_stmt (stmt);
>>>tree ref = DR_REF (dr);
>>>HOST_WIDE_INT pbitsize, pbitpos;
>>>tree base, poffset;
>>> @@ -806,22 +820,11 @@ dr_analyze_innermost (struct data_refere
>>>
>>>if (in_loop)
>> A nit.  No need to use bool variable now?  We can check loop != NULL instead.
>
> As the comment says, we also want to do BB analysis if the loop is the
> outermost dummy loop, so in_loop remains set to:
>
>   bool in_loop = (loop && loop->num);
>
> I think it's worth keeping that abstraction.
Sorry if I misunderstand this.   "loop != NULL" is different to
in_loop only when loop is the outermost dummy loop.  Which means nest
!= NULL based on change:
-  dr_analyze_innermost (dr, nest);
+  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);

I wonder if it's possible for caller to pass non-NULL nest when
create_data_ref for a statement in outermost dummy loop?  Or should
it?

Also noticed that in vect_analyze_data_refs, there is below code:

  /* If target supports vector gather loads or scatter stores, or if
 this might be a SIMD lane access, see if they can't be used.  */
  if (is_a  (vinfo)
  && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
  && !nested_in_vect_loop_p (loop, stmt))
{
  struct data_reference *newdr
= create_data_ref (NULL, loop_containing_stmt (stmt),
   DR_REF (dr), stmt, maybe_scatter ? false : true);

we have nest==NULL and loop is the inner loop, but after change NULL
is passed to dr_analyze_innermost because of:

-  dr_analyze_innermost (dr, nest);
+  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);

Is this the reason that bb-slp-pr65935.c gets vectorized after change?
 simple_iv should return {&s->phase[dir], 0} successfully though.
Question is what would happen if simple_iv succeeds with non-ZERO step
when called with nest==NULL?  The patch skips simple_iv and forces
ZERO step?

Thanks,
bin
>
> Thanks,
> Richard
>


Re: [PATCH][RFC] Canonize names of attributes.

2017-06-28 Thread Joseph Myers
On Wed, 28 Jun 2017, Martin Liška wrote:

> On 06/14/2017 07:24 PM, Jason Merrill wrote:
> > On Tue, Jun 13, 2017 at 8:32 AM, Martin Liška  wrote:
> >> (canonize_attr_name): New function.
> > 
> > I think this should be "canonicalize"; "canonize" means something else.
> > 
> > Jason
> > 
> 
> Yes, I hope it's a cosmetic problem. In general, do you welcome the change
> to canonicalize attribute names?

I think *names* (as opposed to arguments) should be canonicalized, but 
arguments need separate consideration.

-- 
Joseph S. Myers
jos...@codesourcery.com

Re: C/C++ PATCH to add __typeof_noqual (PR c/65455, c/39985)

2017-06-28 Thread Joseph Myers
On Wed, 28 Jun 2017, Martin Sebor wrote:

> > that defines "remove qualifiers except"
> > operations for every combination of qualifiers in that version of the
> > compiler (because "remove qualifiers except _Atomic" and "remove
> > qualifiers except address spaces" cannot be composed into "remove
> > qualifiers except _Atomic and address spaces, and "remove const, volatile,
> > restrict and address spaces" is not something user code should need to
> > hardcode when the intent is "remove qualifiers except _Atomic").
> 
> I'm not sure I understand what you're trying to say.  I thought
> we agreed earlier that __typeof_noqual__ would remove all type
> qualifiers.

Yes, it should.

What I'm saying is: if you provide an interface that enables users to 
write code that means "remove const" and will continue to mean exactly 
that with future GCC versions, even when those versions have additional 
qualifiers, you should also provide an interface that enables users to 
write code that means "remove all qualifiers except _Atomic and address 
spaces" (and likewise for any other combination) and will continue to mean 
exactly that with future GCC versions, even when those versions have 
additional qualifiers.  Allowing users to write "remove const, volatile 
and restrict" is *not* the same, as the meanings would diverge when future 
qualifiers are added.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Richard Sandiford
"Bin.Cheng"  writes:
> On Wed, Jun 28, 2017 at 3:04 PM, Richard Sandiford
>  wrote:
>> "Bin.Cheng"  writes:
>>> On Wed, Jun 28, 2017 at 2:36 PM, Richard Sandiford
>>>  wrote:
 Index: gcc/tree-data-ref.c
 ===
 --- gcc/tree-data-ref.c 2017-06-28 14:33:41.294720044 +0100
 +++ gcc/tree-data-ref.c 2017-06-28 14:35:30.475155670 +0100
 @@ -749,15 +749,29 @@ canonicalize_base_object_address (tree a
return build_fold_addr_expr (TREE_OPERAND (addr, 0));
  }

 -/* Analyzes the behavior of the memory reference DR in the innermost loop 
 or
 -   basic block that contains it.  Returns true if analysis succeed or 
 false
 -   otherwise.  */
 +/* Analyze the behavior of memory reference DR.  There are two modes:
 +
 +   - BB analysis.  In this case we simply split the address into base,
 + init and offset components, without reference to any containing loop.
 + The resulting base and offset are general expressions and they can
 + vary arbitrarily from one iteration of the containing loop to the 
 next.
 + The step is always zero.
 +
 +   - loop analysis.  In this case we analyze the reference both wrt LOOP
 + and on the basis that the reference occurs (is "used") in LOOP;
 + see the comment above analyze_scalar_evolution_in_loop for more
 + information about this distinction.  The base, init, offset and
 + step fields are all invariant in LOOP.
 +
 +   Perform BB analysis if LOOP is null, or if LOOP is the function's
 +   dummy outermost loop.  In other cases perform loop analysis.
 +
 +   Return true if the analysis succeeded and store the results in DR if 
 so.
 +   BB analysis can only fail for bitfield or reversed-storage accesses.  
 */

  bool
 -dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
 +dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
  {
 -  gimple *stmt = DR_STMT (dr);
 -  struct loop *loop = loop_containing_stmt (stmt);
tree ref = DR_REF (dr);
HOST_WIDE_INT pbitsize, pbitpos;
tree base, poffset;
 @@ -806,22 +820,11 @@ dr_analyze_innermost (struct data_refere

if (in_loop)
>>> A nit.  No need to use bool variable now?  We can check loop != NULL 
>>> instead.
>>
>> As the comment says, we also want to do BB analysis if the loop is the
>> outermost dummy loop, so in_loop remains set to:
>>
>>   bool in_loop = (loop && loop->num);
>>
>> I think it's worth keeping that abstraction.
> Sorry if I misunderstand this.   "loop != NULL" is different to
> in_loop only when loop is the outermost dummy loop.  Which means nest
> != NULL based on change:
> -  dr_analyze_innermost (dr, nest);
> +  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);
>
> I wonder if it's possible for caller to pass non-NULL nest when
> create_data_ref for a statement in outermost dummy loop?  Or should
> it?

Yeah, it (intentionally) doesn't change whether in_loop is true for
calls via create_data_ref.  But the interface is useful for direct
calls to dr_analyze_innermost.  Currently that's just predcom,
but the later patch adds another in the vectoriser.

> Also noticed that in vect_analyze_data_refs, there is below code:
>
>   /* If target supports vector gather loads or scatter stores, or if
>  this might be a SIMD lane access, see if they can't be used.  */
>   if (is_a  (vinfo)
>   && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
>   && !nested_in_vect_loop_p (loop, stmt))
> {
>   struct data_reference *newdr
> = create_data_ref (NULL, loop_containing_stmt (stmt),
>DR_REF (dr), stmt, maybe_scatter ? false : true);
>
> we have nest==NULL and loop is the inner loop, but after change NULL
> is passed to dr_analyze_innermost because of:
>
> -  dr_analyze_innermost (dr, nest);
> +  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);
>
> Is this the reason that bb-slp-pr65935.c gets vectorized after change?
>  simple_iv should return {&s->phase[dir], 0} successfully though.

No, bb-slp-pr65935.c is testing BB SLP, so we wouldn't have a
loop_vec_info.

> Question is what would happen if simple_iv succeeds with non-ZERO step
> when called with nest==NULL?  The patch skips simple_iv and forces
> ZERO step?

Yeah, I mentioned that in the covering note:

  The handling seemed strange though.  If the DR was part of a loop,
  we still tried to express the base and offset values as IVs, potentially
  giving a nonzero step.  If that failed for any reason, we'd revert to
  using the original base and offset, just as we would if we hadn't asked
  for an IV in the first place.

And like I say, the patch that introduced that behaviour didn't seem
to rely on it.

Thanks,
Richard


Re: C/C++ PATCH to add __typeof_noqual (PR c/65455, c/39985)

2017-06-28 Thread Martin Sebor

On 06/28/2017 10:09 AM, Joseph Myers wrote:

On Wed, 28 Jun 2017, Martin Sebor wrote:


that defines "remove qualifiers except"
operations for every combination of qualifiers in that version of the
compiler (because "remove qualifiers except _Atomic" and "remove
qualifiers except address spaces" cannot be composed into "remove
qualifiers except _Atomic and address spaces, and "remove const, volatile,
restrict and address spaces" is not something user code should need to
hardcode when the intent is "remove qualifiers except _Atomic").


I'm not sure I understand what you're trying to say.  I thought
we agreed earlier that __typeof_noqual__ would remove all type
qualifiers.


Yes, it should.

What I'm saying is: if you provide an interface that enables users to
write code that means "remove const" and will continue to mean exactly
that with future GCC versions, even when those versions have additional
qualifiers, you should also provide an interface that enables users to
write code that means "remove all qualifiers except _Atomic and address
spaces" (and likewise for any other combination) and will continue to mean
exactly that with future GCC versions, even when those versions have
additional qualifiers.  Allowing users to write "remove const, volatile
and restrict" is *not* the same, as the meanings would diverge when future
qualifiers are added.


I see.  That seems reasonable, though more advanced than what
I was envisioning (or what has been requested in either of the
two bugs in the subject).

I don't think there is an equivalent, dedicated trait in C++ to
do that either.  One would have to be composed of the lower-level
ones.  There also is no trait that would remove all type qualifiers
(including extensions), or even traits for querying extensions.
So (AFAIK) there is no way in C++ to do exactly what you described.
That of course doesn't mean that it doesn't make sense, just that
it's too advanced even for C++ despite its highly evolved support
for generic programming.  I would suggest that although providing
something like it would be nice, its absence shouldn't stand in
the way of defining the more limited interfaces first.

Martin


Re: Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Bin.Cheng
On Wed, Jun 28, 2017 at 5:56 PM, Richard Sandiford
 wrote:
> "Bin.Cheng"  writes:
>> On Wed, Jun 28, 2017 at 3:04 PM, Richard Sandiford
>>  wrote:
>>> "Bin.Cheng"  writes:
 On Wed, Jun 28, 2017 at 2:36 PM, Richard Sandiford
  wrote:
> Index: gcc/tree-data-ref.c
> ===
> --- gcc/tree-data-ref.c 2017-06-28 14:33:41.294720044 +0100
> +++ gcc/tree-data-ref.c 2017-06-28 14:35:30.475155670 +0100
> @@ -749,15 +749,29 @@ canonicalize_base_object_address (tree a
>return build_fold_addr_expr (TREE_OPERAND (addr, 0));
>  }
>
> -/* Analyzes the behavior of the memory reference DR in the innermost 
> loop or
> -   basic block that contains it.  Returns true if analysis succeed or 
> false
> -   otherwise.  */
> +/* Analyze the behavior of memory reference DR.  There are two modes:
> +
> +   - BB analysis.  In this case we simply split the address into base,
> + init and offset components, without reference to any containing 
> loop.
> + The resulting base and offset are general expressions and they can
> + vary arbitrarily from one iteration of the containing loop to the 
> next.
> + The step is always zero.
> +
> +   - loop analysis.  In this case we analyze the reference both wrt LOOP
> + and on the basis that the reference occurs (is "used") in LOOP;
> + see the comment above analyze_scalar_evolution_in_loop for more
> + information about this distinction.  The base, init, offset and
> + step fields are all invariant in LOOP.
> +
> +   Perform BB analysis if LOOP is null, or if LOOP is the function's
> +   dummy outermost loop.  In other cases perform loop analysis.
> +
> +   Return true if the analysis succeeded and store the results in DR if 
> so.
> +   BB analysis can only fail for bitfield or reversed-storage accesses.  
> */
>
>  bool
> -dr_analyze_innermost (struct data_reference *dr, struct loop *nest)
> +dr_analyze_innermost (struct data_reference *dr, struct loop *loop)
>  {
> -  gimple *stmt = DR_STMT (dr);
> -  struct loop *loop = loop_containing_stmt (stmt);
>tree ref = DR_REF (dr);
>HOST_WIDE_INT pbitsize, pbitpos;
>tree base, poffset;
> @@ -806,22 +820,11 @@ dr_analyze_innermost (struct data_refere
>
>if (in_loop)
 A nit.  No need to use bool variable now?  We can check loop != NULL 
 instead.
>>>
>>> As the comment says, we also want to do BB analysis if the loop is the
>>> outermost dummy loop, so in_loop remains set to:
>>>
>>>   bool in_loop = (loop && loop->num);
>>>
>>> I think it's worth keeping that abstraction.
>> Sorry if I misunderstand this.   "loop != NULL" is different to
>> in_loop only when loop is the outermost dummy loop.  Which means nest
>> != NULL based on change:
>> -  dr_analyze_innermost (dr, nest);
>> +  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);
>>
>> I wonder if it's possible for caller to pass non-NULL nest when
>> create_data_ref for a statement in outermost dummy loop?  Or should
>> it?
>
> Yeah, it (intentionally) doesn't change whether in_loop is true for
> calls via create_data_ref.  But the interface is useful for direct
> calls to dr_analyze_innermost.  Currently that's just predcom,
> but the later patch adds another in the vectoriser.
>
>> Also noticed that in vect_analyze_data_refs, there is below code:
>>
>>   /* If target supports vector gather loads or scatter stores, or if
>>  this might be a SIMD lane access, see if they can't be used.  */
>>   if (is_a  (vinfo)
>>   && (maybe_gather || maybe_scatter || maybe_simd_lane_access)
>>   && !nested_in_vect_loop_p (loop, stmt))
>> {
>>   struct data_reference *newdr
>> = create_data_ref (NULL, loop_containing_stmt (stmt),
>>DR_REF (dr), stmt, maybe_scatter ? false : true);
>>
>> we have nest==NULL and loop is the inner loop, but after change NULL
>> is passed to dr_analyze_innermost because of:
>>
>> -  dr_analyze_innermost (dr, nest);
>> +  dr_analyze_innermost (dr, nest != NULL ? loop : NULL);
>>
>> Is this the reason that bb-slp-pr65935.c gets vectorized after change?
>>  simple_iv should return {&s->phase[dir], 0} successfully though.
>
> No, bb-slp-pr65935.c is testing BB SLP, so we wouldn't have a
> loop_vec_info.
>
>> Question is what would happen if simple_iv succeeds with non-ZERO step
>> when called with nest==NULL?  The patch skips simple_iv and forces
>> ZERO step?
>
> Yeah, I mentioned that in the covering note:
>
>   The handling seemed strange though.  If the DR was part of a loop,
>   we still tried to express the base and offset values as IVs, potentially
>   giving a nonzero step.  If that failed for any reason, we'd revert to
>   using the original base and offset, just as we would if we ha

Re: [0/67] Add wrapper classes for machine_modes

2017-06-28 Thread Jeff Law
On 05/24/2017 08:27 AM, Richard Sandiford wrote:
> Jeff Law  writes:
>> On 12/09/2016 05:48 AM, Richard Sandiford wrote:
>>> This series includes most of the changes in group C from:
>>>
>>>  https://gcc.gnu.org/ml/gcc/2016-11/msg00033.html
>>>
>>> The idea is to add wrapper classes around machine_mode_enum
>>> for specific groups of modes, such as scalar integers, scalar floats,
>>> complex values, etc.  This has two main benefits: one specific to SVE
>>> and one not.
>>>
>>> The SVE-specific benefit is that it helps to introduce the concept
>>> of variable-length vectors.  To do that we need to change the size
>>> of a vector mode from being a known compile-time constant to being
>>> (possibly) a run-time invariant.  We then need to do the same for
>>> unconstrained machine_modes, which might or might not be vectors.
>>> Introducing these new constrained types means that we can continue
>>> to treat them as having a constant size.
>>>
>>> The other benefit is that it uses static type checking to enforce
>>> conditions that are easily forgotten otherwise.  The most common
>>> sources of problems seem to be:
>>>
>>> (a) using VOIDmode or BLKmode where a scalar integer was expected
>>>  (e.g. when getting the number of bits in the value).
>>>
>>> (b) simplifying vector operations in ways that only make sense for
>>>  scalars.
>>>
>>> The series helps with both of these, although we don't get the full
>>> benefit of (b) until variable-sized modes are introduced.
>>>
>>> I know of three specific cases in which the static type checking
>>> forced fixes for things that turned out to be real bugs (although
>>> we didn't know that at the time, otherwise we'd have posted patches).
>>> They were later fixed for trunk by:
>>>
>>>https://gcc.gnu.org/ml/gcc-patches/2016-07/msg01783.html
>>>https://gcc.gnu.org/ml/gcc-patches/2016-11/msg02983.html
>>>https://gcc.gnu.org/ml/gcc-patches/2016-11/msg02896.html
>>>
>>> The group C patches in ARM/sve-branch did slow compile time down a little.
>>> I've since taken steps to avoid that:
>>>
>>> - Make the tailcall pass handle aggregate parameters and return values
>>>(already in trunk).
>>>
>>> - Turn some of the new wrapper functions into inline functions.
>>>
>>> - Make all the machmode.h macros that used:
>>>
>>>  __builtin_constant_p (M) ? foo_inline (M) : foo_array[M[
>>>
>>>forward to an ALWAYS_INLINE function, so that (a) M is only evaluated
>>>once and (b) __builtin_constant_p is applied to a variable, and so is
>>>deferred until later passes.  This helped the optimisation to fire in
>>>more cases and to continue firing when M is a class rather than a
>>>raw enum.
>>>
>>> - In a similar vein, make sure that conditions like:
>>>
>>>   SImode == DImode
>>>
>>>are treated as builtin_constant_p by gencondmd, so that .md patterns
>>>with those conditions are dropped.
>>>
>>> With these changes the series is actually a very slight compile-time win.
>>> That might seem unlikely, but there are several possible reasons:
>>>
>>> 1. The machmode.h macro change above might allow more constant folding.
>>>
>>> 2. The series has a tendency to evaluate modes once, rather than
>>> continually fetching them from (sometimes quite deep) rtx nests.
>>> Refetching a mode is a particular problem if call comes between
>>> two uses, since the compiler then has to re-evaluate the whole thing.
>>>
>>> 3. The series introduces many uses of new SCALAR_*TYPE_MODE macros,
>>> as alternatives to TYPE_MODE.  The new macros avoid the usual:
>>>
>>>   (VECTOR_TYPE_P (TYPE_CHECK (NODE)) \
>>>? vector_type_mode (NODE) : (NODE)->type_common.mode)
>>>
>>> and become direct field accesses in release builds.
>>>
>>> VECTOR_TYPE_P would be consistently false for these uses,
>>> but call-clobbered registers would usually be treated as clobbered
>>> by the condition as a whole.
>>>
>>> Maybe (3) is the most likely reason.
>>>
>>> I tested this by compiling the testsuite for:
>>>
>>>  aarch64-linux-gnu alpha-linux-gnu arc-elf arm-linux-gnueabi
>>>  arm-linux-gnueabihf avr-elf bfin-elf c6x-elf cr16-elf cris-elf
>>>  epiphany-elf fr30-elf frv-linux-gnu ft32-elf h8300-elf
>>>  hppa64-hp-hpux11.23 ia64-linux-gnu i686-pc-linux-gnu
>>>  i686-apple-darwin iq2000-elf lm32-elf m32c-elf m32r-elf
>>>  m68k-linux-gnu mcore-elf microblaze-elf mips-linux-gnu
>>>  mipsisa64-linux-gnu mmix mn10300-elf moxie-rtems msp430-elf
>>>  nds32le-elf nios2-linux-gnu nvptx-none pdp11 powerpc-linux-gnuspe
>>>  powerpc-eabispe powerpc64-linux-gnu powerpc-ibm-aix7.0 rl78-elf
>>>  rx-elf s390-linux-gnu s390x-linux-gnu sh-linux-gnu sparc-linux-gnu
>>>  sparc64-linux-gnu sparc-wrs-vxworks spu-elf tilegx-elf tilepro-elf
>>>  xstormy16-elf v850-elf vax-netbsdelf visium-elf x86_64-darwin
>>>  x86_64-linux-gnu xtensa-elf
>>>
>>> and checking that there were no changes in assembly.  Also teste

Re: [PATCH] PR target/80556

2017-06-28 Thread Jeff Law
On 06/09/2017 07:57 AM, Simon Wright wrote:
> 2017-06-09 Simon Wright 
> 
> PR target/80556
> * configure.ac (stage1_ldflags): For Darwin, include -lSystem.
>   (poststage1_ldflags): likewise.
> * configure: regenerated.
I'm a bit confused here.  Isn't -lSystem included in darwin's LIB_SPEC
in which case the right things ought to already be happening, shouldn't it?

Jeff


Re: C/C++ PATCH to add __typeof_noqual (PR c/65455, c/39985)

2017-06-28 Thread Joseph Myers
On Wed, 28 Jun 2017, Martin Sebor wrote:

> I don't think there is an equivalent, dedicated trait in C++ to
> do that either.  One would have to be composed of the lower-level
> ones.  There also is no trait that would remove all type qualifiers
> (including extensions), or even traits for querying extensions.
> So (AFAIK) there is no way in C++ to do exactly what you described.
> That of course doesn't mean that it doesn't make sense, just that
> it's too advanced even for C++ despite its highly evolved support
> for generic programming.  I would suggest that although providing
> something like it would be nice, its absence shouldn't stand in
> the way of defining the more limited interfaces first.

Does (standard) C++ have any of restrict, _Atomic or address spaces, which 
are what indicate doing more than simply things for const and volatile?

The more limited interfaces could, of course, be __typeof_noqual in some 
form.

-- 
Joseph S. Myers
jos...@codesourcery.com


[PATCH, rs6000] builtins-3-vec_reve-runnable fix minimum platform

2017-06-28 Thread Carl Love
GCC Maintainers:

The vec_reve builtin test builtins-3-vec_reve-runnable did not have a
minimum Power processor specified.  The thought was the instructions for
the builtin were available on all the older processors.  Unfortunately,
it turns out the builtin does require vsx hardware (-mvsx option).  Bill
Schmidt said that the minimum platform that the builtins need to be
supported on is Power 7.  The following patch sets the minimum platform
as Power 7.

Please let me know if the following patch is acceptable.  Thanks.

Carl Love

--
>From 5b9a8b9d654f243345189247818196f4757e Mon Sep 17 00:00:00 2001
From: Carl Love 
Date: Wed, 28 Jun 2017 12:56:18 -0500
Subject: [PATCH] builtins-3-vec_reve-runnable fix minimum platform

gcc/ChangeLog:

2017-06-28  Carl Love  

* gcc.target/powerpc/builtins-3-vec_reve-runnable.c (dg-options,
dg-skip-if): add mcpu=power7
---
 gcc/testsuite/gcc.target/powerpc/builtins-3-vec_reve-runnable.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-3-vec_reve-runnable.c 
b/gcc/testsuite/gcc.target/powerpc/builtins-3-vec_reve-runnable.c
index 9c05de0..f7c3c3d 100644
--- a/gcc/testsuite/gcc.target/powerpc/builtins-3-vec_reve-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-3-vec_reve-runnable.c
@@ -1,6 +1,6 @@
 /* { dg-do run { target { powerpc*-*-linux* } } } */
-/* { dg-options "-O2" } */
-
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power7" } } */
+/* { dg-options "-O2 -mvsx -mcpu=power7" } */
 
 #include  // vector
 
-- 
1.9.1







[PATCH], PowerPC target_clones minor support

2017-06-28 Thread Michael Meissner
Some minor changes to the PowerPC target_clones support:

1) I added a warning if target_clones was used and the compiler whas configured
with an older glibc where __builtin_cpu_supports always returns 0;

2) I reworked how the ifunc resolver function is generated, and always made it
a static function;

3) I added an executable target_clones test, and I made both clone tests
dependent on GCC being configured with a new glibc.

I have done a full bootstrap and make check test on a little endian power8
system, and there were no regressions.  I did the bootstrap using the Advance
Toolchain 10.0-3 libraries, and verified that the 2 clone tests were run.
Are these patches ok to apply to the trunk?

[gcc]
2017-06-28  Michael Meissner  

* config/rs6000/rs6000.c
(rs6000_get_function_versions_dispatcher): Add warning if the
compiler is not configured to use at least GLIBC version 2.23.
(make_resolver_func): Make resolver function private and not a
COMDAT function.  Create the name with clone_function_name instead
of make_unique_name.

[gcc/testsuite]
2017-06-28  Michael Meissner  

* gcc.target/powerpc/clone2.c: New runtime test for
target_clones.
* gcc.target/powerpc/clone1.c: Add check to make sure the
__builtin_cpu_supports function is fully supported.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797
Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/config/rs6000)
(revision 249737)
+++ gcc/config/rs6000/rs6000.c  (.../gcc/config/rs6000) (working copy)
@@ -37266,6 +37266,11 @@ rs6000_get_function_versions_dispatcher 
 
   default_node = default_version_info->this_node;
 
+#ifndef TARGET_LIBC_PROVIDES_HWCAP_IN_TCB
+  warning_at (DECL_SOURCE_LOCATION (default_node->decl), 0,
+ "target_clone needs at least glibc 2.23");
+#endif
+
   if (targetm.has_ifunc_p ())
 {
   struct cgraph_function_version_info *it_v = NULL;
@@ -37311,29 +37316,24 @@ make_resolver_func (const tree default_d
const tree dispatch_decl,
basic_block *empty_bb)
 {
-  /* IFUNC's have to be globally visible.  So, if the default_decl is
- not, then the name of the IFUNC should be made unique.  */
-  bool is_uniq = (TREE_PUBLIC (default_decl) == 0);
-
   /* Append the filename to the resolver function if the versions are
  not externally visible.  This is because the resolver function has
  to be externally visible for the loader to find it.  So, appending
  the filename will prevent conflicts with a resolver function from
  another module which is based on the same version name.  */
-  char *resolver_name = make_unique_name (default_decl, "resolver", is_uniq);
+  tree decl_name = clone_function_name (default_decl, "resolver");
+  const char *resolver_name = IDENTIFIER_POINTER (decl_name);
 
   /* The resolver function should return a (void *).  */
   tree type = build_function_type_list (ptr_type_node, NULL_TREE);
   tree decl = build_fn_decl (resolver_name, type);
-  tree decl_name = get_identifier (resolver_name);
   SET_DECL_ASSEMBLER_NAME (decl, decl_name);
 
   DECL_NAME (decl) = decl_name;
   TREE_USED (decl) = 1;
   DECL_ARTIFICIAL (decl) = 1;
   DECL_IGNORED_P (decl) = 0;
-  /* IFUNC resolvers have to be externally visible.  */
-  TREE_PUBLIC (decl) = 1;
+  TREE_PUBLIC (decl) = 0;
   DECL_UNINLINABLE (decl) = 1;
 
   /* Resolver is not external, body is generated.  */
@@ -37344,15 +37344,6 @@ make_resolver_func (const tree default_d
   DECL_INITIAL (decl) = make_node (BLOCK);
   DECL_STATIC_CONSTRUCTOR (decl) = 0;
 
-  if (DECL_COMDAT_GROUP (default_decl) || TREE_PUBLIC (default_decl))
-{
-  /* In this case, each translation unit with a call to this
-versioned function will put out a resolver.  Ensure it
-is comdat to keep just one copy.  */
-  DECL_COMDAT (decl) = 1;
-  make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
-}
-
   /* Build result decl and add to function_decl.  */
   tree t = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, 
ptr_type_node);
   DECL_ARTIFICIAL (t) = 1;
@@ -37374,7 +37365,7 @@ make_resolver_func (const tree default_d
 = make_attribute ("ifunc", resolver_name, DECL_ATTRIBUTES (dispatch_decl));
 
   cgraph_node::create_same_body_alias (dispatch_decl, decl);
-  XDELETEVEC (resolver_name);
+
   return decl;
 }
 
Index: gcc/testsuite/gcc.target/powerpc/clone2.c
===
--- gcc/testsuite/gcc.target/powerpc/clone2.c   
(.../svn+ssh://meiss...@gcc.gnu.org/svn/gcc/trunk/gcc/testsuite/gcc.target/powerpc)
 (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/clone2.c   
(.../gcc/testsuite/gcc.target/powerpc)  (revision

Re: [PATCH], Add check ppc_cpu_supports_hw to testsuite

2017-06-28 Thread Michael Meissner
On Wed, Jun 28, 2017 at 09:58:40AM -0500, Peter Bergner wrote:
> On 6/27/17 6:53 PM, Michael Meissner wrote:
> > This adds a target supports option in dejagnu so that future tests can use 
> > this
> > to determine whether or not to test target_clones.
> 
> I like the idea, but some comments...
> 
> 
> 
> > +   #ifdef __MACH__
> > + asm volatile ("xxlor vs0,vs0,vs0");
> > +   #else
> > + asm volatile ("xxlor 0,0,0");
> > +   #endif
> 
> What is this hunk for?  We're only interested in the return value from
> the builtin below, correct?
>
> 
> 
> > + if (!__builtin_cpu_supports ("vsx"))
> > +   return 1;
> > + return 0;
> 
> ...and more importantly, why limit us to testing "vsx"?  Why not test
> for "ppc32", which should be true for *all* kernels we'd ever run on,
> including ppc32, ppc64 and ppc64le?

I wanted to make sure the test actually returned true in the correct test.
I.e. guarantee that the machine had VSX instructions (i.e. the asm) and that
__builtin_cpu_supports returned true.

But if you wanted to use a previous ISA flag, feel free to submit patches.  I
just wanted a quick way to make sure the clone tests were run properly when GCC
was configured with a 2.23 GLIBC or newer.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH][RFC] Canonize names of attributes.

2017-06-28 Thread Jason Merrill
On Wed, Jun 28, 2017 at 12:05 PM, Joseph Myers  wrote:
> On Wed, 28 Jun 2017, Martin Liška wrote:
>
>> On 06/14/2017 07:24 PM, Jason Merrill wrote:
>> > On Tue, Jun 13, 2017 at 8:32 AM, Martin Liška  wrote:
>> >> (canonize_attr_name): New function.
>> >
>> > I think this should be "canonicalize"; "canonize" means something else.
>> >
>> > Jason
>> >
>>
>> Yes, I hope it's a cosmetic problem. In general, do you welcome the change
>> to canonicalize attribute names?
>
> I think *names* (as opposed to arguments) should be canonicalized, but
> arguments need separate consideration.

I agree.

Jason


Re: Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Richard Sandiford
"Bin.Cheng"  writes:
> On Wed, Jun 28, 2017 at 5:56 PM, Richard Sandiford
>  wrote:
>> "Bin.Cheng"  writes:
>>> Question is what would happen if simple_iv succeeds with non-ZERO step
>>> when called with nest==NULL?  The patch skips simple_iv and forces
>>> ZERO step?
>>
>> Yeah, I mentioned that in the covering note:
>>
>>   The handling seemed strange though.  If the DR was part of a loop,
>>   we still tried to express the base and offset values as IVs, potentially
>>   giving a nonzero step.  If that failed for any reason, we'd revert to
>>   using the original base and offset, just as we would if we hadn't asked
>>   for an IV in the first place.
> Hmm, it says "If that failed for any reason,  revert to using the
> original base and offset, just as we would if we hadn't asked for an
> IV in the first place", but question is what would happen if simple_iv
> succeeds with nest==NULL.  After change, the successful simple_iv is
> bypassed.  It's likely this case can't happen in reality.

I suspect we're really in violent agreement here :-) but the reason
I was surprised at the question was that you seemed to be treating
the nest==NULL && simple_iv/nonzero-step combination as a corner case
that hadn't really been addressed, whereas removing that combination is
the main point of the patch.  The reason for removing it isn't that it
can't happen (bb-slp-pr65935.c proves that it did, to detrimental effect).
The reason is that IMO it isn't useful.  I don't think it makes sense to
set the step to a nonzero value for "BB analysis".

E.g., to ask a few rhetorical questions: why should we do that only for
"simple" IVs when analysis will still succeed for complex IVs and leave
the step zero?  Why should the simple_iv case be restricted to constant
steps for nest==NULL, rather than allow steps that are invariant in the
containing loop, like nest!=NULL does?  Why should the IV analysis be
based on the immediately containing loop rather than some other loop,
if the caller has asked for BB analysis rather than analysis wrt a
particular loop?

The reason I went back to the revision that introduced this behavior
was to check whether it really did need the nest==NULL && simple_iv/
nonzero-step combination.  And AFAICT it didn't.  I think it was just
accidental.

> Also the patch is a simplification for me and I don't have any objection here.

Thanks,
Richard


C++ PATCHes to dependent template-id parsing

2017-06-28 Thread Jason Merrill
81204 is a regression whereby previously we would accidentally get the
parsing of res.template set right because when we did the lookup in
the surrounding context, we found the function template and then
ignored it.  This patch partially reverts the handling of .template to
how it was in GCC 6.

But this bug is really a special case of 54769; we should be treating
that name as dependent and not doing a lookup in the enclosing context
at all.  As I noted in discussion of 55576, we need to pass
template_keyword_p into nested_name_specifier_opt.  So this patch does
that, and also adjusts cp_parser_template_name to consider object
scope.

With that change, we see TEMPLATE_ID_EXPR in more error cases, so I
adjusted cp_parser_template_id to use the range on those expression
nodes as well as on the replacement token.

45976 and 55639 are cases where we gave unhelpful diagnostics for uses
of ::template that are pedantically ill-formed, but harmless and
accepted by other compilers.  So these patches fix G++ to accept them
with a pedwarn.

Tested x86_64-pc-linux-gnu, applying to trunk.  81204 also to 7.
commit 0a9d6a9e176acba3a8753caa2bd8b14d6bc622fb
Author: Jason Merrill 
Date:   Mon Jun 26 16:24:28 2017 -0400

PR c++/81204 - parse error with dependent template-name

* parser.c (cp_parser_lookup_name): Disqualify function templates
after lookup.

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 2ff6afd..e0a6c8b 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -25813,11 +25813,22 @@ cp_parser_lookup_name (cp_parser *parser, tree name,
decl = NULL_TREE;
 
   if (!decl)
-   /* Look it up in the enclosing context.  DR 141: When looking for a
-  template-name after -> or ., only consider class templates.  */
-   decl = lookup_name_real (name, prefer_type_arg (tag_type, is_template),
-/*nonclass=*/0,
-/*block_p=*/true, is_namespace, 0);
+   {
+ /* Look it up in the enclosing context.  */
+ decl = lookup_name_real (name, prefer_type_arg (tag_type),
+  /*nonclass=*/0,
+  /*block_p=*/true, is_namespace, 0);
+ /* DR 141 says when looking for a template-name after -> or ., only
+consider class templates.  */
+ if (decl && is_template && !DECL_TYPE_TEMPLATE_P (decl))
+   {
+ tree d = decl;
+ if (is_overloaded_fn (d))
+   d = get_first_fn (d);
+ if (DECL_P (d) && !DECL_CLASS_SCOPE_P (d))
+   decl = NULL_TREE;
+   }
+   }
   if (object_type == unknown_type_node)
/* The object is type-dependent, so we can't look anything up; we used
   this to get the DR 141 behavior.  */
diff --git a/gcc/testsuite/g++.dg/template/lookup10.C 
b/gcc/testsuite/g++.dg/template/lookup10.C
new file mode 100644
index 000..fa90af4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/lookup10.C
@@ -0,0 +1,12 @@
+// PR c++/81204
+
+namespace std {
+  template struct set { };
+}
+using namespace std;
+
+template 
+inline void set(Result & res)
+{
+res.template set();
+}
commit bb0c81aecf7ac956e770e4eb6eca02f4fe6fd686
Author: Jason Merrill 
Date:   Mon Jun 26 16:52:57 2017 -0400

PR c++/54769 - wrong lookup of dependent template-name.

* parser.c (cp_parser_template_name): Handle dependent object type.
(cp_parser_nested_name_specifier_opt): Make template_keyword_p a
parameter.
(cp_parser_id_expression): Pass it.
(cp_parser_diagnose_invalid_type_name): Handle TEMPLATE_ID_EXPR.

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 246af76..c9fc284 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -2039,7 +2039,7 @@ static cp_expr cp_parser_id_expression
 static cp_expr cp_parser_unqualified_id
   (cp_parser *, bool, bool, bool, bool);
 static tree cp_parser_nested_name_specifier_opt
-  (cp_parser *, bool, bool, bool, bool);
+  (cp_parser *, bool, bool, bool, bool, bool = false);
 static tree cp_parser_nested_name_specifier
   (cp_parser *, bool, bool, bool, bool);
 static tree cp_parser_qualifying_entity
@@ -3253,6 +3253,10 @@ cp_parser_diagnose_invalid_type_name (cp_parser *parser, 
tree id,
error_at (location_of (id),
  "%qE in namespace %qE does not name a template type",
  id, parser->scope);
+ else if (TREE_CODE (id) == TEMPLATE_ID_EXPR)
+   error_at (location_of (id),
+ "%qE in namespace %qE does not name a template type",
+ TREE_OPERAND (id, 0), parser->scope);
  else
error_at (location_of (id),
  "%qE in namespace %qE does not name a type",
@@ -3296,6 +3300,10 @@ cp_parser_diagnose_invalid_type_name (cp_parser *parser, 
tree id,
error_at (location_of (id),

C++ PATCHes for c++/61022 and 72801, variadic template issues

2017-06-28 Thread Jason Merrill
In 61022, we were stripping the pack expansion from a template
argument for considering what kind of argument it is, and then
forgetting to put it back in the case of an unbound class template.

In 72801, we were doing unification wrong when trying to find partial
specialization bindings because we weren't considering template
arguments from an instantiation of the enclosing class template.

Tested x86_64-pc-linux-gnu, applying to trunk and 7.
commit df4a6dd91a46b7b5d2b00cbe2835e509a73257ce
Author: Jason Merrill 
Date:   Tue Jun 27 18:04:52 2017 -0400

PR c++/61022 - error with variadic template template parm

* pt.c (convert_template_argument): Keep the TYPE_PACK_EXPANSION.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index b86b346..fa75037 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -7695,7 +7695,7 @@ convert_template_argument (tree parm,
  if (TREE_CODE (TREE_TYPE (arg)) == UNBOUND_CLASS_TEMPLATE)
/* The number of argument required is not known yet.
   Just accept it for now.  */
-   val = TREE_TYPE (arg);
+   val = orig_arg;
  else
{
  tree parmparm = DECL_INNERMOST_TEMPLATE_PARMS (parm);
diff --git a/gcc/testsuite/g++.dg/cpp0x/variadic-ttp8.C 
b/gcc/testsuite/g++.dg/cpp0x/variadic-ttp8.C
new file mode 100644
index 000..f3e576a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/variadic-ttp8.C
@@ -0,0 +1,27 @@
+// PR c++/69111
+// { dg-do compile { target c++11 } }
+
+template  class ...>
+struct template_list {};
+
+template 
+struct A
+{};
+
+template 
+struct B
+{
+ template 
+ using type = A;
+};
+
+template 
+struct C
+{
+ using type = template_list::template type...>;
+};
+
+int main()
+{
+ return 0;
+}
commit 962f55e50dcef0d94db6bab4dbf83f0b4966c6d8
Author: Jason Merrill 
Date:   Tue Jun 27 16:53:01 2017 -0400

PR c++/72801 - ICE with variadic partial specialization

* pt.c (unify_pack_expansion): Use PACK_EXPANSION_EXTRA_ARGS.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index b060a19..b86b346 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -20044,6 +20044,9 @@ unify_pack_expansion (tree tparms, tree targs, tree 
packed_parms,
   tree pack, packs = NULL_TREE;
   int i, start = TREE_VEC_LENGTH (packed_parms) - 1;
 
+  /* Add in any args remembered from an earlier partial instantiation.  */
+  targs = add_to_template_args (PACK_EXPANSION_EXTRA_ARGS (parm), targs);
+
   packed_args = expand_template_argument_pack (packed_args);
 
   int len = TREE_VEC_LENGTH (packed_args);
diff --git a/gcc/testsuite/g++.dg/cpp0x/variadic-partial1.C 
b/gcc/testsuite/g++.dg/cpp0x/variadic-partial1.C
new file mode 100644
index 000..6f8df3e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/variadic-partial1.C
@@ -0,0 +1,22 @@
+// PR c++/72801
+// { dg-do compile { target c++11 } }
+
+template < typename, typename > struct A {};
+
+template < typename ... T > struct B
+{ 
+  template < typename > struct C
+  { 
+static const int a = 0;
+  };
+
+  template < typename R, typename ... S >
+  struct C < R (A < T, S > ...) >
+  { 
+static const int a = 1;
+  };
+};
+
+#define SA(X) static_assert ((X), #X)
+SA(B <>::C::a == 1);
+


C++ PATCH for c++/69300, ICE with self-referential noexcept

2017-06-28 Thread Jason Merrill
In the testcase we SEGV due to infinite recursion because the
noexcept-specifier of f depends on itself.  Fixed by keeping track of
which functions we're currently trying to instantiate noexcept for.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 7a938fed4c07c7e28008b56e6bac05376b1f99fa
Author: Jason Merrill 
Date:   Tue Jun 27 17:25:18 2017 -0400

PR c++/69300 - ICE with self-referential noexcept

* pt.c (maybe_instantiate_noexcept): Check for recursion.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index fa75037..047d3ba 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -22557,8 +22557,20 @@ maybe_instantiate_noexcept (tree fn)
 
   if (TREE_CODE (noex) == DEFERRED_NOEXCEPT)
 {
+  static hash_set* fns = new hash_set;
+  bool added = false;
   if (DEFERRED_NOEXCEPT_PATTERN (noex) == NULL_TREE)
spec = get_defaulted_eh_spec (fn);
+  else if (!(added = !fns->add (fn)))
+   {
+ /* If hash_set::add returns true, the element was already there.  */
+ location_t loc = EXPR_LOC_OR_LOC (DEFERRED_NOEXCEPT_PATTERN (noex),
+   DECL_SOURCE_LOCATION (fn));
+ error_at (loc,
+   "exception specification of %qD depends on itself",
+   fn);
+ spec = noexcept_false_spec;
+   }
   else if (push_tinst_level (fn))
{
  push_access_scope (fn);
@@ -22579,6 +22591,9 @@ maybe_instantiate_noexcept (tree fn)
   else
spec = noexcept_false_spec;
 
+  if (added)
+   fns->remove (fn);
+
   TREE_TYPE (fn) = build_exception_variant (fntype, spec);
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/noexcept30.C 
b/gcc/testsuite/g++.dg/cpp0x/noexcept30.C
new file mode 100644
index 000..c51e94e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/noexcept30.C
@@ -0,0 +1,12 @@
+// PR c++/69300
+// { dg-do compile { target c++11 } }
+
+template
+struct F {
+  template
+  void f() noexcept(&F::template f) {} // { dg-error "exception 
specification" }
+};
+
+int main () {
+  F().f();
+}


Re: [PATCH 3/3] Introduce IntegerRange for options (PR driver/79659).

2017-06-28 Thread Rainer Orth
Hi Martin,

> On 06/28/2017 06:52 AM, Jeff Law wrote:
>> On 03/15/2017 03:58 AM, Martin Liška wrote:
>>> Huh, I forgot to attach the patch.
>>>
>>> Martin
>>>
>>> 0001-Introduce-IntegerRange-for-options-PR-driver-79659.patch
>>>
>>>
>>> From bb89456e6cecfa9497cf8e265d2083e762d5bc3e Mon Sep 17 00:00:00 2001
>>> From: marxin 
>>> Date: Mon, 27 Feb 2017 14:07:03 +0100
>>> Subject: [PATCH] Introduce IntegerRange for options (PR driver/79659).
>>>
>>> gcc/ChangeLog:
>>>
>>> 2017-02-28  Martin Liska  
>>>
>>> PR driver/79659
>>> * common.opt: Add IntegerRange to various options.
>>> * opt-functions.awk (integer_range_info): New function.
>>> * optc-gen.awk: Add integer_range_info to cl_options struct.
>>> * opts-common.c (decode_cmdline_option): Handle
>>> CL_ERR_INT_RANGE_ARG.
>>> (cmdline_handle_error): Likewise.
>>> * opts.c (print_filtered_help): Show valid interval in
>>> when --help is provided.
>>> * opts.h (struct cl_option): Add range_min and range_max fields.
>>> * config/i386/i386.opt: Add IntegerRange for -mbranch-cost.
>>>
>>> gcc/c-family/ChangeLog:
>>>
>>> 2017-02-28  Martin Liska  
>>>
>>> PR driver/79659
>>> * c.opt: Add IntegerRange to various options.
>>>
>>> gcc/testsuite/ChangeLog:
>>>
>>> 2017-02-28  Martin Liska  
>>>
>>> PR driver/79659
>>> * g++.dg/opt/pr79659.C: New test.
>> Presumably this never fully moved forward because it wasn't a regression?
>> 
>> This looks quite reasonable to me.  I'm not sure of the state of the
>> prereqs and you may want/need to add IntegerRange checks on newly added
>> options since this was first submitted.
>> 
>> If the prereqs are ack'd, then as far as I'm concerned this is good to
>> go and you're free to add any new IntegerRange checks you deem
>> necessary/desirable.
>> 
>> jeff
>> 
>
> Thank you Jeff for looking at the patch. I've just re-tested the patch and
> I'm going to install it.

seems you didn't test thoroughly enough: your patch introduced a couple
of testsuite regressions on i386-pc-solaris2.12 and x86_64-pc-linux-gnu
(any x86 target, in fact):

+FAIL: gcc.dg/uninit-pred-7_d.c (test for excess errors)
+FAIL: gcc.dg/uninit-pred-7_d.c warning (test for warnings, line 48)
+FAIL: gcc.dg/uninit-pred-8_d.c (test for excess errors)
+FAIL: gcc.dg/uninit-pred-8_d.c warning (test for warnings, line 42)

+FAIL: gcc.target/i386/branch-cost1.c (test for excess errors)
+UNRESOLVED: gcc.target/i386/branch-cost1.c scan-tree-dump-not gimple " & "
+UNRESOLVED: gcc.target/i386/branch-cost1.c scan-tree-dump-times gimple "if " 2
+FAIL: gcc.target/i386/branch-cost4.c (test for excess errors)
+UNRESOLVED: gcc.target/i386/branch-cost4.c scan-tree-dump-not gimple " & "
+UNRESOLVED: gcc.target/i386/branch-cost4.c scan-tree-dump-times gimple "if " 2

In all cases, you get

Excess errors:
xgcc: error: argument to '-mbranch-cost=' is not between 1 and 5

since the tests are compiled with -mbranch-cost=0.

Please fix.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [patch][Ping #3] PR80929: Realistic PARALLEL cost in seq_cost.

2017-06-28 Thread Wilco Dijkstra
Georg-Johann Lay wrote:

@@ -5300,6 +5300,9 @@ seq_cost (const rtx_insn *seq, bool spee
   set = single_set (seq);
   if (set)
 cost += set_rtx_cost (set, speed);
+  else if (INSN_P (seq)
+  && PARALLEL == GET_CODE (PATTERN (seq)))
+   cost += insn_rtx_cost (PATTERN (seq), speed);
   else
 cost++;

insn_rtx_cost may return zero if it can't find something useful in the 
parallel, 
which means it may return a lower cost and even zero. Not sure whether this
is important, but in eg. combine a cost of zero means infinite and so could have
unintended consequences. So incrementing cost with a non-zero value 
if insn_rtx_cost == 0 would seem safer.

Also why does the else do cost++ and not cost += COSTS_N_INSNS (1)?

Wilco


Re: [PATCH rs6000] remove implicit static var outputs of toc_relative_expr_p

2017-06-28 Thread Aaron Sawdey
Hi Segher,

On Tue, 2017-06-27 at 18:35 -0500, Segher Boessenkool wrote:
> Hi Aaron,
> 
> On Tue, Jun 27, 2017 at 11:43:57AM -0500, Aaron Sawdey wrote:
> > The function toc_relative_expr_p implicitly sets two static vars
> > (tocrel_base and tocrel_offset) that are declared in rs6000.c. The
> > real
> > purpose of this is to communicate between
> > print_operand/print_operand_address and
> > rs6000_output_addr_const_extra,
> > which is called through the asm_out hook vector by something in the
> > call tree under output_addr_const.
> > 
> > This patch changes toc_relative_expr_p to make tocrel_base and
> > tocrel_offset be explicit const_rtx * args. All of the calls other
> > than
> > print_operand/print_operand_address are changed to have local
> > const_rtx
> > vars that are passed in.
> 
> If those locals aren't used, can you arrange to call
> toc_relative_expr_p
> with NULL instead?  Or are they always used?

There are calls where neither is used or only tocrel_base is used.

> 
> > The statics in rs6000.c are now called
> > tocrel_base_oac and tocrel_offset_oac to reflect their use to
> > communicate across output_addr_const, and that is now the only
> > thing
> > they are used for.
> 
> Can't say I like those names, very cryptical.  Not that I know
> something
> better, the short names as they were weren't very nice either.
> 
> > --- gcc/config/rs6000/rs6000.c  (revision 249639)
> > +++ gcc/config/rs6000/rs6000.c  (working copy)
> > @@ -8628,18 +8628,25 @@
> >       && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant
> > (base), Pmode));
> >  }
> >  
> > -static const_rtx tocrel_base, tocrel_offset;
> > +/* These are only used to pass through from
> > print_operand/print_operand_address
> > + * to rs6000_output_addr_const_extra over the intervening
> > function 
> > + * output_addr_const which is not target code.  */
> 
> No leading * in a block comment please.  (And you have a trailing
> space).
> 
> > +static const_rtx tocrel_base_oac, tocrel_offset_oac;
> >  
> >  /* Return true if OP is a toc pointer relative address (the output
> > of create_TOC_reference).  If STRICT, do not match non-split
> > -   -mcmodel=large/medium toc pointer relative addresses.  */
> > +   -mcmodel=large/medium toc pointer relative addresses.  Places
> > base 
> > +   and offset pieces in TOCREL_BASE and TOCREL_OFFSET
> > respectively.  */
> 
> s/Places/Place/ (and another trailing space).
> 
> > -  tocrel_base = op;
> > -  tocrel_offset = const0_rtx;
> > +  *tocrel_base = op;
> > +  *tocrel_offset = const0_rtx;
> >    if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1),
> > GET_MODE (op)))
> >  {
> > -  tocrel_base = XEXP (op, 0);
> > -  tocrel_offset = XEXP (op, 1);
> > +  *tocrel_base = XEXP (op, 0);
> > +  *tocrel_offset = XEXP (op, 1);
> >  }
> 
> Maybe write this as
> 
>   if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1),
> GET_MODE (op)))
> {
>   *tocrel_base = XEXP (op, 0);
>   *tocrel_offset = XEXP (op, 1);
> }
>   else
> {
>   *tocrel_base = op;
>   *tocrel_offset = const0_rtx;
> }
> 
> or, if you allow NULL pointers,
> 
>   bool with_offset = GET_CODE (op) == PLUS
>    && add_cint_operand (XEXP (op, 1), GET_MODE (op));
>   if (tocrel_base)
> *tocrel_base = with_offset ? XEXP (op, 0) : op;
>   if (tocrel_offset)
> *tocrel_offset = with_offset ? XEXP (op, 1) : const0_rtx;
> 
> or such.
> 
> > -  return (GET_CODE (tocrel_base) == UNSPEC
> > -     && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
> > +  return (GET_CODE (*tocrel_base) == UNSPEC
> > +     && XINT (*tocrel_base, 1) == UNSPEC_TOCREL);
> 
> Well, and then you have this, so you need to assign tocrel_base to a
> local
> as well.
> 
> >  legitimate_constant_pool_address_p (const_rtx x, machine_mode
> > mode,
> >     bool strict)
> >  {
> > -  return (toc_relative_expr_p (x, strict)
> > +  const_rtx tocrel_base, tocrel_offset;
> > +  return (toc_relative_expr_p (x, strict, &tocrel_base,
> > &tocrel_offset)
> 
> For example here it seems nothing uses tocrel_base?
> 
> It is probably nicer to have a separate function for
> toc_relative_expr_p
> and one to pull the base/offset out.  And maybe don't keep it cached
> for
> the output function either?  It has all info it needs, right, the
> full
> address RTX?  I don't think it is measurably slower to pull the
> address
> apart an extra time?

I think it doesn't make a lot of sense to have two functions as you
have to do nearly all the work just to get the true/false return value,
you have to completely compute tocrel_base. I've restructured this so
that either pointer can be null. The function uses locals for
tocrel_base/tocrel_offset, then assigns to the pointers if non-null.

bool
toc_relative_expr_p (const_rtx op, bool strict, const_rtx
*tocrel_base_ret,
 const_rtx *tocrel_offset_ret)
{
  if (!TARGET_TOC)
return false;

  if (TARGET_CMODEL != CMODEL_

Re: [PATCH], Add check ppc_cpu_supports_hw to testsuite

2017-06-28 Thread Segher Boessenkool
Hi!

On Tue, Jun 27, 2017 at 07:53:21PM -0400, Michael Meissner wrote:
> --- gcc/testsuite/lib/target-supports.exp (revision 249606)
> +++ gcc/testsuite/lib/target-supports.exp (working copy)
> @@ -1930,6 +1930,37 @@ proc check_effective_target_powerpc64_no
>   } {-O2}]
>  }
>  
> +# Return 1 if the target supports the __builtin_cpu_supports built-in,
> +# including having a new enough library to support the test.  Cache the 
> result.
> +# Require at least a power7 to run on.
> +
> +proc check_ppc_cpu_supports_hw_available { } {

This probably shouldn't have "ppc" in the name?  Or "hw"?

> +return [check_cached_effective_target ppc_cpu_supports_hw_available {
> + # Some simulators are known to not support VSX/power8 instructions.
> + # For now, disable on Darwin
> + if { [istarget powerpc-*-eabi]
> +  || [istarget powerpc*-*-eabispe]
> +  || [istarget *-*-darwin*]} {
> + expr 0
> + } else {
> + set options "-mvsx"
> + check_runtime_nocache ppc_cpu_supports_hw_available {
> + int main()
> + {
> + #ifdef __MACH__
> +   asm volatile ("xxlor vs0,vs0,vs0");
> + #else
> +   asm volatile ("xxlor 0,0,0");
> + #endif
> +   if (!__builtin_cpu_supports ("vsx"))
> + return 1;
> +   return 0;
> + }
> + } $options
> + }
> +}]
> +}

As Peter said, I'd rather test for "ppc32", so this works anywhere.

That would give

proc check_cpu_supports_available { } {
if { [istarget powerpc*-*-*] } {
return [check_runtime cpu_supports_available {
  int main() { return !__builtin_cpu_supports ("ppc32"); }
}]
}

return 0
}

(and other archs can add their stuff then).

Why did you use check_runtime_nocache btw?  Is that just copy-paste?


Segher


Re: [PATCH][x86] Add permutex[var]_epi[32,64] intrinsics

2017-06-28 Thread Uros Bizjak
On Wed, Jun 28, 2017 at 12:01 PM, Peryt, Sebastian
 wrote:
> Hi,
>
> This patch adds missing intrinsics:
> - _mm256_permutexvar_epi32
> - _mm256_permutex_epi64
> - _mm256_permutexvar_epi64
>
> gcc/
> * config/i386/avx512vlintrin.h (_mm256_permutexvar_epi64, 
> _mm256_permutexvar_epi32,
> _mm256_permutex_epi64): New intrinsics.
>
> gcc/tesuite/
> * gcc.target/i386/avx512vl-vpermd-1.c (_mm256_permutexvar_epi32): 
> Test new intrinsic.
> * gcc.target/i386/avx512vl-vpermq-imm-1.c (_mm256_permutex_epi64): 
> Ditto.
> * gcc.target/i386/avx512vl-vpermq-var-1.c (_mm256_permutexvar_epi64): 
> Ditto.
> *gcc.target/i386/avx512f-vpermd-2.c: Removed define length constraint.
> * gcc.target/i386/avx512f-vpermq-imm-2.c: Ditto.
> * gcc.target/i386/avx512f-vpermq-var-2.c: Ditto.
>
> Is it ok for trunk?

Approved and committed to mainline SVN.

Thanks,
Uros.


Re: Backport [PATCH,rs6000] Handle conflicting target options -mno-power9-vector and -mcpu=power9

2017-06-28 Thread Segher Boessenkool
On Wed, Jun 28, 2017 at 08:30:21AM -0600, Kelvin Nilsen wrote:
> I have bootstrapped and tested this patch on
> powerpc64le-unkonwn-linux-gnu with no regressions.  Is this ok for
> backporting to gcc 6?

Please wait until after 6.4.

Thanks,


Segher


Re: [PATCH], Add check ppc_cpu_supports_hw to testsuite

2017-06-28 Thread Michael Meissner
On Wed, Jun 28, 2017 at 03:48:50PM -0500, Segher Boessenkool wrote:
> As Peter said, I'd rather test for "ppc32", so this works anywhere.

Fair enough.

> That would give
> 
> proc check_cpu_supports_available { } {
> if { [istarget powerpc*-*-*] } {
> return [check_runtime cpu_supports_available {
>   int main() { return !__builtin_cpu_supports ("ppc32"); }
> }]
> }
> 
> return 0
> }
> 
> (and other archs can add their stuff then).
> 
> Why did you use check_runtime_nocache btw?  Is that just copy-paste?

Just copy-paste.

Like the target_clones stuff, right now, it is only x86 and PowerPC that
supports __builtin_cpu*.

I don't really see the point of having a machine independent test for
__builtin_cpu_*, but if you feel strongly about it go for it.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.vnet.ibm.com, phone: +1 (978) 899-4797



Re: Tweak BB analysis for dr_analyze_innermost

2017-06-28 Thread Bin.Cheng
On Wed, Jun 28, 2017 at 8:06 PM, Richard Sandiford
 wrote:
> "Bin.Cheng"  writes:
>> On Wed, Jun 28, 2017 at 5:56 PM, Richard Sandiford
>>  wrote:
>>> "Bin.Cheng"  writes:
 Question is what would happen if simple_iv succeeds with non-ZERO step
 when called with nest==NULL?  The patch skips simple_iv and forces
 ZERO step?
>>>
>>> Yeah, I mentioned that in the covering note:
>>>
>>>   The handling seemed strange though.  If the DR was part of a loop,
>>>   we still tried to express the base and offset values as IVs, potentially
>>>   giving a nonzero step.  If that failed for any reason, we'd revert to
>>>   using the original base and offset, just as we would if we hadn't asked
>>>   for an IV in the first place.
>> Hmm, it says "If that failed for any reason,  revert to using the
>> original base and offset, just as we would if we hadn't asked for an
>> IV in the first place", but question is what would happen if simple_iv
>> succeeds with nest==NULL.  After change, the successful simple_iv is
>> bypassed.  It's likely this case can't happen in reality.
>
> I suspect we're really in violent agreement here :-) but the reason
> I was surprised at the question was that you seemed to be treating
> the nest==NULL && simple_iv/nonzero-step combination as a corner case
> that hadn't really been addressed, whereas removing that combination is
> the main point of the patch.  The reason for removing it isn't that it
Yes, this is where I misunderstood the patch.  Thanks very much for
your patient explanation.  Now I can see it IS a nice simplification.

Thanks,
Bin
> can't happen (bb-slp-pr65935.c proves that it did, to detrimental effect).
> The reason is that IMO it isn't useful.  I don't think it makes sense to
> set the step to a nonzero value for "BB analysis".
>
> Thanks,
> Richard


C++ PATCH for c++/72764, ICE with invalid template typename

2017-06-28 Thread Jason Merrill
The problem in this testcase is that when we first parse C we look
up the canonical instantiation of C which doesn't see inside A
because it happens at global scope, but then in strip_typedefs we are
in the context of A, so the call to make_typename_type in
strip_typedefs was finding the error and causing trouble, because we
don't expect strip_typedefs to change anything.

This is an oddity with the current semi-specification of alias
templates that I hope to address in the standard soon, but for now
this patch addresses the issue by rebuilding a TYPENAME_TYPE in
strip_typedefs, even if we could resolve it in the current scope.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 9ab225e01a4aad9a95b6a744196cd03bce9c6e28
Author: Jason Merrill 
Date:   Tue Jun 27 15:25:30 2017 -0400

PR c++/72764 - ICE with invalid template typename.

* decl.c (build_typename_type): No longer static.
* tree.c (strip_typedefs): Use it instead of make_typename_type.

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index f82b1b6..946a916 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6087,6 +6087,7 @@ extern tree define_label  (location_t, 
tree);
 extern void check_goto (tree);
 extern bool check_omp_return   (void);
 extern tree make_typename_type (tree, tree, enum tag_types, 
tsubst_flags_t);
+extern tree build_typename_type(tree, tree, tree, 
tag_types);
 extern tree make_unbound_class_template(tree, tree, tree, 
tsubst_flags_t);
 extern tree build_library_fn_ptr   (const char *, tree, int);
 extern tree build_cp_library_fn_ptr(const char *, tree, int);
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 93e24fc..3d96a3e 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -3594,7 +3594,7 @@ struct typename_hasher : ggc_ptr_hash
 
 static GTY (()) hash_table *typename_htab;
 
-static tree
+tree
 build_typename_type (tree context, tree name, tree fullname,
 enum tag_types tag_type)
 {
diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index bb17278..4535af6 100644
--- a/gcc/cp/tree.c
+++ b/gcc/cp/tree.c
@@ -1503,13 +1503,13 @@ strip_typedefs (tree t, bool *remove_attributes)
   break;
 case TYPENAME_TYPE:
   {
+   bool changed = false;
tree fullname = TYPENAME_TYPE_FULLNAME (t);
if (TREE_CODE (fullname) == TEMPLATE_ID_EXPR
&& TREE_OPERAND (fullname, 1))
  {
tree args = TREE_OPERAND (fullname, 1);
tree new_args = copy_node (args);
-   bool changed = false;
for (int i = 0; i < TREE_VEC_LENGTH (args); ++i)
  {
tree arg = TREE_VEC_ELT (args, i);
@@ -1533,12 +1533,15 @@ strip_typedefs (tree t, bool *remove_attributes)
else
  ggc_free (new_args);
  }
-   result = make_typename_type (strip_typedefs (TYPE_CONTEXT (t),
-remove_attributes),
-fullname, typename_type, tf_none);
-   /* Handle 'typedef typename A::N N;'  */
-   if (typedef_variant_p (result))
- result = TYPE_MAIN_VARIANT (DECL_ORIGINAL_TYPE (TYPE_NAME (result)));
+   tree ctx = strip_typedefs (TYPE_CONTEXT (t), remove_attributes);
+   if (!changed && ctx == TYPE_CONTEXT (t) && !typedef_variant_p (t))
+ return t;
+   tree name = fullname;
+   if (TREE_CODE (fullname) == TEMPLATE_ID_EXPR)
+ name = TREE_OPERAND (fullname, 0);
+   /* Use build_typename_type rather than make_typename_type because we
+  don't want to resolve it here, just strip typedefs.  */
+   result = build_typename_type (ctx, name, fullname, typename_type);
   }
   break;
 case DECLTYPE_TYPE:
diff --git a/gcc/testsuite/g++.dg/cpp0x/alias-decl-60.C 
b/gcc/testsuite/g++.dg/cpp0x/alias-decl-60.C
new file mode 100644
index 000..6bf9b7b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/alias-decl-60.C
@@ -0,0 +1,16 @@
+// PR c++/72764
+// { dg-do compile { target c++11 } }
+
+template < typename > struct A;
+template < typename > struct B {};
+
+template < typename T >
+using C = typename A < T >::template D < T >;
+
+template < typename T > struct A
+{ 
+  // should be: template < typename > struct D : B < C < T > > {};
+  struct D : B < C < T > > {}; // { dg-error "not a class template" }
+};
+
+A < int >::D a;// { dg-message "required" }


Re: C++ PATCH for c++/69300, ICE with self-referential noexcept

2017-06-28 Thread Marc Glisse

On Wed, 28 Jun 2017, Jason Merrill wrote:


In the testcase we SEGV due to infinite recursion because the
noexcept-specifier of f depends on itself.  Fixed by keeping track of
which functions we're currently trying to instantiate noexcept for.


Hello,

in the testcase, it makes sense that this is an error. In some other 
cases, say for instance


int fact(int n) noexcept(auto) { return (n>1)?n*f(n-1):1; }

(yes, I know we still do not have noexcept(auto))
it would make sense to pretend that the recursive call does not throw for 
the purpose of determining that fact is indeed noexcept. Then we get into 
solving the same thing as IPA already does... And the standard does not 
require implementations to be clever there. Still, it could have been nice 
:-/


--
Marc Glisse


  1   2   >