[PATCH] [AArch64] support -mfentry feature for arm64

2016-03-14 Thread Li Bin
From: Jiangjiji 

* gcc/config/aarch64/aarch64.opt: Add a new option.
* gcc/config/aarch64/aarch64.c: Add some new functions and Macros.
* gcc/config/aarch64/aarch64.h: Modify PROFILE_HOOK and FUNCTION_PROFILER.

Signed-off-by: Jiangjiji 
Signed-off-by: Li Bin 
---
 gcc/config/aarch64/aarch64.c   |   23 +++
 gcc/config/aarch64/aarch64.h   |   13 -
 gcc/config/aarch64/aarch64.opt |4 
 3 files changed, 35 insertions(+), 5 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 752df4e..c70b161 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -440,6 +440,17 @@ aarch64_is_long_call_p (rtx sym)
   return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
 }
 
+void
+aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
+{
+   if (flag_fentry)
+   {
+   fprintf (file, "\tmov\tx9, x30\n");
+   fprintf (file, "\tbl\t__fentry__\n");
+   fprintf (file, "\tmov\tx30, x9\n");
+   }
+}
+
 /* Return true if the offsets to a zero/sign-extract operation
represent an expression that matches an extend operation.  The
operands represent the paramters from
@@ -7414,6 +7425,15 @@ aarch64_emit_unlikely_jump (rtx insn)
   add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
 }
 
+/* Return true, if profiling code should be emitted before
+ * prologue. Otherwise it returns false.
+ * Note: For x86 with "hotfix" it is sorried.  */
+static bool
+aarch64_profile_before_prologue (void)
+{
+   return flag_fentry != 0;
+}
+
 /* Expand a compare and swap pattern.  */
 
 void
@@ -8454,6 +8474,9 @@ aarch64_cannot_change_mode_class (enum machine_mode from,
 #undef TARGET_ASM_ALIGNED_SI_OP
 #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
 
+#undef TARGET_PROFILE_BEFORE_PROLOGUE
+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue
+
 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
   hook_bool_const_tree_hwi_hwi_const_tree_true
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 77b2bb9..65e34fc 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -804,13 +804,16 @@ do {  
 \
 #define PROFILE_HOOK(LABEL)\
   {\
 rtx fun, lr;   \
-lr = get_hard_reg_initial_val (Pmode, LR_REGNUM);  \
-fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
-emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode);   \
+   if (!flag_fentry)
+ {
+   lr = get_hard_reg_initial_val (Pmode, LR_REGNUM);   
\
+   fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME);  
\
+   emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode);
\
+ }
   }
 
-/* All the work done in PROFILE_HOOK, but still required.  */
-#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
+#define FUNCTION_PROFILER(STREAM, LABELNO)
+   aarch64_function_profiler(STREAM, LABELNO)
 
 /* For some reason, the Linux headers think they know how to define
these macros.  They don't!!!  */
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
index 266d873..9e4b408 100644
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -124,3 +124,7 @@ Enum(aarch64_abi) String(ilp32) Value(AARCH64_ABI_ILP32)
 
 EnumValue
 Enum(aarch64_abi) String(lp64) Value(AARCH64_ABI_LP64)
+
+mfentry
+Target Report Var(flag_fentry) Init(0)
+Emit profiling counter call at function entry before prologue.
-- 
1.7.1



[PATCH] [AArch64] support -mfentry feature for arm64

2016-03-14 Thread Li Bin
As ARM64 enters the enterprise world, machines in some critical enterprise
production environments cannot be stopped; that is, live patching, as one of
the RAS features, is becoming increasingly important for the ARM64 arch.

Now, the mainstream live patch implementation which has been merged in
Linux kernel (x86/s390) is based on the 'ftrace with regs' feature, and
this feature needs the help of gcc. 

This patch proposes a generic solution for arm64 gcc, called mfentry,
following the example of x86, mips, s390, etc. On these archs, this
feature has been used to implement the ftrace feature 'ftrace with regs'
to support live patching.

Currently, there is another solution from Linaro [1], which proposes to
implement a new option -fprolog-pad=N that generates a pad of N nops at the
beginning of each function. This solution is an arch-independent approach for
gcc, but there may be some limitations which have not yet been recognized for
the Linux kernel to adapt to this solution, besides those discussed in [2],
typically for powerpc archs. Furthermore, I think there are no good reasons to
push the other archs (such as x86) which have implemented the feature 'ftrace
with regs' to replace the current method with the new option, which may require
heavy target-dependent code adaptation; as a result it becomes an
arm64-dedicated solution, leaving the kernel with two different forms of
implementation.

[1] https://gcc.gnu.org/ml/gcc/2015-10/msg00090.html
[2] http://lists.infradead.org/pipermail/linux-arm-kernel/2016-January/401854.html

Jiangjiji (1):
  [AArch64] support -mfentry feature for arm64

 gcc/config/aarch64/aarch64.c   |   23 +++
 gcc/config/aarch64/aarch64.h   |   13 -
 gcc/config/aarch64/aarch64.opt |4 
 3 files changed, 35 insertions(+), 5 deletions(-)



Re: [PATCH][GCC 7] Fix PR70171

2016-03-14 Thread Richard Biener
On Fri, 11 Mar 2016, Eric Botcazou wrote:

> > The following teaches phiprop to handle the case of aggregate copies
> > where the aggregate has non-BLKmode which means it is very likely
> > expanded as reg-reg moves (any better test for that apart from
> > checking for non-BLKmode?).  
> 
> !aggregate_value_p comes to mind, but non-BLKmode is the definitive test to 
> distinguish the register from the non-register case at the RTL level.

It looks like it might catch a few extra cases where the address of the
decl is required.  But it also looks like it's somewhat overly broad like

  /* Function types that are TREE_ADDRESSABLE force return in memory.  */
  if (fntype && TREE_ADDRESSABLE (fntype))
return 1;

without actually testing 'exp' is the return slot.  In fact most of
the function cares about function return values and some about
parameters.  I guess the predicate should be split up (a quick grep
shows most callers care about the return value case).

Richard.



Re: [PATCH, PR70045] Unshare create_empty_if_region_on_edge argument

2016-03-14 Thread Richard Biener
On Fri, 11 Mar 2016, Sebastian Pop wrote:

> On Fri, Mar 11, 2016 at 9:14 AM, Tom de Vries 
> wrote:
> 
> > Hi,
> >
> > this patch fixes PR70045, a graphite 6 regression.
> >
> > The problem is as follows: in graphite_create_new_loop_guard, a condition
> > cond_expr is constructed using an upper bound expression *ub.
> >
> > During the call:
> > ...
> > exit_edge = create_empty_if_region_on_edge (entry_edge, cond_expr);
> > ...
> > the cond_expr is modified in place, which has as side-effect that *ub is
> > modified.
> >
> > The patch fixes this by unsharing the cond_expr before passing it as
> > argument to create_empty_if_region_on_edge.
> >
> > Bootstrapped and reg-tested on x86_64.
> >
> > OK for stage4 trunk?
> >
> >
> Thanks Tom, the patch looks good to me.
> I will let Richi decide whether it is still appropriate to commit the
> change to trunk.

As it is a regression it's fine.

Richard.


Various selective scheduling fixes

2016-03-14 Thread Andrey Belevantsev

Hello,

In this thread I will be posting the patches for the fixed selective 
scheduling PRs (except the one that was already kindly checked in by Jeff). 
 The patches were tested both on x86-64 and ia64 with the following 
combination: 1) the usual bootstrap/regtest, which only utilizes sel-sched 
on its own tests, made by default to run on arm/ppc/x86-64/ia64; 2) the 
bootstrap/regtest with the second scheduler forced to sel-sched; 3) both 
schedulers forced to sel-sched.  In all cases everything seemed to be fine.


Three of the PRs are regressions, the other two showed different errors 
across the variety of releases tested by submitters;  I think all of them 
are appropriate at this stage -- they do not touch anything outside of 
selective scheduling except the first patch where a piece of code from 
sched-deps.c needs to be refactored into a function to be called from 
sel-sched.c.


Andrey


[01/05] Fix PR 64411

2016-03-14 Thread Andrey Belevantsev

Hello,

In this case, we get an inconsistency between the sched-deps interface, 
saying we can't move an insn writing the si register through a vector insn, 
and the liveness analysis, saying we can.  The latter doesn't take into 
account implicit_reg_pending_clobbers set calculated in sched-deps before 
register allocation.  The solution is to reflect this set in our insn data 
(sets/uses/clobbers).


Ok for trunk?

gcc/

2016-01-15  Andrey Belevantsev  

PR target/64411
* sched-deps.c (get_implicit_reg_pending_clobbers): New function,
factored out from ...
(sched_analyze_insn): ... here.
* sched-int.h (get_implicit_reg_pending_clobbers): Declare it.
* sel-sched-ir.c (setup_id_implicit_regs): New function, use
get_implicit_reg_pending_clobbers in it.
(setup_id_reg_sets): Use setup_id_implicit_regs.
(deps_init_id): Ditto.

testsuite/

2016-01-15  Andrey Belevantsev  

PR target/64411
* gcc.target/i386/pr64411.C: New test.

Best,
Andrey
diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c
index 4961dfb..3d4a1d5 100644
--- a/gcc/sched-deps.c
+++ b/gcc/sched-deps.c
@@ -2860,6 +2860,17 @@ sched_macro_fuse_insns (rtx_insn *insn)
 
 }
 
+/* Get the implicit reg pending clobbers for INSN.  */
+void
+get_implicit_reg_pending_clobbers (rtx_insn *insn, HARD_REG_SET *temp)
+{
+  extract_insn (insn);
+  preprocess_constraints (insn);
+  alternative_mask preferred = get_preferred_alternatives (insn);
+  ira_implicitly_set_insn_hard_regs (temp, preferred);
+  AND_COMPL_HARD_REG_SET (*temp, ira_no_alloc_regs);
+}
+
 /* Analyze an INSN with pattern X to find all dependencies.  */
 static void
 sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn)
@@ -2872,12 +2883,7 @@ sched_analyze_insn (struct deps_desc *deps, rtx x, rtx_insn *insn)
   if (! reload_completed)
 {
   HARD_REG_SET temp;
-
-  extract_insn (insn);
-  preprocess_constraints (insn);
-  alternative_mask prefrred = get_preferred_alternatives (insn);
-  ira_implicitly_set_insn_hard_regs (&temp, prefrred);
-  AND_COMPL_HARD_REG_SET (temp, ira_no_alloc_regs);
+  get_implicit_reg_pending_clobbers (insn, &temp);
   IOR_HARD_REG_SET (implicit_reg_pending_clobbers, temp);
 }
 
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 378c3aa..d0e2c0e 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -1351,6 +1351,7 @@ extern void finish_deps_global (void);
 extern void deps_analyze_insn (struct deps_desc *, rtx_insn *);
 extern void remove_from_deps (struct deps_desc *, rtx_insn *);
 extern void init_insn_reg_pressure_info (rtx_insn *);
+extern void get_implicit_reg_pending_clobbers (rtx_insn *, HARD_REG_SET *);
 
 extern dw_t get_dep_weak (ds_t, ds_t);
 extern ds_t set_dep_weak (ds_t, ds_t, dw_t);
diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index d6c86b8..e181cb9 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -2650,6 +2650,24 @@ maybe_downgrade_id_to_use (idata_t id, insn_t insn)
 IDATA_TYPE (id) = USE;
 }
 
+/* Setup implicit register clobbers calculated by sched-deps before reload.  */
+static void
+setup_id_implicit_regs (idata_t id, insn_t insn)
+{
+  if (reload_completed)
+return;
+
+  HARD_REG_SET temp;
+  unsigned regno;
+  hard_reg_set_iterator hrsi;
+
+  get_implicit_reg_pending_clobbers (insn, &temp);
+  EXECUTE_IF_SET_IN_HARD_REG_SET (temp, 0, regno, hrsi)
+{
+  SET_REGNO_REG_SET (IDATA_REG_SETS (id), regno);
+}
+}
+
 /* Setup register sets describing INSN in ID.  */
 static void
 setup_id_reg_sets (idata_t id, insn_t insn)
@@ -2704,6 +2722,9 @@ setup_id_reg_sets (idata_t id, insn_t insn)
 	}
 }
 
+  /* Also get implicit reg clobbers from sched-deps.  */
+  setup_id_implicit_regs (id, insn);
+
   return_regset_to_pool (tmp);
 }
 
@@ -2735,20 +2756,18 @@ deps_init_id (idata_t id, insn_t insn, bool force_unique_p)
   deps_init_id_data.force_use_p = false;
 
   init_deps (dc, false);
-
   memcpy (&deps_init_id_sched_deps_info,
 	  &const_deps_init_id_sched_deps_info,
 	  sizeof (deps_init_id_sched_deps_info));
-
   if (spec_info != NULL)
 deps_init_id_sched_deps_info.generate_spec_deps = 1;
-
   sched_deps_info = &deps_init_id_sched_deps_info;
 
   deps_analyze_insn (dc, insn);
+  /* Implicit reg clobbers received from sched-deps separately.  */
+  setup_id_implicit_regs (id, insn);
 
   free_deps (dc);
-
   deps_init_id_data.id = NULL;
 }
 
diff --git a/gcc/testsuite/gcc.target/i386/pr64411.C b/gcc/testsuite/gcc.target/i386/pr64411.C
new file mode 100644
index 0000000..55858fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr64411.C
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mcmodel=medium -fPIC -fschedule-insns -fselective-scheduling" } */
+
+typedef __SIZE_TYPE__ size_t;
+
+extern "C"  long strtol ()
+  { return 0; }
+
+static struct {
+  void *sp[2];
+} info;
+
+union S813
+{
+  void * c[5];
+}
+s813;
+
+S813 a813[5];
+S813 check813 (S813, S813 *, 

[02/05] Fix PR 63384

2016-03-14 Thread Andrey Belevantsev

Hello,

Here we're looping because we decrease the counter of the insns we still 
can issue on a DEBUG_INSN thus rendering the counter negative.  The fix is 
to not count debug insns in the corresponding code.  The selective 
scheduling is known to spoil the result of var tracking, but still it is 
not the reason to hang in there.


The toggle option used in the test seems to be the equivalent of just 
enabling var-tracking-assignments which should lead to the same situation; 
however, if specified as is, var-tracking-assignments will be disabled by 
the toplev.c:1460 code.  Maybe we also need the same treatment for 
flag_var_tracking_assignments_toggle.


Ok for trunk?

gcc/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/63384
* sel-sched.c (invoke_aftermath_hooks): Do not decrease issue_more on 
DEBUG_INSN_P insns.


testsuite/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/63384
* testsuite/g++.dg/pr63384.C: New test.

Best,
Andrey

diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
index c798935..893a3e5 100644
--- a/gcc/sel-sched.c
+++ b/gcc/sel-sched.c
@@ -4249,7 +4249,8 @@ invoke_aftermath_hooks (fence_t fence, rtx_insn *best_insn, int issue_more)
   issue_more);
   memcpy (FENCE_STATE (fence), curr_state, dfa_state_size);
 }
-  else if (GET_CODE (PATTERN (best_insn)) != USE
+  else if (! DEBUG_INSN_P (best_insn)
+	   && GET_CODE (PATTERN (best_insn)) != USE
&& GET_CODE (PATTERN (best_insn)) != CLOBBER)
 issue_more--;
 
diff --git a/gcc/testsuite/g++.dg/pr63384.C b/gcc/testsuite/g++.dg/pr63384.C
new file mode 100644
index 0000000..b4e0784
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr63384.C
@@ -0,0 +1,12 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fselective-scheduling2 -fsel-sched-pipelining  -fsel-sched-pipelining-outer-loops -fsel-sched-reschedule-pipelined -fvar-tracking-assignments-toggle -ftree-vectorize" } */
+
+template  T **make_test_matrix() {
+ T **data = new T *;
+ for (int i = 0; i < 1000; i++)
+;
+}
+
+template  void test() { T **c = make_test_matrix(); }
+
+main() { test(); }


[03/05] Fix PR 66660

2016-03-14 Thread Andrey Belevantsev

Hello,

We speculate an insn in the PR but we do not make a check for it though we
should.  The thing that broke this was the fix for PR 45472.  In that PR,
we moved a volatile insn too far up because we failed to merge the
bits describing its volatility when we processed a control flow split.
The code to propagate the insn pattern during insn merging was added
for the case where the volatility of the two insns from the two split
branches differs.  However, the volatility of the speculated insn and its
original also differ: the original insn may trap while the speculated version
may not.  Thus, we replace a speculative pattern with the original one per the
PR 45472 fix for no reason.


The patch for this problem just limits the original fix for PR 45472 to 
apply for non-speculative insns only.  There is no test as it is not so 
easy to construct one -- we could count the number of speculation check in 
the resulting assembly but there is no way to force speculation to happen.


Ok for trunk?

gcc/

2016-03-14  Andrey Belevantsev  

PR target/66660
* sel-sched-ir.c (merge_expr): Do not propagate trap bits into 
speculative insns.


Best,
Andrey
commit 53ef39496acc26cc0021555e403068e93343aa20
Author: Andrey Belevantsev 
Date:   Wed Jan 27 17:20:27 2016 +0300

Fix pr66660: do not propagate trap bits into speculative insns

diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index e181cb9..ec59280 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -1871,12 +1871,12 @@ merge_expr (expr_t to, expr_t from, insn_t split_point)
   /* Make sure that speculative pattern is propagated into exprs that
  have non-speculative one.  This will provide us with consistent
  speculative bits and speculative patterns inside expr.  */
-  if ((EXPR_SPEC_DONE_DS (from) != 0
-   && EXPR_SPEC_DONE_DS (to) == 0)
-  /* Do likewise for volatile insns, so that we always retain
-	 the may_trap_p bit on the resulting expression.  */
-  || (VINSN_MAY_TRAP_P (EXPR_VINSN (from))
-	  && !VINSN_MAY_TRAP_P (EXPR_VINSN (to))))
+  if (EXPR_SPEC_DONE_DS (to) == 0
+  && (EXPR_SPEC_DONE_DS (from) != 0
+	  /* Do likewise for volatile insns, so that we always retain
+	 the may_trap_p bit on the resulting expression.  */
+	  || (VINSN_MAY_TRAP_P (EXPR_VINSN (from))
+	  && !VINSN_MAY_TRAP_P (EXPR_VINSN (to)))))
 change_vinsn_in_expr (to, EXPR_VINSN (from));
 
   merge_expr_data (to, from, split_point);


[04/05] Fix PR 69032

2016-03-14 Thread Andrey Belevantsev

Hello,

We fail to find the proper seqno for the fresh bookkeeping copy in this PR. 
 The problem is that in get_seqno_by_preds we are iterating over bb from 
the given insn backwards up to the first bb insn.  We skip the initial insn 
when iterating over bb, yet we should take seqno from it.


The code in question originally didn't include bb head when iterating, and 
was patched to do so in 2011.  The patch was wrong and instead of including 
bb head managed to exclude the original insn itself.  By reading the 
original and patched code I've convinced myself that the right fix will be 
to do what the patch intended and include both the initial insn and the bb 
head in the iteration.


Ok for trunk?

gcc/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/69032
* sel-sched-ir.c (get_seqno_by_preds): Include both tmp and head when 
looping backwards over basic block insns.


testsuite/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/69032
* gcc.dg/pr69032.c: New test.

Best,
Andrey
diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index ec59280..c1a9e55 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -4103,11 +4103,14 @@ get_seqno_by_preds (rtx_insn *insn)
   insn_t *preds;
   int n, i, seqno;
 
-  while (tmp != head)
+  /* Loop backwards from insn to head including both.  */
+  while (1)
 {
-  tmp = PREV_INSN (tmp);
   if (INSN_P (tmp))
 return INSN_SEQNO (tmp);
+  if (tmp == head)
+	break;
+  tmp = PREV_INSN (tmp);
 }
 
   cfg_preds (bb, &preds, &n);
diff --git a/gcc/testsuite/gcc.dg/pr69032.c b/gcc/testsuite/gcc.dg/pr69032.c
new file mode 100644
index 0000000..e0925cd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr69032.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target powerpc*-*-* ia64-*-* i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -fsched-pressure -fsel-sched-pipelining -fselective-scheduling" } */
+
+void foo (long long i)
+{
+   while (i != -1)
+ {
+	++i;
+	 __asm__ ("");
+ }
+}


[05/05] Fix PR 69102

2016-03-14 Thread Andrey Belevantsev

Hello,

The problem here is readonly dependence contexts in the selective scheduler.
We're trying to cache the effect of initializing a dependence context by
remembering that context and setting a readonly bit on it.  When first
moving the insn 43 with REG_ARGS_SIZE note through the insn 3 (a simple eax 
set) we also set the last_args_size field of the context.  Later, when we 
make a copy of insn 43 and try to move it again through insn 3, we take the 
cached dependency context and notice the (fake) dep with last_args_size 
insn, which is the old insn 43.  Then the assert saying that we should be 
able to lift the bookkeeping copy up the same way as we did with the 
original insn breaks.


Fixed by the attached patch that makes us notice only deps with the current 
producer insn.


Ok for trunk?

gcc/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/69102
* sel-sched.c (has_dependence_note_dep): Only take into
account dependencies produced by the current producer insn.
(has_dependence_note_mem_dep): Likewise.

testsuite/

2016-03-14  Andrey Belevantsev  

PR rtl-optimization/69102
* gcc.c-torture/compile/pr69102.c: New test.

Best,
Andrey

diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index c1a9e55..b4aa933 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -3277,9 +3277,14 @@ has_dependence_note_reg_use (int regno)
 static void
 has_dependence_note_mem_dep (rtx mem ATTRIBUTE_UNUSED,
 			 rtx pending_mem ATTRIBUTE_UNUSED,
-			 insn_t pending_insn ATTRIBUTE_UNUSED,
+			 insn_t pending_insn,
 			 ds_t ds ATTRIBUTE_UNUSED)
 {
+  /* We're only interested in dependencies with the current producer.
+ We might get other insns that were saved in dependence context
+ as last_* or pending_* fields.  */
+  if (INSN_UID (pending_insn) != INSN_UID (has_dependence_data.pro))
+return;
   if (!sched_insns_conditions_mutex_p (has_dependence_data.pro,
    VINSN_INSN_RTX (has_dependence_data.con)))
 {
@@ -3291,9 +3296,14 @@ has_dependence_note_mem_dep (rtx mem ATTRIBUTE_UNUSED,
 
 /* Note a dependence.  */
 static void
-has_dependence_note_dep (insn_t pro ATTRIBUTE_UNUSED,
+has_dependence_note_dep (insn_t pro,
 			 ds_t ds ATTRIBUTE_UNUSED)
 {
+  /* We're only interested in dependencies with the current producer.
+ We might get other insns that were saved in dependence context
+ as last_* or pending_* fields.  */
+  if (INSN_UID (pro) != INSN_UID (has_dependence_data.pro))
+return;
   if (!sched_insns_conditions_mutex_p (has_dependence_data.pro,
    VINSN_INSN_RTX (has_dependence_data.con)))
 {
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr69102.c b/gcc/testsuite/gcc.c-torture/compile/pr69102.c
new file mode 100644
index 0000000..b1328ca
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr69102.c
@@ -0,0 +1,21 @@
+/* { dg-options "-Og -fPIC -fschedule-insns2 -fselective-scheduling2 -fno-tree-fre --param=max-sched-extend-regions-iters=10" } */
+void bar (unsigned int);
+
+void
+foo (void)
+{
+  char buf[1] = { 3 };
+  const char *p = buf;
+  const char **q = &p;
+  unsigned int ch;
+  switch (**q)
+{
+case 1:  ch = 5; break;
+case 2:  ch = 4; break;
+case 3:  ch = 3; break;
+case 4:  ch = 2; break;
+case 5:  ch = 1; break;
+default: ch = 0; break;
+}
+  bar (ch);
+}


Re: [PATCH] PR69195, Reload confused by invalid reg equivs

2016-03-14 Thread Richard Biener
On Sat, Mar 12, 2016 at 6:07 PM, Jeff Law  wrote:
> On 03/12/2016 04:10 AM, Richard Biener wrote:
>>
>> On March 12, 2016 10:29:40 AM GMT+01:00, Jakub Jelinek 
>> wrote:
>>>
>>> On Sat, Mar 12, 2016 at 07:37:25PM +1030, Alan Modra wrote:
>
> I believe Alan's point is DSE deleted the assignment to x which
>>>
>>> can't be
>
> right as long as we've left in goto *&x.
>
> The goto *&x should be a use of x and thus should have kept the
>>>
>>> assignment
>
> live.


 Right, I wasn't trying to say that ira.c:indirect_jump_optimize is
 OK.  It needs the patch I posted or perhaps even better a test of
 DF_REF_INSN_INFO rather than !DF_REF_IS_ARTIFICIAL (simply because
>>>
>>> the

 flag test is reading another field, and we need to read
 DF_REF_INSN_INFO anyway).
>>>
>>>
>>> Ok, that was my point.  BTW, DSE isn't the only one that deletes x = 0;
>>> cddce deletes it too.  -fno-tree-dse -fno-tree-dce preserves it till
>>> expansion.
>>
>>
>> GIMPLE_GOTO doesn't have VOPs and I don't think that we'd want VUSEs on
>> all gotos. But just having them on indirect gotos would be inconsistent.
>>
>> I believe the code is undefined anyway and out of scope of a reasonable
>> QOI.
>
> Undefined?  Most likely.  But we still have to do something sensible. As
> Jakub noted, a user could create the problematic code just as easily as
> DCE/DSE, so IRA probably needs to be tolerant of this situation.
>
> So it seems like you're suggesting we leave DCE/DSE alone (declaring this
> usage undefined) and fix IRA to be tolerant, right?

Tolerant as in not crash?  Yes.

Note that DCE/DSE would be happy if the stores were global memory.  After
my recent fix even addresses based on functions and labels work here.

What does not work is if you jump to automatic storage the compiler knows
how to compute liveness of as gotos are not considered here.  I can't think
of a way to make the IL handle this without severely pessimizing regular
DCE/DSE or making it very "hacky" in only giving VUSEs to gotos that
possibly may reach "local" memory.

That said, I can of course try and will once I see a testcase we break that
matters in real life from a correctness perspective - like I did for that ARM
kernel patching bug.

>> Using alloca to create/jump to code on the stack should work (we might
>> transform that into a decl though).
>
> Given that executable stacks are a huge security hole, I'd be willing to go
> out on a limb and declare that undefined as well.  It's not as clear cut,
> but that's the argument I'd make.

Well, I thought about somebody trying to build trampolines in a way exposed
to GCC.

> And yes, I realize that goes in opposition to what GCC has allowed for 20+
> years.  I still think it'd be the right thing to do.

Did we allow this?  Not by design but rather by accident I suppose.

Richard.

> jeff
>


Re: [PATCH][PR rtl-optimization/69307] Handle hard registers in modes that span more than one register properly

2016-03-14 Thread Andrey Belevantsev

Hello Jeff,

On 12.03.2016 20:13, Jeff Law wrote:


As Andrey outlined in the PR, selective-scheduling was missing a check &
handling of hard registers in modes that span more than one hard reg. This
caused an incorrect register selection during renaming.

I verified removing the printf call from the test would not compromise the
test.  Then I did a normal x86 bootstrap & regression test with the patch.
Of course that's essentially useless, so I also did another bootstrap and
regression test with -fselective-scheduling in BOOT_CFLAGS with and without
this patch.  In both cases there were no regressions.


Thank you for checking this in.  I've also tested this patch in the similar 
way (forcing selective scheduling for 2nd and both schedulers) both on 
x86-64 and ia64.  I've posted the patches for remaining sel-sched PRs just 
now -- it took some time bringing our Itaniums back to life.


Andrey



I'm installing Andrey's patch on the trunk.  I'm not sure this is worth
addressing in gcc-5.

Jeff




[wwwdocs] Fix broken link for Blackfin toolchain

2016-03-14 Thread Jonathan Wakely

Committed to CVS.


Index: htdocs/readings.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/readings.html,v
retrieving revision 1.245
diff -u -r1.245 readings.html
--- htdocs/readings.html	28 Feb 2016 20:42:31 -	1.245
+++ htdocs/readings.html	14 Mar 2016 09:58:39 -
@@ -94,8 +94,8 @@
 
  Blackfin
   Manufacturer: Analog Devices
-  http://blackfin.uclinux.org/gf/";>uClinux and GNU
-  toolchains for the Blackfin
+  https://blackfin.uclinux.org/doku.php?id=toolchain";>uClinux
+  and GNU toolchains for the Blackfin
   http://www.analog.com/en/processors-dsp/blackfin/products/index.html";>Blackfin Documentation
  
 


Re: [C PATCH] Prevent -Wunused-value warning with __atomic_fetch_* (PR c/69407)

2016-03-14 Thread Marek Polacek
Ping.

On Fri, Mar 04, 2016 at 07:03:09PM +0100, Marek Polacek wrote:
> On Fri, Mar 04, 2016 at 06:41:26PM +0100, Jakub Jelinek wrote:
> > I'm ok with it for gcc6.
> 
> Cool.
> 
> > But IMHO you should add dg-bogus directives here.
> 
> Ok, version with dg-bogus:
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 
> 2016-03-04  Marek Polacek  
> 
>   PR c/69407
>   * c-common.c (resolve_overloaded_builtin): Set TREE_USED for the fetch
>   operations.
> 
>   * gcc.dg/atomic-op-6.c: New test.
> 
> diff --git gcc/c-family/c-common.c gcc/c-family/c-common.c
> index 965cf49..25afa9c 100644
> --- gcc/c-family/c-common.c
> +++ gcc/c-family/c-common.c
> @@ -11443,6 +11443,10 @@ resolve_overloaded_builtin (location_t loc, tree 
> function,
>   && orig_code != BUILT_IN_ATOMIC_STORE_N)
> result = sync_resolve_return (first_param, result, orig_format);
>  
> + if (fetch_op)
> +   /* Prevent -Wunused-value warning.  */
> +   TREE_USED (result) = true;
> +
>   /* If new_return is set, assign function to that expr and cast the
>  result to void since the generic interface returned void.  */
>   if (new_return)
> diff --git gcc/testsuite/gcc.dg/atomic-op-6.c 
> gcc/testsuite/gcc.dg/atomic-op-6.c
> index e69de29..f88c293 100644
> --- gcc/testsuite/gcc.dg/atomic-op-6.c
> +++ gcc/testsuite/gcc.dg/atomic-op-6.c
> @@ -0,0 +1,11 @@
> +/* Test we don't generate bogus warnings.  */
> +/* PR c/69407 */
> +/* { dg-do compile } */
> +/* { dg-options "-Wall -Wextra" } */
> +
> +void
> +foo (int *p, int a)
> +{
> +  __atomic_fetch_add (&p, a, 0); /* { dg-bogus "value computed is not used" 
> } */
> +  __atomic_add_fetch (&p, a, 0); /* { dg-bogus "value computed is not used" 
> } */
> +}

Marek


[PATCH] Fix PR56365

2016-03-14 Thread Richard Biener

I am testing the following patch to fix the regression in min/max
detection introduced by comparison canonicalization like a < 267
to a <= 266.  The patch allows us to identify all four min/max
cases in the testcase below.

Bootstrap and regtest running on x86_64-unknown-linux-gnu.

Richard.

2016-03-14  Richard Biener  

PR tree-optimization/56365
* tree-ssa-phiopt.c (minmax_replacement): Handle alternate
constants to compare against.

* gcc.dg/tree-ssa/phi-opt-14.c: New testcase.

Index: gcc/tree-ssa-phiopt.c
===
*** gcc/tree-ssa-phiopt.c   (revision 234180)
--- gcc/tree-ssa-phiopt.c   (working copy)
*** minmax_replacement (basic_block cond_bb,
*** 1045,1051 
gassign *new_stmt;
edge true_edge, false_edge;
enum tree_code cmp, minmax, ass_code;
!   tree smaller, larger, arg_true, arg_false;
gimple_stmt_iterator gsi, gsi_from;
  
type = TREE_TYPE (PHI_RESULT (phi));
--- 1045,1051 
gassign *new_stmt;
edge true_edge, false_edge;
enum tree_code cmp, minmax, ass_code;
!   tree smaller, alt_smaller, larger, alt_larger, arg_true, arg_false;
gimple_stmt_iterator gsi, gsi_from;
  
type = TREE_TYPE (PHI_RESULT (phi));
*** minmax_replacement (basic_block cond_bb,
*** 1059,1073 
--- 1059,1117 
  
/* This transformation is only valid for order comparisons.  Record which
   operand is smaller/larger if the result of the comparison is true.  */
+   alt_smaller = NULL_TREE;
+   alt_larger = NULL_TREE;
if (cmp == LT_EXPR || cmp == LE_EXPR)
  {
smaller = gimple_cond_lhs (cond);
larger = gimple_cond_rhs (cond);
+   /* If we have smaller < CST it is equivalent to smaller <= CST-1.
+Likewise smaller <= CST is equivalent to smaller < CST+1.  */
+   if (TREE_CODE (larger) == INTEGER_CST)
+   {
+ if (cmp == LT_EXPR)
+   {
+ bool overflow;
+ wide_int alt = wi::sub (larger, 1, TYPE_SIGN (TREE_TYPE (larger)),
+ &overflow);
+ if (! overflow)
+   alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
+   }
+ else
+   {
+ bool overflow;
+ wide_int alt = wi::add (larger, 1, TYPE_SIGN (TREE_TYPE (larger)),
+ &overflow);
+ if (! overflow)
+   alt_larger = wide_int_to_tree (TREE_TYPE (larger), alt);
+   }
+   }
  }
else if (cmp == GT_EXPR || cmp == GE_EXPR)
  {
smaller = gimple_cond_rhs (cond);
larger = gimple_cond_lhs (cond);
+   /* If we have larger > CST it is equivalent to larger >= CST+1.
+Likewise larger >= CST is equivalent to larger > CST-1.  */
+   if (TREE_CODE (smaller) == INTEGER_CST)
+   {
+ if (cmp == GT_EXPR)
+   {
+ bool overflow;
+ wide_int alt = wi::add (smaller, 1, TYPE_SIGN (TREE_TYPE 
(smaller)),
+ &overflow);
+ if (! overflow)
+   alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
+   }
+ else
+   {
+ bool overflow;
+ wide_int alt = wi::sub (smaller, 1, TYPE_SIGN (TREE_TYPE 
(smaller)),
+ &overflow);
+ if (! overflow)
+   alt_smaller = wide_int_to_tree (TREE_TYPE (smaller), alt);
+   }
+   }
  }
else
  return false;
*** minmax_replacement (basic_block cond_bb,
*** 1098,1105 
  
if (empty_block_p (middle_bb))
  {
!   if (operand_equal_for_phi_arg_p (arg_true, smaller)
! && operand_equal_for_phi_arg_p (arg_false, larger))
{
  /* Case
  
--- 1142,1153 
  
if (empty_block_p (middle_bb))
  {
!   if ((operand_equal_for_phi_arg_p (arg_true, smaller)
!  || (alt_smaller
!  && operand_equal_for_phi_arg_p (arg_true, alt_smaller)))
! && (operand_equal_for_phi_arg_p (arg_false, larger)
! || (alt_larger
! && operand_equal_for_phi_arg_p (arg_true, alt_larger
{
  /* Case
  
*** minmax_replacement (basic_block cond_bb,
*** 1109,1116 
 rslt = larger;  */
  minmax = MIN_EXPR;
}
!   else if (operand_equal_for_phi_arg_p (arg_false, smaller)
!  && operand_equal_for_phi_arg_p (arg_true, larger))
minmax = MAX_EXPR;
else
return false;
--- 1157,1168 
 rslt = larger;  */
  minmax = MIN_EXPR;
}
!   else if ((operand_equal_for_phi_arg_p (arg_false, smaller)
!   || (alt_smaller
!   && operand_equal_for_phi_arg_p (arg_false, alt_smaller)))
!  && (operand_equal_for_phi_arg_p (arg_true, larger)
!  || (al

Re: [PATCH][ARM] PR driver/70132: Avoid double fclose in driver-arm.c

2016-03-14 Thread Bernd Schmidt

On 03/11/2016 04:32 PM, Kyrill Tkachov wrote:

 PR driver/70132
 * config/arm/driver-arm.c (host_detect_local_cpu): Set file pointer
 to NULL after closing file.


Doesn't match the patch. Either variant is fine but please use the right 
combination :)



Bernd


Re: [PATCH, 4/16] Implement -foffload-alias

2016-03-14 Thread Tom de Vries

On 02/12/15 10:58, Jakub Jelinek wrote:

On Fri, Nov 27, 2015 at 01:03:52PM +0100, Tom de Vries wrote:

Handle non-declared variables in kernels alias analysis

2015-11-27  Tom de Vries  

* gimplify.c (gimplify_scan_omp_clauses): Initialize
OMP_CLAUSE_ORIG_DECL.
* omp-low.c (install_var_field_1): Handle base_pointers_restrict for
pointers.
(map_ptr_clause_points_to_clause_p)
(nr_map_ptr_clauses_pointing_to_clause): New function.
(omp_target_base_pointers_restrict_p): Handle GOMP_MAP_POINTER.
* tree-pretty-print.c (dump_omp_clause): Print OMP_CLAUSE_ORIG_DECL.
* tree.c (omp_clause_num_ops): Set num_ops for OMP_CLAUSE_MAP to 3.
* tree.h (OMP_CLAUSE_ORIG_DECL): New macro.

* c-c++-common/goacc/kernels-alias-10.c: New test.
* c-c++-common/goacc/kernels-alias-9.c: New test.


I don't like this (mainly the addition of OMP_CLAUSE_ORIG_DECL),
but it also sounds wrong to me.
The primary question is how do you handle GOMP_MAP_POINTER
(which is something we don't use for C/C++ OpenMP anymore,
and Fortran OpenMP will stop using it in GCC 7 or 6.2?) on the OpenACC
libgomp side, does it work like GOMP_MAP_ALLOC or GOMP_MAP_FORCE_ALLOC?


When a GOMP_MAP_POINTER mapping is encountered, first we check if it has 
been mapped before:

- if it hasn't been mapped before, we check if the area the pointer
  points to has been mapped, and if not, error out. Else we map the
  pointer to a device pointer, and write the device pointer value
  to the device pointer variable.
- if the pointer has been mapped before, we reuse the mapping and write
  the device pointer value to the device pointer variable.


Similarly GOMP_MAP_TO_PSET.
If it works like GOMP_MAP_ALLOC (it does
on the OpenMP side in target.c, so if something is already mapped, no
further pointer assignment happens), then your change looks wrong.
If it works like GOMP_MAP_FORCE_ALLOC, then you just should treat
GOMP_MAP_POINTER on all OpenACC constructs as opcode that allows the
restrict operation.


I guess it works mostly like GOMP_MAP_ALLOC, but I don't understand the 
relevance of the comparison for the patch. What is interesting for the 
restrict optimization is whether what GOMP_MAP_POINTER points to has 
been mapped with or without the force flag during the same mapping sequence.



If it should behave differently depending on
if the corresponding array section has been mapped with GOMP_MAP_FORCE_*
or without it,


The mapping itself shouldn't behave differently.


then supposedly you should use a different code for
those two.


I could add f.i. an unsigned int aux_flags to struct tree_omp_clause, 
set a new POINTS_TO_FORCE_VAR flag when translating the acc clause into 
mapping clauses, and use that flag later on when dealing with the 
GOMP_MAP_POINTER clause. Is that an acceptable approach?


[ Instead I could define a new gcc-internal-only 
GOMP_MAP_POINTER_POINTS_TO_FORCE kind, but I'd rather avoid this, given 
that it would be handled the same as GOMP_MAP_POINTER everywhere, except 
for a single point in the source code. ]


Thanks,
- Tom


Re: Fix 69650, bogus line numbers from libcpp

2016-03-14 Thread Bernd Schmidt

On 03/11/2016 11:09 PM, David Malcolm wrote:

+ cpp_error (pfile, CPP_DL_ERROR,
+"file \"%s\" left but not entered", new_file);

  
Although it looks like you're preserving the existing behavior from
when this was in linemap_add, shouldn't this be
   ORDINARY_MAP_FILE_NAME (from)
rather than new_file?  (i.e. shouldn't it report the name of the file
being *left*, rather than the one being entered?)


Hmm, almost but not quite. We don't necessarily know the name of the 
file that's being left, if there's just a single #line directive as in 
the testcase. I don't think we can reliably get a meaningful filename 
other than the one in the line directive. So maybe the error message needs 
to be changed to something like "file %s unexpectedly reentered"?



Can we also have a testcase with a non-empty filename?  I'm interested
in seeing what the exact error message looks like.


  # 1 "v.c"
  # 1 "t.h" 1
  int t;
  # 2 "v.c" 2

  int b;

t.h:2:12: error: file "b.c" left but not entered

So this shows the line number for the file we think we are in, which is 
t.h. Would you accept this with the wording changed as suggested above?



Bernd


Re: patch for PR69614

2016-03-14 Thread Christophe Lyon
On 12 March 2016 at 18:17, Jeff Law  wrote:
> On 03/12/2016 07:56 AM, Vladimir Makarov wrote:
>>
>>The following patch should solve the PR which is discussed on
>>
>> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69614
>>
>>The patch was bootstrapped and tested on x86/x86-64.
>>
>>Committed as rev. 234162.
>>

Hi Vladimir,

I've noticed that the newly introduced testcase fails on
armeb-none-linux-gnueabihf,
with GCC configured as:
--with-cpu=cortex-a9 --with-fpu=neon-fp16
whether --with-mode=arm or thumb,
but the test passes if GCC is configured --with-fpu=vfpv3-d16-fp16

I'm using qemu, and the test prints:
00cef0a2b644
instead of the expected value.

Do you prefer me to update bugzilla with this information?

Thanks,

Christophe.

> Isn't this potentially a problem for the gcc-4.9 and gcc-5 release branches?
>
> I'm going to add regression markers for those and drop the gcc-6 regression
> marker.
>
> jeff


Re: [PATCH, match] Fix pr68714

2016-03-14 Thread Richard Biener
On Fri, 11 Mar 2016, Richard Henderson wrote:

> On 03/02/2016 01:31 AM, Richard Biener wrote:
> > As a general remark I think handling of this simplification is
> > better done in the reassoc pass (see Jakubs comment #4) given
> > || and && associate.  So I'd rather go down that route if possible.
> 
> This seems to do the trick.

There are a lot of tabs vs. white-space issues in the patch.  Otherwise
looks ok to me.

Thanks,
Richard.


Re: Wonly-top-basic-asm

2016-03-14 Thread Bernd Schmidt

On 03/11/2016 01:55 AM, David Wohlferd wrote:

So, we have been discussing this issue for 4 months now.  Over that
time, I have tried to incorporate everyone's feedback.

As a result we have gone from a tiny doc patch (just describe the
current semantics), to a big doc patch (completely deprecate basic asm
when used in a function) to a medium doc patch + code fix (warning when
using basic asm in a function) and now back to a
slightly-bigger-than-tiny doc patch.

I have made no changes since the last patch I posted
(https://gcc.gnu.org/ml/gcc-patches/2016-02/msg01406.html) for the
reasons discussed below.

I assert that this patch both contains important information users need
and is better than the current text.  I expect that Sandra is prepared
to check this in as soon as someone signs off on its technical accuracy.


The example is not good, as discussed previously, and IMO the best 
option is to remove it. Otherwise I have no objections to the latest 
variant.



Bernd



[PATCH, PR70161] Fix fdump-ipa-all-graph

2016-03-14 Thread Tom de Vries

Hi,

this patch fixes PR70161, a 4.9/5/6 regression.

Currently when using -fdump-ipa-all-graph, the compiler ICEs in 
execute_function_dump when testing for pass->graph_dump_initialized, 
because pass == NULL.


The patch fixes:
- the ICE by setting the pass argument in the call to
  execute_function_dump in execute_one_ipa_transform_pass
- a subsequent ICE (triggered with -fipa-pta) by saving, resetting and
  restoring dump_file_name in cgraph_node::get_body, alongside the
  saving and restoring of the dump_file variable.
- the duplicate edges in the subsequently generated dot file by
  ensuring that execute_function_dump is called only once per function
  per pass. [ Note that this bit also has an effect for the normal dump
  files for the ipa passes with transform function. For those functions,
  atm execute_function_dump is called both after execute and after
  transform. With the patch, it's only called after transform. ]

Bootstrapped and reg-tested on x86_64.

OK for stage4?

Thanks,
- Tom
Fix fdump-ipa-all-graph

2016-03-14  Tom de Vries  

	PR ipa/70161
	* cgraph.c (cgraph_node::get_body): Save, reset and restore
	dump_file_name.
	* passes.c (execute_one_ipa_transform_pass): Add missing argument to
	execute_function_dump.
	(execute_one_pass): Don't dump function if it will be dumped after ipa
	transform.

---
 gcc/cgraph.c |  3 +++
 gcc/passes.c | 14 +++---
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 7727313..f187913 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -3365,7 +3365,9 @@ cgraph_node::get_body (void)
 {
   opt_pass *saved_current_pass = current_pass;
   FILE *saved_dump_file = dump_file;
+  const char *saved_dump_file_name = dump_file_name;
   int saved_dump_flags = dump_flags;
+  dump_file_name = NULL;
 
   push_cfun (DECL_STRUCT_FUNCTION (decl));
   execute_all_ipa_transforms ();
@@ -3377,6 +3379,7 @@ cgraph_node::get_body (void)
 
   current_pass = saved_current_pass;
   dump_file = saved_dump_file;
+  dump_file_name = saved_dump_file_name;
   dump_flags = saved_dump_flags;
 }
   return updated;
diff --git a/gcc/passes.c b/gcc/passes.c
index bbe35b3..5aa2b32 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -2219,7 +2219,7 @@ execute_one_ipa_transform_pass (struct cgraph_node *node,
 check_profile_consistency (pass->static_pass_number, 1, true);
 
   if (dump_file)
-do_per_function (execute_function_dump, NULL);
+do_per_function (execute_function_dump, pass);
   pass_fini_dump_file (pass);
 
   current_pass = NULL;
@@ -2356,15 +2356,15 @@ execute_one_pass (opt_pass *pass)
 check_profile_consistency (pass->static_pass_number, 1, true);
 
   verify_interpass_invariants ();
-  if (dump_file)
-do_per_function (execute_function_dump, pass);
-  if (pass->type == IPA_PASS)
+  if (pass->type == IPA_PASS
+  && ((ipa_opt_pass_d *)pass)->function_transform)
 {
   struct cgraph_node *node;
-  if (((ipa_opt_pass_d *)pass)->function_transform)
-	FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
-	  node->ipa_transforms_to_apply.safe_push ((ipa_opt_pass_d *)pass);
+  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
+	node->ipa_transforms_to_apply.safe_push ((ipa_opt_pass_d *)pass);
 }
+  else if (dump_file)
+do_per_function (execute_function_dump, pass);
 
   if (!current_function_decl)
 symtab->process_new_functions ();


Re: [AArch64] Disable pcrelative_literal_loads with fix-cortex-a53-843419

2016-03-14 Thread Christophe Lyon
On 10 March 2016 at 14:24, James Greenhalgh  wrote:
> On Thu, Mar 10, 2016 at 01:37:50PM +0100, Christophe Lyon wrote:
>> On 10 March 2016 at 12:43, James Greenhalgh  wrote:
>> > On Tue, Jan 26, 2016 at 03:43:36PM +0100, Christophe Lyon wrote:
>> >> With the attachment
>> >>
>> >>
>> >> On 26 January 2016 at 15:42, Christophe Lyon  
>> >> wrote:
>> >> > Hi,
>> >> >
>> >> > This is a followup to PR63304.
>> >> >
>> >> > As discussed in bugzilla, this patch disables pcrelative_literal_loads
>> >> > when -mfix-cortex-a53-843419 (or its default configure option) is
>> >> > used.
>> >> >
>> >> > I copied the behavior of -mfix-cortex-a53-835769 (e.g. in
>> >> > aarch64_can_inline_p), and I have tested by building the Linux kernel
>> >> > using -mfix-cortex-a53-843419 and checked that
>> >> > R_AARCH64_ADR_PREL_PG_HI21 relocations are not emitted anymore (under
>> >> > CONFIG_ARM64_ERRATUM_843419).
>> >> >
>> >> > For reference, this is motivated by:
>> >> > https://bugs.linaro.org/show_bug.cgi?id=1994
>> >> > and further details on Launchpad:
>> >> > https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1533009
>> >> >
>> >> > OK for trunk?
>> >
>> > Thanks, this looks like a clear regression from GCC 5 (we can no longer
>> > build the kernel, so this workaround is fine to go in now). Please remember
>> > to add the link to the relevant PR in the ChangeLog.
>> >
>> > I'd also really appreciate a nice big comment over this code:
>> >
>> >> +  /* If it is not set on the command line, we default to no pc
>> >> + relative literal loads, unless the workaround for Cortex-A53
>> >> + erratum 843419 is in effect.  */
>> >> +  if (opts->x_nopcrelative_literal_loads == 2
>> >> +  && !TARGET_FIX_ERR_A53_843419)
>> >
>> > Explaining why this is important (i.e. some summary of the discussion
>> > in PR63304 regarding the kernel module loader).
>> >
>> > Can you repost with that comment added? I don't have any other objections
>> > to the patch.
>> >
>>
>> OK, here is an updated version.
>
> Thanks.
>
> This is OK for trunk.
>

When GCC is configured to enable the A53 erratum 843419 workaround by default,
this patch caused gcc.target/aarch64/pr63304_1.c to fail.

The attached patch fixes the problem by forcing the use of
-mno-fix-cortex-a53-843419.

OK, or do we prefer not to bother?

Thanks,

Christophe


> James
>
2016-03-14  Christophe Lyon  

* gcc.target/aarch64/pr63304_1.c: Add -mno-fix-cortex-a53-843419.
diff --git a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c 
b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
index fa0fb56..c917f81c 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr63304_1.c
@@ -1,5 +1,5 @@
 /* { dg-do assemble } */
-/* { dg-options "-O1 --save-temps" } */
+/* { dg-options "-O1 --save-temps -mno-fix-cortex-a53-843419" } */
 #pragma GCC push_options
 #pragma GCC target ("+nothing+simd, cmodel=small")
 


[PATCH] genrecog: Fix crash on invalid input

2016-03-14 Thread Segher Boessenkool
If your machine description refers to a non-existent predicate genrecog
crashes.  This fixes it.

Is this okay for trunk?


Segher


2016-03-14  Segher Boessenkool  

* genrecog.c (safe_predicate_mode): If PRED is NULL, return false.

---
 gcc/genrecog.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/genrecog.c b/gcc/genrecog.c
index f8b8ef4..65882f4 100644
--- a/gcc/genrecog.c
+++ b/gcc/genrecog.c
@@ -3378,6 +3378,9 @@ find_subroutines (routine_type type, state *s, vec  &procs)
 static bool
 safe_predicate_mode (const struct pred_data *pred, machine_mode mode)
 {
+  if (!pred)
+return false;
+
   /* Scalar integer constants have VOIDmode.  */
   if (GET_MODE_CLASS (mode) == MODE_INT
   && (pred->codes[CONST_INT]
-- 
1.9.3



Re: [01/05] Fix PR 64411

2016-03-14 Thread Alexander Monakov
On Mon, 14 Mar 2016, Andrey Belevantsev wrote:
> In this case, we get an inconsistency between the sched-deps interface, saying
> we can't move an insn writing the si register through a vector insn, and the
> liveness analysis, saying we can.  The latter doesn't take into account
> implicit_reg_pending_clobbers set calculated in sched-deps before register
> allocation.  The solution is to reflect this set in our insn data
> (sets/uses/clobbers).
> 
> Ok for trunk?

One nit; the prototype of the new function:

extern void get_implicit_reg_pending_clobbers (rtx_insn *, HARD_REG_SET *);

has source operand on the left, destination on the right; it's probably nicer
to swap them around.

OK as far as selective scheduler changes go, but this also needs a general
scheduler maintainer ack for the sched-deps.c change.  Vladimir, can you have
a look?

Thanks.
Alexander


Re: [AArch64] Emit square root using the Newton series

2016-03-14 Thread Evandro Menezes

On 03/10/16 19:06, Wilco Dijkstra wrote:

Evandro Menezes  wrote:

That's what I had in mind too, but around the approximation for x^-1/2
and using masks for vector cases thusly:

        fcmne   v3.4s, v0.4s, #0.0
        frsqrte v1.4s, v0.4s
        fmul    v2.4s, v1.4s, v1.4s
        frsqrts v2.4s, v0.4s, v2.4s
        fmul    v1.4s, v1.4s, v2.4s
        fmul    v2.4s, v1.4s, v1.4s
        frsqrts v2.4s, v0.4s, v2.4s
        fmul    v1.4s, v1.4s, v2.4s
        and     v1.4s, v3.4s
        fmul    v0.4s, v1.4s, v0.4s

That's possible but the overall latency is higher - according to exynos-1.md the
above takes 44 cycles while my version would be 37.


I'm currently working to get this prototyped without modifying the 
reciprocal square root.  Once I'm done, I'll merge both functions 
together to generate better code.


I got the scalar version going, but I'm stuck with the vector version.  
As you can see above, I need to use the complement of the mask produced 
by FCMEQ to squelch the offending vector element. However, the way in 
which FCMEQ is defined in GCC, it produces an integer vector and the 
SIMD AND only takes integer vectors.  I'm stuck at how to pass an FP 
vector to AND and then its integer vector back to an FP insn.


Here's how the function stands at the moment:

   /* Emit insns that store an approximation of sqrt (SRC) in DST.  The
      value is computed as SRC * rsqrt (SRC), with the reciprocal square
      root produced by aarch64_emit_approx_rsqrt, and is then qualified
      for a zero input.  The element mode of SRC must be SFmode or DFmode.

      Work in progress: only the scalar path is complete; see the TODO
      and review notes in the vector path.  */
   void
   aarch64_emit_approx_sqrt (rtx dst, rtx src)
   {
  machine_mode mode = GET_MODE (src);
  gcc_assert (GET_MODE_INNER (mode) == SFmode
  || GET_MODE_INNER (mode) == DFmode);

  bool scalar = !VECTOR_MODE_P (mode);
  /* NOTE(review): NARROW is computed but not used anywhere below.  */
  bool narrow = (mode == V2SFmode);

  /* Work on a copy of SRC held in a fresh pseudo.  */
  rtx xsrc = gen_reg_rtx (mode);
  emit_move_insn (xsrc, src);

  /* Build the "input is non-zero" condition: a comparison (XCC/XNE) in
     the scalar case, a mask register (XMSK) in the vector case.  */
  rtx xcc, xne, xmsk;
  if (scalar)
{
  /* fcmp */
  xcc = aarch64_gen_compare_reg (NE, xsrc, CONST0_RTX (mode));
  xne = gen_rtx_NE (VOIDmode, xcc, const0_rtx);
}
  else
{
  /* NOTE(review): MCMP (the integer vector mode matching MODE) is
     computed but not used; XMSK below is allocated in MODE.  */
  machine_mode mcmp = mode_for_vector (int_mode_for_mode
   (GET_MODE_INNER (mode)), GET_MODE_NUNITS (mode));
  /* fcmne is what is wanted; the pattern emitted below is the
     compare-equal one, hence the TODO.  */
  xmsk = gen_reg_rtx (mode);
  /* Just V4SF for now */
  emit_insn (gen_aarch64_cmeqv4sf (xmsk, xsrc, CONST0_RTX (mode)));
  /* TODO: must use the complement of this result.  */
}

  /* Calculate the approximate reciprocal square root.  */
  rtx xrsqrt = gen_reg_rtx (mode);
  aarch64_emit_approx_rsqrt (xrsqrt, xsrc);

  /* Calculate the approximate square root.  */
  rtx xsqrt = gen_reg_rtx (mode);
  emit_set_insn (xsqrt, gen_rtx_MULT (mode, xrsqrt, xsrc));

  /* Qualify the result for when the input is zero.  */
  rtx xdst = gen_reg_rtx (mode);
  if (scalar)
/* fcsel: take XSQRT when the input compared non-zero, otherwise pass
   the input through unchanged.  */
emit_set_insn (xdst, gen_rtx_IF_THEN_ELSE (mode, xne, xsqrt,
   xsrc));
  else
/* and: mask XSQRT with XMSK.  */
emit_set_insn (xdst, gen_rtx_AND (mode, xsqrt, xmsk));

  emit_move_insn (dst, xdst);
   }

Any help is welcome.

Thank you,

--
Evandro Menezes



Re: [01/05] Fix PR 64411

2016-03-14 Thread Bernd Schmidt

On 03/14/2016 05:23 PM, Alexander Monakov wrote:

On Mon, 14 Mar 2016, Andrey Belevantsev wrote:

In this case, we get an inconsistency between the sched-deps interface, saying
we can't move an insn writing the si register through a vector insn, and the
liveness analysis, saying we can.  The latter doesn't take into account
implicit_reg_pending_clobbers set calculated in sched-deps before register
allocation.  The solution is to reflect this set in our insn data
(sets/uses/clobbers).

Ok for trunk?


One nit; the prototype of the new function:

extern void get_implicit_reg_pending_clobbers (rtx_insn *, HARD_REG_SET *);

has source operand on the left, destination on the right; it's probably nicer
to swap them around.

OK as far as selective scheduler changes go, but this also needs a general
scheduler maintainer ack for the sched-deps.c change.  Vladimir, can you have
a look?


Needs better documentation of the new function's arguments (as per 
general requirements for such things), but otherwise that part is ok 
(either arg order). The sel-sched parts should also have proper function 
comments however, and here:


+{
+  SET_REGNO_REG_SET (IDATA_REG_SETS (id), regno);
+}

we don't use braces around single statements.


Bernd


Re: [02/05] Fix PR 63384

2016-03-14 Thread Alexander Monakov
On Mon, 14 Mar 2016, Andrey Belevantsev wrote:
> Here we're looping because we decrease the counter of the insns we still can
> issue on a DEBUG_INSN thus rendering the counter negative.  The fix is to not
> count debug insns in the corresponding code.  The selective scheduling is
> known to spoil the result of var tracking, but still it is not the reason to
> hang in there.
> 
> The toggle option used in the test seems to be the equivalent of just enabling
> var-tracking-assignments which should lead to the same situation; however, if
> specified as is, var-tracking-assignments will be disabled by the
> toplev.c:1460 code.  Maybe we also need the same treatment for
> flag_var_tracking_assignments_toggle.

Hm, I've tried running the test by hand, and I don't follow you: it loops with
either -fvta or -fvta-toggle, producing the expected warning; and doesn't loop
with just -fvar-tracking, when VTA is implicitly disabled at toplev.c:1460.
Sorry, I might have misled you about this (off-list), but it seems toplev.c is
actually working as intended here.

> Ok for trunk?

OK with the formatting oddity fixed:

diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c
index c798935..893a3e5 100644
--- a/gcc/sel-sched.c
+++ b/gcc/sel-sched.c
@@ -4249,7 +4249,8 @@ invoke_aftermath_hooks (fence_t fence, rtx_insn 
*best_insn, int issue_more)
   issue_more);
   memcpy (FENCE_STATE (fence), curr_state, dfa_state_size);
 }
-  else if (GET_CODE (PATTERN (best_insn)) != USE
+  else if (! DEBUG_INSN_P (best_insn)
+  && GET_CODE (PATTERN (best_insn)) != USE
&& GET_CODE (PATTERN (best_insn)) != CLOBBER)
 issue_more--;
 
The prevailing style is '!DEBUG_INSN_P' (no space); it's probably better to
use the same indent style (spaces) on the two following lines too.

Thanks.
Alexander


Re: [03/05] Fix PR 66660

2016-03-14 Thread Alexander Monakov
On Mon, 14 Mar 2016, Andrey Belevantsev wrote:
> We speculate an insn in the PR but we do not make a check for it though we
> should.  The thing that broke this was the fix for PR 45472.  In that pr, we
> have moved a volatile insn too far up because we failed to merge the bits
> describing its volatility when we have processed a control flow split.  The
> code to propagate the insn pattern with the insn merging was added when the
> volatility of the two insns from the both split branches differ. However, the
> volatility of the speculated insn and its original differ: the original insn
> may trap while the speculated version may not.  Thus, we replace a speculative
> pattern with the original one per the PR 45472 fix for no reason.
> 
> The patch for this problem just limits the original fix for PR 45472 to apply
> for non-speculative insns only.  There is no test as it is not so easy to
> construct one -- we could count the number of speculation check in the
> resulting assembly but there is no way to force speculation to happen.
> 
> Ok for trunk?
> 
> gcc/
> 
> 2016-03-14  Andrey Belevantsev  
> 
> PR target/66660
> * sel-sched-ir.c (merge_expr): Do not propagate trap bits into 
> speculative insns.

I think this line doesn't capture the issue at hand well; the issue is not in
propagating trap bits, but rather unintentionally dropping the speculative
pattern, right?  I'd be happier with something like "If the pattern is already
speculative, keep it, and do not check trap bits".

diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
index e181cb9..ec59280 100644
--- a/gcc/sel-sched-ir.c
+++ b/gcc/sel-sched-ir.c
@@ -1871,12 +1871,12 @@ merge_expr (expr_t to, expr_t from, insn_t split_point)
   /* Make sure that speculative pattern is propagated into exprs that
  have non-speculative one.  This will provide us with consistent
  speculative bits and speculative patterns inside expr.  */
-  if ((EXPR_SPEC_DONE_DS (from) != 0
-   && EXPR_SPEC_DONE_DS (to) == 0)
-  /* Do likewise for volatile insns, so that we always retain
-the may_trap_p bit on the resulting expression.  */
-  || (VINSN_MAY_TRAP_P (EXPR_VINSN (from))
- && !VINSN_MAY_TRAP_P (EXPR_VINSN (to
+  if (EXPR_SPEC_DONE_DS (to) == 0
+  && (EXPR_SPEC_DONE_DS (from) != 0
+ /* Do likewise for volatile insns, so that we always retain
+the may_trap_p bit on the resulting expression.  */
+ || (VINSN_MAY_TRAP_P (EXPR_VINSN (from))
+ && !VINSN_MAY_TRAP_P (EXPR_VINSN (to)
 change_vinsn_in_expr (to, EXPR_VINSN (from));

The patch looks unusual in that it reshuffles code while keeping comments; it
seems the upper comment matches the code better now, while the lower one could
be improved to say that may_trap_p is deliberately ignored when 'to' is
already speculated.

Finally, I'd recommend to switch around the two VINSN_MAY_TRAP_P tests so that
condition on 'to' is consistently checked prior to condition on 'from'.

OK with those changes.

Thanks.
Alexander


Re: [PATCH] genrecog: Fix crash on invalid input

2016-03-14 Thread Bernd Schmidt

On 03/14/2016 04:38 PM, Segher Boessenkool wrote:

If your machine description refers to a non-existent predicate genrecog
crashes.  This fixes it.


Might be better to fix the caller?


Bernd



Re: [04/05] Fix PR 69032

2016-03-14 Thread Alexander Monakov
On Mon, 14 Mar 2016, Andrey Belevantsev wrote:
> We fail to find the proper seqno for the fresh bookkeeping copy in this PR.
> The problem is that in get_seqno_by_preds we are iterating over bb from the
> given insn backwards up to the first bb insn.  We skip the initial insn when
> iterating over bb, yet we should take seqno from it.
> 
> The code in question originally didn't include bb head when iterating, and was
> patched to do so in 2011.  The patch was wrong and instead of including bb
> head managed to exclude the original insn itself.  By reading the original and
> patched code I've convinced myself that the right fix will be to do what the
> patch intended and include both the initial insn and the bb head in the
> iteration.
> 
> Ok for trunk?
> 
> gcc/
> 
> 2016-03-14  Andrey Belevantsev  
> 
> PR rtl-optimization/69032
> * sel-sched-ir.c (get_seqno_by_preds): Include both tmp and head when 
> looping backwards over basic block insns.

"both 'insn' and 'head'" (not tmp).

> diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c
> index ec59280..c1a9e55 100644
> --- a/gcc/sel-sched-ir.c
> +++ b/gcc/sel-sched-ir.c
> @@ -4103,11 +4103,14 @@ get_seqno_by_preds (rtx_insn *insn)
>insn_t *preds;
>int n, i, seqno;
>  
> -  while (tmp != head)
> +  /* Loop backwards from insn to head including both.  */

"from INSN to HEAD" (uppercase).

The following structure is equivalent, but would look a bit more canonical:

  for (rtx_insn *tmp = insn; ; tmp = PREV_INSN (tmp))
{
  if (INSN_P (tmp))
return INSN_SEQNO (tmp);
  if (tmp == head)
break;
}

> +  while (1)
>  {
> -  tmp = PREV_INSN (tmp);
>if (INSN_P (tmp))
>  return INSN_SEQNO (tmp);
> +  if (tmp == head)
> + break;
> +  tmp = PREV_INSN (tmp);
>  }
>  
>cfg_preds (bb, &preds, &n);

OK with formatting nits fixed ('while'/'for' spelling change at your choice).

Thanks.
Alexander


Re: [PATCH] Fix PR56365

2016-03-14 Thread Bernhard Reutner-Fischer
On March 14, 2016 12:58:20 PM GMT+01:00, Richard Biener  
wrote:
>
>I am testing the following patch to fix the regression in min/max
>detection introduced by comparison canonicalization like a < 267
>to a <= 266.  The patch allows us to identify all four min/max
>cases in the testcase below.
>
>Bootstrap and regtest running on x86_64-unknown-linux-gnu.

>Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-14.c
>===
>*** gcc/testsuite/gcc.dg/tree-ssa/phi-opt-14.c (revision 0)
>--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-14.c (working copy)
>***
>*** 0 
>--- 1,37 
>+ /* { dg-do compile } */
>+ /* { dg-options "-O -fdump-tree-phiopt1" } */
>+ 
>+ int test_01 (int a)
>+ {
>+   if (127 <= a)

Shouldn't this be >= ?

>+ a = 127;
>+   else if (a <= -128)
>+ a = -128;
>+   return a;
>+ }
>+ int test_02 (int a)
>+ {
>+   if (127 < a)

and this >

>+ a = 127;
>+   else if (a <= -128)
>+ a = -128;
>+   return a;
>+ }
>+ int test_03 (int a)
>+ {
>+   if (127 <= a)

and this >=

>+ a = 127;
>+   else if (a < -128)
>+ a = -128;
>+   return a;
>+ }
>+ int test_04 (int a)
>+ {
>+   if (127 < a)

and >

TIA,
>+ a = 127;
>+   else if (a < -128)
>+ a = -128;
>+   return a;
>+ }
>+ 
>+ /* { dg-final { scan-tree-dump-not "if" "phiopt1" } } */




Re: [PATCH][PR rtl-optimization/69307] Handle hard registers in modes that span more than one register properly

2016-03-14 Thread Jeff Law

On 03/14/2016 03:56 AM, Andrey Belevantsev wrote:


Thank you for checking this in.  I've also tested this patch in the
similar way (forcing selective scheduling for 2nd and both schedulers)
both on x86-64 and ia64.  I've posted the patches for remaining
sel-sched PRs just now -- it took some time bringing our Itaniums back
to life.
No problem.  I found it trolling the P4/P5 regression list.  It was the 
only one that I could wrap my head around easily that night.


Thanks for following-up on the others.  Hopefully between Alexander, 
Bernd and myself we can get them reviewed and work towards getting those 
BZs resolved.


jeff



[PATCH] extend.texi: Expand on the perils of using the 'leaf' attribute.

2016-03-14 Thread Carlos O'Donell
Using the 'leaf' attribute is difficult in certain use cases, and the
documentation rightly points out that signals is one such problem.

We should additionally document the following caveats:

* Indirect function resolvers (thanks to Florian Weimer for catching this).
* Indirect function implementations
* ELF symbol interposition.

Note that neither the C nor the C++ standard talks at all about how
memory is synchronized between the current execution context and that
of a signal handler. Therefore this patch rewords the text to say
"There is no standards compliant way..." although in practice it just
works and one would expect the standards (POSIX) to adopt such language
that existing practice works.

Lastly, we mention that the 'leaf' attribute might simply be removed
if that is the easiest option.

OK to checkin?

For completeness the motivating example from a user was like this:
cat >> leaf.c <<EOF
#include <signal.h>
#include <unistd.h>
#include <pthread.h>

static int tst;

void my_h(int sig)
{
  if (tst == 1)
_exit (0);
  _exit (1);
}

int main()
{
  signal(SIGUSR1, my_h);
  tst++;
  pthread_kill(pthread_self(), SIGUSR1);
  tst--;
  return 2;
}
EOF
gcc -g3 -O3 -o leaf leaf.c -lpthread
./leaf; echo $?
1

Where the global write of tst is elided by the compiler because
pthread_kill is marked __THROW (includes leaf). It's an open
question if pthread_kill should or should not use __THROWNL.
Even if we fix that in glibc, the changes below to the docs are
still important clarifications.

gcc/
2016-03-14  Carlos O'Donell  

* doc/extend.texi (Common Function Attributes): Describe ifunc impact
on leaf attribute.

Index: extend.texi
===
--- extend.texi (revision 234183)
+++ extend.texi (working copy)
@@ -2786,9 +2786,17 @@
 is a leaf function, but @code{qsort} is not.
 
 Note that leaf functions might invoke signals and signal handlers might be
-defined in the current compilation unit and use static variables.  The only
-compliant way to write such a signal handler is to declare such variables
-@code{volatile}.
+defined in the current compilation unit and use static variables.  Similarly,
+when lazy symbol resolution is in effect, leaf functions might invoke indirect
+functions whose resolver function or implementation function might be defined
+in the current compilation unit and use static variables. There is no standards
+compliant way to write such a signal handler, resolver function, or
+implementation function, and the best that you can do is to remove the leaf
+attribute or mark all such variables @code{volatile}.  Lastly, for ELF-based
+systems which support symbol interposition one should take care that functions
+defined in the current compilation unit do not unexpectedly interpose other
+symbols based on the defined standards mode otherwise an inadvertent callback
+would be added.
 
 The attribute has no effect on functions defined within the current compilation
 unit.  This is to allow easy merging of multiple compilation units into one,
--
Cheers,
Carlos.


Re: [PATCH] genrecog: Fix crash on invalid input

2016-03-14 Thread Segher Boessenkool
On Mon, Mar 14, 2016 at 06:39:12PM +0100, Bernd Schmidt wrote:
> On 03/14/2016 04:38 PM, Segher Boessenkool wrote:
> >If your machine description refers to a non-existent predicate genrecog
> >crashes.  This fixes it.
> 
> Might be better to fix the caller?

Yeah maybe.  The next function that takes a pred also tests for !pred,
but let me see what the caller of the failing case is.


Segher


Re: [PATCH] PR69195, Reload confused by invalid reg equivs

2016-03-14 Thread Jeff Law

On 03/14/2016 03:56 AM, Richard Biener wrote:


Undefined?  Most likely.  But we still have to do something sensible. As
Jakub noted, a user could create the problematic code just as easily as
DCE/DSE, so IRA probably needs to be tolerant of this situation.

So it seems like you're suggesting we leave DCE/DSE alone (declaring this
usage undefined) and fix IRA to be tolerant, right?


Tolerant as in not crash?  Yes.

Right.  Tolerant as in not crash.




Using alloca to create/jump to code on the stack should work (we might
transform that into a decl though).


Given that executable stacks are a huge security hole, I'd be willing to go
out on a limb and declare that undefined as well.  It's not as clear cut,
but that's the argument I'd make.


Well, I thought about somebody trying to build trampolines in a way exposed
to GCC.

Right or other dynamic, short-lived code fragments.




And yes, I realize that goes in opposition to what GCC has allowed for 20+
years.  I still think it'd be the right thing to do.


Did we allow this?  Not by design but rather by accident I suppose.
I don't think it was ever specifically allowed or disallowed; like many 
of the old extensions, it was never crisply defined.


I can distinctly remember having to declare that taking the address of a 
blob of code on the stack, then calling/jumping to it after the 
containing function went out of scope as undefined.  I think it was the 
address of a trampoline, but I'm not entirely sure -- there's a small 
chance it was user-created code.  I only remember it because I was 
surprised at how controversial it was to declare that as undefined :(


That most likely predates egcs, so the discussion is not likely in the 
public archives.  It may have been a private discussion between Kenner, 
Jim, Doug and myself or some subset thereof.



jeff


Re: [AArch64] Emit square root using the Newton series

2016-03-14 Thread Wilco Dijkstra
Evandro Menezes  wrote:
>
> I got the scalar version going, but I'm stuck with the vector version.
> As you can see above, I need to use the complement of the mask produced
> by FCMEQ to squelch the offending vector element. However, the way in
> which FCMEQ is defined in GCC, it produces an integer vector and the
> SIMD AND only takes integer vectors.  I'm stuck at how to pass an FP
> vector to AND and then its integer vector back to an FP insn.

You can use gen_rtx_SUBREG(mcmp, xsqrt, 0) to change the mode to an 
integer vector on the AND instruction and back to mode for the destination.

Wilco



C++ PATCH for range-for tweak

2016-03-14 Thread Jason Merrill
A proposal accepted at the last meeting allows the deduced iterator and 
end variables in range-based for to have different types, as long as 
they can be compared.  This is a very simple change, limited to C++1z 
mode, and desired by some of the heaviest users of concepts, so I'm 
going to go ahead and make it.


Tested x86_64-pc-linux-gnu, applying to trunk.
commit 196599301ea3a46cda7eb633623b0dfd2079bc45
Author: Jason Merrill 
Date:   Sat Mar 5 07:45:02 2016 -0500

	P08184R0: Generalizing the Range-Based For Loop

	* parser.c (cp_convert_range_for): Set the type of __end separately.
	(cp_parser_perform_range_for_lookup): Allow different begin/end
	types if they are comparable.

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 6ae45b0..d38f1dd 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -11353,6 +11353,8 @@ cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
 		  /*is_constant_init*/false, NULL_TREE,
 		  LOOKUP_ONLYCONVERTING);
 
+  if (cxx_dialect >= cxx1z)
+iter_type = cv_unqualified (TREE_TYPE (end_expr));
   end = build_decl (input_location, VAR_DECL,
 		get_identifier ("__for_end"), iter_type);
   TREE_USED (end) = 1;
@@ -11488,9 +11490,21 @@ cp_parser_perform_range_for_lookup (tree range, tree *begin, tree *end)
 	  /* The unqualified type of the __begin and __end temporaries should
 	 be the same, as required by the multiple auto declaration.  */
 	  if (!same_type_p (iter_type, cv_unqualified (TREE_TYPE (*end))))
-	error ("inconsistent begin/end types in range-based %<for%> "
-		   "statement: %qT and %qT",
-		   TREE_TYPE (*begin), TREE_TYPE (*end));
+	{
+	  if (cxx_dialect >= cxx1z
+		  && (build_x_binary_op (input_location, NE_EXPR,
+	 *begin, ERROR_MARK,
+	 *end, ERROR_MARK,
+	 NULL, tf_none)
+		  != error_mark_node))
+		/* P08184R0 allows __begin and __end to have different types,
+		   but make sure they are comparable so we can give a better
+		   diagnostic.  */;
+	  else
+		error ("inconsistent begin/end types in range-based %<for%> "
+		   "statement: %qT and %qT",
+		   TREE_TYPE (*begin), TREE_TYPE (*end));
+	}
 	  return iter_type;
 	}
 }
diff --git a/gcc/testsuite/g++.dg/cpp0x/range-for5.C b/gcc/testsuite/g++.dg/cpp0x/range-for5.C
index bf04406..2a20db4 100644
--- a/gcc/testsuite/g++.dg/cpp0x/range-for5.C
+++ b/gcc/testsuite/g++.dg/cpp0x/range-for5.C
@@ -31,7 +31,7 @@ struct Explicit
 void test1()
 {
   container c;
-  for (int x : c) // { dg-error "inconsistent|conversion" }
+  for (int x : c) // { dg-error "inconsistent|conversion|comparison" }
 ;
 
   int a[2] = {1,2};
diff --git a/gcc/testsuite/g++.dg/cpp1z/range-for1.C b/gcc/testsuite/g++.dg/cpp1z/range-for1.C
new file mode 100644
index 0000000..370381a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/range-for1.C
@@ -0,0 +1,23 @@
+// P08184R0: Generalizing the Range-Based For Loop
+// { dg-options "-std=c++1z" }
+
+struct A {
+  int ar[4];
+  int *begin() { return ar; }
+  struct end_t {
+int *p;
+friend bool operator!= (int *p, end_t e) { return p != e.p; }
+  };
+  end_t end() { return { &ar[4] }; }
+};
+
+int main()
+{
+  A a { 1, 2, 3, 4 };
+  int i = 1;
+  for (auto x: a)
+if (x != i++)
+  __builtin_abort ();
+  if (i != 5)
+__builtin_abort ();
+}


Re: [PATCH] genrecog: Fix crash on invalid input

2016-03-14 Thread Segher Boessenkool
On Mon, Mar 14, 2016 at 01:41:38PM -0500, Segher Boessenkool wrote:
> On Mon, Mar 14, 2016 at 06:39:12PM +0100, Bernd Schmidt wrote:
> > On 03/14/2016 04:38 PM, Segher Boessenkool wrote:
> > >If your machine description refers to a non-existent predicate genrecog
> > >crashes.  This fixes it.
> > 
> > Might be better to fix the caller?
> 
> Yeah maybe.  The next function that takes a pred also tests for !pred,
> but let me see what the caller of the failing case is.

There is just the single caller, and pred is set right before the call
there.  How about this patch, then?


Segher


Subject: [PATCH] genrecog: Fix crash on invalid input

If your machine description refers to a non-existent predicate genrecog
crashes.  This fixes it.


2016-03-14  Segher Boessenkool  

* genrecog.c (match_pattern_2): If pred is NULL don't call
safe_predicate_mode on it.

---
 gcc/genrecog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/genrecog.c b/gcc/genrecog.c
index f8b8ef4..47e4266 100644
--- a/gcc/genrecog.c
+++ b/gcc/genrecog.c
@@ -4037,7 +4037,7 @@ match_pattern_2 (state *s, md_rtx_info *info, position *pos, rtx pattern)
/* Check the mode first, to distinguish things like SImode
   and DImode register_operands, as described above.  */
machine_mode mode = GET_MODE (e->pattern);
-   if (safe_predicate_mode (pred, mode))
+   if (pred && safe_predicate_mode (pred, mode))
  s = add_decision (s, rtx_test::mode (e->pos), mode, true);
 
/* Assign to operands[] first, so that the rtx usually doesn't
-- 
1.9.3



Re: [PATCH] Fix PR56365

2016-03-14 Thread Richard Biener
On March 14, 2016 7:25:31 PM GMT+01:00, Bernhard Reutner-Fischer 
 wrote:
>On March 14, 2016 12:58:20 PM GMT+01:00, Richard Biener
> wrote:
>>
>>I am testing the following patch to fix the regression in min/max
>>detection introduced by comparison canonicalization like a < 267
>>to a <= 266.  The patch allows us to identify all four min/max
>>cases in the testcase below.
>>
>>Bootstrap and regtest running on x86_64-unknown-linux-gnu.
>
>>Index: gcc/testsuite/gcc.dg/tree-ssa/phi-opt-14.c
>>===
>>*** gcc/testsuite/gcc.dg/tree-ssa/phi-opt-14.c(revision 0)
>>--- gcc/testsuite/gcc.dg/tree-ssa/phi-opt-14.c(working copy)
>>***
>>*** 0 
>>--- 1,37 
>>+ /* { dg-do compile } */
>>+ /* { dg-options "-O -fdump-tree-phiopt1" } */
>>+ 
>>+ int test_01 (int a)
>>+ {
>>+   if (127 <= a)
>
>Shouldn't this be >= ?

No, note how the constant is left of the <=.

Richard.

>>+ a = 127;
>>+   else if (a <= -128)
>>+ a = -128;
>>+   return a;
>>+ }
>>+ int test_02 (int a)
>>+ {
>>+   if (127 < a)
>
>and this >
>
>>+ a = 127;
>>+   else if (a <= -128)
>>+ a = -128;
>>+   return a;
>>+ }
>>+ int test_03 (int a)
>>+ {
>>+   if (127 <= a)
>
>and this >=
>
>>+ a = 127;
>>+   else if (a < -128)
>>+ a = -128;
>>+   return a;
>>+ }
>>+ int test_04 (int a)
>>+ {
>>+   if (127 < a)
>
>and >
>
>TIA,
>>+ a = 127;
>>+   else if (a < -128)
>>+ a = -128;
>>+   return a;
>>+ }
>>+ 
>>+ /* { dg-final { scan-tree-dump-not "if" "phiopt1" } } */




[PATCH, testsuite] Fix ifcvt-4.c for PowerPC

2016-03-14 Thread Pat Haugen
As stated in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68232, this 
test needs -misel on powerpc to pass. Verified the following fixes the 
test on both powerpc64/powerpc64le. Ok for trunk?


-Pat

testsuite/ChangeLog:
2016-03-14  Pat Haugen  

* gcc.dg/ifcvt-4.c: Add -misel for powerpc* and remove skip for 
powerpc64le.



Index: testsuite/gcc.dg/ifcvt-4.c
===
--- testsuite/gcc.dg/ifcvt-4.c  (revision 234189)
+++ testsuite/gcc.dg/ifcvt-4.c  (working copy)
@@ -1,5 +1,6 @@
 /* { dg-options "-fdump-rtl-ce1 -O2 --param 
max-rtl-if-conversion-insns=3" } */
-/* { dg-skip-if "Multiple set if-conversion not guaranteed on all 
subtargets" { "arm*-*-* powerpc64le*-*-* visium-*-*" } {"*"} { "" } }  */

+/* { dg-additional-options "-misel" { target { powerpc*-*-* } } } */
+/* { dg-skip-if "Multiple set if-conversion not guaranteed on all 
subtargets" { "arm*-*-* visium-*-*" } {"*"} { "" } }  */


 int
 foo (int x, int y, int a)



[PATCH] c++/67376 Comparison with pointer to past-the-end, of array fails inside constant expression

2016-03-14 Thread Martin Sebor

The attached patch fixes the outstanding cases mentioned in comment
10 on bug c++/67376.  While testing the fix I uncovered a number of
other related problems; without fixes for them the test would have
been incomplete.  They include:

PR c++/70170 - [6 regression] bogus not a constant expression error
comparing pointer to array to null
PR c++/70172 - incorrect reinterpret_cast from integer to pointer
error on invalid constexpr initialization
PR c++/60760 - arithmetic on null pointers should not be allowed
in constant expressions

In addition, I include a fix for the issue below that I also came
across while testing the patch and that makes root causing constexpr
problems due to out-of-bounds array subscripts easier:
PR c++/70228 - insufficient detail in diagnostics for a constexpr
out of bounds array subscript

In a discussion of bug 70170 between those CC'd, Marek posted
a prototype patch for match.pd.  While the patch seems to do
the right thing as far as the bug goes, like my own first attempt
at a fix in fold-const.c it caused a couple of regressions (in
pr21294.c and in pr44555.c).  Since I'm not yet familiar enough
with match.pd, in the interest of time I solved those regressions
in fold-const.c rather than in match.pd.

Tested on x86_64.

Martin
PR c++/67376 - [5/6 regression] Comparison with pointer to past-the-end
	of array fails inside constant expression
PR c++/70170 - [6 regression] bogus not a constant expression error comparing
	pointer to array to null
PR c++/70172 - incorrect reinterpret_cast from integer to pointer error
	on invalid constexpr initialization
PR c++/60760 - arithmetic on null pointers should not be allowed in constant
	expressions
PR c++/70228 - insufficient detail in diagnostics for a constexpr out of bounds
	array subscript

gcc/testsuite/ChangeLog:
2016-03-14  Martin Sebor  

	PR c++/67376
	PR c++/70170
	PR c++/70172
	PR c++/60760
	PR c++/70228
	* g++.dg/cpp0x/constexpr-array-ptr10.C: New test.
	* g++.dg/cpp0x/constexpr-array-ptr11.C: New test.
	* g++.dg/cpp0x/constexpr-array-ptr9.C: New test.
	* g++.dg/cpp0x/constexpr-array5.C: Adjust text of expected diagnostic.
	* g++.dg/cpp0x/constexpr-string.C: Same.
	* g++.dg/cpp0x/constexpr-wstring2.C: Same.
	* g++.dg/cpp0x/pr65398.C: Same.
	* g++.dg/ext/constexpr-vla1.C: Same.
	* g++.dg/ext/constexpr-vla2.C: Same.
	* g++.dg/ext/constexpr-vla3.C: Same.
	* g++.dg/ubsan/pr63956.C: Same.

gcc/cp/ChangeLog:
2016-03-14  Martin Sebor  

	PR c++/67376
	PR c++/70170
	PR c++/70172
	PR c++/60760
	PR c++/70228
	(cxx_eval_binary_expression): Add argument.
	(cxx_eval_component_reference): Same.
	(cxx_eval_constant_expression): Same.
	(cxx_eval_indirect_ref): Same.
	(cxx_eval_outermost_constant_expr): Same.
	(diag_array_subscript): New function.
	* constexpr.c (cxx_eval_call_expression): Adjust.
	(cxx_eval_conditional_expression): Same.
	(cxx_eval_array_reference): Detect null pointers.
	(cxx_eval_statement_list): Adjust.

gcc/ChangeLog:
2016-03-14  Martin Sebor  

	PR c++/67376
	* fold-const.c (fold_comparison): Fold equality and relational
	expressions involving null pointers.

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 5f97c9d..5ec5034 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -918,7 +918,8 @@ struct constexpr_ctx {
 static GTY (()) hash_table *constexpr_call_table;
 
 static tree cxx_eval_constant_expression (const constexpr_ctx *, tree,
-	  bool, bool *, bool *, tree * = NULL);
+	  bool, bool *, bool *, bool * = NULL,
+  tree * = NULL);
 
 /* Compute a hash value for a constexpr call representation.  */
 
@@ -1390,7 +1391,7 @@ cxx_eval_call_expression (const constexpr_ctx *ctx, tree t,
 	  tree jump_target = NULL_TREE;
 	  cxx_eval_constant_expression (ctx, body,
 	lval, non_constant_p, overflow_p,
-	&jump_target);
+NULL, &jump_target);
 
 	  if (DECL_CONSTRUCTOR_P (fun))
 	/* This can be null for a subobject constructor call, in
@@ -1607,20 +1608,21 @@ cxx_eval_unary_expression (const constexpr_ctx *ctx, tree t,
 static tree
 cxx_eval_binary_expression (const constexpr_ctx *ctx, tree t,
 			bool /*lval*/,
-			bool *non_constant_p, bool *overflow_p)
+			bool *non_constant_p, bool *overflow_p,
+bool *nullptr_p)
 {
   tree r = NULL_TREE;
   tree orig_lhs = TREE_OPERAND (t, 0);
   tree orig_rhs = TREE_OPERAND (t, 1);
   tree lhs, rhs;
   lhs = cxx_eval_constant_expression (ctx, orig_lhs, /*lval*/false,
-  non_constant_p, overflow_p);
+  non_constant_p, overflow_p, nullptr_p);
   /* Don't VERIFY_CONSTANT here, it's unnecessary and will break pointer
  subtraction.  */
   if (*non_constant_p)
 return t;
   rhs = cxx_eval_constant_expression (ctx, orig_rhs, /*lval*/false,
-  non_constant_p, overflow_p);
+  non_constant_p, overflow_p, nullptr_p);
   if (*non_constant_p)
 return t;
 
@@ -1642,6 +1644,15 @@ cxx_eval_binary_expre

Re: [PATCH] 69517 - [5/6 regression] SEGV on a VLA with excess initializer elements

2016-03-14 Thread Martin Sebor

Ping:
  https://gcc.gnu.org/ml/gcc-patches/2016-03/msg00441.html

On 03/06/2016 06:38 PM, Martin Sebor wrote:

GCC 4.9 had added support for C++ VLAs as specified in WG21
document N3639 expected to be included in C++ 14.  However,
WG21 ultimately decided not to include N3639 in C++ 14 and
the G++ support was partially removed in 5.1.  Unfortunately,
the removal rendered some safe albeit erroneous G++ 4.9 code
undefined.  This patch restores the well-defined behavior of
such code by having it throw an exception in response to
the erroneous conditions.

While testing the patch I found a number of other problems in
the G++ support for VLAs, including PR c++/70019 - VLA size
overflow not detected, which was never implemented (not even
in 4.9).  Since this is closely related to the regression
discussed in 69517 the patch also provides that support.

There are a few additional points to note about the patch:

1) It restores the std::bad_array_length exception from N3639,
even though the class isn't specified by the C++ standard.
At first I thought that introducing a different (private)
type would be more appropriate, but in the end couldn't come
up with a good argument for not keeping the same type.  Using
the same type also allows programs that rely on the exception
and that were built with GCC 4.9 to be ported to GCC 6 without
change.

2) It hardwires a rather arbitrarily restrictive limit of 64 KB
on the size of the biggest C++ VLA.  (This could stand to be
improved and made more intelligent, and perhaps integrated
with stack  checking via -fstack-limit, after the GCC 6
release.)

3) By throwing an exception for erroneous VLAs the patch largely
defeats the VLA Sanitizer.  The sanitizer is still useful in
C++ 98 mode where the N3639 VLA runtime checking is disabled,
and when exceptions are disabled via -fno-exceptions.
Disabling  the VLA checking in C++ 98 mode doesn't seem like
a useful feature, but I didn't feel like reverting what was
a deliberate decision.

Martin




Re: C++ PATCH for range-for tweak

2016-03-14 Thread Florian Weimer
* Jason Merrill:

>   P08184R0: Generalizing the Range-Based For Loop

How can one resolve this reference?  It's obviously not a PR number in
GCC Bugzilla.

I found this after some searching:



But it lacks the additional “8”.


[PATCH] Fix LRA ICE (PR middle-end/70219)

2016-03-14 Thread Jakub Jelinek
Hi!

The newly added assert in delete_move_and_clobber requires dregno > 0,
but dregno == 0 is also normal (e.g. in the testcase below we get dregno ==
0, because it uses %rax).  Only dregno < 0 is special and we shouldn't see
it here.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-03-14  Jakub Jelinek  

PR middle-end/70219
* lra-constraints.c (delete_move_and_clobber): Change assertion
to also allow dregno == 0.

* gcc.dg/pr70219.c: New test.

--- gcc/lra-constraints.c.jj2016-03-13 21:39:24.0 +0100
+++ gcc/lra-constraints.c   2016-03-14 09:16:39.849241403 +0100
@@ -5861,7 +5861,7 @@ delete_move_and_clobber (rtx_insn *insn,
   rtx_insn *prev_insn = PREV_INSN (insn);
 
   lra_set_insn_deleted (insn);
-  lra_assert (dregno > 0);
+  lra_assert (dregno >= 0);
   if (prev_insn != NULL && NONDEBUG_INSN_P (prev_insn)
   && GET_CODE (PATTERN (prev_insn)) == CLOBBER
   && dregno == get_regno (XEXP (PATTERN (prev_insn), 0)))
--- gcc/testsuite/gcc.dg/pr70219.c.jj   2016-03-14 09:15:48.887941755 +0100
+++ gcc/testsuite/gcc.dg/pr70219.c  2016-03-14 09:13:58.0 +0100
@@ -0,0 +1,18 @@
+/* PR middle-end/70219 */
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O1 -w -Wno-psabi" } */
+
+typedef int B __attribute__ ((vector_size (32)));
+typedef int D __attribute__ ((vector_size (32)));
+typedef long E __attribute__ ((vector_size (32)));
+typedef __int128 F;
+typedef __int128 G __attribute__ ((vector_size (32)));
+
+F
+foo (int a, unsigned b, F c, B d, G e, B f, D g, E h, G i)
+{
+  b /= c;
+  e /= (G) ~d;
+  h -= (E){ g[4], e[1], 64, ~f[1] };
+  return b + e[1] + h[0] + h[1] + i[1];
+}

Jakub


[C++ PATCH] Fix -fsanitize=vptr (PR c++/70147)

2016-03-14 Thread Jakub Jelinek
Hi!

My recent patch for PR70035 broke -fsanitize=vptr, the early clearing
of _vptr.* pointers in the objects can crash in constructors with _vtt_parm
parameter.

This patch arranges for them to use *_vtt_parm instead of the base vptr
(which we clear instead of initializing).

The first testcase is just to make sure we don't crash, the second one
is an attempt to find out what is and what is not valid C++ and what we
can (easily) diagnose.

According to Jonathan, in the second testcase in B::B() all the calls
to g(), h() and i() methods before the E and F base classes are constructed
are invalid, while in the non-static data member mem initializers and
within body they are ok.  With the patch we diagnose all but the
g() call in F() base constructor argument (because the E() construction
initializes the vptr and we don't clear it once again afterwards).
clang doesn't instrument any of these, but they warn at compile time about
the g() call in E(g() + ...) and both h() calls (in E() and F()), but e.g.
doesn't warn about either of the calls to i().  At the point where they are
called, A() is already constructed, but other bases aren't constructed yet.
If we should instrument just what clang complains about at compile time,
we could e.g. in addition to this patch conditionalize the clearing of virtual
base vptr pointers by test on the __in_chrg argument, thus only clear those
in the comp ctor.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-03-14  Jakub Jelinek  

PR c++/70147
* cp-ubsan.c (cp_ubsan_maybe_initialize_vtbl_ptrs): Temporarily
set in_base_initializer.

* g++.dg/ubsan/pr70147-1.C: New test.
* g++.dg/ubsan/pr70147-2.C: New test.

--- gcc/cp/cp-ubsan.c.jj2016-03-04 23:09:53.0 +0100
+++ gcc/cp/cp-ubsan.c   2016-03-14 15:08:18.107919371 +0100
@@ -318,9 +318,15 @@ cp_ubsan_maybe_initialize_vtbl_ptrs (tre
 
   tree type = TREE_TYPE (TREE_TYPE (addr));
   tree list = build_tree_list (type, addr);
+  /* We cannot rely on the vtable being set up.  We have to indirect via the
+ vtt_parm.  */
+  int save_in_base_initializer = in_base_initializer;
+  in_base_initializer = 1;
 
   /* Walk through the hierarchy, initializing the vptr in each base
  class to NULL.  */
   dfs_walk_once (TYPE_BINFO (type), cp_ubsan_dfs_initialize_vtbl_ptrs,
 NULL, list);
+
+  in_base_initializer = save_in_base_initializer;
 }
--- gcc/testsuite/g++.dg/ubsan/pr70147-1.C.jj   2016-03-14 15:20:03.648332034 
+0100
+++ gcc/testsuite/g++.dg/ubsan/pr70147-1.C  2016-03-14 15:18:52.0 
+0100
@@ -0,0 +1,12 @@
+// PR c++/70147
+// { dg-do run }
+// { dg-options "-fsanitize=vptr" }
+
+struct A { A () {} virtual void f () {} };
+struct B : virtual A { B () {} virtual void f () {} };
+struct C : B, virtual A { C () {} } c;
+
+int
+main ()
+{
+}
--- gcc/testsuite/g++.dg/ubsan/pr70147-2.C.jj   2016-03-14 17:07:19.638824667 
+0100
+++ gcc/testsuite/g++.dg/ubsan/pr70147-2.C  2016-03-14 18:09:45.934170053 
+0100
@@ -0,0 +1,83 @@
+// PR c++/70147
+// { dg-do run }
+// { dg-skip-if "" { *-*-* }  { "*" } { "-O0" } }
+// { dg-options "-fsanitize=vptr" }
+
+struct A
+{
+  A () : a (0) {}
+  A (int x) : a (x) {}
+  virtual void f () {}
+  virtual int i () { int r = 0; __asm ("" : "+r" (r)); return r; }
+  int a;
+};
+struct E
+{
+  E () : e (0) {}
+  E (int x) : e (x) {}
+  virtual void f () {}
+  virtual int g () { int r = 0; __asm ("" : "+r" (r)); return r; }
+  int e;
+};
+struct F
+{
+  F () : f (0) {}
+  F (int x) : f (x) {}
+  virtual int h () { int r = 0; __asm ("" : "+r" (r)); return r; }
+  int f;
+};
+struct B : virtual A, public E, public F
+{
+  B ()
+: E (
+ g ()
+ + h ()
+ + i ()
+),
+  F (g ()
+ + h ()
+ + i ()),
+  b (g () + h () + i ())   // It is ok to call the methods here.
+  {
+b += g () + h () + i ();   // And here too.
+  }
+  virtual void f () {}
+  int b;
+};
+struct C : B, virtual A
+{
+  C () {}
+};
+
+int
+main ()
+{
+  C c;
+}
+
+// { dg-output "\[^\n\r]*pr70147-2.C:33:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'E'(\n|\r\n|\r)" }
+// { dg-output "0x\[0-9a-fA-F]*: note: object has invalid vptr(\n|\r\n|\r)" }
+// { dg-output "  ?.. .. .. ..  ?.. .. .. ..  ?.. .. .. .. 
\[^\n\r]*(\n|\r\n|\r)" }
+// { dg-output "  ?\\^~~\[^\n\r]*(\n|\r\n|\r)" }
+// { dg-output "  ?invalid vptr(\n|\r\n|\r)" }
+// { dg-output "\[^\n\r]*pr70147-2.C:34:\[0-9]*: runtime error: member call on 
address 0x\[0-9a-fA-F]* which does not point to an object of type 
'F'(\n|\r\n|\r)" }
+// { dg-output "0x\[0-9a-fA-F]*: note: object has invalid vptr(\n|\r\n|\r)" }
+// { dg-output "  ?.. .. .. ..  ?.. .. .. ..  ?.. .. .. .. 
\[^\n\r]*(\n|\r\n|\r)" }
+// { dg-output "  ?\\^~~\[^\n\r]*(\n|\r\n|\r)" }
+// { dg-output "  ?invalid vptr\[^\n\r]*(\n|\r\n|\r)" }
+// { 

Re: [PATCH] Fix LRA ICE (PR middle-end/70219)

2016-03-14 Thread Jeff Law

On 03/14/2016 03:32 PM, Jakub Jelinek wrote:

Hi!

The newly added assert in delete_move_and_clobber requires dregno > 0,
but dregno == 0 is also normal (e.g. in the testcase below we get dregno ==
0, because it uses %rax).  Only dregno < 0 is special and we shouldn't see
it here.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2016-03-14  Jakub Jelinek  

PR middle-end/70219
* lra-constraints.c (delete_move_and_clobber): Change assertion
to also allow dregno == 0.

* gcc.dg/pr70219.c: New test.

OK.

Jeff



Re: [PATCH] c++/67376 Comparison with pointer to past-the-end, of array fails inside constant expression

2016-03-14 Thread Jakub Jelinek
On Mon, Mar 14, 2016 at 03:25:07PM -0600, Martin Sebor wrote:
> PR c++/67376 - [5/6 regression] Comparison with pointer to past-the-end
>   of array fails inside constant expression
> PR c++/70170 - [6 regression] bogus not a constant expression error comparing
>   pointer to array to null
> PR c++/70172 - incorrect reinterpret_cast from integer to pointer error
>   on invalid constexpr initialization
> PR c++/60760 - arithmetic on null pointers should not be allowed in constant
>   expressions
> PR c++/70228 - insufficient detail in diagnostics for a constexpr out of 
> bounds
>   array subscript

Can you please check up the formatting in the patch?
Seems e.g. you've replaced tons of tabs with 8 spaces etc. (check your
editor setting, and check the patch with contrib/check-GNU-style.sh).
There is some trailing whitespace too, spaces before [, etc.

Jakub


Re: [PATCH] extend.texi: Expand on the perils of using the 'leaf' attribute.

2016-03-14 Thread Sandra Loosemore

On 03/14/2016 12:40 PM, Carlos O'Donell wrote:

Using the 'leaf' attribute is difficult in certain use cases, and the
documentation rightly points out that signals is one such problem.

We should additionally document the following caveats:

* Indirect function resolvers (thanks to Florian Weimer for catching this).
* Indirect function implementations
* ELF symbol interposition.

[snip]

gcc/
2016-03-14  Carlos O'Donell  

* doc/extend.texi (Common Function Attributes): Describe ifunc impact
on leaf attribute.



Hmmm.  Both your patch and the original text really need some 
copy-editing to fix noun/verb agreement, punctuation, etc.  How about 
something like the attached patch?  I just threw this together and 
haven't tested this in any way, but if you confirm that it builds and it 
looks OK to you, feel free to check it in.


-Sandra

Index: gcc/doc/extend.texi
===
--- gcc/doc/extend.texi	(revision 234198)
+++ gcc/doc/extend.texi	(working copy)
@@ -2773,22 +2773,33 @@ refer to the following subsections for d
 @item leaf
 @cindex @code{leaf} function attribute
 Calls to external functions with this attribute must return to the current
-compilation unit only by return or by exception handling.  In particular, leaf
-functions are not allowed to call callback function passed to it from the current
-compilation unit or directly call functions exported by the unit or longjmp
-into the unit.  Leaf function might still call functions from other compilation
+compilation unit only by return or by exception handling.  In particular,
+a leaf function is not allowed to invoke callback functions passed to it
+from the current compilation unit, directly call functions exported by the
+unit, or @code{longjmp} into the unit.  
+Leaf functions might still call functions from other compilation
 units and thus they are not necessarily leaf in the sense that they contain no
 function calls at all.
 
 The attribute is intended for library functions to improve dataflow analysis.
-The compiler takes the hint that any data not escaping the current compilation unit can
-not be used or modified by the leaf function.  For example, the @code{sin} function
+The compiler takes the hint that any data not escaping the current
+compilation unit cannot be used or modified by the leaf function.  
+For example, the @code{sin} function
 is a leaf function, but @code{qsort} is not.
 
-Note that leaf functions might invoke signals and signal handlers might be
-defined in the current compilation unit and use static variables.  The only
-compliant way to write such a signal handler is to declare such variables
-@code{volatile}.
+Note that leaf functions might indirectly run a signal handler
+defined in the current compilation unit that uses static variables.  Similarly,
+when lazy symbol resolution is in effect, leaf functions might invoke indirect
+functions whose resolver function or implementation function is defined
+in the current compilation unit and uses static variables. There is no 
+standard-compliant way to write such a signal handler, resolver function, or
+implementation function, and the best that you can do is to remove the
+@code{leaf} attribute or mark all such static variables @code{volatile}.  
+Lastly, for ELF-based
+systems that support symbol interposition, you should take care that functions
+defined in the current compilation unit do not unexpectedly interpose other
+symbols based on the defined standards mode; otherwise an inadvertent callback
+would be added.
 
 The attribute has no effect on functions defined within the current compilation
 unit.  This is to allow easy merging of multiple compilation units into one,


Re: [RFA][PATCH][PR tree-optimization/64058] Improve and stabilize sorting of coalesce pairs

2016-03-14 Thread Jeff Law

On 03/11/2016 03:02 AM, Richard Biener wrote:



For the other part I noticed a few things
  1) having a bitmap_count_ior_bits () would be an improvement

Yea, I almost built one.  That's easy enough to add.


  2) you might end up with redundant work(?) as you are iterating
  over SSA name coalesce candidates but look at partition conflicts
We'd have redundant work if the elements mapped back to SSA_NAMEs which 
in turn mapped to partitions which appeared as a coalescing pair 
already.  But there's no way to know that in advance.


This is mitigated somewhat in the next version which computes the 
conflict sizes lazily when the qsort comparison function is given two 
conflict pairs with an equal cost.





  3) having this extra heuristic might be best guarded by
flag_expensive_optimizations
Perhaps.  I don't see this tie breaker as being all that expensive.  But 
I won't object to guarding with flag_expensive_optimizations.



  as it is a quite expensive "tie breaker" - maybe even improve things
by first sorting
  after cost and then only doing the tie breaking when necessary, re-sorting the
  sub-sequence with same original cost.  It may also be enough to only perform
  this for "important" candidates, say within the first 100 of the function or 
so
  or with cost > X.
The problem with this is qsort's interface into the comparison function 
has a terribly narrow API and I don't think we want to rely on qsort_r. 
 In fact that's the whole reason why I didn't do lazy evaluation on the 
conflict sizes initially.


To work around the narrow API in the comparison function we have to 
either store additional data in each node or have them available in 
globals.  The former would be horribly wasteful, the latter is just 
ugly.  I choose the latter in the lazy evaluation of the conflicts version.




And finally - if we really think that looking at the conflict size
increase is the way to go
it would maybe be better to use a fibheap updating keys in attempt_coalesce
when we merge the conflicts.  That would also mean to work on a list (fibheap)
of coalesces of partitions rather than SSA names.
I really doubt it's worth this effort.  The literature I've been looking 
at in this space essentially says that given a reasonable coalescer, 
improvements, while measurable, are very very small in terms of the 
efficiency of the final code.


Thus I rejected conservative coalescing + iteration, biased coalescing, 
& trial coalescing as too expensive given the trivial benefit. 
Similarly I rejected trying to update the costs as we coalesce 
partitions.  A single successful coalesce could have a significant 
ripple effect.  Perhaps that could be mitigated by realizing that many 
updates wouldn't be needed, but it's just a level of complexity that's 
not needed here.


And note we work on partitions, not SSA_NAMEs.  It just happens that we 
start with each SSA_NAME in its own partition.  Most SSA_NAMEs actually 
don't participate in coalescing as they're not used in a copy 
instruction or as a phi arg/result.   That's why we compact the 
partitions after we've scanned the IL for names that are going to 
participate in coalescing.








I think the patch is reasonable enough for GCC 6 if we can bring compile-time
cost down a bit (it can be quadratic in the number of SSA names if we have
a lot of coalesce candidates and nearly full conflict bitmaps - of course that's
not a case we handle very well right now but still).  I would have hoped the
index part of the patch fixed the regression (by luck)...
I'd hoped it'd fix the regression by luck as well, but that was not the 
case :(





As far as a testcase goes we want to scan the dumps for the actual coalesces
being done.  Might be a bit fragile though...
I suspect that's going to be quite fragile and may have more target 
dependencies than we'd like (due to branch costing and such).




Jeff


Re: [PATCH] Fix PR56365

2016-03-14 Thread Bernhard Reutner-Fischer
On March 14, 2016 9:21:25 PM GMT+01:00, Richard Biener 
 wrote:
>On March 14, 2016 7:25:31 PM GMT+01:00, Bernhard Reutner-Fischer
> wrote:
>>On March 14, 2016 12:58:20 PM GMT+01:00, Richard Biener
>> wrote:

>>>+ 
>>>+ int test_01 (int a)
>>>+ {
>>>+   if (127 <= a)
>>
>>Shouldn't this be >= ?
>
>No, note how the constant is left of the <=.
>
Right, I managed to misread this.
Sorry for the noise!



Re: [PATCH, 4/16] Implement -foffload-alias

2016-03-14 Thread Tom de Vries

On 14/03/16 14:16, Tom de Vries wrote:

On 02/12/15 10:58, Jakub Jelinek wrote:

On Fri, Nov 27, 2015 at 01:03:52PM +0100, Tom de Vries wrote:

Handle non-declared variables in kernels alias analysis

2015-11-27  Tom de Vries  

* gimplify.c (gimplify_scan_omp_clauses): Initialize
OMP_CLAUSE_ORIG_DECL.
* omp-low.c (install_var_field_1): Handle base_pointers_restrict for
pointers.
(map_ptr_clause_points_to_clause_p)
(nr_map_ptr_clauses_pointing_to_clause): New function.
(omp_target_base_pointers_restrict_p): Handle GOMP_MAP_POINTER.
* tree-pretty-print.c (dump_omp_clause): Print OMP_CLAUSE_ORIG_DECL.
* tree.c (omp_clause_num_ops): Set num_ops for OMP_CLAUSE_MAP to 3.
* tree.h (OMP_CLAUSE_ORIG_DECL): New macro.

* c-c++-common/goacc/kernels-alias-10.c: New test.
* c-c++-common/goacc/kernels-alias-9.c: New test.


I don't like this (mainly the addition of OMP_CLAUSE_ORIG_DECL),
but it also sounds wrong to me.
The primary question is how do you handle GOMP_MAP_POINTER
(which is something we don't use for C/C++ OpenMP anymore,
and Fortran OpenMP will stop using it in GCC 7 or 6.2?) on the OpenACC
libgomp side, does it work like GOMP_MAP_ALLOC or GOMP_MAP_FORCE_ALLOC?


When a GOMP_MAP_POINTER mapping is encountered, first we check if it has
been mapped before:
- if it hasn't been mapped before, we check if the area the pointer
   points to has been mapped, and if not, error out. Else we map the
   pointer to a device pointer, and write the device pointer value
   to the device pointer variable.
- if the pointer has been mapped before, we reuse the mapping and write
   the device pointer value to the device pointer variable.


Similarly GOMP_MAP_TO_PSET.
If it works like GOMP_MAP_ALLOC (it does
on the OpenMP side in target.c, so if something is already mapped, no
further pointer assignment happens), then your change looks wrong.
If it works like GOMP_MAP_FORCE_ALLOC, then you just should treat
GOMP_MAP_POINTER on all OpenACC constructs as opcode that allows the
restrict operation.


I guess it works mostly like GOMP_MAP_ALLOC, but I don't understand the
relevance of the comparison for the patch. What is interesting for the
restrict optimization is whether what GOMP_MAP_POINTER points to has
been mapped with or without the force flag during the same mapping
sequence.


If it should behave differently depending on
if the corresponding array section has been mapped with GOMP_MAP_FORCE_*
or without it,


The mapping itself shouldn't behave differently.


then supposedly you should use a different code for
those two.


I could add f.i. an unsigned int aux_flags to struct tree_omp_clause,
set a new POINTS_TO_FORCE_VAR flag when translating the acc clause into
mapping clauses, and use that flag later on when dealing with the
GOMP_MAP_POINTER clause. Is that an acceptable approach?

[ Instead I could define a new gcc-internal-only
GOMP_MAP_POINTER_POINTS_TO_FORCE kind, but I'd rather avoid this, given
that it would be handled the same as GOMP_MAP_POINTER everywhere, except
for a single point in the source code. ]


I found the example of OMP_CLAUSE_MAP_ZERO_BIAS_ARRAY_SECTION and 
OMP_CLAUSE_MAP_MAYBE_ZERO_LENGTH_ARRAY_SECTION, which re-purpose 
existing but unused fields, and used something similar in attached patch 
(untested, c-only for the moment).


Thanks,
- Tom

2016-03-14  Tom de Vries  

	* omp-low.c (install_var_field): Handle base_pointers_restrict for
	pointers.
	(omp_target_base_pointers_restrict_p): Handle GOMP_MAP_POINTER.
	* tree.h (OMP_CLAUSE_MAP_POINTER_TO_FORCED): define.

	* c-typeck.c (handle_omp_array_sections): Set
	OMP_CLAUSE_MAP_POINTER_TO_FORCED on GOMP_MAP_POINTER clause.

	* c-c++-common/goacc/kernels-alias-10.c: New test.
	* c-c++-common/goacc/kernels-alias-9.c: New test.

Handle non-declared variables in kernels alias analysis

---
 gcc/c/c-typeck.c   | 15 ++-
 gcc/omp-low.c  | 48 ++
 .../c-c++-common/goacc/kernels-alias-10.c  | 29 +
 gcc/testsuite/c-c++-common/goacc/kernels-alias-9.c | 29 +
 gcc/tree.h |  3 ++
 5 files changed, 123 insertions(+), 1 deletion(-)

diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 6aa0f03..a05831d 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -12446,7 +12446,20 @@ handle_omp_array_sections (tree c, bool is_omp)
 	  }
   tree c2 = build_omp_clause (OMP_CLAUSE_LOCATION (c), OMP_CLAUSE_MAP);
   if (!is_omp)
-	OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
+	{
+	  OMP_CLAUSE_SET_MAP_KIND (c2, GOMP_MAP_POINTER);
+	  switch (OMP_CLAUSE_MAP_KIND (c))
+	{
+	case GOMP_MAP_FORCE_ALLOC:
+	case GOMP_MAP_FORCE_TO:
+	case GOMP_MAP_FORCE_FROM:
+	case GOMP_MAP_FORCE_TOFROM:
+	  OMP_CLAUSE_MAP_POINTER_TO_FORCED (c2) = 1;
+	  break;
+	default:
+	  break;
+	}
+	}
   else if (TREE_CODE (t) == COMPONENT_R

Re: [PATCH, testsuite] Fix ifcvt-4.c for PowerPC

2016-03-14 Thread Jeff Law

On 03/14/2016 02:23 PM, Pat Haugen wrote:

As stated in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68232, this
test needs -misel on powerpc to pass. Verified the following fixes the
test on both powerpc64/powerpc64le. Ok for trunk?

-Pat

testsuite/ChangeLog:
2016-03-14  Pat Haugen  

 * gcc.dg/ifcvt-4.c: Add -misel for powerpc* and remove skip for
powerpc64le.

OK.
jeff



Re: [PATCH, testsuite] Fix ifcvt-4.c for PowerPC

2016-03-14 Thread David Edelsohn
On Mon, Mar 14, 2016 at 7:35 PM, Jeff Law  wrote:
> On 03/14/2016 02:23 PM, Pat Haugen wrote:
>>
>> As stated in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=68232, this
>> test needs -misel on powerpc to pass. Verified the following fixes the
>> test on both powerpc64/powerpc64le. Ok for trunk?
>>
>> -Pat
>>
>> testsuite/ChangeLog:
>> 2016-03-14  Pat Haugen  
>>
>>  * gcc.dg/ifcvt-4.c: Add -misel for powerpc* and remove skip for
>> powerpc64le.
>
> OK.
> jeff

The change is going to fail on PowerPC systems that don't support
ISEL, so it needs to be adjusted.

- David


Re: [PATCH] genrecog: Fix crash on invalid input

2016-03-14 Thread Bernd Schmidt

On 03/14/2016 09:00 PM, Segher Boessenkool wrote:

There is just the single caller, and pred is set right before the call
there.  How about this patch, then?


Looks alright.


Bernd


Re: [RFA][PATCH][PR tree-optimization/64058] Improve and stabilize sorting of coalesce pairs

2016-03-14 Thread Trevor Saunders
On Mon, Mar 14, 2016 at 04:32:06PM -0600, Jeff Law wrote:
> On 03/11/2016 03:02 AM, Richard Biener wrote:
> >
> >
> >For the other part I noticed a few things
> >  1) having a bitmap_count_ior_bits () would be an improvement
> Yea, I almost built one.  That's easy enough to add.
> 
> >  2) you might end up with redundant work(?) as you are iterating
> >  over SSA name coalesce candidates but look at partition conflicts
> We'd have redundant work if the elements mapped back to SSA_NAMEs which in
> turn mapped to partitions which appeared as a coalescing pair already.  But
> there's no way to know that in advance.
> 
> This is mitigated somewhat in the next version which computes the conflict
> sizes lazily when the qsort comparison function is given two conflict pairs
> with an equal cost.
> 
> 
> 
> >  3) having this extra heuristic might be best guarded by
> >flag_expensive_optimizations
> Perhaps.  I don't see this tie breaker as being all that expensive.  But I
> won't object to guarding with flag_expensive_optimizations.
> 
> >  as it is a quite expensive "tie breaker" - maybe even improve things
> >by first sorting
> >  after cost and then only doing the tie breaking when necessary, re-sorting 
> > the
> >  sub-sequence with same original cost.  It may also be enough to only 
> > perform
> >  this for "important" candidates, say within the first 100 of the function 
> > or so
> >  or with cost > X.
> The problem with this is qsort's interface into the comparison function has
> a terribly narrow API and I don't think we want to rely on qsort_r.  In fact
> that's the whole reason why I didn't do lazy evaluation on the conflict
> sizes initially.
> 
> To work around the narrow API in the comparison function we have to either
> store additional data in each node or have them available in globals.  The
> former would be horribly wasteful, the latter is just ugly.  I choose the
> latter in the lazy evaluation of the conflicts version.

It's a bit ugly in C++98, but you can give std::sort a random object with
operator () to compare with.

Trev


Re: [PATCH] PR69195, Reload confused by invalid reg equivs

2016-03-14 Thread Alan Modra
On Mon, Mar 14, 2016 at 01:00:39PM -0600, Jeff Law wrote:
> Right.  Tolerant as in not crash.

So can someone please approve my ira.c:indirect_jump_optimize patch?
I'm not quite audacious enough to claim it is obvious.  The original
is at https://gcc.gnu.org/ml/gcc-patches/2016-03/msg00720.html,
reposted here with some additional comments.  Bootstrapped and
regression tested powerpc64le-linux and x86_64-linux.

(In the thread I mentioned we could use DF_REF_INSN_INFO in place of
!DF_REF_IS_ARTIFICIAL.  I believe that is true, and it saves an access
to another field of df_ref, but !DF_REF_IS_ARTIFICIAL before access to
DF_REF_INSN or DF_REF_INSN_UID is somewhat more prevalent in the gcc
sources.)

PR rtl-optimization/69195
PR rtl-optimization/47992
* ira.c (indirect_jump_optimize): Ignore artificial defs.
Add comments.

diff --git a/gcc/ira.c b/gcc/ira.c
index 5e7a2ed..a543d90 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -3842,7 +3842,8 @@ update_equiv_regs (void)
   free (pdx_subregs);
 }
 
-/* A pass over indirect jumps, converting simple cases to direct jumps.  */
+/* A pass over indirect jumps, converting simple cases to direct jumps.
+   Combine does this optimization too, but only within a basic block.  */
 static void
 indirect_jump_optimize (void)
 {
@@ -3862,14 +3863,23 @@ indirect_jump_optimize (void)
   int regno = REGNO (SET_SRC (x));
   if (DF_REG_DEF_COUNT (regno) == 1)
{
- rtx_insn *def_insn = DF_REF_INSN (DF_REG_DEF_CHAIN (regno));
- rtx note = find_reg_note (def_insn, REG_LABEL_OPERAND, NULL_RTX);
-
- if (note)
+ df_ref def = DF_REG_DEF_CHAIN (regno);
+ if (!DF_REF_IS_ARTIFICIAL (def))
{
- rtx lab = gen_rtx_LABEL_REF (Pmode, XEXP (note, 0));
- if (validate_replace_rtx (SET_SRC (x), lab, insn))
-   rebuild_p = true;
+ rtx_insn *def_insn = DF_REF_INSN (def);
+ rtx note = find_reg_note (def_insn, REG_LABEL_OPERAND, NULL_RTX);
+
+ if (note)
+   {
+ /* Substitute a LABEL_REF to the label given by the
+note rather than using SET_SRC of DEF_INSN.
+DEF_INSN might be loading the label constant from
+a constant pool, which isn't what we want in a
+direct branch.  */
+ rtx lab = gen_rtx_LABEL_REF (Pmode, XEXP (note, 0));
+ if (validate_replace_rtx (SET_SRC (x), lab, insn))
+   rebuild_p = true;
+   }
}
}
 }

-- 
Alan Modra
Australia Development Lab, IBM


Re: C++ PATCH for range-for tweak

2016-03-14 Thread Jason Merrill

On 03/14/2016 05:30 PM, Florian Weimer wrote:

* Jason Merrill:


P08184R0: Generalizing the Range-Based For Loop


How can one resolve this reference?  It's obviously not a PR number in
GCC Bugzilla.

I found this after some searching:



But it lacks the additional “8”.


Oops, typo.  Fixed, along with adjusting the feature-test macro.


commit 83c74d69d3cc41bc764d75d52effadd2802140ec
Author: Jason Merrill 
Date:   Mon Mar 14 22:55:55 2016 -0400

	* c-cppbuiltin.c (c_cpp_builtins): Bump __cpp_range_based_for.

diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 1c7..dc1f426 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -871,7 +871,7 @@ c_cpp_builtins (cpp_reader *pfile)
 	  cpp_define (pfile, "__cpp_namespace_attributes=201411");
 	  cpp_define (pfile, "__cpp_enumerator_attributes=201411");
 	  cpp_define (pfile, "__cpp_nested_namespace_definitions=201411");
-	  cpp_define (pfile, "__cpp_fold_expressions=201411");
+	  cpp_define (pfile, "__cpp_fold_expressions=201603");
 	  cpp_define (pfile, "__cpp_nontype_template_args=201411");
 	}
   if (flag_concepts)


[PATCH] Fix 70199

2016-03-14 Thread Richard Henderson

The problem here is that

  void* labels[] = {
&&l0, &&l1, &&l2
  };

gets gimplified to

  labels = *.LC0;

but .LC0 is not in the set of local decls, so that when copy_forbidden is 
called during sra versioning we fail to forbid the copy.  We could set a 
different flag, but I think it's easiest to just add the artificial decl to 
where it can be seen.


Ok?


r~
* gimplify.c (gimplify_init_constructor): Add the constant def decl
to the function's local decls.


diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index b331e41..884482e 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -4016,6 +4016,7 @@ gimplify_init_constructor (tree *expr_p, gimple_seq 
*pre_p, gimple_seq *post_p,
 
walk_tree (&ctor, force_labels_r, NULL, NULL);
ctor = tree_output_constant_def (ctor);
+   add_local_decl (cfun, ctor);
if (!useless_type_conversion_p (type, TREE_TYPE (ctor)))
  ctor = build1 (VIEW_CONVERT_EXPR, type, ctor);
TREE_OPERAND (*expr_p, 1) = ctor;


Re: [PING 4, PATCH] PR/69089: C++-11: Ignore "alignas(0)".

2016-03-14 Thread Dominik Vogt
On Mon, Jan 04, 2016 at 12:33:21PM +0100, Dominik Vogt wrote:
> On Fri, Jan 01, 2016 at 05:53:08PM -0700, Martin Sebor wrote:
> > On 12/31/2015 04:50 AM, Dominik Vogt wrote:
> > >The attached patch fixes C++-11 handling of "alignas(0)" which
> > >should be ignored but currently generates an error message.  A
> > >test case is included; the patch has been tested on S390x.  Since
> > >it's a language issue it should be independent of the backend
> > >used.
> > 
> > The patch doesn't handle value-dependent expressions(*).
> 
> > It
> > seems that the problem is in handle_aligned_attribute() calling
> > check_user_alignment() with the second argument (ALLOW_ZERO)
> > set to false.  Calling it with true fixes the problem and handles
> > value-dependent expressions (I haven't done any more testing beyond
> > that).
> 
> Like the attached patch?  (Passes the testsuite on s390x.)
> 
> But wouldn't an "aligned" attribute be added, allowing the backend
> to possibly generate an error or a warning?
> 
> > Also, in the test, I noticed the definition of the first struct
> > is missing the terminating semicolon.
> 
> Yeah.

> gcc/c-family/ChangeLog
> 
>   PR/69089
>   * c-common.c (handle_aligned_attribute): Allow 0 as an argument to the
>   "aligned" attribute.
> 
> gcc/testsuite/ChangeLog
> 
>   PR/69089
>   * g++.dg/cpp0x/alignas5.C: New test.

> >From 2461293b9070da74950fd0ae055d1239cc69ce67 Mon Sep 17 00:00:00 2001
> From: Dominik Vogt 
> Date: Wed, 30 Dec 2015 15:08:52 +0100
> Subject: [PATCH] C++-11: Ingore "alignas(0)" instead of generating an
>  error message.
> 
> This is required by the C++-11 standard.
> ---
>  gcc/c-family/c-common.c   |  2 +-
>  gcc/testsuite/g++.dg/cpp0x/alignas5.C | 29 +
>  2 files changed, 30 insertions(+), 1 deletion(-)
>  create mode 100644 gcc/testsuite/g++.dg/cpp0x/alignas5.C
> 
> diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
> index 653d1dc..9eb25a9 100644
> --- a/gcc/c-family/c-common.c
> +++ b/gcc/c-family/c-common.c
> @@ -7804,7 +7804,7 @@ handle_aligned_attribute (tree *node, tree ARG_UNUSED 
> (name), tree args,
>else if (TYPE_P (*node))
>  type = node, is_type = 1;
>  
> -  if ((i = check_user_alignment (align_expr, false)) == -1
> +  if ((i = check_user_alignment (align_expr, true)) == -1
>|| !check_cxx_fundamental_alignment_constraints (*node, i, flags))
>  *no_add_attrs = true;
>else if (is_type)
> diff --git a/gcc/testsuite/g++.dg/cpp0x/alignas5.C 
> b/gcc/testsuite/g++.dg/cpp0x/alignas5.C
> new file mode 100644
> index 000..f3252a9
> --- /dev/null
> +++ b/gcc/testsuite/g++.dg/cpp0x/alignas5.C
> @@ -0,0 +1,29 @@
> +// PR c++/69089
> +// { dg-do compile { target c++11 } }
> +// { dg-options "-Wno-attributes" }
> +
> +alignas (0) int valid1;
> +alignas (1 - 1) int valid2;
> +struct Tvalid
> +{
> +  alignas (0) int i;
> +  alignas (2 * 0) int j;
> +};
> +
> +alignas (-1) int invalid1; /* { dg-error "not a positive power of 2" } */
> +alignas (1 - 2) int invalid2; /* { dg-error "not a positive power of 2" } */
> +struct Tinvalid
> +{
> +  alignas (-1) int i; /* { dg-error "not a positive power of 2" } */
> +  alignas (2 * 0 - 1) int j; /* { dg-error "not a positive power of 2" } */
> +};
> +
> +template <int N> struct TNvalid1 { alignas (N) int i; };
> +TNvalid1<0> SNvalid1;
> +template <int N> struct TNvalid2 { alignas (N) int i; };
> +TNvalid2<1 - 1> SNvalid2;
> +
> +template <int N> struct TNinvalid1 { alignas (N) int i; }; /* { dg-error 
> "not a positive power of 2" } */
> +TNinvalid1<-1> SNinvalid1;
> +template <int N> struct TNinvalid2 { alignas (N) int i; }; /* { dg-error 
> "not a positive power of 2" } */
> +TNinvalid2<1 - 2> SNinvalid2;
> +TNinvalid2<1 - 2> SNinvalid2;
> -- 
> 2.3.0
> 



Ciao

Dominik ^_^  ^_^

-- 

Dominik Vogt
IBM Germany