[PATCH] Construct ipa_reduced_postorder always for overwritable (PR ipa/89009).

2019-02-11 Thread Martin Liška
Hi.

IPA pure const should always construct ipa_reduced_postorder with
possibility to cross AVAIL_INTERPOSABLE boundary. The pass itself
can then properly stop propagation on these symbols.

The patch is pre-approved by Honza.
Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Thanks,
Martin

gcc/ChangeLog:

2019-02-08  Martin Liska  

PR ipa/89009
* ipa-cp.c (build_toporder_info): Remove usage of a param.
* ipa-inline.c (inline_small_functions): Likewise.
* ipa-pure-const.c (propagate_pure_const): Likewise.
(propagate_nothrow): Likewise.
* ipa-reference.c (propagate): Likewise.
* ipa-utils.c (struct searchc_env): Remove unused field.
(searchc): Always search across AVAIL_INTERPOSABLE.
(ipa_reduced_postorder): Always allow AVAIL_INTERPOSABLE as
the only called IPA pure const can properly not propagate
across interposable boundary.
* ipa-utils.h (ipa_reduced_postorder): Remove param.
---
 gcc/ipa-cp.c | 2 +-
 gcc/ipa-inline.c | 2 +-
 gcc/ipa-pure-const.c | 4 ++--
 gcc/ipa-reference.c  | 2 +-
 gcc/ipa-utils.c  | 9 +++--
 gcc/ipa-utils.h  | 2 +-
 6 files changed, 9 insertions(+), 12 deletions(-)


diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 3489ed59ce2..442d5c63eff 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -815,7 +815,7 @@ build_toporder_info (struct ipa_topo_info *topo)
   topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 
   gcc_checking_assert (topo->stack_top == 0);
-  topo->nnodes = ipa_reduced_postorder (topo->order, true, true, NULL);
+  topo->nnodes = ipa_reduced_postorder (topo->order, true, NULL);
 }
 
 /* Free information about strongly connected components and the arrays in
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 4ddbfdf772c..360c3de3289 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -1778,7 +1778,7 @@ inline_small_functions (void)
  metrics.  */
 
   max_count = profile_count::uninitialized ();
-  ipa_reduced_postorder (order, true, true, NULL);
+  ipa_reduced_postorder (order, true, NULL);
   free (order);
 
   FOR_EACH_DEFINED_FUNCTION (node)
diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
index 8227eed29bc..a8a3956d2d5 100644
--- a/gcc/ipa-pure-const.c
+++ b/gcc/ipa-pure-const.c
@@ -1422,7 +1422,7 @@ propagate_pure_const (void)
   bool remove_p = false;
   bool has_cdtor;
 
-  order_pos = ipa_reduced_postorder (order, true, false,
+  order_pos = ipa_reduced_postorder (order, true,
  ignore_edge_for_pure_const);
   if (dump_file)
 {
@@ -1751,7 +1751,7 @@ propagate_nothrow (void)
   int i;
   struct ipa_dfs_info * w_info;
 
-  order_pos = ipa_reduced_postorder (order, true, false,
+  order_pos = ipa_reduced_postorder (order, true,
  ignore_edge_for_nothrow);
   if (dump_file)
 {
diff --git a/gcc/ipa-reference.c b/gcc/ipa-reference.c
index 8ad12d30bb2..d1759a374bc 100644
--- a/gcc/ipa-reference.c
+++ b/gcc/ipa-reference.c
@@ -712,7 +712,7 @@ propagate (void)
  the global information.  All the nodes within a cycle will have
  the same info so we collapse cycles first.  Then we can do the
  propagation in one pass from the leaves to the roots.  */
-  order_pos = ipa_reduced_postorder (order, true, true, ignore_edge_p);
+  order_pos = ipa_reduced_postorder (order, true, ignore_edge_p);
   if (dump_file)
 ipa_print_order (dump_file, "reduced", order, order_pos);
 
diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c
index aebcb8ea91d..79b250c3943 100644
--- a/gcc/ipa-utils.c
+++ b/gcc/ipa-utils.c
@@ -63,7 +63,6 @@ struct searchc_env {
   int order_pos;
   splay_tree nodes_marked_new;
   bool reduce;
-  bool allow_overwritable;
   int count;
 };
 
@@ -105,7 +104,7 @@ searchc (struct searchc_env* env, struct cgraph_node *v,
 
   if (w->aux
 	  && (avail > AVAIL_INTERPOSABLE
-	  || (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
+	  || avail == AVAIL_INTERPOSABLE))
 	{
 	  w_info = (struct ipa_dfs_info *) w->aux;
 	  if (w_info->new_node)
@@ -162,7 +161,7 @@ searchc (struct searchc_env* env, struct cgraph_node *v,
 
 int
 ipa_reduced_postorder (struct cgraph_node **order,
-		   bool reduce, bool allow_overwritable,
+		   bool reduce,
 		   bool (*ignore_edge) (struct cgraph_edge *))
 {
   struct cgraph_node *node;
@@ -175,15 +174,13 @@ ipa_reduced_postorder (struct cgraph_node **order,
   env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
   env.count = 1;
   env.reduce = reduce;
-  env.allow_overwritable = allow_overwritable;
 
   FOR_EACH_DEFINED_FUNCTION (node)
 {
   enum availability avail = node->get_availability ();
 
   if (avail > AVAIL_INTERPOSABLE
-	  || (allow_overwritable
-	  && (avail == AVAIL_INTERPOSABLE)))
+	  || avail == AVAIL_INTERPOSABLE)
 	{
 	  /* Reuse the info if it is already there.  */
 	  struct ipa_dfs_info *info = (struct ipa_dfs_info *) node->aux;
diff --git a/gcc/ipa-

Re: [PATCH] Remove a barrier when EDGE_CROSSING is removed (PR lto/88858).

2019-02-11 Thread Martin Liška
@Honza: PING^2

On 2/4/19 9:09 AM, Martin Liška wrote:
> @Honza: PING^1
> 
> Martin
> 
> On 1/24/19 9:10 AM, Martin Liška wrote:
>> On 1/23/19 7:28 PM, Segher Boessenkool wrote:
>>> Hi Martin,
>>>
>>> On Wed, Jan 23, 2019 at 10:29:40AM +0100, Martin Liška wrote:
 diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
 index 172bdf585d0..5dd316efb63 100644
 --- a/gcc/cfgrtl.c
 +++ b/gcc/cfgrtl.c
 @@ -4396,6 +4396,25 @@ cfg_layout_redirect_edge_and_branch (edge e, 
 basic_block dest)
 "Removing crossing jump while redirecting edge form %i to 
 %i\n",
 e->src->index, dest->index);
delete_insn (BB_END (src));
 +
 +  /* Unlink a BARRIER that can be still in BB_FOOTER.  */
 +  rtx_insn *insn = BB_FOOTER (src);
 +  while (insn != NULL_RTX && !BARRIER_P (insn))
 +  insn = NEXT_INSN (insn);
 +
 +  if (insn != NULL_RTX)
 +  {
 +if (insn == BB_FOOTER (src))
 +  BB_FOOTER (src) = NULL;
 +else
 +  {
 +if (PREV_INSN (insn))
 +  SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
 +if (NEXT_INSN (insn))
 +  SET_PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
 +  }
 +  }
>>>
>>>
>>> combine.c has nicer code to do this, in update_cfg_for_uncondjump.  Split
>>> it out into some common routine?  Something in cfgrtl.c I guess.
>>
>> Thanks Segher for the advice. I'll do it as soon as Honza will make a review
>> about the fundament of the patch.
>>
>> Martin
>>
>>>
>>>
>>> Segher
>>>
>>
> 



Re: [PATCH] Construct ipa_reduced_postorder always for overwritable (PR ipa/89009).

2019-02-11 Thread Martin Liška
Updated version of the patch where I added a test-case.
I'm going to install this version.

Martin
>From ffe0985846a8827fce4e47f090804e664059b650 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Fri, 8 Feb 2019 13:04:11 +0100
Subject: [PATCH] Construct ipa_reduced_postorder always for overwritable (PR
 ipa/89009).

gcc/ChangeLog:

2019-02-08  Martin Liska  

	PR ipa/89009
	* ipa-cp.c (build_toporder_info): Remove usage of a param.
	* ipa-inline.c (inline_small_functions): Likewise.
	* ipa-pure-const.c (propagate_pure_const): Likewise.
	(propagate_nothrow): Likewise.
	* ipa-reference.c (propagate): Likewise.
	* ipa-utils.c (struct searchc_env): Remove unused field.
	(searchc): Always search across AVAIL_INTERPOSABLE.
	(ipa_reduced_postorder): Always allow AVAIL_INTERPOSABLE as
	the only called IPA pure const can properly not propagate
	across interposable boundary.
	* ipa-utils.h (ipa_reduced_postorder): Remove param.

gcc/testsuite/ChangeLog:

2019-02-11  Martin Liska  

	PR ipa/89009
	* g++.dg/ipa/pr89009.C: New test.
---
 gcc/ipa-cp.c   |  2 +-
 gcc/ipa-inline.c   |  2 +-
 gcc/ipa-pure-const.c   |  4 ++--
 gcc/ipa-reference.c|  2 +-
 gcc/ipa-utils.c|  9 +++--
 gcc/ipa-utils.h|  2 +-
 gcc/testsuite/g++.dg/ipa/pr89009.C | 12 
 7 files changed, 21 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr89009.C

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 3489ed59ce2..442d5c63eff 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -815,7 +815,7 @@ build_toporder_info (struct ipa_topo_info *topo)
   topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
 
   gcc_checking_assert (topo->stack_top == 0);
-  topo->nnodes = ipa_reduced_postorder (topo->order, true, true, NULL);
+  topo->nnodes = ipa_reduced_postorder (topo->order, true, NULL);
 }
 
 /* Free information about strongly connected components and the arrays in
diff --git a/gcc/ipa-inline.c b/gcc/ipa-inline.c
index 4ddbfdf772c..360c3de3289 100644
--- a/gcc/ipa-inline.c
+++ b/gcc/ipa-inline.c
@@ -1778,7 +1778,7 @@ inline_small_functions (void)
  metrics.  */
 
   max_count = profile_count::uninitialized ();
-  ipa_reduced_postorder (order, true, true, NULL);
+  ipa_reduced_postorder (order, true, NULL);
   free (order);
 
   FOR_EACH_DEFINED_FUNCTION (node)
diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
index 8227eed29bc..a8a3956d2d5 100644
--- a/gcc/ipa-pure-const.c
+++ b/gcc/ipa-pure-const.c
@@ -1422,7 +1422,7 @@ propagate_pure_const (void)
   bool remove_p = false;
   bool has_cdtor;
 
-  order_pos = ipa_reduced_postorder (order, true, false,
+  order_pos = ipa_reduced_postorder (order, true,
  ignore_edge_for_pure_const);
   if (dump_file)
 {
@@ -1751,7 +1751,7 @@ propagate_nothrow (void)
   int i;
   struct ipa_dfs_info * w_info;
 
-  order_pos = ipa_reduced_postorder (order, true, false,
+  order_pos = ipa_reduced_postorder (order, true,
  ignore_edge_for_nothrow);
   if (dump_file)
 {
diff --git a/gcc/ipa-reference.c b/gcc/ipa-reference.c
index 8ad12d30bb2..d1759a374bc 100644
--- a/gcc/ipa-reference.c
+++ b/gcc/ipa-reference.c
@@ -712,7 +712,7 @@ propagate (void)
  the global information.  All the nodes within a cycle will have
  the same info so we collapse cycles first.  Then we can do the
  propagation in one pass from the leaves to the roots.  */
-  order_pos = ipa_reduced_postorder (order, true, true, ignore_edge_p);
+  order_pos = ipa_reduced_postorder (order, true, ignore_edge_p);
   if (dump_file)
 ipa_print_order (dump_file, "reduced", order, order_pos);
 
diff --git a/gcc/ipa-utils.c b/gcc/ipa-utils.c
index aebcb8ea91d..79b250c3943 100644
--- a/gcc/ipa-utils.c
+++ b/gcc/ipa-utils.c
@@ -63,7 +63,6 @@ struct searchc_env {
   int order_pos;
   splay_tree nodes_marked_new;
   bool reduce;
-  bool allow_overwritable;
   int count;
 };
 
@@ -105,7 +104,7 @@ searchc (struct searchc_env* env, struct cgraph_node *v,
 
   if (w->aux
 	  && (avail > AVAIL_INTERPOSABLE
-	  || (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
+	  || avail == AVAIL_INTERPOSABLE))
 	{
 	  w_info = (struct ipa_dfs_info *) w->aux;
 	  if (w_info->new_node)
@@ -162,7 +161,7 @@ searchc (struct searchc_env* env, struct cgraph_node *v,
 
 int
 ipa_reduced_postorder (struct cgraph_node **order,
-		   bool reduce, bool allow_overwritable,
+		   bool reduce,
 		   bool (*ignore_edge) (struct cgraph_edge *))
 {
   struct cgraph_node *node;
@@ -175,15 +174,13 @@ ipa_reduced_postorder (struct cgraph_node **order,
   env.nodes_marked_new = splay_tree_new (splay_tree_compare_ints, 0, 0);
   env.count = 1;
   env.reduce = reduce;
-  env.allow_overwritable = allow_overwritable;
 
   FOR_EACH_DEFINED_FUNCTION (node)
 {
   enum availability avail = node->get_availability ();
 
   if (avail > AVAIL_INTERPOSABLE
-	  || (allow_overwritable
-	  

Re: [PATCH v2, i386]: Fix PR89221, --enable-frame-pointer does not work as intended

2019-02-11 Thread Rainer Orth
Hi Uros,

> On Fri, Feb 8, 2019 at 1:24 PM Uros Bizjak  wrote:
>
>> Attached patch fixes --enable-frame-pointer handling, and this way
>> makes a couple of defines in config/i386/sol2.h obsolete.
>
> It turned out that --enable-frame-pointer does not work for multilibs
> at all. So, instead of pretending that -m32 on x86_64 and -m64 on i686
> works as advertised, unify 32bit and 64bit handling.

this is effectively what I came up with when testing the first version
of the patch on i386-pc-solaris2.11 and noticing the -m64 regressions.

I've now re-bootstrapped this exact version without regressions, so from
a Solaris/x86 POV this is good to go.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH,GDC] Add netbsd support to GDC

2019-02-11 Thread coypu
On Wed, Jan 23, 2019 at 09:35:03AM +, co...@sdf.org wrote:
> > Is this necessary?  I'd prefer to not set this field if it can be
> > avoided.  The same goes for the others, I intend to remove them at
> > soonest convenience once the problematic parts of the front-end logic
> > has been abstracted away.
> > 
> > Other than that, looks reasonable to me.
> 
> It's not necessary. Here's an updated diff without it.
> I also forgot to add netbsd-d.c, which is referenced in the previous
> diff.

ping :-)
If it is good, can someone commit it? I don't have the ability to do so.


Re: [PATCH] Construct ipa_reduced_postorder always for overwritable (PR ipa/89009).

2019-02-11 Thread Jan Hubicka
> Hi.
> 
> IPA pure const should always construct ipa_reduced_postorder with
> possibility to cross AVAIL_INTERPOSABLE boundary. The pass itself
> can then properly stop propagation on these symbols.
> 
> The patch is pre-approved by Honza.
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> 
> Thanks,
> Martin
> 
> gcc/ChangeLog:
> 
> 2019-02-08  Martin Liska  
> 
>   PR ipa/89009
>   * ipa-cp.c (build_toporder_info): Remove usage of a param.
>   * ipa-inline.c (inline_small_functions): Likewise.
>   * ipa-pure-const.c (propagate_pure_const): Likewise.
>   (propagate_nothrow): Likewise.
>   * ipa-reference.c (propagate): Likewise.
>   * ipa-utils.c (struct searchc_env): Remove unused field.
>   (searchc): Always search across AVAIL_INTERPOSABLE.
>   (ipa_reduced_postorder): Always allow AVAIL_INTERPOSABLE as
>   the only called IPA pure const can properly not propagate
>   across interposable boundary.
>   * ipa-utils.h (ipa_reduced_postorder): Remove param.
> @@ -105,7 +104,7 @@ searchc (struct searchc_env* env, struct cgraph_node *v,
>  
>if (w->aux
> && (avail > AVAIL_INTERPOSABLE
> -   || (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
> +   || avail == AVAIL_INTERPOSABLE))
>   {
> w_info = (struct ipa_dfs_info *) w->aux;
> if (w_info->new_node)

This optimizes >= AVAIL_INTERPOSABLE then, but for ipa-cp, ipa-inline,
ipa-pure-const we do not want interposable calls to close SCC regions
(because we can not propagate here). ipa-reference is more subtle -
interposable calls close SCC regions IFF they are declared leaf.

Can you, please, update ignore_edge_p of individual optimization passes
this way? (I.e. for all but ipa-reference add avail <= AVAIL_INTERPOSABLE
check for the availability of the target function and for ipa-reference
test also leaf attribute?). 

In all those cases we also want to check that given optimization pass is
enabled for both caller and callee.  In other cases we do not want to
consider them part of SCCs as well.  It may make sense to do this as one
change or I can do that incrementally.

Thanks,
Honza


Re: Follow-up-fix to "[PATCH] Move PR84877 fix elsewhere (PR bootstrap/88450)"

2019-02-11 Thread Eric Botcazou
> To fix the r267812 incorrectness we need to use the *stored*
> size, i.e. that which will be stored into using register-sized
> writes.  It's seems like the bug is just a typo, so the fix is
> as simple as follows.  Note the usage of "diff -U 10" to show
> that size_stored is used in the "then"-arm.

Right, thanks for catching it.

> Ok to commit?
> 
> gcc/ChangeLog:
>   * function.c (assign_parm_setup_block): Use the stored
>   size, not the passed size, when allocating stack-space,
>   also for a parameter with alignment larger than
>   MAX_SUPPORTED_STACK_ALIGNMENT.

OK, thanks.

-- 
Eric Botcazou


Re: Follow-up-fix 2 to "[PATCH] Move PR84877 fix elsewhere (PR bootstrap/88450)"

2019-02-11 Thread Hans-Peter Nilsson
> Date: Mon, 11 Feb 2019 07:38:14 +0100
> From: Richard Biener 

> >+  HOST_WIDE_INT min_parm_align
> >+= STRICT_ALIGNMENT ? BITS_PER_WORD : PREFERRED_STACK_BOUNDARY;
> 
> Shouldn't it be MIN (...) of BOTH? 

That *does* seem logical...  Take 2 as follows, in testing as
before.

Ok to commit, if testing passes?

gcc:
* function.c (assign_parm_setup_block): Align the
parameter to a minimum of PREFERRED_STACK_BOUNDARY and
BITS_PER_WORD instead of always BITS_PER_WORD.

--- gcc/function.c.orig2Sat Feb  9 00:53:17 2019
+++ gcc/function.c  Mon Feb 11 09:37:28 2019
@@ -2912,7 +2912,10 @@ assign_parm_setup_block (struct assign_p
   size_stored = CEIL_ROUND (size, UNITS_PER_WORD);
   if (stack_parm == 0)
 {
-  SET_DECL_ALIGN (parm, MAX (DECL_ALIGN (parm), BITS_PER_WORD));
+  HOST_WIDE_INT min_parm_align
+   = MIN (BITS_PER_WORD, PREFERRED_STACK_BOUNDARY);
+
+  SET_DECL_ALIGN (parm, MAX (DECL_ALIGN (parm), min_parm_align));
   if (DECL_ALIGN (parm) > MAX_SUPPORTED_STACK_ALIGNMENT)
{
  rtx allocsize = gen_int_mode (size_stored, Pmode);

brgds, H-P


Re: [PATCH] Remove a barrier when EDGE_CROSSING is removed (PR lto/88858).

2019-02-11 Thread Jan Hubicka
> @Honza: PING^2
> 
> On 2/4/19 9:09 AM, Martin Liška wrote:
> > @Honza: PING^1
> > 
> > Martin
> > 
> > On 1/24/19 9:10 AM, Martin Liška wrote:
> >> On 1/23/19 7:28 PM, Segher Boessenkool wrote:
> >>> Hi Martin,
> >>>
> >>> On Wed, Jan 23, 2019 at 10:29:40AM +0100, Martin Liška wrote:
>  diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c
>  index 172bdf585d0..5dd316efb63 100644
>  --- a/gcc/cfgrtl.c
>  +++ b/gcc/cfgrtl.c
>  @@ -4396,6 +4396,25 @@ cfg_layout_redirect_edge_and_branch (edge e, 
>  basic_block dest)
>    "Removing crossing jump while redirecting edge form %i 
>  to %i\n",
>    e->src->index, dest->index);
> delete_insn (BB_END (src));
>  +
>  +  /* Unlink a BARRIER that can be still in BB_FOOTER.  */
>  +  rtx_insn *insn = BB_FOOTER (src);
>  +  while (insn != NULL_RTX && !BARRIER_P (insn))
>  +insn = NEXT_INSN (insn);
>  +
>  +  if (insn != NULL_RTX)
>  +{
>  +  if (insn == BB_FOOTER (src))
>  +BB_FOOTER (src) = NULL;
>  +  else
>  +{
>  +  if (PREV_INSN (insn))
>  +SET_NEXT_INSN (PREV_INSN (insn)) = NEXT_INSN (insn);
>  +  if (NEXT_INSN (insn))
>  +SET_PREV_INSN (NEXT_INSN (insn)) = PREV_INSN (insn);
>  +}
>  +}
> >>>
> >>>
> >>> combine.c has nicer code to do this, in update_cfg_for_uncondjump.  Split
> >>> it out into some common routine?  Something in cfgrtl.c I guess.
> >>
> >> Thanks Segher for the advice. I'll do it as soon as Honza will make a 
> >> review
> >> about the fundament of the patch.

Aha, yes, fundament of the patch is obvious - the barrier has to go :)
There is same hunk of code in cfgrtl.c:1061, so please just merge it
Note that I am not rtl reviewer. But as author of the code I would say
that the updated patch can go in as obvious.

Honza
> >>
> >> Martin
> >>
> >>>
> >>>
> >>> Segher
> >>>
> >>
> > 
> 


Re: [rs6000] 64-bit integer loads/stores and FP instructions

2019-02-11 Thread Eric Botcazou
> You make an arbitrary distinction between certain CPUs and duplicate
> patterns based on that.

Sure, somewhat arbitrary, but that's a proof of concept and IMO better than 
treating every processor the same way.  The alternative would be to complicate 
further the existing patterns by means of the "enabled" attribute or somesuch, 
but I can try it too.

> About the bug: it should behave the same as before, use GPRs only in your
> testcase.

It's fixed by backporting the last part of PR target/85755 on the 7 branch, 
but I can certainly add the testcase to the gcc.target/powerpc testsuite.

-- 
Eric Botcazou


Re: Follow-up-fix 2 to "[PATCH] Move PR84877 fix elsewhere (PR bootstrap/88450)"

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 10:00:03AM +0100, Hans-Peter Nilsson wrote:
> > Date: Mon, 11 Feb 2019 07:38:14 +0100
> > From: Richard Biener 
> 
> > >+  HOST_WIDE_INT min_parm_align
> > >+  = STRICT_ALIGNMENT ? BITS_PER_WORD : PREFERRED_STACK_BOUNDARY;
> > 
> > Shouldn't it be MIN (...) of BOTH? 
> 
> That *does* seem logical...  Take 2 as follows, in testing as
> before.
> 
> Ok to commit, if testing passes?

Is PREFERRED_STACK_BOUNDARY what we want here?  Shouldn't that be
STACK_BOUNDARY, or PARM_BOUNDARY?  Though, PARM_BOUNDARY on cris is 32...

> gcc:
>   * function.c (assign_parm_setup_block): Align the
>   parameter to a minimum of PREFERRED_STACK_BOUNDARY and
>   BITS_PER_WORD instead of always BITS_PER_WORD.
> 
> --- gcc/function.c.orig2  Sat Feb  9 00:53:17 2019
> +++ gcc/function.cMon Feb 11 09:37:28 2019
> @@ -2912,7 +2912,10 @@ assign_parm_setup_block (struct assign_p
>size_stored = CEIL_ROUND (size, UNITS_PER_WORD);
>if (stack_parm == 0)
>  {
> -  SET_DECL_ALIGN (parm, MAX (DECL_ALIGN (parm), BITS_PER_WORD));
> +  HOST_WIDE_INT min_parm_align
> + = MIN (BITS_PER_WORD, PREFERRED_STACK_BOUNDARY);
> +
> +  SET_DECL_ALIGN (parm, MAX (DECL_ALIGN (parm), min_parm_align));
>if (DECL_ALIGN (parm) > MAX_SUPPORTED_STACK_ALIGNMENT)
>   {
> rtx allocsize = gen_int_mode (size_stored, Pmode);

Jakub


Re: [PATCH] ARM: fix -masm-syntax-unified (PR88648)

2019-02-11 Thread Kyrill Tkachov

Hi Stefan,

On 09/02/19 16:25, Stefan Agner wrote:

Hi Kyrill,

On 10.01.2019 12:38, Kyrill  Tkachov wrote:

Hi Stefan,

On 08/01/19 09:33, Kyrill Tkachov wrote:

Hi Stefan,

On 01/01/19 23:34, Stefan Agner wrote:

This allows to use unified asm syntax when compiling for the
ARM instruction. This matches documentation and seems what the
initial patch was intended doing when the flag got added.
---
  gcc/config/arm/arm.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 3419b6bd0f8..67b2b199f3f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3095,7 +3095,8 @@ arm_option_override_internal (struct gcc_options *opts,

/* Thumb2 inline assembly code should always use unified syntax.
   This will apply to ARM and Thumb1 eventually.  */
-  opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
+  if (TARGET_THUMB2_P (opts->x_target_flags))
+opts->x_inline_asm_unified = true;

This looks right to me and is the logic we had in GCC 5.
How has this patch been tested?

Can you please provide a ChangeLog entry for this patch[1].


I've bootstrapped and tested this, together with your testsuite patch
on arm-none-linux-gnueabihf
and committed both with r267804 with the following ChangeLog entries:

2019-01-10  Stefan Agner  

 PR target/88648
 * config/arm/arm.c (arm_option_override_internal): Force
 opts->x_inline_asm_unified to true only if TARGET_THUMB2_P.

2019-01-10  Stefan Agner  

 PR target/88648
 * gcc.target/arm/pr88648-asm-syntax-unified.c: Add test to
 check if -masm-syntax-unified gets applied properly.

Thank you for the patch. If you plan to contribute more patches in the
future I suggest you
sort out the copyright assignment paperwork.

I believe this fix needs to be backported to the branches.
I'll do so after a few days of testing on trunk.

Thanks for applying the patch! As far as I can see it did not made it
into the branch yet, do you think it can get backported there too?


Thanks for reminding me. I've now committed your patch and the test to the 
gcc-8 branch with r268764.

Kyrill


--
Stefan


Thanks again,
Kyrill


Thanks,
Kyrill

[1] https://gcc.gnu.org/contribute.html


  #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
--
2.20.1





[patch] Disable store merging in asan_expand_mark_ifn

2019-02-11 Thread Eric Botcazou
Hi,

asan_expand_mark_ifn does manual store merging but doesn't take into account 
the alignment, so this can break on strict-alignment platforms.

Tested on SPARC/Solaris 11, where this fixes this regression:

FAIL: gcc.dg/asan/use-after-scope-5.c   -O0  output pattern test
FAIL: gcc.dg/asan/use-after-scope-5.c   -O1  output pattern test
FAIL: gcc.dg/asan/use-after-scope-5.c   -O2  output pattern test
FAIL: gcc.dg/asan/use-after-scope-5.c   -O3 -fomit-frame-pointer -funroll-
loops -fpeel-loops -ftracer -finline-functions  output pattern test
FAIL: gcc.dg/asan/use-after-scope-5.c   -O3 -g  output pattern test
FAIL: gcc.dg/asan/use-after-scope-5.c   -Os  output pattern test
FAIL: gcc.dg/asan/use-after-scope-5.c   -O2 -flto -flto-partition=none  output 
pattern test
FAIL: gcc.dg/asan/use-after-scope-5.c   -O2 -flto  output pattern test

OK for mainline?


2019-02-11  Eric Botcazou  

* asan.c (asan_expand_mark_ifn): Always use a size of 1 byte for the
stores on strict-alignment platforms.

-- 
Eric BotcazouIndex: asan.c
===
--- asan.c	(revision 268508)
+++ asan.c	(working copy)
@@ -3226,10 +3226,13 @@ asan_expand_mark_ifn (gimple_stmt_iterat
   for (unsigned HOST_WIDE_INT offset = 0; offset < shadow_size;)
 	{
 	  unsigned size = 1;
-	  if (shadow_size - offset >= 4)
-	size = 4;
-	  else if (shadow_size - offset >= 2)
-	size = 2;
+	  if (!STRICT_ALIGNMENT)
+	{
+	  if (shadow_size - offset >= 4)
+		size = 4;
+	  else if (shadow_size - offset >= 2)
+		size = 2;
+	}
 
 	  unsigned HOST_WIDE_INT last_chunk_size = 0;
 	  unsigned HOST_WIDE_INT s = (offset + size) * ASAN_SHADOW_GRANULARITY;


Re: [patch] Disable store merging in asan_expand_mark_ifn

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 11:03:41AM +0100, Eric Botcazou wrote:
> asan_expand_mark_ifn does manual store merging but doesn't take into account 
> the alignment, so this can break on strict-alignment platforms.
> 
> Tested on SPARC/Solaris 11, where this fixes this regression:
> 
> FAIL: gcc.dg/asan/use-after-scope-5.c   -O0  output pattern test
> FAIL: gcc.dg/asan/use-after-scope-5.c   -O1  output pattern test
> FAIL: gcc.dg/asan/use-after-scope-5.c   -O2  output pattern test
> FAIL: gcc.dg/asan/use-after-scope-5.c   -O3 -fomit-frame-pointer -funroll-
> loops -fpeel-loops -ftracer -finline-functions  output pattern test
> FAIL: gcc.dg/asan/use-after-scope-5.c   -O3 -g  output pattern test
> FAIL: gcc.dg/asan/use-after-scope-5.c   -Os  output pattern test
> FAIL: gcc.dg/asan/use-after-scope-5.c   -O2 -flto -flto-partition=none  
> output 
> pattern test
> FAIL: gcc.dg/asan/use-after-scope-5.c   -O2 -flto  output pattern test
> 
> OK for mainline?
> 
> 
> 2019-02-11  Eric Botcazou  
> 
>   * asan.c (asan_expand_mark_ifn): Always use a size of 1 byte for the
>   stores on strict-alignment platforms.

So, wouldn't it be better to check for STRICT_ALIGNMENT
get_pointer_alignment (base_addr) and do this only if that alignment
(shifted right by ASAN_SHADOW_SHIFT) is not sufficient and e.g. if we would
know that the shadow is at least 2 byte aligned but not 4 byte aligned, use
size = 2 instead of always 1?  E.g. compute this before the loop as
max_size and for !STRICT_ALIGNMENT use always max_size 4?

> Index: asan.c
> ===
> --- asan.c(revision 268508)
> +++ asan.c(working copy)
> @@ -3226,10 +3226,13 @@ asan_expand_mark_ifn (gimple_stmt_iterat
>for (unsigned HOST_WIDE_INT offset = 0; offset < shadow_size;)
>   {
> unsigned size = 1;
> -   if (shadow_size - offset >= 4)
> - size = 4;
> -   else if (shadow_size - offset >= 2)
> - size = 2;
> +   if (!STRICT_ALIGNMENT)
> + {
> +   if (shadow_size - offset >= 4)
> + size = 4;
> +   else if (shadow_size - offset >= 2)
> + size = 2;
> + }
>  
> unsigned HOST_WIDE_INT last_chunk_size = 0;
> unsigned HOST_WIDE_INT s = (offset + size) * ASAN_SHADOW_GRANULARITY;


Jakub


Re: [Patch] [arm] Fix 88714, Arm LDRD/STRD peepholes

2019-02-11 Thread Kyrill Tkachov

Hi Jakub,


On 08/02/19 11:40, Jakub Jelinek wrote:

On Fri, Feb 08, 2019 at 11:29:10AM +, Matthew Malcomson wrote:
> I'm pretty sure there's no difference between the iwmmxt target and
> others so believe your simpler fix of just using 'q' is a good idea.
> (there's no difference in gas and no documentation I have found mentions
> a difference).

The simpler patch would be then (but of course in that case the question is
why iwmmxt.md doesn't use those q constraints for the output_move_double
alternatives).



I think this is ok.

The "q" constraint was introduced after the iwmmxt.md patterns were written and 
it seems
that they were just never updated to use it.
It's hard for anyone to get a hold of the relevant hardware to test iwmmxt 
these days,
so I suspect that path hasn't been thoroughly tested.
In my opinion the ldrd/strd-related logic there should follow the same approach 
as the rest of
the arm backend, that is, using 'q'.

A patch to update the iwmmxt.md constraints in that way is pre-approved.

Thanks,
Kyrill


2019-02-08  Jakub Jelinek  

PR bootstrap/88714
* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use q constraint
instead of r.

--- gcc/config/arm/ldrdstrd.md.jj   2019-02-08 11:25:42.368916124 +0100
+++ gcc/config/arm/ldrdstrd.md  2019-02-08 12:38:33.647585108 +0100
@@ -157,9 +157,9 @@ (define_peephole2 ; swap the destination
 ;; We use gen_operands_ldrd_strd() with a modify argument as false so that the
 ;; operands are not changed.
 (define_insn "*arm_ldrd"
-  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=r")
+  [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
(match_operand:SI 2 "memory_operand" "m"))
- (set (match_operand:SI 1 "s_register_operand" "=r")
+ (set (match_operand:SI 1 "s_register_operand" "=q")
(match_operand:SI 3 "memory_operand" "m"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, true)"
@@ -178,9 +178,9 @@ (define_insn "*arm_ldrd"

 (define_insn "*arm_strd"
   [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
-  (match_operand:SI 0 "s_register_operand" "r"))
+  (match_operand:SI 0 "s_register_operand" "q"))
   (set (match_operand:SI 3 "memory_operand" "=m")
-  (match_operand:SI 1 "s_register_operand" "r"))])]
+  (match_operand:SI 1 "s_register_operand" "q"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, false)"
   {


Jakub




Re: Follow-up-fix 2 to "[PATCH] Move PR84877 fix elsewhere (PR bootstrap/88450)"

2019-02-11 Thread Hans-Peter Nilsson
> Date: Mon, 11 Feb 2019 10:22:12 +0100
> From: Jakub Jelinek 

> Is PREFERRED_STACK_BOUNDARY what we want here?  Shouldn't that be
> STACK_BOUNDARY, or PARM_BOUNDARY?  Though, PARM_BOUNDARY on cris is 32...

Hm.

I wish there was a better distinction both in the code and in
people's minds between boundary (or whatever you'd call the
multiple to which the size of something must be rounded) and
(memory/stack-)alignment.  Alas, there are lots of mixups in
usage and the documentation even calls this alignment.  I'm not
even sure it was *size* boundary when I set PARM_BOUNDARY to 32
in the first place!  I may have to change that to reflect the
current sources. :(

Still, this isn't strictly a parameter (to pass or to be
expected incoming according to an ABI), but a local copy of what
was incoming in registers, so picking the preferred stack
boundary as per PREFERRED_STACK_BOUNDARY seems natural.

brgds, H-P


Re: [Patch] [arm] Fix 88714, Arm LDRD/STRD peepholes

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 10:32:23AM +, Kyrill Tkachov wrote:
> I think this is ok.

Ok, committed the simpler version.

> The "q" constraint was introduced after the iwmmxt.md patterns were written 
> and it seems
> that they were just never updated to use it.
> It's hard for anyone to get a hold of the relevant hardware to test iwmmxt 
> these days,
> so I suspect that path hasn't been thoroughly tested.
> In my opinion the ldrd/strd-related logic there should follow the same 
> approach as the rest of
> the arm backend, that is, using 'q'.
> 
> A patch to update the iwmmxt.md constraints in that way is pre-approved.

Thinking about it some more, given that the "q" constraint has been
introduced exactly for the ldrdstrd.md created movdi patterns
(https://gcc.gnu.org/ml/gcc-patches/2013-02/msg00604.html),
we don't generate those anymore, and r13 aka sp is a FIXED_REGISTERS, I
wonder if instead of that we shouldn't change *arm_movdi and *movdi_vfp
patterns (what about *movdf_soft_insn ?) to use r constraint again - the RA
shouldn't be putting DImode regs at ip, because only half of that register
is non-fixed and previously the only way to get there was ldrdstrd
peephole2s, but those use *arm_ldrd/*arm_strd patterns now.

So like the patch below (though, I have only limited possibilities to test
this, can throw it in armv7hl-linux-gnueabi distro build).

2019-02-11  Jakub Jelinek  

PR bootstrap/88714
* config/arm/arm.md (*arm_movdi, *movdf_soft_insn): Use "r" instead of
"q" constraint.
* config/arm/vfp.md (*movdi_vfp): Likewise.
* config/arm/ldrdstrd.md (*arm_ldrd, *arm_strd): Use "r" instead of
"q" constraint for operands[1].

--- gcc/config/arm/arm.md.jj2019-01-31 00:26:04.417738975 +0100
+++ gcc/config/arm/arm.md   2019-02-11 12:02:32.778707056 +0100
@@ -5817,8 +5817,8 @@ (define_expand "movdi"
 )
 
 (define_insn "*arm_movdi"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, q, m")
-   (match_operand:DI 1 "di_operand"  "rDa,Db,Dc,mi,q"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" "=r, r, r, r, m")
+   (match_operand:DI 1 "di_operand"  "rDa,Db,Dc,mi,r"))]
   "TARGET_32BIT
&& !(TARGET_HARD_FLOAT)
&& !TARGET_IWMMXT
@@ -7102,8 +7102,8 @@ (define_expand "reload_outdf"
 )
 
 (define_insn "*movdf_soft_insn"
-  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,q,m")
-   (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,q"))]
+  [(set (match_operand:DF 0 "nonimmediate_soft_df_operand" "=r,r,r,r,m")
+   (match_operand:DF 1 "soft_df_operand" "rDa,Db,Dc,mF,r"))]
   "TARGET_32BIT && TARGET_SOFT_FLOAT
&& (   register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode))"
--- gcc/config/arm/vfp.md.jj2019-01-31 00:26:04.312740661 +0100
+++ gcc/config/arm/vfp.md   2019-02-11 12:03:13.232045976 +0100
@@ -307,8 +307,8 @@ (define_insn "*thumb2_movsi_vfp"
 ;; DImode moves
 
 (define_insn "*movdi_vfp"
-  [(set (match_operand:DI 0 "nonimmediate_di_operand" 
"=r,r,r,r,q,q,m,w,!r,w,w, Uv")
-   (match_operand:DI 1 "di_operand"  
"r,rDa,Db,Dc,mi,mi,q,r,w,w,UvTu,w"))]
+  [(set (match_operand:DI 0 "nonimmediate_di_operand" 
"=r,r,r,r,r,r,m,w,!r,w,w, Uv")
+   (match_operand:DI 1 "di_operand"  
"r,rDa,Db,Dc,mi,mi,r,r,w,w,UvTu,w"))]
   "TARGET_32BIT && TARGET_HARD_FLOAT
&& (   register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode))
--- gcc/config/arm/ldrdstrd.md.jj   2019-02-11 11:39:39.977125795 +0100
+++ gcc/config/arm/ldrdstrd.md  2019-02-11 12:03:57.978314745 +0100
@@ -159,7 +159,7 @@ (define_peephole2 ; swap the destination
 (define_insn "*arm_ldrd"
   [(parallel [(set (match_operand:SI 0 "s_register_operand" "=q")
   (match_operand:SI 2 "memory_operand" "m"))
- (set (match_operand:SI 1 "s_register_operand" "=q")
+ (set (match_operand:SI 1 "s_register_operand" "=r")
   (match_operand:SI 3 "memory_operand" "m"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, true)"
@@ -180,7 +180,7 @@ (define_insn "*arm_strd"
   [(parallel [(set (match_operand:SI 2 "memory_operand" "=m")
   (match_operand:SI 0 "s_register_operand" "q"))
  (set (match_operand:SI 3 "memory_operand" "=m")
-  (match_operand:SI 1 "s_register_operand" "q"))])]
+  (match_operand:SI 1 "s_register_operand" "r"))])]
   "TARGET_LDRD && TARGET_ARM && reload_completed
   && valid_operands_ldrd_strd (operands, false)"
   {


Jakub


Re: [PATCH] Updated patches for the port of gccgo to GNU/Hurd

2019-02-11 Thread Svante Signell
On Sun, 2019-02-10 at 22:08 -0800, Ian Lance Taylor wrote:
> On Sun, Feb 10, 2019 at 3:41 AM Svante Signell 
> wrote:
> > On Sat, 2019-02-09 at 23:57 +0100, Svante Signell wrote:
> > > On Sat, 2019-02-09 at 14:40 -0800, Ian Lance Taylor wrote:
> > > > On Fri, Feb 8, 2019 at 3:07 PM Matthias Klose  wrote:
> > > > > On 07.02.19 06:04, Ian Lance Taylor wrote:
> > > > What are the lines before that in the log?  For some reason libtool is
> > > > being invoke with no source files.  The lines before the failing line
> > > > should show an invocation of match.sh that determines the source
> > > > files.
> > > 
> > > Thanks for your job upstreaming the patches!
> > > 
> > > I've found some problems. Current problem is with the mksysinfo.sh patch.
> > > But there are some other things missing. New patches will be submitted
> > > tomorrow.
> > 
> > Attached are three additional patches needed to build libgo on GNU/Hurd:
> > src_libgo_mksysinfo.sh.diff
> > src_libgo_go_syscall_wait.c.diff
> > src_libgo_testsuite_gotest.diff
> > 
> > For the first patch, src_libgo_mksysinfo.sh.diff, I had to go back to the
> > old version, using sed -i -e. As written now ${fsid_to_dev} expands to
> > fsid_to_dev='-e '\''s/st_fsid/Dev/'\''' resulting in: "sed: -e expression
> > #4, char 1: unknown command: `''". Unfortunately, I have not yet been able
> > to modify the expansion omitting the single qoutes around the shell
> > variable.
> 
> I'm sorry, I don't want to use "sed -i".  That loses the original file
> and makes it harder to reconstruct what has happened.

What to do then?

> > The second patch, src_libgo_go_syscall_wait.c.diff, is needed since
> > WCONTINUED is not defined and is needed for WIFCONTINUED to be defined in
> > wait.h.
> 
> I don't understand that.   is a system header file.  Are
> you saying that it is impossible to use  and WIFCONTINUED
> unless your source code does a #define WCONTINUED before #include'ing
> ?  That seems like a bug in the Hurd library code.

The problem is that WCONTINUED is not defined in /usr/include/i386-
gnu/bits/waitflags.h on Hurd. Only WNOHANG and WUNTRACED are. That causes
WIFCONTINUED not to be defined in /usr/include/i386-gnu/bits/waitstatus.h. As
WCONTINUED is not defined, I assume that WIFCONTINUED is not supported.

From waitpid(2):
WCONTINUED (since Linux 2.6.10)
   also return if a stopped child has been resumed by delivery of SIGCONT.

@Samuel: more info?

I think that that call to WIFCONTINUED in libgo/go/syscall/wait.c
_Bool
Continued (uint32_t *w)
{
  return WIFCONTINUED (*w) != 0;
}

has to be omitted somehow for Hurd.

> > The third patch, src_libgo_testsuite_gotest.diff, is not strictly needed,
> > but running the tests the annoying text is displayed: "ps: comm: Unknown
> > format spec"
> 
> I get that "comm" doesn't work, but the change in that patch is simply
> incorrect.  If you don't pass "comm", the "grep sleep" will never
> succeed.  If there is no way to support this code on Hurd then we
> should skip it, not put in a command that can never work.

OK, let's drop that part then.

@Samuel: more info?




Re: [patch] Disable store merging in asan_expand_mark_ifn

2019-02-11 Thread Eric Botcazou
> So, wouldn't it be better to check for STRICT_ALIGNMENT
> get_pointer_alignment (base_addr) and do this only if that alignment
> (shifted right by ASAN_SHADOW_SHIFT) is not sufficient and e.g. if we would
> know that the shadow is at least 2 byte aligned but not 4 byte aligned, use
> size = 2 instead of always 1?  E.g. compute this before the loop as
> max_size and for !STRICT_ALIGNMENT use always max_size 4?

In practice this makes a difference only for objects aligned on 128-bit or 
above boundaries though.  Moreover, don't you need to take into account the 
offset as well, which can be modified through -fasan-shadow-offset?

-- 
Eric Botcazou


Re: Andrew Stubbs and Julian Brown appointed AMD GCN maintainers

2019-02-11 Thread Andrew Stubbs

On 08/02/2019 16:10, David Edelsohn wrote:

I am pleased to announce that the GCC Steering Committee has
appointed Andrew Stubbs and Julian Brown as AMD GCN maintainers.


Many thanks to David and the Steering Committee!

I've committed the attached.

Andrew
Add AMD GCN maintainers

2019-02-11  Andrew Stubbs  

	* MAINTAINERS (amdgcn port): Add myself and Julian Brown.
	(Write After Approval): Remove myself and Julian.

diff --git a/MAINTAINERS b/MAINTAINERS
index 4cd7a4741b6..1490ab21150 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -48,6 +48,8 @@ aarch64 port		James Greenhalgh	
 aarch64 port		Marcus Shawcroft	
 aarch64 SVE port	Richard Sandiford	
 alpha port		Richard Henderson	
+amdgcn port		Julian Brown		
+amdgcn port		Andrew Stubbs		
 arc port		Joern Rennecke		
 arm port		Nick Clifton		
 arm port		Richard Earnshaw	
@@ -333,7 +335,6 @@ Neil Booth	
 Robert Bowdidge	
 Joel Brobecker	
 Dave Brolley	
-Julian Brown	
 Christian Bruel	
 Kevin Buettner	
 Adam Butcher	
@@ -586,7 +587,6 @@ Richard Stallman
 Basile Starynkevitch
 Jakub Staszak	
 Graham Stott	
-Andrew Stubbs	
 Jeff Sturm	
 Robert Suchanek	
 Andrew Sutton	


Re: [patch] Disable store merging in asan_expand_mark_ifn

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 12:31:43PM +0100, Eric Botcazou wrote:
> > So, wouldn't it be better to check for STRICT_ALIGNMENT
> > get_pointer_alignment (base_addr) and do this only if that alignment
> > (shifted right by ASAN_SHADOW_SHIFT) is not sufficient and e.g. if we would
> > know that the shadow is at least 2 byte aligned but not 4 byte aligned, use
> > size = 2 instead of always 1?  E.g. compute this before the loop as
> > max_size and for !STRICT_ALIGNMENT use always max_size 4?
> 
> In practice this makes a difference only for objects aligned on 128-bit or 

No.  64-bit aligned offsets too.  If you know 64-bit alignment of base_addr,
you can use size 2 stores (though not size 4 stores) on the
!STRICT_ALIGNMENT targets.  And that is something still pretty common.

> above boundaries though.  Moreover, don't you need to take into account the 
> offset as well, which can be modified through -fasan-shadow-offset?

No.  If people use a bogus shadow offset, prologues/epilogues will not work
either on strict aligned targets, and -fasan-shadow-offset is for
-fsanitize=kernel-address only.  Only page aligned offsets are something
that is supported/reasonable.

Jakub


Re: please approval my patch - add new logical traits to type_traits for logical completeness

2019-02-11 Thread Jonathan Wakely

On 10/02/19 11:46 +0800, 李苏旺 wrote:

hi Marc,
   I am very glad to receive your reply. In fact, I have neither "  C++
standard these are specified" nor "feature test macro" , I saw negation,
conjunction and disjunction have added in type_traits , and similar , after
I reference to
https://whatis.techtarget.com/definition/logic-gate-AND-OR-XOR-NOT-NAND-NOR-and-XNOR#xnor
 , I think  XOR(exclusive_or), NAND(not_conjunction),
NOR(not_disjunction), XNOR(not_exclusive_or) can be added in type_traits ,
that is all . any way , thank you reminder , maybe I may write a proposal
in the future , I hope these logical gates can be added too


I agree with Marc that we don't want to add these, unless they are
first added to the C++ standard (and I don't think they are useful
enough to add to the standard either).



Re: [RS6000] Don't support inline PLT for ABI_V4 bss-plt

2019-02-11 Thread Segher Boessenkool
Hi!

On Mon, Feb 11, 2019 at 11:08:12AM +1030, Alan Modra wrote:
> On Fri, Feb 08, 2019 at 04:53:44PM -0600, Segher Boessenkool wrote:
> > > @@ -37981,7 +37982,7 @@ rs6000_call_sysv (rtx value, rtx func_desc, rtx 
> > > tlsarg, rtx cookie)
> > >func = rs6000_longcall_ref (func_desc, tlsarg);
> > >/* If the longcall was implemented using PLT16 relocs, then r11
> > >needs to be valid at the call for lazy linking.  */
> > 
> > This comment could use some work.
> 
> True.  I've rectified that and combined this patch with the
> -mno-pltseq patch, documented as requested and enhanced to emit errors
> and warnings when invalid options are combined.  The two patches go
> together, because someone building their ppc32 compiler with
> --enable-secureplt but having old -mbss-plt relocatable objects on
> their system will find that linking new -msecure-plt -mlongcall
> objects against -mbss-plt objects will fail.  One solution to that
> problem is to compile with -mbss-plt whenever -mlongcall is needed,
> but -mlongcall -mno-pltseq provides a way to transition everything to
> -msecure-plt objects.

Okay.  The best is much nicer now, thanks!

> Bootstrapped and regression tested powerpc64le-linux and
> powerpc64-linux biarch.  I also built an rs6000-aix7.2 cross compiler
> (well, cc1 and cc1plus, I don't have the aix headers to build libgcc)

There are gcc111 and gcc119 in the cfarm.  Bootstrap is slow, but
--disable-bootstrap is okayish (but a bit fiddly).  You could also just
make a sysroot from them (no idea if that works, I haven't tried).

> I'd also like to fix the formatting in linux64.h
> SUBSUBTARGET_OVERRIDE_OPTIONS by moving all the continuation
> backslashes one tab stop to the right when I commit this patch.  Is
> that OK too?

I wonder if we could get rid of that completely?  Use an inline function
instead?  Or a real function, shock horror.

> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index b4ff18d414c..99f04bba148 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -21665,7 +21665,7 @@ rs6000_indirect_call_template_1 (rtx *operands, 
> unsigned int funop,
>   || (REG_P (operands[funop])
>   && REGNO (operands[funop]) == LR_REGNO));
>  
> -  if (!TARGET_MACHO && HAVE_AS_PLTSEQ && GET_CODE (operands[funop]) == 
> UNSPEC)
> +  if (!TARGET_MACHO && TARGET_PLTSEQ && GET_CODE (operands[funop]) == UNSPEC)

Does this need !TARGET_MACHO?  That's implied by TARGET_PLTSEQ?

> @@ -24834,6 +24835,11 @@ generate slower code.  As of this writing, the AIX 
> linker can do this,
>  as can the GNU linker for PowerPC/64.  It is planned to add this feature
>  to the GNU linker for 32-bit PowerPC systems as well.
>  
> +On PowerPC64le and 32-bit PowerPC systems with newer GNU linkers, GCC

The mixed capitalisation in "PowerPC64le" makes my head hurt.  Luckily,
what you really mean are PowerPC64 ELFv2 systems.  Right?  (Once more
below).

Okay for trunk, with those little tweaks (the inline thing is for another
day of course).

This was the last linker-related thing for GCC 9, right?

Thanks,


Segher


[PATCH] S/390: Make legitimate_address_p accept UNSPEC_LTREF

2019-02-11 Thread Ilya Leoshkevich
Bootstrapped and regtested on s390x-redhat-linux.

After r265371 (S/390: Make "b" constraint match literal pool references),
satisfies_constraint_b () started accepting memory references, whose
addresses do not pass legitimate_address_p ().  Specifically, literal
pool accesses past the end of the entry they are based on are explicitly
rejected by s390_decompose_address ().  This leads to ICE in early_mach
pass when trying to perform UNSPEC_LTREF substitution on such addresses.

s390_decompose_address () check does not apply to relative addresses.
The reason it is even made is that larl_operand () does not accept
literal pool references transformed into UNSPEC_LTREF.  This patch
makes larl_operand () treat plain and transformed literal pool
references identically.

gcc/ChangeLog:

2019-02-08  Ilya Leoshkevich  

PR target/89233
* config/s390/predicates.md (larl_operand): Allow UNSPEC_LTREF.
* config/s390/s390.c (s390_decompose_address): Update comment.
(s390_loadrelative_operand_p): Allow UNSPEC_LTREF.

gcc/testsuite/ChangeLog:

2019-02-08  Ilya Leoshkevich  

PR target/89233
* gcc.target/s390/pr89233.c: New test.
---
 gcc/config/s390/predicates.md   | 21 +++--
 gcc/config/s390/s390.c  |  8 ++--
 gcc/testsuite/gcc.target/s390/pr89233.c | 11 +++
 3 files changed, 24 insertions(+), 16 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/pr89233.c

diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index 9a3d99e7897..1af505d156d 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -145,20 +145,11 @@
 ;;  Return true if OP a valid operand for the LARL instruction.
 
 (define_predicate "larl_operand"
-  (match_code "label_ref, symbol_ref, const")
+  (match_code "label_ref, symbol_ref, const, plus, unspec")
 {
-  /* Allow labels and local symbols.  */
-  if (GET_CODE (op) == LABEL_REF)
-return true;
-  if (SYMBOL_REF_P (op))
-return (!SYMBOL_FLAG_NOTALIGN2_P (op)
-   && SYMBOL_REF_TLS_MODEL (op) == 0
-   && s390_rel_address_ok_p (op));
-
-  /* Everything else must have a CONST, so strip it.  */
-  if (GET_CODE (op) != CONST)
-return false;
-  op = XEXP (op, 0);
+  /* Strip CONST to unify SYMBOL_REF and UNSPEC_LTREF handling.  */
+  if (GET_CODE (op) == CONST)
+op = XEXP (op, 0);
 
   /* Allow adding *even* in-range constants.  */
   if (GET_CODE (op) == PLUS)
@@ -172,9 +163,11 @@
   op = XEXP (op, 0);
 }
 
-  /* Labels and local symbols allowed here as well.  */
+  /* Allow labels and local symbols.  */
   if (GET_CODE (op) == LABEL_REF)
 return true;
+  if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_LTREF)
+op = XVECEXP (op, 0, 0);
   if (SYMBOL_REF_P (op))
 return (!SYMBOL_FLAG_NOTALIGN2_P (op)
&& SYMBOL_REF_TLS_MODEL (op) == 0
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 6a571a3e054..8755f613d43 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3019,8 +3019,9 @@ s390_decompose_address (rtx addr, struct s390_address 
*out)
  orig_disp = gen_rtx_CONST (Pmode, disp);
  if (offset)
{
- /* If we have an offset, make sure it does not
-exceed the size of the constant pool entry.  */
+ /* If we have an offset, make sure it does not exceed the size of
+the constant pool entry.  Otherwise we might generate an
+out-of-range displacement for the base register form.  */
  rtx sym = XVECEXP (disp, 0, 0);
  if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
return false;
@@ -3140,6 +3141,9 @@ s390_loadrelative_operand_p (rtx addr, rtx *symref, 
HOST_WIDE_INT *addend)
   addr = XEXP (addr, 0);
 }
 
+  if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_LTREF)
+addr = XVECEXP (addr, 0, 0);
+
   if (GET_CODE (addr) == SYMBOL_REF
   || (GET_CODE (addr) == UNSPEC
  && (XINT (addr, 1) == UNSPEC_GOTENT
diff --git a/gcc/testsuite/gcc.target/s390/pr89233.c 
b/gcc/testsuite/gcc.target/s390/pr89233.c
new file mode 100644
index 000..f572bfa08d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr89233.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=z13 -O1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+f ()
+{
+  v4si x = {0, 1, 2, 3};
+  return x[4];
+}
-- 
2.20.1



[build] Restore .init_array etc. detection on 64-bit Solaris

2019-02-11 Thread Rainer Orth
I noticed that HAVE_INITFINI_ARRAY_SUPPORT wasn't properly detected on
Solaris 11.4 and up in 64-bit-default configurations when using gas
(2.32) and Solaris ld.

Closer inspection revealed the following: both Solaris as and gas <=
2.28 emit

Section Header[7]:  sh_name: .init_array.01005
sh_addr:  0   sh_flags:   [ SHF_ALLOC ]
sh_size:  0x4 sh_type:[ SHT_PROGBITS ]
sh_offset:0x4csh_entsize: 0
sh_link:  0   sh_info:0
sh_addralign: 0x4   

while since gas 2.29 one gets both a warning

initfini.s:1: Warning: ignoring incorrect section type for .init_array.01005

and

Section Header[7]:  sh_name: .init_array.01005
sh_addr:  0   sh_flags:   [ SHF_WRITE SHF_ALLOC ]
sh_size:  0x4 sh_type:[ SHT_INIT_ARRAY ]
sh_offset:0x4csh_entsize: 0x8 (0 entries)
sh_link:  0   sh_info:0
sh_addralign: 0x4   

Since sh_size < sh_entsize, when the resulting object is linked into an
executable, the latter lacks the .init_array section completely.  This
seems a plausible thing to do and the workaround is easy: just double
the size of test strings.  The following patch does just that.

Tested on {i386,amd64}-pc-solaris2.11 and sparc{,v9}-sun-solaris2.11
without regressions and proper HAVE_INITFINI_ARRAY_SUPPORT detection.

Since this only affects Solaris ld, I've checked it in.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2019-02-10  Rainer Orth  

* acinclude.m4 (gcc_AC_INITFINI_ARRAY): Use 8-byte strings with
Solaris ld.
* configure: Regenerate.

# HG changeset patch
# Parent  7f704296017d34ec13aa97b574555028c8a81ca6
Restore .init_array detection on 64-bit Solaris/x86

diff --git a/gcc/acinclude.m4 b/gcc/acinclude.m4
--- a/gcc/acinclude.m4
+++ b/gcc/acinclude.m4
@@ -374,16 +374,16 @@ EOF
 	  cat > conftest.s < /dev/null 2>&1 \
 	 && $gcc_cv_ld -o conftest conftest.o > /dev/null 2>&1 \
 	 && $gcc_cv_objdump -s -j .init_array conftest \
-		| grep  > /dev/null 2>&1 \
+		| grep  > /dev/null 2>&1 \
 	 && $gcc_cv_objdump -s -j .fini_array conftest \
-		| grep  > /dev/null 2>&1; then
+		| grep  > /dev/null 2>&1; then
 	gcc_cv_initfini_array=yes
 	  fi
 	  ;;


Re: [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE

2019-02-11 Thread H.J. Lu
On Sun, Feb 10, 2019 at 11:25 PM Uros Bizjak  wrote:
>
> On Mon, Feb 11, 2019 at 2:04 AM H.J. Lu  wrote:
> >
> > On Sun, Feb 10, 2019 at 1:49 PM Uros Bizjak  wrote:
> > >
> > > On Sun, Feb 10, 2019 at 10:45 PM Uros Bizjak  wrote:
> > >
> > > > > > > +  [(const_int 0)]
> > > > > > > +{
> > > > > > > +  /* Emulate MMX vec_dupv2si with SSE vec_dupv4si.  */
> > > > > > > +  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > > +  rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > > +  emit_insn (insn);
> > > > > > > +  DONE;
> > > > > >
> > > > > > Please write this simple RTX explicitly in the place of (const_int 
> > > > > > 0) above.
> > > > >
> > > > > rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > >
> > > > > is easy.   How do I write
> > > > >
> > > > > rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > >
> > > > > in place of  (const_int 0)?
> > > >
> > > >   [(set (match_dup 2)
> > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > >
> > > > with
> > > >
> > > > "operands[2] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > >
> > > > or even better:
> > > >
> > > > "operands[2] = gen_lowpart (V4SImode, operands[0]);"
> > > >
> > > > in the preparation statement.
> > >
> > > Even shorter is
> > >
> > > "operands[0] = gen_lowpart (V4SImode, operands[0]);"
> > >
> > > and use (match_dup 0) instead of (match_dup 2) in the RTX.
> > >
> > > There is plenty of examples throughout sse.md.
> > >
> >
> > This works:
> >
> > (define_insn_and_split "*vec_dupv2si"
> >   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
> > (vec_duplicate:V2SI
> >   (match_operand:SI 1 "register_operand" "0,0,Yv")))]
> >   "TARGET_MMX || TARGET_MMX_WITH_SSE"
> >   "@
> >punpckldq\t%0, %0
> >#
> >#"
> >   "TARGET_MMX_WITH_SSE && reload_completed"
> >   [(set (match_dup 0)
> > (vec_duplicate:V4SI (match_dup 1)))]
> >   "operands[0] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> >   [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> >(set_attr "type" "mmxcvt,ssemov,ssemov")
> >(set_attr "mode" "DI,TI,TI")])
>
> If it works, then gen_lowpart is preferred due to extra checks.
> However, it would result in a paradoxical subreg, so I wonder if these
> extra checks allow this transformation.

gen_lowpart doesn't work:

#include 

__m64
foo (int i)
{
  __v2si x = { i, i };
  return (__m64) x;
}

(gdb) f 1
#1  0x00ba7cca in gen_reg_rtx (mode=E_V2SImode)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c:1155
1155   gcc_assert (can_create_pseudo_p ());
(gdb) bt
#0  fancy_abort (
file=0x22180e0 "/export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c",
line=1155,
function=0x22193a8  "gen_reg_rtx")
at /export/gnu/import/git/gitlab/x86-gcc/gcc/diagnostic.c:1607
#1  0x00ba7cca in gen_reg_rtx (mode=E_V2SImode)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c:1155
#2  0x00bd3044 in copy_to_reg (x=0x7fffea99b528)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/explow.c:594
#3  0x010c7c0a in gen_lowpart_general (mode=E_V4SImode,
x=0x7fffea99b528)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/rtlhooks.c:56
...
#1  0x00ba7cca in gen_reg_rtx (mode=E_V2SImode)
at /export/gnu/import/git/gitlab/x86-gcc/gcc/emit-rtl.c:1155
1155   gcc_assert (can_create_pseudo_p ());
(gdb)

-- 
H.J.


Re: [patch] Disable store merging in asan_expand_mark_ifn

2019-02-11 Thread Eric Botcazou
> No.  64-bit aligned offsets too.  If you know 64-bit alignment of base_addr,
> you can use size 2 stores (though not size 4 stores) on the
> !STRICT_ALIGNMENT targets.  And that is something still pretty common.

But we're talking about STRICT_ALIGNMENT targets here: an array of 2 doubles 
at address 0x1008 will have a shadow address of 0x2001 modulo the 
offset so you cannot use size 2.  Moveover the store merging pass should be 
able to merge the stores so I don't really understand why this matters at all.

-- 
Eric Botcazou


Re: [PATCH] rs6000: Vector shift-right should honor modulo semantics

2019-02-11 Thread Segher Boessenkool
On Sun, Feb 10, 2019 at 08:42:42PM -0600, Bill Schmidt wrote:
> On 2/10/19 4:05 PM, Segher Boessenkool wrote:
> > On Sun, Feb 10, 2019 at 10:10:02AM -0600, Bill Schmidt wrote:
> >> I've added executable tests for both shift-right algebraic and shift-right 
> >> logical.
> >> Both fail prior to applying the patch, and work correctly afterwards.
> > Please add a test for left shifts, as well?
> 
> Can do.  I verified that left shifts were not broken and figured a test case
> had been added then, but have not checked.  Good to test this particular
> scenario, though.

Well you actually added code for left shifts, too ;-)

> >> 2019-02-08  Bill Schmidt  
> >>
> >>* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Shift-right
> >>and shift-left vector built-ins need to include a TRUNC_MOD_EXPR
> >>for correct semantics.  Also, don't expand a vector-splat if there
> >>is a type mismatch; let the back end handle it.
> > Does it always result in just the shift instruction now?  Does the modulo
> > ever remain?  (Maybe at -O0?)  Modulo is hugely expensive; it is always
> > modulo by a power of two, so a simple bitmask, so maybe write that directly
> > instead?
> 
> We always get the shift.  For -mcpu=power8, we always load the mask from
> memory rather than generating the vspltisb, which is not ideal code 
> generation,
> but is at least correct.

_Just_ the shift, I meant.  I know we load a constant from memory, for
constants; I am worried about non-constants, do those get a modulo, or
just a mask.  Also at -O0; if we get an actual division at -O0, there
probably are other cases where it isn't optimised away as well.

> For -mcpu=power9, we get close, but have some bad register allocation and
> an unnecessary extend:
> 
> xxspltib 0,4   <- why not just xxspltib 32,4?
> xxlor 32,0,0   <- wasted copy

Yeah, huh.  Where does that come from...  I blame splitters after reload.

> vextsb2d 0,0   <- unnecessary due to vsrad semantics
> vsrad 2,2,0
> 
> Again, this is at least correct.  We have more work to do to produce the
> most efficient code, but we have PR89213 open for that.

Yes.  But I'd like to see at least a mask instead of a modulo.

> >> +  /* It's sometimes useful to use one of these to build a
> >> + splat for V2DImode, since the upper bits will be ignored.
> >> + Don't fold if we detect that situation, as we end up
> >> + losing the splat instruction in this case.  */
> >> +  if (size != TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (TREE_TYPE (lhs)
> >> +return false;
> > This isn't really detecting that situation...  Instead, it doesn't fold
> > whenever the size of the splatted elements isn't the same as the size of
> > the elts of the target vector.  That's probably perfectly safe, but please
> > spell it out.  It's fine to mention the motivating case, of course.
> 
> Yep, will correct.  Actually, as I look back at my notes, I believe that this
> change is not necessary after all (same code generated with and without it).
> I'll verify.

Ha, that would be nice :-)

> > Testing for vsx_hw but not enabling vsx is probably wrong, too.
> 
> Weird.  I just tried adding -mvsx

Does it _need_ VSX anyway?  Are these builtins defined without it, too?

> and I get this peculiar error we've seen
> before about AMD graphics card offloading:
> 
> spawn -ignore SIGHUP /home/wschmidt/gcc/build/gcc-mainline-test2/gcc/xgcc 
> -B/home/wschmidt/gcc/build/gcc-mainline-test2/gcc/ 
> /home/wschmidt/gcc/gcc-mainline-t\
> est2/gcc/testsuite/gcc.target/powerpc/srad-modulo.c 
> -fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers 
> -fdiagnostics-color=never -O2 -mvsx -lm -o \
> ./srad-modulo.exe^M
> ^[[01m^[[Kcc1:^[[m^[[K ^[[01;31m^[[Kerror: ^[[m^[[Kargument to 
> '^[[01m^[[K-O^[[m^[[K' should be a non-negative integer, 
> '^[[01m^[[Kg^[[m^[[K', '^[[01m^[[Ks^[[\
> m^[[K' or '^[[01m^[[Kfast^[[m^[[K'^M
> compiler exited with status 1

Huh, why do you get colour escapes at all?  I thought the testsuite used
GCC_COLORS= to get rid of that nonsense.  It's quite unreadable like this.

> Executing on host: /home/wschmidt/gcc/build/gcc-mainline-test2/gcc/xgcc 
> -B/home/wschmidt/gcc/build/gcc-mainline-test2/gcc/ offload_gcn7262.c
> -fno-diagnosti\
> cs-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never  
> -foffload=amdgcn-unknown-amdhsa -S -o offload_gcn7262.s(timeout = 300)
> spawn -ignore SIGHUP /home/wschmidt/gcc/build/gcc-mainline-test2/gcc/xgcc 
> -B/home/wschmidt/gcc/build/gcc-mainline-test2/gcc/ offload_gcn7262.c 
> -fno-diagnostic\
> s-show-caret -fno-diagnostics-show-line-numbers -fdiagnostics-color=never 
> -foffload=amdgcn-unknown-amdhsa -S -o offload_gcn7262.s^M
> xgcc: fatal error: GCC is not configured to support amdgcn-unknown-amdhsa as 
> offload target^M
> compilation terminated.^M
> compiler exited with status 1

This is a test to see if there is offload stuff.  There isn't.

> FAIL: gcc.target/powerpc/srad-modulo

Re: Make clear, when contributions will be ignored

2019-02-11 Thread Дилян Палаузов
Hello Segher,

the current procdure is:

-- write at https://gcc.gnu.org/bugzilla/
-- read an answer, that the update shall be posted to gcc-patches
-- subscribe to gcc-patches, post the change and wait for an answer.

This waiting is not free.  There are a lot of emails in which the person might
not be interested, while only waiting for a reply to their own email.  So after
some time, I made filters sorting the emails from the mailing list, in order to
make the waiting cheaper.

-- at https://www.gnu.org/software/gcc/contribute.html is written “If you do 
not receive a response to a patch that you
have submitted within two weeks or so, it may be a good idea to chase it by 
sending a follow-up email to the same
list(s).”

Because it is written that reminders are..., I have sent a reminder.


> > If yes, how do you propose to proceed, so that a 
> > no-reminders-are-necessary-state is reached?
> 
> Keep things as is?  Reminders already are not necessary.
> 

This statement does not align with the aforementioned webpage.

The optimal way would be if a bug/patch is filed in bugzilla and nothing more
is necessary from the reporter.  Postgres
sends bugs collected over website over a mailing list.

Regards
  Дилян

On Sun, 2019-02-10 at 14:56 -0600, Segher Boessenkool wrote:
> Hi Dilyan,
> 
> On Sun, Feb 10, 2019 at 02:45:02PM +, Дилян Палаузов wrote:
> > Do you share the opinion, that whatever can be done after receiving a 
> > reminder, can be arranged also without reminder? 
> 
> Yes.  When people have time for it, they can trivially check what PRs are
> still open that they are involved in.
> 
> > If yes, how do you propose to proceed, so that a 
> > no-reminders-are-necessary-state is reached?
> 
> Keep things as is?  Reminders already are not necessary.
> 
> If you want more attention given to the bugs you are involved in, you can
> hire people to do that, or file reports for more interesting bugs, or make
> your bug reports easier to work with.
> 
> Since GCC has one major release every year, handling less urgent bugs can
> take up to a year as well.
> 
> > I read in the answer of Segher, that the purpose of reminding is not only 
> > to ping, but also to filter the ones who are
> > pernetrant and sending manually reminders is the means to verify, that the 
> > persons really want to make progress.  It was
> > certainly not intentionally meant this way, but this is a possible reading.
> 
> The point is that automated reminders for PRs *are spam*.
> 
> 
> Segher



Re: [PATCH] S/390: Make legitimate_address_p accept UNSPEC_LTREF

2019-02-11 Thread Ulrich Weigand
Ilya Leoshkevich wrote:

> After r265371 (S/390: Make "b" constraint match literal pool references),
> satisfies_constraint_b () started accepting memory references, whose
> addresses do not pass legitimate_address_p ().  Specifically, literal
> pool accesses past the end of the entry they are based on are explicitly
> rejected by s390_decompose_address ().  This leads to ICE in early_mach
> pass when trying to perform UNSPEC_LTREF substitution on such addresses.
> 
> s390_decompose_address () check does not apply to relative addresses.
> The reason it is even made is that larl_operand () does not accept
> literal pool references transformed into UNSPEC_LTREF.  This patch
> makes larl_operand () treat plain and transformed literal pool
> references identically.

I don't think this change is correct.  Literal pool references that
match a larl_operand ("b" constraint) should not have been transformed
into UNSPEC_LTREF in the first place; see this comment and code:

/* Annotate every literal pool reference in INSN by an UNSPEC_LTREF expression.
   Fix up MEMs as required.
   Skip insns which support relative addressing, because they do not use a base
   register.  */

static void
annotate_constant_pool_refs (rtx_insn *insn)
{
  if (s390_safe_relative_long_p (insn))
return;

Bye,
Ulrich

-- 
  Dr. Ulrich Weigand
  GNU/Linux compilers and toolchain
  ulrich.weig...@de.ibm.com



Re: [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE

2019-02-11 Thread Uros Bizjak
On Mon, Feb 11, 2019 at 1:26 PM H.J. Lu  wrote:
>
> On Sun, Feb 10, 2019 at 11:25 PM Uros Bizjak  wrote:
> >
> > On Mon, Feb 11, 2019 at 2:04 AM H.J. Lu  wrote:
> > >
> > > On Sun, Feb 10, 2019 at 1:49 PM Uros Bizjak  wrote:
> > > >
> > > > On Sun, Feb 10, 2019 at 10:45 PM Uros Bizjak  wrote:
> > > >
> > > > > > > > +  [(const_int 0)]
> > > > > > > > +{
> > > > > > > > +  /* Emulate MMX vec_dupv2si with SSE vec_dupv4si.  */
> > > > > > > > +  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > > > +  rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > > > +  emit_insn (insn);
> > > > > > > > +  DONE;
> > > > > > >
> > > > > > > Please write this simple RTX explicitly in the place of 
> > > > > > > (const_int 0) above.
> > > > > >
> > > > > > rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > >
> > > > > > is easy.   How do I write
> > > > > >
> > > > > > rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > >
> > > > > > in place of  (const_int 0)?
> > > > >
> > > > >   [(set (match_dup 2)
> > > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > > >
> > > > > with
> > > > >
> > > > > "operands[2] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > > >
> > > > > or even better:
> > > > >
> > > > > "operands[2] = gen_lowpart (V4SImode, operands[0]);"
> > > > >
> > > > > in the preparation statement.
> > > >
> > > > Even shorter is
> > > >
> > > > "operands[0] = gen_lowpart (V4SImode, operands[0]);"
> > > >
> > > > and use (match_dup 0) instead of (match_dup 2) in the RTX.
> > > >
> > > > There is plenty of examples throughout sse.md.
> > > >
> > >
> > > This works:
> > >
> > > (define_insn_and_split "*vec_dupv2si"
> > >   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
> > > (vec_duplicate:V2SI
> > >   (match_operand:SI 1 "register_operand" "0,0,Yv")))]
> > >   "TARGET_MMX || TARGET_MMX_WITH_SSE"
> > >   "@
> > >punpckldq\t%0, %0
> > >#
> > >#"
> > >   "TARGET_MMX_WITH_SSE && reload_completed"
> > >   [(set (match_dup 0)
> > > (vec_duplicate:V4SI (match_dup 1)))]
> > >   "operands[0] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > >   [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> > >(set_attr "type" "mmxcvt,ssemov,ssemov")
> > >(set_attr "mode" "DI,TI,TI")])
> >
> > If it works, then gen_lowpart is preferred due to extra checks.
> > However, it would result in a paradoxical subreg, so I wonder if these
> > extra checks allow this transformation.
>
> gen_lowpart doesn't work:

Ah, we need lowpart_subreg after reload.

Uros.


[PATCH] PR libstdc++/89023 fix test that fails when not available

2019-02-11 Thread Jonathan Wakely

Instead of a single test that only checks whether  can be
included in Parallel Mode, add tests for each of C++11/C++14/C++17 that
check whether  is compatible with _GLIBCXX_PARALLEL.
This increases the coverage to (almost) all headers.

If  is not available then the tests will trivially pass, because
we don't care about compatibility with _GLIBCXX_PARALLEL in that case.

PR libstdc++/89023
* testsuite/17_intro/headers/c++2011/parallel_mode.cc: New test.
* testsuite/17_intro/headers/c++2014/parallel_mode.cc: New test.
* testsuite/17_intro/headers/c++2017/parallel_mode.cc: New test.
* testsuite/28_regex/headers/regex/parallel_mode.cc: Remove.

Tested x86_64-linux, committed to trunk.


commit 4e00db1844ee703699865a4f2fb0a9e27a4087c7
Author: Jonathan Wakely 
Date:   Mon Feb 11 12:22:05 2019 +

PR libstdc++/89023 fix test that fails when  not available

Instead of a single test that only checks whether  can be
included in Parallel Mode, add tests for each of C++11/C++14/C++17 that
check whether  is compatible with _GLIBCXX_PARALLEL.
This increases the coverage to (almost) all headers.

If  is not available then the tests will trivially pass, because
we don't care about compatibility with _GLIBCXX_PARALLEL in that case.

PR libstdc++/89023
* testsuite/17_intro/headers/c++2011/parallel_mode.cc: New test.
* testsuite/17_intro/headers/c++2014/parallel_mode.cc: New test.
* testsuite/17_intro/headers/c++2017/parallel_mode.cc: New test.
* testsuite/28_regex/headers/regex/parallel_mode.cc: Remove.

diff --git a/libstdc++-v3/testsuite/28_regex/headers/regex/parallel_mode.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2011/parallel_mode.cc
similarity index 81%
rename from libstdc++-v3/testsuite/28_regex/headers/regex/parallel_mode.cc
rename to libstdc++-v3/testsuite/17_intro/headers/c++2011/parallel_mode.cc
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2011/parallel_mode.cc
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2019 Free Software Foundation, Inc.
+// Copyright (C) 2019 Free Software Foundation, Inc.
 //
 // This file is part of the GNU ISO C++ Library.  This library is free
 // software; you can redistribute it and/or modify it under the
@@ -15,8 +15,11 @@
 // with this library; see the file COPYING3.  If not see
 // .
 
+// { dg-options "-std=gnu++11" }
 // { dg-do compile { target c++11 } }
 // { dg-require-normal-mode "" }
 
-#define _GLIBCXX_PARALLEL
-#include 
+#if __has_include()
+# define _GLIBCXX_PARALLEL 1
+# include 
+#endif
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2014/parallel_mode.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2014/parallel_mode.cc
new file mode 100644
index 000..a8184540b7c
--- /dev/null
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2014/parallel_mode.cc
@@ -0,0 +1,25 @@
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++14" }
+// { dg-do compile { target c++14 } }
+// { dg-require-normal-mode "" }
+
+#if __has_include()
+# define _GLIBCXX_PARALLEL 1
+# include 
+#endif
diff --git a/libstdc++-v3/testsuite/17_intro/headers/c++2017/parallel_mode.cc 
b/libstdc++-v3/testsuite/17_intro/headers/c++2017/parallel_mode.cc
new file mode 100644
index 000..e84f59e5819
--- /dev/null
+++ b/libstdc++-v3/testsuite/17_intro/headers/c++2017/parallel_mode.cc
@@ -0,0 +1,25 @@
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-st

Re: [patch] Disable store merging in asan_expand_mark_ifn

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 01:31:24PM +0100, Eric Botcazou wrote:
> > No.  64-bit aligned offsets too.  If you know 64-bit alignment of base_addr,
> > you can use size 2 stores (though not size 4 stores) on the
> > !STRICT_ALIGNMENT targets.  And that is something still pretty common.
> 
> But we're talking about STRICT_ALIGNMENT targets here: an array of 2 doubles 
> at address 0x1008 will have a shadow address of 0x2001 modulo the 

Ok, stand corrected on that, 128-bit indeed, but even that is not
really used.

> offset so you cannot use size 2.  Moveover the store merging pass should be 
> able to merge the stores so I don't really understand why this matters at all.

For STRICT_ALIGNMENT targets store merging pass obviously can't do anything
with those, because unlike asan.c it can't figure out the alignment.

Jakub


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread H.J. Lu
On Sat, Feb 09, 2019 at 02:39:30PM +0100, Jakub Jelinek wrote:
> On Sat, Feb 09, 2019 at 01:22:30PM +0100, Jakub Jelinek wrote:
> > On Sat, Feb 09, 2019 at 04:11:43AM -0800, H.J. Lu wrote:
> > > I believe all usages of
> > > 
> > > (ior (match_operand 0 "ext_sse_reg_operand")
> > >   (match_operand 1 "ext_sse_reg_operand"))
> > > 
> > > should be checked.  I am not sure if they should be there at all.
> > 
> > E.g. in i386.md all the other spots look fine, because {DI,SI,DF,SF}mode
> > is allowed in ext sse regs even with -mavx512f.  And sse.md doesn't use this
> > at all.  What I'm wondering is if we need the sse.md (*mov_internal)
> > code I've cited earlier, doing bootstrap/regtest now with gcc_unreachable in
> > there (and in *mov{o,x}i_internal* for MODE_XI too) too see if it ever
> > triggers.
> 
> The following didn't ICE on anything, which is not a proof, but given that
> hard_regno_mode_ok should return false for ext_sse_reg_operand regs for
> avx512f && !avx512vl, it matches my expectations, on the other hand, it was
> a normal defaults bootstrap, don't have a knl which might be best for this
> to test -mavx512f -mno-avx512vl on everything.
> So perhaps we can also nuke the large if from mov_internal.
> 
> --- gcc/config/i386/i386.md.jj2019-02-09 12:35:57.971475641 +0100
> +++ gcc/config/i386/i386.md   2019-02-09 12:37:40.776802962 +0100
> @@ -1905,6 +1905,7 @@ (define_insn "*movoi_internal_avx"
>return standard_sse_constant_opcode (insn, operands);
>  
>  case TYPE_SSEMOV:
> +  gcc_assert (get_attr_mode (insn) != MODE_XI);
>if (misaligned_operand (operands[0], OImode)
> || misaligned_operand (operands[1], OImode))
>   {
> @@ -1970,6 +1971,7 @@ (define_insn "*movti_internal"
>  case TYPE_SSEMOV:
>/* TDmode values are passed as TImode on the stack.  Moving them
>to stack may result in unaligned memory access.  */
> +  gcc_assert (get_attr_mode (insn) != MODE_XI);
>if (misaligned_operand (operands[0], TImode)
> || misaligned_operand (operands[1], TImode))
>   {
> --- gcc/config/i386/sse.md.jj 2019-01-28 21:57:39.301110220 +0100
> +++ gcc/config/i386/sse.md2019-02-09 12:36:45.863696416 +0100
> @@ -989,6 +989,7 @@ (define_insn "mov_internal"
> && (EXT_REX_SSE_REG_P (operands[0])
> || EXT_REX_SSE_REG_P (operands[1])))
>   {
> +   gcc_unreachable ();
> if (memory_operand (operands[0], mode))
>   {
> if ( == 32)
> 

Here is the updated patch to remove ext_sse_reg_operand check with a
testcase.

OK for trunk?

Thanks.

H.J.
---
Since hard_regno_mode_ok only allows xmm16-xmm31 in OImode or TImode
with TARGET_AVX512VL:

  /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
  if (TARGET_AVX512VL
  && (mode == OImode
  || mode == TImode
  || VALID_AVX256_REG_MODE (mode)
  || VALID_AVX512VL_128_REG_MODE (mode)))
return true;

  /* xmm16-xmm31 are only available for AVX-512.  */
  if (EXT_REX_SSE_REGNO_P (regno))
return false;

there is no need to check ext_sse_reg_operand in *movoi_internal_avx nor
*movti_internal.  Instead, we should check EXT_REX_SSE_REG_P for upper 16
vector registers.

2019-02-11  H.J. Lu  
Jakub Jelinek  

gcc/

PR target/89229
* config/i386/i386.md (*movoi_internal_avx): Check
EXT_REX_SSE_REG_P instead of MODE_XI for upper 16 vector
registers.
(*movti_internal): Likewise.

gcc/testsuite/

PR target/89229
* gcc.target/i386/pr89229-1.c: New test.
---
 gcc/config/i386/i386.md   | 22 +--
 gcc/testsuite/gcc.target/i386/pr89229-1.c | 47 +++
 2 files changed, 56 insertions(+), 13 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-1.c

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3d9141ae450..5b89e52493e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1910,7 +1910,8 @@
{
  if (get_attr_mode (insn) == MODE_V8SF)
return "vmovups\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
+ else if (EXT_REX_SSE_REG_P (operands[0])
+  || EXT_REX_SSE_REG_P (operands[1]))
return "vmovdqu32\t{%1, %0|%0, %1}";
  else
return "vmovdqu\t{%1, %0|%0, %1}";
@@ -1919,7 +1920,8 @@
{
  if (get_attr_mode (insn) == MODE_V8SF)
return "vmovaps\t{%1, %0|%0, %1}";
- else if (get_attr_mode (insn) == MODE_XI)
+ else if (EXT_REX_SSE_REG_P (operands[0])
+  || EXT_REX_SSE_REG_P (operands[1]))
return "vmovdqa32\t{%1, %0|%0, %1}";
  else
return "vmovdqa\t{%1, %0|%0, %1}";
@@ -1933,11 +1935,7 @@
(set_attr "type" "sselog1,sselog1,ssemov,ssemov")
(set_attr "prefix" "vex")
(set (attr "mode")
-   (cond [(and (not (match_test

Re: [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE

2019-02-11 Thread graham stott via gcc-patches
All these patches from HJL have no testcases. Are they even suitable for gcc 9 
at this stage

 Original message 
From: Uros Bizjak  
Date: 11/02/2019  12:51  (GMT+00:00) 
To: "H.J. Lu"  
Cc: GCC Patches  
Subject: Re: [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE 

On Mon, Feb 11, 2019 at 1:26 PM H.J. Lu  wrote:
>
> On Sun, Feb 10, 2019 at 11:25 PM Uros Bizjak  wrote:
> >
> > On Mon, Feb 11, 2019 at 2:04 AM H.J. Lu  wrote:
> > >
> > > On Sun, Feb 10, 2019 at 1:49 PM Uros Bizjak  wrote:
> > > >
> > > > On Sun, Feb 10, 2019 at 10:45 PM Uros Bizjak  wrote:
> > > >
> > > > > > > > +  [(const_int 0)]
> > > > > > > > +{
> > > > > > > > +  /* Emulate MMX vec_dupv2si with SSE vec_dupv4si.  */
> > > > > > > > +  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > > > +  rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > > > +  emit_insn (insn);
> > > > > > > > +  DONE;
> > > > > > >
> > > > > > > Please write this simple RTX explicitly in the place of 
> > > > > > > (const_int 0) above.
> > > > > >
> > > > > > rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > >
> > > > > > is easy.   How do I write
> > > > > >
> > > > > > rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > >
> > > > > > in place of  (const_int 0)?
> > > > >
> > > > >   [(set (match_dup 2)
> > > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > > >
> > > > > with
> > > > >
> > > > > "operands[2] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > > >
> > > > > or even better:
> > > > >
> > > > > "operands[2] = gen_lowpart (V4SImode, operands[0]);"
> > > > >
> > > > > in the preparation statement.
> > > >
> > > > Even shorter is
> > > >
> > > > "operands[0] = gen_lowpart (V4SImode, operands[0]);"
> > > >
> > > > and use (match_dup 0) instead of (match_dup 2) in the RTX.
> > > >
> > > > There is plenty of examples throughout sse.md.
> > > >
> > >
> > > This works:
> > >
> > > (define_insn_and_split "*vec_dupv2si"
> > >   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
> > > (vec_duplicate:V2SI
> > >   (match_operand:SI 1 "register_operand" "0,0,Yv")))]
> > >   "TARGET_MMX || TARGET_MMX_WITH_SSE"
> > >   "@
> > >    punpckldq\t%0, %0
> > >    #
> > >    #"
> > >   "TARGET_MMX_WITH_SSE && reload_completed"
> > >   [(set (match_dup 0)
> > > (vec_duplicate:V4SI (match_dup 1)))]
> > >   "operands[0] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > >   [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> > >    (set_attr "type" "mmxcvt,ssemov,ssemov")
> > >    (set_attr "mode" "DI,TI,TI")])
> >
> > If it works, then gen_lowpart is preferred due to extra checks.
> > However, it would result in a paradoxical subreg, so I wonder if these
> > extra checks allow this transformation.
>
> gen_lowpart doesn't work:

Ah, we need lowpart_subreg after reload.

Uros.


Re: [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE

2019-02-11 Thread H.J. Lu
On Mon, Feb 11, 2019 at 4:51 AM Uros Bizjak  wrote:
>
> On Mon, Feb 11, 2019 at 1:26 PM H.J. Lu  wrote:
> >
> > On Sun, Feb 10, 2019 at 11:25 PM Uros Bizjak  wrote:
> > >
> > > On Mon, Feb 11, 2019 at 2:04 AM H.J. Lu  wrote:
> > > >
> > > > On Sun, Feb 10, 2019 at 1:49 PM Uros Bizjak  wrote:
> > > > >
> > > > > On Sun, Feb 10, 2019 at 10:45 PM Uros Bizjak  
> > > > > wrote:
> > > > >
> > > > > > > > > +  [(const_int 0)]
> > > > > > > > > +{
> > > > > > > > > +  /* Emulate MMX vec_dupv2si with SSE vec_dupv4si.  */
> > > > > > > > > +  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > > > > +  rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > > > > +  emit_insn (insn);
> > > > > > > > > +  DONE;
> > > > > > > >
> > > > > > > > Please write this simple RTX explicitly in the place of 
> > > > > > > > (const_int 0) above.
> > > > > > >
> > > > > > > rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > >
> > > > > > > is easy.   How do I write
> > > > > > >
> > > > > > > rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > >
> > > > > > > in place of  (const_int 0)?
> > > > > >
> > > > > >   [(set (match_dup 2)
> > > > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > > > >
> > > > > > with
> > > > > >
> > > > > > "operands[2] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > > > >
> > > > > > or even better:
> > > > > >
> > > > > > "operands[2] = gen_lowpart (V4SImode, operands[0]);"
> > > > > >
> > > > > > in the preparation statement.
> > > > >
> > > > > Even shorter is
> > > > >
> > > > > "operands[0] = gen_lowpart (V4SImode, operands[0]);"
> > > > >
> > > > > and use (match_dup 0) instead of (match_dup 2) in the RTX.
> > > > >
> > > > > There is plenty of examples throughout sse.md.
> > > > >
> > > >
> > > > This works:
> > > >
> > > > (define_insn_and_split "*vec_dupv2si"
> > > >   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
> > > > (vec_duplicate:V2SI
> > > >   (match_operand:SI 1 "register_operand" "0,0,Yv")))]
> > > >   "TARGET_MMX || TARGET_MMX_WITH_SSE"
> > > >   "@
> > > >punpckldq\t%0, %0
> > > >#
> > > >#"
> > > >   "TARGET_MMX_WITH_SSE && reload_completed"
> > > >   [(set (match_dup 0)
> > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > >   "operands[0] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > >   [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> > > >(set_attr "type" "mmxcvt,ssemov,ssemov")
> > > >(set_attr "mode" "DI,TI,TI")])
> > >
> > > If it works, then gen_lowpart is preferred due to extra checks.
> > > However, it would result in a paradoxical subreg, so I wonder if these
> > > extra checks allow this transformation.
> >
> > gen_lowpart doesn't work:
>
> Ah, we need lowpart_subreg after reload.
>
> Uros.

 "operands[0] = lowpart_subreg (V4SImode, operands[0],
 GET_MODE (operands[0]));"

works.

-- 
H.J.


Re: [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE

2019-02-11 Thread H.J. Lu
On Mon, Feb 11, 2019 at 5:11 AM graham stott
 wrote:
>
> All these patches from HJL have no testcases. Are they even suitable for gcc 9 
> at this stage

All my changes are covered by

https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00632.html

>  Original message 
> From: Uros Bizjak 
> Date: 11/02/2019 12:51 (GMT+00:00)
> To: "H.J. Lu" 
> Cc: GCC Patches 
> Subject: Re: [PATCH 12/43] i386: Emulate MMX vec_dupv2si with SSE
>
> On Mon, Feb 11, 2019 at 1:26 PM H.J. Lu  wrote:
> >
> > On Sun, Feb 10, 2019 at 11:25 PM Uros Bizjak  wrote:
> > >
> > > On Mon, Feb 11, 2019 at 2:04 AM H.J. Lu  wrote:
> > > >
> > > > On Sun, Feb 10, 2019 at 1:49 PM Uros Bizjak  wrote:
> > > > >
> > > > > On Sun, Feb 10, 2019 at 10:45 PM Uros Bizjak  
> > > > > wrote:
> > > > >
> > > > > > > > > +  [(const_int 0)]
> > > > > > > > > +{
> > > > > > > > > +  /* Emulate MMX vec_dupv2si with SSE vec_dupv4si.  */
> > > > > > > > > +  rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > > > > +  rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > > > > +  emit_insn (insn);
> > > > > > > > > +  DONE;
> > > > > > > >
> > > > > > > > Please write this simple RTX explicitly in the place of 
> > > > > > > > (const_int 0) above.
> > > > > > >
> > > > > > > rtx insn = gen_vec_dupv4si (op0, operands[1]);
> > > > > > >
> > > > > > > is easy.   How do I write
> > > > > > >
> > > > > > > rtx op0 = gen_rtx_REG (V4SImode, REGNO (operands[0]));
> > > > > > >
> > > > > > > in place of  (const_int 0)?
> > > > > >
> > > > > >   [(set (match_dup 2)
> > > > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > > > >
> > > > > > with
> > > > > >
> > > > > > "operands[2] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > > > >
> > > > > > or even better:
> > > > > >
> > > > > > "operands[2] = gen_lowpart (V4SImode, operands[0]);"
> > > > > >
> > > > > > in the preparation statement.
> > > > >
> > > > > Even shorter is
> > > > >
> > > > > "operands[0] = gen_lowpart (V4SImode, operands[0]);"
> > > > >
> > > > > and use (match_dup 0) instead of (match_dup 2) in the RTX.
> > > > >
> > > > > There is plenty of examples throughout sse.md.
> > > > >
> > > >
> > > > This works:
> > > >
> > > > (define_insn_and_split "*vec_dupv2si"
> > > >   [(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
> > > > (vec_duplicate:V2SI
> > > >   (match_operand:SI 1 "register_operand" "0,0,Yv")))]
> > > >   "TARGET_MMX || TARGET_MMX_WITH_SSE"
> > > >   "@
> > > >punpckldq\t%0, %0
> > > >#
> > > >#"
> > > >   "TARGET_MMX_WITH_SSE && reload_completed"
> > > >   [(set (match_dup 0)
> > > > (vec_duplicate:V4SI (match_dup 1)))]
> > > >   "operands[0] = gen_rtx_REG (V4SImode, REGNO (operands[0]));"
> > > >   [(set_attr "mmx_isa" "native,x64_noavx,x64_avx")
> > > >(set_attr "type" "mmxcvt,ssemov,ssemov")
> > > >(set_attr "mode" "DI,TI,TI")])
> > >
> > > If it works, then gen_lowpart is preferred due to extra checks.
> > > However, it would result in a paradoxical subreg, so I wonder if these
> > > extra checks allow this transformation.
> >
> > gen_lowpart doesn't work:
>
> Ah, we need lowpart_subreg after reload.
>
> Uros.



-- 
H.J.


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread Uros Bizjak
On Mon, Feb 11, 2019 at 2:10 PM H.J. Lu  wrote:
>
> On Sat, Feb 09, 2019 at 02:39:30PM +0100, Jakub Jelinek wrote:
> > On Sat, Feb 09, 2019 at 01:22:30PM +0100, Jakub Jelinek wrote:
> > > On Sat, Feb 09, 2019 at 04:11:43AM -0800, H.J. Lu wrote:
> > > > I believe all usages of
> > > >
> > > > (ior (match_operand 0 "ext_sse_reg_operand")
> > > >   (match_operand 1 "ext_sse_reg_operand"))
> > > >
> > > > should be checked.  I am not sure if they should be there at all.
> > >
> > > E.g. in i386.md all the other spots look fine, because {DI,SI,DF,SF}mode
> > > is allowed in ext sse regs even with -mavx512f.  And sse.md doesn't use 
> > > this
> > > at all.  What I'm wondering is if we need the sse.md (*mov_internal)
> > > code I've cited earlier, doing bootstrap/regtest now with gcc_unreachable 
> > > in
> > > there (and in *mov{o,x}i_internal* for MODE_XI too) too see if it ever
> > > triggers.
> >
> > The following didn't ICE on anything, which is not a proof, but given that
> > hard_regno_mode_ok should return false for ext_sse_reg_operand regs for
> > avx512f && !avx512vl, it matches my expectations, on the other hand, it was
> > a normal defaults bootstrap, don't have a knl which might be best for this
> > to test -mavx512f -mno-avx512vl on everything.
> > So perhaps we can also nuke the large if from mov_internal.
> >
> > --- gcc/config/i386/i386.md.jj2019-02-09 12:35:57.971475641 +0100
> > +++ gcc/config/i386/i386.md   2019-02-09 12:37:40.776802962 +0100
> > @@ -1905,6 +1905,7 @@ (define_insn "*movoi_internal_avx"
> >return standard_sse_constant_opcode (insn, operands);
> >
> >  case TYPE_SSEMOV:
> > +  gcc_assert (get_attr_mode (insn) != MODE_XI);
> >if (misaligned_operand (operands[0], OImode)
> > || misaligned_operand (operands[1], OImode))
> >   {
> > @@ -1970,6 +1971,7 @@ (define_insn "*movti_internal"
> >  case TYPE_SSEMOV:
> >/* TDmode values are passed as TImode on the stack.  Moving them
> >to stack may result in unaligned memory access.  */
> > +  gcc_assert (get_attr_mode (insn) != MODE_XI);
> >if (misaligned_operand (operands[0], TImode)
> > || misaligned_operand (operands[1], TImode))
> >   {
> > --- gcc/config/i386/sse.md.jj 2019-01-28 21:57:39.301110220 +0100
> > +++ gcc/config/i386/sse.md2019-02-09 12:36:45.863696416 +0100
> > @@ -989,6 +989,7 @@ (define_insn "mov_internal"
> > && (EXT_REX_SSE_REG_P (operands[0])
> > || EXT_REX_SSE_REG_P (operands[1])))
> >   {
> > +   gcc_unreachable ();
> > if (memory_operand (operands[0], mode))
> >   {
> > if ( == 32)
> >
>
> Here is the updated patch to remove ext_sse_reg_operand check with a
> testcase.
>
> OK for trunk?

No. As said, please correctly set mode to XImode in mode attribute calculation.

Uros.

> Thanks.
>
> H.J.
> ---
> Since hard_regno_mode_ok only allows xmm16-xmm31 in OImode or TImode
> with TARGET_AVX512VL:
>
>   /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
>   if (TARGET_AVX512VL
>   && (mode == OImode
>   || mode == TImode
>   || VALID_AVX256_REG_MODE (mode)
>   || VALID_AVX512VL_128_REG_MODE (mode)))
> return true;
>
>   /* xmm16-xmm31 are only available for AVX-512.  */
>   if (EXT_REX_SSE_REGNO_P (regno))
> return false;
>
> there is no need to check ext_sse_reg_operand in *movoi_internal_avx nor
> *movti_internal.  Instead, we should check EXT_REX_SSE_REG_P for upper 16
> vector registers.
>
> 2019-02-11  H.J. Lu  
> Jakub Jelinek  
>
> gcc/
>
> PR target/89229
> * config/i386/i386.md (*movoi_internal_avx): Check
> EXT_REX_SSE_REG_P instead of MODE_XI for upper 16 vector
> registers.
> (*movti_internal): Likewise.
>
> gcc/testsuite/
>
> PR target/89229
> * gcc.target/i386/pr89229-1.c: New test.
> ---
>  gcc/config/i386/i386.md   | 22 +--
>  gcc/testsuite/gcc.target/i386/pr89229-1.c | 47 +++
>  2 files changed, 56 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr89229-1.c
>
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 3d9141ae450..5b89e52493e 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -1910,7 +1910,8 @@
> {
>   if (get_attr_mode (insn) == MODE_V8SF)
> return "vmovups\t{%1, %0|%0, %1}";
> - else if (get_attr_mode (insn) == MODE_XI)
> + else if (EXT_REX_SSE_REG_P (operands[0])
> +  || EXT_REX_SSE_REG_P (operands[1]))
> return "vmovdqu32\t{%1, %0|%0, %1}";
>   else
> return "vmovdqu\t{%1, %0|%0, %1}";
> @@ -1919,7 +1920,8 @@
> {
>   if (get_attr_mode (insn) == MODE_V8SF)
> return "vmovaps\t{%1, %0|%0, %1}";
> - else if (get_attr_mode (insn) == MODE_XI)
> +  

Re: [rs6000] 64-bit integer loads/stores and FP instructions

2019-02-11 Thread Segher Boessenkool
On Mon, Feb 11, 2019 at 10:07:44AM +0100, Eric Botcazou wrote:
> > You make an arbitrary distinction between certain CPUs and duplicate
> > patterns based on that.
> 
> Sure, somewhat arbitrary, but that's a proof of concept and IMO better than 
> treating every processor the same way.  The alternative would be to 
> complicate 
> further the existing patterns by means of the "enabled" attribute or 
> somesuch, 
> but I can try it too.

No, we should allow both integer and floating point insns for integer stores
always.  We just get the cost estimates slightly wrong now, apparently.


Segher


Re: [PATCH] rs6000: Vector shift-right should honor modulo semantics

2019-02-11 Thread Bill Schmidt
On 2/11/19 6:43 AM, Segher Boessenkool wrote:
> On Sun, Feb 10, 2019 at 08:42:42PM -0600, Bill Schmidt wrote:
>> On 2/10/19 4:05 PM, Segher Boessenkool wrote:
>>> On Sun, Feb 10, 2019 at 10:10:02AM -0600, Bill Schmidt wrote:
 I've added executable tests for both shift-right algebraic and shift-right 
 logical.
 Both fail prior to applying the patch, and work correctly afterwards.
>>> Please add a test for left shifts, as well?
>> Can do.  I verified that left shifts were not broken and figured a test case
>> had been added then, but have not checked.  Good to test this particular
>> scenario, though.
> Well you actually added code for left shifts, too ;-)

Nope. :-)  It just looks like it because of the way the diff hunks come out.
The changes are to shift-right-logical and shift-right-algebraic.  But it
still doesn't hurt to add the test.

>
 2019-02-08  Bill Schmidt  

* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Shift-right
and shift-left vector built-ins need to include a TRUNC_MOD_EXPR
for correct semantics.  Also, don't expand a vector-splat if there
is a type mismatch; let the back end handle it.
>>> Does it always result in just the shift instruction now?  Does the modulo
>>> ever remain?  (Maybe at -O0?)  Modulo is hugely expensive; it is always
>>> modulo by a power of two, so a simple bitmask, so maybe write that directly
>>> instead?
>> We always get the shift.  For -mcpu=power8, we always load the mask from
>> memory rather than generating the vspltisb, which is not ideal code 
>> generation,
>> but is at least correct.
> _Just_ the shift, I meant.  I know we load a constant from memory, for
> constants; I am worried about non-constants, do those get a modulo, or
> just a mask.  Also at -O0; if we get an actual division at -O0, there
> probably are other cases where it isn't optimised away as well.

Even at -O1 this is all folded away in GIMPLE:

test_sradi_4 (vi64_t a)
{
  vi64_t result;

   [local count: 1073741824]:
  result_3 = a_2(D) >> 4;
  return result_3;

}

At -O0 (if I hand-inline everything myself to avoid errors), we scalarize
the modulo/masking operation into a rldicl for each doubleword.  I really
don't see any reason to change the code.

>
>> For -mcpu=power9, we get close, but have some bad register allocation and
>> an unnecessary extend:
>>
>> xxspltib 0,4   <- why not just xxspltib 32,4?
>> xxlor 32,0,0   <- wasted copy
> Yeah, huh.  Where does that come from...  I blame splitters after reload.

This only happens at -O2 and up, FWIW.  At -O1 we allocate the registers
reasonably.

>
>> vextsb2d 0,0   <- unnecessary due to vsrad semantics
>> vsrad 2,2,0
>>
>> Again, this is at least correct.  We have more work to do to produce the
>> most efficient code, but we have PR89213 open for that.
> Yes.  But I'd like to see at least a mask instead of a modulo.

Again, there is no modulo by the time we exit GIMPLE.  Our efficiency
failings are all in the back end.

>
 +  /* It's sometimes useful to use one of these to build a
 + splat for V2DImode, since the upper bits will be ignored.
 + Don't fold if we detect that situation, as we end up
 + losing the splat instruction in this case.  */
 +  if (size != TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (TREE_TYPE (lhs)
 +return false;
>>> This isn't really detecting that situation...  Instead, it doesn't fold
>>> whenever the size of the splatted elements isn't the same as the size of
>>> the elts of the target vector.  That's probably perfectly safe, but please
>>> spell it out.  It's fine to mention the motivating case, of course.
>> Yep, will correct.  Actually, as I look back at my notes, I believe that this
>> change is not necessary after all (same code generated with and without it).
>> I'll verify.
> Ha, that would be nice :-)

Confirmed, BTW.

>
>>> Testing for vsx_hw but not enabling vsx is probably wrong, too.
>> Weird.  I just tried adding -mvsx
> Does it _need_ VSX anyway?  Are these builtins defined without it, too?

Yes (vector long long / V2DImode requires VSX).

>
>> and I get this peculiar error we've seen
>> before about AMD graphics card offloading:
>>
>> spawn -ignore SIGHUP /home/wschmidt/gcc/build/gcc-mainline-test2/gcc/xgcc 
>> -B/home/wschmidt/gcc/build/gcc-mainline-test2/gcc/ 
>> /home/wschmidt/gcc/gcc-mainline-t\
>> est2/gcc/testsuite/gcc.target/powerpc/srad-modulo.c 
>> -fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers 
>> -fdiagnostics-color=never -O2 -mvsx -lm -o \
>> ./srad-modulo.exe^M
>> ^[[01m^[[Kcc1:^[[m^[[K ^[[01;31m^[[Kerror: ^[[m^[[Kargument to 
>> '^[[01m^[[K-O^[[m^[[K' should be a non-negative integer, 
>> '^[[01m^[[Kg^[[m^[[K', '^[[01m^[[Ks^[[\
>> m^[[K' or '^[[01m^[[Kfast^[[m^[[K'^M
>> compiler exited with status 1
> Huh, why do you get colour escapes at all?  I thought the testsuite used
> GCC_COLORS= to get rid of that nonsense.  It's quite unreadable 

Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread H.J. Lu
On Mon, Feb 11, 2019 at 5:15 AM Uros Bizjak  wrote:
>
> On Mon, Feb 11, 2019 at 2:10 PM H.J. Lu  wrote:
> >
> > On Sat, Feb 09, 2019 at 02:39:30PM +0100, Jakub Jelinek wrote:
> > > On Sat, Feb 09, 2019 at 01:22:30PM +0100, Jakub Jelinek wrote:
> > > > On Sat, Feb 09, 2019 at 04:11:43AM -0800, H.J. Lu wrote:
> > > > > I believe all usages of
> > > > >
> > > > > (ior (match_operand 0 "ext_sse_reg_operand")
> > > > >   (match_operand 1 "ext_sse_reg_operand"))
> > > > >
> > > > > should be checked.  I am not sure if they should be there at all.
> > > >
> > > > E.g. in i386.md all the other spots look fine, because {DI,SI,DF,SF}mode
> > > > is allowed in ext sse regs even with -mavx512f.  And sse.md doesn't use 
> > > > this
> > > > at all.  What I'm wondering is if we need the sse.md 
> > > > (*mov_internal)
> > > > code I've cited earlier, doing bootstrap/regtest now with 
> > > > gcc_unreachable in
> > > > there (and in *mov{o,x}i_internal* for MODE_XI too) too see if it ever
> > > > triggers.
> > >
> > > The following didn't ICE on anything, which is not a proof, but given that
> > > hard_regno_mode_ok should return false for ext_sse_reg_operand regs for
> > > avx512f && !avx512vl, it matches my expectations, on the other hand, it 
> > > was
> > > a normal defaults bootstrap, don't have a knl which might be best for this
> > > to test -mavx512f -mno-avx512vl on everything.
> > > So perhaps we can also nuke the large if from mov_internal.
> > >
> > > --- gcc/config/i386/i386.md.jj2019-02-09 12:35:57.971475641 +0100
> > > +++ gcc/config/i386/i386.md   2019-02-09 12:37:40.776802962 +0100
> > > @@ -1905,6 +1905,7 @@ (define_insn "*movoi_internal_avx"
> > >return standard_sse_constant_opcode (insn, operands);
> > >
> > >  case TYPE_SSEMOV:
> > > +  gcc_assert (get_attr_mode (insn) != MODE_XI);
> > >if (misaligned_operand (operands[0], OImode)
> > > || misaligned_operand (operands[1], OImode))
> > >   {
> > > @@ -1970,6 +1971,7 @@ (define_insn "*movti_internal"
> > >  case TYPE_SSEMOV:
> > >/* TDmode values are passed as TImode on the stack.  Moving them
> > >to stack may result in unaligned memory access.  */
> > > +  gcc_assert (get_attr_mode (insn) != MODE_XI);
> > >if (misaligned_operand (operands[0], TImode)
> > > || misaligned_operand (operands[1], TImode))
> > >   {
> > > --- gcc/config/i386/sse.md.jj 2019-01-28 21:57:39.301110220 +0100
> > > +++ gcc/config/i386/sse.md2019-02-09 12:36:45.863696416 +0100
> > > @@ -989,6 +989,7 @@ (define_insn "mov_internal"
> > > && (EXT_REX_SSE_REG_P (operands[0])
> > > || EXT_REX_SSE_REG_P (operands[1])))
> > >   {
> > > +   gcc_unreachable ();
> > > if (memory_operand (operands[0], mode))
> > >   {
> > > if ( == 32)
> > >
> >
> > Here is the updated patch to remove ext_sse_reg_operand check with a
> > testcase.
> >
> > OK for trunk?
>
> No. As said, please correctly set mode to XImode in mode attribute 
> calculation.

There is

 switch (get_attr_type (insn))
{
case TYPE_SSELOG1:
  return standard_sse_constant_opcode (insn, operands);

standard_sse_constant_opcode has

else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
{
  enum attr_mode insn_mode = get_attr_mode (insn);

  switch (insn_mode)
{
case MODE_XI:
case MODE_V8DF:
case MODE_V16SF:
  gcc_assert (TARGET_AVX512F);
  return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

What mode should be used to set %xmm23 to -1 with AVX512VL?  What mode
should be used to load %xmm23 with AVX512VL? There is no need to
check ext_sse_reg_operand here the same as in

(define_insn "mov_internal"
  [(set (match_operand:VMOVE 0 "nonimmediate_operand"
 "=v,v ,v ,m")
(match_operand:VMOVE 1 "nonimmediate_or_sse_const_operand"
 " C,BC,vm,v"))]
  "TARGET_SSE
   && (register_operand (operands[0], mode)
   || register_operand (operands[1], mode))"
{

> Uros.
>
> > Thanks.
> >
> > H.J.
> > ---
> > Since hard_regno_mode_ok only allows xmm16-xmm31 in OImode or TImode
> > with TARGET_AVX512VL:
> >
> >   /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
> >   if (TARGET_AVX512VL
> >   && (mode == OImode
> >   || mode == TImode
> >   || VALID_AVX256_REG_MODE (mode)
> >   || VALID_AVX512VL_128_REG_MODE (mode)))
> > return true;
> >
> >   /* xmm16-xmm31 are only available for AVX-512.  */
> >   if (EXT_REX_SSE_REGNO_P (regno))
> > return false;
> >
> > there is no need to check ext_sse_reg_operand in *movoi_internal_avx nor
> > *movti_internal.  Instead, we should check EXT_REX_SSE_REG_P for upper 16
> > vector registers.
> >
> > 2019-02-11  H.J. Lu  
> > Jakub Jelinek  
> >
> > gcc/
> >
> > PR target/89229
> > * config/i386/i386.md (*movoi_internal_avx): Check
> >

Re: [PATCH] Construct ipa_reduced_postorder always for overwritable (PR ipa/89009).

2019-02-11 Thread Martin Jambor
Hi,

On Mon, Feb 11 2019, Jan Hubicka wrote:
>> Hi.
>> 
>> IPA pure const should always construct ipa_reduced_postorder with
>> possibility to cross AVAIL_INTERPOSABLE boundary. The pass itself
>> can then properly stop propagation on these symbols.
>> 
>> The patch is pre-approved by Honza.
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>> 
>> Thanks,
>> Martin
>> 
>> gcc/ChangeLog:
>> 
>> 2019-02-08  Martin Liska  
>> 
>>  PR ipa/89009
>>  * ipa-cp.c (build_toporder_info): Remove usage of a param.
>>  * ipa-inline.c (inline_small_functions): Likewise.
>>  * ipa-pure-const.c (propagate_pure_const): Likewise.
>>  (propagate_nothrow): Likewise.
>>  * ipa-reference.c (propagate): Likewise.
>>  * ipa-utils.c (struct searchc_env): Remove unused field.
>>  (searchc): Always search across AVAIL_INTERPOSABLE.
>>  (ipa_reduced_postorder): Always allow AVAIL_INTERPOSABLE as
>>  the only called IPA pure const can properly not propagate
>>  across interposable boundary.
>>  * ipa-utils.h (ipa_reduced_postorder): Remove param.
>> @@ -105,7 +104,7 @@ searchc (struct searchc_env* env, struct cgraph_node *v,
>>  
>>if (w->aux
>>&& (avail > AVAIL_INTERPOSABLE
>> -  || (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
>> +  || avail == AVAIL_INTERPOSABLE))

This looks a tad too mechanical, surely

   (avail > AVAIL_INTERPOSABLE || avail == AVAIL_INTERPOSABLE)

can be simplified into just one comparison :-)

>>  {
>>w_info = (struct ipa_dfs_info *) w->aux;
>>if (w_info->new_node)
>
> This optimizes >= AVAIL_INTERPOSABLE then, but for ipa-cp, ipa-inline,
> ipa-pure-const we do not want interposable calls to close SCC regions
> (because we can not propagate here). ipa-reference is more subtle -
> interposable calls close SCC regions IFF they are declared leaf.
>
> Can you, please, update ignore_edge_p of individual optimization passes
> this way? (I.e. for all but ipa-reference add avail <= AVAIL_INTERPOSABLE
> check for the availability of the target function and for ipa-reference
> test also leaf attribute?). 

At least IPA-CP does not have any ignore_edge_p... does it really need
any?  Such node can incur an extra iteration of the propagator over an
entire SCC, that is true, but is it worth adding a callback?  Are SCCs
with an interposable node common?

>
> In all those cases we also want to check that given optimization pass is
> enabled for both caller and callee.  In other cases we do not want to
> consider them part of SCCs as well.  It may make sense to do this as one
> change or I can do that incrementally.

Likewise.

Martin


[PATCH, v2] rs6000: Vector shift-right should honor modulo semantics

2019-02-11 Thread Bill Schmidt
Hi!

We had a problem report for code attempting to implement a vector right-shift 
for a
vector long long (V2DImode) type.  The programmer noted that we don't have a 
vector
splat-immediate for this mode, but cleverly realized he could use a vector char
splat-immediate since only the lower 6 bits of the shift count are read.  This 
is a
documented feature of both the vector shift built-ins and the underlying 
instruction.

Starting with GCC 8, the vector shifts are expanded early in 
rs6000_gimple_fold_builtin.
However, the GIMPLE folding does not currently perform the required 
TRUNC_MOD_EXPR to
implement the built-in semantics.  It appears that this was caught earlier for 
vector
shift-left and fixed, but the same problem was not fixed for vector shift-right.
This patch fixes that.

I've added executable tests for both shift-right algebraic and shift-right 
logical.
Both fail prior to applying the patch, and work correctly afterwards.

Minor differences from v1 of this patch:
 * Deleted code to defer some vector splats to expand, which was unnecessary.
 * Removed powerpc64*-*-* target selector, added -mvsx option, removed extra
   braces from dg-options directive.
 * Added __attribute__ ((noinline)) to test_s*di_4 functions.
 * Corrected typoed function names.
 * Changed test case names.
 * Added vec-sld-modulo.c as requested (works both before and after this patch).

Bootstrapped and tested on powerpc64le-unknown-linux-gnu with no regressions.  
Is
this okay for trunk, and for GCC 8.3 if there is no fallout by the end of the
week?

Thanks,
Bill


[gcc]

2019-02-11  Bill Schmidt  

* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Shift-right
and shift-left vector built-ins need to include a TRUNC_MOD_EXPR
for correct semantics.

[gcc/testsuite]

2019-02-11  Bill Schmidt  

* gcc.target/powerpc/vec-sld-modulo.c: New.
* gcc.target/powerpc/vec-srad-modulo.c: New.
* gcc.target/powerpc/vec-srd-modulo.c: New.


Index: gcc/config/rs6000/rs6000.c
===
--- gcc/config/rs6000/rs6000.c  (revision 268707)
+++ gcc/config/rs6000/rs6000.c  (working copy)
@@ -15735,13 +15735,37 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *
 case ALTIVEC_BUILTIN_VSRAH:
 case ALTIVEC_BUILTIN_VSRAW:
 case P8V_BUILTIN_VSRAD:
-  arg0 = gimple_call_arg (stmt, 0);
-  arg1 = gimple_call_arg (stmt, 1);
-  lhs = gimple_call_lhs (stmt);
-  g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, arg1);
-  gimple_set_location (g, gimple_location (stmt));
-  gsi_replace (gsi, g, true);
-  return true;
+  {
+   arg0 = gimple_call_arg (stmt, 0);
+   arg1 = gimple_call_arg (stmt, 1);
+   lhs = gimple_call_lhs (stmt);
+   tree arg1_type = TREE_TYPE (arg1);
+   tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
+   tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
+   location_t loc = gimple_location (stmt);
+   /* Force arg1 into the range valid matching the arg0 type.  */
+   /* Build a vector consisting of the max valid bit-size values.  */
+   int n_elts = VECTOR_CST_NELTS (arg1);
+   tree element_size = build_int_cst (unsigned_element_type,
+  128 / n_elts);
+   tree_vector_builder elts (unsigned_arg1_type, n_elts, 1);
+   for (int i = 0; i < n_elts; i++)
+ elts.safe_push (element_size);
+   tree modulo_tree = elts.build ();
+   /* Modulo the provided shift value against that vector.  */
+   gimple_seq stmts = NULL;
+   tree unsigned_arg1 = gimple_build (&stmts, VIEW_CONVERT_EXPR,
+  unsigned_arg1_type, arg1);
+   tree new_arg1 = gimple_build (&stmts, loc, TRUNC_MOD_EXPR,
+ unsigned_arg1_type, unsigned_arg1,
+ modulo_tree);
+   gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+   /* And finally, do the shift.  */
+   g = gimple_build_assign (lhs, RSHIFT_EXPR, arg0, new_arg1);
+   gimple_set_location (g, loc);
+   gsi_replace (gsi, g, true);
+   return true;
+  }
/* Flavors of vector shift left.
   builtin_altivec_vsl{b,h,w} -> vsl{b,h,w}.  */
 case ALTIVEC_BUILTIN_VSLB:
@@ -15795,14 +15819,34 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
lhs = gimple_call_lhs (stmt);
+   tree arg1_type = TREE_TYPE (arg1);
+   tree unsigned_arg1_type = unsigned_type_for (TREE_TYPE (arg1));
+   tree unsigned_element_type = unsigned_type_for (TREE_TYPE (arg1_type));
+   location_t loc = gimple_location (stmt);
gimple_seq stmts = NULL;
/* Convert arg0 to unsigned.  */
tree arg0_unsigned
  = gimple_build (&stmts, VIEW_CONVERT_EXPR,
  unsigne

Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 02:15:18PM +0100, Uros Bizjak wrote:
> > Here is the updated patch to remove ext_sse_reg_operand check with a
> > testcase.
> >
> > OK for trunk?
> 
> No. As said, please correctly set mode to XImode in mode attribute 
> calculation.

The instructions in question are
vmovdqu32 mem, %{x,y}mm{1[6-9],2[0-9],3[01]}
vmovdqu32 %{x,y}mm{1[6-9],2[0-9],3[01]}, mem
vmovdqa32 mem, %{x,y}mm{1[6-9],2[0-9],3[01]}
vmovdqa32 %{x,y}mm{1[6-9],2[0-9],3[01]}, mem
vmovdqa32 %{x,y}mm{[0-9],[12][0-9],3[01]}, %{x,y}mm{1[6-9],2[0-9],3[01]}
vmovdqa32 %{x,y}mm{1[6-9],2[0-9],3[01]}, %{x,y}mm{[0-9],[12][0-9],3[01]}
Why should those instructions be XImode?  They have 16 or 32 byte operands,
never 64 byte.

Using EXT_REX_SSE_REG_P is what *movdi_internal uses too:
case MODE_TI:
  /* Handle AVX512 registers set.  */
  if (EXT_REX_SSE_REG_P (operands[0])
  || EXT_REX_SSE_REG_P (operands[1]))
return "vmovdqa64\t{%1, %0|%0, %1}";
  return "%vmovdqa\t{%1, %0|%0, %1}";
Sure, in that case it is not MODE_DI, but MODE_TI, because the move is
actually 128-bit, not 64-bit, but we do not claim it is 512-bit.

*movsi_internal is incorrect (and inefficient):
case MODE_TI:
  return "%vmovdqa\t{%1, %0|%0, %1}";
case MODE_XI:
  return "vmovdqa32\t{%g1, %g0|%g0, %g1}";
...
(eq_attr "alternative" "8,9")
  (cond [(ior (match_operand 0 "ext_sse_reg_operand")
  (match_operand 1 "ext_sse_reg_operand"))
   (const_string "XI")
 (ior (not (match_test "TARGET_SSE2"))
  (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL"))
   (const_string "V4SF")
 (match_test "TARGET_AVX")
   (const_string "TI")
 (match_test "optimize_function_for_size_p (cfun)")
   (const_string "V4SF")
]
(const_string "TI"))
In my reading, for (set (reg:SI xmm16) (reg:SI xmm17)) the above will
emit vmovdqa32  %zmm17, %zmm16 even for -mavx512vl, which looks wrong.
So, I'd suggest (and (not (match_test "TARGET_AVX512VL"))
 (ior (match_operand 0 "ext_sse_reg_operand")
  (match_operand 1 "ext_sse_reg_operand"))
   (const_string "XI")

I see other weirdo stuff e.g. in *movdf_internal:
   /* movaps is one byte shorter for non-AVX targets.  */
   (eq_attr "alternative" "13,17")
 (cond [(and (ior (not (match_test "TARGET_PREFER_AVX256"))
  (not (match_test "TARGET_AVX512VL")))
 (ior (match_operand 0 "ext_sse_reg_operand")
  (match_operand 1 "ext_sse_reg_operand")))
  (const_string "V8DF")
If !TARGET_AVX512VL and one or both of the operands is (are)
ext_sse_reg_operand, obviously MODE_V8DF needs to be used.  But doesn't the
above force use of vmovapd  %zmm16, %zmm17 even if just -mavx512vl
-mprefer-vector-width=512?  I don't see any reason not to use
vmovapd %xmm16, %xmm17 in that case.  -mprefer-vector-width=512 is not you
must use ZMM all the time, but it is fine to use even EVEX instructions with
512-bit width.  Ditto *movsf_internal.

Jakub


Re: [PATCH] Construct ipa_reduced_postorder always for overwritable (PR ipa/89009).

2019-02-11 Thread Jan Hubicka
> Hi,
> 
> On Mon, Feb 11 2019, Jan Hubicka wrote:
> >> Hi.
> >> 
> >> IPA pure const should always construct ipa_reduced_postorder with
> >> possibility to cross AVAIL_INTERPOSABLE boundary. The pass itself
> >> can then properly stop propagation on these symbols.
> >> 
> >> The patch is pre-approved by Honza.
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >> 
> >> Thanks,
> >> Martin
> >> 
> >> gcc/ChangeLog:
> >> 
> >> 2019-02-08  Martin Liska  
> >> 
> >>PR ipa/89009
> >>* ipa-cp.c (build_toporder_info): Remove usage of a param.
> >>* ipa-inline.c (inline_small_functions): Likewise.
> >>* ipa-pure-const.c (propagate_pure_const): Likewise.
> >>(propagate_nothrow): Likewise.
> >>* ipa-reference.c (propagate): Likewise.
> >>* ipa-utils.c (struct searchc_env): Remove unused field.
> >>(searchc): Always search across AVAIL_INTERPOSABLE.
> >>(ipa_reduced_postorder): Always allow AVAIL_INTERPOSABLE as
> >>the only called IPA pure const can properly not propagate
> >>across interposable boundary.
> >>* ipa-utils.h (ipa_reduced_postorder): Remove param.
> >> @@ -105,7 +104,7 @@ searchc (struct searchc_env* env, struct cgraph_node 
> >> *v,
> >>  
> >>if (w->aux
> >>  && (avail > AVAIL_INTERPOSABLE
> >> -|| (env->allow_overwritable && avail == AVAIL_INTERPOSABLE)))
> >> +|| avail == AVAIL_INTERPOSABLE))
> 
> This looks a tad too mechanical, surely
> 
>(avail > AVAIL_INTERPOSABLE || avail == AVAIL_INTERPOSABLE)
> 
> can be simplified into just one comparison :-)
> 
> >>{
> >>  w_info = (struct ipa_dfs_info *) w->aux;
> >>  if (w_info->new_node)
> >
> > This optimizes >= AVAIL_INTERPOSABLE then, but for ipa-cp, ipa-inline,
> > ipa-pure-const we do not want interposable calls to close SCC regions
> > (because we can not propagate here). ipa-reference is more subtle -
> > interposable calls close SCC regions IFF they are declared leaf.
> >
> > Can you, please, update ignore_edge_p of individual optimization passes
> > this way? (I.e. for all but ipa-reference add avail <= AVAIL_INTERPOSABLE
> > check for the availability of the target function and for ipa-reference
> > test also leaf attribute?). 
> 
> At least IPA-CP does not have any ignore_edge_p... does it really need
> any?  Such node can incur an extra iteration of the propagator over an

You probably want to ignore edges with no useful jump functions to get
finer SCC structure as well.

> entire SCC, that is true, but is it worth adding a callback?  Are SCCs
> with an interposable node common?

In a PIC library without visibility attributes it would be all of them.
Yep, this is mostly about quality of implementation - with SCCs bigger
than necessary code will work, just produce worse code.

Honza
> 
> >
> > In all those cases we also want to check that given optimization pass is
> > enabled for both caller and callee.  In other cases we do not want to
> > consider them part of SCCs as well.  It may make sense to do this as one
> > change or I can do that incrementally.
> 
> Likewise.
> 
> Martin


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread Uros Bizjak
On Mon, Feb 11, 2019 at 2:29 PM H.J. Lu  wrote:

> > No. As said, please correctly set mode to XImode in mode attribute 
> > calculation.
>
> There is
>
>  switch (get_attr_type (insn))
> {
> case TYPE_SSELOG1:
>   return standard_sse_constant_opcode (insn, operands);
>
> standard_sse_constant_opcode has
>
> else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
> {
>   enum attr_mode insn_mode = get_attr_mode (insn);
>
>   switch (insn_mode)
> {
> case MODE_XI:
> case MODE_V8DF:
> case MODE_V16SF:
>   gcc_assert (TARGET_AVX512F);
>   return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";

If there is something wrong with standard_sse_constant_opcode, then
fix the problem in the function itself. With your previous patch, you
introduced a regression, and the presented fix is another kludge to
fix a stack of kludges inside standard_sse_constant_opcode.

Please take your time and propose some acceptable solution that would
put some logic into const_0/const_1 handling. The situation is not OK
and your patch makes it even worse.

Uros.


Re: Make clear, when contributions will be ignored

2019-02-11 Thread Segher Boessenkool
On Mon, Feb 11, 2019 at 12:44:31PM +, Дилян Палаузов wrote:
> -- at https://www.gnu.org/software/gcc/contribute.html is written “If you do 
> not receive a response to a patch that you
> have submitted within two weeks or so, it may be a good idea to chase it by 
> sending a follow-up email to the same
> list(s).”

That is about patches.  Not about bugzilla.  Sending reminders for bugzilla
reports is useless and annoying.  Sending reminders for patches however is
necessary, the way our development works currently.  It isn't clear any
change to procedures would help at all, since the fundamental problems need
to be attacked to make any progress.  Maintainers do not have infinite time,
and there is no incentive to deal with high-cost low-profit patches.


Segher


Re: [PATCH] rs6000: Vector shift-right should honor modulo semantics

2019-02-11 Thread Segher Boessenkool
On Mon, Feb 11, 2019 at 07:17:16AM -0600, Bill Schmidt wrote:
> At -O0 (if I hand-inline everything myself to avoid errors), we scalarize
> the modulo/masking operation into a rldicl for each doubleword.  I really
> don't see any reason to change the code.

So what does this look like at expand (at -O0)?  Is it something that
is done at gimple level, is it expand itself, is it some target thing?

> >> For -mcpu=power9, we get close, but have some bad register allocation and
> >> an unnecessary extend:
> >>
> >> xxspltib 0,4   <- why not just xxspltib 32,4?
> >> xxlor 32,0,0   <- wasted copy
> > Yeah, huh.  Where does that come from...  I blame splitters after reload.
> 
> This only happens at -O2 and up, FWIW.  At -O1 we allocate the registers
> reasonably.

Heh.

> >> Weird.  I just tried adding -mvsx
> > Does it _need_ VSX anyway?  Are these builtins defined without it, too?
> 
> Yes (vector long long / V2DImode requires VSX).

So something like

/* { dg-do run } */
/* { dg-require-effective-target vsx_hw } */
/* { dg-options "-mvsx -O2" } */

then?

> I pointed to the bugzilla in another reply -- which was "resolved" with a 
> hack.
> I consider it still broken this way...

Reopen that PR?

> I tested a revised version of the patch overnight and will submit shortly.

Thanks.


Segher


Re: Make clear, when contributions will be ignored

2019-02-11 Thread Дилян Палаузов
Hello Segher,

my question was how do you propose to proceed, so that a 
no-reminders-for-patches-are-necessary-state is reached.

There is no relation with having infinite time or dealing with high-cost 
low-profit patches.

Previously I raised the question, whether automating the process for sending 
reminders, is a good idea.  This saves time
of people to write reminders.

Greetings
  Дилян

On Mon, 2019-02-11 at 07:57 -0600, Segher Boessenkool wrote:
> On Mon, Feb 11, 2019 at 12:44:31PM +, Дилян Палаузов wrote:
> > -- at https://www.gnu.org/software/gcc/contribute.html is written “If you 
> > do not receive a response to a patch that you
> > have submitted within two weeks or so, it may be a good idea to chase it by 
> > sending a follow-up email to the same
> > list(s).”
> 
> That is about patches.  Not about bugzilla.  Sending reminders for bugzilla
> reports is useless and annoying.  Sending reminders for patches however is
> necessary, the way our development works currently.  It isn't clear any
> change to procedures would help at all, since the fundamental problems need
> to be attacked to make any progress.  Maintainers do not have infinite time,
> and there is no incentive to deal with high-cost low-profit patches.
> 
> 
> Segher



Re: arm access to stack slot out of allocated area

2019-02-11 Thread Olivier Hainque
Hi Wilco,

Thanks for your feedback.

> On 8 Feb 2019, at 22:35, Wilco Dijkstra  wrote:
> 
> Hi Olivier,
> 
>> Sorry, I had -mapcs-frame in mind.
> 
> That's identical to -mapcs, and equally deprecated. It was superseded 2 
> decades
> ago. -mapcs-frame bugs have been reported multiple times, including on VxWorks.

> For example https://gcc.gnu.org/bugzilla/show_bug.cgi?id=64379 suggests
> VxWorks doesn't need -mapcs-frame. And that was in 2014!

> So I think we need to push much harder on getting rid of obsolete stuff and
> avoid people encountering these nasty issues.

People are strongly encouraged to move to VxWorks7 (bpabi)
these days and we'll probably reinforce that message.

VxWorks6 is however still around and the environment (toolchain,
libraries, ...) is abi=apcs-gnu + apcs-frame, so right now we
unfortunately don't have much of a choice, and it was not working
that bad until gcc-7 in our experience.

I performed a round of testing with an aapcs compiler, out of
curiosity to double check, and the incompatibilities are indeed
visible.

On calls to printf with a "double" as the first va-argument, for
example, where the argument is passed through r2 & r3 but expected
in r1 & r2 by the target library.

apcs-frame apart, it looks like stack-checking could be affected as
well. I need to think about this some more.

Olivier



Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread H.J. Lu
On Mon, Feb 11, 2019 at 5:51 AM Uros Bizjak  wrote:
>
> On Mon, Feb 11, 2019 at 2:29 PM H.J. Lu  wrote:
>
> > > No. As said, please correctly set mode to XImode in mode attribute 
> > > calculation.
> >
> > There is
> >
> >  switch (get_attr_type (insn))
> > {
> > case TYPE_SSELOG1:
> >   return standard_sse_constant_opcode (insn, operands);
> >
> > standard_sse_constant_opcode has
> >
> > else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
> > {
> >   enum attr_mode insn_mode = get_attr_mode (insn);
> >
> >   switch (insn_mode)
> > {
> > case MODE_XI:
> > case MODE_V8DF:
> > case MODE_V16SF:
> >   gcc_assert (TARGET_AVX512F);
> >   return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
>
> If there is something wrong with standard_sse_constant_opcode, then
> fix the problem in the function itself. With your previous patch, you
> introduced a regression, and the presented fix is another kludge to
> fix a stack of kludges inside standard_sse_constant_opcode.
>
> Please take your time and propose some acceptable solution that would
> put some logic into const_0/const_1 handling. The situation is not OK
> and your patch makes it even worse.
>

Let's first define what MODE_XI means in standard_sse_constant_opcode
as well as in all these mov patterns for with and without AVX512VL.   Without
a clear definition, we can't get out of this mess.

-- 
H.J.


Re: [Patch] [arm] Fix 88714, Arm LDRD/STRD peepholes

2019-02-11 Thread Matthew Malcomson
On 10/02/19 09:42, Christophe Lyon wrote:
> 
> Both this simple patch or the previous fix all the ICEs I reported, thanks.
> 
> Of course, the scan-assembler failures remain to be fixed.
> 

In the testcase I failed to account for targets that don't support arm 
mode or
targets that do not support the ldrd/strd instructions.

This patch accounts for both of these by adding some
dg-require-effective-target lines to the testcase.

This patch also adds a new effective-target procedure to check a target
supports arm ldrd/strd.
This check uses the 'r' constraint to ensure SP is not used so that it will
work for thumb mode code generation as well as arm mode.

Tested by running this testcase with cross compilers using "-march=armv5t",
"-mcpu=cortex-m3", "-mcpu-arm7tdmi", "-mcpu=cortex-a9 -march=armv5t" for 
both
arm-none-eabi and arm-none-linux-gnueabihf.
Also ran this testcase with `make check` natively.

Ok for trunk?

gcc/testsuite/ChangeLog:

2019-02-11  Matthew Malcomson  

* gcc.dg/rtl/arm/ldrd-peepholes.c: Restrict testcase.
* lib/target-supports.exp: Add procedure to check for ldrd.



diff --git a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c 
b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
index 
4c3949c0963b8482545df670c31db2d9ec0f26b3..cbb64a770f5d796250601cafe481d7c2ea13f2eb
 
100644
--- a/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
+++ b/gcc/testsuite/gcc.dg/rtl/arm/ldrd-peepholes.c
@@ -1,4 +1,6 @@
  /* { dg-do compile { target arm*-*-* } } */
+/* { dg-require-effective-target arm_arm_ok } */
+/* { dg-require-effective-target arm_ldrd_strd_ok } */
  /* { dg-skip-if "Ensure only targetting arm with TARGET_LDRD" { *-*-* 
} { "-mthumb" } { "" } } */
  /* { dg-options "-O3 -marm -fdump-rtl-peephole2" } */

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 
a0b4b99067f9ae225bde3b6bc719e89e1ea8e0e1..16dd018e8020fdf8e104690fed6a4e8919aa4aa1
 
100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4918,6 +4918,27 @@ proc check_effective_target_arm_prefer_ldrd_strd 
{ } {
  }  "-O2 -mthumb" ]
  }

+# Return true if LDRD/STRD instructions are available on this target.
+proc check_effective_target_arm_ldrd_strd_ok { } {
+if { ![check_effective_target_arm32] } {
+  return 0;
+}
+
+return [check_no_compiler_messages arm_ldrd_strd_ok object {
+  int main(void)
+  {
+__UINT64_TYPE__ a = 1, b = 10;
+__UINT64_TYPE__ *c = &b;
+// `a` will be in a valid register since it's a DImode quantity.
+asm ("ldrd %0, %1"
+ : "=r" (a)
+ : "m" (c));
+return a == 10;
+  }
+}]
+}
+
  # Return 1 if this is a PowerPC target supporting -meabi.

  proc check_effective_target_powerpc_eabi_ok { } {



Re: [PATCH] rs6000: Vector shift-right should honor modulo semantics

2019-02-11 Thread Bill Schmidt
On 2/11/19 8:11 AM, Segher Boessenkool wrote:
> On Mon, Feb 11, 2019 at 07:17:16AM -0600, Bill Schmidt wrote:
>> At -O0 (if I hand-inline everything myself to avoid errors), we scalarize
>> the modulo/masking operation into a rldicl for each doubleword.  I really
>> don't see any reason to change the code.
> So what does this look like at expand (at -O0)?  Is it something that
> is done at gimple level, is it expand itself, is it some target thing?

It's already a mask at expand, even at -O0.  vregs dump excerpt:

(insn 13 12 14 2 (set (reg:DI 129 [ _16 ])
(vec_select:DI (reg:V2DI 126 [ _9 ])
(parallel [
(const_int 0 [0])
]))) "vec-srad-modulo.c":40:10 1223 {vsx_extract_v2di}
 (nil))
(insn 14 13 15 2 (set (reg:DI 130 [ _17 ])
(and:DI (reg:DI 129 [ _16 ])
(const_int 63 [0x3f]))) "vec-srad-modulo.c":40:10 195 {anddi3_mask}
 (nil))
(insn 15 14 16 2 (set (reg:DI 131 [ _18 ])
(vec_select:DI (reg:V2DI 126 [ _9 ])
(parallel [
(const_int 1 [0x1])
]))) "vec-srad-modulo.c":40:10 1223 {vsx_extract_v2di}
 (nil))
(insn 16 15 17 2 (set (reg:DI 132 [ _19 ])
(and:DI (reg:DI 131 [ _18 ])
(const_int 63 [0x3f]))) "vec-srad-modulo.c":40:10 195 {anddi3_mask}
 (nil))

>
 For -mcpu=power9, we get close, but have some bad register allocation and
 an unnecessary extend:

 xxspltib 0,4   <- why not just xxspltib 32,4?
 xxlor 32,0,0   <- wasted copy
>>> Yeah, huh.  Where does that come from...  I blame splitters after reload.
>> This only happens at -O2 and up, FWIW.  At -O1 we allocate the registers
>> reasonably.
> Heh.
>
 Weird.  I just tried adding -mvsx
>>> Does it _need_ VSX anyway?  Are these builtins defined without it, too?
>> Yes (vector long long / V2DImode requires VSX).
> So something like
>
> /* { dg-do run } */
> /* { dg-require-effective-target vsx_hw } */
> /* { dg-options "-mvsx -O2" } */
>
> then?

What I have now:

/* { dg-do run { target { vsx_hw } } } */  
/* { dg-options "-O2 -mvsx" } */   

>
>> I pointed to the bugzilla in another reply -- which was "resolved" with a 
>> hack.
>> I consider it still broken this way...
> Reopen that PR?

Already done. ;-)

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=88920

Bill

>
>> I tested a revised version of the patch overnight and will submit shortly.
> Thanks.
>
>
> Segher
>



[PATCH][GCC][AArch64] Allow any offset for SVE addressing modes before reload

2019-02-11 Thread Tamar Christina
Hi All,

On AArch64 aarch64_classify_address has a case for when it's non-strict
that will allow it to accept any byte offset from a reg when validating
an address in a given addressing mode.

This because reload would later make the address valid. SVE however requires
the address always be valid, but currently allows any address when a MEM +
offset is used.  This causes an ICE as nothing later forces the address to be
legitimate.

The patch forces aarch64_emit_sve_pred_move to ensure that the addressing mode
is valid for any loads/stores it creates, which follows the SVE way of handling
address classifications.

Bootstrapped on aarch64-none-linux-gnu and no issues.
Regtested on aarch64-none-elf with SVE on and no issues.

Ok for trunk?

Thanks,
Tamar

gcc/ChangeLog:

2019-02-11  Tamar Christina  

PR target/88847
* config/aarch64/aarch64.c (aarch64_classify_address):
For SVE enforce that the address is always valid when doing a MEM +
offset.

gcc/testsuite/ChangeLog:

2019-02-11  Tamar Christina  

PR target/88847
* gcc.target/aarch64/sve/pr88847.c: New test.

-- 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5df5a8b78439e69705e62845a4d1f86166a01894..59f03e688e58c1aab37629555c7b3f19e5075935 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3414,6 +3414,14 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm,
 void
 aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
 {
+  /* Make sure that the address is legitimate.  */
+  if (MEM_P (dest)
+  && !aarch64_sve_struct_memory_operand_p (dest))
+{
+  rtx addr = force_reg (Pmode, XEXP (dest, 0));
+  dest = replace_equiv_address (dest, addr);
+}
+
   emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
 		gen_rtvec (2, pred, src),
 		UNSPEC_MERGE_PTRUE)));
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr88847.c b/gcc/testsuite/gcc.target/aarch64/sve/pr88847.c
new file mode 100644
index ..b7504add9a9f2eedf9421328a87b75b53d492860
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr88847.c
@@ -0,0 +1,21 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-additional-options "-O0 -msve-vector-bits=256 -mbig-endian --save-temps" } */
+
+typedef struct _b {
+  __attribute__((__vector_size__(32))) int a[2];
+} b;
+
+b *c;
+
+void
+foo (void)
+{
+  char *p = '\0';
+  b e = c[0];
+}
+
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+.s, p[0-9]+/z, \[x[0-9]+\]\n} } } */
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+.s, p[0-9]+/z, \[x[0-9]+, #1, mul vl\]\n} } } */
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+.s, p[0-9]+, \[(sp|x[0-9]+)\]\n} } } */
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+.s, p[0-9]+, \[(sp|x[0-9]+), #1, mul vl\]\n} } } */
+



[PATCH][GCC][Arm] Update tests after register allocation changes. (PR/target 88560)

2019-02-11 Thread Tamar Christina
Hi all,

After the register allocator changes of r268705 we need to update a few tests
with new output.

In all cases the compiler is now generating the expected code; since the tests
are all float16 testcases using a hard-float ABI, we expect that actual fp16
instructions are used rather than integer loads and stores.  Because of this,
we also save on some mov.f16s that were being emitted before to move between the
two.

The aapcs cases now match the f32 cases in using floating point operations.

Regtested on arm-none-eabi and no issues.

Ok for trunk?

Thanks,
Tamar

2019-02-11  Tamar Christina  

PR middle-end/88560
* gcc.target/arm/armv8_2-fp16-move-1.c: Update assembler scans.
* gcc.target/arm/fp16-aapcs-1.c: Likewise.
* gcc.target/arm/fp16-aapcs-3.c: Likewise.

-- 
diff --git a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
index 56d87eb6f716718595dc6acdf0744b1d9ecf4a42..2321dd38cc6d7a3635f01180ad0f235b2a183ec2 100644
--- a/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
+++ b/gcc/testsuite/gcc.target/arm/armv8_2-fp16-move-1.c
@@ -16,7 +16,6 @@ test_load_2 (__fp16* a, int i)
   return a[i];
 }
 
-/* { dg-final { scan-assembler-times {vld1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 2 } }  */
 
 void
 test_store_1 (__fp16* a, __fp16 b)
@@ -30,7 +29,6 @@ test_store_2 (__fp16* a, int i, __fp16 b)
   a[i] = b;
 }
 
-/* { dg-final { scan-assembler-times {vst1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 2 } }  */
 
 __fp16
 test_load_store_1 (__fp16* a, int i, __fp16* b)
@@ -44,8 +42,9 @@ test_load_store_2 (__fp16* a, int i, __fp16* b)
   a[i] = b[i + 2];
   return a[i];
 }
-/* { dg-final { scan-assembler-times {ldrh\tr[0-9]+} 2 } }  */
-/* { dg-final { scan-assembler-times {strh\tr[0-9]+} 2 } }  */
+
+/* { dg-final { scan-assembler-times {vst1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 3 } }  */
+/* { dg-final { scan-assembler-times {vld1\.16\t\{d[0-9]+\[[0-9]+\]\}, \[r[0-9]+\]} 3 } }  */
 
 __fp16
 test_select_1 (int sel, __fp16 a, __fp16 b)
@@ -102,7 +101,7 @@ test_select_8 (__fp16 a, __fp16 b, __fp16 c)
 /* { dg-final { scan-assembler-times {vselgt\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } }  */
 /* { dg-final { scan-assembler-times {vselge\.f16\ts[0-9]+, s[0-9]+, s[0-9]+} 1 } }  */
 
-/* { dg-final { scan-assembler-times {vmov\.f16\ts[0-9]+, r[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler-not {vmov\.f16} } }  */
 
 int
 test_compare_1 (__fp16 a, __fp16 b)
diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c b/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c
index b91168d43b389675909cabc1950c750c1c5dbf24..0a0a60f3503387f96eed881645aae031275d21ff 100644
--- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c
+++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-1.c
@@ -16,6 +16,7 @@ F (__fp16 a, __fp16 b, __fp16 c)
   return c;
 }
 
-/* { dg-final { scan-assembler {vmov(\.f16)?\tr[0-9]+, s[0-9]+} } }  */
-/* { dg-final { scan-assembler {vmov(\.f32)?\ts1, s0} } }  */
-/* { dg-final { scan-assembler {vmov(\.f16)?\ts0, r[0-9]+} } }  */
+/* { dg-final { scan-assembler {vmov\.f32\ts[0-9]+, s1} } }  */
+/* { dg-final { scan-assembler {vmov\.f32\ts1, s0} } }  */
+/* { dg-final { scan-assembler {vmov\.f32\ts[0-9]+, s2+} } }  */
+/* { dg-final { scan-assembler-times {vmov\.f32\ts0, s[0-9]+} 2 } }  */
diff --git a/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c b/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c
index 84fc0a0f5f06b1714a70f4703213ca10ea0b268e..56a3ae2618432a408cd9b20f9e1334106efab98b 100644
--- a/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c
+++ b/gcc/testsuite/gcc.target/arm/fp16-aapcs-3.c
@@ -16,6 +16,8 @@ F (__fp16 a, __fp16 b, __fp16 c)
   return c;
 }
 
-/* { dg-final { scan-assembler-times {vmov\tr[0-9]+, s[0-2]} 2 } }  */
-/* { dg-final { scan-assembler-times {vmov.f32\ts1, s0} 1 } }  */
-/* { dg-final { scan-assembler-times {vmov\ts0, r[0-9]+} 2 } }  */
+/* { dg-final { scan-assembler {vmov\.f32\ts[0-9]+, s1} } }  */
+/* { dg-final { scan-assembler {vmov\.f32\ts1, s0} } }  */
+/* { dg-final { scan-assembler {vmov\.f32\ts[0-9]+, s2+} } }  */
+/* { dg-final { scan-assembler-times {vmov\.f32\ts0, s[0-9]+} 2 } }  */
+



RE: [Aarch64][SVE] Vectorise sum-of-absolute-differences

2019-02-11 Thread Alejandro Martinez Vicente
> -Original Message-
> From: James Greenhalgh 
> Sent: 06 February 2019 17:42
> To: Alejandro Martinez Vicente 
> Cc: GCC Patches ; nd ; Richard
> Sandiford ; Richard Biener
> 
> Subject: Re: [Aarch64][SVE] Vectorise sum-of-absolute-differences
> 
> On Mon, Feb 04, 2019 at 07:34:05AM -0600, Alejandro Martinez Vicente
> wrote:
> > Hi,
> >
> > This patch adds support to vectorize sum of absolute differences
> > (SAD_EXPR) using SVE. It also uses the new functionality to ensure
> > that the resulting loop is masked. Therefore, it depends on
> >
> > https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00016.html
> >
> > Given this input code:
> >
> > int
> > sum_abs (uint8_t *restrict x, uint8_t *restrict y, int n) {
> >   int sum = 0;
> >
> >   for (int i = 0; i < n; i++)
> > {
> >   sum += __builtin_abs (x[i] - y[i]);
> > }
> >
> >   return sum;
> > }
> >
> > The resulting SVE code is:
> >
> >  :
> >0:   715fcmp w2, #0x0
> >4:   5400026db.le50 
> >8:   d283mov x3, #0x0// #0
> >c:   93407c42sxtwx2, w2
> >   10:   2538c002mov z2.b, #0
> >   14:   25221fe0whilelo p0.b, xzr, x2
> >   18:   2538c023mov z3.b, #1
> >   1c:   2518e3e1ptrue   p1.b
> >   20:   a4034000ld1b{z0.b}, p0/z, [x0, x3]
> >   24:   a4034021ld1b{z1.b}, p0/z, [x1, x3]
> >   28:   0430e3e3incbx3
> >   2c:   0520c021sel z1.b, p0, z1.b, z0.b
> >   30:   25221c60whilelo p0.b, x3, x2
> >   34:   040d0420uabdz0.b, p1/m, z0.b, z1.b
> >   38:   44830402udotz2.s, z0.b, z3.b
> >   3c:   5421b.ne20   // b.any
> >   40:   2598e3e0ptrue   p0.s
> >   44:   04812042uaddv   d2, p0, z2.s
> >   48:   1e260040fmovw0, s2
> >   4c:   d65f03c0ret
> >   50:   1e2703e2fmovs2, wzr
> >   54:   1e260040fmovw0, s2
> >   58:   d65f03c0ret
> >
> > Notice how udot is used inside a fully masked loop.
> >
> > I tested this patch in an aarch64 machine bootstrapping the compiler
> > and running the checks.
> 
> This doesn't give us much confidence in SVE coverage; unless you have been
> running in an environment using SVE by default? Do you have some set of
> workloads you could test the compiler against to ensure correct operation of
> the SVE vectorization?
> 
I tested it using an SVE model and a big set of workloads, including SPEC 2000,
2006 and 2017. On the plus side, nothing got broken. But impact on performance
was very minimal (on average, a tiny gain over the whole set of workloads).

I still want this patch (and the companion dot product patch) to make into the
compiler because they are the first steps towards vectorising workloads using
fully masked loops when the target ISA (like SVE) doesn't support masking in
all the operations.

Alejandro

> >
> > I admit it is too late to merge this into gcc 9, but I'm posting it
> > anyway so it can be considered for gcc 10.
> 
> Richard Sandiford has the call on whether this patch is OK for trunk now or
> GCC 10. With the minimal testing it has had, I'd be uncomfortable with it as a
> GCC 9 patch. That said, it is a fairly self-contained pattern for the compiler
> and it would be good to see this optimization in GCC 9.
> 
> >
> > Alejandro
> >
> >
> > gcc/Changelog:
> >
> > 2019-02-04  Alejandro Martinez  
> >
> > * config/aarch64/aarch64-sve.md (abd_3): New
> define_expand.
> > (aarch64_abd_3): Likewise.
> > (*aarch64_abd_3): New define_insn.
> > (sad): New define_expand.
> > * config/aarch64/iterators.md: Added MAX_OPP and max_opp
> attributes.
> > Added USMAX iterator.
> > * config/aarch64/predicates.md: Added aarch64_smin and
> aarch64_umin
> > predicates.
> > * tree-vect-loop.c (use_mask_by_cond_expr_p): Add SAD_EXPR.
> > (build_vect_cond_expr): Likewise.
> >
> > gcc/testsuite/Changelog:
> >
> > 2019-02-04  Alejandro Martinez  
> >
> > * gcc.target/aarch64/sve/sad_1.c: New test for sum of absolute
> > differences.
> 



Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread Uros Bizjak
On Mon, Feb 11, 2019 at 3:32 PM H.J. Lu  wrote:
>
> On Mon, Feb 11, 2019 at 5:51 AM Uros Bizjak  wrote:
> >
> > On Mon, Feb 11, 2019 at 2:29 PM H.J. Lu  wrote:
> >
> > > > No. As said, please correctly set mode to XImode in mode attribute 
> > > > calculation.
> > >
> > > There is
> > >
> > >  switch (get_attr_type (insn))
> > > {
> > > case TYPE_SSELOG1:
> > >   return standard_sse_constant_opcode (insn, operands);
> > >
> > > standard_sse_constant_opcode has
> > >
> > > else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
> > > {
> > >   enum attr_mode insn_mode = get_attr_mode (insn);
> > >
> > >   switch (insn_mode)
> > > {
> > > case MODE_XI:
> > > case MODE_V8DF:
> > > case MODE_V16SF:
> > >   gcc_assert (TARGET_AVX512F);
> > >   return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 0xFF}";
> >
> > If there is something wrong with standard_sse_constant_opcode, then
> > fix the problem in the function itself. With your previous patch, you
> > introduced a regression, and the presented fix is another kludge to
> > fix a stack of kludges inside standard_sse_constant_opcode.
> >
> > Please take your time and propose some acceptable solution that would
> > put some logic into const_0/const_1 handling. The situation is not OK
> > and your patch makes it even worse.
> >
>
> Let's first define what MODE_XI means in standard_sse_constant_opcode
> as well as in all these mov patterns for with and without AVX512VL.   Without
> a clear definition, we can't get out of this mess.

INT_MODE (OI, 32);
INT_MODE (XI, 64);

So, XI_MODE represents 64 INTEGER bytes = 64 * 8 = 512 bit operation,
in case of const_1, all 512 bits set.

We can load zeros with narrower instruction, (e.g. 256 bit by inherent
zeroing of highpart in case of 128 bit xor), so TImode in this case.

Some targets prefer V4SF mode, so they will emit float xorps for zeroing

Then the introduction of AVX512F fubared everything by overloading the
meaning of insn mode.

Uros.


Re: [PATCH, v2] rs6000: Vector shift-right should honor modulo semantics

2019-02-11 Thread Segher Boessenkool
Hi Bill,

On Mon, Feb 11, 2019 at 07:36:11AM -0600, Bill Schmidt wrote:
> 2019-02-11  Bill Schmidt  
> 
>   * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Shift-right
>   and shift-left vector built-ins need to include a TRUNC_MOD_EXPR
>   for correct semantics.
> 
> [gcc/testsuite]
> 
> 2019-02-11  Bill Schmidt  
> 
>   * gcc.target/powerpc/vec-sld-modulo.c: New.
>   * gcc.target/powerpc/vec-srad-modulo.c: New.
>   * gcc.target/powerpc/vec-srd-modulo.c: New.

This is okay for trunk and backports.  Thanks!

One comment:

> +vec_sldi (vui64_t vra, const unsigned int shb)
> +{
> +  vui64_t lshift;
> +  vui64_t result;
> +
> +  /* Note legitimate use of wrong-type splat due to expectation that only
> + lower 6-bits are read.  */
> +  lshift = (vui64_t) vec_splat_s8((const unsigned char)shb);
> +
> +  /* Vector Shift Left Doublewords based on the lower 6-bits
> + of corresponding element of lshift.  */
> +  result = vec_vsld (vra, lshift);
> +
> +  return (vui64_t) result;
> +}

I realise this is a testcase, and in one frame of mind it is good to test
all different styles and bad habits.  But please never use casts that do not
do anything in correct programs: the only thing such casts do is they shut
up warnings in incorrect programs (including the same program after a wrong
change).  


Segher


Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread H.J. Lu
On Mon, Feb 11, 2019 at 7:56 AM Uros Bizjak  wrote:
>
> On Mon, Feb 11, 2019 at 3:32 PM H.J. Lu  wrote:
> >
> > On Mon, Feb 11, 2019 at 5:51 AM Uros Bizjak  wrote:
> > >
> > > On Mon, Feb 11, 2019 at 2:29 PM H.J. Lu  wrote:
> > >
> > > > > No. As said, please correctly set mode to XImode in mode attribute 
> > > > > calculation.
> > > >
> > > > There is
> > > >
> > > >  switch (get_attr_type (insn))
> > > > {
> > > > case TYPE_SSELOG1:
> > > >   return standard_sse_constant_opcode (insn, operands);
> > > >
> > > > standard_sse_constant_opcode has
> > > >
> > > > else if (x == constm1_rtx || vector_all_ones_operand (x, mode))
> > > > {
> > > >   enum attr_mode insn_mode = get_attr_mode (insn);
> > > >
> > > >   switch (insn_mode)
> > > > {
> > > > case MODE_XI:
> > > > case MODE_V8DF:
> > > > case MODE_V16SF:
> > > >   gcc_assert (TARGET_AVX512F);
> > > >   return "vpternlogd\t{$0xFF, %g0, %g0, %g0|%g0, %g0, %g0, 
> > > > 0xFF}";
> > >
> > > If there is something wrong with standard_sse_constant_opcode, then
> > > fix the problem in the function itself. With your previous patch, you
> > > introduced a regression, and the presented fix is another kludge to
> > > fix a stack of kludges inside standard_sse_constant_opcode.
> > >
> > > Please take your time and propose some acceptable solution that would
> > > put some logic into const_0/const_1 handling. The situation is not OK
> > > and your patch makes it even worse.
> > >
> >
> > Let's first define what MODE_XI means in standard_sse_constant_opcode
> > as well as in all these mov patterns for with and without AVX512VL.   
> > Without
> > a clear definition, we can't get out of this mess.
>
> INT_MODE (OI, 32);
> INT_MODE (XI, 64);
>
> So, XI_MODE represents 64 INTEGER bytes = 64 * 8 = 512 bit operation,
> in case of const_1, all 512 bits set.
>
> We can load zeros with narrower instruction, (e.g. 256 bit by inherent
> zeroing of highpart in case of 128 bit xor), so TImode in this case.
>
> Some targets prefer V4SF mode, so they will emit float xorps for zeroing
>
> Then the introduction of AVX512F fubared everything by overloading the
> meaning of insn mode.

Exactly.

How should we use INSN mode,  MODE_XI, in standard_sse_constant_opcode
and patterns which use standard_sse_constant_opcode? 2 options:

1.  MODE_XI should only used to check if EXT_REX_SSE_REG_P is true
in any register operand.  The operand size must be determined by operand
itself , not by MODE_XI.  The operand encoding size should be determined
by the operand size, EXT_REX_SSE_REG_P and AVX512VL.
2. MODE_XI should be used to determine the operand encoding size.
EXT_REX_SSE_REG_P and AVX512VL should be checked for encoding
instructions.

Which way should we go?

-- 
H.J.


Re: [PATCH][GCC][Arm] Update tests after register allocation changes. (PR/target 88560)

2019-02-11 Thread Kyrill Tkachov



On 11/02/19 15:17, Tamar Christina wrote:

Hi all,

After the register allocator changes of r268705 we need to update a few tests
with new output.

In all cases the compiler is now generating the expected code; since the tests
are all float16 testcases using a hard-float ABI, we expect that actual fp16
instructions are used rather than integer loads and stores.  Because of this,
we also save on some mov.f16s that were being emitted before to move between the
two.

The aapcs cases now match the f32 cases in using floating point operations.

Regtested on arm-none-eabi and no issues.

Ok for trunk?



Ok.
Thanks,
Kyrill


Thanks,
Tamar

2019-02-11  Tamar Christina  

PR middle-end/88560
* gcc.target/arm/armv8_2-fp16-move-1.c: Update assembler scans.
* gcc.target/arm/fp16-aapcs-1.c: Likewise.
* gcc.target/arm/fp16-aapcs-3.c: Likewise.

--




Re: [PATCH, v2] rs6000: Vector shift-right should honor modulo semantics

2019-02-11 Thread Bill Schmidt
On 2/11/19 10:01 AM, Segher Boessenkool wrote:

> Hi Bill,
>
> On Mon, Feb 11, 2019 at 07:36:11AM -0600, Bill Schmidt wrote:
>> 2019-02-11  Bill Schmidt  
>>
>>  * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Shift-right
>>  and shift-left vector built-ins need to include a TRUNC_MOD_EXPR
>>  for correct semantics.
>>
>> [gcc/testsuite]
>>
>> 2019-02-11  Bill Schmidt  
>>
>>  * gcc.target/powerpc/vec-sld-modulo.c: New.
>>  * gcc.target/powerpc/vec-srad-modulo.c: New.
>>  * gcc.target/powerpc/vec-srd-modulo.c: New.
> This is okay for trunk and backports.  Thanks!
>
> One comment:
>
>> +vec_sldi (vui64_t vra, const unsigned int shb)
>> +{
>> +  vui64_t lshift;
>> +  vui64_t result;
>> +
>> +  /* Note legitimate use of wrong-type splat due to expectation that only
>> + lower 6-bits are read.  */
>> +  lshift = (vui64_t) vec_splat_s8((const unsigned char)shb);
>> +
>> +  /* Vector Shift Left Doublewords based on the lower 6-bits
>> + of corresponding element of lshift.  */
>> +  result = vec_vsld (vra, lshift);
>> +
>> +  return (vui64_t) result;
>> +}
> I realise this is a testcase, and in one frame of mind it is good to test
> all different styles and bad habits.  But please never use casts that do not
> do anything in correct programs: the only thing such casts do is they shut
> up warnings in incorrect programs (including the same program after a wrong
> change).  

Agreed!  Thanks.  I wasn't careful to remove these as I modified the original
test where they were pertinent.  Will fix before committing.

Thanks,
Bill

>
>
> Segher
>



Re: [PATCH] i386: Use EXT_REX_SSE_REG_P in *movoi_internal_avx/movti_internal

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 04:56:45PM +0100, Uros Bizjak wrote:
> > Let's first define what MODE_XI means in standard_sse_constant_opcode
> > as well as in all these mov patterns for with and without AVX512VL.   
> > Without
> > a clear definition, we can't get out of this mess.
> 
> INT_MODE (OI, 32);
> INT_MODE (XI, 64);
> 
> So, XI_MODE represents 64 INTEGER bytes = 64 * 8 = 512 bit operation,
> in case of const_1, all 512 bits set.
> 
> We can load zeros with narrower instruction, (e.g. 256 bit by inherent
> zeroing of highpart in case of 128 bit xor), so TImode in this case.
> 
> Some targets prefer V4SF mode, so they will emit float xorps for zeroing
> 
> Then the introduction of AVX512F fubared everything by overloading the
> meaning of insn mode.

I don't see much changes in AVX512F here, most of the behavior has been
there already in AVX.
Most of the SSE/AVX/AVX512 instructions affect the whole register,
usually there is DEST[MAX_VL-1:VL] <- 0 at the end of each instruction.
But, using the MAX_VL to determine get_attr_mode doesn't seem really useful,
because that changes dynamically at runtime based on the actual hw, not on
what we've been compiled for.
So, I believe we want to use that VL value to determine the bitsize of the
mode corresponding to get_attr_mode.  And in that case, for
*movoi_internal_avx and *movti_internal, I believe the right mode is MODE_OI
resp. MODE_TI for AVX512VL, because e.g.
vmovdqa32 %ymm12, %ymm23
is a VL = 256 instruction, not VL = 512.  Similarly, if we want to set
%ymm25 to all ones, i.e. movoi_internal_avx, we use
vpternlogd  $0xFF, %ymm25, %ymm25, %ymm25
which is again VL = 256 instruction, so should use MODE_OI.
We'd need to use
vmovdqa32 %zmm12, %zmm23
or
vpternlogd  $0xFF, %zmm25, %zmm25, %zmm25
instructions for AVX512F without AVX512VL, but as has been discussed, this
won't really happen, because hard_regno_mode_ok refuses to allocate 256-bit
or 128-bit modes in ext sse registers.

Jakub


Re: Make clear, when contributions will be ignored

2019-02-11 Thread Segher Boessenkool
On Mon, Feb 11, 2019 at 02:16:27PM +, Дилян Палаузов wrote:
> Hello Segher,
> 
> my question was how do you propose to proceed, so that a 
> no-reminders-for-patches-are-necessary-state is reached.
> 
> There is no relation with having infinite time or dealing with high-cost 
> low-profit patches.
> 
> Previously I raised the quesion, whether automating the process for sending 
> reminders, is a good idea.  This saves time
> of people to write reminders.

But that would be "optimising" exactly the wrong thing!  The choke point is
patch review.  So you should make it easier to review a patch, instead of
making it easier to send in more patches.  Your complaint is that many
patches are sent in but then not reviewed, or not reviewed for a long while,
after all.

Easy to review patches are of course first and foremost patches that do the
correct thing.  But also they need to clearly say what they fix (and how),
how the patch was tested, and they should often contain testcases for the
testsuite.  Easy to review patches usually use the same style and
presentation as all other easy to review patches.


Segher


[PATCH] correct comments in tree-prof/inliner-1.c

2019-02-11 Thread Martin Sebor

I noticed the comments in the test don't correspond to what it's
designed to exercise: namely that the call to hot_function() is
inlined and the call to cold_function() is not, rather than
the other way around.

Attached is a patch that adjusts the comments.  Honza, please let
me know if this looks correct to you.

Thanks
Martin
gcc/testsuite/ChangeLog:

	* gcc.dg/tree-prof/inliner-1.c: Correct comments.

Index: gcc/testsuite/gcc.dg/tree-prof/inliner-1.c
===
--- gcc/testsuite/gcc.dg/tree-prof/inliner-1.c	(revision 268755)
+++ gcc/testsuite/gcc.dg/tree-prof/inliner-1.c	(working copy)
@@ -28,15 +28,15 @@ main ()
   for (i = 0; i < 100; i++)
 {
   if (a)
-cold_function ();
+cold_function ();   /* Should not be inlined.  */
   else
-hot_function ();
+hot_function ();/* Should be inlined.  */
 }
   return 0;
 }
 
-/* cold function should be inlined, while hot function should not.  
-   Look for "cold_function () [tail call];" call statement not for the
-   declaration or other appearances of the string in dump.  */
+/* The call to hot_function should be inlined, while cold_function should
+   not be.  Look for the "cold_function ();" call statement and not for
+   its declaration or other occurrences of the string in the dump.  */
 /* { dg-final-use { scan-tree-dump "cold_function ..;" "optimized"} } */
 /* { dg-final-use { scan-tree-dump-not "hot_function ..;" "optimized"} } */


Re: [PATCH] correct comments in tree-prof/inliner-1.c

2019-02-11 Thread Jan Hubicka
> I noticed the comments in the test don't correspond to what it's
> designed to exercise: namely that the call to hot_function() is
> inlined and the call to cold_function() is not, rather than
> the other way around.
> 
> Attached is a patch that adjusts the comments.  Honza, please let
> me know if this looks correct to you.
> 
> Thaks
> Martin

> gcc/testsuite/ChangeLog:
> 
>   * gcc.dg/tree-prof/inliner-1.c: Correct comments.

This looks ok, thanks!
Honza

> 
> Index: gcc/testsuite/gcc.dg/tree-prof/inliner-1.c
> ===
> --- gcc/testsuite/gcc.dg/tree-prof/inliner-1.c(revision 268755)
> +++ gcc/testsuite/gcc.dg/tree-prof/inliner-1.c(working copy)
> @@ -28,15 +28,15 @@ main ()
>for (i = 0; i < 100; i++)
>  {
>if (a)
> -cold_function ();
> +cold_function ();   /* Should not be inlined.  */
>else
> -hot_function ();
> +hot_function ();/* Should be inlined.  */
>  }
>return 0;
>  }
>  
> -/* cold function should be inlined, while hot function should not.  
> -   Look for "cold_function () [tail call];" call statement not for the
> -   declaration or other appearances of the string in dump.  */
> +/* The call to hot_function should be inlined, while cold_function should
> +   not be.  Look for the "cold_function ();" call statement and not for
> +   its declaration or other occurrences of the string in the dump.  */
>  /* { dg-final-use { scan-tree-dump "cold_function ..;" "optimized"} } */
>  /* { dg-final-use { scan-tree-dump-not "hot_function ..;" "optimized"} } */



Re: arm access to stack slot out of allocated area

2019-02-11 Thread Olivier Hainque
Hi Wilco,

> On 8 Feb 2019, at 22:35, Wilco Dijkstra  wrote:

> So I think we need to push much harder on getting rid of obsolete stuff and
> avoid people encountering these nasty issues.

Numbers I just received indicate that we can legitimately head
in this direction for VxWorks as well (move towards VxWorks 7 only
ports, AAPCS based).

Good news :)

Thanks for your input!




Re: arm access to stack slot out of allocated area

2019-02-11 Thread Olivier Hainque
Hello Ramana,

> Olivier, while you are here could you also document the choices made by
> the vxworks port in terms of the ABI and how it differs from EABI ? It
> would certainly help with patch review.

Thanks for your feedback as well.

Yes, I'll add a comment and macro defs to the VxWorks
headers to state the ABI settings explicitly.

We're using the defaults from arm.h for vxworks6 today
(ABI_APCS & MASK_APCS_FRAME), which matches what the system
environment provides.

I'll stick a note mentioning the deprecation Wilco
described and pushing towards VxWorks 7.

We'll probably end up removing support entirely at
some point.

Cheers,

Olivier



[PATCH][ARM] Fix PR89222

2019-02-11 Thread Wilco Dijkstra
The GCC optimizer can generate symbols with non-zero offset from simple
if-statements. Bit zero is used for the Arm/Thumb state bit, so relocations
with offsets fail if it changes bit zero and the relocation forces bit zero
to true.  The fix is to disable offsets on function pointer symbols.  

ARMv5te bootstrap OK, regression tests pass. OK for commit?

ChangeLog:
2019-02-06  Wilco Dijkstra  

gcc/
PR target/89222
* config/arm/arm.md (movsi): Use arm_cannot_force_const_mem
to decide when to split off an offset from a symbol.
* config/arm/arm.c (arm_cannot_force_const_mem): Disallow offsets
in function symbols.
* config/arm/arm-protos.h (arm_cannot_force_const_mem): Add.

testsuite/
PR target/89222
* gcc.target/arm/pr89222.c: Add new test.

--
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 
79ede0db174fcce87abe8b4d18893550d4c7e2f6..0bedbe5110853617ecf7456bbaa56b1405fb65dd
 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -184,6 +184,7 @@ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, 
tree, rtx, tree);
 extern bool arm_pad_reg_upward (machine_mode, tree, int);
 #endif
 extern int arm_apply_result_size (void);
+extern bool arm_cannot_force_const_mem (machine_mode, rtx);
 
 #endif /* RTX_CODE */
 
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 
c4c9b4a667100d81d918196713e40b01ee232ee2..ccd4211045066d8edb89dd4c23d554517639f8f6
 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -178,7 +178,6 @@ static void arm_internal_label (FILE *, const char *, 
unsigned long);
 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
 tree);
 static bool arm_have_conditional_execution (void);
-static bool arm_cannot_force_const_mem (machine_mode, rtx);
 static bool arm_legitimate_constant_p (machine_mode, rtx);
 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
@@ -8936,15 +8935,20 @@ arm_legitimate_constant_p (machine_mode mode, rtx x)
 
 /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
 
-static bool
+bool
 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
   rtx base, offset;
+  split_const (x, &base, &offset);
 
-  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+  if (GET_CODE (base) == SYMBOL_REF)
 {
-  split_const (x, &base, &offset);
-  if (GET_CODE (base) == SYMBOL_REF
+  /* Function symbols cannot have an offset due to the Thumb bit.  */
+  if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
+ && INTVAL (offset) != 0)
+   return true;
+
+  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
  && !offset_within_block_p (base, INTVAL (offset)))
return true;
 }
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 
aa759624f8f617576773aa75fd6239d6e06e8a13..00fccd964a86dd814f15e4a1fdf5b47173a3ee3f
 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -5981,17 +5981,13 @@ (define_expand "movsi"
 }
 }
 
-  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
+  if (arm_cannot_force_const_mem (SImode, operands[1]))
 {
   split_const (operands[1], &base, &offset);
-  if (GET_CODE (base) == SYMBOL_REF
- && !offset_within_block_p (base, INTVAL (offset)))
-   {
- tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
- emit_move_insn (tmp, base);
- emit_insn (gen_addsi3 (operands[0], tmp, offset));
- DONE;
-   }
+  tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
+  emit_move_insn (tmp, base);
+  emit_insn (gen_addsi3 (operands[0], tmp, offset));
+  DONE;
 }
 
   /* Recognize the case where operand[1] is a reference to thread-local
diff --git a/gcc/testsuite/gcc.target/arm/pr89222.c 
b/gcc/testsuite/gcc.target/arm/pr89222.c
new file mode 100644
index 
..d26d7df17544db8426331e67b9a36d749ec6c6d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pr89222.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void g (void);
+
+void f1 (int x)
+{
+  if (x != (int) g + 3)
+return;
+  g();
+}
+
+void (*a2)(void);
+
+void f2 (void)
+{
+  a2 = &g + 3;
+}
+
+typedef void (*__sighandler_t)(int);
+void handler (int);
+
+void f3 (int x)
+{
+  __sighandler_t h = &handler;
+  if (h != (__sighandler_t) 2 && h != (__sighandler_t) 1)
+h (x);
+}
+
+/* { dg-final { scan-assembler-times {add(?:s)?\tr[0-9]+, r[0-9]+, #3} 2 } } */
+/* { dg-final { scan-assembler-not {.word\tg\+3} } } */
+/* { dg-final { scan-assembler-not {.word\thandler-1} } } */



[PATCH] S/390: Reject invalid Q/R/S/T addresses after LRA

2019-02-11 Thread Ilya Leoshkevich
Bootstrapped and regtested on s390x-redhat-linux.

The previous attempt to fix PR89233 [1] went into wrong direction of
dealing with symptoms rather than the root cause.  Since the approach
here is completely different, I'm not sending it as v2.

The following insn:

(insn (set (reg:DI %r2)
   (sign_extend:DI (mem:SI
(const:DI (plus:DI (symbol_ref:DI ("*.LC0"))
   (const_int 16)))

is correctly recognized by LRA as RIL alternative of extendsidi2
define_insn.  However, when recognition runs after LRA, it returns RXY
alternative, which is incorrect, since the offset 16 points past the
end of of *.LC0 literal pool entry.  Such addresses are normally
rejected by s390_decompose_address ().

This inconsistency confuses annotate_constant_pool_refs: the selected
alternative makes it proceed with annotation, only to find that the
annotated address is invalid, causing ICE.

This patch fixes the root cause, namely, that s390_check_qrst_address ()
behaves differently during and after LRA.

[1] https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00736.html

gcc/ChangeLog:

2019-02-11  Ilya Leoshkevich  

PR target/89233
* config/s390/s390.c (s390_decompose_address): Update comment.
(s390_check_qrst_address): Reject invalid address forms after
LRA.

gcc/testsuite/ChangeLog:

2019-02-11  Ilya Leoshkevich  

PR target/89233
* gcc.target/s390/pr89233.c: New test.
---
 gcc/config/s390/s390.c  | 10 +++---
 gcc/testsuite/gcc.target/s390/pr89233.c | 11 +++
 2 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/pr89233.c

diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 6a571a3e054..713973a3fd4 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -3020,7 +3020,9 @@ s390_decompose_address (rtx addr, struct s390_address 
*out)
  if (offset)
{
  /* If we have an offset, make sure it does not
-exceed the size of the constant pool entry.  */
+exceed the size of the constant pool entry.
+Otherwise we might generate an out-of-range
+displacement for the base register form.  */
  rtx sym = XVECEXP (disp, 0, 0);
  if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
return false;
@@ -3193,8 +3195,10 @@ s390_check_qrst_address (char c, rtx op, bool 
lit_pool_ok)
  generic cases below ('R' or 'T'), since reload will in fact fix
  them up.  LRA behaves differently here; we never see such forms,
  but on the other hand, we need to strictly reject every invalid
- address form.  Perform this check right up front.  */
-  if (lra_in_progress)
+ address form.  After both reload and LRA invalid address forms
+ must be rejected, because nothing will fix them up later.  Perform
+ this check right up front.  */
+  if (lra_in_progress || reload_completed)
 {
   if (!decomposed && !s390_decompose_address (op, &addr))
return 0;
diff --git a/gcc/testsuite/gcc.target/s390/pr89233.c 
b/gcc/testsuite/gcc.target/s390/pr89233.c
new file mode 100644
index 000..f572bfa08d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/pr89233.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=z13 -O1" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+int
+f ()
+{
+  v4si x = {0, 1, 2, 3};
+  return x[4];
+}
-- 
2.20.1



[PATCH] PR c++/89267 - change of error location.

2019-02-11 Thread Jason Merrill
My patch for 86943 on the branch removed this code, which led to a location
change on one of the diagnostics in constexpr-lambda8.C.  Removing this bit
wasn't the point of the patch, so let's put it back.

Applying to 8 branch.

* pt.c (tsubst_copy_and_build): Do still clear expr location
for instantiated thunk calls.
---
 gcc/cp/pt.c  | 8 +++-
 gcc/cp/ChangeLog | 6 ++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index aa57811d7b7..72dc1e0b569 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -18539,12 +18539,18 @@ tsubst_copy_and_build (tree t,
bool op = CALL_EXPR_OPERATOR_SYNTAX (t);
bool ord = CALL_EXPR_ORDERED_ARGS (t);
bool rev = CALL_EXPR_REVERSE_ARGS (t);
-   if (op || ord || rev)
+   bool thk = CALL_FROM_THUNK_P (t);
+   if (op || ord || rev || thk)
  {
function = extract_call_expr (ret);
CALL_EXPR_OPERATOR_SYNTAX (function) = op;
CALL_EXPR_ORDERED_ARGS (function) = ord;
CALL_EXPR_REVERSE_ARGS (function) = rev;
+   if (thk)
+ {
+   /* The thunk location is not interesting.  */
+   SET_EXPR_LOCATION (function, UNKNOWN_LOCATION);
+ }
  }
  }
 
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 27f7032652f..7e3d056dc7b 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,9 @@
+2019-02-11  Jason Merrill  
+
+   PR c++/89267 - change of error location.
+   * pt.c (tsubst_copy_and_build): Do still clear expr location
+   for instantiated thunk calls.
+
 2019-02-08  Jason Merrill  
 
PR c++/88761 - ICE with reference capture of constant.

base-commit: 161d165c044a1cc7e5d4c15358817afaf6e82f58
-- 
2.20.1



Re: [PATCH 13/43] i386: Emulate MMX pshufw with SSE

2019-02-11 Thread H.J. Lu
On Sun, Feb 10, 2019 at 3:16 AM Uros Bizjak  wrote:
>
> On 2/10/19, H.J. Lu  wrote:
> > Emulate MMX pshufw with SSE.  Only SSE register source operand is allowed.
> >
> >   PR target/89021
> >   * config/i386/mmx.md (mmx_pshufw_1): Add SSE emulation.
> >   (*vec_dupv4hi): Likewise.
> >   emulation.
> > ---
> >  gcc/config/i386/mmx.md | 33 +
> >  1 file changed, 21 insertions(+), 12 deletions(-)
> >
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index 1ee51c5deb7..dc81d7f45df 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -1364,7 +1364,8 @@
> >[(match_operand:V4HI 0 "register_operand")
> > (match_operand:V4HI 1 "nonimmediate_operand")
> > (match_operand:SI 2 "const_int_operand")]
> > -  "TARGET_SSE || TARGET_3DNOW_A"
> > +  "((TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE)
> > +   || TARGET_3DNOW_A"
>
> I think that the above condition should read
>
> (TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)
>
> and with TARGET_MMX_WITH_SSE (which implies SSE2) we always use XMM
> registers. Without SSE2, we use MMX registers, as before.

Done.

> >  {
> >int mask = INTVAL (operands[2]);
> >emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
> > @@ -1376,14 +1377,15 @@
> >  })
> >
> >  (define_insn "mmx_pshufw_1"
> > -  [(set (match_operand:V4HI 0 "register_operand" "=y")
> > +  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
> >  (vec_select:V4HI
> > -  (match_operand:V4HI 1 "nonimmediate_operand" "ym")
> > +  (match_operand:V4HI 1 "nonimmediate_operand" "ym,Yv")
> >(parallel [(match_operand 2 "const_0_to_3_operand")
> >   (match_operand 3 "const_0_to_3_operand")
> >   (match_operand 4 "const_0_to_3_operand")
> >   (match_operand 5 "const_0_to_3_operand")])))]
> > -  "TARGET_SSE || TARGET_3DNOW_A"
> > +  "((TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE)
> > +   || TARGET_3DNOW_A"
> >  {
> >int mask = 0;
> >mask |= INTVAL (operands[2]) << 0;
> > @@ -1392,11 +1394,15 @@
> >mask |= INTVAL (operands[5]) << 6;
> >operands[2] = GEN_INT (mask);
> >
> > -  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
> > +  if (TARGET_MMX_WITH_SSE)
> > +return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
> > +  else
> > +return "pshufw\t{%2, %1, %0|%0, %1, %2}";
>
> The above should be implemented as multi-output template.

I have

{
  int mask = 0;
  mask |= INTVAL (operands[2]) << 0;
  mask |= INTVAL (operands[3]) << 2;
  mask |= INTVAL (operands[4]) << 4;
  mask |= INTVAL (operands[5]) << 6;
  operands[2] = GEN_INT (mask);

  if (TARGET_MMX_WITH_SSE)
return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
  else
return "pshufw\t{%2, %1, %0|%0, %1, %2}";
}

How can I build mask before multi-output template?

> >  }
> > -  [(set_attr "type" "mmxcvt")
> > +  [(set_attr "mmx_isa" "native,x64")
> > +   (set_attr "type" "mmxcvt,sselog")
> > (set_attr "length_immediate" "1")
> > -   (set_attr "mode" "DI")])
> > +   (set_attr "mode" "DI,TI")])
> >
> >  (define_insn "mmx_pswapdv2si2"
> >[(set (match_operand:V2SI 0 "register_operand" "=y")
> > @@ -1410,15 +1416,18 @@
> > (set_attr "mode" "DI")])
> >
> >  (define_insn "*vec_dupv4hi"
> > -  [(set (match_operand:V4HI 0 "register_operand" "=y")
> > +  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
> >   (vec_duplicate:V4HI
> > (truncate:HI
> > - (match_operand:SI 1 "register_operand" "0"]
> > + (match_operand:SI 1 "register_operand" "0,Yv"]
> >"TARGET_SSE || TARGET_3DNOW_A"
>
> Here we also need "(TARGET_MMX || TARGET_MMX_WITH_SSE) &&"

Fixed.

> Uros.
>
> > -  "pshufw\t{$0, %0, %0|%0, %0, 0}"
> > -  [(set_attr "type" "mmxcvt")
> > +  "@
> > +   pshufw\t{$0, %0, %0|%0, %0, 0}
> > +   %vpshuflw\t{$0, %1, %0|%0, %1, 0}"
> > +  [(set_attr "mmx_isa" "native,x64")
> > +   (set_attr "type" "mmxcvt,sselog1")
> > (set_attr "length_immediate" "1")
> > -   (set_attr "mode" "DI")])
> > +   (set_attr "mode" "DI,TI")])
> >
> >  (define_insn_and_split "*vec_dupv2si"
> >[(set (match_operand:V2SI 0 "register_operand" "=y,x,Yv")
> > --
> > 2.20.1
> >
> >



-- 
H.J.


Re: [PATCH][ARM] Fix PR89222

2019-02-11 Thread Alexander Monakov
On Mon, 11 Feb 2019, Wilco Dijkstra wrote:

> The GCC optimizer can generate symbols with non-zero offset from simple
> if-statements. Bit zero is used for the Arm/Thumb state bit, so relocations
> with offsets fail if it changes bit zero and the relocation forces bit zero
> to true.  The fix is to disable offsets on function pointer symbols.  
> 
> ARMv5te bootstrap OK, regression tests pass. OK for commit?

Just to be sure the issue is analyzed properly: if it's certain that this usage
is not allowed, shouldn't the linker produce a diagnostic instead of silently
concealing the issue?

With Gold linker this is handled correctly.  So it looks to me like a
bug in BFD linker, where it ignores any addend (not just +1/-1) when
resolving a relocation against a Thumb function.

Alexander


[COMMITTED] Fix pthread errors in pr86637-2.c

2019-02-11 Thread Wilco Dijkstra
Fix test errors on targets which do not support pthreads.

Committed as obvious.

ChangeLog:
2019-02-11  Wilco Dijkstra  

PR tree-optimization/86637
* gcc.c-torture/compile/pr86637-2.c: Test pthread and graphite target.
---
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c 
b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
index 
3b675eae1b685fb1f7a431bb84dc5b3dbb327177..2f69c292f7e9d205e92f797b78cebd769e0c1dc6
 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr86637-2.c
@@ -1,4 +1,6 @@
-/* { dg-do compile { target fgraphite } } */
+/* { dg-do compile } */
+/* { dg-require-effective-target fgraphite } */
+/* { dg-require-effective-target pthread } */
 /* { dg-options "-floop-parallelize-all -fsave-optimization-record 
-ftree-parallelize-loops=2 -ftree-slp-vectorize" } */
 
 #include 


Re: [PATCH][GCC][AArch64] Allow any offset for SVE addressing modes before reload

2019-02-11 Thread Richard Sandiford
Tamar Christina  writes:
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 
> 5df5a8b78439e69705e62845a4d1f86166a01894..59f03e688e58c1aab37629555c7b3f19e5075935
>  100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3414,6 +3414,14 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm,
>  void
>  aarch64_emit_sve_pred_move (rtx dest, rtx pred, rtx src)
>  {
> +  /* Make sure that the address is legitimate.  */
> +  if (MEM_P (dest)
> +  && !aarch64_sve_struct_memory_operand_p (dest))
> +{
> +  rtx addr = force_reg (Pmode, XEXP (dest, 0));
> +  dest = replace_equiv_address (dest, addr);
> +}
> +
>emit_insn (gen_rtx_SET (dest, gen_rtx_UNSPEC (GET_MODE (dest),
>   gen_rtvec (2, pred, src),
>   UNSPEC_MERGE_PTRUE)));

I think the same thing could happen for the src as well as dest.
The function is also used for single-vector modes, not just struct modes.

This code predated the support for "@" patterns.  Now that we have them,
it might be better to make:

(define_insn "*pred_mov"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, m")
(unspec:SVE_ALL
  [(match_operand: 1 "register_operand" "Upl, Upl")
   (match_operand:SVE_ALL 2 "nonimmediate_operand" "m, w")]
  UNSPEC_MERGE_PTRUE))]

and:

(define_insn_and_split "pred_mov"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" 
"=w, Utx")
(unspec:SVE_STRUCT
  [(match_operand: 1 "register_operand" "Upl, Upl")
   (match_operand:SVE_STRUCT 2 
"aarch64_sve_struct_nonimmediate_operand" "Utx, w")]
  UNSPEC_MERGE_PTRUE))]

public as "@aarch64_pred_mov" ("aarch64_" so that we don't
pollute the target-independent namespace).  Then we can use something like:

  expand_operand ops[3];
  create_output_operand (&ops[0], dest, mode);
  create_input_operand (&ops[1], pred, GET_MODE (pred));
  create_input_operand (&ops[2], src, mode);
  expand_insn (code_for_aarch64_pred_mov (mode), 3, ops);

(completely untested).

Thanks,
Richard


Re: [PATCH] avoid 4095/INT_MAX warning for fprintf (PR 88993)

2019-02-11 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2019-02/msg00224.html

(This patch also handles bug 88835.)

On 2/4/19 8:58 PM, Martin Sebor wrote:

The attached patch relaxes -Wformat-overflow=2 to avoid warning about
individual directives that might (but need not) exceed the 4095 byte
limit, and about the total function output that likewise might (but
need not) exceed the INT_MAX limit.

The bug report actually requests that instead of the standard minimum
of 4095 bytes, GCC consider real libc limits, but trying to figure
out what these real limits might be (they're not documented anywhere,
AFAIK) and hardcoding them into GCC doesn't seem like a good solution.

Instead, the patch only does little more than the bare minimum to
suppress these pedantic warnings, and it only does that for the "may
exceed" cases and not for those where the size of output definitely
exceeds either limit.  Using the formatted functions to write such
large amounts of data seems more likely to be a bug than intentional,
and at level 2 issuing the warning seems appropriate unless the return
value of the function is tested.  When it is, even though exceeding
these limits is strictly undefined, it seems reasonable to assume that
a quality libc implementation will detect it and return an error (as
required by POSIX).

So with the patch, the only way to get this warning is for calls to
sprintf or to unchecked snprintf.

Martin




Re: [PATCH] Updated patches for the port of gccgo to GNU/Hurd

2019-02-11 Thread Ian Lance Taylor
On Mon, Feb 11, 2019 at 3:10 AM Svante Signell  wrote:
>
> On Sun, 2019-02-10 at 22:08 -0800, Ian Lance Taylor wrote:
> > On Sun, Feb 10, 2019 at 3:41 AM Svante Signell 
> > wrote:
> > > On Sat, 2019-02-09 at 23:57 +0100, Svante Signell wrote:
> > > > On Sat, 2019-02-09 at 14:40 -0800, Ian Lance Taylor wrote:
> > > > > On Fri, Feb 8, 2019 at 3:07 PM Matthias Klose  wrote:
> > > > > > On 07.02.19 06:04, Ian Lance Taylor wrote:
> > > > > What are the lines before that in the log?  For some reason libtool is
> > > > > being invoke with no source files.  The lines before the failing line
> > > > > should show an invocation of match.sh that determines the source
> > > > > files.
> > > >
> > > > Thanks for your job upstreaming the patches!
> > > >
> > > > I've found some problems. Current problem is with the mksysinfo.sh 
> > > > patch.
> > > > But there are some other things missing. New patches will be submitted
> > > > tomorrow.
> > >
> > > Attached are three additional patches needed to build libgo on GNU/Hurd:
> > > src_libgo_mksysinfo.sh.diff
> > > src_libgo_go_syscall_wait.c.diff
> > > src_libgo_testsuite_gotest.diff
> > >
> > > For the first patch, src_libgo_mksysinfo.sh.diff, I had to go back to the
> > > old version, using sed -i -e. As written now ${fsid_to_dev} expands to
> > > fsid_to_dev='-e '\''s/st_fsid/Dev/'\''' resulting in: "sed: -e expression
> > > #4, char 1: unknown command: `''". Unfortunately, I have not yet been able
> > > to modify the expansion omitting the single qoutes around the shell
> > > variable.
> >
> > I'm sorry, I don't want to use "sed -i".  That loses the original file
> > and makes it harder to reconstruct what has happened.
>
> What to do then?
>
> > > The second patch, src_libgo_go_syscall_wait.c.diff, is needed since
> > > WCONTINUED is not defined and is needed for WIFCONTINUED to be defined in
> > > wait.h.
> >
> > I don't understand that.   is a system header file.  Are
> > you saying that it is impossible to use  and WIFCONTINUED
> > unless your source code does a #define WCONTINUED before #include'ing
> > ?  That seems like a bug in the Hurd library code.
>
> The problem is that WCONTINUED is not defined in /usr/include/i386-
> gnu/bits/waitflags.h on Hurd. Only WNOHANG and WUNTRACED are. That causes
> WIFCONTINUED not to be defined in /usr/include/i386-gnu/bits/waitstatus.h. As
> WCONTINUED is not defined, I assume that WIFCONTINUED is not supported.
>
> From waitpid(2):
> WCONTINUED (since Linux 2.6.10)
>also return if a stopped child has been resumed by delivery of SIGCONT.
>
> @Samuel: more info?
>
> I think that that call to WIFCONTINUED in libgo/go/syscall/wait.c
> _Bool
> Continued (uint32_t *w)
> {
>   return WIFCONTINUED (*w) != 0;
> }
>
> has to be omitted somehow for Hurd.

It sound like the right fix is to use #ifdef WIFCONTINUED in
syscall/wait.c.  If WIFCONTINUED is not defined, the Continued
function should always return 0.

Ian


Re: [PATCH] Updated patches for the port of gccgo to GNU/Hurd

2019-02-11 Thread Samuel Thibault
Svante Signell, le lun. 11 févr. 2019 12:10:21 +0100, a ecrit:
> WCONTINUED is not defined, I assume that WIFCONTINUED is not supported.
> 
> From waitpid(2):
> WCONTINUED (since Linux 2.6.10)
>also return if a stopped child has been resumed by delivery of SIGCONT.
> 
> @Samuel: more info?

git grep WCONTINUED .
yields nothing in hurd/proc, so it's probably just not supported yet
indeed.

> > > The third patch, src_libgo_testsuite_gotest.diff, is not strictly needed,
> > > but running the tests the annoying text is displayed: "ps: comm: Unknown
> > > format spec"
> > 
> > I get that "comm" doesn't work, but the change in that patch is simply
> > incorrect.  If you don't pass "comm", the "grep sleep" will never
> > succeed.  If there is no way to support this code on Hurd then we
> > should skip it, not put in a command that can never work.
> 
> OK, let's drop that part then.
> 
> @Samuel: more info?

arg0 can be used instead.

That said, we should implement comm since it's defined by posix, it's
probably a matter of duplicating the line "Arg0" in hurd/libps/spec.c

Samuel


Re: C++ PATCH for c++/89217 - ICE with list-initialization in range-based for loop

2019-02-11 Thread Jason Merrill

On 2/7/19 6:02 PM, Marek Polacek wrote:

Since r268321 we can call digest_init even in a template, when the compound
literal isn't instantiation-dependent.


Right.  And since digest_init modifies the CONSTRUCTOR in place, that 
means the template trees are digested rather than the original parse 
trees that we try to use.  If we're going to use digest_init, we should 
probably save another CONSTRUCTOR with the original trees.


Jason


Re: [Patch] PR rtl-optimization/87763 - generate more bfi instructions on aarch64

2019-02-11 Thread Steve Ellcey
On Thu, 2019-02-07 at 18:13 +, Wilco Dijkstra wrote:
> External Email
> 
> Hi Steve,
> 
> > > After special cases you could do something like t = mask2 +
> > > (HWI_1U << shift);
> > > return t == (t & -t) to check for a valid bfi.
> > 
> > I am not sure I follow this logic and my attempts to use this did not
> > work so I kept my original code.
> 
> It's similar to the initial code in aarch64_bitmask_imm, but rather than 
> adding
> the lowest bit to the value to verify it is a mask (val + (val & -val)), we 
> use the
> shift instead. If the shift is exactly right, it reaches the first
> set bit of the mask.
> Adding the low bit to a valid mask always results in zero or a single set bit.
> The standard idiom to check that is t == (t & -t).
> 
> > > +  "bfi\t%0, %1, 0, %P2"
> > > 
> > > This could emit a width that may be 32 too large in SImode if bit 31 is 
> > > set
> > > (there is another use of %P in aarch64.md which may have the same
> > > issue).
> > 
> > I am not sure why having bit 31 set would be a problem.  Sign
> > extension?
> 
> Yes, if bit 31 is set, %P will emit 33 for a 32-bit constant which is 
> obviously wrong.
> Your patch avoids this for bfi by explicitly computing the correct value.
> 
> This looks good to me (and creates useful bfi's as expected), but I
> can't approve.
> 
> Wilco

Thanks for looking this over.  I have updated the mask check to use
your method and retested to make sure it still works.  Can one of the
aarch64 maintainers approve this patch?

2018-02-11  Steve Ellcey  

PR rtl-optimization/87763
* config/aarch64/aarch64-protos.h (aarch64_masks_and_shift_for_bfi_p):
New prototype.
* config/aarch64/aarch64.c (aarch64_masks_and_shift_for_bfi_p):
New function.
* config/aarch64/aarch64.md (*aarch64_bfi5_shift):
New instruction.
(*aarch64_bfi4_noand): Ditto.
(*aarch64_bfi4_noshift): Ditto.
(*aarch64_bfi4_noshift_alt): Ditto.

2018-02-11  Steve Ellcey  

PR rtl-optimization/87763
* gcc.target/aarch64/combine_bfxil.c: Change some bfxil checks
to bfi.
* gcc.target/aarch64/combine_bfi_2.c: New test.


diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index b035e35f33b..b6c0d0a8eb6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -429,6 +429,9 @@ bool aarch64_label_mentioned_p (rtx);
 void aarch64_declare_function_name (FILE *, const char*, tree);
 bool aarch64_legitimate_pic_operand_p (rtx);
 bool aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode, rtx, rtx);
+bool aarch64_masks_and_shift_for_bfi_p (scalar_int_mode, unsigned HOST_WIDE_INT,
+	unsigned HOST_WIDE_INT,
+	unsigned HOST_WIDE_INT);
 bool aarch64_zero_extend_const_eq (machine_mode, rtx, machine_mode, rtx);
 bool aarch64_move_imm (HOST_WIDE_INT, machine_mode);
 opt_machine_mode aarch64_sve_pred_mode (unsigned int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7c453cdad0..a7ef952ad1b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -9330,6 +9330,35 @@ aarch64_mask_and_shift_for_ubfiz_p (scalar_int_mode mode, rtx mask,
 	 & ((HOST_WIDE_INT_1U << INTVAL (shft_amnt)) - 1)) == 0;
 }
 
+/* Return true if the masks and a shift amount from an RTX of the form
+   ((x & MASK1) | ((y << SHIFT_AMNT) & MASK2)) are valid to combine into
+   a BFI instruction of mode MODE.  See *arch64_bfi patterns.  */
+
+bool
+aarch64_masks_and_shift_for_bfi_p (scalar_int_mode mode,
+   unsigned HOST_WIDE_INT mask1,
+   unsigned HOST_WIDE_INT shft_amnt,
+   unsigned HOST_WIDE_INT mask2)
+{
+  unsigned HOST_WIDE_INT t;
+
+  /* Verify that there is no overlap in what bits are set in the two masks.  */
+  if (mask1 != ~mask2)
+return false;
+
+  /* Verify that mask2 is not all zeros or ones.  */
+  if (mask2 == 0 || mask2 == HOST_WIDE_INT_M1U)
+return false;
+
+  /* The shift amount should always be less than the mode size.  */
+  gcc_assert (shft_amnt < GET_MODE_BITSIZE (mode));
+
+  /* Verify that the mask being shifted is contiguous and would be in the
+ least significant bits after shifting by shft_amnt.  */
+  t = mask2 + (HOST_WIDE_INT_1U << shft_amnt);
+  return (t == (t & -t));
+}
+
 /* Calculate the cost of calculating X, storing it in *COST.  Result
is true if the total cost of the operation has now been calculated.  */
 static bool
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b7f6fe0f135..2bbd3f1055c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5476,6 +5476,76 @@
   [(set_attr "type" "bfm")]
 )
 
+;;  Match a bfi instruction where the shift of OP3 means that we are
+;;  actually copying the least significant bits of OP3 into OP0 by way
+;;  of the AND masks and the IOR instruction.  A similar instruction
+;;  with the two parts of the IOR swapped around was never trigger

Re: [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE

2019-02-11 Thread H.J. Lu
On Sun, Feb 10, 2019 at 2:48 AM Uros Bizjak  wrote:
>
> On 2/10/19, H.J. Lu  wrote:
> > Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.
> >
> >   PR target/89021
> >   * config/i386/mmx.md (sse_cvtps2pi): Add SSE emulation.
> >   (sse_cvttps2pi): Likewise.
>
> It looks to me that this description is wrong. We don't have V4SF
> modes here, but V2SF, so we have to fake 64bit load in case of MMX.
> The cvtps2dq will access memory in true 128bit width, so this is
> wrong.
>
> We have to fix the description to not fake wide mode.
>

What do you propose to implement

__m64 _mm_cvtps_pi32 (__m128 __A);

We also have

(define_insn "sse2_cvtps2pd"
  [(set (match_operand:V2DF 0 "register_operand" "=v")
(float_extend:V2DF
  (vec_select:V2SF
(match_operand:V4SF 1 "vector_operand" "vm")
(parallel [(const_int 0) (const_int 1)]]
  "TARGET_SSE2 && "
  "%vcvtps2pd\t{%1, %0|%0, %q1}"

These aren't new problems introduced by my MMX work.

-- 
H.J.


[PATCH] fix ICE in __builtin_has_attribute (PR 88383 and 89288)

2019-02-11 Thread Martin Sebor

This is a repost of a patch for PR 88383 updated to also fix the just
reported PR 89288 (the original patch only partially handles this case).
The review of the first patch was derailed by questions about the design
of the built-in so the fix for the ICE was never approved.  I think
the ICEs should be fixed for GCC 9 and any open design questions should
be dealt with independently.

Martin

The patch for PR 88383 was originally posted last December:
  https://gcc.gnu.org/ml/gcc-patches/2018-12/msg00337.html
PR c/88383 - ICE calling __builtin_has_attribute on a reference
PR c/89288 - ICE in tree_code_size, at tree.c:865

gcc/c-family/ChangeLog:

	PR c/88383
	PR c/89288
	* c-attribs.c (validate_attribute): Handle expressions.
	(has_attribute): Handle types referenced by expressions.
	Avoid considering array attributes in ARRAY_REF expressions .

gcc/cp/ChangeLog:

	PR c/88383
	PR c/89288
	* parser.c (cp_parser_has_attribute_expression): Handle assignment
	expressions.

gcc/testsuite/ChangeLog:

	PR c/88383
	PR c/89288
	* c-c++-common/builtin-has-attribute-4.c: Adjust expectations.
	* c-c++-common/builtin-has-attribute-6.c: New test.

Index: gcc/c-family/c-attribs.c
===
--- gcc/c-family/c-attribs.c	(revision 268774)
+++ gcc/c-family/c-attribs.c	(working copy)
@@ -4032,8 +4032,12 @@ validate_attribute (location_t atloc, tree oper, t
 
   if (TYPE_P (oper))
 tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, oper);
+  else if (DECL_P (oper))
+tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
+  else if (EXPR_P (oper))
+tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, TREE_TYPE (oper));
   else
-tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
+return false;
 
   /* Temporarily clear CURRENT_FUNCTION_DECL to make decl_attributes
  believe the DECL declared above is at file scope.  (See bug 87526.)  */
@@ -4042,7 +4046,7 @@ validate_attribute (location_t atloc, tree oper, t
   if (DECL_P (tmpdecl))
 {
   if (DECL_P (oper))
-	/* An alias cannot be a defintion so declare the symbol extern.  */
+	/* An alias cannot be a definition so declare the symbol extern.  */
 	DECL_EXTERNAL (tmpdecl) = true;
   /* Attribute visibility only applies to symbols visible from other
 	 translation units so make it "public."   */
@@ -4078,11 +4082,17 @@ has_attribute (location_t atloc, tree t, tree attr
   do
 	{
 	  /* Determine the array element/member declaration from
-	 an ARRAY/COMPONENT_REF.  */
+	 a COMPONENT_REF and an INDIRECT_REF involving a reference.  */
 	  STRIP_NOPS (t);
 	  tree_code code = TREE_CODE (t);
-	  if (code == ARRAY_REF)
-	t = TREE_OPERAND (t, 0);
+	  if (code == INDIRECT_REF)
+	{
+	  tree op0 = TREE_OPERAND (t, 0);
+	  if (TREE_CODE (TREE_TYPE (op0)) == REFERENCE_TYPE)
+		t = op0;
+	  else
+		break;
+	}
 	  else if (code == COMPONENT_REF)
 	t = TREE_OPERAND (t, 1);
 	  else
@@ -4133,7 +4143,8 @@ has_attribute (location_t atloc, tree t, tree attr
 	}
   else
 	{
-	  atlist = TYPE_ATTRIBUTES (TREE_TYPE (expr));
+	  type = TREE_TYPE (expr);
+	  atlist = TYPE_ATTRIBUTES (type);
 	  done = true;
 	}
 
Index: gcc/cp/parser.c
===
--- gcc/cp/parser.c	(revision 268774)
+++ gcc/cp/parser.c	(working copy)
@@ -8542,9 +8542,9 @@ cp_parser_has_attribute_expression (cp_parser *par
   cp_parser_parse_definitely (parser);
 
   /* If the type-id production did not work out, then we must be
- looking at the unary-expression production.  */
+ looking at an expression.  */
   if (!oper || oper == error_mark_node)
-oper = cp_parser_unary_expression (parser);
+oper = cp_parser_assignment_expression (parser);
 
   STRIP_ANY_LOCATION_WRAPPER (oper);
 
Index: gcc/testsuite/c-c++-common/builtin-has-attribute-4.c
===
--- gcc/testsuite/c-c++-common/builtin-has-attribute-4.c	(revision 268774)
+++ gcc/testsuite/c-c++-common/builtin-has-attribute-4.c	(working copy)
@@ -154,7 +154,8 @@ void test_packed (struct PackedMember *p)
   A (0, gpak[0].c, packed);
   A (0, gpak[1].s, packed);
   A (1, gpak->a, packed);
-  A (1, (*gpak).a[0], packed);
+  /* It's the array that's declared packed but not its elements.  */
+  A (0, (*gpak).a[0], packed);
 
   /* The following fails because in C it's represented as
INDIRECT_REF (POINTER_PLUS (NOP_EXPR (ADDR_EXPR (gpak)), ...))
@@ -164,7 +165,8 @@ void test_packed (struct PackedMember *p)
   A (0, p->c, packed);
   A (0, p->s, packed);
   A (1, p->a, packed);
-  A (1, p->a[0], packed);
+  /* It's the array that's declared packed but not its elements.  */
+  A (0, p->a[0], packed);
   /* Similar to the comment above.
A (1, *p->a, packed);  */
 }
Index: gcc/testsuite/c-c++-common/builtin-has-attribute-6.c
==

Re: C++ PATCH for c++/89212 - ICE converting nullptr to pointer-to-member-function

2019-02-11 Thread Marek Polacek
On Fri, Feb 08, 2019 at 05:37:00PM -0500, Jason Merrill wrote:
> On 2/8/19 12:21 PM, Marek Polacek wrote:
> > r256999 removed early bailout for pointer-to-member-function types, so we
> > now try to tsubst each element of a pointer-to-member-function CONSTRUCTOR.
> > 
> > That's fine but the problem here is that we end up converting a null pointer
> > to pointer-to-member-function type and that crashes in fold_convert:
> > 
> > 16035 case INTEGER_CST:
> > 16039   {
> > 16040 /* Instantiate any typedefs in the type.  */
> > 16041 tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
> > 16042 r = fold_convert (type, t);
> > 
> > It seems obvious to use cp_fold_convert which handles TYPE_PTRMEM_P, but
> > that then ICEs too, the infamous "canonical types differ for identical 
> > types":
> > type is
> > 
> > struct
> > {
> >void A:: (struct A *) * __pfn;
> >long int __delta;
> > }
> > 
> > and the type of "0" is "void A:: (struct A *) *".  These types are
> > structurally equivalent but have different canonical types.  (What's up
> > with that, anyway?  It seems OK that the canonical type of the struct is
> > the struct itself and that the canonical type of the pointer is the pointer
> > itself.)
> > 
> > That could be handled in cp_fold_convert: add code to convert an 
> > integer_zerop to
> > TYPE_PTRMEMFUNC_P.  Unfortunately the 0 is not null_ptr_cst_p because it's 
> > got
> > a pointer type.
> > 
> > Or just don't bother substituting null_member_pointer_value_p and avoid the
> > above.
> > 
> > Bootstrapped/regtested on x86_64-linux, ok for trunk and 8?
> > 
> > 2019-02-08  Marek Polacek  
> > 
> > PR c++/89212 - ICE converting nullptr to pointer-to-member-function.
> > * pt.c (tsubst_copy_and_build) : Return early for
> > null member pointer value.
> > 
> > * g++.dg/cpp0x/nullptr40.C: New test.
> > 
> > diff --git gcc/cp/pt.c gcc/cp/pt.c
> > index b8fbf4046f0..acc2d8f1feb 100644
> > --- gcc/cp/pt.c
> > +++ gcc/cp/pt.c
> > @@ -19251,6 +19251,9 @@ tsubst_copy_and_build (tree t,
> >looked up by digest_init.  */
> > process_index_p = !(type && MAYBE_CLASS_TYPE_P (type));
> > +   if (null_member_pointer_value_p (t))
> > + RETURN (t);
> 
> I would expect this to do the wrong thing if type is different from
> TREE_TYPE (t).  Can we get here for a dependent PMF type like T (A::*)()?
> If not, let's assert that they're the same.  Otherwise, maybe cp_convert
> (type, nullptr_node)?

Yup, that's a concern.  But I'm not seeing any ICEs with the assert added and a
dependent PMF as in the new testcase.  And it seems we get a conversion error
if the types of the PMFs don't match.  If I'm wrong, this would be easy to
fix anyway.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2019-02-11  Marek Polacek  

PR c++/89212 - ICE converting nullptr to pointer-to-member-function.
* pt.c (tsubst_copy_and_build) : Return early for
null member pointer value.

* g++.dg/cpp0x/nullptr40.C: New test.
* g++.dg/cpp0x/nullptr41.C: New test.

diff --git gcc/cp/pt.c gcc/cp/pt.c
index b8fbf4046f0..2682c68dcfa 100644
--- gcc/cp/pt.c
+++ gcc/cp/pt.c
@@ -19251,6 +19251,12 @@ tsubst_copy_and_build (tree t,
   looked up by digest_init.  */
process_index_p = !(type && MAYBE_CLASS_TYPE_P (type));
 
+   if (null_member_pointer_value_p (t))
+ {
+   gcc_assert (same_type_p (type, TREE_TYPE (t)));
+   RETURN (t);
+ }
+
n = vec_safe_copy (CONSTRUCTOR_ELTS (t));
 newlen = vec_safe_length (n);
FOR_EACH_VEC_SAFE_ELT (n, idx, ce)
diff --git gcc/testsuite/g++.dg/cpp0x/nullptr40.C 
gcc/testsuite/g++.dg/cpp0x/nullptr40.C
new file mode 100644
index 000..21c188bdb5e
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/nullptr40.C
@@ -0,0 +1,19 @@
+// PR c++/89212
+// { dg-do compile { target c++11 } }
+
+template  using enable_if_t = int;
+
+template
+struct p
+{
+template>
+p(T) { }
+p() = default;
+};
+
+struct A
+{
+p i = 1;
+void bar();
+p j;
+};
diff --git gcc/testsuite/g++.dg/cpp0x/nullptr41.C 
gcc/testsuite/g++.dg/cpp0x/nullptr41.C
new file mode 100644
index 000..54e66af2095
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp0x/nullptr41.C
@@ -0,0 +1,19 @@
+// PR c++/89212
+// { dg-do compile { target c++11 } }
+
+template  using enable_if_t = int;
+
+template
+struct p
+{
+template>
+p(T) { }
+p() = default;
+};
+
+struct A
+{
+p i = 1;
+void bar();
+p j;
+};


Trivial C++ PATCH to remove commented code

2019-02-11 Thread Marek Polacek
This I don't like.

Obvious, but ok?

2019-02-11  Marek Polacek  

* typeck2.c (digest_init_r): Remove commented code.

--- gcc/cp/typeck2.c
+++ gcc/cp/typeck2.c
@@ -1099,7 +1099,6 @@ digest_init_r (tree type, tree init, int nested, int 
flags,
 
   tree typ1 = TYPE_MAIN_VARIANT (TREE_TYPE (type));
   if (char_type_p (typ1)
- /*&& init */
  && TREE_CODE (stripped_init) == STRING_CST)
{
  tree char_type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (init)));


Re: [PATCH] fix ICE in __builtin_has_attribute (PR 88383 and 89288)

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 12:20:42PM -0700, Martin Sebor wrote:
> This is a repost of a patch for PR 88383 updated to also fix the just
> reported PR 89288 (the original patch only partially handles this case).
> The review of the first patch was derailed by questions about the design
> of the built-in so the fix for the ICE was never approved.  I think
> the ICEs should be fixed for GCC 9 and any open design questions should
> be dealt with independently.

Well, it is closely coupled with the design questions.

>if (TYPE_P (oper))
>  tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, oper);
> +  else if (DECL_P (oper))
> +tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
> +  else if (EXPR_P (oper))
> +tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, TREE_TYPE (oper));
>else
> -tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
> +return false;

The EXPR_P conditional makes no sense.  Why should __builtin_has_attribute (1 + 
1, ...)
do something (if unfolded yet) and __builtin_has_attribute (2, ...)
something different?  1 + 1 when unfolded is EXPR_P, but 2 is not.

Jakub


Re: [PATCH 13/43] i386: Emulate MMX pshufw with SSE

2019-02-11 Thread Uros Bizjak
On Mon, Feb 11, 2019 at 7:09 PM H.J. Lu  wrote:
>
> On Sun, Feb 10, 2019 at 3:16 AM Uros Bizjak  wrote:
> >
> > On 2/10/19, H.J. Lu  wrote:
> > > Emulate MMX pshufw with SSE.  Only SSE register source operand is allowed.
> > >
> > >   PR target/89021
> > >   * config/i386/mmx.md (mmx_pshufw_1): Add SSE emulation.
> > >   (*vec_dupv4hi): Likewise.
> > >   emulation.
> > > ---
> > >  gcc/config/i386/mmx.md | 33 +
> > >  1 file changed, 21 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > > index 1ee51c5deb7..dc81d7f45df 100644
> > > --- a/gcc/config/i386/mmx.md
> > > +++ b/gcc/config/i386/mmx.md
> > > @@ -1364,7 +1364,8 @@
> > >[(match_operand:V4HI 0 "register_operand")
> > > (match_operand:V4HI 1 "nonimmediate_operand")
> > > (match_operand:SI 2 "const_int_operand")]
> > > -  "TARGET_SSE || TARGET_3DNOW_A"
> > > +  "((TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE)
> > > +   || TARGET_3DNOW_A"
> >
> > I think that the above condition should read
> >
> > (TARGET_MMX || TARGET_MMX_WITH_SSE) && (TARGET_SSE || TARGET_3DNOW_A)
> >
> > and with TARGET_MMX_WITH_SSE (which implies SSE2) we always use XMM
> > registers. Without SSE2, we use MMX registers, as before.
>
> Done.
>
> > >  {
> > >int mask = INTVAL (operands[2]);
> > >emit_insn (gen_mmx_pshufw_1 (operands[0], operands[1],
> > > @@ -1376,14 +1377,15 @@
> > >  })
> > >
> > >  (define_insn "mmx_pshufw_1"
> > > -  [(set (match_operand:V4HI 0 "register_operand" "=y")
> > > +  [(set (match_operand:V4HI 0 "register_operand" "=y,Yv")
> > >  (vec_select:V4HI
> > > -  (match_operand:V4HI 1 "nonimmediate_operand" "ym")
> > > +  (match_operand:V4HI 1 "nonimmediate_operand" "ym,Yv")
> > >(parallel [(match_operand 2 "const_0_to_3_operand")
> > >   (match_operand 3 "const_0_to_3_operand")
> > >   (match_operand 4 "const_0_to_3_operand")
> > >   (match_operand 5 "const_0_to_3_operand")])))]
> > > -  "TARGET_SSE || TARGET_3DNOW_A"
> > > +  "((TARGET_MMX || TARGET_MMX_WITH_SSE) && TARGET_SSE)
> > > +   || TARGET_3DNOW_A"
> > >  {
> > >int mask = 0;
> > >mask |= INTVAL (operands[2]) << 0;
> > > @@ -1392,11 +1394,15 @@
> > >mask |= INTVAL (operands[5]) << 6;
> > >operands[2] = GEN_INT (mask);
> > >
> > > -  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
> > > +  if (TARGET_MMX_WITH_SSE)
> > > +return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
> > > +  else
> > > +return "pshufw\t{%2, %1, %0|%0, %1, %2}";
> >
> > The above should be implemented as multi-output template.
>
> I have
>
> {
>   int mask = 0;
>   mask |= INTVAL (operands[2]) << 0;
>   mask |= INTVAL (operands[3]) << 2;
>   mask |= INTVAL (operands[4]) << 4;
>   mask |= INTVAL (operands[5]) << 6;
>   operands[2] = GEN_INT (mask);
>
>   if (TARGET_MMX_WITH_SSE)
> return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
>   else
> return "pshufw\t{%2, %1, %0|%0, %1, %2}";
> }
>
> How can I build mask before multi-output template?

You are right, mask has to be adjusted before output.

Maybe we should be more explicit here with:

  switch (which_alternative)
{
case 0:
  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
case 1:
  return "pshufw\t{%2, %1, %0|%0, %1, %2}";
default:
  gcc_unreachable ();
}

Uros.


Re: [PATCH][ARM] Fix PR89222

2019-02-11 Thread Wilco Dijkstra
Hi Alexander,

> Just to be sure the issue is analyzed properly: if it's certain that this 
> usage
> is not allowed, shouldn't the linker produce a diagnostic instead of silently
> concealing the issue?

The ABI doesn't require this but yes a linker could report a warning if the
addend of a function symbol has bit 0 set.

> With Gold linker this is handled correctly.  So it looks to me like a
> bug in BFD linker, where it ignores any addend (not just +1/-1) when
> resolving a relocation against a Thumb function.

If the Gold linker doesn't fail that means Gold has a serious bug in the way
it handles Thumb relocations. Can you elaborate, does it do S+A+1 rather than
(S+A) | 1 as the ARM-ELF spec requires?

Cheers,
Wilco

Re: Trivial C++ PATCH to remove commented code

2019-02-11 Thread Jason Merrill

OK.

On 2/11/19 2:27 PM, Marek Polacek wrote:

This I don't like.

Obvious, but ok?

2019-02-11  Marek Polacek  

* typeck2.c (digest_init_r): Remove commented code.

--- gcc/cp/typeck2.c
+++ gcc/cp/typeck2.c
@@ -1099,7 +1099,6 @@ digest_init_r (tree type, tree init, int nested, int 
flags,
  
tree typ1 = TYPE_MAIN_VARIANT (TREE_TYPE (type));

if (char_type_p (typ1)
- /*&& init */
  && TREE_CODE (stripped_init) == STRING_CST)
{
  tree char_type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (init)));





Re: [PATCH 14/43] i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE

2019-02-11 Thread Uros Bizjak
On Mon, Feb 11, 2019 at 8:08 PM H.J. Lu  wrote:
>
> On Sun, Feb 10, 2019 at 2:48 AM Uros Bizjak  wrote:
> >
> > On 2/10/19, H.J. Lu  wrote:
> > > Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE.
> > >
> > >   PR target/89021
> > >   * config/i386/mmx.md (sse_cvtps2pi): Add SSE emulation.
> > >   (sse_cvttps2pi): Likewise.
> >
> > It looks to me that this description is wrong. We don't have V4SF
> > modes here, but V2SF, so we have to fake 64bit load in case of MMX.
> > The cvtps2dq will access memory in true 128bit width, so this is
> > wrong.
> >
> > We have to fix the description to not fake wide mode.
> >
>
> What do you propose to implement
>
> __m64 _mm_cvtps_pi32 (__m128 __A);

Hm...

In your original patch, we *do* have V4SF memory access, but the
original insn accesses it in __m64 mode. This should be OK, but then
accessing this memory in __m128 mode should also be OK. So, on a more
detailed look, the original patch looks OK to me. Luckily, a false
alarm...

>
> We also have
>
> (define_insn "sse2_cvtps2pd"
>   [(set (match_operand:V2DF 0 "register_operand" "=v")
> (float_extend:V2DF
>   (vec_select:V2SF
> (match_operand:V4SF 1 "vector_operand" "vm")
> (parallel [(const_int 0) (const_int 1)]]
>   "TARGET_SSE2 && "
>   "%vcvtps2pd\t{%1, %0|%0, %q1}"
>
> These aren't new problems introduced by my MMX work.

This one is not problematic, since the instruction accesses memory in
__m64 mode, which is narrower that V4SFmode.

Uros.


Re: C++ PATCH for c++/89212 - ICE converting nullptr to pointer-to-member-function

2019-02-11 Thread Jason Merrill

On 2/11/19 2:21 PM, Marek Polacek wrote:

On Fri, Feb 08, 2019 at 05:37:00PM -0500, Jason Merrill wrote:

On 2/8/19 12:21 PM, Marek Polacek wrote:

r256999 removed early bailout for pointer-to-member-function types, so we
now try to tsubst each element of a pointer-to-member-function CONSTRUCTOR.

That's fine but the problem here is that we end up converting a null pointer
to pointer-to-member-function type and that crashes in fold_convert:

16035 case INTEGER_CST:
16039   {
16040 /* Instantiate any typedefs in the type.  */
16041 tree type = tsubst (TREE_TYPE (t), args, complain, in_decl);
16042 r = fold_convert (type, t);

It seems obvious to use cp_fold_convert which handles TYPE_PTRMEM_P, but
that then ICEs too, the infamous "canonical types differ for identical types":
type is

struct
{
void A:: (struct A *) * __pfn;
long int __delta;
}

and the type of "0" is "void A:: (struct A *) *".  These types are
structurally equivalent but have different canonical types.  (What's up
with that, anyway?  It seems OK that the canonical type of the struct is
the struct itself and that the canonical type of the pointer is the pointer
itself.)

That could be handled in cp_fold_convert: add code to convert an integer_zerop 
to
TYPE_PTRMEMFUNC_P.  Unfortunately the 0 is not null_ptr_cst_p because it's got
a pointer type.

Or just don't bother substituting null_member_pointer_value_p and avoid the
above.

Bootstrapped/regtested on x86_64-linux, ok for trunk and 8?

2019-02-08  Marek Polacek  

PR c++/89212 - ICE converting nullptr to pointer-to-member-function.
* pt.c (tsubst_copy_and_build) : Return early for
null member pointer value.

* g++.dg/cpp0x/nullptr40.C: New test.

diff --git gcc/cp/pt.c gcc/cp/pt.c
index b8fbf4046f0..acc2d8f1feb 100644
--- gcc/cp/pt.c
+++ gcc/cp/pt.c
@@ -19251,6 +19251,9 @@ tsubst_copy_and_build (tree t,
   looked up by digest_init.  */
process_index_p = !(type && MAYBE_CLASS_TYPE_P (type));
+   if (null_member_pointer_value_p (t))
+ RETURN (t);


I would expect this to do the wrong thing if type is different from
TREE_TYPE (t).  Can we get here for a dependent PMF type like T (A::*)()?
If not, let's assert that they're the same.  Otherwise, maybe cp_convert
(type, nullptr_node)?


Yup, that's a concern.  But I'm not seeing any ICEs with the assert added and a
dependent PMF as in the new testcase.  And it seems we get a conversion error
if the types of the PMFs don't match.  If I'm wrong, this would be easy to
fix anyway.

Bootstrapped/regtested on x86_64-linux, ok for trunk?


OK.

Jason


Re: [C++ PATCH] Fix std::is_constant_evaluated() in non-type template parameters (PR c++/88977)

2019-02-11 Thread Jason Merrill

On 2/8/19 6:18 PM, Jakub Jelinek wrote:

Hi!

Non-type template arguments are constant-expression in the grammar and thus
manifestly constant-evaluated.
For e.g. class templates, convert_nontype_argument is called with
tf_warning_or_error and so while we called in the below spots
maybe_constant_value without manifestly_const_eval=true, there is a
   if (TREE_CODE (expr) != INTEGER_CST
   && !value_dependent_expression_p (expr))
 {
   if (complain & tf_error)
 {
   int errs = errorcount, warns = warningcount + werrorcount;
   if (!require_potential_constant_expression (expr))
 expr = error_mark_node;
   else
 expr = cxx_constant_value (expr);
later on and cxx_constant_value will do the manifestly_const_eval=true.
On the testcase below with function template, complain is tf_none and
so we only call that maybe_constant_value and not cxx_constant_value.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?


OK.


Re: [PATCH] fix ICE in __builtin_has_attribute (PR 88383 and 89288)

2019-02-11 Thread Jakub Jelinek
On Mon, Feb 11, 2019 at 12:20:42PM -0700, Martin Sebor wrote:
> --- gcc/c-family/c-attribs.c  (revision 268774)
> +++ gcc/c-family/c-attribs.c  (working copy)
> @@ -4032,8 +4032,12 @@ validate_attribute (location_t atloc, tree oper, t
>  
>if (TYPE_P (oper))
>  tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, oper);
> +  else if (DECL_P (oper))
> +tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
> +  else if (EXPR_P (oper))
> +tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, TREE_TYPE (oper));
>else
> -tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
> +return false;
>  
>/* Temporarily clear CURRENT_FUNCTION_DECL to make decl_attributes
>   believe the DECL declared above is at file scope.  (See bug 87526.)  */

Why do you this kind of validation at all?  You do compare the arguments
later on in the caller, why isn't that sufficient?  Creating some decl (and
ignoring for that whether the attribute is decl_required, type_required and/or
function_type_required) is a sure way to get many warnings, even if the
arguments are reasonable.

Jakub


Re: [PATCH][ARM] Fix PR89222

2019-02-11 Thread Alexander Monakov
On Mon, 11 Feb 2019, Wilco Dijkstra wrote:
> > With Gold linker this is handled correctly.  So it looks to me like a
> > bug in BFD linker, where it ignores any addend (not just +1/-1) when
> > resolving a relocation against a Thumb function.
> 
> If the Gold linker doesn't fail that means Gold has a serious bug in the way
> it handles Thumb relocations. Can you elaborate, does it do S+A+1 rather than
> (S+A) | 1 as the ARM-ELF spec requires?

Apologies - it appears I might have mistyped something, as re-trying my tests
shows that both linkers properly implement the required '(S+A) | 1'.  I can't
reproduce any linker bug I suspected.

It seems odd to me that the spec requires '(S+A) | T' instead of the (imho
more intuitive) '(S|T) + A', but apart from the missing diagnostic from the
linkers, it seems they do as they must and GCC was at fault.

(perhaps it's okay to allow addends with low bit zero though, instead of
allowing only zero addends as your patch does?)

Thanks for clearing this up!
Alexander

Re: [PATCH] fix ICE in __builtin_has_attribute (PR 88383 and 89288)

2019-02-11 Thread Martin Sebor

On 2/11/19 1:23 PM, Jakub Jelinek wrote:

On Mon, Feb 11, 2019 at 12:20:42PM -0700, Martin Sebor wrote:

--- gcc/c-family/c-attribs.c(revision 268774)
+++ gcc/c-family/c-attribs.c(working copy)
@@ -4032,8 +4032,12 @@ validate_attribute (location_t atloc, tree oper, t
  
if (TYPE_P (oper))

  tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, oper);
+  else if (DECL_P (oper))
+tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
+  else if (EXPR_P (oper))
+tmpdecl = build_decl (atloc, TYPE_DECL, tmpid, TREE_TYPE (oper));
else
-tmpdecl = build_decl (atloc, TREE_CODE (oper), tmpid, TREE_TYPE (oper));
+return false;
  
/* Temporarily clear CURRENT_FUNCTION_DECL to make decl_attributes

   believe the DECL declared above is at file scope.  (See bug 87526.)  */


Why do you this kind of validation at all?  You do compare the arguments
later on in the caller, why isn't that sufficient?  Creating some decl (and
ignoring for that whether the attribute is decl_required, type_required and/or
function_type_required) is a sure way to get many warnings, even if the
arguments are reasonable.



The snippet above deals with validating an attribute with one or
more arguments in the context where it's being queried, to make
sure the whole attribute specification makes any sense at all.
It's meant to catch gross mistakes like

  __builtin_has_attribute (func, alloc_size ("1"));

but it's far from perfect.

The difference in the example you asked about in your other mail
can be seen here:

  const int i = 1;

  enum {
e = __builtin_has_attribute (1 + 0, alloc_size ("foo")),
f = __builtin_has_attribute (i + 1, alloc_size ("bar"))
  };

Because the EPPR_P(oper) test fails for the first enumerator
the first invalid attribute specification is not diagnosed.  But
because it succeeds for (i + 1), the second specification triggers
a warning about alloc_size being only applicable to function types.
I suppose this could improved by handling constants the same as
expressions.

But this is far from the only limitation.  Attributes with no
arguments don't even make it this far.  Because the validation
depends on decl_attributes to detect these mistakes and that
function doesn't distinguish success from failure, the validation
succeeds even for non-sensical attributes.  But it should never
cause the built-in to give a false positive.  There are at least
a couple of FIXMEs in the code where I would like to improve
the validation in GCC 10.  But none of them is inherent in
the design of the built-in or serious enough to seriously
compromise its usefulness.

Martin


Re: [PATCH][ARM] Fix PR89222

2019-02-11 Thread Ramana Radhakrishnan
On Mon, Feb 11, 2019 at 5:35 PM Wilco Dijkstra  wrote:
>
> The GCC optimizer can generate symbols with non-zero offset from simple
> if-statements. Bit zero is used for the Arm/Thumb state bit, so relocations
> with offsets fail if it changes bit zero and the relocation forces bit zero
> to true.  The fix is to disable offsets on function pointer symbols.
>
> ARMv5te bootstrap OK, regression tests pass. OK for commit?

Interesting bug. armv5te-linux bootstrap ? Can you share your --target
and --with-arch flags ?

>
> ChangeLog:
> 2019-02-06  Wilco Dijkstra  
>
> gcc/
> PR target/89222
> * config/arm/arm.md (movsi): Use arm_cannot_force_const_mem
> to decide when to split off an offset from a symbol.
> * config/arm/arm.c (arm_cannot_force_const_mem): Disallow offsets
> in function symbols.
> * config/arm/arm-protos.h (arm_cannot_force_const_mem): Add.
>
> testsuite/
> PR target/89222
> * gcc.target/arm/pr89222.c: Add new test.
>
> --
> diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
> index 
> 79ede0db174fcce87abe8b4d18893550d4c7e2f6..0bedbe5110853617ecf7456bbaa56b1405fb65dd
>  100644
> --- a/gcc/config/arm/arm-protos.h
> +++ b/gcc/config/arm/arm-protos.h
> @@ -184,6 +184,7 @@ extern void arm_init_cumulative_args (CUMULATIVE_ARGS *, 
> tree, rtx, tree);
>  extern bool arm_pad_reg_upward (machine_mode, tree, int);
>  #endif
>  extern int arm_apply_result_size (void);
> +extern bool arm_cannot_force_const_mem (machine_mode, rtx);
>
>  #endif /* RTX_CODE */
>
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 
> c4c9b4a667100d81d918196713e40b01ee232ee2..ccd4211045066d8edb89dd4c23d554517639f8f6
>  100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -178,7 +178,6 @@ static void arm_internal_label (FILE *, const char *, 
> unsigned long);
>  static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
>  tree);
>  static bool arm_have_conditional_execution (void);
> -static bool arm_cannot_force_const_mem (machine_mode, rtx);
>  static bool arm_legitimate_constant_p (machine_mode, rtx);
>  static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
>  static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
> @@ -8936,15 +8935,20 @@ arm_legitimate_constant_p (machine_mode mode, rtx x)
>
>  /* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */
>
> -static bool

Let's keep this static ...

> +bool
>  arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
>  {
>rtx base, offset;
> +  split_const (x, &base, &offset);
>
> -  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
> +  if (GET_CODE (base) == SYMBOL_REF)

Isn't there a SYMBOL_REF_P predicate for this ?

>  {
> -  split_const (x, &base, &offset);
> -  if (GET_CODE (base) == SYMBOL_REF
> +  /* Function symbols cannot have an offset due to the Thumb bit.  */
> +  if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
> + && INTVAL (offset) != 0)
> +   return true;
> +

Can we look to allow anything that is a power of 2 as an offset i.e.
anything with bit 0 set to 0 ? Could you please file an enhancement
request on binutils for both gold and ld to catch the linker warning
case ? I suspect we are looking for addends which have the lower bit
set and function symbols ?


> +  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
>   && !offset_within_block_p (base, INTVAL (offset)))
> return true;
>  }

this looks ok.

> diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
> index 
> aa759624f8f617576773aa75fd6239d6e06e8a13..00fccd964a86dd814f15e4a1fdf5b47173a3ee3f
>  100644
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -5981,17 +5981,13 @@ (define_expand "movsi"
>  }
>  }
>
> -  if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
> +  if (arm_cannot_force_const_mem (SImode, operands[1]))

Firstly (targetm.cannot_force_const_mem (...)) please instead of
arm_cannot_force_const_mem , then that can remain static.  Let's look
to use the targetm interface instead of direct calls here. We weren't
hitting this path for non-vxworks code , however now we do so if
arm_tls_referenced_p is true at the end of arm_cannot_force_const_mem
which means that we could well have a TLS address getting spat out or
am I mis-reading something ?

This is my main concern with this patch ..

>  {
>split_const (operands[1], &base, &offset);

> -  if (GET_CODE (base) == SYMBOL_REF
> - && !offset_within_block_p (base, INTVAL (offset)))
> -   {
> - tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
> - emit_move_insn (tmp, base);
> - emit_insn (gen_addsi3 (operands[0], tmp, offset));
> - DONE;
> -   }
> +  tmp = can_create_pseudo_p () ? gen_reg_rtx (SImode) : operands[0];
> +  emit_move_insn (tmp, base);
> +  emit_insn (gen_addsi3 (oper

Re: arm access to stack slot out of allocated area

2019-02-11 Thread Ramana Radhakrishnan
On Mon, Feb 11, 2019 at 4:48 PM Olivier Hainque  wrote:
>
> Hi Wilco,
>
> > On 8 Feb 2019, at 22:35, Wilco Dijkstra  wrote:
>
> > So I think we need to push much harder on getting rid of obsolete stuff and
> > avoid people encountering these nasty issues.
>
> Numbers I just received indicate that we can legitimately head
> in this direction for VxWorks as well (move towards VxWorks 7 only
> ports, AAPCS based).
>
> Good news :)
>

Yay !

Ramana


Re: [PATCH] Updated patches for the port of gccgo to GNU/Hurd

2019-02-11 Thread Svante Signell
On Mon, 2019-02-11 at 10:27 -0800, Ian Lance Taylor wrote:
> On Mon, Feb 11, 2019 at 3:10 AM Svante Signell 
> wrote:
> > On Sun, 2019-02-10 at 22:08 -0800, Ian Lance Taylor wrote:
> > > On Sun, Feb 10, 2019 at 3:41 AM Svante Signell 
> > > wrote:
> > > > On Sat, 2019-02-09 at 23:57 +0100, Svante Signell wrote:
> > > > > On Sat, 2019-02-09 at 14:40 -0800, Ian Lance Taylor wrote:
> > > > > > On Fri, Feb 8, 2019 at 3:07 PM Matthias Klose 
> > > > > > wrote:
> > > > > > > On 07.02.19 06:04, Ian Lance Taylor wrote:
> > > > > > What are the lines before that in the log?  For some reason libtool
> > > > > > is
> > > > > > being invoked with no source files.  The lines before the failing
> > > > > > line
> > > > > > should show an invocation of match.sh that determines the source
> > > > > > files.
> > > > > 
> > > > > Thanks for your job upstreaming the patches!
> > > > > 
> > > > > I've found some problems. Current problem is with the mksysinfo.sh
> > > > > patch.
> > > > > But there are some other things missing. New patches will be submitted
> > > > > tomorrow.
> > > > 
> > > > Attached are three additional patches needed to build libgo on GNU/Hurd:
> > > > src_libgo_mksysinfo.sh.diff
> > > > src_libgo_go_syscall_wait.c.diff
> > > > src_libgo_testsuite_gotest.diff
> > > > 
> > > > For the first patch, src_libgo_mksysinfo.sh.diff, I had to go back to
> > > > the
> > > > old version, using sed -i -e. As written now ${fsid_to_dev} expands to
> > > > fsid_to_dev='-e '\''s/st_fsid/Dev/'\''' resulting in: "sed: -e
> > > > expression
> > > > #4, char 1: unknown command: `''". Unfortunately, I have not yet been
> > > > able
> > > > to modify the expansion omitting the single qoutes around the shell
> > > > variable.
> > > 
> > > I'm sorry, I don't want to use "sed -i".  That loses the original file
> > > and makes it harder to reconstruct what has happened.
> > 
> > What to do then?
> > 
> > > > The second patch, src_libgo_go_syscall_wait.c.diff, is needed since
> > > > WCONTINUED is not defined and is needed for WIFCONTINUED to be defined
> > > > in
> > > > wait.h.
> > > 
> > > I don't understand that.   is a system header file.  Are
> > > you saying that it is impossible to use  and WIFCONTINUED
> > > unless your source code does a #define WCONTINUED before #include'ing
> > > ?  That seems like a bug in the Hurd library code.
> > 
> > The problem is that WCONTINUED is not defined in /usr/include/i386-
> > gnu/bits/waitflags.h on Hurd. Only WNOHANG and WUNTRACED are. That causes
> > WIFCONTINUED not to be defined in /usr/include/i386-gnu/bits/waitstatus.h.
> > As
> > WCONTINUED is not defined, I assume that WIFCONTINUED is not supported.
> > 
> > From waitpid(2):
> > WCONTINUED (since Linux 2.6.10)
> >also return if a stopped child has been resumed by delivery of SIGCONT.
> > 
> > @Samuel: more info?
> > 
> > I think that that call to WIFCONTINUED in libgo/go/syscall/wait.c
> > _Bool
> > Continued (uint32_t *w)
> > {
> >   return WIFCONTINUED (*w) != 0;
> > }
> > 
> > has to be omitted somehow for Hurd.
> 
> It sound like the right fix is to use #ifdef WIFCONTINUED in
> syscall/wait.c.  If WIFCONTINUED is not defined, the Continued
> function should always return 0.

I've got some ideas on how to solve the mksysinfo.sh problem. I just don't have
time to try it out now. The idea is:
fsid_to_dev='s/st_dev/Dev/'
if grep 'define st_dev st_fsid' gen-sysinfo.go > /dev/null 2>&1; then
fsid_to_dev='s/st_fsid/Dev/'
...
remove: -e 's/st_dev/Dev/' \
add:-e ${fsid_to_dev} \


I can also easily submit a patch for WIFCONTINUED returning 0. Problem is I'll
be AFK for the next week. Maybe this can wait, or you find a solution? 
Regarding a comm option for ps, Samuel is the best source. 

Thanks!



[PR fortran/88299, patch] - [F18] COMMON in a legacy module produces bogus warnings in dependent code

2019-02-11 Thread Harald Anlauf
The attached patch moves the check for this F2018 obsolescent feature
to a better place where the warning is only emitted when the COMMON is
declared.  No warning should be emitted when such a legacy module is
simply used.

Regtested on x86_64-pc-linux-gnu.

OK for trunk?

Thanks,
Harald

2019-02-11  Harald Anlauf  

PR fortran/88299
* resolve.c (resolve_common_blocks,resolve_common_vars): Move
check for obsolescent COMMON feature in F2018 to a better place.

2019-02-11  Harald Anlauf  

PR fortran/88299
* gfortran.dg/pr88299.f90: New test.

Index: gcc/fortran/resolve.c
===
--- gcc/fortran/resolve.c   (revision 268778)
+++ gcc/fortran/resolve.c   (working copy)
@@ -940,7 +940,11 @@
 have been ignored to continue parsing.
 We do the checks again here.  */
   if (!csym->attr.use_assoc)
-   gfc_add_in_common (&csym->attr, csym->name, &common_block->where);
+   {
+ gfc_add_in_common (&csym->attr, csym->name, &common_block->where);
+ gfc_notify_std (GFC_STD_F2018_OBS, "COMMON block at %L",
+ &common_block->where);
+   }
 
   if (csym->value || csym->attr.data)
{
@@ -998,10 +1002,6 @@
 
   resolve_common_vars (common_root->n.common, true);
 
-  if (!gfc_notify_std (GFC_STD_F2018_OBS, "COMMON block at %L",
-  &common_root->n.common->where))
-return;
-
   /* The common name is a global name - in Fortran 2003 also if it has a
  C binding name, since Fortran 2008 only the C binding name is a global
  identifier.  */
Index: gcc/testsuite/gfortran.dg/pr88299.f90
===
--- gcc/testsuite/gfortran.dg/pr88299.f90   (nonexistent)
+++ gcc/testsuite/gfortran.dg/pr88299.f90   (working copy)
@@ -0,0 +1,16 @@
+! { dg-do compile }
+! { dg-options "-std=f2018" }
+!
+! PR 85839: [F18] COMMON in a legacy module produces bogus warnings
+!   in dependent code
+
+module legacy
+  integer :: major, n
+  common /version/ major  ! { dg-warning "obsolescent feature" }
+  public  :: n
+  private
+end module legacy
+
+module mod1
+  use legacy, only: n ! No warning expected here
+end module mod1


[PATCH 00/40] V4: Emulate MMX intrinsics with SSE

2019-02-11 Thread H.J. Lu
On x86-64, since __m64 is returned and passed in XMM registers, we can
emulate MMX intrinsics with SSE instructions. To support it, we added

 #define TARGET_MMX_WITH_SSE \
  (TARGET_64BIT && TARGET_SSE2)
 #define TARGET_MMX_WITH_SSE_P(x) \
  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x))

;; Define instruction set of MMX instructions
(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string 
"base"))

 (eq_attr "mmx_isa" "native")
   (symbol_ref "!TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64")
   (symbol_ref "TARGET_MMX_WITH_SSE")
 (eq_attr "mmx_isa" "x64_avx")
   (symbol_ref "TARGET_MMX_WITH_SSE && TARGET_AVX")
 (eq_attr "mmx_isa" "x64_noavx")
   (symbol_ref "TARGET_MMX_WITH_SSE && !TARGET_AVX")

We added SSE emulation to MMX patterns and disabled MMX alternatives with
TARGET_MMX_WITH_SSE.

Most MMX instructions have equivalent SSE versions, and the results of some
SSE versions need to be reshuffled to the right order for MMX.  There are a
couple of tricky cases:

1. MMX maskmovq and SSE2 maskmovdqu aren't equivalent.  We emulate MMX
maskmovq with SSE2 maskmovdqu by zeroing out the upper 64 bits of the
mask operand and handle unmapped bits 64:127 at memory address by
adjusting source and mask operands together with memory address.

2. MMX movntq is emulated with SSE2 DImode movnti, which is available
in 64-bit mode.

3. MMX pshufb takes a 3-bit index while SSE pshufb takes a 4-bit index.
SSE emulation must clear the bit 4 in the shuffle control mask.

4. To emulate MMX cvtpi2ps with SSE2 cvtdq2ps, we must properly preserve
the upper 64 bits of destination XMM register.

Tests are also added to check each SSE emulation of MMX intrinsics.

With SSE emulation in 64-bit mode, 8-byte vectorizer is enabled with SSE2.

There are no regressions on i686 and x86-64.  For x86-64, GCC is also
tested with

--with-arch=native --with-cpu=native

on AVX2 and AVX512F machines.

H.J. Lu (40):
  i386: Allow MMX register modes in SSE registers
  i386: Emulate MMX packsswb/packssdw/packuswb with SSE2
  i386: Emulate MMX punpcklXX/punpckhXX with SSE punpcklXX
  i386: Emulate MMX plusminus/sat_plusminus with SSE
  i386: Emulate MMX mulv4hi3 with SSE
  i386: Emulate MMX smulv4hi3_highpart with SSE
  i386: Emulate MMX mmx_pmaddwd with SSE
  i386: Emulate MMX ashr3/3 with SSE
  i386: Emulate MMX 3 with SSE
  i386: Emulate MMX mmx_andnot3 with SSE
  i386: Emulate MMX mmx_eq/mmx_gt3 with SSE
  i386: Emulate MMX vec_dupv2si with SSE
  i386: Emulate MMX pshufw with SSE
  i386: Emulate MMX sse_cvtps2pi/sse_cvttps2pi with SSE
  i386: Emulate MMX sse_cvtpi2ps with SSE
  i386: Emulate MMX mmx_pextrw with SSE
  i386: Emulate MMX mmx_pinsrw with SSE
  i386: Emulate MMX V4HI smaxmin/V8QI umaxmin with SSE
  i386: Emulate MMX mmx_pmovmskb with SSE
  i386: Emulate MMX mmx_umulv4hi3_highpart with SSE
  i386: Emulate MMX maskmovq with SSE2 maskmovdqu
  i386: Emulate MMX mmx_uavgv8qi3 with SSE
  i386: Emulate MMX mmx_uavgv4hi3 with SSE
  i386: Emulate MMX mmx_psadbw with SSE
  i386: Emulate MMX movntq with SSE2 movntidi
  i386: Emulate MMX umulv1siv1di3 with SSE2
  i386: Emulate MMX ssse3_phwv4hi3 with SSE
  i386: Emulate MMX ssse3_phdv2si3 with SSE
  i386: Emulate MMX ssse3_pmaddubsw with SSE
  i386: Emulate MMX ssse3_pmulhrswv4hi3 with SSE
  i386: Emulate MMX pshufb with SSE version
  i386: Emulate MMX ssse3_psign3 with SSE
  i386: Emulate MMX ssse3_palignrdi with SSE
  i386: Emulate MMX abs2 with SSE
  i386: Allow MMXMODE moves with TARGET_MMX_WITH_SSE
  i386: Allow MMX vector expanders with TARGET_MMX_WITH_SSE
  i386: Allow MMX intrinsic emulation with SSE
  i386: Add tests for MMX intrinsic emulations with SSE
  i386: Also enable SSSE3 __m64 tests in 64-bit mode
  i386: Enable 8-byte vectorizer for TARGET_MMX_WITH_SSE

 gcc/config/i386/constraints.md|   6 +
 gcc/config/i386/i386-builtin.def  | 126 +--
 gcc/config/i386/i386-protos.h |   4 +
 gcc/config/i386/i386.c| 208 +++-
 gcc/config/i386/i386.h|   5 +
 gcc/config/i386/i386.md   |  12 +
 gcc/config/i386/mmintrin.h|  10 +-
 gcc/config/i386/mmx.md| 980 --
 gcc/config/i386/sse.md| 365 +--
 gcc/config/i386/xmmintrin.h   |  61 ++
 gcc/testsuite/gcc.dg/tree-ssa/pr84512.c   |   2 +-
 gcc/testsuite/gcc.target/i386/mmx-vals.h  |  77 ++
 gcc/testsuite/gcc.target/i386/pr82483-1.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr82483-2.c |   2 +-
 gcc/testsuite/gcc.target/i386/pr89028-1.c |  10 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-10.c   |  42 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-11.c   |  39 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-12.c   |  41 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-13.c   |  40 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-14.c   |  30 +
 gcc/testsuite/gcc.target/i386/sse2-mmx-15.c

[PATCH 02/40] i386: Emulate MMX packsswb/packssdw/packuswb with SSE2

2019-02-11 Thread H.J. Lu
Emulate MMX packsswb/packssdw/packuswb with SSE packsswb/packssdw/packuswb
plus moving bits 64:95 to bits 32:63 in SSE register.  Only SSE register
source operand is allowed.

2019-02-08  H.J. Lu  
Uros Bizjak  

PR target/89021
* config/i386/i386-protos.h (ix86_move_vector_high_sse_to_mmx):
New prototype.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.c (ix86_move_vector_high_sse_to_mmx): New
function.
(ix86_split_mmx_pack): Likewise.
* config/i386/i386.md (mmx_isa): New.
(enabled): Also check mmx_isa.
* config/i386/mmx.md (any_s_truncate): New code iterator.
(s_trunsuffix): New code attr.
(mmx_packsswb): Removed.
(mmx_packssdw): Likewise.
(mmx_packuswb): Likewise.
(mmx_packswb): New define_insn_and_split to emulate
MMX packsswb/packuswb with SSE2.
(mmx_packssdw): Likewise.
---
 gcc/config/i386/i386-protos.h |  3 ++
 gcc/config/i386/i386.c| 54 
 gcc/config/i386/i386.md   | 12 +++
 gcc/config/i386/mmx.md| 67 +++
 4 files changed, 106 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 2d600173917..bb96a420a85 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -200,6 +200,9 @@ extern void ix86_expand_vecop_qihi (enum rtx_code, rtx, 
rtx, rtx);
 
 extern rtx ix86_split_stack_guard (void);
 
+extern void ix86_move_vector_high_sse_to_mmx (rtx);
+extern void ix86_split_mmx_pack (rtx[], enum rtx_code);
+
 #ifdef TREE_CODE
 extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
 #endif /* TREE_CODE  */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 61e602bdb38..b8d5ba7f28f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -19955,6 +19955,60 @@ ix86_expand_vector_move_misalign (machine_mode mode, 
rtx operands[])
 gcc_unreachable ();
 }
 
+/* Move bits 64:95 to bits 32:63.  */
+
+void
+ix86_move_vector_high_sse_to_mmx (rtx op)
+{
+  rtx mask = gen_rtx_PARALLEL (VOIDmode,
+  gen_rtvec (4, GEN_INT (0), GEN_INT (2),
+ GEN_INT (0), GEN_INT (0)));
+  rtx dest = gen_rtx_REG (V4SImode, REGNO (op));
+  op = gen_rtx_VEC_SELECT (V4SImode, dest, mask);
+  rtx insn = gen_rtx_SET (dest, op);
+  emit_insn (insn);
+}
+
+/* Split MMX pack with signed/unsigned saturation with SSE/SSE2.  */
+
+void
+ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
+{
+  rtx op0 = operands[0];
+  rtx op1 = operands[1];
+  rtx op2 = operands[2];
+
+  machine_mode dmode = GET_MODE (op0);
+  machine_mode smode = GET_MODE (op1);
+  machine_mode inner_dmode = GET_MODE_INNER (dmode);
+  machine_mode inner_smode = GET_MODE_INNER (smode);
+
+  /* Get the corresponding SSE mode for destination.  */
+  int nunits = 16 / GET_MODE_SIZE (inner_dmode);
+  machine_mode sse_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+   nunits).require ();
+  machine_mode sse_half_dmode = mode_for_vector (GET_MODE_INNER (dmode),
+nunits / 2).require ();
+
+  /* Get the corresponding SSE mode for source.  */
+  nunits = 16 / GET_MODE_SIZE (inner_smode);
+  machine_mode sse_smode = mode_for_vector (GET_MODE_INNER (smode),
+   nunits).require ();
+
+  /* Generate SSE pack with signed/unsigned saturation.  */
+  rtx dest = gen_rtx_REG (sse_dmode, REGNO (op0));
+  op1 = gen_rtx_REG (sse_smode, REGNO (op1));
+  op2 = gen_rtx_REG (sse_smode, REGNO (op2));
+
+  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
+   op1, op2));
+  emit_insn (insn);
+
+  ix86_move_vector_high_sse_to_mmx (op0);
+}
+
 /* Helper function of ix86_fixup_binary_operands to canonicalize
operand order.  Returns true if the operands should be swapped.  */
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 5b89e52493e..633b1dab523 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -792,6 +792,9 @@
avx512vl,noavx512vl,x64_avx512dq,x64_avx512bw"
   (const_string "base"))
 
+;; Define instruction set of MMX instructions
+(define_attr "mmx_isa" "base,native,x64,x64_noavx,x64_avx" (const_string 
"base"))
+
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "x64") (symbol_ref "TARGET_64BIT")
 (eq_attr "isa" "x64_sse2")
@@ -830,6 +833,15 @@
 (eq_attr "isa" "noavx512dq") (symbol_ref "!TARGET_AVX512DQ")
 (eq_attr "isa" "avx512vl") (symbol_ref "TARGET_AVX512VL")
 (eq_attr "isa" "noavx512vl") (symbol_ref "!TARGET_AVX512VL")
+
+(eq_attr "mmx_isa" "native")
+  (symbol_ref "!TARGET_MMX_WITH_SSE")
[PATCH 01/40] i386: Allow MMX register modes in SSE registers

2019-02-11 Thread H.J. Lu
In 64-bit mode, SSE2 can be used to emulate MMX instructions without
3DNOW.  We can use SSE2 to support MMX register modes.

PR target/89021
* config/i386/i386.c (ix86_set_reg_reg_cost): Also support
VALID_MMX_WITH_SSE_REG_MODE.
(ix86_vector_mode_supported_p): Likewise.
* config/i386/i386.h (TARGET_MMX_WITH_SSE): New.
(TARGET_MMX_WITH_SSE_P): Likewise.
---
 gcc/config/i386/i386.c | 5 +++--
 gcc/config/i386/i386.h | 5 +
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 12bc7926f86..61e602bdb38 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -40235,7 +40235,8 @@ ix86_set_reg_reg_cost (machine_mode mode)
  || (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
  || (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
  || (TARGET_SSE && VALID_SSE_REG_MODE (mode))
- || (TARGET_MMX && VALID_MMX_REG_MODE (mode)))
+ || ((TARGET_MMX || TARGET_MMX_WITH_SSE)
+ && VALID_MMX_REG_MODE (mode)))
units = GET_MODE_SIZE (mode);
 }
 
@@ -44061,7 +44062,7 @@ ix86_vector_mode_supported_p (machine_mode mode)
 return true;
   if (TARGET_AVX512F && VALID_AVX512F_REG_MODE (mode))
 return true;
-  if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
+  if ((TARGET_MMX ||TARGET_MMX_WITH_SSE) && VALID_MMX_REG_MODE (mode))
 return true;
   if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
 return true;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 83b025e0cf5..db814d9ed17 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -201,6 +201,11 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #define TARGET_16BIT   TARGET_CODE16
 #define TARGET_16BIT_P(x)  TARGET_CODE16_P(x)
 
+#define TARGET_MMX_WITH_SSE \
+  (TARGET_64BIT && TARGET_SSE2)
+#define TARGET_MMX_WITH_SSE_P(x) \
+  (TARGET_64BIT_P (x) && TARGET_SSE2_P (x))
+
 #include "config/vxworks-dummy.h"
 
 #include "config/i386/i386-opts.h"
-- 
2.20.1



  1   2   >