[PATCH] Add initial octeontx2 support.

2020-01-10 Thread apinski
From: Andrew Pinski 

This adds octeontx2 naming.  It currently uses the cortexa57
cost model and schedule model until I submit the octeontx2-specific
cost model.  This is more of a placeholder to get the naming of the
cores into GCC 10.  I will submit the cost model in the next couple
of days.

OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

ChangeLog:
* config/aarch64/aarch64-cores.def (octeontx2): New define.
(octeontx2t98): New define.
(octeontx2t96): New define.
(octeontx2t93): New define.
(octeontx2f95): New define.
(octeontx2f95n): New define.
(octeontx2f95mm): New define.
* config/aarch64/aarch64-tune.md: Regenerate.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64-cores.def | 10 ++
 gcc/config/aarch64/aarch64-tune.md   |  2 +-
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 2dd2b86bd92..057ed1ee131 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -109,6 +109,16 @@ AARCH64_CORE("ares",  ares, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_F
 AARCH64_CORE("neoverse-n1",  neoversen1, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD 
| AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
 AARCH64_CORE("neoverse-e1",  neoversee1, cortexa53, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD 
| AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
 
+/* Cavium ('C') cores. */
+AARCH64_CORE("octeontx2",  octeontx2,  cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE , cortexa57, 
0x43, 0x0b0, -1)
+AARCH64_CORE("octeontx2t98",   octeontx2t98,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE , cortexa57, 
0x43, 0x0b1, -1)
+AARCH64_CORE("octeontx2t96",   octeontx2t96,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE , cortexa57, 
0x43, 0x0b2, -1)
+/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
+AARCH64_CORE("octeontx2t93",   octeontx2t93,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE , cortexa57, 
0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2f95",   octeontx2f95,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE , cortexa57, 
0x43, 0x0b3, -1)
+AARCH64_CORE("octeontx2f95n",  octeontx2f95n,  cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE , cortexa57, 
0x43, 0x0b4, -1)
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE , cortexa57, 
0x43, 0x0b5, -1)
+
 /* HiSilicon ('H') cores. */
 AARCH64_CORE("tsv110",  tsv110, tsv110, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | 
AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,  
 0x48, 0xd01, -1)
 
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index a6a14b7fc77..3cc1c4d761f 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
-- 
2.17.1



[PATCHv2] Add initial octeontx2 support.

2020-01-11 Thread apinski
From: Andrew Pinski 

This adds octeontx2 naming.  It currently uses the cortexa57
cost model and schedule model until I submit the octeontx2-specific
cost model.  This is more of a placeholder to get the naming of the
cores into GCC 10.  I will submit the cost model in the next couple
of days.

OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.

v2 changes: Add documentation and fix minor whitespace issues before the `,'.

Thanks,
Andrew Pinski

ChangeLog:
* config/aarch64/aarch64-cores.def (octeontx2): New define.
(octeontx2t98): New define.
(octeontx2t96): New define.
(octeontx2t93): New define.
(octeontx2f95): New define.
(octeontx2f95n): New define.
(octeontx2f95mm): New define.
* config/aarch64/aarch64-tune.md: Regenerate.
* doc/invoke.texi (-mcpu=): Document the new cpu types.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64-cores.def | 10 ++
 gcc/config/aarch64/aarch64-tune.md   |  2 +-
 gcc/doc/invoke.texi  |  6 +-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def 
b/gcc/config/aarch64/aarch64-cores.def
index 2dd2b86bd92..ea9b98b4b0a 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -109,6 +109,16 @@ AARCH64_CORE("ares",  ares, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_F
 AARCH64_CORE("neoverse-n1",  neoversen1, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD 
| AARCH64_FL_PROFILE, neoversen1, 0x41, 0xd0c, -1)
 AARCH64_CORE("neoverse-e1",  neoversee1, cortexa53, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_RCPC | AARCH64_FL_DOTPROD 
| AARCH64_FL_SSBS, cortexa73, 0x41, 0xd4a, -1)
 
+/* Cavium ('C') cores. */
+AARCH64_CORE("octeontx2",  octeontx2,  cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 
0x43, 0x0b0, -1)
+AARCH64_CORE("octeontx2t98",   octeontx2t98,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 
0x43, 0x0b1, -1)
+AARCH64_CORE("octeontx2t96",   octeontx2t96,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 
0x43, 0x0b2, -1)
+/* Note OcteonTX2 T93 is an alias to OcteonTX2 T96. */
+AARCH64_CORE("octeontx2t93",   octeontx2t93,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 
0x43, 0x0b2, -1)
+AARCH64_CORE("octeontx2f95",   octeontx2f95,   cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 
0x43, 0x0b3, -1)
+AARCH64_CORE("octeontx2f95n",  octeontx2f95n,  cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 
0x43, 0x0b4, -1)
+AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A,  
AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_PROFILE, cortexa57, 
0x43, 0x0b5, -1)
+
 /* HiSilicon ('H') cores. */
 AARCH64_CORE("tsv110",  tsv110, tsv110, 8_2A,  AARCH64_FL_FOR_ARCH8_2 | 
AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110,  
 0x48, 0xd01, -1)
 
diff --git a/gcc/config/aarch64/aarch64-tune.md 
b/gcc/config/aarch64/aarch64-tune.md
index a6a14b7fc77..3cc1c4d761f 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
 ;; -*- buffer-read-only: t -*-
 ;; Generated automatically by gentune.sh from aarch64-cores.def
 (define_attr "tune"
-   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+   
"cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,tsv110,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index f2c805c0a64..279b97f51ba 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -16323,7 +16323,11 @@ performance of the code.  Permissible values for this 
option are:
 @samp{ares}, @samp{exynos-m1}, @samp{emag}, @samp{falkor},
 @samp{neoverse-e1},@samp{neoverse-n1},@samp{qdf24xx}, @samp{saphira},
 @samp{phecda}, @samp{xgene1}, @samp{vulcan}, @samp{octeontx

[PATCH] Decrease cortexa57_extra_costs's alu.shift_reg

2020-01-11 Thread apinski
From: Andrew Pinski 

As I mentioned in https://gcc.gnu.org/ml/gcc/2020-01/msg00157.html,
the shift by a register should cost just COSTS_N_INSNS (1) rather than
COSTS_N_INSNS (2); since this table holds extra costs on top of the
base instruction cost, the entry becomes 0.  This allows lshift_cheap_p
to return true now, which enables converting switches to use shifts
and other similar structures.  I noticed this difference when I was
working through PR 93131 and understanding what reassoc could handle.

OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

ChangeLog:
* config/arm/aarch-cost-tables.h (cortexa57_extra_costs): Change
alu.shift_reg to 0.
---
 gcc/config/arm/aarch-cost-tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/arm/aarch-cost-tables.h 
b/gcc/config/arm/aarch-cost-tables.h
index 6a30d92cde9..cf818659901 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -235,7 +235,7 @@ const struct cpu_cost_table cortexa57_extra_costs =
 0, /* arith.  */
 0, /* logical.  */
 0, /* shift.  */
-COSTS_N_INSNS (1), /* shift_reg.  */
+0, /* shift_reg.  */
 COSTS_N_INSNS (1), /* arith_shift.  */
 COSTS_N_INSNS (1), /* arith_shift_reg.  */
 COSTS_N_INSNS (1), /* log_shift.  */
-- 
2.17.1



[PATCH 1/2] Fix uninitialized field in expand_operand.

2020-01-15 Thread apinski
From: Andrew Pinski 

Commit g:f96bf49a0 added the target field to expand_operand.
But it leaves it uninitialized when doing a full initialization
inside create_expand_operand.  This fixes the problem and improves
the code generation inside create_expand_operand too.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

ChangeLog:
* optabs.h (create_expand_operand): Initialize target field also.

Change-Id: Ib653fbfbb2b0709970db87fb94de14b59758bc6c
---
 gcc/optabs.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/optabs.h b/gcc/optabs.h
index 07bdc56586e..5bd19503a0a 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -78,6 +78,7 @@ create_expand_operand (class expand_operand *op,
 {
   op->type = type;
   op->unsigned_p = unsigned_p;
+  op->target = 0;
   op->unused = 0;
   op->mode = mode;
   op->value = value;
-- 
2.17.1



[PATCH 2/2] Uninitialized padding in struct _dep.

2020-01-15 Thread apinski
From: Andrew Pinski 

In struct _dep, there is an implicit padding of 4 bits.  This
bit-field padding is uninitialized when init_dep_1 is called.
This means we access uninitialized memory but never use it for
anything.  Adding an unused bit-field and initializing it
in init_dep_1 will also improve code generation, as we now initialize
the whole 32 bits rather than just part of it.

OK?  Bootstrapped and tested on x86_64-linux-gnu.

Thanks,
Andrew Pinski

ChangeLog:
* sched-int.h (_dep): Add unused bit-field field for the padding.
* sched-deps.c (init_dep_1): Init unused field.

Change-Id: I27000323e728f8a73189426e0b9a98c5235b8c55
---
 gcc/sched-deps.c | 1 +
 gcc/sched-int.h  | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c
index 9182aba5588..331af5ffdb3 100644
--- a/gcc/sched-deps.c
+++ b/gcc/sched-deps.c
@@ -101,6 +101,7 @@ init_dep_1 (dep_t dep, rtx_insn *pro, rtx_insn *con, enum 
reg_note type, ds_t ds
   DEP_NONREG (dep) = 0;
   DEP_MULTIPLE (dep) = 0;
   DEP_REPLACE (dep) = NULL;
+  dep->unused = 0;
 }
 
 /* Init DEP with the arguments.
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 833b552a340..a847f876e65 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -238,6 +238,8 @@ struct _dep
   /* Cached cost of the dependency.  Make sure to update UNKNOWN_DEP_COST
  when changing the size of this field.  */
   int cost:20;
+
+  unsigned unused:4;
 };
 
 #define UNKNOWN_DEP_COST ((int) ((unsigned int) -1 << 19))
-- 
2.17.1



[PATCH] Fix value numbering dealing with reverse byte order

2020-01-16 Thread apinski
From: Andrew Pinski 

Hi,
  While working on bit-field lowering pass, I came across this bug.
The IR looks like:
  VIEW_CONVERT_EXPR(var1) = _12;
  _1 = BIT_FIELD_REF ;

Where the BIT_FIELD_REF has REF_REVERSE_STORAGE_ORDER set on it
and var1's type has TYPE_REVERSE_STORAGE_ORDER set on it.
PRE/FRE would decide to propagate _12 into the BFR statement,
which would produce wrong code.
And yes, _12 has the correct byte order already; bit-field lowering
removes the implicit byte swaps in the IR and adds explicit ones
to make it easier to optimize later on.

This patch adds a check for storage_order_barrier_p on the lhs tree
which returns true in the case where we had a reverse order with a VCE.

gcc.c-torture/execute/pr86659-1.c was the testcase which showed the issue.

OK?  Bootstrapped and tested on x86_64-linux-gnu with no regression.

Thanks,
Andrew Pinski

ChangeLog:
* tree-ssa-sccvn.c (vn_reference_lookup_3): Check lhs for
!storage_order_barrier_p.

Change-Id: I7810de6fc4ff01e431033fa7f7f7b3ec95f67644
---
 gcc/tree-ssa-sccvn.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 3b27c50..4d13015 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -2593,6 +2593,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
 &offset2, &size2, &maxsize2, &reverse);
   if (base2
  && !reverse
+ && !storage_order_barrier_p (lhs)
  && known_eq (maxsize2, size2)
  && multiple_p (size2, BITS_PER_UNIT)
  && multiple_p (offset2, BITS_PER_UNIT)
@@ -2695,6 +2696,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void 
*data_,
 &offset2, &size2, &maxsize2, &reverse);
   tree def_rhs = gimple_assign_rhs1 (def_stmt);
   if (!reverse
+ && !storage_order_barrier_p (lhs)
  && known_size_p (maxsize2)
  && known_eq (maxsize2, size2)
  && adjust_offsets_for_equal_base_address (base, &offset,
-- 
1.8.3.1



[PATCH] Fix target/93119 (aarch64): ICE with traditional TLS support on ILP32

2020-01-17 Thread apinski
From: Andrew Pinski 

The problem here was that g:23b88fda665d2f995c was not a complete fix
for supporting traditional TLS on ILP32.

So the problem here is a couple of things, first __tls_get_addr
call will return a C pointer value so we need to use ptr_mode
when we are creating the call.  Then we need to convert
back that register to the correct mode, either zero extending
it or just creating a move instruction.
Also symbol_ref can either be in SImode or DImode.  So we need to
allow both modes.

Built and tested on aarch64-linux-gnu with no regressions.
Also built a full toolchain (including glibc) defaulting to traditional
TLS that targets ilp32 and lp64.

OK?

Thanks,
Andrew Pinski

ChangeLog:
* config/aarch64/aarch64.md (tlsgd_small_<mode>): Have operand 0 as PTR mode.
Have operand 1 as being modeless, it can be P mode.
(*tlsgd_small_<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_load_symref_appropriately)
<case SYMBOL_SMALL_TLSGD>: Call gen_tlsgd_small_* with a ptr_mode
register.  Convert that register back to dest using convert_move.

Change-Id: I76826350d6bace0b731f21df1f125e9122da843f
---
 gcc/config/aarch64/aarch64.c   | 18 ++
 gcc/config/aarch64/aarch64.md  |  8 
 gcc/testsuite/gcc.target/aarch64/pr93119.c | 10 ++
 3 files changed, 28 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr93119.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c26ac0db942..6c825459dc6 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2607,19 +2607,29 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 case SYMBOL_SMALL_TLSGD:
   {
rtx_insn *insns;
-   machine_mode mode = GET_MODE (dest);
-   rtx result = gen_rtx_REG (mode, R0_REGNUM);
+   /* The return type of __tls_get_addr is the C pointer type
+  so use ptr_mode.  */
+   rtx result = gen_rtx_REG (ptr_mode, R0_REGNUM);
+   rtx tmp_reg = dest;
+
+   if (GET_MODE (dest) != ptr_mode)
+ tmp_reg = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : result;
 
start_sequence ();
-   if (TARGET_ILP32)
+   if (ptr_mode == SImode)
  aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
else
  aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
+
insns = get_insns ();
end_sequence ();
 
RTL_CONST_CALL_P (insns) = 1;
-   emit_libcall_block (insns, dest, result, imm);
+   emit_libcall_block (insns, tmp_reg, result, imm);
+   /* Convert back to the mode of the dest, adding a zero-extend as
+  needed. */
+   if (dest != tmp_reg)
+ convert_move (dest, tmp_reg, true);
return;
   }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 86c2cdfc797..55dde54b16a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6755,10 +6755,10 @@ (define_insn "aarch64_load_tp_hard"
 ;; instructions in the TLS stubs, in order to enable linker relaxation.
 ;; Therefore we treat the stubs as an atomic sequence.
 (define_expand "tlsgd_small_"
- [(parallel [(set (match_operand 0 "register_operand")
+ [(parallel [(set (match_operand:PTR 0 "register_operand")
   (call (mem:DI (match_dup 2)) (const_int 1)))
 (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI)
-(unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref")] 
UNSPEC_GOTSMALLTLS)
+(unspec:DI [(match_operand 1 "aarch64_valid_symref")] 
UNSPEC_GOTSMALLTLS)
 (clobber (reg:DI LR_REGNUM))])]
  ""
 {
@@ -6766,10 +6766,10 @@ (define_expand "tlsgd_small_"
 })
 
 (define_insn "*tlsgd_small_"
-  [(set (match_operand 0 "register_operand" "")
+  [(set (match_operand:PTR 0 "register_operand" "")
(call (mem:DI (match_operand:DI 2 "" "")) (const_int 1)))
(unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI)
-   (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] 
UNSPEC_GOTSMALLTLS)
+   (unspec:DI [(match_operand 1 "aarch64_valid_symref" "S")] 
UNSPEC_GOTSMALLTLS)
(clobber (reg:DI LR_REGNUM))
   ]
   ""
diff --git a/gcc/testsuite/gcc.target/aarch64/pr93119.c 
b/gcc/testsuite/gcc.target/aarch64/pr93119.c
new file mode 100644
index 000..93fa80e10b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr93119.c
@@ -0,0 +1,10 @@
+/* { dg-require-effective-target fpic } */
+/* { dg-options "-mtls-dialect=trad -fpic" } */
+
+__thread int g_tlsdata;
+
+int func1()
+{
+  g_tlsdata++;
+  return g_tlsdata;
+}
-- 
2.17.1



[PATCH] Fix PR 93242: patchable-function-entry broken on MIPS

2020-01-17 Thread apinski
From: Andrew Pinski 

On MIPS, .set noreorder/reorder needs to be emitted around
the nop.  The template for the nop instruction uses %(/%) to
do that.  But default_print_patchable_function_entry uses
fprintf rather than output_asm_insn to output the instruction.

This fixes the problem by using output_asm_insn to emit the nop
instruction.

OK?  Bootstrapped and tested on x86_64-linux-gnu and built a full
mips toolchain also.

Thanks,
Andrew Pinski

ChangeLog:

* targhooks.c (default_print_patchable_function_entry): Use
output_asm_insn to emit the nop instruction.

Change-Id: I9d7cff2fc227a41461b9068e3af1fd3a5a9c059b
---
 gcc/targhooks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 4819bb8..415c21b 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1822,7 +1822,7 @@ default_print_patchable_function_entry (FILE *file,
 
   unsigned i;
   for (i = 0; i < patch_area_size; ++i)
-fprintf (file, "\t%s\n", nop_templ);
+output_asm_insn (nop_templ, NULL);
 }
 
 bool
-- 
1.8.3.1



[PATCH] Manually handle recursiveness in prepare_block_for_update

2020-01-19 Thread apinski
From: Andrew Pinski 

Reported as PR 93321, prepare_block_for_update with some huge
recursive inlining can go past the stack limit.  The loop
at the end can be transformed such that the last iteration jumps
back to the beginning of the function instead of making the call.
This reduces the stack usage and slightly speeds up
the function.

OK?  Bootstrapped and tested on x86_64-linux-gnu with no regressions.

ChangeLog:
* tree-into-ssa.c (prepare_block_for_update): Manually sibcall
optimize to self.
---
 gcc/tree-into-ssa.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c
index c27bf2ce121..6e139c3b056 100644
--- a/gcc/tree-into-ssa.c
+++ b/gcc/tree-into-ssa.c
@@ -2616,6 +2616,7 @@ prepare_block_for_update (basic_block bb, bool 
insert_phi_p)
   edge e;
   edge_iterator ei;
 
+again:
   mark_block_for_update (bb);
 
   /* Process PHI nodes marking interesting those that define or use
@@ -2695,10 +2696,17 @@ prepare_block_for_update (basic_block bb, bool 
insert_phi_p)
 }
 
   /* Now visit all the blocks dominated by BB.  */
-  for (son = first_dom_son (CDI_DOMINATORS, bb);
-   son;
-   son = next_dom_son (CDI_DOMINATORS, son))
-prepare_block_for_update (son, insert_phi_p);
+  for (son = first_dom_son (CDI_DOMINATORS, bb); son; )
+{
+  basic_block next = next_dom_son (CDI_DOMINATORS, son);
+  if (!next)
+   {
+ bb = son;
+ goto again;
+   }
+  prepare_block_for_update (son, insert_phi_p);
+  son = next;
+}
 }
 
 
-- 
2.17.1



[PATCHv2] Change recursive prepare_block_for_update to use a worklist

2020-01-21 Thread apinski
From: Andrew Pinski 

Reported as PR 93321, prepare_block_for_update with some huge
recursive inlining can go past the stack limit.  Transforming this
recursion into a worklist improves the stack usage here and we no
longer seg fault for the testcase.  Note the order in which we walk
the siblings changes.

OK?  Bootstrapped and tested on x86_64-linux-gnu with no regressions.

ChangeLog:
* tree-into-ssa.c (prepare_block_for_update_1): Split out from ...
(prepare_block_for_update): This.  Use a worklist instead of recursing
into the function.  Remove bb argument.
(update_ssa): Update call to prepare_block_for_update.
---
 gcc/tree-into-ssa.c | 61 +++--
 1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c
index c27bf2ce121..9f1e8ece737 100644
--- a/gcc/tree-into-ssa.c
+++ b/gcc/tree-into-ssa.c
@@ -2593,11 +2593,9 @@ mark_use_interesting (tree var, gimple *stmt, 
basic_block bb,
 }
 }
 
-
-/* Do a dominator walk starting at BB processing statements that
-   reference symbols in SSA operands.  This is very similar to
-   mark_def_sites, but the scan handles statements whose operands may
-   already be SSA names.
+/* Processing statements in BB that reference symbols in SSA operands.
+   This is very similar to mark_def_sites, but the scan handles
+   statements whose operands may already be SSA names.
 
If INSERT_PHI_P is true, mark those uses as live in the
corresponding block.  This is later used by the PHI placement
@@ -2610,9 +2608,8 @@ mark_use_interesting (tree var, gimple *stmt, basic_block 
bb,
   that.  */
 
 static void
-prepare_block_for_update (basic_block bb, bool insert_phi_p)
+prepare_block_for_update_1 (basic_block bb, bool insert_phi_p)
 {
-  basic_block son;
   edge e;
   edge_iterator ei;
 
@@ -2694,13 +2691,51 @@ prepare_block_for_update (basic_block bb, bool 
insert_phi_p)
}
 }
 
-  /* Now visit all the blocks dominated by BB.  */
-  for (son = first_dom_son (CDI_DOMINATORS, bb);
-   son;
-   son = next_dom_son (CDI_DOMINATORS, son))
-prepare_block_for_update (son, insert_phi_p);
 }
 
+/* Do a dominator walk starting at entry block processing statements that
+   reference symbols in SSA operands.  This is very similar to
+   mark_def_sites, but the scan handles statements whose operands may
+   already be SSA names.
+
+   If INSERT_PHI_P is true, mark those uses as live in the
+   corresponding block.  This is later used by the PHI placement
+   algorithm to make PHI pruning decisions.
+
+   FIXME.  Most of this would be unnecessary if we could associate a
+  symbol to all the SSA names that reference it.  But that
+  sounds like it would be expensive to maintain.  Still, it
+  would be interesting to see if it makes better sense to do
+  that.  */
+static void
+prepare_block_for_update (bool insert_phi_p)
+{
+  size_t sp = 0;
+  basic_block *worklist;
+
+  /* Allocate the worklist.  */
+  worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+  /* Add the entry BB to the worklist.  */
+  worklist[sp++] = ENTRY_BLOCK_PTR_FOR_FN (cfun);
+
+  while (sp)
+{
+  basic_block bb;
+  basic_block son;
+
+  /* Pick a block from the worklist.  */
+  bb = worklist[--sp];
+
+  prepare_block_for_update_1 (bb, insert_phi_p);
+
+  /* Now add all the blocks dominated by BB to the worklist.  */
+  for (son = first_dom_son (CDI_DOMINATORS, bb);
+  son;
+  son = next_dom_son (CDI_DOMINATORS, son))
+   worklist[sp++] = son;
+}
+  free (worklist);
+}
 
 /* Helper for prepare_names_to_update.  Mark all the use sites for
NAME as interesting.  BLOCKS and INSERT_PHI_P are as in
@@ -3392,7 +3427,7 @@ update_ssa (unsigned update_flags)
 symbols in SSA operands.  Mark interesting blocks and
 statements and set local live-in information for the PHI
 placement heuristics.  */
-  prepare_block_for_update (start_bb, insert_phi_p);
+  prepare_block_for_update (insert_phi_p);
 
   tree name;
 
-- 
2.17.1



[PATCH/committed] Change recursive prepare_block_for_update to use a worklist

2020-01-21 Thread apinski
From: Andrew Pinski 


This is what I committed.

Reported as PR 93321, prepare_block_for_update with some huge
recursive inlining can go past the stack limit.  Transforming this
recursion into a worklist improves the stack usage here and we no
longer seg fault for the testcase.  Note the order in which we walk
the siblings changes.

ChangeLog:
PR tree-opt/93321
* tree-into-ssa.c (prepare_block_for_update_1): Split out from ...
(prepare_block_for_update): This.  Use a worklist instead of recursing.
---
 gcc/ChangeLog   |  8 ++
 gcc/tree-into-ssa.c | 59 -
 2 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8c17e5992d2..262f0d6506f 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2020-01-21  Andrew Pinski  
+
+   PR tree-opt/93321
+   * tree-into-ssa.c (prepare_block_for_update_1): Split out
+   from ...
+   (prepare_block_for_update): This.  Use a worklist instead of
+   recursing.
+
 2020-01-21  Mihail-Calin Ionescu  
 
* gcc/config/arm/arm.c (clear_operation_p):
diff --git a/gcc/tree-into-ssa.c b/gcc/tree-into-ssa.c
index c27bf2ce121..6528acac31a 100644
--- a/gcc/tree-into-ssa.c
+++ b/gcc/tree-into-ssa.c
@@ -2593,11 +2593,9 @@ mark_use_interesting (tree var, gimple *stmt, 
basic_block bb,
 }
 }
 
-
-/* Do a dominator walk starting at BB processing statements that
-   reference symbols in SSA operands.  This is very similar to
-   mark_def_sites, but the scan handles statements whose operands may
-   already be SSA names.
+/* Processing statements in BB that reference symbols in SSA operands.
+   This is very similar to mark_def_sites, but the scan handles
+   statements whose operands may already be SSA names.
 
If INSERT_PHI_P is true, mark those uses as live in the
corresponding block.  This is later used by the PHI placement
@@ -2610,9 +2608,8 @@ mark_use_interesting (tree var, gimple *stmt, basic_block 
bb,
   that.  */
 
 static void
-prepare_block_for_update (basic_block bb, bool insert_phi_p)
+prepare_block_for_update_1 (basic_block bb, bool insert_phi_p)
 {
-  basic_block son;
   edge e;
   edge_iterator ei;
 
@@ -2694,13 +2691,51 @@ prepare_block_for_update (basic_block bb, bool 
insert_phi_p)
}
 }
 
-  /* Now visit all the blocks dominated by BB.  */
-  for (son = first_dom_son (CDI_DOMINATORS, bb);
-   son;
-   son = next_dom_son (CDI_DOMINATORS, son))
-prepare_block_for_update (son, insert_phi_p);
 }
 
+/* Do a dominator walk starting at BB processing statements that
+   reference symbols in SSA operands.  This is very similar to
+   mark_def_sites, but the scan handles statements whose operands may
+   already be SSA names.
+
+   If INSERT_PHI_P is true, mark those uses as live in the
+   corresponding block.  This is later used by the PHI placement
+   algorithm to make PHI pruning decisions.
+
+   FIXME.  Most of this would be unnecessary if we could associate a
+  symbol to all the SSA names that reference it.  But that
+  sounds like it would be expensive to maintain.  Still, it
+  would be interesting to see if it makes better sense to do
+  that.  */
+static void
+prepare_block_for_update (basic_block bb, bool insert_phi_p)
+{
+  size_t sp = 0;
+  basic_block *worklist;
+
+  /* Allocate the worklist.  */
+  worklist = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
+  /* Add the BB to the worklist.  */
+  worklist[sp++] = bb;
+
+  while (sp)
+{
+  basic_block bb;
+  basic_block son;
+
+  /* Pick a block from the worklist.  */
+  bb = worklist[--sp];
+
+  prepare_block_for_update_1 (bb, insert_phi_p);
+
+  /* Now add all the blocks dominated by BB to the worklist.  */
+  for (son = first_dom_son (CDI_DOMINATORS, bb);
+  son;
+  son = next_dom_son (CDI_DOMINATORS, son))
+   worklist[sp++] = son;
+}
+  free (worklist);
+}
 
 /* Helper for prepare_names_to_update.  Mark all the use sites for
NAME as interesting.  BLOCKS and INSERT_PHI_P are as in
-- 
2.17.1



[PATCH] Fix target/93119 (aarch64): ICE with traditional TLS support on ILP32

2020-01-21 Thread apinski
From: Andrew Pinski 


This is what I committed after Richard's comments.

The problem here was that g:23b88fda665d2f995c was not a complete fix
for supporting traditional TLS on ILP32.

So the problem here is a couple of things, first __tls_get_addr
call will return a C pointer value so we need to use ptr_mode
when we are creating the call.  Then we need to convert
back that register to the correct mode, either zero extending
it or just creating a move instruction.
Also symbol_ref can either be in SImode or DImode.  So we need to
allow both modes.

Built and tested on aarch64-linux-gnu with no regressions.
Also built a full toolchain (including glibc) defaulting to traditional
TLS that targets ilp32 and lp64.

ChangeLog:
PR target/93119
* config/aarch64/aarch64.md (tlsgd_small_<mode>): Have operand 0
as PTR mode. Have operand 1 as being modeless, it can be P mode.
(*tlsgd_small_<mode>): Likewise.
* config/aarch64/aarch64.c (aarch64_load_symref_appropriately)
<case SYMBOL_SMALL_TLSGD>: Call gen_tlsgd_small_* with a ptr_mode
register.  Convert that register back to dest using convert_move.
---
 gcc/ChangeLog  | 11 +++
 gcc/config/aarch64/aarch64.c   | 17 +
 gcc/config/aarch64/aarch64.md  |  8 
 gcc/testsuite/gcc.target/aarch64/pr93119.c | 10 ++
 4 files changed, 38 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr93119.c

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 2f6e603df7a..85cf788cc50 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,14 @@
+2020-01-21  Andrew Pinski  
+
+   PR target/93119
+   * config/aarch64/aarch64.md (tlsgd_small_): Have operand 0
+   as PTR mode. Have operand 1 as being modeless, it can be P mode.
+   (*tlsgd_small_): Likewise.
+   * config/aarch64/aarch64.c (aarch64_load_symref_appropriately)
+   : Call gen_tlsgd_small_* with a ptr_mode
+   register.  Convert that register back to dest using convert_mode.
+
+
 2020-01-21  Jim Wilson  
 
* config/riscv/riscv-sr.c (riscv_sr_match_prologue): Use INTVAL
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ef037e226a7..9acf33dbe64 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2607,11 +2607,16 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
 case SYMBOL_SMALL_TLSGD:
   {
rtx_insn *insns;
-   machine_mode mode = GET_MODE (dest);
-   rtx result = gen_rtx_REG (mode, R0_REGNUM);
+   /* The return type of __tls_get_addr is the C pointer type
+  so use ptr_mode.  */
+   rtx result = gen_rtx_REG (ptr_mode, R0_REGNUM);
+   rtx tmp_reg = dest;
+
+   if (GET_MODE (dest) != ptr_mode)
+ tmp_reg = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : result;
 
start_sequence ();
-   if (TARGET_ILP32)
+   if (ptr_mode == SImode)
  aarch64_emit_call_insn (gen_tlsgd_small_si (result, imm));
else
  aarch64_emit_call_insn (gen_tlsgd_small_di (result, imm));
@@ -2619,7 +2624,11 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
end_sequence ();
 
RTL_CONST_CALL_P (insns) = 1;
-   emit_libcall_block (insns, dest, result, imm);
+   emit_libcall_block (insns, tmp_reg, result, imm);
+   /* Convert back to the mode of the dest adding a zero_extend
+  from SImode (ptr_mode) to DImode (Pmode). */
+   if (dest != tmp_reg)
+ convert_move (dest, tmp_reg, true);
return;
   }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 86c2cdfc797..55dde54b16a 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -6755,10 +6755,10 @@ (define_insn "aarch64_load_tp_hard"
 ;; instructions in the TLS stubs, in order to enable linker relaxation.
 ;; Therefore we treat the stubs as an atomic sequence.
 (define_expand "tlsgd_small_"
- [(parallel [(set (match_operand 0 "register_operand")
+ [(parallel [(set (match_operand:PTR 0 "register_operand")
   (call (mem:DI (match_dup 2)) (const_int 1)))
 (unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI)
-(unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref")] 
UNSPEC_GOTSMALLTLS)
+(unspec:DI [(match_operand 1 "aarch64_valid_symref")] 
UNSPEC_GOTSMALLTLS)
 (clobber (reg:DI LR_REGNUM))])]
  ""
 {
@@ -6766,10 +6766,10 @@ (define_expand "tlsgd_small_"
 })
 
 (define_insn "*tlsgd_small_"
-  [(set (match_operand 0 "register_operand" "")
+  [(set (match_operand:PTR 0 "register_operand" "")
(call (mem:DI (match_operand:DI 2 "" "")) (const_int 1)))
(unspec:DI [(const_int 0)] UNSPEC_CALLEE_ABI)
-   (unspec:DI [(match_operand:PTR 1 "aarch64_valid_symref" "S")] 
UNSPEC_GOTSMALLTLS)
+   (unspec:DI [(match_operand 1 "aarch64_valid_symref" "S")] 
UNSPEC_GOTSMALLTLS)
(clobber (reg:DI LR_REGNUM))
   ]
   ""
diff --git a/gcc/testsuite/gcc.target/aarch64/pr93119.c 
b/gc

[PATCH] Allow tree-ssa.exp to be run by itself

2020-01-21 Thread apinski
From: Andrew Pinski 

tree-ssa testcases sometimes check autovect effective target
but does not set it up.  On MIPS, those testcases fail with
some TCL error messages.  This fixes the issue by calling
check_vect_support_and_set_flags inside tree-ssa.exp.
There might be other .exp files which need to be done this
way too but I have not checked all of them.

OK?  Tested on x86_64-linux-gnu and a cross to mips64-octeon-linux-gnu.
Both full run of the testsuite and running tree-ssa.exp by itself.

Thanks,
Andrew Pinski

testsuite/ChangeLog:
* tree-ssa.exp: Set DEFAULT_VECTCFLAGS and EFFECTIVE_TARGETS.
Call check_vect_support_and_set_flags also.
---
 gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp 
b/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp
index 7d262ffd0fb..464fef628f8 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp
+++ b/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp
@@ -19,6 +19,17 @@
 # Load support procs.
 load_lib gcc-dg.exp
 
+# Set up flags used for tests that don't specify options.
+global DEFAULT_VECTCFLAGS
+set DEFAULT_VECTCFLAGS ""
+
+# Set up a list of effective targets to run vector tests for all supported
+# targets.
+global EFFECTIVE_TARGETS
+set EFFECTIVE_TARGETS ""
+
+check_vect_support_and_set_flags
+
 # If a testcase doesn't have special options, use these.
 global DEFAULT_CFLAGS
 if ![info exists DEFAULT_CFLAGS] then {
-- 
2.17.1



[committed/PATCH] Revert "Allow tree-ssa.exp to be run by itself" and move some testcases

2020-01-22 Thread apinski
From: Andrew Pinski 

This reverts commit 9085381f1931cc3667412c8fff91878184835901 as it was
causing default dg-do to be set incorrectly on most targets.
Instead move the vect-related testcases that
use "dg-require-effective-target vect_*" to the vect test area.

Committed as obvious after testing on x86_64, but only vect.exp
and tree-ssa.exp.

ChangeLog:
* gcc.dg/tree-ssa/pr88497-1.c: Move to ...
* gcc.dg/vect/pr88497-1.c: This.
* gcc.dg/tree-ssa/pr88497-2.c: Move to ...
* gcc.dg/vect/pr88497-2.c: This.
* gcc.dg/tree-ssa/pr88497-3.c: Move to ...
* gcc.dg/vect/pr88497-3.c: This.
* gcc.dg/tree-ssa/pr88497-4.c: Move to ...
* gcc.dg/vect/pr88497-4.c: This.
* gcc.dg/tree-ssa/pr88497-5.c: Move to ...
* gcc.dg/vect/pr88497-5.c: This.
* gcc.dg/tree-ssa/pr88497-6.c: Move to ...
* gcc.dg/vect/pr88497-6.c: This.
* gcc.dg/tree-ssa/pr88497-7.c: Move to ...
* gcc.dg/vect/pr88497-7.c: This.

Revert:
* tree-ssa.exp: Set DEFAULT_VECTCFLAGS and DEFAULT_VECTCFLAGS.
Call check_vect_support_and_set_flags also.
---
 gcc/testsuite/ChangeLog   | 21 +++
 gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp| 11 --
 .../gcc.dg/{tree-ssa => vect}/pr88497-1.c |  0
 .../gcc.dg/{tree-ssa => vect}/pr88497-2.c |  0
 .../gcc.dg/{tree-ssa => vect}/pr88497-3.c |  0
 .../gcc.dg/{tree-ssa => vect}/pr88497-4.c |  0
 .../gcc.dg/{tree-ssa => vect}/pr88497-5.c |  0
 .../gcc.dg/{tree-ssa => vect}/pr88497-6.c |  0
 .../gcc.dg/{tree-ssa => vect}/pr88497-7.c |  0
 9 files changed, 21 insertions(+), 11 deletions(-)
 rename gcc/testsuite/gcc.dg/{tree-ssa => vect}/pr88497-1.c (100%)
 rename gcc/testsuite/gcc.dg/{tree-ssa => vect}/pr88497-2.c (100%)
 rename gcc/testsuite/gcc.dg/{tree-ssa => vect}/pr88497-3.c (100%)
 rename gcc/testsuite/gcc.dg/{tree-ssa => vect}/pr88497-4.c (100%)
 rename gcc/testsuite/gcc.dg/{tree-ssa => vect}/pr88497-5.c (100%)
 rename gcc/testsuite/gcc.dg/{tree-ssa => vect}/pr88497-6.c (100%)
 rename gcc/testsuite/gcc.dg/{tree-ssa => vect}/pr88497-7.c (100%)

diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9a87cfe595b..5160e86fc9d 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,24 @@
+2020-01-22  Andrew Pinski  
+
+   * gcc.dg/tree-ssa/pr88497-1.c: Move to ...
+   * gcc.dg/vect/pr88497-1.c: This.
+   * gcc.dg/tree-ssa/pr88497-2.c: Move to ...
+   * gcc.dg/vect/pr88497-2.c: This.
+   * gcc.dg/tree-ssa/pr88497-3.c: Move to ...
+   * gcc.dg/vect/pr88497-3.c: This.
+   * gcc.dg/tree-ssa/pr88497-4.c: Move to ...
+   * gcc.dg/vect/pr88497-4.c: This.
+   * gcc.dg/tree-ssa/pr88497-5.c: Move to ...
+   * gcc.dg/vect/pr88497-5.c: This.
+   * gcc.dg/tree-ssa/pr88497-6.c: Move to ...
+   * gcc.dg/vect/pr88497-6.c: This.
+   * gcc.dg/tree-ssa/pr88497-7.c: Move to ...
+   * gcc.dg/vect/pr88497-7.c: This.
+
+   Revert:
+   * tree-ssa.exp: Set DEFAULT_VECTCFLAGS and DEFAULT_VECTCFLAGS.
+   Call check_vect_support_and_set_flags also.
+
 2020-01-22  Andrew Pinski  
 
* tree-ssa.exp: Set DEFAULT_VECTCFLAGS and DEFAULT_VECTCFLAGS.
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp 
b/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp
index 464fef628f8..7d262ffd0fb 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp
+++ b/gcc/testsuite/gcc.dg/tree-ssa/tree-ssa.exp
@@ -19,17 +19,6 @@
 # Load support procs.
 load_lib gcc-dg.exp
 
-# Set up flags used for tests that don't specify options.
-global DEFAULT_VECTCFLAGS
-set DEFAULT_VECTCFLAGS ""
-
-# Set up a list of effective targets to run vector tests for all supported
-# targets.
-global EFFECTIVE_TARGETS
-set EFFECTIVE_TARGETS ""
-
-check_vect_support_and_set_flags
-
 # If a testcase doesn't have special options, use these.
 global DEFAULT_CFLAGS
 if ![info exists DEFAULT_CFLAGS] then {
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-1.c 
b/gcc/testsuite/gcc.dg/vect/pr88497-1.c
similarity index 100%
rename from gcc/testsuite/gcc.dg/tree-ssa/pr88497-1.c
rename to gcc/testsuite/gcc.dg/vect/pr88497-1.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-2.c 
b/gcc/testsuite/gcc.dg/vect/pr88497-2.c
similarity index 100%
rename from gcc/testsuite/gcc.dg/tree-ssa/pr88497-2.c
rename to gcc/testsuite/gcc.dg/vect/pr88497-2.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-3.c 
b/gcc/testsuite/gcc.dg/vect/pr88497-3.c
similarity index 100%
rename from gcc/testsuite/gcc.dg/tree-ssa/pr88497-3.c
rename to gcc/testsuite/gcc.dg/vect/pr88497-3.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-4.c 
b/gcc/testsuite/gcc.dg/vect/pr88497-4.c
similarity index 100%
rename from gcc/testsuite/gcc.dg/tree-ssa/pr88497-4.c
rename to gcc/testsuite/gcc.dg/vect/pr88497-4.c
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr88497-5.c 
b/gcc/testsuite/gcc.dg/vect/pr88497-5.c
similarity index 100%
rename from gcc/testsuite/gcc.dg/tree-ssa/pr88497-5.c
rename to gcc/testsuite/gcc.dg/vect/pr88497-5.c
diff --git a/gcc/testsuite/gcc.dg/t

[PATCH] Fix patchable-function-entry on arc

2020-01-22 Thread apinski
From: Andrew Pinski 

The problem here is arc looks at current_output_insn unconditionally
but sometimes current_output_insn is NULL.  With patchable-function-entry,
it will be. This is similar to how nios2 handles "%.".

Committed as obvious after a simple test with -fpatchable-function-entry=1.

ChangeLog:
* config/arc/arc.c (output_short_suffix): Check insn for nullness.
---
 gcc/ChangeLog| 4 
 gcc/config/arc/arc.c | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 07e7356..55b2392 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2020-01-22  Andrew Pinski  
+
+   * config/arc/arc.c (output_short_suffix): Check insn for nullness.
+
 2020-01-22  David Malcolm  
 
PR analyzer/93307
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 02ab339..22475f2 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -5537,6 +5537,8 @@ static void
 output_short_suffix (FILE *file)
 {
   rtx_insn *insn = current_output_insn;
+  if (!insn)
+return;
 
   if (arc_verify_short (insn, cfun->machine->unalign, 1))
 {
-- 
1.8.3.1



[PATCH] Fix gcc.target/aarch64/vec_zeroextend.c for big-endian

2020-01-25 Thread apinski
From: Andrew Pinski 

vec_zeroextend.c fails on big-endian as it assumes
0 index is the lower part but it is not for
big-endian case.  This fixes the problem by
using the correct index for the lower part
for big-endian.

Committed as obvious after a test on aarch64_be-linux-gnu.

Thanks,
Andrew Pinski

ChangeLog:
* gcc.target/aarch64/vec_zeroextend.c: Fix for big-endian.
---
 gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c 
b/gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c
index 9c3971f036a..5a74cbc5aba 100644
--- a/gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c
+++ b/gcc/testsuite/gcc.target/aarch64/vec_zeroextend.c
@@ -3,17 +3,21 @@
 
 #define vector __attribute__((vector_size(16) ))
 
+#define lowull (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ? 1 : 0)
+#define lowui (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ ? 3 : 0)
+
+
 vector unsigned long long
 f1(vector unsigned long long b, vector unsigned int a)
 {
-  b[0] = a[0];
+  b[lowull] = a[lowui];
   return b;
 }
 
 unsigned long long
 f2(vector unsigned int a)
 {
-  return a[0];
+  return a[lowui];
 }
 
 /* { dg-final { scan-assembler-times {fmov} 2 } } */
-- 
2.17.1



[PATCH] Add link to porting_to.html from the changes page for GCC 9

2020-02-05 Thread apinski
From: Andrew Pinski 

Looks like the porting_to page was not linked from the changes
page for GCC 9. So uncomment the link.

Committed as obvious.

---
 htdocs/gcc-9/changes.html | 2 --
 1 file changed, 2 deletions(-)

diff --git a/htdocs/gcc-9/changes.html b/htdocs/gcc-9/changes.html
index c0e581fe..22b069c5 100644
--- a/htdocs/gcc-9/changes.html
+++ b/htdocs/gcc-9/changes.html
@@ -17,11 +17,9 @@
 
 This page is a "brief" summary of some of the huge number of improvements
 in GCC 9.
-
 
 
 
-- 
2.17.1



[COMMITTED] aarch64: fix strict alignment for vector load/stores (PR 91927)

2020-02-08 Thread apinski
From: Andrew Pinski 

Hi,
  The problem here is that the vector mode version of movmisalign
was only conditionalized on if SIMD was enabled instead of being
also conditionalized on STRICT_ALIGNMENT too.

Applied as pre-approved in the bug report by Richard Sandiford
after a bootstrap/test on aarch64-linux-gnu.

Thanks,
Andrew Pinski

ChangeLog:
PR target/91927
* config/aarch64/aarch64-simd.md (movmisalign): Check
STRICT_ALIGNMENT also.

testsuite/ChangeLog:
PR target/91927
* gcc.target/aarch64/pr91927.c: New testcase.
---
 gcc/config/aarch64/aarch64-simd.md |  2 +-
 gcc/testsuite/gcc.target/aarch64/pr91927.c | 38 ++
 2 files changed, 39 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/pr91927.c

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index c8e1012bd7f..4c651f45d0c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -41,7 +41,7 @@ (define_expand "mov"
 (define_expand "movmisalign"
   [(set (match_operand:VALL 0 "nonimmediate_operand")
 (match_operand:VALL 1 "general_operand"))]
-  "TARGET_SIMD"
+  "TARGET_SIMD && !STRICT_ALIGNMENT"
 {
   /* This pattern is not permitted to fail during expansion: if both arguments
  are non-registers (e.g. memory := constant, which can be created by the
diff --git a/gcc/testsuite/gcc.target/aarch64/pr91927.c 
b/gcc/testsuite/gcc.target/aarch64/pr91927.c
new file mode 100644
index 000..f5cde1a5336
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr91927.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mstrict-align -O3" } */
+
+#define NULL 0
+
+typedef unsigned uint32_t;
+typedef struct __attribute__((__packed__))
+{
+   uint32_t nTagID;
+   uint32_t nValueBufferSize;
+   uint32_t nValueLength;
+   
+}  PropertyTags_t;
+
+typedef struct
+{
+   char *szName;
+   uint32_t nBufferSize;
+   uint32_t nLength;
+   
+}  Something_t;
+
+void SetTag(PropertyTags_t *pTag, uint32_t nBufferSize, uint32_t nLength);
+
+void TestCase(Something_t *pSome, uint32_t nBufferSize, uint32_t nLength)
+{
+   if (pSome != NULL)
+   {
+   PropertyTags_t sTag = { 0 };
+   
+   SetTag(&sTag, nBufferSize, nLength);
+   
+   pSome->nBufferSize = sTag.nValueBufferSize;
+   pSome->nLength = sTag.nValueLength;
+   }
+}
+
+/* { dg-final { scan-assembler-not "ldr\td" } } */
-- 
2.17.1



[PATCH] aarch64: Allow -mcpu=generic -march=armv8.5-a

2020-02-13 Thread apinski
From: Andrew Pinski 

Right now, if someone supplies a -mcpu= option and then overrides
that option with -march=*, we get a warning when they conflict.
What we need is a generic cpu for each arch level but that is not
that useful because the only difference would be the arch level.
The best option is to allow -mcpu=generic -march=armv8.5-a not to
warn and that is now a generic armv8.5-a arch.

OK?  Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

ChangeLog:
* config/aarch64/aarch64.c (aarch64_override_options): Don't
warn when the selected cpu was generic.
---
 gcc/config/aarch64/aarch64.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4a34dce..9173afe 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14075,10 +14075,12 @@ aarch64_override_options (void)
explicit_tune_core = selected_tune->ident;
 }
   /* If both -mcpu and -march are specified check that they are architecturally
- compatible, warn if they're not and prefer the -march ISA flags.  */
+ compatible, warn if they're not and prefer the -march ISA flags.
+ Only warn if not using the generic cpu.  */
   else if (selected_arch)
 {
-  if (selected_arch->arch != selected_cpu->arch)
+  if (selected_cpu->ident != generic
+ && selected_arch->arch != selected_cpu->arch)
{
  warning (0, "switch %<-mcpu=%s%> conflicts with %<-march=%s%> switch",
   all_architectures[selected_cpu->arch].name,
-- 
1.8.3.1



[PATCH] Fix 'A' operand modifier: PR inline-asm/94095

2020-03-09 Thread apinski
From: Andrew Pinski 

The problem here is there was a typo in the documentation
for the 'A' modifier in the table, it was recorded as 'a'
in the table on the modifier column.

Committed as obvious.

2020-03-09  Andrew Pinski  

PR inline-asm/94095
* doc/extend.texi (x86 Operand Modifiers): Fix column
for 'A' modifier.
---
 gcc/ChangeLog   | 6 ++
 gcc/doc/extend.texi | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6c4a505..99f0011 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2020-03-09  Andrew Pinski  
+
+   PR inline-asm/94095
+   * doc/extend.texi (x86 Operand Modifiers): Fix column
+   for 'A' modifier.
+
 2020-03-09  Martin Liska  
 
PR target/93800
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 11b79a5..e0e7f54 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -10437,7 +10437,7 @@ The table below shows the list of supported modifiers 
and their effects.
 
 @multitable {Modifier} {Print the opcode suffix for the size of th} {Operand} 
{@samp{att}} {@samp{intel}}
 @headitem Modifier @tab Description @tab Operand @tab @samp{att} @tab 
@samp{intel}
-@item @code{a}
+@item @code{A}
 @tab Print an absolute memory reference.
 @tab @code{%A0}
 @tab @code{*%rax}
-- 
1.8.3.1



[PATCH] Fix libstdc++ compiling for an aarch64 multilib with big-endian.

2019-11-23 Thread apinski
From: Andrew Pinski 

Hi, if we have an aarch64 compiler that has a big-endian
multi-lib, it fails to compile libstdc++ because
simd_fast_mersenne_twister_engine is only defined for little-endian
in ext/random but ext/opt_random.h thinks it is defined always.

OK?  Built an aarch64-elf toolchain which has a big-endian multi-lib enabled.

Thanks,
Andrew Pinski

libstdc++/ChangeLog:
* config/cpu/aarch64/opt/ext/opt_random.h: Wrap around with check
for little-endian like ext/random is done.

Signed-off-by: Andrew Pinski 
---
 libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h 
b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
index 696a6d18ab4..9eca9b7df74 100644
--- a/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
+++ b/libstdc++-v3/config/cpu/aarch64/opt/ext/opt_random.h
@@ -44,6 +44,7 @@
  _C+8, _C+9, _C+10, _C+11, _C+12, _C+13, _C+14, _C+15})
 #endif
 
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 namespace __gnu_cxx _GLIBCXX_VISIBILITY (default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -175,6 +176,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 _GLIBCXX_END_NAMESPACE_VERSION
   } // namespace
 
+#endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #endif // __ARM_NEON
 
 #endif // _EXT_OPT_RANDOM_H
-- 
2.17.1



[PATCH] [PATCH/AARCH64] Use neon_move instead of neon_logic in some places

2019-03-10 Thread apinski
From: Andrew Pinski 

Hi,
  In some places in the aarch64 backend, neon_logic is used for
mov v0.16b, v1.16b
mov v0., v1.

This patch moves them over to use neon_move instead.
For most cores, this does not matter but for OcteonTX2 core
it does matter.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

ChangeLog:
* config/aarch64/aarch64.md (*movti_aarch64): Fix type attribute
for neon move.
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov): Likewise.
(*aarch64_simd_mov): Likewise.
---
 gcc/config/aarch64/aarch64-simd.md | 4 ++--
 gcc/config/aarch64/aarch64.md  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md 
b/gcc/config/aarch64/aarch64-simd.md
index e3852c5..24fbb3d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -125,7 +125,7 @@
  }
 }
   [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\
-neon_logic, neon_to_gp, f_mcr,\
+neon_move, neon_to_gp, f_mcr,\
 mov_reg, neon_move")]
 )
 
@@ -159,7 +159,7 @@
 }
 }
   [(set_attr "type" "neon_load1_1reg, store_16, neon_store1_1reg,\
-neon_logic, multiple, multiple,\
+neon_move, multiple, multiple,\
 multiple, neon_move")
(set_attr "length" "4,4,4,4,8,8,8,4")]
 )
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 70f0418..350ae11 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1195,7 +1195,7 @@
stp\\txzr, xzr, %0
ldr\\t%q0, %1
str\\t%q1, %0"
-  [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \
+  [(set_attr "type" "multiple,f_mcr,f_mrc,neon_move_q, \
 load_16,store_16,store_16,\
  load_16,store_16")
(set_attr "length" "8,8,8,4,4,4,4,4,4")
-- 
1.8.3.1



[PATCH] [AARCH64] Improve vector generation cost model

2019-03-14 Thread apinski
From: Andrew Pinski 

Hi,
  On OcteonTX2, ld1r and ld1 (with a single lane) are split
into two different micro-ops unlike most other targets.
This adds three extra costs to the cost table:
ld1_dup: used for "ld1r {v0.4s}, [x0]"
merge_dup: used for "dup v0.4s, v0.4s[0]" and "ins v0.4s[0], v0.4s[0]"
ld1_merge: used for "ld1 {v0.4s}[0], [x0]"

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

ChangeLog:
* config/arm/aarch-common-protos.h (vector_cost_table):
Add merge_dup, ld1_merge, and ld1_dup.
* config/aarch64/aarch64-cost-tables.h (qdf24xx_extra_costs):
Update for the new fields.
(thunderx_extra_costs): Likewise.
(thunderx2t99_extra_costs): Likewise.
(tsv110_extra_costs): Likewise.
* config/arm/aarch-cost-tables.h (generic_extra_costs): Likewise.
(cortexa53_extra_costs): Likewise.
(cortexa57_extra_costs): Likewise.
(exynosm1_extra_costs): Likewise.
(xgene1_extra_costs): Likewise.
* config/aarch64/aarch64.c (aarch64_rtx_costs): Handle vec_dup of a memory.
Handle vec_merge of a memory.

Signed-off-by: Andrew Pinski 
---
 gcc/config/aarch64/aarch64-cost-tables.h | 20 +++
 gcc/config/aarch64/aarch64.c | 22 +
 gcc/config/arm/aarch-common-protos.h |  3 +++
 gcc/config/arm/aarch-cost-tables.h   | 25 +++-
 4 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cost-tables.h 
b/gcc/config/aarch64/aarch64-cost-tables.h
index 5c9442e1b89..9a7c70ba595 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -123,7 +123,10 @@ const struct cpu_cost_table qdf24xx_extra_costs =
   },
   /* Vector */
   {
-COSTS_N_INSNS (1)  /* alu.  */
+COSTS_N_INSNS (1),  /* Alu.  */
+COSTS_N_INSNS (1), /* dup_merge.  */
+COSTS_N_INSNS (1), /* ld1_merge.  */
+COSTS_N_INSNS (1)  /* ld1_dup.  */
   }
 };
 
@@ -227,7 +230,10 @@ const struct cpu_cost_table thunderx_extra_costs =
   },
   /* Vector */
   {
-COSTS_N_INSNS (1)  /* Alu.  */
+COSTS_N_INSNS (1), /* Alu.  */
+COSTS_N_INSNS (1), /* dup_merge.  */
+COSTS_N_INSNS (1), /* ld1_merge.  */
+COSTS_N_INSNS (1)  /* ld1_dup.  */
   }
 };
 
@@ -330,7 +336,10 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
   },
   /* Vector */
   {
-COSTS_N_INSNS (1)  /* Alu.  */
+COSTS_N_INSNS (1), /* Alu.  */
+COSTS_N_INSNS (1), /* dup_merge.  */
+COSTS_N_INSNS (1), /* ld1_merge.  */
+COSTS_N_INSNS (1)  /* ld1_dup.  */
   }
 };
 
@@ -434,7 +443,10 @@ const struct cpu_cost_table tsv110_extra_costs =
   },
   /* Vector */
   {
-COSTS_N_INSNS (1)  /* alu.  */
+COSTS_N_INSNS (1), /* Alu.  */
+COSTS_N_INSNS (1), /* dup_merge.  */
+COSTS_N_INSNS (1), /* ld1_merge.  */
+COSTS_N_INSNS (1)  /* ld1_dup.  */
   }
 };
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b38505b0872..dc4d3d39af8 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10568,6 +10568,28 @@ cost_plus:
 }
   break;
 
+case VEC_DUPLICATE:
+  if (!speed)
+   return false;
+
+  if (GET_CODE (XEXP (x, 0)) == MEM)
+   *cost += extra_cost->vect.ld1_dup;
+  else
+   *cost += extra_cost->vect.merge_dup;
+  return true;
+
+case VEC_MERGE:
+  if (speed && GET_CODE (XEXP (x, 0)) == VEC_DUPLICATE)
+   {
+ if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MEM)
+   *cost += extra_cost->vect.ld1_merge;
+ else
+   *cost += extra_cost->vect.merge_dup;
+ return true;
+   }
+  break;
+
+
 case TRUNCATE:
 
   /* Decompose muldi3_highpart.  */
diff --git a/gcc/config/arm/aarch-common-protos.h 
b/gcc/config/arm/aarch-common-protos.h
index 11cd5145bbc..dbc1282402a 100644
--- a/gcc/config/arm/aarch-common-protos.h
+++ b/gcc/config/arm/aarch-common-protos.h
@@ -131,6 +131,9 @@ struct fp_cost_table
 struct vector_cost_table
 {
   const int alu;
+  const int merge_dup;
+  const int ld1_merge;
+  const int ld1_dup;
 };
 
 struct cpu_cost_table
diff --git a/gcc/config/arm/aarch-cost-tables.h 
b/gcc/config/arm/aarch-cost-tables.h
index bc33efadc6c..a51bc668f56 100644
--- a/gcc/config/arm/aarch-cost-tables.h
+++ b/gcc/config/arm/aarch-cost-tables.h
@@ -121,7 +121,10 @@ const struct cpu_cost_table generic_extra_costs =
   },
   /* Vector */
   {
-COSTS_N_INSNS (1)  /* alu.  */
+COSTS_N_INSNS (1),  /* alu.  */
+COSTS_N_INSNS (1), /* dup_merge.  */
+COSTS_N_INSNS (1), /* ld1_merge.  */
+COSTS_N_INSNS (1)  /* ld1_dup.  */
   }
 };
 
@@ -224,7 +227,10 @@ const struct cpu_cost_table cortexa53_extra_costs =
   },
   /* Vector */
   {
-COSTS_N_INSNS (1)  /* alu.  */
+COSTS_N_INSNS (1),  /* alu.  */
+COSTS_N_INSNS (1), /* dup_merge.  */
+COSTS_N_INSNS (1), /* ld1_merge.  */
+COSTS_N_INSNS (1)  /* ld1_dup.  */
   }
 };
 
@@ -327,7 +333,10 @@ const struct cpu_cost_table cortexa57_extra_costs =
   },
   

[PATCH] Fix PR 81721: ICE with PCH and Pragma warning and C++ operator

2019-04-01 Thread apinski
From: Andrew Pinski 

Hi,
  The problem here is the token->val.node is not saved over
a precompiled header for C++ operator.  This can cause an
internal compiler error as we tried to print out the spelling
of the token as we assumed it was valid.
The fix is to have cpp_token_val_index return CPP_TOKEN_FLD_NODE
for operator tokens that have NAMED_OP set.

OK?  Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

libcpp/ChangeLog:
* lex.c (cpp_token_val_index <case SPELL_OPERATOR>): If tok->flags
has NAMED_OP set, then return CPP_TOKEN_FLD_NODE.

gcc/testsuite/ChangeLog:
* g++.dg/pch/operator-1.C: New testcase.
* g++.dg/pch/operator-1.Hs: New file.
---
 gcc/testsuite/g++.dg/pch/operator-1.C  | 2 ++
 gcc/testsuite/g++.dg/pch/operator-1.Hs | 9 +
 libcpp/lex.c   | 6 +-
 3 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/pch/operator-1.C
 create mode 100644 gcc/testsuite/g++.dg/pch/operator-1.Hs

diff --git a/gcc/testsuite/g++.dg/pch/operator-1.C 
b/gcc/testsuite/g++.dg/pch/operator-1.C
new file mode 100644
index 000..290b5f7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pch/operator-1.C
@@ -0,0 +1,2 @@
+#include "operator-1.H"
+int main(void){ major(0);} /* { dg-warning "Did not Work" } */
diff --git a/gcc/testsuite/g++.dg/pch/operator-1.Hs 
b/gcc/testsuite/g++.dg/pch/operator-1.Hs
new file mode 100644
index 000..657dae1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pch/operator-1.Hs
@@ -0,0 +1,9 @@
+# define __glibc_macro_warning1(message) _Pragma (#message)
+# define __glibc_macro_warning(message) \
+  __glibc_macro_warning1 (GCC warning message)
+#define __SYSMACROS_DM1(...) __glibc_macro_warning (#__VA_ARGS__)
+
+#define __SYSMACROS_DM(symbol) __SYSMACROS_DM1 \
+ (Did not Work)
+
+# define major(dev) __SYSMACROS_DM (major) (dev+0)
diff --git a/libcpp/lex.c b/libcpp/lex.c
index eedfcbb..16ded6e 100644
--- a/libcpp/lex.c
+++ b/libcpp/lex.c
@@ -3756,7 +3756,11 @@ cpp_token_val_index (const cpp_token *tok)
 case SPELL_LITERAL:
   return CPP_TOKEN_FLD_STR;
 case SPELL_OPERATOR:
-  if (tok->type == CPP_PASTE)
+  /* Operands which were originally spelled as ident keep around
+ the node for the exact spelling.  */
+  if (tok->flags & NAMED_OP)
+   return CPP_TOKEN_FLD_NODE;
+  else if (tok->type == CPP_PASTE)
return CPP_TOKEN_FLD_TOKEN_NO;
   else
return CPP_TOKEN_FLD_NONE;
-- 
1.8.3.1



[PATCH] Fix tree-opt/PR106087: ICE with inline-asm with multiple output and assigned only static vars

2022-07-07 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is that when we mark the ssa name that was referenced in the 
now removed
dead store (to a write only static variable), the inline-asm would also be 
removed
even though it was defining another ssa name. This fixes the problem by checking
to make sure that the statement was only defining one ssa name.

OK? Bootstrapped and tested on x86_64 with no regressions.

PR tree-optimization/106087

gcc/ChangeLog:

* tree-ssa-dce.cc (simple_dce_from_worklist): Check
to make sure the statement is only defining one operand.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/inline-asm-1.c: New test.
---
 gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c | 14 ++
 gcc/tree-ssa-dce.cc|  5 +
 2 files changed, 19 insertions(+)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c

diff --git a/gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c 
b/gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c
new file mode 100644
index 000..0044cb761b6
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c
@@ -0,0 +1,14 @@
+/* PR tree-opt/106087,
+   simple_dce_from_worklist would delete the
+   inline-asm when it was still being referenced
+   by the other ssa name. */
+
+static int t;
+
+int f(void)
+{
+  int tt, tt1;
+  asm("":"=r"(tt), "=r"(tt1));
+  t = tt1;
+  return tt;
+}
diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc
index bc533582673..602cdb30ceb 100644
--- a/gcc/tree-ssa-dce.cc
+++ b/gcc/tree-ssa-dce.cc
@@ -2061,6 +2061,11 @@ simple_dce_from_worklist (bitmap worklist)
   if (gimple_has_side_effects (t))
continue;
 
+  /* The defining statement needs to be defining one this name. */
+  if (!is_a(t)
+ && !single_ssa_def_operand (t, SSA_OP_DEF))
+   continue;
+
   /* Don't remove statements that are needed for non-call
 eh to work.  */
   if (stmt_unremovable_because_of_non_call_eh_p (cfun, t))
-- 
2.17.1



[COMMITTED] Fix tree-opt/PR106087: ICE with inline-asm with multiple output and assigned only static vars

2022-07-08 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is that when we mark the ssa name that was referenced in the 
now removed
dead store (to a write only static variable), the inline-asm would also be 
removed
even though it was defining another ssa name. This fixes the problem by checking
to make sure that the statement was only defining one ssa name.

Committed as approved after bootstrapping and testing on x86_64 with no 
regressions.

PR tree-optimization/106087

gcc/ChangeLog:

* tree-ssa-dce.cc (simple_dce_from_worklist): Check
to make sure the statement is only defining one operand.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/inline-asm-1.c: New test.
---
 gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c | 14 ++
 gcc/tree-ssa-dce.cc|  7 +++
 2 files changed, 21 insertions(+)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c

diff --git a/gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c 
b/gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c
new file mode 100644
index 000..0044cb761b6
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/inline-asm-1.c
@@ -0,0 +1,14 @@
+/* PR tree-opt/106087,
+   simple_dce_from_worklist would delete the
+   inline-asm when it was still being referenced
+   by the other ssa name. */
+
+static int t;
+
+int f(void)
+{
+  int tt, tt1;
+  asm("":"=r"(tt), "=r"(tt1));
+  t = tt1;
+  return tt;
+}
diff --git a/gcc/tree-ssa-dce.cc b/gcc/tree-ssa-dce.cc
index bc533582673..daf0782b0e1 100644
--- a/gcc/tree-ssa-dce.cc
+++ b/gcc/tree-ssa-dce.cc
@@ -2061,6 +2061,13 @@ simple_dce_from_worklist (bitmap worklist)
   if (gimple_has_side_effects (t))
continue;
 
+  /* The defining statement needs to be defining only this name.
+ASM is the only statement that can define more than one
+(non-virtual) name. */
+  if (is_a(t)
+ && !single_ssa_def_operand (t, SSA_OP_DEF))
+   continue;
+
   /* Don't remove statements that are needed for non-call
 eh to work.  */
   if (stmt_unremovable_because_of_non_call_eh_p (cfun, t))
-- 
2.17.1



[COMMITTED] [RISC-V] Fix 32bit riscv with zbs extension enabled

2022-08-04 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here was a disconnect between splittable_const_int_operand
predicate and the function riscv_build_integer_1 for 32bits with zbs enabled.
The splittable_const_int_operand predicate had a check for TARGET_64BIT which
was not needed so this patch removed it.

Committed as obvious after a build for riscv32-elf configured with
--with-arch=rv32imac_zba_zbb_zbc_zbs.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/riscv/predicates.md (splittable_const_int_operand):
Remove the check for TARGET_64BIT for single bit const values.
---
 gcc/config/riscv/predicates.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 90db5dfcdd5..e98db2cb574 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -76,7 +76,7 @@ (define_predicate "splittable_const_int_operand"
 
   /* Check whether the constant can be loaded in a single
  instruction with zbs extensions.  */
-  if (TARGET_64BIT && TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (INTVAL (op)))
+  if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (INTVAL (op)))
 return false;
 
   /* Otherwise check whether the constant can be loaded in a single
-- 
2.27.0



[COMMITTED] Move testcase gcc.dg/tree-ssa/pr93776.c to gcc.c-torture/compile/pr93776.c

2022-08-07 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Since this testcase is not exactly SSA specific and it would
be a good idea to compile it at more than just -O1, moving
it to gcc.c-torture/compile achieves that.

Committed as obvious after a test on x86_64-linux-gnu.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr93776.c: Moved to...
* gcc.c-torture/compile/pr93776.c: ...here.
---
 .../{gcc.dg/tree-ssa => gcc.c-torture/compile}/pr93776.c  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 rename gcc/testsuite/{gcc.dg/tree-ssa => gcc.c-torture/compile}/pr93776.c (76%)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr93776.c 
b/gcc/testsuite/gcc.c-torture/compile/pr93776.c
similarity index 76%
rename from gcc/testsuite/gcc.dg/tree-ssa/pr93776.c
rename to gcc/testsuite/gcc.c-torture/compile/pr93776.c
index c407a627718..3852736c040 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr93776.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr93776.c
@@ -1,5 +1,5 @@
-/* { dg-do compile } */
-/* { dg-options "-O1" } */
+/* This used to ICE in SRA as SRA got
+   confused by the zero signed assigment. */
 
 struct empty {};
 struct s { int i; };
-- 
2.27.0



[PATCH] Fix middle-end/103645: empty struct store not removed when using compound literal

2022-08-07 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

For compound literals empty struct stores are not removed as they go down a
different path of the gimplifier; trying to optimize the init constructor.
This fixes the problem by not adding the gimple assignment at the end
of gimplify_init_constructor if it was an empty type.

Note this updates gcc.dg/pr87052.c where we had:
const char d[0] = { };
And was expecting a store to d but after this, there is no store
as the decl's type is zero in size.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR middle-end/103645
* gimplify.cc (gimplify_init_constructor): Don't build/add
gimple assignment of an empty type.

testsuite/ChangeLog:
* gcc.dg/pr87052.c: Update d var to expect nothing.
---
 gcc/gimplify.cc| 7 +--
 gcc/testsuite/gcc.dg/pr87052.c | 6 +++---
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc
index 2ac7ca0855e..f0fbdb48012 100644
--- a/gcc/gimplify.cc
+++ b/gcc/gimplify.cc
@@ -5488,8 +5488,11 @@ gimplify_init_constructor (tree *expr_p, gimple_seq 
*pre_p, gimple_seq *post_p,
   if (ret == GS_ERROR)
 return GS_ERROR;
   /* If we have gimplified both sides of the initializer but have
- not emitted an assignment, do so now.  */
-  if (*expr_p)
+ not emitted an assignment, do so now.   */
+  if (*expr_p
+  /* If the type is an empty type, we don't need to emit the
+assignment. */
+  && !is_empty_type (TREE_TYPE (TREE_OPERAND (*expr_p, 0
 {
   tree lhs = TREE_OPERAND (*expr_p, 0);
   tree rhs = TREE_OPERAND (*expr_p, 1);
diff --git a/gcc/testsuite/gcc.dg/pr87052.c b/gcc/testsuite/gcc.dg/pr87052.c
index 18e092c4674..796fe6440c1 100644
--- a/gcc/testsuite/gcc.dg/pr87052.c
+++ b/gcc/testsuite/gcc.dg/pr87052.c
@@ -23,8 +23,7 @@ void test (void)
 
   const char d[0] = { };
 
-  /* Expect the following:
- d = ""; */
+  /* Expect nothing.  */
 
   const char e[0] = "";
 
@@ -36,6 +35,7 @@ void test (void)
 /* { dg-final { scan-tree-dump-times "a = \"x00ab\";" 1 "gimple" } }
{ dg-final { scan-tree-dump-times "b = \"ax00bc\";"  1 "gimple" } }
{ dg-final { scan-tree-dump-times "c = \"\";"  1 "gimple" } }
-   { dg-final { scan-tree-dump-times "d = { *};"  1 "gimple" } }
+   { dg-final { scan-tree-dump-times "d = "  1 "gimple" } }
+   { dg-final { scan-tree-dump-times "d = {CLOBBER\\(eol\\)}"  1 "gimple" } }
{ dg-final { scan-tree-dump-times "e = "  1 "gimple" } }
{ dg-final { scan-tree-dump-times "e = {CLOBBER\\(eol\\)}"  1 "gimple" } }  
*/
-- 
2.27.0



[PATCH] Fix PR c++/66590: incorrect warning "reaches end of non-void function" for switch

2021-08-13 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So the problem here is there is code in the C++ front-end not to add a
break statement (to the IR) if the previous block does not fall through.
The problem is the code which does the check to see if the block
may fallthrough does not check a CLEANUP_STMT; it assumes it is always
fall through.  Anyways this adds the code for the case of a CLEANUP_STMT
that is only for !CLEANUP_EH_ONLY (the try/finally case).

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/cp/ChangeLog:

* cp-objcp-common.c (cxx_block_may_fallthru): Handle
CLEANUP_STMT for the case which will be try/finally.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wreturn-5.C: New test.
---
 gcc/cp/cp-objcp-common.c  |  9 +
 gcc/testsuite/g++.dg/warn/Wreturn-5.C | 15 +++
 2 files changed, 24 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/warn/Wreturn-5.C

diff --git a/gcc/cp/cp-objcp-common.c b/gcc/cp/cp-objcp-common.c
index beef0123b04..43888507b85 100644
--- a/gcc/cp/cp-objcp-common.c
+++ b/gcc/cp/cp-objcp-common.c
@@ -317,6 +317,15 @@ cxx_block_may_fallthru (const_tree stmt)
return true;
   return block_may_fallthru (ELSE_CLAUSE (stmt));
 
+case CLEANUP_STMT:
+  /* Just handle the try/finally cases.  */
+  if (!CLEANUP_EH_ONLY (stmt))
+   {
+ return (block_may_fallthru (CLEANUP_BODY (stmt))
+ && block_may_fallthru (CLEANUP_EXPR (stmt)));
+   }
+  return true;
+
 default:
   return c_block_may_fallthru (stmt);
 }
diff --git a/gcc/testsuite/g++.dg/warn/Wreturn-5.C 
b/gcc/testsuite/g++.dg/warn/Wreturn-5.C
new file mode 100644
index 000..543e33e905d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/warn/Wreturn-5.C
@@ -0,0 +1,15 @@
+// PR C++/66590
+// { dg-do compile }
+// { dg-options "-Wall" }
+
+struct A{ ~A();};
+
+int f(int x)
+{
+A a;
+switch (x)
+{
+case 1: { A tmp; return 1; } break;
+default: return 0;
+}
+}  // { dg-bogus "control reaches end of non-void function" }
-- 
2.27.0



[PATCH 1/2] Add gimple_truth_valued_p to match.pd and use it

2021-08-13 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

While working on some more boolean optimizations, I noticed
that there are places which match SSA_NAME@0 and then
use either get_nonzero_bits or ssa_name_has_boolean_range
to see if the ssa name has a boolean range. This cleans this
up slightly by adding a simple match pattern called gimple_truth_valued_p
which matches on SSA_NAME and checks ssa_name_has_boolean_range.
This is the first of a few cleanups I am going to do for
match and simplify and boolean related changes.

gcc/ChangeLog:

* match.pd: New match, gimple_truth_valued_p.
Use it for "{ 0 or 1 } * { 0 or 1 }",
"X / bool_range_Y", and "-(type)!A" simplifications.
---
 gcc/match.pd | 23 +++
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 5cc6a9fd41c..b1f2c02 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -98,6 +98,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (define_operator_list COND_TERNARY
   IFN_COND_FMA IFN_COND_FMS IFN_COND_FNMA IFN_COND_FNMS)
 
+/* Match for a SSA_NAME which has a range of [0,1] */
+(match gimple_truth_valued_p
+ SSA_NAME@0
+ (if (INTEGRAL_TYPE_P (type) && ssa_name_has_boolean_range (@0
+
 /* With nop_convert? combine convert? and view_convert? in one pattern
plus conditionalize on tree_nop_conversion_p conversions.  */
 (match (nop_convert @0)
@@ -230,11 +235,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* Transform { 0 or 1 } * { 0 or 1 } into { 0 or 1 } & { 0 or 1 } */
 (simplify
- (mult SSA_NAME@1 SSA_NAME@2)
-  (if (INTEGRAL_TYPE_P (type)
-   && get_nonzero_bits (@1) == 1
-   && get_nonzero_bits (@2) == 1)
-   (bit_and @1 @2)))
+ (mult gimple_truth_valued_p@1 gimple_truth_valued_p@2)
+  (bit_and @1 @2))
 
 /* Transform x * { 0 or 1, 0 or 1, ... } into x & { 0 or -1, 0 or -1, ...},
unless the target has native support for the former but not the latter.  */
@@ -347,9 +349,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(negate @0)))
  /* X / bool_range_Y is X.  */ 
  (simplify
-  (div @0 SSA_NAME@1)
-  (if (INTEGRAL_TYPE_P (type) && ssa_name_has_boolean_range (@1))
-   @0))
+  (div @0 gimple_truth_valued_p@1)
+   @0)
  /* X / X is one.  */
  (simplify
   (div @0 @0)
@@ -4207,12 +4208,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* -(type)!A -> (type)A - 1.  */
 (simplify
- (negate (convert?:s (logical_inverted_value:s @0)))
+ (negate (convert?:s (logical_inverted_value:s gimple_truth_valued_p@0)))
  (if (INTEGRAL_TYPE_P (type)
   && TREE_CODE (type) != BOOLEAN_TYPE
-  && TYPE_PRECISION (type) > 1
-  && TREE_CODE (@0) == SSA_NAME
-  && ssa_name_has_boolean_range (@0))
+  && TYPE_PRECISION (type) > 1)
   (plus (convert:type @0) { build_all_ones_cst (type); })))
 
 /* A + (B vcmp C ? 1 : 0) -> A - (B vcmp C ? -1 : 0), since vector comparisons
-- 
2.27.0



[PATCH 2/2] Fix 101805: Simplify min/max of boolean arguments

2021-08-13 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

I noticed this while Richard B. was fixing PR101756.
Basically min of two bools is the same as doing an "and"
and max of two bools is doing an "ior".

gcc/ChangeLog:

* match.pd: Add min/max patterns for bool types.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/bool-12.c: New test.
---
 gcc/match.pd| 10 +
 gcc/testsuite/gcc.dg/tree-ssa/bool-12.c | 27 +
 2 files changed, 37 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/bool-12.c

diff --git a/gcc/match.pd b/gcc/match.pd
index b1f2c02..8fd60d08cfe 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -3103,6 +3103,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& (GIMPLE || !TREE_SIDE_EFFECTS (@1)))
(cond (cmp @2 @3) @1 @0
 
+/* max(bool0, bool1) -> bool0 | bool1 */
+(simplify
+ (max gimple_truth_valued_p@0 gimple_truth_valued_p@1)
+ (bit_ior @0 @1))
+
+/* min(bool0, bool1) -> bool0 & bool1 */
+(simplify
+ (min gimple_truth_valued_p@0 gimple_truth_valued_p@1)
+ (bit_and @0 @1))
+
 /* Simplifications of shift and rotates.  */
 
 (for rotate (lrotate rrotate)
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/bool-12.c 
b/gcc/testsuite/gcc.dg/tree-ssa/bool-12.c
new file mode 100644
index 000..2d8ad9912d3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/bool-12.c
@@ -0,0 +1,27 @@
+/* { dg-do compile } */
+/* { dg-options "-O1 -fdump-tree-optimized -fdump-tree-original" } */
+#define bool _Bool
+int maxbool(bool ab, bool bb)
+{
+  int a = ab;
+  int b = bb;
+  int c;
+  c = (a > b)?a : b;
+  return c;
+}
+int minbool(bool ab, bool bb)
+{
+  int a = ab;
+  int b = bb;
+  int c;
+  c = (a < b)?a : b;
+  return c;
+}
+/* Original should have one of each MAX/MIN expressions. */
+/* { dg-final { scan-tree-dump-times "MAX_EXPR" 1 "original" } */
+/* { dg-final { scan-tree-dump-times "MIN_EXPR" 1 "original"} } */
+
+/* By the time we reach optimized, the MAX and MIN expressions
+   should have been removed. */
+/* { dg-final { scan-tree-dump-times "MAX_EXPR" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "MIN_EXPR" 0 "optimized"} } */
-- 
2.27.0



[PATCH] Add range/nonzero info to generated ADD_OVERFLOW and simplify

2021-08-13 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Even though this does not change the generated code,
it does improve the initial RTL generation.

gcc/ChangeLog:

* tree-ssa-math-opts.c (match_arith_overflow):
Add range and nonzero bits information to
the new overflow ssa name.  Also fold
the use statement.
---
 gcc/tree-ssa-math-opts.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index c4a6492b50d..bb7edeaa6f7 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -4221,6 +4221,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
}
 }
   tree ovf = make_ssa_name (type);
+  set_range_info (ovf, VR_RANGE, wi::zero (TYPE_PRECISION (type)), wi::one 
(TYPE_PRECISION (type)));
+  set_nonzero_bits (ovf, wi::one (TYPE_PRECISION (type)));
   g2 = gimple_build_assign (ovf, IMAGPART_EXPR,
build1 (IMAGPART_EXPR, type, ctmp));
   if (code != BIT_NOT_EXPR)
@@ -4279,6 +4281,8 @@ match_arith_overflow (gimple_stmt_iterator *gsi, gimple 
*stmt,
  gimple_assign_set_rhs1 (use_stmt, cond);
}
}
+  gimple_stmt_iterator gsi1 = gsi_for_stmt (use_stmt);
+  fold_stmt (&gsi1);
   update_stmt (use_stmt);
   if (code == MULT_EXPR && use_stmt != orig_use_stmt)
{
-- 
2.27.0



[PATCH] Fix a few problems with download_prerequisites.

2021-08-23 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

There are a few problems with download_prerequisites that are
described in PR 82704.  The first is that on the busybox versions of
shasum and md5sum the extended option --check doesn't exist,
so just use -c.  The second issue is that the code selecting which
shasum program to use is included twice and is different.
So move the selection of the checksum program to after argument
parsing.  The last issue is that the --md5 option has been broken for
some time now, as the program is named md5sum and not just md5.
Nobody updated the switch table to be correct.

contrib/ChangeLog:

PR other/82704
* download_prerequisites: Fix issues with --md5 and
--sha512 options.
---
 contrib/download_prerequisites | 59 +-
 1 file changed, 30 insertions(+), 29 deletions(-)

diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites
index 51e715f..8f69b61 100755
--- a/contrib/download_prerequisites
+++ b/contrib/download_prerequisites
@@ -46,18 +46,6 @@ verify=1
 force=0
 OS=$(uname)
 
-case $OS in
-  "Darwin"|"FreeBSD"|"DragonFly"|"AIX")
-chksum='shasum -a 512 --check'
-  ;;
-  "OpenBSD")
-chksum='sha512 -c'
-  ;;
-  *)
-chksum='sha512sum -c'
-  ;;
-esac
-
 if type wget > /dev/null ; then
   fetch='wget'
 else
@@ -113,7 +101,7 @@ do
 done
 unset arg
 
-# Emulate Linux's 'md5 --check' on macOS
+# Emulate Linux's 'md5sum --check' on macOS
 md5_check() {
   # Store the standard input: a line from contrib/prerequisites.md5:
   md5_checksum_line=$(cat -)
@@ -162,26 +150,10 @@ do
 verify=0
 ;;
 --sha512)
-case $OS in
-  "Darwin")
-chksum='shasum -a 512 --check'
-  ;;
-  *)
-chksum='sha512sum --check'
-  ;;
-esac
 chksum_extension='sha512'
 verify=1
 ;;
 --md5)
-case $OS in
-  "Darwin")
-chksum='md5_check'
-  ;;
-  *)
-chksum='md5 --check'
-  ;;
-esac
 chksum_extension='md5'
 verify=1
 ;;
@@ -212,6 +184,35 @@ done
 [ "x${argnext}" = x ] || die "Missing argument for option --${argnext}"
 unset arg argnext
 
+case $chksum_extension in
+  sha512)
+case $OS in
+  "Darwin"|"FreeBSD"|"DragonFly"|"AIX")
+chksum='shasum -a 512 --check'
+  ;;
+  "OpenBSD")
+chksum='sha512 -c'
+  ;;
+  *)
+chksum='sha512sum -c'
+  ;;
+esac
+  ;;
+  md5)
+case $OS in
+  "Darwin")
+chksum='md5_check'
+  ;;
+  *)
+chksum='md5sum -c'
+  ;;
+esac
+;;
+  *)
+die "Unkown checksum $chksum_extension"
+  ;;
+esac
+
 [ -e ./gcc/BASE-VER ] \
 || die "You must run this script in the top-level GCC source directory"
 
-- 
1.8.3.1



[PATCH] Fix PR 90142: contrib/download_prerequisites uses test ==

2021-08-30 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Since == is not portable, it is better to use = in contrib/
download_prerequisites.  The only place == was used is inside
the function md5_check which is used only on Mac OS X.

Tested on Mac OS X as:
./contrib/download_prerequisites --md5
Both with all files having the correct checksum and one with a broken one.

contrib/ChangeLog:

* download_prerequisites (md5_check): Replace == inside
test with = to be more portable.
---
 contrib/download_prerequisites | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites
index 8f69b61f5a9..11c283ecb1a 100755
--- a/contrib/download_prerequisites
+++ b/contrib/download_prerequisites
@@ -113,7 +113,7 @@ md5_check() {
   md5_checksum_output=$(md5 -r "${file_to_check}")
   # Grab the text before the first space
   md5_checksum_detected="${md5_checksum_output%% *}"
-  [ "${md5_checksum_expected}" == "${md5_checksum_detected}" ] \
+  [ "${md5_checksum_expected}" = "${md5_checksum_detected}" ] \
 || die "Cannot verify integrity of possibly corrupted file 
${file_to_check}"
   echo "${file_to_check}: OK"
 }
-- 
2.17.1



[PATCH] Fix PR driver/79181 (and others), not deleting some /tmp/cc* files for LTO.

2021-08-30 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So the main issue here is that some signals are not set up, unlike in collect2.
This merges the setting up of the signal handlers into one function in
collect-utils and has collect2 and lto-wrapper call that function.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

PR driver/79181
* collect-utils.c (setup_signals): New function.
* collect-utils.h (setup_signals): New declaration.
* collect2.c (handler): Delete.
(main): Instead of manually setting up the signals,
just call setup_signals.
* lto-wrapper.c (main): Likewise.
---
 gcc/collect-utils.c | 37 +
 gcc/collect-utils.h |  1 +
 gcc/collect2.c  | 36 +---
 gcc/lto-wrapper.c   | 18 +-
 4 files changed, 40 insertions(+), 52 deletions(-)

diff --git a/gcc/collect-utils.c b/gcc/collect-utils.c
index 6b5d61d5162..19423d31885 100644
--- a/gcc/collect-utils.c
+++ b/gcc/collect-utils.c
@@ -57,6 +57,43 @@ fatal_signal (int signum)
  so its normal effect occurs.  */
   kill (getpid (), signum);
 }
+
+/* Setup the signal handlers for the utils. */
+void
+setup_signals (void)
+{
+#ifdef SIGQUIT
+  if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
+signal (SIGQUIT, fatal_signal);
+#endif
+  if (signal (SIGINT, SIG_IGN) != SIG_IGN)
+signal (SIGINT, fatal_signal);
+#ifdef SIGALRM
+  if (signal (SIGALRM, SIG_IGN) != SIG_IGN)
+signal (SIGALRM, fatal_signal);
+#endif
+#ifdef SIGHUP
+  if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
+signal (SIGHUP, fatal_signal);
+#endif
+  if (signal (SIGSEGV, SIG_IGN) != SIG_IGN)
+signal (SIGSEGV, fatal_signal);
+  if (signal (SIGTERM, SIG_IGN) != SIG_IGN)
+signal (SIGTERM, fatal_signal);
+#ifdef SIGPIPE
+  if (signal (SIGPIPE, SIG_IGN) != SIG_IGN)
+signal (SIGPIPE, fatal_signal);
+#endif
+#ifdef SIGBUS
+  if (signal (SIGBUS, SIG_IGN) != SIG_IGN)
+signal (SIGBUS, fatal_signal);
+#endif
+#ifdef SIGCHLD
+  /* We *MUST* set SIGCHLD to SIG_DFL so that the wait4() call will
+ receive the signal.  A different setting is inheritable */
+  signal (SIGCHLD, SIG_DFL);
+#endif
+}
 
 /* Wait for a process to finish, and exit if a nonzero status is found.  */
 
diff --git a/gcc/collect-utils.h b/gcc/collect-utils.h
index 4f0e3ce9832..15f831d778a 100644
--- a/gcc/collect-utils.h
+++ b/gcc/collect-utils.h
@@ -24,6 +24,7 @@ along with GCC; see the file COPYING3.  If not see
 extern void notice (const char *, ...)
   __attribute__ ((format (printf, 1, 2)));
 extern void fatal_signal (int);
+extern void setup_signals (void);
 
 extern struct pex_obj *collect_execute (const char *, char **,
const char *, const char *,
diff --git a/gcc/collect2.c b/gcc/collect2.c
index 07092c2733a..cf04a58ba4d 100644
--- a/gcc/collect2.c
+++ b/gcc/collect2.c
@@ -301,7 +301,6 @@ const char tool_name[] = "collect2";
 
 static symkind is_ctor_dtor (const char *);
 
-static void handler (int);
 static void maybe_unlink_list (char **);
 static void add_to_list (struct head *, const char *);
 static int extract_init_priority (const char *);
@@ -408,14 +407,6 @@ collect_atexit (void)
   tool_cleanup (false);
 }
 
-static void
-handler (int signo)
-{
-  tool_cleanup (true);
-
-  signal (signo, SIG_DFL);
-  raise (signo);
-}
 /* Notify user of a non-error, without translating the format string.  */
 void
 notice_translated (const char *cmsgid, ...)
@@ -907,11 +898,7 @@ main (int argc, char **argv)
   COLLECT2_HOST_INITIALIZATION;
 #endif
 
-#ifdef SIGCHLD
-  /* We *MUST* set SIGCHLD to SIG_DFL so that the wait4() call will
- receive the signal.  A different setting is inheritable */
-  signal (SIGCHLD, SIG_DFL);
-#endif
+  setup_signals ();
 
   /* Unlock the stdio streams.  */
   unlock_std_streams ();
@@ -1051,27 +1038,6 @@ main (int argc, char **argv)
   if (argc < 2)
 fatal_error (input_location, "no arguments");
 
-#ifdef SIGQUIT
-  if (signal (SIGQUIT, SIG_IGN) != SIG_IGN)
-signal (SIGQUIT, handler);
-#endif
-  if (signal (SIGINT, SIG_IGN) != SIG_IGN)
-signal (SIGINT, handler);
-#ifdef SIGALRM
-  if (signal (SIGALRM, SIG_IGN) != SIG_IGN)
-signal (SIGALRM, handler);
-#endif
-#ifdef SIGHUP
-  if (signal (SIGHUP, SIG_IGN) != SIG_IGN)
-signal (SIGHUP, handler);
-#endif
-  if (signal (SIGSEGV, SIG_IGN) != SIG_IGN)
-signal (SIGSEGV, handler);
-#ifdef SIGBUS
-  if (signal (SIGBUS, SIG_IGN) != SIG_IGN)
-signal (SIGBUS, handler);
-#endif
-
   /* Extract COMPILER_PATH and PATH into our prefix list.  */
   prefix_from_env ("COMPILER_PATH", &cpath);
   prefix_from_env ("PATH", &path);
diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index aae48aff100..903c258a03a 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -2125,23 +2125,7 @@ main (int argc, char *argv[])
   if (atexit (lto_wrapper_cleanup) != 0)
 fatal_error (input_location, "% failed");
 
-  if (signal (SIGINT, SIG_IGN) != S

[PATCH] Fix gcc.dg/ipa/inline-8.c for -fPIC

2021-08-30 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is with -fPIC, both cmp and move
don't bind locally so they are not even tried to be
inlined.  This fixes the issue by marking both
functions as static and now the testcase passes
for both -fPIC and -fno-PIC cases.

OK? Tested on x86_64-linux-gnu.

gcc/testsuite/ChangeLog:

* gcc.dg/ipa/inline-8.c: Mark cmp and move as
static so they both bind locally and are available for
inlining.
---
 gcc/testsuite/gcc.dg/ipa/inline-8.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/ipa/inline-8.c 
b/gcc/testsuite/gcc.dg/ipa/inline-8.c
index 388283ca213..c51eec20fc8 100644
--- a/gcc/testsuite/gcc.dg/ipa/inline-8.c
+++ b/gcc/testsuite/gcc.dg/ipa/inline-8.c
@@ -6,13 +6,13 @@
 #include 
 extern int isnanf (float);
 /* Can't be inlined because isnanf will be optimized out.  */
-int
+static int
 cmp (float a)
 {
   return isnanf (a);
 }
 /* Can be inlined.  */
-int
+static int
 move (int a)
 {
   return a;
-- 
2.17.1



[PATCH] Fix tree-optimization/101941: IPA splitting out function with error attribute

2022-01-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The Linux kernel started to fail to compile when the jump threader was improved
(r12-2591-g2e96b5f14e4025691). This failure was due to the IPA splitting code
now deciding to split off the basic block which contained two function calls,
one of which was to a function with the error attribute on it.  This patch fixes
the problem by disallowing basic blocks from being split which contain calls to
functions that have either the error or warning attribute on them.

The two new testcases are to make sure we still split the function for other
places if we reject the one case.

Committed as approved after Bootstrapped and tested on x86_64-linux-gnu with no 
regressions.

PR tree-optimization/101941

gcc/ChangeLog:

* ipa-split.cc (visit_bb): Disallow function calls where
the function has either error or warning attribute.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr101941-1.c: New test.
* gcc.dg/tree-ssa/pr101941-1.c: New test.
---
 gcc/ipa-split.cc  | 74 ---
 .../gcc.c-torture/compile/pr101941-1.c| 50 +
 gcc/testsuite/gcc.dg/tree-ssa/pr101941-1.c| 53 +
 3 files changed, 149 insertions(+), 28 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr101941-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101941-1.c

diff --git a/gcc/ipa-split.cc b/gcc/ipa-split.cc
index fc6be8eadef..6ca45f3b745 100644
--- a/gcc/ipa-split.cc
+++ b/gcc/ipa-split.cc
@@ -873,7 +873,6 @@ visit_bb (basic_block bb, basic_block return_bb,
   gimple *stmt = gsi_stmt (bsi);
   tree op;
   ssa_op_iter iter;
-  tree decl;
 
   if (is_gimple_debug (stmt))
continue;
@@ -899,33 +898,52 @@ visit_bb (basic_block bb, basic_block return_bb,
  can_split = false;
}
 
-  /* Check builtins that prevent splitting.  */
-  if (gimple_code (stmt) == GIMPLE_CALL
- && (decl = gimple_call_fndecl (stmt)) != NULL_TREE
- && fndecl_built_in_p (decl, BUILT_IN_NORMAL))
-   switch (DECL_FUNCTION_CODE (decl))
- {
- /* FIXME: once we will allow passing non-parm values to split part,
-we need to be sure to handle correct builtin_stack_save and
-builtin_stack_restore.  At the moment we are safe; there is no
-way to store builtin_stack_save result in non-SSA variable
-since all calls to those are compiler generated.  */
- case BUILT_IN_APPLY:
- case BUILT_IN_APPLY_ARGS:
- case BUILT_IN_VA_START:
-   if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file,
-  "Cannot split: builtin_apply and va_start.\n");
-   can_split = false;
-   break;
- case BUILT_IN_EH_POINTER:
-   if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Cannot split: builtin_eh_pointer.\n");
-   can_split = false;
-   break;
- default:
-   break;
- }
+  /* Check calls that would prevent splitting.  */
+  if (gimple_code (stmt) == GIMPLE_CALL)
+   {
+ if (tree decl = gimple_call_fndecl (stmt))
+   {
+ /* Check builtins that would prevent splitting.  */
+ if (fndecl_built_in_p (decl, BUILT_IN_NORMAL))
+   switch (DECL_FUNCTION_CODE (decl))
+ {
+ /* FIXME: once we will allow passing non-parm values to
+split part, we need to be sure to handle correct
+builtin_stack_save and builtin_stack_restore.  At the
+moment we are safe; there is no way to store
+builtin_stack_save result in non-SSA variable since all
+calls to those are compiler generated.  */
+ case BUILT_IN_APPLY:
+ case BUILT_IN_APPLY_ARGS:
+ case BUILT_IN_VA_START:
+   if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+  "Cannot split: builtin_apply and va_start.\n");
+   can_split = false;
+   break;
+ case BUILT_IN_EH_POINTER:
+   if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file,
+  "Cannot split: builtin_eh_pointer.\n");
+   can_split = false;
+   break;
+ default:
+   break;
+ }
+
+ /* Calls that function has either the warning or error
+attribute on it should not be split off into another
+function.  */
+ if (lookup_attribute ("warning", DECL_ATTRIBUTES (decl))
+  || lookup_attribute ("error", DECL_ATTRIBUTES (decl)))
+   {
+ if (dump_file && (dump_flags & TDF_DETAILS))

[PATCH] [COMMITTED] Improve comment for the newly added code in ipa-split.

2022-01-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

It was pointed out to me by Jakub, that the comment in front of
the new code which handles warning/error attribute was not really
understandable. This fixes the comment to be understandable; I
don't know why I wrote the original comment that way even.

Committed as obvious after a quick build.

gcc/ChangeLog:

* ipa-split.cc (visit_bb): Fix comment before the
warning/error attribute checking code.
---
 gcc/ipa-split.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/ipa-split.cc b/gcc/ipa-split.cc
index 6ca45f3b745..ff11cf34b23 100644
--- a/gcc/ipa-split.cc
+++ b/gcc/ipa-split.cc
@@ -931,8 +931,8 @@ visit_bb (basic_block bb, basic_block return_bb,
break;
  }
 
- /* Calls that function has either the warning or error
-attribute on it should not be split off into another
+ /* Calls to functions (which have the warning or error
+attribute on them) should not be split off into another
 function.  */
  if (lookup_attribute ("warning", DECL_ATTRIBUTES (decl))
   || lookup_attribute ("error", DECL_ATTRIBUTES (decl)))
-- 
2.27.0



[PATCH] [aarch64/64821]: Simplify __builtin_aarch64_sqrt* into internal function .SQRT.

2022-01-23 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This is a simple patch which simplifies the __builtin_aarch64_sqrt* builtins
into the internal function SQRT which allows for constant folding and other
optimizations at the gimple level. It was originally suggested that we lower to
__builtin_sqrt just for __builtin_aarch64_sqrtdf when -fno-math-errno is in
effect, but since r6-4969-g686ee9719a4 we have the internal function SQRT which
does the same, so we don't need to check -fno-math-errno either now.

Applied as approved after bootstrapped and tested on aarch64-linux-gnu with no 
regressions.

PR target/64821

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.cc
(aarch64_general_gimple_fold_builtin): Handle
__builtin_aarch64_sqrt* and simplify into SQRT internal
function.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vsqrt-1.c: New test.
* gcc.target/aarch64/vsqrt-2.c: New test.
---
 gcc/config/aarch64/aarch64-builtins.cc |  7 ++
 gcc/testsuite/gcc.target/aarch64/vsqrt-1.c | 17 +
 gcc/testsuite/gcc.target/aarch64/vsqrt-2.c | 28 ++
 3 files changed, 52 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/vsqrt-2.c

diff --git a/gcc/config/aarch64/aarch64-builtins.cc 
b/gcc/config/aarch64/aarch64-builtins.cc
index b7f338d6229..5217dbdb2ac 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -2820,6 +2820,13 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, 
gcall *stmt,
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
break;
 
+  /* Lower sqrt builtins to gimple/internal function sqrt. */
+  BUILTIN_VHSDF_DF (UNOP, sqrt, 2, FP)
+   new_stmt = gimple_build_call_internal (IFN_SQRT,
+  1, args[0]);
+   gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
+   break;
+
  /*lower store and load neon builtins to gimple.  */
  BUILTIN_VALL_F16 (LOAD1, ld1, 0, LOAD)
  BUILTIN_VDQ_I (LOAD1_U, ld1, 0, LOAD)
diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c 
b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
new file mode 100644
index 000..e614c7d5a0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-1.c
@@ -0,0 +1,17 @@
+/* PR target/64821 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* Check that we constant fold sqrt(4.0) into 2.0. */
+/* { dg-final { scan-tree-dump-not " \\\.SQRT" "optimized" } } */
+/* { dg-final { scan-tree-dump " 2\\\.0e\\\+0" "optimized" } } */
+/* { dg-final { scan-assembler-not "fsqrt" } } */
+/* We should produce a fmov to d0 with 2.0 but currently don't, see PR 103959. 
*/
+/* { dg-final { scan-assembler-times "\n\tfmov\td0, 2.0e.0" 1 { xfail *-*-* } 
} } */
+
+#include 
+
+float64x1_t f64(void)
+{
+   float64x1_t a = (float64x1_t){4.0};
+   return vsqrt_f64 (a);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c 
b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
new file mode 100644
index 000..4dea4da7da6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vsqrt-2.c
@@ -0,0 +1,28 @@
+/* PR target/64821 */
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-optimized" } */
+#include 
+
+/* Check that we lower __builtin_aarch64_sqrt* into the internal function 
SQRT. */
+/* { dg-final { scan-tree-dump-times " __builtin_aarch64_sqrt" 0 "optimized" } 
} */
+/* { dg-final { scan-tree-dump-times " \\\.SQRT " 4 "optimized" } } */
+
+float64x1_t f64(float64x1_t a)
+{
+  return vsqrt_f64 (a);
+}
+
+float64x2_t f64q(float64x2_t a)
+{
+  return vsqrtq_f64 (a);
+}
+
+float32x2_t f32(float32x2_t a)
+{
+  return vsqrt_f32 (a);
+}
+
+float32x4_t f32q(float32x4_t a)
+{
+  return vsqrtq_f32 (a);
+}
-- 
2.17.1



[PATCH v3] [AARCH64] Fix PR target/103100 -mstrict-align and memset on not aligned buffers

2022-01-25 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is that aarch64_expand_setmem does not change the alignment
for the strict alignment case. This is version 3 of this patch; it is based on
version 2 and moves the check for the number of instructions from the
optimizing for size case to be always and change the cost of libcalls for
the !size case to be max_size/16 + 1 (or 17) which was the same as before
when handling just the max_size. The main change is dealing with strict
alignment case where we only inline a max of 17 instructions as at that
point the call to the memset will be faster and could handle the dynamic
alignment instead of just the static alignment.

Note the reason why it is +1 is to account for the setting of the simd
duplicate.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

PR target/103100
gcc/ChangeLog:

* config/aarch64/aarch64.cc (aarch64_expand_setmem): Constrain
copy_limit to the alignment of the mem if STRICT_ALIGNMENT is
true. Also constrain the number of instructions for the !size
case to max_size/16 + 1.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/memset-strict-align-1.c: Update test.
Reduce the size down to 207 and make s1 global and aligned
to 16 bytes.
* gcc.target/aarch64/memset-strict-align-2.c: New test.
---
 gcc/config/aarch64/aarch64.cc | 55 ++-
 .../aarch64/memset-strict-align-1.c   | 20 +++
 .../aarch64/memset-strict-align-2.c   | 14 +
 3 files changed, 53 insertions(+), 36 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/memset-strict-align-2.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 296145e6008..02ecb2154ea 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23831,8 +23831,11 @@ aarch64_expand_setmem (rtx *operands)
 (zero constants can use XZR directly).  */
   unsigned mops_cost = 3 + 1 + cst_val;
   /* A libcall to memset in the worst case takes 3 instructions to prepare
- the arguments + 1 for the call.  */
-  unsigned libcall_cost = 4;
+ the arguments + 1 for the call.
+ In the case of not optimizing for size the cost of doing a libcall
+ is the max_set_size / 16 + 1 or 17 instructions. The one instruction
+ is for the vector dup which may or may not be used.  */
+  unsigned libcall_cost = size_p ? 4 : (max_set_size / 16 + 1);
 
   /* Upper bound check.  For large constant-sized setmem use the MOPS sequence
  when available.  */
@@ -23842,12 +23845,12 @@ aarch64_expand_setmem (rtx *operands)
 
   /* Attempt a sequence with a vector broadcast followed by stores.
  Count the number of operations involved to see if it's worth it
- against the alternatives.  A simple counter simd_ops on the
+ against the alternatives.  A simple counter inlined_ops on the
  algorithmically-relevant operations is used rather than an rtx_insn count
  as all the pointer adjusmtents and mode reinterprets will be optimized
  away later.  */
   start_sequence ();
-  unsigned simd_ops = 0;
+  unsigned inlined_ops = 0;
 
   base = copy_to_mode_reg (Pmode, XEXP (dst, 0));
   dst = adjust_automodify_address (dst, VOIDmode, base, 0);
@@ -23855,15 +23858,22 @@ aarch64_expand_setmem (rtx *operands)
   /* Prepare the val using a DUP/MOVI v0.16B, val.  */
   src = expand_vector_broadcast (V16QImode, val);
   src = force_reg (V16QImode, src);
-  simd_ops++;
+  inlined_ops++;
   /* Convert len to bits to make the rest of the code simpler.  */
   n = len * BITS_PER_UNIT;
 
   /* Maximum amount to copy in one go.  We allow 256-bit chunks based on the
  AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS tuning parameter.  */
-  const int copy_limit = (aarch64_tune_params.extra_tuning_flags
- & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
- ? GET_MODE_BITSIZE (TImode) : 256;
+  int copy_limit;
+
+  if (aarch64_tune_params.extra_tuning_flags
+  & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
+copy_limit = GET_MODE_BITSIZE (TImode);
+  else
+copy_limit = 256;
+
+  if (STRICT_ALIGNMENT)
+copy_limit = MIN (copy_limit, (int)MEM_ALIGN (dst));
 
   while (n > 0)
 {
@@ -23878,7 +23888,7 @@ aarch64_expand_setmem (rtx *operands)
 
   mode_bits = GET_MODE_BITSIZE (cur_mode).to_constant ();
   aarch64_set_one_block_and_progress_pointer (src, &dst, cur_mode);
-  simd_ops++;
+  inlined_ops++;
   n -= mode_bits;
 
   /* Do certain trailing copies as overlapping if it's going to be
@@ -23897,24 +23907,17 @@ aarch64_expand_setmem (rtx *operands)
   rtx_insn *seq = get_insns ();
   end_sequence ();
 
-  if (size_p)
-{
-  /* When optimizing for size we have 3 options: the SIMD broadcast 
sequence,
-call to memset or the MOPS expansion.  */
-  if (TARGET_MOPS
- && mops_cost <= libcall_cost
- && mops_cost <= simd_ops)
-   return aarch64_expand_s

[PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion

2022-01-26 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The function aarch64_evpc_ins would reuse the target even though
it might be the same register as the two inputs.
Instead of checking to see if we can reuse the target, just use the
original input directly.

Committed as approved after bootstrapped and tested on
aarch64-linux-gnu with no regressions.
Note the testcases are not backported as __builtin_shufflevector
does not exist in GCC 11.

PR target/101529

gcc/ChangeLog:

* config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
as an input, use original one.

(cherry picked from commit 52fa771758635d9c53cddb9116e5a66fae592230)
---
 gcc/config/aarch64/aarch64.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index bbcf5ed4a61..b58a379759d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23026,11 +23026,10 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
 }
   gcc_assert (extractindex < nelt);
 
-  emit_move_insn (d->target, insv);
   insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
   expand_operand ops[5];
   create_output_operand (&ops[0], d->target, mode);
-  create_input_operand (&ops[1], d->target, mode);
+  create_input_operand (&ops[1], insv, mode);
   create_integer_operand (&ops[2], 1 << idx);
   create_input_operand (&ops[3], extractv, mode);
   create_integer_operand (&ops[4], extractindex);
-- 
2.17.1



[PATCH] Fix aarch64/104201: branch-protection-attr.c fails after quoting difference

2022-01-27 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

After the quoting changes in r12-6521-g03a1a86b5ee40d4e240, 
branch-protection-attr.c
fails due to expecting a different quoting type for "leaf".
This patch changes the quoting from "" to '' as that is what is used now.

Committed as obvious after a test of the testcase.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/branch-protection-attr.c: Fix quoting for
the expected error message on line 5 of leaf.
---
 gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c 
b/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c
index 229ce1ca7be..1d6e55f3907 100644
--- a/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c
+++ b/gcc/testsuite/gcc.target/aarch64/branch-protection-attr.c
@@ -4,7 +4,7 @@ void __attribute__ ((target("branch-protection=leaf")))
 foo1 ()
 {
 }
-/* { dg-error {invalid protection type \("leaf"\) in 
'target\("branch-protection="\)' pragma or attribute} "" { target *-*-* } 5 } */
+/* { dg-error {invalid protection type \('leaf'\) in 
'target\("branch-protection="\)' pragma or attribute} "" { target *-*-* } 5 } */
 /* { dg-error {pragma or attribute 'target\("branch-protection=leaf"\)' is not 
valid} "" { target *-*-* } 5 } */
 
 void __attribute__ ((target("branch-protection=none+pac-ret")))
-- 
2.17.1



[PATCH] Fix comment for operand_compare::operand_equal_p.

2022-01-31 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The OEP_* enums were moved to tree-core.h in
r0-124973-g5e351e960763 but the comment was correct
when it was added to fold-const.h in
r10-4231-g7f4a8ee03d40. This fixes the reference
to the OEP_* enum to reference tree-core.

Committed as obvious after a bootstrap/test on x86_64-linux.

gcc/ChangeLog:

* fold-const.h (operand_compare::operand_equal_p):
Fix comment about OEP_* flags.
---
 gcc/fold-const.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index a9a3062e4f6..394a67ece79 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -243,7 +243,7 @@ class operand_compare
 {
 public:
   /* Return true if two operands are equal.  The flags fields can be used
- to specify OEP flags described above.  */
+ to specify OEP flags described in tree-core.h.  */
   virtual bool operand_equal_p (const_tree, const_tree, unsigned int flags);
 
   /* Generate a hash value for an expression.  This can be used iteratively
-- 
2.17.1



[PATCH] [COMMITTED] Change multiprecision.org to use https

2022-02-01 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

As reported at
https://gcc.gnu.org/pipermail/gcc/2022-February/238216.html,
multiprecision.org now uses https so this updates the documentation
to use https instead of http.

Committed as obvious.

gcc/ChangeLog:

* doc/install.texi: Use https for the multiprecision.org link.
---
 gcc/doc/install.texi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index dae7c0acc36..f8898af027d 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -406,7 +406,7 @@ download_prerequisites installs.
 @item MPC Library version 1.0.1 (or later)
 
 Necessary to build GCC@.  It can be downloaded from
-@uref{http://www.multiprecision.org/mpc/}.  If an MPC source distribution
+@uref{https://www.multiprecision.org/mpc/}.  If an MPC source distribution
 is found in a subdirectory of your GCC sources named @file{mpc}, it
 will be built together with GCC.  Alternatively, if MPC is already
 installed but it is not in your default library search path, the
-- 
2.17.1



[PATCH] [COMMITTED] Fix PR aarch64/104474: ICE with vector float initializers and non-consts.

2022-02-09 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is that the aarch64 back-end was placing const0_rtx
into the constant vector RTL even if the mode was a floating point mode.
The fix is instead to use CONST0_RTX and pass the mode to select the
correct zero (either const_int or const_double).

Committed as obvious after a bootstrap/test on aarch64-linux-gnu with
no regressions.

PR target/104474

gcc/ChangeLog:

* config/aarch64/aarch64.cc
(aarch64_sve_expand_vector_init_handle_trailing_constants):
Use CONST0_RTX instead of const0_rtx for the non-constant elements.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/sve/pr104474-1.c: New test.
* gcc.target/aarch64/sve/pr104474-2.c: New test.
* gcc.target/aarch64/sve/pr104474-3.c: New test.
---
 gcc/config/aarch64/aarch64.cc | 2 +-
 gcc/testsuite/gcc.target/aarch64/sve/pr104474-1.c | 9 +
 gcc/testsuite/gcc.target/aarch64/sve/pr104474-2.c | 9 +
 gcc/testsuite/gcc.target/aarch64/sve/pr104474-3.c | 9 +
 4 files changed, 28 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr104474-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr104474-2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/pr104474-3.c

diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8dc6d55e0f2..828ee472be2 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -21164,7 +21164,7 @@ aarch64_sve_expand_vector_init_handle_trailing_constants
{
  rtx x = builder.elt (i + nelts_reqd - n_trailing_constants);
  if (!valid_for_const_vector_p (elem_mode, x))
-   x = const0_rtx;
+   x = CONST0_RTX (elem_mode);
  v.quick_push (x);
}
   rtx const_vec = v.build ();
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr104474-1.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr104474-1.c
new file mode 100644
index 000..9e5bfe64467
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr104474-1.c
@@ -0,0 +1,9 @@
+/* { dg-options "-mcpu=neoverse-512tvb -frounding-math -msve-vector-bits=512" 
} */
+
+typedef float __attribute__((__vector_size__ (64))) F;
+
+F
+foo (void)
+{
+  return (F){68435453, 0, 0, 0, 0, 0, 0, 5, 0, 431144844};
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr104474-2.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr104474-2.c
new file mode 100644
index 000..02a4b6a8fdc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr104474-2.c
@@ -0,0 +1,9 @@
+/* { dg-options "-mcpu=neoverse-512tvb -msve-vector-bits=512" } */
+
+typedef float __attribute__((__vector_size__ (64))) F;
+
+F
+foo (float t)
+{
+  return (F){t, 0, 0, 0, 0, 0, 0, 5, 0, t};
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr104474-3.c 
b/gcc/testsuite/gcc.target/aarch64/sve/pr104474-3.c
new file mode 100644
index 000..7bed0142968
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr104474-3.c
@@ -0,0 +1,9 @@
+/* { dg-options "-mcpu=neoverse-v1 -frounding-math -msve-vector-bits=256" } */
+
+typedef _Float16 __attribute__((__vector_size__ (32))) F;
+
+F
+foo (void)
+{
+  return (F){0, 6270, 0, 0, 0, 0, 0, 0, 3229, 0, 40};
+}
-- 
2.27.0



[PATCH] c: [PR104506] Fix ICE after error due to change of type to error_mark_node

2022-02-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is we end up with an error_mark_node when calling
useless_type_conversion_p and that ICEs. STRIP_NOPS/tree_nop_conversion
has had a check for the inner type being an error_mark_node since g9a6bb3f78c96
(2000). This just adds the check also to tree_ssa_useless_type_conversion.
STRIP_USELESS_TYPE_CONVERSION is mostly used inside the gimplifier
and the places where it is used outside of the gimplifier would not
be adding too much overhead.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

Thanks,
Andrew Pinski

PR c/104506

gcc/ChangeLog:

* tree-ssa.cc (tree_ssa_useless_type_conversion):
Check the inner type before calling useless_type_conversion_p.

gcc/testsuite/ChangeLog:

* gcc.dg/pr104506-1.c: New test.
* gcc.dg/pr104506-2.c: New test.
* gcc.dg/pr104506-3.c: New test.
---
 gcc/testsuite/gcc.dg/pr104506-1.c | 12 
 gcc/testsuite/gcc.dg/pr104506-2.c | 11 +++
 gcc/testsuite/gcc.dg/pr104506-3.c | 11 +++
 gcc/tree-ssa.cc   | 20 +---
 4 files changed, 47 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/pr104506-1.c
 create mode 100644 gcc/testsuite/gcc.dg/pr104506-2.c
 create mode 100644 gcc/testsuite/gcc.dg/pr104506-3.c

diff --git a/gcc/testsuite/gcc.dg/pr104506-1.c 
b/gcc/testsuite/gcc.dg/pr104506-1.c
new file mode 100644
index 000..5eb71911b71
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr104506-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu11" } */
+/* PR c/104506: we used to ICE after the error of
+   changing the type.  */
+
+void
+foo (double x)
+/* { dg-message "note: previous definition" "previous definition" { target 
*-*-* } .-1 } */
+{
+  (void)x;
+  int x; /* { dg-error "redeclared as different kind of symbol" } */
+}
diff --git a/gcc/testsuite/gcc.dg/pr104506-2.c 
b/gcc/testsuite/gcc.dg/pr104506-2.c
new file mode 100644
index 000..3c3c4f8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr104506-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-std=gnu11" } */
+/* PR c/104506: we used to ICE after the error of
+   changing the type.  */
+void
+foo (double x)
+/* { dg-message "note: previous definition" "previous definition" { target 
*-*-* } .-1 } */
+{
+  x;
+  int x; /* { dg-error "redeclared as different kind of symbol" } */
+}
diff --git a/gcc/testsuite/gcc.dg/pr104506-3.c 
b/gcc/testsuite/gcc.dg/pr104506-3.c
new file mode 100644
index 000..b14deb5cf25
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr104506-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* PR c/104506: we used to ICE after the error of
+   changing the type.  */
+double x;
+/* { dg-message "note: previous declaration" "previous declaration" { target 
*-*-* } .-1 } */
+void
+foo (void)
+{
+  x;
+}
+int x; /* { dg-error "conflicting types" } */
diff --git a/gcc/tree-ssa.cc b/gcc/tree-ssa.cc
index 430875ae37a..423dd871d9e 100644
--- a/gcc/tree-ssa.cc
+++ b/gcc/tree-ssa.cc
@@ -1256,18 +1256,24 @@ delete_tree_ssa (struct function *fn)
 bool
 tree_ssa_useless_type_conversion (tree expr)
 {
+  tree outer_type, inner_type;
+
   /* If we have an assignment that merely uses a NOP_EXPR to change
  the top of the RHS to the type of the LHS and the type conversion
  is "safe", then strip away the type conversion so that we can
  enter LHS = RHS into the const_and_copies table.  */
-  if (CONVERT_EXPR_P (expr)
-  || TREE_CODE (expr) == VIEW_CONVERT_EXPR
-  || TREE_CODE (expr) == NON_LVALUE_EXPR)
-return useless_type_conversion_p
-  (TREE_TYPE (expr),
-   TREE_TYPE (TREE_OPERAND (expr, 0)));
+  if (!CONVERT_EXPR_P (expr)
+  && TREE_CODE (expr) != VIEW_CONVERT_EXPR
+  && TREE_CODE (expr) != NON_LVALUE_EXPR)
+return false;
 
-  return false;
+  outer_type = TREE_TYPE (expr);
+  inner_type = TREE_TYPE (TREE_OPERAND (expr, 0));
+
+  if (inner_type == error_mark_node)
+return false;
+
+  return useless_type_conversion_p (outer_type, inner_type);
 }
 
 /* Strip conversions from EXP according to
-- 
2.17.1



[PATCH v2] Fix PR tree-optimization/103228 and 55177: folding of (type) X op CST where type is a nop convert

2021-11-17 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Currently we fold (type) X op CST into (type) (X op ((type-x) CST)) when the 
conversion widens
but not when the conversion is a nop. For the same reason why we move the 
widening conversion
(the possibility of removing an extra conversion), we should do the same if the 
conversion is a
nop.

Committed as approved with the comment change.

PR tree-optimization/103228
PR tree-optimization/55177

gcc/ChangeLog:

* match.pd ((type) X bitop CST): Also do this
transformation for nop conversions.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr103228-1.c: New test.
* gcc.dg/tree-ssa/pr55177-1.c: New test.
---
 gcc/match.pd   |  6 --
 gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c | 11 +++
 gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c  | 14 ++
 3 files changed, 29 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index cd8f349f618..4dc66fb47f2 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1616,8 +1616,10 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  Restrict it to GIMPLE to avoid endless recursions.  */
&& (bitop != BIT_AND_EXPR || GIMPLE)
&& (/* That's a good idea if the conversion widens the operand, thus
- after hoisting the conversion the operation will be narrower.  */
-  TYPE_PRECISION (TREE_TYPE (@0)) < TYPE_PRECISION (type)
+ after hoisting the conversion the operation will be narrower.
+ It is also a good if the conversion is a nop as moves the
+ conversion to one side; allowing for combining of the 
conversions.  */
+  TYPE_PRECISION (TREE_TYPE (@0)) <= TYPE_PRECISION (type)
   /* It's also a good idea if the conversion is to a non-integer
  mode.  */
   || GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c
new file mode 100644
index 000..a7539819cf2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103228-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+int f(int a, int b)
+{
+  b|=1u;
+  b|=2;
+  return b;
+}
+/* { dg-final { scan-tree-dump-times "\\\| 3" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "\\\| 1" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "\\\| 2" 0 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c
new file mode 100644
index 000..de1a264345c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr55177-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+extern int x;
+
+void foo(void)
+{
+  int a = __builtin_bswap32(x);
+  a &= 0x5a5b5c5d;
+  x = __builtin_bswap32(a);
+}
+
+/* { dg-final { scan-tree-dump-times "__builtin_bswap32" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "& 1566333786" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "& 1515936861" 0 "optimized"} } */
-- 
2.17.1



[PATCH] Fix PR 103317, ICE after PHI-OPT, minmax_replacement producing invalid SSA

2021-11-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem is r12-5300-gf98f373dd822b35c allows phiopt to recognize more basic 
blocks
but missed one location where the basic block does not need to be empty but 
still
needs to have a single predecessor. This patch fixes that oversight.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/103317

gcc/ChangeLog:

* tree-ssa-phiopt.c (minmax_replacement): For the non empty
middle bb case, check to make sure it has a single predecessor.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr103317-1.c: New test.
---
 gcc/testsuite/gcc.c-torture/compile/pr103317-1.c | 13 +
 gcc/tree-ssa-phiopt.c|  3 +++
 2 files changed, 16 insertions(+)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr103317-1.c

diff --git a/gcc/testsuite/gcc.c-torture/compile/pr103317-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr103317-1.c
new file mode 100644
index 000..f9d145e0da9
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr103317-1.c
@@ -0,0 +1,13 @@
+int a, b;
+char c;
+void
+d (void)
+{
+  char e = c;
+  if (b)
+if (c < 16 - 11)
+  e = 16 - 11;
+  if (e > 8)
+e = 8;
+  a = e;
+}
diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
index f0431684089..1abc4ea21cc 100644
--- a/gcc/tree-ssa-phiopt.c
+++ b/gcc/tree-ssa-phiopt.c
@@ -1780,6 +1780,9 @@ minmax_replacement (basic_block cond_bb, basic_block 
middle_bb,
   gimple *assign = last_and_only_stmt (middle_bb);
   tree lhs, op0, op1, bound;
 
+  if (!single_pred_p (middle_bb))
+   return false;
+
   if (!assign
  || gimple_code (assign) != GIMPLE_ASSIGN)
return false;
-- 
2.17.1



[PATCH v2] [AARCH64] Fix PR target/103100 -mstrict-align and memset on not aligned buffers

2021-11-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is that aarch64_expand_setmem does not change the alignment
for strict alignment case. This is a simplified patch from what I had 
previously.
So constraining copy_limit to the alignment of the mem in the case of strict 
align
fixes the issue without making too many other changes to the code.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64.c (aarch64_expand_setmem): Constrain
copy_limit to the alignment of the mem if STRICT_ALIGNMENT is
true.
---
 gcc/config/aarch64/aarch64.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 7389b5953dc..e9c2e89d8ce 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23744,9 +23744,16 @@ aarch64_expand_setmem (rtx *operands)
   /* Maximum amount to copy in one go.  We allow 256-bit chunks based on the
  AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS tuning parameter.  setmem expand
  pattern is only turned on for TARGET_SIMD.  */
-  const int copy_limit = (aarch64_tune_params.extra_tuning_flags
- & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
- ? GET_MODE_BITSIZE (TImode) : 256;
+  int copy_limit;
+
+  if (aarch64_tune_params.extra_tuning_flags
+  & AARCH64_EXTRA_TUNE_NO_LDP_STP_QREGS)
+copy_limit = GET_MODE_BITSIZE (TImode);
+  else
+copy_limit = 256;
+
+  if (STRICT_ALIGNMENT)
+copy_limit = MIN (copy_limit, (int)MEM_ALIGN (dst));
 
   while (n > 0)
 {
-- 
2.17.1



[PATCH] Fix tree-optimization/103314 : Limit folding of (type) X op CST where type is a nop convert to gimple

2021-11-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

There is some re-association code in fold_binary which conflicts with
this optimization due to keeping around some "constants" which are not
INTEGER_CST (1 << -1) so we end up in an infinite loop because of that.
So we need to limit this case to GIMPLE level only.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/103314

gcc/ChangeLog:

* match.pd ((type) X op CST): Restrict the equal
TYPE_PRECISION case to GIMPLE only.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/pr103314-1.c: New test.
---
 gcc/match.pd | 6 +-
 gcc/testsuite/gcc.c-torture/compile/pr103314-1.c | 6 ++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr103314-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 4dc66fb47f2..24a84e3b504 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1619,7 +1619,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  after hoisting the conversion the operation will be narrower.
  It is also a good if the conversion is a nop as moves the
  conversion to one side; allowing for combining of the 
conversions.  */
-  TYPE_PRECISION (TREE_TYPE (@0)) <= TYPE_PRECISION (type)
+  TYPE_PRECISION (TREE_TYPE (@0)) < TYPE_PRECISION (type)
+  /* The conversion check for being a nop can only be done at the 
gimple
+ level as fold_binary has some re-association code which can 
conflict
+ with this if there is a "constant" which is not a full 
INTEGER_CST.  */
+  || (GIMPLE && TYPE_PRECISION (TREE_TYPE (@0)) == TYPE_PRECISION 
(type))
   /* It's also a good idea if the conversion is to a non-integer
  mode.  */
   || GET_MODE_CLASS (TYPE_MODE (type)) != MODE_INT
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr103314-1.c 
b/gcc/testsuite/gcc.c-torture/compile/pr103314-1.c
new file mode 100644
index 000..f4a63130421
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/pr103314-1.c
@@ -0,0 +1,6 @@
+/* { dg-options "" } */
+int main() {
+  int t = 1;
+  unsigned c = 0, d1 = t ? 1 ^ c ^ 1 >> (-1) : 0; /* { dg-warning "is 
negative"  } */
+  return d1;
+}
-- 
2.17.1



[PATCH] Fix tree-optimization/103220: Another missing folding of (type) X op CST where type is a nop convert

2021-11-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is that int_fits_type_p will return false if we just
change the sign of things like -2 (or 254) so we should accept the case
where we just change the sign (and not the precision) of the type.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/103220

gcc/ChangeLog:

* match.pd ((type) X bitop CST): Don't check if CST
fits into the type if only the sign changes.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr103220-1.c: New test.
* gcc.dg/tree-ssa/pr103220-2.c: New test.
* gcc.dg/pr25530.c: Update test to check for
4294967294 in the case -2 is not matched.
---
 gcc/match.pd   |  3 ++-
 gcc/testsuite/gcc.dg/pr25530.c |  2 +-
 gcc/testsuite/gcc.dg/tree-ssa/pr103220-1.c | 15 +++
 gcc/testsuite/gcc.dg/tree-ssa/pr103220-2.c | 16 
 4 files changed, 34 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103220-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr103220-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 24a84e3b504..37c5be9e5f4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1607,7 +1607,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
   (bitop (convert@2 @0) (convert?@3 @1))
   (if (((TREE_CODE (@1) == INTEGER_CST
 && INTEGRAL_TYPE_P (TREE_TYPE (@0))
-&& int_fits_type_p (@1, TREE_TYPE (@0)))
+&& (int_fits_type_p (@1, TREE_TYPE (@0))
+|| tree_nop_conversion_p (TREE_TYPE (@0), type)))
|| types_match (@0, @1))
/* ???  This transform conflicts with fold-const.c doing
  Convert (T)(x & c) into (T)x & (T)c, if c is an integer
diff --git a/gcc/testsuite/gcc.dg/pr25530.c b/gcc/testsuite/gcc.dg/pr25530.c
index b846ab30140..771b36b9c29 100644
--- a/gcc/testsuite/gcc.dg/pr25530.c
+++ b/gcc/testsuite/gcc.dg/pr25530.c
@@ -8,4 +8,4 @@ f (unsigned t)
   return (t / 2) * 2;
 }
 
-/* { dg-final { scan-tree-dump "\& -2" "optimized" } } */
+/* { dg-final { scan-tree-dump "\& -2|4294967294" "optimized" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103220-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr103220-1.c
new file mode 100644
index 000..f2ef3f1d93c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103220-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+unsigned char f(unsigned char a)
+{
+  signed char d = (signed char) a;
+  signed char e = d & ~1;
+  unsigned char t = e;
+  t &= ~2;
+  return t;
+}
+/* The above should reduce down to just & 252 rather than keping
+   the two &s there. */
+/* { dg-final { scan-tree-dump-times "& 252" 1 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "& -2" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "& 253" 0 "optimized"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr103220-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr103220-2.c
new file mode 100644
index 000..25d7412a095
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr103220-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+signed char f(unsigned char a)
+{
+  unsigned char b = a & 127;
+  signed char c = (signed char) b;
+  signed char d = (signed char) a;
+  signed char e = d & -128;
+  signed char h = c | e;
+  return h;
+}
+/* The above should reduce down to just return with a cast.
+   removing the two &s there and |'s. */
+/* { dg-final { scan-tree-dump-times "& 127" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "& -128" 0 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "\\\| " 0 "optimized"} } */
-- 
2.17.1



[PATCH] tree-optimization: [PR31531] Improve ~a < CST, allow a nop cast inbetween ~ and a

2021-11-21 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This PR was originally for the missed optimization of a few isnegative which
had been solved a long time ago (sometime before 4.4.0). I noticed there was
one missed optimization on the gimple level. There is a match.pd pattern
for ~a < CST but we miss that there could be a nop_convert between the
comparison and the bit_not. This adds the optional nop cast to the current
match.pd pattern.

OK? Bootstrapped and tested on x86_64 with no regressions.

PR tree-optimization/31531

gcc/ChangeLog:

* match.pd (~X op C): Allow for an optional nop convert.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr31531-1.c: New test.
---
 gcc/match.pd  |  5 +++--
 gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c | 19 +++
 2 files changed, 22 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 37c5be9e5f4..ca6c9eff624 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -4729,10 +4729,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for cmp (simple_comparison)
  scmp (swapped_simple_comparison)
  (simplify
-  (cmp (bit_not@2 @0) CONSTANT_CLASS_P@1)
+  (cmp (nop_convert?:s (bit_not@2 @0)) CONSTANT_CLASS_P@1)
   (if (single_use (@2)
&& (TREE_CODE (@1) == INTEGER_CST || TREE_CODE (@1) == VECTOR_CST))
-   (scmp @0 (bit_not @1)
+   (with { tree type1 = TREE_TYPE (@1); }
+(scmp (convert:type1 @0) (bit_not @1))
 
 (for cmp (simple_comparison)
  /* Fold (double)float1 CMP (double)float2 into float1 CMP float2.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
new file mode 100644
index 000..c27299151eb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr31531-1.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/31531 */
+
+int f(int a)
+{
+  int b = ~a;
+  return b<0;
+}
+
+
+int f1(unsigned a)
+{
+  int b = ~a;
+  return b<0;
+}
+/* We should convert the above two functions from b <0 to ((int)a) >= 0. */
+/* { dg-final { scan-tree-dump-times ">= 0" 2 "optimized"} } */
+/* { dg-final { scan-tree-dump-times "~" 0 "optimized"} } */
-- 
2.17.1



[PATCH 1/2] Improve/Fix (m1 CMP m2) * d -> (m1 CMP m2) ? d : 0 pattern.

2021-11-21 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The pattern here was not catching all comparisons and the multiply
was not commutative when it should have been. This patch fixes
that by using tcc_comparison and adding :c to the multiply.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd ((m1 CMP m2) * d -> (m1 CMP m2) ? d : 0):
Use tcc_comparison and :c for the multiply.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/multcmp-1.c: New test.
* gcc.dg/tree-ssa/multcmp-2.c: New test.
---
 gcc/match.pd  |  4 ++--
 gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c | 12 
 gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c | 12 
 3 files changed, 26 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index ca6c9eff624..ed43c321cbc 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1791,9 +1791,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
 /* (m1 CMP m2) * d -> (m1 CMP m2) ? d : 0  */
 (if (!canonicalize_math_p ())
- (for cmp (gt lt ge le)
+ (for cmp (tcc_comparison)
   (simplify
-   (mult (convert (cmp @0 @1)) @2)
+   (mult:c (convert (cmp @0 @1)) @2)
(cond (cmp @0 @1) @2 { build_zero_cst (type); }
 
 /* For integral types with undefined overflow and C != 0 fold
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
new file mode 100644
index 000..fb44cacde77
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 == m2;
+  int e = d * c;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" 
} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c
new file mode 100644
index 000..be38b2e0044
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/multcmp-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 != m2;
+  int e = c * d;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" 
} } */
-- 
2.17.1



[PATCH 2/2] tree-optimization: [PR92342] Move b & -(a==c) optimization to the gimple level

2021-11-21 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Combine disabled this optimization in r10-254-gddbb5da5199fb42 but it makes
sense to do this on the gimple level and then let expand decide which way is
better. So this adds the transformation on the gimple level (late, as was
done for the multiply case).

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/92342

gcc/ChangeLog:

* match.pd (b & -(a CMP c) -> (a CMP c)?b:0): New pattern.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/andnegcmp-1.c: New test.
* gcc.dg/tree-ssa/andnegcmp-2.c: New test.
---
 gcc/match.pd|  8 +++-
 gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c | 14 ++
 gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c | 14 ++
 3 files changed, 35 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c

diff --git a/gcc/match.pd b/gcc/match.pd
index ed43c321cbc..b55cbc91b57 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1794,7 +1794,13 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
  (for cmp (tcc_comparison)
   (simplify
(mult:c (convert (cmp @0 @1)) @2)
-   (cond (cmp @0 @1) @2 { build_zero_cst (type); }
+   (cond (cmp @0 @1) @2 { build_zero_cst (type); }))
+/* (-(m1 CMP m2)) & d -> (m1 CMP m2) ? d : 0  */
+  (simplify
+   (bit_and:c (negate (convert (cmp @0 @1))) @2)
+   (cond (cmp @0 @1) @2 { build_zero_cst (type); }))
+ )
+)
 
 /* For integral types with undefined overflow and C != 0 fold
x * C EQ/NE y * C into x EQ/NE y.  */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
new file mode 100644
index 000..6f16783f169
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/92342 */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 == m2;
+  d = -d;
+  int e = d & c;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" 
} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c 
b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c
new file mode 100644
index 000..0e25c8abc39
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/andnegcmp-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* PR tree-optimization/92342 */
+
+int
+f (int m1, int m2, int c)
+{
+  int d = m1 < m2;
+  d = -d;
+  int e = c & d;
+  return e;
+}
+
+/* { dg-final { scan-tree-dump-times "\\? c_\[0-9\]\\(D\\) : 0" 1 "optimized" 
} } */
-- 
2.17.1



[PATCH v2] Canonicalize &MEM[ssa_n, CST] to ssa_n p+ CST in fold_stmt_1

2021-11-22 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This is a new version of the patch to fix PR 102216.
Instead of doing the canonicalization inside forwprop, Richi
mentioned we should do it inside fold_stmt_1 and that is what
this patch does.

PR tree-optimization/102216

gcc/ChangeLog:

* gimple-fold.c (fold_stmt_1): Add canonicalization
of "&MEM[ssa_n, CST]" to "ssa_n p+ CST", note this
can only be done if !in_place.

gcc/testsuite/ChangeLog:

* g++.dg/tree-ssa/pr102216-1.C: New test.
* g++.dg/tree-ssa/pr102216-2.C: New test.
---
 gcc/gimple-fold.c  | 21 ++
 gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C | 21 ++
 gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C | 45 ++
 3 files changed, 87 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C
 create mode 100644 gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C

diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index ad9703ee471..aab6818c93f 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -6061,6 +6061,27 @@ fold_stmt_1 (gimple_stmt_iterator *gsi, bool inplace, 
tree (*valueize) (tree))
  if (REFERENCE_CLASS_P (*lhs)
  && maybe_canonicalize_mem_ref_addr (lhs))
changed = true;
+ /* Canonicalize &MEM[ssa_n, CST] to ssa_n p+ CST.
+This cannot be done in maybe_canonicalize_mem_ref_addr
+as the gimple now has two operands rather than one.
+The same reason why this can't be done in
+maybe_canonicalize_mem_ref_addr is the same reason why
+this can't be done inplace.  */
+ if (!inplace && TREE_CODE (*rhs) == ADDR_EXPR)
+   {
+ tree inner = TREE_OPERAND (*rhs, 0);
+ if (TREE_CODE (inner) == MEM_REF
+ && TREE_CODE (TREE_OPERAND (inner, 0)) == SSA_NAME
+ && TREE_CODE (TREE_OPERAND (inner, 1)) == INTEGER_CST)
+   {
+ tree ptr = TREE_OPERAND (inner, 0);
+ tree addon = TREE_OPERAND (inner, 1);
+ addon = fold_convert (sizetype, addon);
+ gimple_assign_set_rhs_with_ops (gsi, POINTER_PLUS_EXPR,
+ ptr, addon);
+ changed = true;
+   }
+   }
}
   else
{
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C
new file mode 100644
index 000..21f7f6797ff
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr102216-1.C
@@ -0,0 +1,21 @@
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+void link_error ();
+void g ()
+{
+  const char **language_names;
+
+  language_names = new const char *[6];
+
+  const char **language_names_p = language_names;
+
+  language_names_p++;
+  language_names_p++;
+  language_names_p++;
+
+  if ( (language_names_p) - (language_names+3) != 0)
+link_error();
+  delete[] language_names;
+}
+/* We should have removed the link_error on the gimple level as GCC should
+   be able to tell that language_names_p is the same as language_names+3.  */
+/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" } } */
diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C
new file mode 100644
index 000..8d351a9bad0
--- /dev/null
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr102216-2.C
@@ -0,0 +1,45 @@
+/* { dg-options "-O2 -Wall" } */
+#include 
+
+static inline bool
+compare_cstrings (const char *str1, const char *str2)
+{
+  return str1 < str2;
+}
+
+void
+add_set_language_command ()
+{
+  static const char **language_names;
+
+  language_names = new const char *[6];
+
+  language_names[0] = "auto";
+  language_names[1] = "local";
+  language_names[2] = "unknown";
+
+  const char **language_names_p = language_names;
+  /* language_names_p == &language_names[0].  */
+  language_names_p++;
+  /* language_names_p == &language_names[1].  */
+  language_names_p++;
+  /* language_names_p == &language_names[2].  */
+  language_names_p++;
+  /* language_names_p == &language_names[3].  */
+
+  const char **sort_begin;
+
+  if (0)
+sort_begin = &language_names[3];
+  else
+sort_begin = language_names_p;
+
+  language_names[3] = "";
+  language_names[4] = "";
+  language_names[5] = NULL;
+
+  /* There should be no warning associated with this std::sort as
+ sort_begin != &language_names[5] and GCC should be able to figure
+ that out.  */
+  std::sort (sort_begin, &language_names[5], compare_cstrings);
+}
-- 
2.17.1



[PATCH] Fix PR 62157: distclean in libsanitizer not working

2021-11-27 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So what is happening is DIST_SUBDIRS contains the conditional
directories which is wrong, so we need to force DIST_SUBDIRS
to be the same as SUBDIRS as recommended by the automake manual.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.
Also now make distclean works inside libsanitizer directory.

libsanitizer/ChangeLog:

PR sanitizer/62157
* Makefile.am: Force DIST_SUBDIRS to be SUBDIRS.
* Makefile.in: Regenerate.
* asan/Makefile.in: Likewise.
* hwasan/Makefile.in: Likewise.
* interception/Makefile.in: Likewise.
* libbacktrace/Makefile.in: Likewise.
* lsan/Makefile.in: Likewise.
* sanitizer_common/Makefile.in: Likewise.
* tsan/Makefile.in: Likewise.
* ubsan/Makefile.in: Likewise.
---
 libsanitizer/Makefile.am  | 3 +++
 libsanitizer/Makefile.in  | 4 ++--
 libsanitizer/asan/Makefile.in | 1 +
 libsanitizer/hwasan/Makefile.in   | 1 +
 libsanitizer/interception/Makefile.in | 1 +
 libsanitizer/libbacktrace/Makefile.in | 1 +
 libsanitizer/lsan/Makefile.in | 1 +
 libsanitizer/sanitizer_common/Makefile.in | 1 +
 libsanitizer/tsan/Makefile.in | 1 +
 libsanitizer/ubsan/Makefile.in| 1 +
 10 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/libsanitizer/Makefile.am b/libsanitizer/Makefile.am
index 065a65e78d4..53e20bdac2c 100644
--- a/libsanitizer/Makefile.am
+++ b/libsanitizer/Makefile.am
@@ -28,6 +28,9 @@ SUBDIRS += hwasan
 endif
 endif
 
+## Force DIST_SUBDIRS so that make distclean works
+DIST_SUBDIRS = $(SUBDIRS)
+
 ## May be used by toolexeclibdir.
 gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
 
diff --git a/libsanitizer/Makefile.in b/libsanitizer/Makefile.in
index 3873ea4d705..486c7920ee2 100644
--- a/libsanitizer/Makefile.in
+++ b/libsanitizer/Makefile.in
@@ -208,8 +208,6 @@ am__define_uniq_tagged_files = \
 ETAGS = etags
 CTAGS = ctags
 CSCOPE = cscope
-DIST_SUBDIRS = sanitizer_common interception libbacktrace lsan asan \
-   ubsan tsan hwasan
 ACLOCAL = @ACLOCAL@
 ALLOC_FILE = @ALLOC_FILE@
 AMTAR = @AMTAR@
@@ -345,6 +343,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
@@ -366,6 +365,7 @@ nodist_saninclude_HEADERS = $(am__append_1)
 @SANITIZER_SUPPORTED_TRUE@SUBDIRS = sanitizer_common $(am__append_2) \
 @SANITIZER_SUPPORTED_TRUE@ $(am__append_3) lsan asan ubsan \
 @SANITIZER_SUPPORTED_TRUE@ $(am__append_4) $(am__append_5)
+DIST_SUBDIRS = $(SUBDIRS)
 gcc_version := $(shell @get_gcc_base_ver@ $(top_srcdir)/../gcc/BASE-VER)
 
 # Work around what appears to be a GNU make bug handling MAKEFLAGS
diff --git a/libsanitizer/asan/Makefile.in b/libsanitizer/asan/Makefile.in
index 528ab61312c..e00927dd13d 100644
--- a/libsanitizer/asan/Makefile.in
+++ b/libsanitizer/asan/Makefile.in
@@ -397,6 +397,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
diff --git a/libsanitizer/hwasan/Makefile.in b/libsanitizer/hwasan/Makefile.in
index 1729349e682..0021e783c61 100644
--- a/libsanitizer/hwasan/Makefile.in
+++ b/libsanitizer/hwasan/Makefile.in
@@ -385,6 +385,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
diff --git a/libsanitizer/interception/Makefile.in 
b/libsanitizer/interception/Makefile.in
index 326ee9a1818..c14d5270286 100644
--- a/libsanitizer/interception/Makefile.in
+++ b/libsanitizer/interception/Makefile.in
@@ -315,6 +315,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
diff --git a/libsanitizer/libbacktrace/Makefile.in 
b/libsanitizer/libbacktrace/Makefile.in
index 3f05cdf3d62..f008f8eb552 100644
--- a/libsanitizer/libbacktrace/Makefile.in
+++ b/libsanitizer/libbacktrace/Makefile.in
@@ -365,6 +365,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
diff --git a/libsanitizer/lsan/Makefile.in b/libsanitizer/lsan/Makefile.in
index 0ee0be6526e..25ac00cf8db 100644
--- a/libsanitizer/lsan/Makefile.in
+++ b/libsanitizer/lsan/Makefile.in
@@ -360,6 +360,7 @@ pdfdir = @pdfdir@
 prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@

[PATCH] Fix PR 19089: Environment variable TMP may yield gcc: abort

2021-11-27 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Even though I cannot reproduce the ICE any more, this is still
a bug. We check already to see if we can access the directory
but never check to see if the path is actually a directory.

This adds the check and now we reject the file as not usable
as a tmp directory.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

libiberty/ChangeLog:

* make-temp-file.c (try_dir): Check to see if the dir
is actually a directory.
---
 libiberty/make-temp-file.c | 16 +++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/libiberty/make-temp-file.c b/libiberty/make-temp-file.c
index 31f87fbcfde..11eb03d12ec 100644
--- a/libiberty/make-temp-file.c
+++ b/libiberty/make-temp-file.c
@@ -39,6 +39,10 @@ Boston, MA 02110-1301, USA.  */
 #if defined(_WIN32) && !defined(__CYGWIN__)
 #include 
 #endif
+#if HAVE_SYS_STAT_H
+#include 
+#endif
+
 
 #ifndef R_OK
 #define R_OK 4
@@ -76,7 +80,17 @@ try_dir (const char *dir, const char *base)
 return base;
   if (dir != 0
   && access (dir, R_OK | W_OK | X_OK) == 0)
-return dir;
+{
+  /* Check to make sure dir is actually a directory. */
+#ifdef S_ISDIR
+  struct stat s;
+  if (stat(dir, &s))
+   return NULL;
+  if (!S_ISDIR (s.st_mode))
+   return NULL;
+#endif
+  return dir;
+}
   return 0;
 }
 
-- 
2.27.0



[PATCH] tree-optimization: [PR101540] Simplify CONSTRUCTOR for vector(1) to be VCE

2021-11-28 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This just adds a simplification to simplify_vector_constructor for
vector of 1 element to be VCE which should reduce memory usage in
the compiler and maybe allow for some more optimizations.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR tree-optimization/101540

gcc/ChangeLog:

* tree-ssa-forwprop.c (simplify_vector_constructor):
Simplify constructor of vector of 1 element to just
be a VIEW_CONVERT_EXPR.

gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/pr101540-1.c: New test.
---
 gcc/testsuite/gcc.dg/tree-ssa/pr101540-1.c | 13 +
 gcc/tree-ssa-forwprop.c| 13 +
 2 files changed, 26 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr101540-1.c

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr101540-1.c 
b/gcc/testsuite/gcc.dg/tree-ssa/pr101540-1.c
new file mode 100644
index 000..73fb342e029
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr101540-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-forwprop1" } */
+/* PR tree-optimization/101540 */
+typedef unsigned char __attribute__((__vector_size__ (1))) W;
+
+W foo (unsigned char uc)
+{
+  return (W){uc};
+}
+/* The constructor in the above function should be converted into a VCE.  */
+/* { dg-final { scan-tree-dump-times "VIEW_CONVERT_EXPR" 1 "forwprop1"} } */
+// {uc_1(D)}
+/* { dg-final { scan-tree-dump-times "{uc_\[0-9\]+.D.}" 0 "forwprop1"} } */
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index a830bab78ba..94b92d3d0af 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2392,6 +2392,19 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
   elem_type = TREE_TYPE (type);
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
+  /* Special case V1 constructor with the same type to being a VCE.  */
+  if (nelts == 1 && CONSTRUCTOR_NELTS (op) == 1)
+{
+  tree op1 = CONSTRUCTOR_ELT (op, 0)->value;
+  if (useless_type_conversion_p (elem_type, TREE_TYPE (op1)))
+   {
+ op1 = build1 (VIEW_CONVERT_EXPR, type, op1);
+ gimple_assign_set_rhs_from_tree (gsi, op1);
+ update_stmt (gsi_stmt (*gsi));
+ return true;
+   }
+}
+
   orig[0] = NULL;
   orig[1] = NULL;
   conv_code = ERROR_MARK;
-- 
2.17.1



[PATCH] [Committed] New testcase for C++/71792, bitfields and auto

2021-12-03 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This testcase used to fail before GCC 6.4.0 due to the wrong
type being used for auto when used with bitfields, the C++
front-end was using the "bitfield" type rather than the
underlying type.

Committed the testcase after a quick check.

PR c++/71792

gcc/testsuite/ChangeLog:

* g++.dg/torture/pr71792.C: New test.
---
 gcc/testsuite/g++.dg/torture/pr71792.C | 42 ++
 1 file changed, 42 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/torture/pr71792.C

diff --git a/gcc/testsuite/g++.dg/torture/pr71792.C 
b/gcc/testsuite/g++.dg/torture/pr71792.C
new file mode 100644
index 000..607774d755d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/torture/pr71792.C
@@ -0,0 +1,42 @@
+// { dg-do run { target c++11 } }
+// PR C++/71792
+
+class some_class
+{
+public:
+  unsigned int np  : 4;
+  unsigned int nc  : 8;
+  unsigned int nc0 : 1;
+};
+
+template
+static void test_bug (const some_class &mp) {
+  if (what) {
+int t = 0;
+for (auto i = mp.nc0; i < mp.nc; i++) {
+  if (t != i) __builtin_abort ();
+  t++;
+}
+  }
+}
+
+static void test_ok (const some_class &mp) {
+  int t = 0;
+  for (auto i = mp.nc0; i < mp.nc; i++) {
+if (t != i) __builtin_abort ();
+t++;
+  }
+}
+
+int main ()
+{
+  some_class mp;
+  mp.nc0 = 0;
+  mp.nc = 9;
+  mp.np = 3;
+
+  test_bug (mp);
+  test_ok (mp);
+
+  return 0;
+}
-- 
2.17.1



[PATCH] Fix C++/93809 and C++/83469: typenames and unions

2021-12-07 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

There are a few issues here with typenames and unions (and even struct
keywords with unions). First in cp_parser_check_class_key,
we need to allow typenames to name union types and union key
to be able to use with typenames.

The next issue is we need to record if we had a union key,
right now we just record it was a struct/class/typename one
which is wrong.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR c++/83469
PR c++/93809

gcc/cp/ChangeLog:

* cp-tree.h (UNION_CLASS_TYPE_P): New define.
(TYPENAME_IS_UNION_P): New Define.
* decl.c (struct typename_info): Add union_p field.
(struct typename_hasher::equal): Compare union_p field.
(build_typename_type): Move union_type to
union_p/TYPENAME_IS_UNION_P.
* error.c (dump_type) : Handle TYPENAME_IS_UNION_P
as "union"
* module.cc (trees_out::type_node): Handle TYPENAME_IS_UNION_P.
* parser.c (cp_parser_check_class_key): Allow
typename key for union types and allow union keyword for
typename types.
* pt.c (tsubst) : For TYPENAME_IS_CLASS_P,
check NON_UNION_CLASS_TYPE_P rather than CLASS_TYPE_P.
Add TYPENAME_IS_UNION_P handling.

gcc/testsuite/ChangeLog:

* g++.dg/warn/Wredundant-tags-3.C: Remove xfail.
* g++.dg/pr83469-1.C: New test.
* g++.dg/pr83469-2.C: New test.
* g++.dg/pr83469-3.C: New test.
* g++.dg/pr93809-1.C: New test.
* g++.dg/pr93809-2.C: New test.
* g++.dg/pr93809-3.C: New test.
---
 gcc/cp/cp-tree.h  | 11 +--
 gcc/cp/decl.c |  9 ++---
 gcc/cp/error.c|  1 +
 gcc/cp/module.cc  |  2 ++
 gcc/cp/parser.c   |  4 +++-
 gcc/cp/pt.c   | 10 +-
 gcc/testsuite/g++.dg/pr83469-1.C  | 15 +++
 gcc/testsuite/g++.dg/pr83469-2.C  | 13 +
 gcc/testsuite/g++.dg/pr83469-3.C  | 13 +
 gcc/testsuite/g++.dg/pr93809-1.C  | 11 +++
 gcc/testsuite/g++.dg/pr93809-2.C  |  5 +
 gcc/testsuite/g++.dg/pr93809-3.C  |  4 
 gcc/testsuite/g++.dg/warn/Wredundant-tags-3.C |  2 +-
 13 files changed, 92 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/pr83469-1.C
 create mode 100644 gcc/testsuite/g++.dg/pr83469-2.C
 create mode 100644 gcc/testsuite/g++.dg/pr83469-3.C
 create mode 100644 gcc/testsuite/g++.dg/pr93809-1.C
 create mode 100644 gcc/testsuite/g++.dg/pr93809-2.C
 create mode 100644 gcc/testsuite/g++.dg/pr93809-3.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 3510512d751..ea9cbb775e6 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -2206,6 +2206,10 @@ enum languages { lang_c, lang_cplusplus };
 #define NON_UNION_CLASS_TYPE_P(T) \
   (TREE_CODE (T) == RECORD_TYPE && TYPE_LANG_FLAG_5 (T))
 
+/* Nonzero if T is a class type and is a union.  */
+#define UNION_CLASS_TYPE_P(T) \
+  (TREE_CODE (T) == UNION_TYPE && TYPE_LANG_FLAG_5 (T))
+
 /* Keep these checks in ascending code order.  */
 #define RECORD_OR_UNION_CODE_P(T)  \
   ((T) == RECORD_TYPE || (T) == UNION_TYPE)
@@ -4184,11 +4188,14 @@ more_aggr_init_expr_args_p (const 
aggr_init_expr_arg_iterator *iter)
 #define TYPENAME_IS_ENUM_P(NODE) \
   (TREE_LANG_FLAG_0 (TYPENAME_TYPE_CHECK (NODE)))
 
-/* True if a TYPENAME_TYPE was declared as a "class", "struct", or
-   "union".  */
+/* True if a TYPENAME_TYPE was declared as a "class", "struct".  */
 #define TYPENAME_IS_CLASS_P(NODE) \
   (TREE_LANG_FLAG_1 (TYPENAME_TYPE_CHECK (NODE)))
 
+/* True if a TYPENAME_TYPE was declared as an "union".  */
+#define TYPENAME_IS_UNION_P(NODE) \
+  (TREE_LANG_FLAG_3 (TYPENAME_TYPE_CHECK (NODE)))
+
 /* True if a TYPENAME_TYPE is in the process of being resolved.  */
 #define TYPENAME_IS_RESOLVING_P(NODE) \
   (TREE_LANG_FLAG_2 (TYPENAME_TYPE_CHECK (NODE)))
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 56f80775ca0..8fa07e30d69 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -3930,6 +3930,7 @@ struct typename_info {
   tree template_id;
   bool enum_p;
   bool class_p;
+  bool union_p;
 };
 
 struct typename_hasher : ggc_ptr_hash
@@ -3958,7 +3959,8 @@ struct typename_hasher : ggc_ptr_hash
&& TYPE_CONTEXT (t1) == t2->scope
&& TYPENAME_TYPE_FULLNAME (t1) == t2->template_id
&& TYPENAME_IS_ENUM_P (t1) == t2->enum_p
-   && TYPENAME_IS_CLASS_P (t1) == t2->class_p);
+   && TYPENAME_IS_CLASS_P (t1) == t2->class_p
+   && TYPENAME_IS_UNION_P (t1) == t2->union_p);
   }
 };
 
@@ -3983,8 +3985,8 @@ build_typename_type (tree context, tree name, tree 
fullname,
   ti.template_id = fullname;
   ti.enum_p = tag_type == enum_type;
   ti.class_p = (tag_type == class_type
-   || tag_type == record_type
-   

[PATCH] Use simple_dce_from_worklist with match_simplify_replacement.

2022-10-27 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This is a simple patch to do some DCE after a successful
match and simplify replacement in PHI-OPT. match and simplify
likes to generate some extra statements which should be cleaned
up.

OK? Bootstrapped and tested on x86_64-linux with no regressions.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* tree-ssa-phiopt.cc: Include tree-ssa-dce.h
(replace_phi_edge_with_variable):
New argument, dce_ssa_names. Call simple_dce_from_worklist.
(match_simplify_replacement): If we inserted a sequence,
mark the lhs of the new sequence to be possible dce.
Always move the statement and mark the lhs (if it is a name)
as possible to remove.
---
 gcc/tree-ssa-phiopt.cc | 35 ++-
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/gcc/tree-ssa-phiopt.cc b/gcc/tree-ssa-phiopt.cc
index 925bd7d..996700b 100644
--- a/gcc/tree-ssa-phiopt.cc
+++ b/gcc/tree-ssa-phiopt.cc
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-match.h"
 #include "dbgcnt.h"
 #include "tree-ssa-propagate.h"
+#include "tree-ssa-dce.h"
 
 static unsigned int tree_ssa_phiopt_worker (bool, bool, bool);
 static bool two_value_replacement (basic_block, basic_block, edge, gphi *,
@@ -74,7 +75,6 @@ static bool cond_store_replacement (basic_block, basic_block, 
edge, edge,
hash_set *);
 static bool cond_if_else_store_replacement (basic_block, basic_block, 
basic_block);
 static hash_set * get_non_trapping ();
-static void replace_phi_edge_with_variable (basic_block, edge, gphi *, tree);
 static void hoist_adjacent_loads (basic_block, basic_block,
  basic_block, basic_block);
 static bool gate_hoist_loads (void);
@@ -402,7 +402,8 @@ tree_ssa_phiopt_worker (bool do_store_elim, bool 
do_hoist_loads, bool early_p)
 
 static void
 replace_phi_edge_with_variable (basic_block cond_block,
-   edge e, gphi *phi, tree new_tree)
+   edge e, gphi *phi, tree new_tree,
+   bitmap dce_ssa_names = auto_bitmap())
 {
   basic_block bb = gimple_bb (phi);
   gimple_stmt_iterator gsi;
@@ -477,6 +478,8 @@ replace_phi_edge_with_variable (basic_block cond_block,
gimple_cond_make_true (cond);
 }
 
+  simple_dce_from_worklist (dce_ssa_names);
+
   statistics_counter_event (cfun, "Replace PHI with variable", 1);
 
   if (dump_file && (dump_flags & TDF_DETAILS))
@@ -986,6 +989,7 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   gimple_seq seq = NULL;
   tree result;
   gimple *stmt_to_move = NULL;
+  auto_bitmap inserted_exprs;
 
   /* Special case A ? B : B as this will always simplify to B. */
   if (operand_equal_for_phi_arg_p (arg0, arg1))
@@ -1060,14 +1064,22 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
   gsi = gsi_last_bb (cond_bb);
   /* Insert the sequence generated from gimple_simplify_phiopt.  */
   if (seq)
+{
+  // Mark the lhs of the new statements maybe for dce
+  gimple_stmt_iterator gsi1 = gsi_start (seq);
+  for (; !gsi_end_p (gsi1); gsi_next (&gsi1))
+   {
+ gimple *stmt = gsi_stmt (gsi1);
+ tree name = gimple_get_lhs (stmt);
+ if (name && TREE_CODE (name) == SSA_NAME)
+   bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (name));
+   }
 gsi_insert_seq_before (&gsi, seq, GSI_CONTINUE_LINKING);
+  }
 
-  /* If there was a statement to move and the result of the statement
- is going to be used, move it to right before the original
- conditional.  */
-  if (stmt_to_move
-  && (gimple_assign_lhs (stmt_to_move) == result
- || !has_single_use (gimple_assign_lhs (stmt_to_move
+  /* If there was a statement to move, move it to right before
+ the original conditional.  */
+  if (stmt_to_move)
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
{
@@ -1075,12 +1087,17 @@ match_simplify_replacement (basic_block cond_bb, 
basic_block middle_bb,
  print_gimple_stmt (dump_file, stmt_to_move, 0,
   TDF_VOPS|TDF_MEMSYMS);
}
+
+  tree name = gimple_get_lhs (stmt_to_move);
+  // Mark the name to be renamed if there is one.
+  if (name && TREE_CODE (name) == SSA_NAME)
+   bitmap_set_bit (inserted_exprs, SSA_NAME_VERSION (name));
   gimple_stmt_iterator gsi1 = gsi_for_stmt (stmt_to_move);
   gsi_move_before (&gsi1, &gsi);
   reset_flow_sensitive_info (gimple_assign_lhs (stmt_to_move));
 }
 
-  replace_phi_edge_with_variable (cond_bb, e1, phi, result);
+  replace_phi_edge_with_variable (cond_bb, e1, phi, result, inserted_exprs);
 
   /* Add Statistic here even though replace_phi_edge_with_variable already
  does it as we want to be able to count when match-simplify happens vs
-- 
1.8.3.1



[PATCH 1/2] Fix PR 105532: match.pd patterns calling tree_nonzero_bits with vector types

2022-11-02 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Even though this PR was reported with an ubsan issue, the problem is
tree_nonzero_bits is being called with an expression which is a vector type.
This fixes three patterns I noticed which do that.
And adds a testcase for one of the patterns.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions

gcc/ChangeLog:

PR tree-optimization/105532
* match.pd (~(X >> Y) -> ~X >> Y): Check if it is an integral
type before calling tree_nonzero_bits.
(popcount(X) + popcount(Y)): Likewise.
(popcount(X&C1)): Likewise.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/vector-shift-1.c: New test.
---
 gcc/match.pd  | 25 +++
 .../gcc.c-torture/compile/vector-shift-1.c|  8 ++
 2 files changed, 22 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/vector-shift-1.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 194ba8f5188..5833e05a926 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1371,7 +1371,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
/* For logical right shifts, this is possible only if @0 doesn't
   have MSB set and the logical right shift is changed into
   arithmetic shift.  */
-   (if (!wi::neg_p (tree_nonzero_bits (@0)))
+   (if (INTEGRAL_TYPE_P (type)
+&& !wi::neg_p (tree_nonzero_bits (@0)))
 (with { tree stype = signed_type_for (TREE_TYPE (@0)); }
  (convert (rshift (bit_not! (convert:stype @0)) @1))
 
@@ -7518,7 +7519,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 /* popcount(X) + popcount(Y) is popcount(X|Y) when X&Y must be zero.  */
 (simplify
   (plus (POPCOUNT:s @0) (POPCOUNT:s @1))
-  (if (wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0)
+  (if (INTEGRAL_TYPE_P (type)
+   && wi::bit_and (tree_nonzero_bits (@0), tree_nonzero_bits (@1)) == 0)
 (POPCOUNT (bit_ior @0 @1
 
 /* popcount(X) == 0 is X == 0, and related (in)equalities.  */
@@ -7550,15 +7552,16 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 (for pfun (POPCOUNT PARITY)
   (simplify
 (pfun @0)
-(with { wide_int nz = tree_nonzero_bits (@0); }
-  (switch
-   (if (nz == 1)
- (convert @0))
-   (if (wi::popcount (nz) == 1)
- (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
-   (convert (rshift:utype (convert:utype @0)
-  { build_int_cst (integer_type_node,
-   wi::ctz (nz)); }
+(if (INTEGRAL_TYPE_P (type))
+ (with { wide_int nz = tree_nonzero_bits (@0); }
+   (switch
+(if (nz == 1)
+  (convert @0))
+(if (wi::popcount (nz) == 1)
+  (with { tree utype = unsigned_type_for (TREE_TYPE (@0)); }
+(convert (rshift:utype (convert:utype @0)
+   { build_int_cst (integer_type_node,
+wi::ctz (nz)); })
 
 #if GIMPLE
 /* 64- and 32-bits branchless implementations of popcount are detected:
diff --git a/gcc/testsuite/gcc.c-torture/compile/vector-shift-1.c 
b/gcc/testsuite/gcc.c-torture/compile/vector-shift-1.c
new file mode 100644
index 000..142ea56d5bb
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/vector-shift-1.c
@@ -0,0 +1,8 @@
+typedef unsigned char __attribute__((__vector_size__ (1))) U;
+
+U
+foo (U u)
+{
+  u = u == u;
+  return (~(u >> 255));
+}
-- 
2.17.1



[PATCH 0/2] tree_nonzero_bits vs vector and complex types

2022-11-02 Thread apinski--- via Gcc-patches
From: Andrew Pinski 


While looking at older unconfirmed bug reports, I noticed an issue found
by ubsan in which tree_nonzero_bits was being called with a vector type.
Ubsan caught it because the code at the end of tree_nonzero_bits does
"return wi::shwi (-1, TYPE_PRECISION (TREE_TYPE (t)));" and the type was
a vector of 1 element, which meant the precision was 0, as for a vector
the precision stores the log2 of the number of elements.

Anyway, we want to catch this kind of error, where tree_nonzero_bits is called
with a vector or a complex type, and fix the places where that happens.

Thanks,
Andrew Pinski


Andrew Pinski (2):
  Fix PR 105532: match.pd patterns calling tree_nonzero_bits with vector
types
  Add assert for type on tree_nonzero_bits

 gcc/fold-const.cc |  3 +++
 gcc/match.pd  | 25 +++
 .../gcc.c-torture/compile/vector-shift-1.c|  8 ++
 3 files changed, 25 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.c-torture/compile/vector-shift-1.c

-- 
2.17.1



[PATCH 2/2] Add assert for type on tree_nonzero_bits

2022-11-02 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Right now anyone could call tree_nonzero_bits with
either complex or vector types and it will return
the wrong thing. So just assert that nobody calls
it with those types.

OK? Bootstrapped and tested with no regressions on x86_64-linux-gnu.

gcc/ChangeLog:

* fold-const.cc (tree_nonzero_bits): Add
assert.
---
 gcc/fold-const.cc | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc
index 7e1ea58518b..3ccac9b28df 100644
--- a/gcc/fold-const.cc
+++ b/gcc/fold-const.cc
@@ -16567,6 +16567,9 @@ c_getstr (tree str)
 wide_int
 tree_nonzero_bits (const_tree t)
 {
+  gcc_assert (TREE_CODE (TREE_TYPE (t)) != VECTOR_TYPE
+ && TREE_CODE (TREE_TYPE (t)) != COMPLEX_TYPE);
+
   switch (TREE_CODE (t))
 {
 case INTEGER_CST:
-- 
2.17.1



[COMMITTED] [AARCH64] Remove reference to MD_INCLUDES

2022-08-11 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The comment referring to MD_INCLUDES is not needed,
as MD_INCLUDES has been auto generated for a long time now,
since even before the aarch64 target was added.

MD_INCLUDES has been auto generated since r0-64489.
Note some targets still manually set MD_INCLUDES and
I suspect those can be changed but I don't have access
to those targets.

Committed as obvious.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/aarch64/aarch64.md: Remove comment
about MD_INCLUDES as it is out of date and not needed.
---
 gcc/config/aarch64/aarch64.md | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ef0aed25c6b..3ea16dbc255 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -347,9 +347,6 @@ (define_constants
; must not operate on inactive inputs if doing so could induce a fault.
(SVE_STRICT_GP 1)])
 
-;; If further include files are added the defintion of MD_INCLUDES
-;; must be updated.
-
 (include "constraints.md")
 (include "predicates.md")
 (include "iterators.md")
-- 
2.17.1



[PATCH 04/10] [RISCV] Add the list of operand modifiers to riscv.md too

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

To make it easier to find operand modifiers while in the md
file, add the list of modifiers to the top of the md file.
This is similar to the i386 target.

OK? Built and tested for riscv32-linux-gnu and riscv64-linux-gnu.

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_print_operand): Make a mention to
keep the list in riscv.md in sync with this list.
* config/riscv/riscv.md: Add list of modifiers as comments.
---
 gcc/config/riscv/riscv.cc |   4 +-
 gcc/config/riscv/riscv.md | 184 --
 2 files changed, 18 insertions(+), 170 deletions(-)

diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 7c120eaa8e3..189be5e4e6f 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3730,7 +3730,9 @@ riscv_memmodel_needs_release_fence (enum memmodel model)
'z' Print x0 if OP is zero, otherwise print OP normally.
'i' Print i if the operand is not a register.
'S' Print shift-index of single-bit mask OP.
-   'T' Print shift-index of inverted single-bit mask OP.  */
+   'T' Print shift-index of inverted single-bit mask OP.
+
+   Note please keep this list and the list in riscv.md in sync.  */
 
 static void
 riscv_print_operand (FILE *file, rtx op, int letter)
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index f4a5ff07fe4..aad2836d179 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -19,6 +19,20 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; .
 
+
+;; Keep this list and the one above riscv_print_operand in sync.
+;; The special asm out single letter directives following a '%' are:
+;; h -- Print the high-part relocation associated with OP, after stripping
+;;   any outermost HIGH.
+;; R -- Print the low-part relocation associated with OP.
+;; C -- Print the integer branch condition for comparison OP.
+;; A -- Print the atomic operation suffix for memory model OP.
+;; F -- Print a FENCE if the memory model requires a release.
+;; z -- Print x0 if OP is zero, otherwise print OP normally.
+;; i -- Print i if the operand is not a register.
+;; S -- Print shift-index of single-bit mask OP.
+;; T -- Print shift-index of inverted single-bit mask OP.
+
 (define_c_enum "unspec" [
   ;; Override return address for exception handling.
   UNSPEC_EH_RETURN
@@ -107,6 +121,7 @@ (define_constants
 
 (include "predicates.md")
 (include "constraints.md")
+(include "iterators.md")
 
 ;; 
 ;;
@@ -269,175 +284,6 @@ (define_attr "tune"
 (define_asm_attributes
   [(set_attr "type" "multi")])
 
-;; This mode iterator allows 32-bit and 64-bit GPR patterns to be generated
-;; from the same template.
-(define_mode_iterator GPR [SI (DI "TARGET_64BIT")])
-
-;; This mode iterator allows :P to be used for patterns that operate on
-;; pointer-sized quantities.  Exactly one of the two alternatives will match.
-(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
-
-;; Likewise, but for XLEN-sized quantities.
-(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
-
-;; Branches operate on XLEN-sized quantities, but for RV64 we accept
-;; QImode values so we can force zero-extension.
-(define_mode_iterator BR [(QI "TARGET_64BIT") SI (DI "TARGET_64BIT")])
-
-;; 32-bit moves for which we provide move patterns.
-(define_mode_iterator MOVE32 [SI])
-
-;; 64-bit modes for which we provide move patterns.
-(define_mode_iterator MOVE64 [DI DF])
-
-;; Iterator for sub-32-bit integer modes.
-(define_mode_iterator SHORT [QI HI])
-
-;; Iterator for HImode constant generation.
-(define_mode_iterator HISI [HI SI])
-
-;; Iterator for QImode extension patterns.
-(define_mode_iterator SUPERQI [HI SI (DI "TARGET_64BIT")])
-
-;; Iterator for hardware integer modes narrower than XLEN.
-(define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")])
-
-;; Iterator for hardware-supported integer modes.
-(define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
-
-;; Iterator for hardware-supported floating-point modes.
-(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
-   (DF "TARGET_DOUBLE_FLOAT")
-   (HF "TARGET_ZFH")])
-
-;; Iterator for floating-point modes that can be loaded into X registers.
-(define_mode_iterator SOFTF [SF (DF "TARGET_64BIT") (HF "TARGET_ZFHMIN")])
-
-;; This attribute gives the length suffix for a sign- or zero-extension
-;; instruction.
-(define_mode_attr size [(QI "b") (HI "h")])
-
-;; Mode attributes for loads.
-(define_mode_attr load [(QI "lb") (HI "lh") (SI "lw") (DI "ld") (HF "flh") (SF 
"flw") (DF "fld")])
-
-;; Instruction names for integer loads that aren't explicitly sign or zero
-;; extended.  See riscv_output_move and LOAD_EXTEND_OP.
-(define_mode_attr default_load [(QI "lbu") (HI "lhu") (SI "lw") (DI "ld")])
-
-;; Mode attribute for FP loads into integer registers.
-(define_mode_attr softload [(HF "lh") (SF "lw") (DF "ld")])

[PATCH 00/10] [RISCV] Fix/improve the RISCV backend

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This set of patches fixes a few RISCV issues and does a few
cleanups. Including moving all of the iterators to iterators.md like
many newer backends.
It also fixes a few PRs which I filed including the RISCV32 issue
with ZBS enabled.

Thanks,
Andrew Pinski

Andrew Pinski (10):
  [RISCV] Move iterators from riscv.md to iterators.md
  [RISCV] Move iterators from bitmanip.md to iterators.md
  [RISCV] Move iterators from sync.md to iterators.md
  [RISCV] Add the list of operand modifiers to riscv.md too
  [RISCV] Add %~ to print w if TARGET_64BIT and use it
  [RISCV] Use constraints/predicates instead of checking const_int
directly for shNadd patterns
  [RISCV] Use a constraint for bset_mask and bset_1_mask
  [RISCV] Fix PR 106586: riscv32 vs ZBS
  [RISCV] Add constraints for
not_single_bit_mask_operand/single_bit_mask_operand
  [RISCV] Fix PR 106632 and PR 106588 a few constraints in bitmanip.md

 gcc/config/riscv/bitmanip.md|  56 ++--
 gcc/config/riscv/constraints.md |  28 
 gcc/config/riscv/iterators.md   | 245 
 gcc/config/riscv/predicates.md  |   9 +-
 gcc/config/riscv/riscv.cc   |  35 -
 gcc/config/riscv/riscv.h|   4 +-
 gcc/config/riscv/riscv.md   | 199 +++---
 gcc/config/riscv/sync.md|   4 -
 8 files changed, 352 insertions(+), 228 deletions(-)
 create mode 100644 gcc/config/riscv/iterators.md

-- 
2.27.0



[PATCH 02/10] [RISCV] Move iterators from bitmanip.md to iterators.md

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Just like the previous patch, this moves all of the iterators
of bitmanip.md to iterators.md.  All modern backends put the
iterators in iterators.md for easier access.

OK? Built and tested for riscv32-linux-gnu with 
--with-arch=rv32imafdc_zba_zbb_zbc_zbs.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/riscv/bitmanip.md
(bitmanip_bitwise, bitmanip_minmax, clz_ctz_pcnt, bitmanip_optab,
bitmanip_insn, shiftm1): Move to ...
* config/riscv/iterators.md: Here.
---
 gcc/config/riscv/bitmanip.md  | 25 -
 gcc/config/riscv/iterators.md | 27 ++-
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index d1570ce8508..3329dd54eb6 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -17,31 +17,6 @@
 ;; along with GCC; see the file COPYING3.  If not see
 ;; .
 
-(define_code_iterator bitmanip_bitwise [and ior])
-
-(define_code_iterator bitmanip_minmax [smin umin smax umax])
-
-(define_code_iterator clz_ctz_pcnt [clz ctz popcount])
-
-(define_code_attr bitmanip_optab [(smin "smin")
- (smax "smax")
- (umin "umin")
- (umax "umax")
- (clz "clz")
- (ctz "ctz")
- (popcount "popcount")])
-
-
-(define_code_attr bitmanip_insn [(smin "min")
-(smax "max")
-(umin "minu")
-(umax "maxu")
-(clz "clz")
-(ctz "ctz")
-(popcount "cpop")])
-
-(define_mode_attr shiftm1 [(SI "const31_operand") (DI "const63_operand")])
-
 ;; ZBA extension.
 
 (define_insn "*zero_extendsidi2_bitmanip"
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 351aa7f3cea..54590f43193 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -113,6 +113,9 @@ (define_mode_attr UNITMODE [(HF "HF") (SF "SF") (DF "DF")])
 ;; the controlling mode.
 (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
 
+; bitmanip mode attribute
+(define_mode_attr shiftm1 [(SI "const31_operand") (DI "const63_operand")])
+
 ;; ---
 ;; Code Iterators
 ;; ---
@@ -148,11 +151,17 @@ (define_code_iterator any_ge [ge geu])
 (define_code_iterator any_lt [lt ltu])
 (define_code_iterator any_le [le leu])
 
+; bitmanip code iterators
+(define_code_iterator bitmanip_bitwise [and ior])
+
+(define_code_iterator bitmanip_minmax [smin umin smax umax])
+
+(define_code_iterator clz_ctz_pcnt [clz ctz popcount])
+
 ;; ---
 ;; Code Attributes
 ;; ---
 
-
 ;;  expands to an empty string when doing a signed operation and
 ;; "u" when doing an unsigned operation.
 (define_code_attr u [(sign_extend "") (zero_extend "u")
@@ -196,6 +205,22 @@ (define_code_attr insn [(ashift "sll")
(plus "add")
(minus "sub")])
 
+; bitmanip code attributes
+(define_code_attr bitmanip_optab [(smin "smin")
+ (smax "smax")
+ (umin "umin")
+ (umax "umax")
+ (clz "clz")
+ (ctz "ctz")
+ (popcount "popcount")])
+(define_code_attr bitmanip_insn [(smin "min")
+(smax "max")
+(umin "minu")
+(umax "maxu")
+(clz "clz")
+(ctz "ctz")
+(popcount "cpop")])
+
 ;; ---
 ;; Int Iterators.
 ;; ---
-- 
2.27.0



[PATCH 03/10] [RISCV] Move iterators from sync.md to iterators.md

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Like the previous two patches this moves the iterators
that are in sync.md to iterators.md.

OK? Built and tested for riscv64-linux-gnu.

gcc/ChangeLog:

* config/riscv/sync.md (any_atomic, atomic_optab): Move to ...
* config/riscv/iterators.md: Here.
---
 gcc/config/riscv/iterators.md | 7 +++
 gcc/config/riscv/sync.md  | 4 
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 54590f43193..6c8a6d2dd59 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -151,6 +151,9 @@ (define_code_iterator any_ge [ge geu])
 (define_code_iterator any_lt [lt ltu])
 (define_code_iterator any_le [le leu])
 
+; atomics code iterator
+(define_code_iterator any_atomic [plus ior xor and])
+
 ; bitmanip code iterators
 (define_code_iterator bitmanip_bitwise [and ior])
 
@@ -205,6 +208,10 @@ (define_code_attr insn [(ashift "sll")
(plus "add")
(minus "sub")])
 
+; atomics code attribute
+(define_code_attr atomic_optab
+  [(plus "add") (ior "or") (xor "xor") (and "and")])
+
 ; bitmanip code attributes
 (define_code_attr bitmanip_optab [(smin "smin")
  (smax "smax")
diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md
index 86b41e6b00a..7deb290d9dc 100644
--- a/gcc/config/riscv/sync.md
+++ b/gcc/config/riscv/sync.md
@@ -27,10 +27,6 @@ (define_c_enum "unspec" [
   UNSPEC_MEMORY_BARRIER
 ])
 
-(define_code_iterator any_atomic [plus ior xor and])
-(define_code_attr atomic_optab
-  [(plus "add") (ior "or") (xor "xor") (and "and")])
-
 ;; Memory barriers.
 
 (define_expand "mem_thread_fence"
-- 
2.27.0



[PATCH 01/10] [RISCV] Move iterators from riscv.md to iterators.md

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This moves the iterators out from riscv.md to iterators.md
like most modern backends.
I have not moved the iterators from the other .md files yet.

OK? Built and tested on riscv64-linux-gnu and riscv32-linux-gnu.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/riscv/riscv.md
(GPR): Move to new file.
(P, X, BR): Likewise.
(MOVE32, MOVE64, SHORT): Likewise.
(HISI, SUPERQI, SUBX): Likewise.
(ANYI, ANYF, SOFTF): Likewise.
(size, load, default_load): Likewise.
(softload, store, softstore): Likewise.
(reg, fmt, ifmt, amo): Likewise.
(UNITMODE, HALFMODE): Likewise.
(RINT, rint_pattern, rint_rm): Likewise.
(QUIET_COMPARISON, quiet_pattern, QUIET_PATTERN): Likewise.
(any_extend, any_shiftrt, any_shift): Likewise.
(any_bitwise): Likewise.
(any_div, any_mod): Likewise.
(any_gt, any_ge, any_lt, any_le): Likewise.
(u, su): Likewise.
(optab, insn): Likewise.
* config/riscv/iterators.md: New file.
---
 gcc/config/riscv/iterators.md | 212 ++
 1 file changed, 212 insertions(+)
 create mode 100644 gcc/config/riscv/iterators.md

diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
new file mode 100644
index 000..351aa7f3cea
--- /dev/null
+++ b/gcc/config/riscv/iterators.md
@@ -0,0 +1,212 @@
+;; Iterators for the machine description for RISC-V
+;; Copyright (C) 2011-2022 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; .
+
+
+;; ---
+;; Mode Iterators
+;; ---
+
+;; This mode iterator allows 32-bit and 64-bit GPR patterns to be generated
+;; from the same template.
+(define_mode_iterator GPR [SI (DI "TARGET_64BIT")])
+
+;; This mode iterator allows :P to be used for patterns that operate on
+;; pointer-sized quantities.  Exactly one of the two alternatives will match.
+(define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
+
+;; Likewise, but for XLEN-sized quantities.
+(define_mode_iterator X [(SI "!TARGET_64BIT") (DI "TARGET_64BIT")])
+
+;; Branches operate on XLEN-sized quantities, but for RV64 we accept
+;; QImode values so we can force zero-extension.
+(define_mode_iterator BR [(QI "TARGET_64BIT") SI (DI "TARGET_64BIT")])
+
+;; 32-bit moves for which we provide move patterns.
+(define_mode_iterator MOVE32 [SI])
+
+;; 64-bit modes for which we provide move patterns.
+(define_mode_iterator MOVE64 [DI DF])
+
+;; Iterator for sub-32-bit integer modes.
+(define_mode_iterator SHORT [QI HI])
+
+;; Iterator for HImode constant generation.
+(define_mode_iterator HISI [HI SI])
+
+;; Iterator for QImode extension patterns.
+(define_mode_iterator SUPERQI [HI SI (DI "TARGET_64BIT")])
+
+;; Iterator for hardware integer modes narrower than XLEN.
+(define_mode_iterator SUBX [QI HI (SI "TARGET_64BIT")])
+
+;; Iterator for hardware-supported integer modes.
+(define_mode_iterator ANYI [QI HI SI (DI "TARGET_64BIT")])
+
+;; Iterator for hardware-supported floating-point modes.
+(define_mode_iterator ANYF [(SF "TARGET_HARD_FLOAT")
+   (DF "TARGET_DOUBLE_FLOAT")
+   (HF "TARGET_ZFH")])
+
+;; Iterator for floating-point modes that can be loaded into X registers.
+(define_mode_iterator SOFTF [SF (DF "TARGET_64BIT") (HF "TARGET_ZFHMIN")])
+
+
+;; ---
+;; Mode attributes
+;; ---
+
+
+;; This attribute gives the length suffix for a sign- or zero-extension
+;; instruction.
+(define_mode_attr size [(QI "b") (HI "h")])
+
+;; Mode attributes for loads.
+(define_mode_attr load [(QI "lb") (HI "lh") (SI "lw") (DI "ld") (SF "flw") (HF 
"flh") (DF "fld")])
+
+;; Instruction names for integer loads that aren't explicitly sign or zero
+;; extended.  See riscv_output_move and LOAD_EXTEND_OP.
+(define_mode_attr default_load [(QI "lbu") (HI "lhu") (SI "lw") (DI "ld")])
+
+;; Mode attribute for FP loads into integer registers.
+(define_mode_attr softload [(HF "lh") (SF "lw") (DF "ld")])
+
+;; Instruction names for stores.
+(define_mode_attr store [(QI "sb") (HI "sh") (S

[PATCH 06/10] [RISCV] Use constraints/predicates instead of checking const_int directly for shNadd patterns

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This simplifies the code by adding a predicate and a constraint for 1/2/3.
The aarch64 backend has a similar predicate called aarch64_shift_imm_
which they use there.

OK? Built and tested on riscv32-linux-gnu and riscv64-linux-gnu with no 
regressions.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/riscv/constraints.md (Ds3): New constraint.
* config/riscv/predicates.md (imm123_operand): New predicate.
* config/riscv/bitmanip.md (*shNadd): Use Ds3 and imm123_operand.
(*shNadduw): Likewise.
---
 gcc/config/riscv/bitmanip.md| 8 +++-
 gcc/config/riscv/constraints.md | 6 ++
 gcc/config/riscv/predicates.md  | 5 +
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index ebd6eee1a22..73a36f7751b 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -32,10 +32,9 @@ (define_insn "*zero_extendsidi2_bitmanip"
 (define_insn "*shNadd"
   [(set (match_operand:X 0 "register_operand" "=r")
(plus:X (ashift:X (match_operand:X 1 "register_operand" "r")
- (match_operand:QI 2 "immediate_operand" "I"))
+ (match_operand:QI 2 "imm123_operand" "Ds3"))
(match_operand:X 3 "register_operand" "r")))]
-  "TARGET_ZBA
-   && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)"
+  "TARGET_ZBA"
   "sh%2add\t%0,%1,%3"
   [(set_attr "type" "bitmanip")
(set_attr "mode" "")])
@@ -44,11 +43,10 @@ (define_insn "*shNadduw"
   [(set (match_operand:DI 0 "register_operand" "=r")
(plus:DI
  (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
-(match_operand:QI 2 "immediate_operand" "I"))
+(match_operand:QI 2 "imm123_operand" "Ds3"))
 (match_operand 3 "immediate_operand" ""))
  (match_operand:DI 4 "register_operand" "r")))]
   "TARGET_64BIT && TARGET_ZBA
-   && (INTVAL (operands[2]) >= 1) && (INTVAL (operands[2]) <= 3)
&& (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0x"
   "sh%2add.uw\t%0,%1,%4"
   [(set_attr "type" "bitmanip")
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index bafa4188ccb..61b84875fd9 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -54,6 +54,12 @@ (define_constraint "L"
   (and (match_code "const_int")
(match_test "LUI_OPERAND (ival)")))
 
+(define_constraint "Ds3"
+  "@internal
+   1, 2 or 3 immediate"
+  (and (match_code "const_int")
+   (match_test "IN_RANGE (ival, 1, 3)")))
+
 ;; Floating-point constant +0.0, used for FCVT-based moves when FMV is
 ;; not available in RV32.
 (define_constraint "G"
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 79e0c1d5589..2af7f661d6f 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -244,6 +244,11 @@ (define_predicate "imm5_operand"
   (and (match_code "const_int")
(match_test "INTVAL (op) < 5")))
 
+;; A const_int for sh1add/sh2add/sh3add
+(define_predicate "imm123_operand"
+  (and (match_code "const_int")
+   (match_test "IN_RANGE (INTVAL (op), 1, 3)")))
+
 ;; A CONST_INT operand that consists of a single run of consecutive set bits.
 (define_predicate "consecutive_bits_operand"
   (match_code "const_int")
-- 
2.27.0



[PATCH 08/10] [RISCV] Fix PR 106586: riscv32 vs ZBS

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is twofold. With RISCV32, 32-bit
const_ints are always sign extended to 64 bits in a HWI.
That means SINGLE_BIT_MASK_OPERAND should
mask off the upper bits before checking for a single bit
when !TARGET_64BIT.
In addition, there are a few locations which forget to call
trunc_int_for_mode when generating an SImode constant,
so those constants are not sign extended correctly for a HWI.
The predicates single_bit_mask_operand and
not_single_bit_mask_operand need to get the same handling
as SINGLE_BIT_MASK_OPERAND, so just use SINGLE_BIT_MASK_OPERAND.

OK? Built and tested on riscv32-linux-gnu and riscv64-linux-gnu with
--with-arch=rvNimafdc_zba_zbb_zbc_zbs where N is replaced with 32 or 64.

Thanks,
Andrew Pinski

gcc/ChangeLog:

PR target/106586
* config/riscv/predicates.md (single_bit_mask_operand):
Use SINGLE_BIT_MASK_OPERAND instead of directly calling pow2p_hwi.
(not_single_bit_mask_operand): Likewise.
* config/riscv/riscv.cc (riscv_build_integer_1): Don't special case
1<<31 for 32bits as it is already handled.
Call trunc_int_for_mode on the upper part after the subtraction.
(riscv_move_integer): Call trunc_int_for_mode before generating
the integer just to make sure the constant has been sign extended
correctly.
(riscv_emit_int_compare): Call trunc_int_for_mode after doing the
addition for the new rhs.
* config/riscv/riscv.h (SINGLE_BIT_MASK_OPERAND): If !TARGET_64BIT,
then mask off the upper 32 bits of the HWI as it will be sign extended.
---
 gcc/config/riscv/predicates.md |  4 ++--
 gcc/config/riscv/riscv.cc  | 12 +---
 gcc/config/riscv/riscv.h   |  4 +++-
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 2af7f661d6f..862e72b0983 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -226,11 +226,11 @@ (define_special_predicate "gpr_save_operation"
 ;; Predicates for the ZBS extension.
 (define_predicate "single_bit_mask_operand"
   (and (match_code "const_int")
-   (match_test "pow2p_hwi (INTVAL (op))")))
+   (match_test "SINGLE_BIT_MASK_OPERAND (UINTVAL (op))")))
 
 (define_predicate "not_single_bit_mask_operand"
   (and (match_code "const_int")
-   (match_test "pow2p_hwi (~INTVAL (op))")))
+   (match_test "SINGLE_BIT_MASK_OPERAND (~UINTVAL (op))")))
 
 (define_predicate "const31_operand"
   (and (match_code "const_int")
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 22d0f6d604c..026c69ce40d 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -432,7 +432,7 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
 sign-extended (negative) representation (-1 << 31) for the
 value, if we want to build (1 << 31) in SImode.  This will
 then expand to an LUI instruction.  */
-  if (mode == SImode && value == (HOST_WIDE_INT_1U << 31))
+  if (TARGET_64BIT && mode == SImode && value == (HOST_WIDE_INT_1U << 31))
codes[0].value = (HOST_WIDE_INT_M1U << 31);
 
   return 1;
@@ -445,7 +445,11 @@ riscv_build_integer_1 (struct riscv_integer_op 
codes[RISCV_MAX_INTEGER_OPS],
   && (mode != HImode
  || value - low_part <= ((1 << (GET_MODE_BITSIZE (HImode) - 1)) - 1)))
 {
-  alt_cost = 1 + riscv_build_integer_1 (alt_codes, value - low_part, mode);
+  HOST_WIDE_INT upper_part = value - low_part;
+  if (mode != VOIDmode)
+   upper_part = trunc_int_for_mode (value - low_part, mode);
+
+  alt_cost = 1 + riscv_build_integer_1 (alt_codes, upper_part, mode);
   if (alt_cost < cost)
{
  alt_codes[alt_cost-1].code = PLUS;
@@ -1550,6 +1554,7 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT 
value,
 x = riscv_split_integer (value, mode);
   else
 {
+  codes[0].value = trunc_int_for_mode (codes[0].value, mode);
   /* Apply each binary operation to X. */
   x = GEN_INT (codes[0].value);
 
@@ -1559,7 +1564,7 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT 
value,
x = riscv_emit_set (temp, x);
  else
x = force_reg (mode, x);
-
+ codes[i].value = trunc_int_for_mode (codes[i].value, mode);
  x = gen_rtx_fmt_ee (codes[i].code, mode, x, GEN_INT (codes[i].value));
}
 }
@@ -2651,6 +2656,7 @@ riscv_emit_int_compare (enum rtx_code *code, rtx *op0, 
rtx *op1)
continue;
 
  new_rhs = rhs + (increment ? 1 : -1);
+ new_rhs = trunc_int_for_mode (new_rhs, GET_MODE (*op0));
  if (riscv_integer_cost (new_rhs) < riscv_integer_cost (rhs)
  && (rhs < 0) == (new_rhs < 0))
{
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 4b07c5487c6..5394776eb50 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -535,7 +53

[PATCH 09/10] [RISCV] Add constraints for not_single_bit_mask_operand/single_bit_mask_operand

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Like a previous patch, just add constraints for predicates
not_single_bit_mask_operand and single_bit_mask_operand.

OK? Built and tested for riscv32-linux-gnu and riscv64-linux-gnu.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/riscv/constraints.md (DbS): New constraint.
(DnS): New constraint.
* config/riscv/bitmanip.md (*bset_1_mask): Use new constraint.
(*bclr): Likewise.
(*binvi): Likewise.
---
 gcc/config/riscv/bitmanip.md|  6 +++---
 gcc/config/riscv/constraints.md | 10 ++
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index d362f526e79..026299d6703 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -300,7 +300,7 @@ (define_insn "*bset_1_mask"
 (define_insn "*bseti"
   [(set (match_operand:X 0 "register_operand" "=r")
(ior:X (match_operand:X 1 "register_operand" "r")
-  (match_operand 2 "single_bit_mask_operand" "i")))]
+  (match_operand:X 2 "single_bit_mask_operand" "DbS")))]
   "TARGET_ZBS"
   "bseti\t%0,%1,%S2"
   [(set_attr "type" "bitmanip")])
@@ -317,7 +317,7 @@ (define_insn "*bclr"
 (define_insn "*bclri"
   [(set (match_operand:X 0 "register_operand" "=r")
(and:X (match_operand:X 1 "register_operand" "r")
-  (match_operand 2 "not_single_bit_mask_operand" "i")))]
+  (match_operand:X 2 "not_single_bit_mask_operand" "DnS")))]
   "TARGET_ZBS"
   "bclri\t%0,%1,%T2"
   [(set_attr "type" "bitmanip")])
@@ -334,7 +334,7 @@ (define_insn "*binv"
 (define_insn "*binvi"
   [(set (match_operand:X 0 "register_operand" "=r")
(xor:X (match_operand:X 1 "register_operand" "r")
-  (match_operand 2 "single_bit_mask_operand" "i")))]
+  (match_operand:X 2 "single_bit_mask_operand" "DbS")))]
   "TARGET_ZBS"
   "binvi\t%0,%1,%S2"
   [(set_attr "type" "bitmanip")])
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 444870ad060..2873d533cb5 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -72,6 +72,16 @@ (define_constraint "DsD"
   (and (match_code "const_int")
(match_test "ival == 63")))
 
+(define_constraint "DbS"
+  "@internal"
+  (and (match_code "const_int")
+   (match_test "SINGLE_BIT_MASK_OPERAND (ival)")))
+
+(define_constraint "DnS"
+  "@internal"
+  (and (match_code "const_int")
+   (match_test "SINGLE_BIT_MASK_OPERAND (~ival)")))
+
 ;; Floating-point constant +0.0, used for FCVT-based moves when FMV is
 ;; not available in RV32.
 (define_constraint "G"
-- 
2.27.0



[PATCH 10/10] [RISCV] Fix PR 106632 and PR 106588 a few constraints in bitmanip.md

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The constraints should be n instead of i. Also there
needs to be a check for an out-of-bounds zero_extract for
*bexti.

gcc/ChangeLog:

PR target/106632
PR target/106588
* config/riscv/bitmanip.md (*shNadduw): Use n constraint
instead of i.
(*slliuw): Likewise.
(*bexti): Likewise. Also add a check for operands[2] to be less
than the mode bitsize.
---
 gcc/config/riscv/bitmanip.md | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 026299d6703..ecf5b51b533 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -44,7 +44,7 @@ (define_insn "*shNadduw"
(plus:DI
  (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
 (match_operand:QI 2 "imm123_operand" "Ds3"))
-(match_operand 3 "immediate_operand" ""))
+(match_operand 3 "immediate_operand" "n"))
  (match_operand:DI 4 "register_operand" "r")))]
   "TARGET_64BIT && TARGET_ZBA
&& (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0x"
@@ -110,7 +110,7 @@ (define_insn "*slliuw"
   [(set (match_operand:DI 0 "register_operand" "=r")
(and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
   (match_operand:QI 2 "immediate_operand" "I"))
-   (match_operand 3 "immediate_operand" "")))]
+   (match_operand 3 "immediate_operand" "n")))]
   "TARGET_64BIT && TARGET_ZBA
&& (INTVAL (operands[3]) >> INTVAL (operands[2])) == 0x"
   "slli.uw\t%0,%1,%2"
@@ -354,6 +354,7 @@ (define_insn "*bexti"
(zero_extract:X (match_operand:X 1 "register_operand" "r")
(const_int 1)
(match_operand 2 "immediate_operand" "i")))]
-  "TARGET_ZBS"
+   (match_operand 2 "immediate_operand" "n")))]
+  "TARGET_ZBS && UINTVAL (operands[2]) < GET_MODE_BITSIZE (mode)"
   "bexti\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
-- 
2.27.0



[PATCH 07/10] [RISCV] Use a constraint for bset_mask and bset_1_mask

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

A constraint here just makes it easier to understand what the
operands are.

OK? Built and tested on riscv32-linux-gnu and riscv64-linux-gnu with
--with-arch=rvNimafdc_zba_zbb_zbc_zbs (where N is 32 and 64).

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/riscv/constraints.md (DsS): New constraint.
(DsD): New constraint.
* config/riscv/iterators.md (shiftm1p): New mode attribute.
* config/riscv/bitmanip.md (*bset_mask):
Use shiftm1p.
(*bset_1_mask): Likewise.
---
 gcc/config/riscv/bitmanip.md|  4 ++--
 gcc/config/riscv/constraints.md | 12 
 gcc/config/riscv/iterators.md   |  1 +
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 73a36f7751b..d362f526e79 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -273,7 +273,7 @@ (define_insn "*bset_mask"
(ior:X (ashift:X (const_int 1)
 (subreg:QI
  (and:X (match_operand:X 2 "register_operand" "r")
-(match_operand 3 "" "i")) 0))
+(match_operand 3 "" 
"")) 0))
   (match_operand:X 1 "register_operand" "r")))]
   "TARGET_ZBS"
   "bset\t%0,%1,%2"
@@ -292,7 +292,7 @@ (define_insn "*bset_1_mask"
(ashift:X (const_int 1)
  (subreg:QI
   (and:X (match_operand:X 1 "register_operand" "r")
- (match_operand 2 "" "i")) 0)))]
+ (match_operand 2 "" "")) 0)))]
   "TARGET_ZBS"
   "bset\t%0,x0,%1"
   [(set_attr "type" "bitmanip")])
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 61b84875fd9..444870ad060 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -60,6 +60,18 @@ (define_constraint "Ds3"
   (and (match_code "const_int")
(match_test "IN_RANGE (ival, 1, 3)")))
 
+(define_constraint "DsS"
+  "@internal
+   31 immediate"
+  (and (match_code "const_int")
+   (match_test "ival == 31")))
+
+(define_constraint "DsD"
+  "@internal
+   63 immediate"
+  (and (match_code "const_int")
+   (match_test "ival == 63")))
+
 ;; Floating-point constant +0.0, used for FCVT-based moves when FMV is
 ;; not available in RV32.
 (define_constraint "G"
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 6c8a6d2dd59..be0d5390307 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -115,6 +115,7 @@ (define_mode_attr HALFMODE [(DF "SI") (DI "SI") (TF "DI")])
 
 ; bitmanip mode attribute
 (define_mode_attr shiftm1 [(SI "const31_operand") (DI "const63_operand")])
+(define_mode_attr shiftm1p [(SI "DsS") (DI "DsD")])
 
 ;; ---
 ;; Code Iterators
-- 
2.27.0



[PATCH 05/10] [RISCV] Add %~ to print w if TARGET_64BIT and use it

2022-08-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

To make things easier and more maintainable, we need to
add support for printing out w if TARGET_64BIT, so this patch
adds %~ to do that, similar to how the x86 backend uses %~
to print out i/f for TARGET_AVX2. We could have chosen any
punctuation symbol, but ~ looks the closest to w.

OK? Built and tested for riscv64-linux-gnu and riscv32-linux-gnu with no
regressions.

Thanks,
Andrew Pinski

gcc/ChangeLog:

* config/riscv/riscv.cc (riscv_print_operand):
Handle '~'.
(riscv_print_operand_punct_valid_p): New function.
(TARGET_PRINT_OPERAND_PUNCT_VALID_P): Define.
* config/riscv/bitmanip.md (<bitmanip_optab>si2/clz_ctz_pcnt):
Use %~ instead of conditionalizing the pattern on TARGET_64BIT.
(rotrsi3): Likewise.
(rotlsi3): Likewise.
* config/riscv/riscv.md: Add ~ to the list of modifiers.
(addsi3): Use %~ instead of conditionalizing the pattern on TARGET_64BIT.
(subsi3): Likewise.
(negsi2): Likewise.
(mulsi3): Likewise.
(<optab>si3/any_div): Likewise.
(*addhi3): Likewise.
(<optab>si3/any_shift): Likewise.
---
 gcc/config/riscv/bitmanip.md |  6 +++---
 gcc/config/riscv/riscv.cc| 19 +++
 gcc/config/riscv/riscv.md| 15 ---
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index 3329dd54eb6..ebd6eee1a22 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -143,7 +143,7 @@ (define_insn "si2"
   [(set (match_operand:SI 0 "register_operand" "=r")
 (clz_ctz_pcnt:SI (match_operand:SI 1 "register_operand" "r")))]
   "TARGET_ZBB"
-  { return TARGET_64BIT ? "w\t%0,%1" : 
"\t%0,%1"; }
+  "%~\t%0,%1"
   [(set_attr "type" "bitmanip")
(set_attr "mode" "SI")])
 
@@ -201,7 +201,7 @@ (define_insn "rotrsi3"
(rotatert:SI (match_operand:SI 1 "register_operand" "r")
 (match_operand:QI 2 "arith_operand" "rI")))]
   "TARGET_ZBB"
-  { return TARGET_64BIT ? "ror%i2w\t%0,%1,%2" : "ror%i2\t%0,%1,%2"; }
+  "ror%i2%~\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
 
 (define_insn "rotrdi3"
@@ -225,7 +225,7 @@ (define_insn "rotlsi3"
(rotate:SI (match_operand:SI 1 "register_operand" "r")
   (match_operand:QI 2 "register_operand" "r")))]
   "TARGET_ZBB"
-  { return TARGET_64BIT ? "rolw\t%0,%1,%2" : "rol\t%0,%1,%2"; }
+  "rol%~\t%0,%1,%2"
   [(set_attr "type" "bitmanip")])
 
 (define_insn "rotldi3"
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 189be5e4e6f..22d0f6d604c 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3731,12 +3731,22 @@ riscv_memmodel_needs_release_fence (enum memmodel model)
'i' Print i if the operand is not a register.
'S' Print shift-index of single-bit mask OP.
'T' Print shift-index of inverted single-bit mask OP.
+   '~' Print w if TARGET_64BIT is true; otherwise not print anything.
 
Note please keep this list and the list in riscv.md in sync.  */
 
 static void
 riscv_print_operand (FILE *file, rtx op, int letter)
 {
+  /* `~` does not take an operand so op will be null
+ Check for before accessing op.
+  */
+  if (letter == '~')
+{
+  if (TARGET_64BIT)
+   fputc('w', file);
+  return;
+}
   machine_mode mode = GET_MODE (op);
   enum rtx_code code = GET_CODE (op);
 
@@ -3812,6 +3822,13 @@ riscv_print_operand (FILE *file, rtx op, int letter)
 }
 }
 
+/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P */
+static bool
+riscv_print_operand_punct_valid_p (unsigned char code)
+{
+  return (code == '~');
+}
+
 /* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
 
 static void
@@ -5900,6 +5917,8 @@ riscv_init_libfuncs (void)
 #define TARGET_PRINT_OPERAND riscv_print_operand
 #undef TARGET_PRINT_OPERAND_ADDRESS
 #define TARGET_PRINT_OPERAND_ADDRESS riscv_print_operand_address
+#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
+#define TARGET_PRINT_OPERAND_PUNCT_VALID_P riscv_print_operand_punct_valid_p
 
 #undef TARGET_SETUP_INCOMING_VARARGS
 #define TARGET_SETUP_INCOMING_VARARGS riscv_setup_incoming_varargs
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index aad2836d179..30cd07dc6f5 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -32,6 +32,7 @@
 ;; i -- Print i if the operand is not a register.
 ;; S -- Print shift-index of single-bit mask OP.
 ;; T -- Print shift-index of inverted single-bit mask OP.
+;; ~ -- Print w if TARGET_64BIT is true; otherwise not print anything.
 
 (define_c_enum "unspec" [
   ;; Override return address for exception handling.
@@ -312,7 +313,7 @@ (define_insn "addsi3"
(plus:SI (match_operand:SI 1 "register_operand" " r,r")
 (match_operand:SI 2 "arith_operand"" r,I")))]
   ""
-  { return TARGET_64BIT ? "add%i2w\t%0,%1,%2" : "add%i2\t%0,%1,%2"; }
+  "add%i2%~\t%0,%1,%2"
   [(set_attr "type" "arith")
(set_attr "mode" "SI")])
 
@@ -452,7 +453,7 @@ (d

[PATCH 2/3] Fix PR 106601: __builtin_bswap16 code gen could be improved with ZBB enabled

2022-08-20 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The default expansion for bswap16 is two extractions (shift/and)
followed by an insertion (ior) and then a zero extend. This can be improved
with ZBB enabled to just a full byteswap followed by a (logical) shift right.
This patch adds a new pattern which does that.

OK? Built and tested on riscv32-linux-gnu and riscv64-linux-gnu.

gcc/ChangeLog:

PR target/106601
* config/riscv/bitmanip.md (bswaphi2): New pattern.

gcc/testsuite/ChangeLog:

PR target/106601
* gcc.target/riscv/zbb_32_bswap-2.c: New test.
* gcc.target/riscv/zbb_bswap-2.c: New test.

Change-Id: If61362c14664cf8685da17779217033689878f86
---
 gcc/config/riscv/bitmanip.md  | 24 +++
 .../gcc.target/riscv/zbb_32_bswap-2.c | 12 ++
 gcc/testsuite/gcc.target/riscv/zbb_bswap-2.c  | 12 ++
 3 files changed, 48 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_32_bswap-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_bswap-2.c

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index c7ba667f87a..c4383285d81 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -276,6 +276,30 @@ (define_insn "bswap2"
   "rev8\t%0,%1"
   [(set_attr "type" "bitmanip")])
 
+;; HI bswap can be emulated using SI/DI bswap followed
+;; by a logical shift right
+;; SI bswap for TARGET_64BIT is already similarly in
+;; the common code.
+(define_expand "bswaphi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+(bswap:HI (match_operand:HI 1 "register_operand" "r")))]
+  "TARGET_ZBB"
+{
+  rtx tmp = gen_reg_rtx (word_mode);
+  rtx newop1 = gen_lowpart (word_mode, operands[1]);
+  if (TARGET_64BIT)
+emit_insn (gen_bswapdi2 (tmp, newop1));
+  else
+emit_insn (gen_bswapsi2 (tmp, newop1));
+  rtx tmp1 = gen_reg_rtx (word_mode);
+  if (TARGET_64BIT)
+emit_insn (gen_lshrdi3 (tmp1, tmp, GEN_INT (64 - 16)));
+  else
+emit_insn (gen_lshrsi3 (tmp1, tmp, GEN_INT (32 - 16)));
+  emit_move_insn (operands[0], gen_lowpart (HImode, tmp1));
+  DONE;
+})
+
 (define_insn "3"
   [(set (match_operand:X 0 "register_operand" "=r")
 (bitmanip_minmax:X (match_operand:X 1 "register_operand" "r")
diff --git a/gcc/testsuite/gcc.target/riscv/zbb_32_bswap-2.c 
b/gcc/testsuite/gcc.target/riscv/zbb_32_bswap-2.c
new file mode 100644
index 000..679b34c4e41
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb_32_bswap-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+int foo(int n)
+{
+  return __builtin_bswap16(n);
+}
+
+/* { dg-final { scan-assembler "rev8" } } */
+/* { dg-final { scan-assembler "srli" } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/zbb_bswap-2.c 
b/gcc/testsuite/gcc.target/riscv/zbb_bswap-2.c
new file mode 100644
index 000..c358f6683f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb_bswap-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+int foo(int n)
+{
+  return __builtin_bswap16(n);
+}
+
+/* { dg-final { scan-assembler "rev8" } } */
+/* { dg-final { scan-assembler "srli" } } */
+
-- 
2.17.1



[PATCH 0/3] [RISCV] Improve bswap for ZBB

2022-08-20 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Just some improvements for bswap and ZBB including a testsuite change that will
allow more testing to happen.

Thanks,
Andrew Pinski


Andrew Pinski (3):
  Fix PR 106600: __builtin_bswap32 is not hooked up for ZBB for 32bit
  Fix PR 106601: __builtin_bswap16 code gen could be improved with ZBB
enabled
  Fix PR 106690: enable effective_target_bswap for RISCV targets with
ZBB enabled by default

 gcc/config/riscv/bitmanip.md  | 26 ++-
 .../gcc.target/riscv/zbb_32_bswap-1.c | 11 
 .../gcc.target/riscv/zbb_32_bswap-2.c | 12 +
 gcc/testsuite/gcc.target/riscv/zbb_bswap-1.c  | 11 
 gcc/testsuite/gcc.target/riscv/zbb_bswap-2.c  | 12 +
 gcc/testsuite/lib/target-supports.exp |  7 +
 6 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_32_bswap-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_32_bswap-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_bswap-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_bswap-2.c

-- 
2.17.1



[PATCH 1/3] Fix PR 106600: __builtin_bswap32 is not hooked up for ZBB for 32bit

2022-08-20 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is the bswap<mode>2 pattern had a check for TARGET_64BIT
but then used the X iterator. Since the X iterator is either SI or DI depending
on the setting of TARGET_64BIT, there is no reason for the TARGET_64BIT check.

OK? Built and tested on both riscv32-linux-gnu and riscv64-linux-gnu.

Thanks,
Andrew Pinski

gcc/ChangeLog:

PR target/106600
* config/riscv/bitmanip.md (bswap<mode>2): Remove
condition on TARGET_64BIT as X is already conditional there.

gcc/testsuite/ChangeLog:

PR target/106600
* gcc.target/riscv/zbb_32_bswap-1.c: New test.
* gcc.target/riscv/zbb_bswap-1.c: New test.

Change-Id: Iba3187e5620b0f291f7c38aab597f367b47a22c5
---
 gcc/config/riscv/bitmanip.md|  2 +-
 gcc/testsuite/gcc.target/riscv/zbb_32_bswap-1.c | 11 +++
 gcc/testsuite/gcc.target/riscv/zbb_bswap-1.c| 11 +++
 3 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_32_bswap-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/zbb_bswap-1.c

diff --git a/gcc/config/riscv/bitmanip.md b/gcc/config/riscv/bitmanip.md
index d1570ce8508..c7ba667f87a 100644
--- a/gcc/config/riscv/bitmanip.md
+++ b/gcc/config/riscv/bitmanip.md
@@ -272,7 +272,7 @@ (define_insn "rotlsi3_sext"
 (define_insn "bswap2"
   [(set (match_operand:X 0 "register_operand" "=r")
 (bswap:X (match_operand:X 1 "register_operand" "r")))]
-  "TARGET_64BIT && TARGET_ZBB"
+  "TARGET_ZBB"
   "rev8\t%0,%1"
   [(set_attr "type" "bitmanip")])
 
diff --git a/gcc/testsuite/gcc.target/riscv/zbb_32_bswap-1.c 
b/gcc/testsuite/gcc.target/riscv/zbb_32_bswap-1.c
new file mode 100644
index 000..3ff7d9de409
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb_32_bswap-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv32gc_zbb -mabi=ilp32" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+int foo(int n)
+{
+  return __builtin_bswap32(n);
+}
+
+/* { dg-final { scan-assembler "rev8" } } */
+
diff --git a/gcc/testsuite/gcc.target/riscv/zbb_bswap-1.c 
b/gcc/testsuite/gcc.target/riscv/zbb_bswap-1.c
new file mode 100644
index 000..20feded0df2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/zbb_bswap-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gc_zbb -mabi=lp64" } */
+/* { dg-skip-if "" { *-*-* } { "-O0" } } */
+
+int foo(int n)
+{
+  return __builtin_bswap32(n);
+}
+
+/* { dg-final { scan-assembler "rev8" } } */
+
-- 
2.17.1



[PATCH 3/3] Fix PR 106690: enable effective_target_bswap for RISCV targets with ZBB enabled by default

2022-08-20 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

While looking for testcases to quickly test, I noticed that
check_effective_target_bswap was not enabled for riscv when
ZBB is enabled. This patch checks if ZBB is enabled when
targeting RISCV* for bswap.

OK? Ran the testsuite for riscv32-linux-gnu both with and without ZBB enabled.

PR testsuite/106690
gcc/testsuite/ChangeLog:

* lib/target-supports.exp (check_effective_target_bswap):
Return true if riscv and ZBB ISA extension is enabled.

Change-Id: I521c91e7fc1a54faa0c8399b685248690022278b
---
 gcc/testsuite/lib/target-supports.exp | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index 04a2a8e8659..0f1e1af31e9 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -8646,6 +8646,13 @@ proc check_effective_target_bswap { } {
 || [istarget powerpc*-*-*]
 || [istarget rs6000-*-*]
 || [istarget s390*-*-*]
+|| ([istarget riscv*-*-*]
+&& [check_no_compiler_messages_nocache riscv_zbb object {
+#if __riscv_zbb  <= 0
+#error ZBB is not enabled
+#endif
+int i;
+} ""])
 || ([istarget arm*-*-*]
 && [check_no_compiler_messages_nocache arm_v6_or_later object {
 #if __ARM_ARCH < 6
-- 
2.17.1



[PATCH] Fix target/101934: aarch64 memset code creates unaligned stores for -mstrict-align

2021-08-31 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is the aarch64_expand_setmem code did not check
STRICT_ALIGNMENT if it is creating an overlapping store.
This patch adds that check and the testcase works.

gcc/ChangeLog:

PR target/101934
* config/aarch64/aarch64.c (aarch64_expand_setmem):
Check STRICT_ALIGNMENT before creating an overlapping
store.

gcc/testsuite/ChangeLog:

PR target/101934
* gcc.target/aarch64/memset-strict-align-1.c: New test.
---
 gcc/config/aarch64/aarch64.c  |  4 +--
 .../aarch64/memset-strict-align-1.c   | 28 +++
 2 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/memset-strict-align-1.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3213585a588..26d59ba1e13 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23566,8 +23566,8 @@ aarch64_expand_setmem (rtx *operands)
   /* Do certain trailing copies as overlapping if it's going to be
 cheaper.  i.e. less instructions to do so.  For instance doing a 15
 byte copy it's more efficient to do two overlapping 8 byte copies than
-8 + 4 + 2 + 1.  */
-  if (n > 0 && n < copy_limit / 2)
+8 + 4 + 2 + 1.  Only do this when -mstrict-align is not supplied.  */
+  if (n > 0 && n < copy_limit / 2 && !STRICT_ALIGNMENT)
{
  next_mode = smallest_mode_for_size (n, MODE_INT);
  int n_bits = GET_MODE_BITSIZE (next_mode).to_constant ();
diff --git a/gcc/testsuite/gcc.target/aarch64/memset-strict-align-1.c 
b/gcc/testsuite/gcc.target/aarch64/memset-strict-align-1.c
new file mode 100644
index 000..5cdc8a44968
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/memset-strict-align-1.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -mstrict-align" } */
+
+struct s { char x[95]; };
+void foo (struct s *);
+void bar (void) { struct s s1 = {}; foo (&s1); }
+
+/* memset (s1 = {}, sizeof = 95) should be expanded out
+   such that there are no overlap stores when -mstrict-align
+   is in use.
+   so 2 pair 16 bytes stores (64 bytes).
+   1 16 byte stores
+   1 8 byte store
+   1 4 byte store
+   1 2 byte store
+   1 1 byte store
+   */
+
+/* { dg-final { scan-assembler-times "stp\tq" 2 } } */
+/* { dg-final { scan-assembler-times "str\tq" 1 } } */
+/* { dg-final { scan-assembler-times "str\txzr" 1 } } */
+/* { dg-final { scan-assembler-times "str\twzr" 1 } } */
+/* { dg-final { scan-assembler-times "strh\twzr" 1 } } */
+/* { dg-final { scan-assembler-times "strb\twzr" 1 } } */
+
+/* Also one store pair for the frame-pointer and the LR. */
+/* { dg-final { scan-assembler-times "stp\tx" 1 } } */
+
-- 
2.17.1



[PATCH] Add MIPS Linux support to gcc.misc-tests/linkage.c (testsuite/51748)

2021-08-31 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This adds MIPS Linux support to gcc.misc-tests/linkage.exp.  Basically
copying what was done for MIPS IRIX and changing the options to be correct.

OK?

gcc/testsuite/ChangeLog:

PR testsuite/51748
* gcc.misc-tests/linkage.exp: Add mips*-linux-* support.
---
 gcc/testsuite/gcc.misc-tests/linkage.exp | 12 
 1 file changed, 12 insertions(+)

diff --git a/gcc/testsuite/gcc.misc-tests/linkage.exp 
b/gcc/testsuite/gcc.misc-tests/linkage.exp
index afed2b811c9..2cb109e776e 100644
--- a/gcc/testsuite/gcc.misc-tests/linkage.exp
+++ b/gcc/testsuite/gcc.misc-tests/linkage.exp
@@ -38,6 +38,18 @@ if { [isnative] && ![is_remote host] } then {
 
# Need to ensure ABI for native compiler matches gcc
set native_cflags ""
+   if  [istarget "mips*-linux*"] {
+   set file_string [exec file "linkage-x.o"]
+   if [ string match "*64*" $file_string ] {
+   set native_cflags "-mabi=64"
+   }
+   if [ string match "*ELF 32*" $file_string ] {
+   set native_cflags "-mabi=32"
+   }
+   if [ string match "*N32*" $file_string ] {
+   set native_cflags "-mabi=n32"
+   }
+   }
if  [istarget "sparc*-sun-solaris2*"] {
set file_string [exec file "linkage-x.o"]
if [ string match "*64*" $file_string ] {
-- 
2.17.1



[PATCH] Fix target/102173 ICE after error recovery

2021-09-02 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

After the recent r12-3278-823685221de986a change, the testcase
gcc.target/aarch64/sve/acle/general-c/type_redef_1.c started
to ICE as the code was not ready for error_mark_node in the
type.  This fixes that and the testcase now passes.

gcc/ChangeLog:

* config/aarch64/aarch64-sve-builtins.cc (register_vector_type):
Handle error_mark_node as the type of the type_decl.
---
 gcc/config/aarch64/aarch64-sve-builtins.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc 
b/gcc/config/aarch64/aarch64-sve-builtins.cc
index f71b287570e..bc92213665c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -3416,6 +3416,7 @@ register_vector_type (vector_type_index type)
  installing an incorrect type.  */
   if (decl
   && TREE_CODE (decl) == TYPE_DECL
+  && TREE_TYPE (decl) != error_mark_node
   && TYPE_MAIN_VARIANT (TREE_TYPE (decl)) == vectype)
 vectype = TREE_TYPE (decl);
   acle_vector_types[0][type] = vectype;
-- 
2.17.1



[PATCH] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple

2021-09-02 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This patch adds simple folding of __builtin_aarch64_im_lane_boundsi where
we are not going to error out. It fixes the problem by the removal
of the function from the IR.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check):
New function.
(aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK.
(aarch64_general_gimple_fold_builtin): Likewise.
---
 gcc/config/aarch64/aarch64-builtins.c | 35 +++
 1 file changed, 35 insertions(+)

diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index f6b41d9c200..d4414373aa4 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -29,6 +29,7 @@
 #include "rtl.h"
 #include "tree.h"
 #include "gimple.h"
+#include "ssa.h"
 #include "memmodel.h"
 #include "tm_p.h"
 #include "expmed.h"
@@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn)
   return NULL_TREE;
 }
 
+/* Return true if the lane check can be removed as there is no
+   error going to be emitted.  */
+static bool
+aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
+{
+  if (TREE_CODE (arg0) != INTEGER_CST)
+return false;
+  if (TREE_CODE (arg1) != INTEGER_CST)
+return false;
+  if (TREE_CODE (arg2) != INTEGER_CST)
+return false;
+
+  auto totalsize = wi::to_widest (arg0);
+  auto elementsize = wi::to_widest (arg1);
+  if (totalsize == 0 || elementsize == 0)
+return false;
+  auto lane = wi::to_widest (arg2);
+  auto high = wi::udiv_trunc (totalsize, elementsize);
+  return wi::ltu_p (lane, high);
+}
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   case AARCH64_SIMD_BUILTIN_##T##_##N##A:
@@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, tree 
type,
   VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
   VAR1 (UNOP, floatv2di, 2, ALL, v2df)
return fold_build1 (FLOAT_EXPR, type, args[0]);
+  case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+   if (n_args == 3
+   && aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+ return fold_convert (void_type_node, integer_zero_node);
+   break;
   default:
break;
 }
@@ -2440,6 +2467,14 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, 
gcall *stmt)
}
  break;
}
+case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+  if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+   {
+ unlink_stmt_vdef (stmt);
+ release_defs (stmt);
+ new_stmt = gimple_build_nop ();
+   }
+  break;
 default:
   break;
 }
-- 
2.17.1



[PATCH] Fix some GC issues in the aarch64 back-end.

2021-09-02 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

I got some ICEs in my latest testing while running the libstdc++ testsuite.
I noticed the problem was connected to types. I had just touched the
builtins code, but nothing there could have caused this, so I looked for
some types/variables that were not being marked with GTY.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* config/aarch64/aarch64-builtins.c (struct aarch64_simd_type_info):
Mark with GTY.
(aarch64_simd_types): Likewise.
(aarch64_simd_intOI_type_node): Likewise.
(aarch64_simd_intCI_type_node): Likewise.
(aarch64_simd_intXI_type_node): Likewise.
* config/aarch64/aarch64.h (aarch64_fp16_type_node): Likewise.
(aarch64_fp16_ptr_type_node): Likewise.
(aarch64_bf16_type_node): Likewise.
(aarch64_bf16_ptr_type_node): Likewise.
---
 gcc/config/aarch64/aarch64-builtins.c | 10 +-
 gcc/config/aarch64/aarch64.h  |  8 
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index d441437..9f37a71 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -594,7 +594,7 @@ enum aarch64_simd_type
 };
 #undef ENTRY
 
-struct aarch64_simd_type_info
+struct GTY(()) aarch64_simd_type_info
 {
   enum aarch64_simd_type type;
 
@@ -626,14 +626,14 @@ struct aarch64_simd_type_info
 
 #define ENTRY(E, M, Q, G)  \
   {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
-static struct aarch64_simd_type_info aarch64_simd_types [] = {
+static GTY(()) struct aarch64_simd_type_info aarch64_simd_types [] = {
 #include "aarch64-simd-builtin-types.def"
 };
 #undef ENTRY
 
-static tree aarch64_simd_intOI_type_node = NULL_TREE;
-static tree aarch64_simd_intCI_type_node = NULL_TREE;
-static tree aarch64_simd_intXI_type_node = NULL_TREE;
+static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
+static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
+static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
 
 /* The user-visible __fp16 type, and a pointer to that type.  Used
across the back-end.  */
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index bfffbcd..a5ba6c2 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -1262,13 +1262,13 @@ extern const char *host_detect_local_cpu (int argc, 
const char **argv);
 
 /* This type is the user-visible __fp16, and a pointer to that type.  We
need it in many places in the backend.  Defined in aarch64-builtins.c.  */
-extern tree aarch64_fp16_type_node;
-extern tree aarch64_fp16_ptr_type_node;
+extern GTY(()) tree aarch64_fp16_type_node;
+extern GTY(()) tree aarch64_fp16_ptr_type_node;
 
 /* This type is the user-visible __bf16, and a pointer to that type.  Defined
in aarch64-builtins.c.  */
-extern tree aarch64_bf16_type_node;
-extern tree aarch64_bf16_ptr_type_node;
+extern GTY(()) tree aarch64_bf16_type_node;
+extern GTY(()) tree aarch64_bf16_ptr_type_node;
 
 /* The generic unwind code in libgcc does not initialize the frame pointer.
So in order to unwind a function using a frame pointer, the very first
-- 
1.8.3.1



[PATCHv2] [aarch64] Fix target/95969: __builtin_aarch64_im_lane_boundsi interferes with gimple

2021-09-03 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

This patch adds simple folding of __builtin_aarch64_im_lane_boundsi where
we are not going to error out. It fixes the problem by the removal
of the function from the IR.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

PR target/95969
* config/aarch64/aarch64-builtins.c (aarch64_fold_builtin_lane_check):
New function.
(aarch64_general_fold_builtin): Handle AARCH64_SIMD_BUILTIN_LANE_CHECK.
(aarch64_general_gimple_fold_builtin): Likewise.

gcc/testsuite/ChangeLog:

PR target/95969
* gcc.target/aarch64/lane-bound-1.c: New test.
* gcc.target/aarch64/lane-bound-2.c: New test.
---
 gcc/config/aarch64/aarch64-builtins.c | 35 +++
 .../gcc.target/aarch64/lane-bound-1.c | 14 
 .../gcc.target/aarch64/lane-bound-2.c | 10 ++
 3 files changed, 59 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/lane-bound-2.c

diff --git a/gcc/config/aarch64/aarch64-builtins.c 
b/gcc/config/aarch64/aarch64-builtins.c
index eef9fc0f444..119f67d4e4c 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -29,6 +29,7 @@
 #include "rtl.h"
 #include "tree.h"
 #include "gimple.h"
+#include "ssa.h"
 #include "memmodel.h"
 #include "tm_p.h"
 #include "expmed.h"
@@ -2333,6 +2334,27 @@ aarch64_general_builtin_rsqrt (unsigned int fn)
   return NULL_TREE;
 }
 
+/* Return true if the lane check can be removed as there is no
+   error going to be emitted.  */
+static bool
+aarch64_fold_builtin_lane_check (tree arg0, tree arg1, tree arg2)
+{
+  if (TREE_CODE (arg0) != INTEGER_CST)
+return false;
+  if (TREE_CODE (arg1) != INTEGER_CST)
+return false;
+  if (TREE_CODE (arg2) != INTEGER_CST)
+return false;
+
+  auto totalsize = wi::to_widest (arg0);
+  auto elementsize = wi::to_widest (arg1);
+  if (totalsize == 0 || elementsize == 0)
+return false;
+  auto lane = wi::to_widest (arg2);
+  auto high = wi::udiv_trunc (totalsize, elementsize);
+  return wi::ltu_p (lane, high);
+}
+
 #undef VAR1
 #define VAR1(T, N, MAP, FLAG, A) \
   case AARCH64_SIMD_BUILTIN_##T##_##N##A:
@@ -2353,6 +2375,11 @@ aarch64_general_fold_builtin (unsigned int fcode, tree 
type,
   VAR1 (UNOP, floatv4si, 2, ALL, v4sf)
   VAR1 (UNOP, floatv2di, 2, ALL, v2df)
return fold_build1 (FLOAT_EXPR, type, args[0]);
+  case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+   gcc_assert (n_args == 3);
+   if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+ return void_node;
+   break;
   default:
break;
 }
@@ -2440,6 +2467,14 @@ aarch64_general_gimple_fold_builtin (unsigned int fcode, 
gcall *stmt)
}
  break;
}
+case AARCH64_SIMD_BUILTIN_LANE_CHECK:
+  if (aarch64_fold_builtin_lane_check (args[0], args[1], args[2]))
+   {
+ unlink_stmt_vdef (stmt);
+ release_defs (stmt);
+ new_stmt = gimple_build_nop ();
+   }
+  break;
 default:
   break;
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c 
b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
new file mode 100644
index 000..bbbe679fd80
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-1.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+#include 
+
+void
+f (float32x4_t **ptr)
+{
+  float32x4_t res = vsetq_lane_f32 (0.0f, **ptr, 0);
+  **ptr = res;
+}
+/* GCC should be able to remove the call to "__builtin_aarch64_im_lane_boundsi"
+   and optimize out the second load from *ptr.  */
+/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 
"optimized" } } */
+/* { dg-final { scan-tree-dump-times " = \\\*ptr_" 1 "optimized" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c 
b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
new file mode 100644
index 000..923c94687c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/lane-bound-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-original" } */
+void
+f (void)
+{
+  __builtin_aarch64_im_lane_boundsi (16, 4, 0);
+  __builtin_aarch64_im_lane_boundsi (8, 8, 0);
+}
+/* GCC should be able to optimize these out before gimplification. */
+/* { dg-final { scan-tree-dump-times "__builtin_aarch64_im_lane_boundsi" 0 
"original" } } */
-- 
2.17.1



[PATCH] Fix PR tree-opt/63184: add simplification of (& + A) != (& + B)

2021-09-05 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

These two testcases have been failing since GCC 5 but things
have improved such that adding a simplification to match.pd
for this case is easier than before.
In the end we have the following IR:

  _5 = &a[1] + _4;
  _7 = &a + _13;
  if (_5 != _7)

So we can fold the _5 != _7 into:
(&a[1] - &a) + _4 != _13

The subtraction is folded into constant by ptr_difference_const.
In this case, the full expression gets folded into a constant
and we are able to remove the if statement.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

* match.pd: Add simplification of pointer_diff of two pointer_plus
with addr_expr in the first operand of each pointer_plus.
Add simplification of ne/eq of two pointer_plus with addr_expr
in the first operand of each pointer_plus.

gcc/testsuite/ChangeLog:

* c-c++-common/pr19807-2.c: Enable for all targets and remove the xfail.
* c-c++-common/pr19807-3.c: Likewise.
---
 gcc/match.pd   | 15 +++
 gcc/testsuite/c-c++-common/pr19807-2.c |  5 ++---
 gcc/testsuite/c-c++-common/pr19807-3.c |  5 ++---
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index f920bc4b7c1..cc7809dfe0f 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -2063,6 +2063,21 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(if (ptr_difference_const (@0, @1, &diff))
 { build_int_cst_type (type, diff); }
 
+/* (&a+b) - (&a[1] + c) -> sizeof(a[0]) + (b - c) */
+(simplify
+ (pointer_diff (pointer_plus ADDR_EXPR@0 @1) (pointer_plus ADDR_EXPR@2 @3))
+ (with { poly_int64 diff; }
+   (if (ptr_difference_const (@0, @2, &diff))
+(plus { build_int_cst_type (type, diff); } (convert (minus @1 @3))))))
+
+/* (&a+b) !=/== (&a[1] + c) ->  sizeof(a[0]) + b !=/== c */
+(for neeq (ne eq)
+ (simplify
+  (neeq (pointer_plus ADDR_EXPR@0 @1) (pointer_plus ADDR_EXPR@2 @3))
+   (with { poly_int64 diff; tree inner_type = TREE_TYPE (@1);}
+(if (ptr_difference_const (@0, @2, &diff))
+ (neeq (plus { build_int_cst_type (inner_type, diff); } @1) @3)))))
+
 /* Canonicalize (T *)(ptr - ptr-cst) to &MEM[ptr + -ptr-cst].  */
 (simplify
  (convert (pointer_diff @0 INTEGER_CST@1))
diff --git a/gcc/testsuite/c-c++-common/pr19807-2.c 
b/gcc/testsuite/c-c++-common/pr19807-2.c
index d2c010140d0..529b9c97322 100644
--- a/gcc/testsuite/c-c++-common/pr19807-2.c
+++ b/gcc/testsuite/c-c++-common/pr19807-2.c
@@ -1,5 +1,4 @@
-/* Some targets can optimize this on RTL.  */
-/* { dg-do link { target { x86_64-*-* i?86-*-* } } } */
+/* { dg-do link } */
 /* { dg-options "-O -fdump-tree-optimized" } */
 
 extern void link_error(void);
@@ -12,4 +11,4 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-not "link_error" "optimized" { xfail *-*-* } } 
} */
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
diff --git a/gcc/testsuite/c-c++-common/pr19807-3.c 
b/gcc/testsuite/c-c++-common/pr19807-3.c
index bb7f9827725..31c88f3b850 100644
--- a/gcc/testsuite/c-c++-common/pr19807-3.c
+++ b/gcc/testsuite/c-c++-common/pr19807-3.c
@@ -1,5 +1,4 @@
-/* Some targets can optimize this on RTL.  */
-/* { dg-do link { target { x86_64-*-* i?86-*-* } } } */
+/* { dg-do link } */
 /* { dg-options "-O -fdump-tree-optimized" } */
 
 extern void link_error(void);
@@ -12,4 +11,4 @@ int main()
   return 0;
 }
 
-/* { dg-final { scan-tree-dump-not "link_error" "optimized" { xfail *-*-* } } 
} */
+/* { dg-final { scan-tree-dump-not "link_error" "optimized" } } */
-- 
2.17.1



[PATCH] Fix PR lto/49664: liblto_plugin.so exports too many symbols

2021-09-12 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So right now liblto_plugin.so exports many libiberty symbols and
simple_object file symbols but really it just needs to export onload.

This fixes the problem by using "-export-symbols-regex onload" on
the libtool link line.

lto-plugin/ChangeLog:

* Makefile.am: Export only onload.
* Makefile.in: Regenerate.
---
 lto-plugin/Makefile.am | 3 ++-
 lto-plugin/Makefile.in | 7 ---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/lto-plugin/Makefile.am b/lto-plugin/Makefile.am
index 8b20e1d1d87..988d7a78294 100644
--- a/lto-plugin/Makefile.am
+++ b/lto-plugin/Makefile.am
@@ -21,7 +21,8 @@ in_gcc_libs = $(foreach lib, $(libexecsub_LTLIBRARIES), 
$(gcc_build_dir)/$(lib))
 liblto_plugin_la_SOURCES = lto-plugin.c
 # Note that we intentionally override the bindir supplied by ACX_LT_HOST_FLAGS.
 liblto_plugin_la_LDFLAGS = $(AM_LDFLAGS) \
-   $(lt_host_flags) -module -avoid-version -bindir $(libexecsubdir)
+   $(lt_host_flags) -module -avoid-version -bindir $(libexecsubdir) \
+   -export-symbols-regex onload
 # Can be simplified when libiberty becomes a normal convenience library.
 libiberty = $(with_libiberty)/libiberty.a
 libiberty_noasan = $(with_libiberty)/noasan/libiberty.a
diff --git a/lto-plugin/Makefile.in b/lto-plugin/Makefile.in
index 20611c6b1e6..f8df31bb1e8 100644
--- a/lto-plugin/Makefile.in
+++ b/lto-plugin/Makefile.in
@@ -323,6 +323,7 @@ prefix = @prefix@
 program_transform_name = @program_transform_name@
 psdir = @psdir@
 real_target_noncanonical = @real_target_noncanonical@
+runstatedir = @runstatedir@
 sbindir = @sbindir@
 sharedstatedir = @sharedstatedir@
 srcdir = @srcdir@
@@ -350,9 +351,9 @@ libexecsub_LTLIBRARIES = liblto_plugin.la
 in_gcc_libs = $(foreach lib, $(libexecsub_LTLIBRARIES), 
$(gcc_build_dir)/$(lib))
 liblto_plugin_la_SOURCES = lto-plugin.c
 # Note that we intentionally override the bindir supplied by ACX_LT_HOST_FLAGS.
-liblto_plugin_la_LDFLAGS = $(AM_LDFLAGS) $(lt_host_flags) -module 
-avoid-version \
-   -bindir $(libexecsubdir) $(if $(wildcard \
-   $(libiberty_noasan)),, $(if $(wildcard \
+liblto_plugin_la_LDFLAGS = $(AM_LDFLAGS) $(lt_host_flags) -module \
+   -avoid-version -bindir $(libexecsubdir) -export-symbols-regex \
+   onload $(if $(wildcard $(libiberty_noasan)),, $(if $(wildcard \
$(libiberty_pic)),,-Wc,$(libiberty)))
 # Can be simplified when libiberty becomes a normal convenience library.
 libiberty = $(with_libiberty)/libiberty.a
-- 
2.17.1



[PATCH] Remove m32r{,le}-*-linux* support from GCC

2021-09-13 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

m32r support never made it to glibc and the support for the Linux kernel
was removed with 4.18. This does not remove much, but there is no reason
to keep around a port which never worked or one for which the support in
other projects is gone.

OK? Checked to make sure m32r-linux and m32rle-linux were rejected
when building.

contrib/ChangeLog:

* config-list.mk: Remove m32r-linux and m32rle-linux
from the list.

gcc/ChangeLog:

* config.gcc: Add m32r-*-linux* and m32rle-*-linux*
to the Unsupported targets list.
Remove support for m32r-*-linux* and m32rle-*-linux*.
* config/m32r/linux.h: Removed.
* config/m32r/t-linux: Removed.

libgcc/ChangeLog:

* config.host: Remove m32r-*-linux* and m32rle-*-linux*.
* config/m32r/libgcc-glibc.ver: Removed.
* config/m32r/t-linux: Removed.
---
 contrib/config-list.mk  |  2 +-
 gcc/config.gcc  | 18 +---
 gcc/config/m32r/linux.h | 91 -
 gcc/config/m32r/t-linux | 20 
 libgcc/config.host  |  6 ---
 libgcc/config/m32r/libgcc-glibc.ver | 48 ---
 libgcc/config/m32r/t-linux  |  5 --
 7 files changed, 3 insertions(+), 187 deletions(-)
 delete mode 100644 gcc/config/m32r/linux.h
 delete mode 100644 gcc/config/m32r/t-linux
 delete mode 100644 libgcc/config/m32r/libgcc-glibc.ver
 delete mode 100644 libgcc/config/m32r/t-linux

diff --git a/contrib/config-list.mk b/contrib/config-list.mk
index b9e9dd0..a93d1db 100644
--- a/contrib/config-list.mk
+++ b/contrib/config-list.mk
@@ -57,7 +57,7 @@ LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \
   i686-cygwinOPT-enable-threads=yes i686-mingw32crt ia64-elf \
   ia64-freebsd6 ia64-linux ia64-hpux ia64-hp-vms iq2000-elf lm32-elf \
   lm32-rtems lm32-uclinux m32c-rtems m32c-elf m32r-elf m32rle-elf \
-  m32r-linux m32rle-linux m68k-elf m68k-netbsdelf \
+  m68k-elf m68k-netbsdelf \
   m68k-openbsd m68k-uclinux m68k-linux m68k-rtems \
   mcore-elf microblaze-linux microblaze-elf \
   mips-netbsd \
diff --git a/gcc/config.gcc b/gcc/config.gcc
index ccf41f6..f976038 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -276,6 +276,8 @@ case ${target} in
  | crisv32-*-* \
  | i[34567]86-go32-*   \
  | i[34567]86-*-go32*  \
+ | m32r-*-linux*   \
+ | m32rle-*-linux* \
  | m68k-*-uclinuxoldabi*   \
  | mips64orion*-*-rtems*   \
  | pdp11-*-bsd \
@@ -2301,22 +2303,6 @@ m32r-*-elf*)
 m32rle-*-elf*)
tm_file="elfos.h newlib-stdint.h m32r/little.h ${tm_file}"
;;
-m32r-*-linux*)
-   tm_file="elfos.h gnu-user.h linux.h glibc-stdint.h ${tm_file} 
m32r/linux.h"
-   tmake_file="${tmake_file} m32r/t-linux t-slibgcc"
-   gnu_ld=yes
-   if test x$enable_threads = xyes; then
-   thread_file='posix'
-   fi
-   ;;
-m32rle-*-linux*)
-   tm_file="elfos.h gnu-user.h linux.h glibc-stdint.h m32r/little.h 
${tm_file} m32r/linux.h"
-   tmake_file="${tmake_file} m32r/t-linux t-slibgcc"
-   gnu_ld=yes
-   if test x$enable_threads = xyes; then
-   thread_file='posix'
-   fi
-   ;;
 m68k-*-elf* | fido-*-elf*)
case ${target} in
fido-*-elf*)
diff --git a/gcc/config/m32r/linux.h b/gcc/config/m32r/linux.h
deleted file mode 100644
index 4fdebbc..000
--- a/gcc/config/m32r/linux.h
+++ /dev/null
@@ -1,91 +0,0 @@
-/* Definitions for Renesas M32R running Linux-based GNU systems using ELF.
-   Copyright (C) 2003-2021 Free Software Foundation, Inc.
-
-   This file is part of GCC.
-
-   GCC is free software; you can redistribute it and/or modify it
-   under the terms of the GNU General Public License as published
-   by the Free Software Foundation; either version 3, or (at your
-   option) any later version.
-
-   GCC is distributed in the hope that it will be useful, but WITHOUT
-   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
-   License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with GCC; see the file COPYING3.  If not see
-   <http://www.gnu.org/licenses/>.  */
-
-#undef  SIZE_TYPE
-#define SIZE_TYPE "unsigned int"
- 
-#undef  PTRDIFF_TYPE
-#define PTRDIFF_TYPE "int"
-  
-#undef  WCHAR_TYPE
-#define WCHAR_TYPE "long int"
-   
-#undef  WCHAR_TYPE_SIZE
-#define WCHAR_TYPE_SIZE BITS_PER_WORD
-
-/* Provide a LINK_SPEC appropriate for Linux.  Here we provide support
-   for the special GCC options -static and -shared, which allow us to
-   link things in one of these three modes by applying the appropriate
-   combinations of options at link-time.
-
-   When the -shared link option is used a final link is not being
-   done.  */
-
-#define GLIBC_DYNAMIC_LINKER "/lib/l

[PATCH] Fix PR 67102: Add libstdc++ dependency to libffi

2021-09-15 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The error message is obvious: -funconfigured-libstdc++-v3 is used
on the g++ command line.  So we just add the dependency.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

ChangeLog:

* Makefile.def: Have configure-target-libffi depend on
all-target-libstdc++-v3.
* Makefile.in: Regenerate.
---
 Makefile.def | 1 +
 Makefile.in  | 1 +
 2 files changed, 2 insertions(+)

diff --git a/Makefile.def b/Makefile.def
index de3e0052106..90316364d01 100644
--- a/Makefile.def
+++ b/Makefile.def
@@ -592,6 +592,7 @@ dependencies = { module=configure-target-fastjar; 
on=configure-target-zlib; };
 dependencies = { module=all-target-fastjar; on=all-target-zlib; };
 dependencies = { module=configure-target-libgo; on=configure-target-libffi; };
 dependencies = { module=configure-target-libgo; on=all-target-libstdc++-v3; };
+dependencies = { module=configure-target-libffi; on=all-target-libstdc++-v3; };
 dependencies = { module=all-target-libgo; on=all-target-libbacktrace; };
 dependencies = { module=all-target-libgo; on=all-target-libffi; };
 dependencies = { module=all-target-libgo; on=all-target-libatomic; };
diff --git a/Makefile.in b/Makefile.in
index 61af99dc75a..81b26c7177e 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -61261,6 +61261,7 @@ all-bison: maybe-all-intl
 all-flex: maybe-all-intl
 all-m4: maybe-all-intl
 configure-target-libgo: maybe-all-target-libstdc++-v3
+configure-target-libffi: maybe-all-target-libstdc++-v3
 configure-target-liboffloadmic: maybe-configure-target-libgomp
 all-target-liboffloadmic: maybe-all-target-libgomp
 configure-target-newlib: maybe-all-binutils
-- 
2.17.1



[PATCH 1/2] Fix PR bootstrap/102389: --with-build-config=bootstrap-lto is broken

2021-09-17 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So the problem here is that now the lto-plugin requires NM that works
with LTO to work so we need to pass down NM just like we do for ranlib
and ar.

OK? Bootstrapped and tested with --with-build-config=bootstrap-lto on 
aarch64-linux-gnu.
Note you need to use binutils 2.35 or later too due to 
https://sourceware.org/PR25355
(I will submit another patch to improve the installation instructions too).

config/ChangeLog:

PR bootstrap/102389
* bootstrap-lto-lean.mk: Handle NM like RANLIB and AR.
* bootstrap-lto.mk: Likewise.
---
 config/bootstrap-lto-lean.mk | 6 --
 config/bootstrap-lto.mk  | 6 --
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/config/bootstrap-lto-lean.mk b/config/bootstrap-lto-lean.mk
index 79cea50a4c6..42cb3394c70 100644
--- a/config/bootstrap-lto-lean.mk
+++ b/config/bootstrap-lto-lean.mk
@@ -9,9 +9,11 @@ STAGEfeedback_CFLAGS += -flto=jobserver
 # assumes the host supports the linker plugin
 LTO_AR = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ar$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/
 LTO_RANLIB = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ranlib$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/
+LTO_NM = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-nm$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/
 
 LTO_EXPORTS = AR="$(LTO_AR)"; export AR; \
- RANLIB="$(LTO_RANLIB)"; export RANLIB;
-LTO_FLAGS_TO_PASS = AR="$(LTO_AR)" RANLIB="$(LTO_RANLIB)"
+ RANLIB="$(LTO_RANLIB)"; export RANLIB; \
+ NM="$(LTO_NM)"; export NM;
+LTO_FLAGS_TO_PASS = AR="$(LTO_AR)" RANLIB="$(LTO_RANLIB)" NM="$(LTO_NM)"
 
 do-compare = /bin/true
diff --git a/config/bootstrap-lto.mk b/config/bootstrap-lto.mk
index 4de07e5b226..1ddb1d870ba 100644
--- a/config/bootstrap-lto.mk
+++ b/config/bootstrap-lto.mk
@@ -9,10 +9,12 @@ STAGEfeedback_CFLAGS += -flto=jobserver -frandom-seed=1
 # assumes the host supports the linker plugin
 LTO_AR = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ar$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/
 LTO_RANLIB = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-ranlib$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/
+LTO_NM = $$r/$(HOST_SUBDIR)/prev-gcc/gcc-nm$(exeext) 
-B$$r/$(HOST_SUBDIR)/prev-gcc/
 
 LTO_EXPORTS = AR="$(LTO_AR)"; export AR; \
- RANLIB="$(LTO_RANLIB)"; export RANLIB;
-LTO_FLAGS_TO_PASS = AR="$(LTO_AR)" RANLIB="$(LTO_RANLIB)"
+ RANLIB="$(LTO_RANLIB)"; export RANLIB; \
+ NM="$(LTO_NM)"; export NM;
+LTO_FLAGS_TO_PASS = AR="$(LTO_AR)" RANLIB="$(LTO_RANLIB)" NM="$(LTO_NM)"
 
 do-compare = $(SHELL) $(srcdir)/contrib/compare-lto $$f1 $$f2
 extra-compare = gcc/lto1$(exeext)
-- 
2.17.1



[PATCH 2/2] Update the section on binutils version

2021-09-17 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

LTO usage requires binutils 2.35 or newer due to
https://sourceware.org/PR25355.
This adds a note in the prerequisites page about it.

Ok?

gcc/ChangeLog:

* doc/install.texi: Add note that
binutils 2.35 is required for LTO usage.
---
 gcc/doc/install.texi | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 88e453c3f6b..a141507c7b0 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -325,6 +325,9 @@ Necessary in some circumstances, optional in others.  See 
the
 host/target specific instructions for your platform for the exact
 requirements.
 
+Note binutils 2.35 or newer is required for LTO to work correctly
+with GNU libtool that includes doing a bootstrap with LTO enabled.
+
 @item gzip version 1.2.4 (or later) or
 @itemx bzip2 version 1.0.2 (or later)
 
-- 
2.17.1



[PATCH] Fix middle-end/102395: reg_class having only NO_REGS and ALL_REGS.

2021-09-18 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So the simple fix is to just add to the assert that
sclass and dclass are both greater than or equal to NO_REGS.
NO_REGS is documented as the first register class so it should
have the value of 0.

gcc/ChangeLog:

* lra-constraints.c (check_and_process_move): Assert
that dclass and sclass are greater than or equal to NO_REGS.
---
 gcc/lra-constraints.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index a56080bee35..4d734548c38 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -1276,7 +1276,7 @@ check_and_process_move (bool *change_p, bool *sec_mem_p 
ATTRIBUTE_UNUSED)
   sclass = dclass = NO_REGS;
   if (REG_P (dreg))
 dclass = get_reg_class (REGNO (dreg));
-  gcc_assert (dclass < LIM_REG_CLASSES);
+  gcc_assert (dclass < LIM_REG_CLASSES && dclass >= NO_REGS);
   if (dclass == ALL_REGS)
 /* ALL_REGS is used for new pseudos created by transformations
like reload of SUBREG_REG (see function
@@ -1288,7 +1288,7 @@ check_and_process_move (bool *change_p, bool *sec_mem_p 
ATTRIBUTE_UNUSED)
 return false;
   if (REG_P (sreg))
 sclass = get_reg_class (REGNO (sreg));
-  gcc_assert (sclass < LIM_REG_CLASSES);
+  gcc_assert (sclass < LIM_REG_CLASSES && sclass >= NO_REGS);
   if (sclass == ALL_REGS)
 /* See comments above.  */
 return false;
-- 
2.17.1



[PATCH] c: [PR32122] Require pointer types for computed gotos

2021-09-19 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

So GCC has always accepted non-pointer types in computed gotos but
that was wrong based on the documentation:
Any expression of type void * is allowed.

So this fixes the problem by requiring the type to
be a pointer type.

OK? Bootstrapped and tested on x86_64-linux-gnu with no regressions.

PR c/32122

gcc/c/ChangeLog:

* c-parser.c (c_parser_statement_after_labels): Pass
the c_expr instead of the tree to c_finish_goto_ptr.
* c-typeck.c (c_finish_goto_ptr): Change the second
argument type to c_expr.
* c-tree.h (c_finish_goto_ptr): Likewise.
Error out if the expression was not of a pointer type.

gcc/testsuite/ChangeLog:

* gcc.dg/comp-goto-5.c: New test.
* gcc.dg/comp-goto-6.c: New test.
---
 gcc/c/c-parser.c   |  2 +-
 gcc/c/c-tree.h |  2 +-
 gcc/c/c-typeck.c   | 11 ++-
 gcc/testsuite/gcc.dg/comp-goto-5.c | 11 +++
 gcc/testsuite/gcc.dg/comp-goto-6.c |  6 ++
 5 files changed, 29 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/comp-goto-5.c
 create mode 100644 gcc/testsuite/gcc.dg/comp-goto-6.c

diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index fb1399e300d..bcd8a05489f 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -6141,7 +6141,7 @@ c_parser_statement_after_labels (c_parser *parser, bool 
*if_p,
  c_parser_consume_token (parser);
  val = c_parser_expression (parser);
  val = convert_lvalue_to_rvalue (loc, val, false, true);
- stmt = c_finish_goto_ptr (loc, val.value);
+ stmt = c_finish_goto_ptr (loc, val);
}
  else
c_parser_error (parser, "expected identifier or %<*%>");
diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h
index d50d0cb7f2d..a046c6b0926 100644
--- a/gcc/c/c-tree.h
+++ b/gcc/c/c-tree.h
@@ -746,7 +746,7 @@ extern tree c_finish_expr_stmt (location_t, tree);
 extern tree c_finish_return (location_t, tree, tree);
 extern tree c_finish_bc_stmt (location_t, tree, bool);
 extern tree c_finish_goto_label (location_t, tree);
-extern tree c_finish_goto_ptr (location_t, tree);
+extern tree c_finish_goto_ptr (location_t, c_expr val);
 extern tree c_expr_to_decl (tree, bool *, bool *);
 extern tree c_finish_omp_construct (location_t, enum tree_code, tree, tree);
 extern tree c_finish_oacc_data (location_t, tree, tree);
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 49d1bb067a0..b472e448011 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -10783,10 +10783,19 @@ c_finish_goto_label (location_t loc, tree label)
the GOTO.  */
 
 tree
-c_finish_goto_ptr (location_t loc, tree expr)
+c_finish_goto_ptr (location_t loc, c_expr val)
 {
+  tree expr = val.value;
   tree t;
   pedwarn (loc, OPT_Wpedantic, "ISO C forbids %<goto *expr;%>");
+  if (expr != error_mark_node
+  && !POINTER_TYPE_P (TREE_TYPE (expr))
+  && !null_pointer_constant_p (expr))
+{
+  error_at (val.get_location (),
+   "computed goto must be pointer type");
+  expr = build_zero_cst (ptr_type_node);
+}
   expr = c_fully_fold (expr, false, NULL);
   expr = convert (ptr_type_node, expr);
   t = build1 (GOTO_EXPR, void_type_node, expr);
diff --git a/gcc/testsuite/gcc.dg/comp-goto-5.c 
b/gcc/testsuite/gcc.dg/comp-goto-5.c
new file mode 100644
index 000..d487729a5d4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/comp-goto-5.c
@@ -0,0 +1,11 @@
+/* PR c/32122 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+enum {a=1};
+void foo()
+{
+  goto *
+a; /* { dg-error "computed goto must be pointer type" } */
+}
+
diff --git a/gcc/testsuite/gcc.dg/comp-goto-6.c 
b/gcc/testsuite/gcc.dg/comp-goto-6.c
new file mode 100644
index 000..497f6cd76ca
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/comp-goto-6.c
@@ -0,0 +1,6 @@
+/* PR c/32122 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+void foo(void *a) { goto *1000; } /* { dg-error "computed goto must be 
pointer type" } */
+void foo1(void *a) { goto *a; }
+
-- 
2.17.1



[PATCH] Fix PR c/94726: ICE with __builtin_shuffle and changing of types

2021-09-26 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is __builtin_shuffle when called with two arguments
instead of 1, uses a SAVE_EXPR to put in for the 1st and 2nd operand
of VEC_PERM_EXPR and when we go and gimplify the SAVE_EXPR, the type
is now error_mark_node and that fails hard.
This fixes the problem by adding a simple check for type of operand
of SAVE_EXPR not to be error_mark_node.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

gcc/ChangeLog:

PR c/94726
* gimplify.c (gimplify_save_expr): Return early
if the type of val is error_mark_node.

gcc/testsuite/ChangeLog:

PR c/94726
* gcc.dg/pr94726.c: New test.
---
 gcc/gimplify.c |  3 +++
 gcc/testsuite/gcc.dg/pr94726.c | 11 +++
 2 files changed, 14 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/pr94726.c

diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 9163dcda438..943c5cb8f2d 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -6232,6 +6232,9 @@ gimplify_save_expr (tree *expr_p, gimple_seq *pre_p, 
gimple_seq *post_p)
   gcc_assert (TREE_CODE (*expr_p) == SAVE_EXPR);
   val = TREE_OPERAND (*expr_p, 0);
 
+  if (TREE_TYPE (val) == error_mark_node)
+return GS_ERROR;
+
   /* If the SAVE_EXPR has not been resolved, then evaluate it once.  */
   if (!SAVE_EXPR_RESOLVED_P (*expr_p))
 {
diff --git a/gcc/testsuite/gcc.dg/pr94726.c b/gcc/testsuite/gcc.dg/pr94726.c
new file mode 100644
index 000..d6911a644a4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr94726.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+typedef unsigned int type __attribute__ ( ( vector_size ( 2*sizeof(int) ) ) ) 
; 
+type a , b; 
+/* { dg-message "note: previous declaration" "previous declaration" { target 
*-*-* } .-1 } */
+void foo ( void ) { 
+   type var = { 2 , 2 } ; 
+   b = __builtin_shuffle ( a , var ) ;
+} 
+
+void * a [ ] = { } ; /* { dg-error "conflicting types" } */
-- 
2.17.1



[COMMITTED] Fix some testcases after my computed goto patch

2021-09-29 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

For some reason I did not see these failures in my testing.
Sorry about that.  Anyways this fixes the testcases by
adding a cast to __INTPTR_TYPE__ and then a cast to void*.

Committed after testing them on x86_64-linux-gnu.

gcc/testsuite/ChangeLog:

* gcc.c-torture/compile/920826-1.c: Fix computed goto.
* gcc.c-torture/compile/pr27863.c: Likewise.
* gcc.c-torture/compile/pr70190.c: Likewise.
* gcc.dg/torture/pr89135.c: Likewise.
* gcc.dg/torture/pr90071.c: Likewise.
* gcc.dg/vect/bb-slp-pr97709.c: Likewise.
---
 gcc/testsuite/gcc.c-torture/compile/920826-1.c | 2 +-
 gcc/testsuite/gcc.c-torture/compile/pr27863.c  | 2 +-
 gcc/testsuite/gcc.c-torture/compile/pr70190.c  | 2 +-
 gcc/testsuite/gcc.dg/torture/pr89135.c | 2 +-
 gcc/testsuite/gcc.dg/torture/pr90071.c | 2 +-
 gcc/testsuite/gcc.dg/vect/bb-slp-pr97709.c | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/gcc/testsuite/gcc.c-torture/compile/920826-1.c 
b/gcc/testsuite/gcc.c-torture/compile/920826-1.c
index c2d8843..2a175456 100644
--- a/gcc/testsuite/gcc.c-torture/compile/920826-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/920826-1.c
@@ -1,3 +1,3 @@
 /* { dg-require-effective-target indirect_jumps } */
 
-f(int*x){goto*(char)*x;}
+f(int*x){goto*(void*)(__INTPTR_TYPE__)(char)*x;}
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr27863.c 
b/gcc/testsuite/gcc.c-torture/compile/pr27863.c
index 926312e4..9dc1ab8 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr27863.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr27863.c
@@ -27,5 +27,5 @@ _loc66:;
 *++esp=(long)&&_loc119;
  _loc119:;
 SetTermStruc:
- goto *(*esp--);
+ goto *(void*)(__INTPTR_TYPE__)(*esp--);
 }
diff --git a/gcc/testsuite/gcc.c-torture/compile/pr70190.c 
b/gcc/testsuite/gcc.c-torture/compile/pr70190.c
index d3d209a..6c57b50 100644
--- a/gcc/testsuite/gcc.c-torture/compile/pr70190.c
+++ b/gcc/testsuite/gcc.c-torture/compile/pr70190.c
@@ -8,7 +8,7 @@ fn1 ()
   static char a[] = "foo";
   static void *b[] = { &&l1, &&l2 };
   goto *(b[1]);
- l1: goto *(a[0]);
+ l1: goto *(void*)(__INTPTR_TYPE__)(a[0]);
  l2: return 0;
 }
 
diff --git a/gcc/testsuite/gcc.dg/torture/pr89135.c 
b/gcc/testsuite/gcc.dg/torture/pr89135.c
index 278303f..4cf0533 100644
--- a/gcc/testsuite/gcc.dg/torture/pr89135.c
+++ b/gcc/testsuite/gcc.dg/torture/pr89135.c
@@ -28,7 +28,7 @@ h:
  g = foo ();
  *o = g;
  if (c)
-   goto *d;
+   goto *(void*)d;
}
 }
   goto *i;
diff --git a/gcc/testsuite/gcc.dg/torture/pr90071.c 
b/gcc/testsuite/gcc.dg/torture/pr90071.c
index 702c143..1c4099c 100644
--- a/gcc/testsuite/gcc.dg/torture/pr90071.c
+++ b/gcc/testsuite/gcc.dg/torture/pr90071.c
@@ -21,6 +21,6 @@ h: ++e;
 goto i;
  }
 f:
-   goto *({ d || e < 0 || e >= 2; });
+   goto *(void*)(__INTPTR_TYPE__)({ d || e < 0 || e >= 2; });
&e;
 }
diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr97709.c 
b/gcc/testsuite/gcc.dg/vect/bb-slp-pr97709.c
index 672807f..d0f3d05 100644
--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr97709.c
+++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr97709.c
@@ -12,7 +12,7 @@ e()
   void *f[] = {&&g, &&h, &&i, &&j};
   int d, c;
 j:
-  goto *a;
+  goto *(void*)(__INTPTR_TYPE__)a;
 g:
   d = 0;
 h:
-- 
1.8.3.1



[PATCH] [www] Add note about computed gotos to changes and porting guide

2021-09-29 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

Even though there are not many computed gotos in the wild and even fewer
that would use an integer type, it would still be a good idea to add
this new error message to both the changes page and the porting-to guide.

OK?
---
 htdocs/gcc-12/changes.html|  6 ++--
 htdocs/gcc-12/porting_to.html | 66 +++
 2 files changed, 70 insertions(+), 2 deletions(-)
 create mode 100644 htdocs/gcc-12/porting_to.html

diff --git a/htdocs/gcc-12/changes.html b/htdocs/gcc-12/changes.html
index 1f156a9..3130710 100644
--- a/htdocs/gcc-12/changes.html
+++ b/htdocs/gcc-12/changes.html
@@ -17,11 +17,9 @@
 
 This page is a "brief" summary of some of the huge number of improvements
 in GCC 12.
-
 
 
 Note: GCC 12 has not been released yet, so this document is
@@ -31,6 +29,10 @@ a work-in-progress.
 Caveats
 
   
+C:
+Computed gotos require a pointer type now.
+  
+  
 C++:
 Two non-standard std::pair constructors have been deprecated.
 These allowed the use of an rvalue and a literal 0 to
diff --git a/htdocs/gcc-12/porting_to.html b/htdocs/gcc-12/porting_to.html
new file mode 100644
index 000..9154d7b
--- /dev/null
+++ b/htdocs/gcc-12/porting_to.html
@@ -0,0 +1,66 @@
+
+
+
+
+
+Porting to GCC 12
+https://gcc.gnu.org/gcc.css"; />
+
+
+
+Porting to GCC 12
+
+
+The GCC 12 release series differs from previous GCC releases in
+a number of ways. Some of these are a result
+of bug fixing, and some old behaviors have been intentionally changed
+to support new standards, or relaxed in standards-conforming ways to
+facilitate compilation or run-time performance.
+
+
+
+Some of these changes are user visible and can cause grief when
+porting to GCC 12. This document is an effort to identify common issues
+and provide solutions. Let us know if you have suggestions for improvements!
+
+
+
+
+C language issues
+
+Computed gotos now require a pointer type
+
+
+In GCC 12, computed gotos require a pointer type.
+An example which was accepted before:
+
+  void f(void)
+  {
+goto *10;
+  }
+
+is no longer accepted and you need to add a cast to it like:
+
+
+  void f(void)
+  {
+goto *(void*)10;
+  }
+
+
+
+
+
+
+
+
+
+
-- 
1.8.3.1



[PATCH] Fix PR target/103100 -mstrict-align and memset on not aligned buffers

2021-11-05 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The problem here is with -mstrict-align, aarch64_expand_setmem needs
to check the alignment of the mode to make sure we can use it for
doing the stores.

gcc/ChangeLog:

PR target/103100
* config/aarch64/aarch64.c (aarch64_expand_setmem):
Add check for alignment of the mode if STRICT_ALIGNMENT is true.
---
 gcc/config/aarch64/aarch64.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fdf05505846..2c00583e12c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23738,7 +23738,9 @@ aarch64_expand_setmem (rtx *operands)
 over writing.  */
   opt_scalar_int_mode mode_iter;
   FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
-   if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit))
+   if (GET_MODE_BITSIZE (mode_iter.require ()) <= MIN (n, copy_limit)
+   && (!STRICT_ALIGNMENT
+   || MEM_ALIGN (dst) >= GET_MODE_ALIGNMENT (mode_iter.require 
())))
  cur_mode = mode_iter.require ();
 
   gcc_assert (cur_mode != BLKmode);
-- 
2.17.1



[PATCH] aarch64: [PR101529] Fix vector shuffle insertion expansion

2021-11-06 Thread apinski--- via Gcc-patches
From: Andrew Pinski 

The function aarch64_evpc_ins would reuse the target even though
it might be the same register as the two inputs.
Instead of checking to see if we can reuse the target, creating
a new register always is better.

OK? Bootstrapped and tested on aarch64-linux-gnu with no regressions.

PR target/101529

gcc/ChangeLog:

* config/aarch64/aarch64.c (aarch64_evpc_ins): Don't use target
as an input instead create a new reg.

gcc/testsuite/ChangeLog:

* c-c++-common/torture/builtin-convertvector-2.c: New test.
* c-c++-common/torture/builtin-shufflevector-2.c: New test.
---
 gcc/config/aarch64/aarch64.c  |  8 --
 .../torture/builtin-convertvector-2.c | 26 +++
 .../torture/builtin-shufflevector-2.c | 26 +++
 3 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
 create mode 100644 gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2c00583e12c..e4fc546fae7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23084,11 +23084,15 @@ aarch64_evpc_ins (struct expand_vec_perm_d *d)
 }
   gcc_assert (extractindex < nelt);
 
-  emit_move_insn (d->target, insv);
+  /* Use a new reg instead of target as one of the
+ operands might be target. */
+  rtx original = gen_reg_rtx (GET_MODE (d->target));
+
+  emit_move_insn (original, insv);
   insn_code icode = code_for_aarch64_simd_vec_copy_lane (mode);
   expand_operand ops[5];
   create_output_operand (&ops[0], d->target, mode);
-  create_input_operand (&ops[1], d->target, mode);
+  create_input_operand (&ops[1], original, mode);
   create_integer_operand (&ops[2], 1 << idx);
   create_input_operand (&ops[3], extractv, mode);
   create_integer_operand (&ops[4], extractindex);
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
new file mode 100644
index 000..d88f6a72b5c
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-convertvector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* PR target/101529 */
+
+typedef unsigned char __attribute__((__vector_size__ (1))) W;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned short __attribute__((__vector_size__ (16))) U;
+
+unsigned short us;
+
+/* aarch64 used to miscompile foo to just return 0. */
+W
+foo (unsigned char uc)
+{
+  V v = __builtin_convertvector ((U){ } >= us, V);
+  return __builtin_shufflevector ((W){ }, v, 4) & uc;
+}
+
+int
+main (void)
+{
+  W x = foo (5);
+  if (x[0] != 5)
+__builtin_abort();
+  return 0;
+}
+
diff --git a/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
new file mode 100644
index 000..7c4999ed4e9
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/torture/builtin-shufflevector-2.c
@@ -0,0 +1,26 @@
+/* { dg-do run } */
+/* PR target/101529 */
+typedef unsigned char C;
+typedef unsigned char __attribute__((__vector_size__ (8))) V;
+typedef unsigned char __attribute__((__vector_size__ (32))) U;
+
+C c;
+
+/* aarch64 used to miscompile foo to just return a vector of 0s */
+V
+foo (V v)
+{
+  v |= __builtin_shufflevector (c * v, (U) (0 == (U){ }),
+   0, 1, 8, 32, 8, 20, 36, 36);
+  return v;
+}
+
+int
+main (void)
+{
+  V v = foo ((V) { });
+  for (unsigned i = 0; i < sizeof (v); i++)
+if (v[i] != (i >= 2 ? 0xff : 0))
+  __builtin_abort ();
+  return 0;
+}
-- 
2.17.1



  1   2   3   >