[PATCH] Optimize nested permutation to single VEC_PERM_EXPR [PR54346]

2022-09-25 Thread Liwei Xu via Gcc-patches
This patch implements the optimization in PR 54346, which merges

c = VEC_PERM_EXPR <a, b, VCST0>;
d = VEC_PERM_EXPR <c, c, VCST1>;
to
d = VEC_PERM_EXPR <a, b, NEW_VCST>;

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
tree-ssa/forwprop-19.c fails to pass, but I'm not sure whether it
is OK to remove it.

gcc/ChangeLog:

PR target/54346
* match.pd: Merge the indices of the two VCSTs, then generate the new vec_perm.

gcc/testsuite/ChangeLog:

PR target/54346
* gcc.dg/pr54346.c: New test.

Co-authored-by: liuhongt 
---
 gcc/match.pd   | 41 ++
 gcc/testsuite/gcc.dg/pr54346.c | 13 +++
 2 files changed, 54 insertions(+)
 create mode 100755 gcc/testsuite/gcc.dg/pr54346.c

diff --git a/gcc/match.pd b/gcc/match.pd
index 345bcb701a5..9219b0a10e1 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8086,6 +8086,47 @@ and,
   (minus (mult (vec_perm @1 @1 @3) @2) @4)))
 
 
+/* (PR54346) Merge 
+   c = VEC_PERM_EXPR <a, b, VCST0>;
+   d = VEC_PERM_EXPR <c, c, VCST1>;
+   to
+   d = VEC_PERM_EXPR <a, b, NEW_VCST>; */
+   
+(simplify
+ (vec_perm (vec_perm@0 @1 @2 VECTOR_CST@3) @0 VECTOR_CST@4)
+ (with
+  {
+if(!TYPE_VECTOR_SUBPARTS (type).is_constant())
+  return NULL_TREE;
+
+tree op0;
+machine_mode result_mode = TYPE_MODE (type);
+machine_mode op_mode = TYPE_MODE (TREE_TYPE (@1));
+int nelts = TYPE_VECTOR_SUBPARTS (type).to_constant();
+vec_perm_builder builder0;
+vec_perm_builder builder1;
+vec_perm_builder builder2 (nelts, nelts, 1);
+
+if (!tree_to_vec_perm_builder (&builder0, @3) 
+|| !tree_to_vec_perm_builder (&builder1, @4))
+  return NULL_TREE;
+
+vec_perm_indices sel0 (builder0, 2, nelts);
+vec_perm_indices sel1 (builder1, 1, nelts);
+   
+for (int i = 0; i < nelts; i++)
+  builder2.quick_push (sel0[sel1[i].to_constant()]);
+
+vec_perm_indices sel2 (builder2, 2, nelts);
+
+if (!can_vec_perm_const_p (result_mode, op_mode, sel2, false))
+  return NULL_TREE;
+
+op0 = vec_perm_indices_to_tree (TREE_TYPE (@4), sel2);
+  }
+  (vec_perm @1 @2 { op0; })))
+
+
 /* Match count trailing zeroes for simplify_count_trailing_zeroes in fwprop.
The canonical form is array[((x & -x) * C) >> SHIFT] where C is a magic
constant which when multiplied by a power of 2 contains a unique value
diff --git a/gcc/testsuite/gcc.dg/pr54346.c b/gcc/testsuite/gcc.dg/pr54346.c
new file mode 100755
index 000..d87dc3a79a5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr54346.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-dse1" } */
+
+typedef int veci __attribute__ ((vector_size (4 * sizeof (int))));
+
+void fun (veci a, veci b, veci *i)
+{
+  veci c = __builtin_shuffle (a, b, __extension__ (veci) {1, 4, 2, 7});
+  *i = __builtin_shuffle (c, __extension__ (veci) { 7, 2, 1, 5 });
+}
+
+/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 3, 6, 0, 0 }" "dse1" } } */
+/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 1 "dse1" } } */
\ No newline at end of file
-- 
2.18.2



[PATCH] Optimize identical permutation in my last r13-3212-gb88adba751da63

2022-10-12 Thread Liwei Xu via Gcc-patches
Add an extra index check when merging VEC_CST; this handles the case where exactly
op1 needs to be returned.

This fixes:
FAIL: gcc.dg/tree-ssa/forwprop-19.c scan-tree-dump-not forwprop1 
"VEC_PERM_EXPR"

gcc/ChangeLog:

PR target/107220
* match.pd: Check the index of VEC_CST and return the op1 if needed.
---
 gcc/match.pd | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3550c16aaa6..1efdc3abb5d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8106,6 +8106,7 @@ and,
 vec_perm_builder builder0;
 vec_perm_builder builder1;
 vec_perm_builder builder2 (nelts, nelts, 1);
+bool ident_to_1 = true;
 
 if (!tree_to_vec_perm_builder (&builder0, @3)
|| !tree_to_vec_perm_builder (&builder1, @4))
@@ -8115,7 +8116,15 @@ and,
 vec_perm_indices sel1 (builder1, 1, nelts);
 
 for (int i = 0; i < nelts; i++)
-  builder2.quick_push (sel0[sel1[i].to_constant ()]);
+  {
+int tmp_index = sel0[sel1[i].to_constant ()].to_constant ();
+builder2.quick_push (sel0[sel1[i].to_constant ()]);
+if ( i != tmp_index)
+ ident_to_1 = false;
+  }
+
+if (ident_to_1)
+  return @1;
 
 vec_perm_indices sel2 (builder2, 2, nelts);
 
-- 
2.18.2



[PATCH] Optimize identical permutation in my last r13-3212-gb88adba751da63

2022-10-12 Thread Liwei Xu via Gcc-patches
Add an extra index check when merging VEC_CST; this handles the case where exactly
op1 needs to be returned.

This fixes:
FAIL: gcc.dg/tree-ssa/forwprop-19.c scan-tree-dump-not forwprop1 
"VEC_PERM_EXPR"

gcc/ChangeLog:

PR target/107220
* match.pd: Check the index of VEC_CST and return the op1 if needed.
---
 gcc/match.pd | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/gcc/match.pd b/gcc/match.pd
index 3550c16aaa6..1efdc3abb5d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -8106,6 +8106,7 @@ and,
 vec_perm_builder builder0;
 vec_perm_builder builder1;
 vec_perm_builder builder2 (nelts, nelts, 1);
+bool ident_to_1 = true;
 
 if (!tree_to_vec_perm_builder (&builder0, @3)
|| !tree_to_vec_perm_builder (&builder1, @4))
@@ -8115,7 +8116,15 @@ and,
 vec_perm_indices sel1 (builder1, 1, nelts);
 
 for (int i = 0; i < nelts; i++)
-  builder2.quick_push (sel0[sel1[i].to_constant ()]);
+  {
+int tmp_index = sel0[sel1[i].to_constant ()].to_constant ();
+builder2.quick_push (sel0[sel1[i].to_constant ()]);
+if ( i != tmp_index)
+ ident_to_1 = false;
+  }
+
+if (ident_to_1)
+  return @1;
 
 vec_perm_indices sel2 (builder2, 2, nelts);
 
-- 
2.18.2



[PATCH] Move scanning pass of forwprop-19.c to dse1 for r13-3212-gb88adba751da63

2022-10-16 Thread Liwei Xu via Gcc-patches
gcc/testsuite/ChangeLog:

* gcc.dg/tree-ssa/forwprop-19.c: Move scanning pass from forwprop1 to
dse1.  This fixes
the test case failure.
---
 gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c 
b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
index 4d77138b206..6ca81cb6c49 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O -fdump-tree-forwprop1" } */
+/* { dg-options "-O -fdump-tree-dse1" } */
 
 typedef int vec __attribute__((vector_size (4 * sizeof (int))));
 void f (vec *x1, vec *x2)
@@ -11,4 +11,4 @@ void f (vec *x1, vec *x2)
   *x1 = z;
 }
 
-/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "forwprop1" } } */
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "dse1" } } */
-- 
2.18.2