Hi,
The expanding of widen-sum pattern always fails. The vectorizer expects
the operands to have the same size, while the current implementation of
widen-sum pattern dose not conform to this.
This patch implements the widen-sum pattern with vpadal. Change the
vaddw pattern to anonymous. Add widen-sum test cases for neon.
--
Regards,
Xingxing
commit 62637f371a3329ff56644526bc5dbf9356cbdd6c
Author: Xingxing Pan <xxing...@marvell.com>
Date: Wed Feb 25 16:44:25 2015 +0800
Fix widen-sum pattern in neon.md.
2015-03-05 Xingxing Pan <xxing...@marvell.com>
config/arm/
* iterators.md:
(VWSD): New define_mode_iterator.
(V_widen_sum_d): New define_mode_attr.
* neon.md
(widen_ssum<mode>3): Redefined.
(widen_usum<mode>3): Ditto.
(neon_svaddw<mode>3): New anonymous define_insn.
(neon_uvaddw<mode>3): Ditto.
testsuite/gcc.target/arm/neon/
* vect-widen-sum-char2short-s-d.c: New file.
* vect-widen-sum-char2short-s.c: Ditto.
* vect-widen-sum-char2short-u-d.c: Ditto.
* vect-widen-sum-char2short-u.c: Ditto.
* vect-widen-sum-short2int-s-d.c: Ditto.
* vect-widen-sum-short2int-s.c: Ditto.
* vect-widen-sum-short2int-u-d.c: Ditto.
* vect-widen-sum-short2int-u.c: Ditto.
testsuite/lib/
* target-supports.exp:
(check_effective_target_vect_widen_sum_hi_to_si_pattern): Return 1 for ARM NEON.
(check_effective_target_vect_widen_sum_hi_to_si): Ditto.
(check_effective_target_vect_widen_sum_qi_to_hi): Ditto.
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index f7f8ab7..4ba5901 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -95,6 +95,9 @@
;; Widenable modes.
(define_mode_iterator VW [V8QI V4HI V2SI])
+;; Widenable modes. Used by widen sum.
+(define_mode_iterator VWSD [V8QI V4HI V16QI V8HI])
+
;; Narrowable modes.
(define_mode_iterator VN [V8HI V4SI V2DI])
@@ -558,6 +561,11 @@
;; Widen. Result is half the number of elements, but widened to double-width.
(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
+;; Widen. Result is half the number of elements, but widened to double-width.
+;; Used by widen sum.
+(define_mode_attr V_widen_sum_d [(V8QI "V4HI") (V4HI "V2SI")
+ (V16QI "V8HI") (V8HI "V4SI")])
+
;; Conditions to be used in extend<mode>di patterns.
(define_mode_attr qhs_zextenddi_cond [(SI "") (HI "&& arm_arch6") (QI "")])
(define_mode_attr qhs_sextenddi_cond [(SI "") (HI "&& arm_arch6")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 63c327e..6cac36d 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1174,7 +1174,31 @@
;; Widening operations
-(define_insn "widen_ssum<mode>3"
+(define_expand "widen_usum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+ (match_operand:VWSD 1 "s_register_operand" "")
+ (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+ "TARGET_NEON"
+ {
+ emit_move_insn(operands[0], operands[2]);
+ emit_insn (gen_neon_vpadalu<mode> (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+)
+
+(define_expand "widen_ssum<mode>3"
+ [(match_operand:<V_widen_sum_d> 0 "s_register_operand" "")
+ (match_operand:VWSD 1 "s_register_operand" "")
+ (match_operand:<V_widen_sum_d> 2 "s_register_operand" "")]
+ "TARGET_NEON"
+ {
+ emit_move_insn(operands[0], operands[2]);
+ emit_insn (gen_neon_vpadals<mode> (operands[0], operands[0], operands[1]));
+ DONE;
+ }
+)
+
+(define_insn "*neon_svaddw<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (sign_extend:<V_widen>
(match_operand:VW 1 "s_register_operand" "%w"))
@@ -1184,7 +1208,7 @@
[(set_attr "type" "neon_add_widen")]
)
-(define_insn "widen_usum<mode>3"
+(define_insn "*neon_uvaddw<mode>3"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
(plus:<V_widen> (zero_extend:<V_widen>
(match_operand:VW 1 "s_register_operand" "%w"))
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
new file mode 100644
index 0000000..c81c325
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s-d.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+ int i;
+ STYPE2 sum = 0;
+ STYPE2 check_sum = 0;
+
+ /* widenning sum: sum chars into short.
+
+ Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+ is currently not detected because of this patch:
+
+ 2005-12-26 Kazu Hirata <k...@codesourcery.com>
+ PR tree-optimization/25125
+ */
+
+ for (i = 0; i < N; i++)
+ {
+ sdata[i] = i*2;
+ check_sum += sdata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += sdata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ ssum ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
new file mode 100644
index 0000000..de53f5c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-s.c
@@ -0,0 +1,64 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { xfail *-*-* } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { xfail *-*-* } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed char STYPE1;
+typedef signed short STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+ int i;
+ STYPE2 sum = 0;
+ STYPE2 check_sum = 0;
+
+ /* widenning sum: sum chars into short.
+
+ Like gcc.dg/vect/vect-reduc-pattern-2c.c, the widening-summation pattern
+ is currently not detected because of this patch:
+
+ 2005-12-26 Kazu Hirata <k...@codesourcery.com>
+ PR tree-optimization/25125
+ */
+
+ for (i = 0; i < N; i++)
+ {
+ sdata[i] = i*2;
+ check_sum += sdata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += sdata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ ssum ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
new file mode 100644
index 0000000..bfa17d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+ int i;
+ UTYPE2 sum = 0;
+ UTYPE2 check_sum = 0;
+
+ for (i = 0; i < N; i++)
+ {
+ udata[i] = i*2;
+ check_sum += udata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += udata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ usum ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
new file mode 100644
index 0000000..38d8179
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-char2short-u.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned char UTYPE1;
+typedef unsigned short UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+ int i;
+ UTYPE2 sum = 0;
+ UTYPE2 check_sum = 0;
+
+ for (i = 0; i < N; i++)
+ {
+ udata[i] = i*2;
+ check_sum += udata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += udata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ usum ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
new file mode 100644
index 0000000..7aef5d1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target arm_neon } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+ int i;
+ STYPE2 sum = 0;
+ STYPE2 check_sum = 0;
+
+ for (i = 0; i < N; i++)
+ {
+ sdata[i] = i*2;
+ check_sum += sdata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += sdata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ ssum ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
new file mode 100644
index 0000000..914ad09
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-s.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef signed short STYPE1;
+typedef signed int STYPE2;
+
+#define N 128
+STYPE1 sdata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+ssum ()
+{
+ int i;
+ STYPE2 sum = 0;
+ STYPE2 check_sum = 0;
+
+ for (i = 0; i < N; i++)
+ {
+ sdata[i] = i*2;
+ check_sum += sdata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += sdata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ ssum ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
new file mode 100644
index 0000000..6f4a29b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u-d.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mvectorize-with-neon-double -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+ int i;
+ UTYPE2 sum = 0;
+ UTYPE2 check_sum = 0;
+
+ for (i = 0; i < N; i++)
+ {
+ udata[i] = i*2;
+ check_sum += udata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += udata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ usum ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
new file mode 100644
index 0000000..194b260
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon/vect-widen-sum-short2int-u.c
@@ -0,0 +1,55 @@
+/* { dg-do run } */
+/* { dg-require-effective-target arm_neon_hw } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -fdump-tree-vect-details -fdump-rtl-expand" } */
+/* { dg-add-options arm_neon } */
+
+/* { dg-final { scan-tree-dump-times "pattern recognized.*w\\\+" 1 "vect" { target { arm_neon } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+/* { dg-final { scan-rtl-dump-times "UNSPEC_VPADAL" 1 "expand" { target { arm_neon } } } } */
+/* { dg-final { cleanup-rtl-dump "expand" } } */
+
+#include <stdlib.h>
+
+typedef unsigned short UTYPE1;
+typedef unsigned int UTYPE2;
+
+#define N 128
+UTYPE1 udata[N];
+
+volatile int y = 0;
+
+__attribute__ ((noinline)) int
+usum ()
+{
+ int i;
+ UTYPE2 sum = 0;
+ UTYPE2 check_sum = 0;
+
+ for (i = 0; i < N; i++)
+ {
+ udata[i] = i*2;
+ check_sum += udata[i];
+ /* Avoid vectorization. */
+ if (y)
+ abort ();
+ }
+
+ /* widenning sum: sum chars into int. */
+ for (i = 0; i < N; i++)
+ {
+ sum += udata[i];
+ }
+
+ /* check results: */
+ if (sum != check_sum)
+ abort ();
+
+ return 0;
+}
+
+int
+main (void)
+{
+ usum ();
+ return 0;
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 6b957de..eaccb83 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3767,6 +3767,7 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
} else {
set et_vect_widen_sum_hi_to_si_pattern_saved 0
if { [istarget powerpc*-*-*]
+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
|| [istarget ia64-*-*] } {
set et_vect_widen_sum_hi_to_si_pattern_saved 1
}
@@ -3790,7 +3791,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } {
} else {
set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack]
if { [istarget powerpc*-*-*]
- || [istarget ia64-*-*] } {
+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
+ || [istarget ia64-*-*] } {
set et_vect_widen_sum_hi_to_si_saved 1
}
}
@@ -3813,7 +3815,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } {
} else {
set et_vect_widen_sum_qi_to_hi_saved 0
if { [check_effective_target_vect_unpack]
- || [check_effective_target_arm_neon_ok]
+ || ([istarget arm*-*-*] && [check_effective_target_arm_neon_ok])
|| [istarget ia64-*-*] } {
set et_vect_widen_sum_qi_to_hi_saved 1
}