Hi Kewen,
On 8/4/21 9:06 PM, Kewen.Lin wrote:
Hi,
The existing vec_unpacku_{hi,lo} patterns support emulated unsigned
unpacking for short and char but lack support for int.
This patch adds that support as vec_unpacku_{hi,lo}_v4si.
Meanwhile, the current implementation uses a vector permutation,
which requires one extra customized constant vector as
the permutation control vector (pcv). It's better to use vector
merge high/low with a zero constant vector, to save the space
in the constant area as well as the cost of initializing the pcv in
the prologue. This patch switches it to vector merging and
simplifies it with iterators.
Bootstrapped & regtested on powerpc64le-linux-gnu P9 and
powerpc64-linux-gnu P8.
btw, the loop in unpack-vectorize-2.c doesn't get vectorized
without this patch; unpack-vectorize-[13]* are there to verify
that the vector merging and simplification work as expected.
Is it ok for trunk?
BR,
Kewen
-----
gcc/ChangeLog:
* config/rs6000/altivec.md (vec_unpacku_hi_v16qi): Remove.
(vec_unpacku_hi_v8hi): Likewise.
(vec_unpacku_lo_v16qi): Likewise.
(vec_unpacku_lo_v8hi): Likewise.
(vec_unpacku_hi_<VP_small_lc>): New define_expand.
(vec_unpacku_lo_<VP_small_lc>): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/unpack-vectorize-1.c: New test.
* gcc.target/powerpc/unpack-vectorize-1.h: New test.
* gcc.target/powerpc/unpack-vectorize-2.c: New test.
* gcc.target/powerpc/unpack-vectorize-2.h: New test.
* gcc.target/powerpc/unpack-vectorize-3.c: New test.
* gcc.target/powerpc/unpack-vectorize-3.h: New test.
* gcc.target/powerpc/unpack-vectorize-run-1.c: New test.
* gcc.target/powerpc/unpack-vectorize-run-2.c: New test.
* gcc.target/powerpc/unpack-vectorize-run-3.c: New test.
* gcc.target/powerpc/unpack-vectorize.h: New test.
---
gcc/config/rs6000/altivec.md | 158 ++++--------------
.../gcc.target/powerpc/unpack-vectorize-1.c | 18 ++
.../gcc.target/powerpc/unpack-vectorize-1.h | 14 ++
.../gcc.target/powerpc/unpack-vectorize-2.c | 12 ++
.../gcc.target/powerpc/unpack-vectorize-2.h | 7 +
.../gcc.target/powerpc/unpack-vectorize-3.c | 11 ++
.../gcc.target/powerpc/unpack-vectorize-3.h | 7 +
.../powerpc/unpack-vectorize-run-1.c | 24 +++
.../powerpc/unpack-vectorize-run-2.c | 16 ++
.../powerpc/unpack-vectorize-run-3.c | 16 ++
.../gcc.target/powerpc/unpack-vectorize.h | 42 +++++
11 files changed, 196 insertions(+), 129 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c
create mode 100644 gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index d70c17e6bc2..0e8b66cd6a5 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -134,10 +134,8 @@ (define_c_enum "unspec"
UNSPEC_VMULWLUH
UNSPEC_VMULWHSH
UNSPEC_VMULWLSH
- UNSPEC_VUPKHUB
- UNSPEC_VUPKHUH
- UNSPEC_VUPKLUB
- UNSPEC_VUPKLUH
+ UNSPEC_VUPKHUBHW
+ UNSPEC_VUPKLUBHW
Up to you, but... maybe just UNSPEC_VUPKHU and UNSPEC_VUPKLU, in case we
extend this later to other types. Fine either way.
UNSPEC_VPERMSI
UNSPEC_VPERMHI
UNSPEC_INTERHI
@@ -3885,143 +3883,45 @@ (define_insn "xxeval"
[(set_attr "type" "vecsimple")
(set_attr "prefixed" "yes")])
-(define_expand "vec_unpacku_hi_v16qi"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKHUB))]
- "TARGET_ALTIVEC"
-{
- rtx vzero = gen_reg_rtx (V8HImode);
- rtx mask = gen_reg_rtx (V16QImode);
- rtvec v = rtvec_alloc (16);
- bool be = BYTES_BIG_ENDIAN;
-
- emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
-
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 0 : 16);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 6);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 2 : 16);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 4);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 4 : 16);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 2);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 6 : 16);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 0);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
-
- emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode,
v)));
- emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero,
mask));
- DONE;
-})
-
-(define_expand "vec_unpacku_hi_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKHUH))]
+(define_expand "vec_unpacku_hi_<VP_small_lc>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUPKHUBHW))]
"TARGET_ALTIVEC"
{
- rtx vzero = gen_reg_rtx (V4SImode);
- rtx mask = gen_reg_rtx (V16QImode);
- rtvec v = rtvec_alloc (16);
- bool be = BYTES_BIG_ENDIAN;
+ rtx vzero = gen_reg_rtx (<VP_small>mode);
+ emit_insn (gen_altivec_vspltis<VU_char> (vzero, const0_rtx));
- emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
-
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 7);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 6);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 0 : 17);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 1 : 16);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 5);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 4);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 2 : 17);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 3 : 16);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 3);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 2);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 4 : 17);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 5 : 16);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 1);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 0);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 6 : 17);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 7 : 16);
-
- emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode,
v)));
- emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
- DONE;
-})
+ rtx res = gen_reg_rtx (<VP_small>mode);
+ rtx op1 = operands[1];
-(define_expand "vec_unpacku_lo_v16qi"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKLUB))]
- "TARGET_ALTIVEC"
-{
- rtx vzero = gen_reg_rtx (V8HImode);
- rtx mask = gen_reg_rtx (V16QImode);
- rtvec v = rtvec_alloc (16);
- bool be = BYTES_BIG_ENDIAN;
-
- emit_insn (gen_altivec_vspltish (vzero, const0_rtx));
-
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 8 : 16);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 16 : 14);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 10 : 16);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 16 : 12);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 12 : 16);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 16 : 10);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 14 : 16);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 16 : 8);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_altivec_vmrgh<VU_char> (res, vzero, op1));
+ else
+ emit_insn (gen_altivec_vmrgl<VU_char> (res, op1, vzero));
- emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode,
v)));
- emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1], vzero,
mask));
+ emit_insn (gen_move_insn (operands[0], gen_lowpart (<MODE>mode, res)));
DONE;
})
-(define_expand "vec_unpacku_lo_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKLUH))]
+(define_expand "vec_unpacku_lo_<VP_small_lc>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUPKLUBHW))]
"TARGET_ALTIVEC"
{
- rtx vzero = gen_reg_rtx (V4SImode);
- rtx mask = gen_reg_rtx (V16QImode);
- rtvec v = rtvec_alloc (16);
- bool be = BYTES_BIG_ENDIAN;
+ rtx vzero = gen_reg_rtx (<VP_small>mode);
+ emit_insn (gen_altivec_vspltis<VU_char> (vzero, const0_rtx));
- emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
-
- RTVEC_ELT (v, 0) = gen_rtx_CONST_INT (QImode, be ? 16 : 15);
- RTVEC_ELT (v, 1) = gen_rtx_CONST_INT (QImode, be ? 17 : 14);
- RTVEC_ELT (v, 2) = gen_rtx_CONST_INT (QImode, be ? 8 : 17);
- RTVEC_ELT (v, 3) = gen_rtx_CONST_INT (QImode, be ? 9 : 16);
- RTVEC_ELT (v, 4) = gen_rtx_CONST_INT (QImode, be ? 16 : 13);
- RTVEC_ELT (v, 5) = gen_rtx_CONST_INT (QImode, be ? 17 : 12);
- RTVEC_ELT (v, 6) = gen_rtx_CONST_INT (QImode, be ? 10 : 17);
- RTVEC_ELT (v, 7) = gen_rtx_CONST_INT (QImode, be ? 11 : 16);
- RTVEC_ELT (v, 8) = gen_rtx_CONST_INT (QImode, be ? 16 : 11);
- RTVEC_ELT (v, 9) = gen_rtx_CONST_INT (QImode, be ? 17 : 10);
- RTVEC_ELT (v, 10) = gen_rtx_CONST_INT (QImode, be ? 12 : 17);
- RTVEC_ELT (v, 11) = gen_rtx_CONST_INT (QImode, be ? 13 : 16);
- RTVEC_ELT (v, 12) = gen_rtx_CONST_INT (QImode, be ? 16 : 9);
- RTVEC_ELT (v, 13) = gen_rtx_CONST_INT (QImode, be ? 17 : 8);
- RTVEC_ELT (v, 14) = gen_rtx_CONST_INT (QImode, be ? 14 : 17);
- RTVEC_ELT (v, 15) = gen_rtx_CONST_INT (QImode, be ? 15 : 16);
+ rtx res = gen_reg_rtx (<VP_small>mode);
+ rtx op1 = operands[1];
- emit_insn (gen_vec_initv16qiqi (mask, gen_rtx_PARALLEL (V16QImode,
v)));
- emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1], vzero, mask));
+ if (BYTES_BIG_ENDIAN)
+ emit_insn (gen_altivec_vmrgl<VU_char> (res, vzero, op1));
+ else
+ emit_insn (gen_altivec_vmrgh<VU_char> (res, op1, vzero));
+
+ emit_insn (gen_move_insn (operands[0], gen_lowpart (<MODE>mode, res)));
DONE;
})
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c
new file mode 100644
index 00000000000..2621d753baa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
I guess powerpc_altivec_ok is fine. I was initially concerned since
unpack-vectorize.h mentions vector long long, but the types aren't
actually used here. OK.
+/* { dg-options "-maltivec -O2 -ftree-vectorize -fno-vect-cost-model
-fdump-tree-vect-details" } */
+
+/* Test if unpack vectorization succeeds for type signed/unsigned
+ short and char. */
+
+#include "unpack-vectorize-1.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 4 "vect" } } */
+/* { dg-final { scan-assembler {\mvupkhsb\M} } } */
+/* { dg-final { scan-assembler {\mvupklsb\M} } } */
+/* { dg-final { scan-assembler {\mvupkhsh\M} } } */
+/* { dg-final { scan-assembler {\mvupklsh\M} } } */
+/* { dg-final { scan-assembler {\mvmrghb\M} } } */
+/* { dg-final { scan-assembler {\mvmrglb\M} } } */
+/* { dg-final { scan-assembler {\mvmrghh\M} } } */
+/* { dg-final { scan-assembler {\mvmrglh\M} } } */
Suggest that you consider scan-assembler-times 1 to make the tests more
robust, here and for other tests.
Otherwise the patch looks fine to me. Recommend maintainers approve
with or without changes.
Thanks for the improvements!
Bill
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h
new file mode 100644
index 00000000000..1cb89aba392
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-1.h
@@ -0,0 +1,14 @@
+#include "unpack-vectorize.h"
+
+DEF_ARR (si)
+DEF_ARR (ui)
+DEF_ARR (sh)
+DEF_ARR (uh)
+DEF_ARR (sc)
+DEF_ARR (uc)
+
+TEST1 (sh, si)
+TEST1 (uh, ui)
+TEST1 (sc, sh)
+TEST1 (uc, uh)
+
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c
new file mode 100644
index 00000000000..3e7e97da43c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mdejagnu-cpu=power7 -O2 -ftree-vectorize
-fno-vect-cost-model -fdump-tree-vect-details" } */
+
+/* Test if unsigned int unpack vectorization succeeds. V2DImode is
+ supported since Power7 so guard it under Power7 and up. */
+
+#include "unpack-vectorize-2.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-assembler {\mxxmrghw\M} } } */
+/* { dg-final { scan-assembler {\mxxmrglw\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h
new file mode 100644
index 00000000000..e199229e6f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-2.h
@@ -0,0 +1,7 @@
+#include "unpack-vectorize.h"
+
+DEF_ARR (ui)
+DEF_ARR (ull)
+
+TEST1 (ui, ull)
+
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c
new file mode 100644
index 00000000000..a246e7e26b6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2 -ftree-vectorize
-fno-vect-cost-model -fdump-tree-vect-details" } */
+
+/* Test if signed int unpack vectorization succeeds. */
+
+#include "unpack-vectorize-3.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-assembler {\mvupkhsw\M} } } */
+/* { dg-final { scan-assembler {\mvupklsw\M} } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h
new file mode 100644
index 00000000000..6a5191d28a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-3.h
@@ -0,0 +1,7 @@
+#include "unpack-vectorize.h"
+
+DEF_ARR (si)
+DEF_ARR (sll)
+
+TEST1 (si, sll)
+
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c
new file mode 100644
index 00000000000..51f0e67524f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-1.c
@@ -0,0 +1,24 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vmx_hw } */
+/* { dg-options "-maltivec -O2 -ftree-vectorize -fno-vect-cost-model"
} */
+
+#include "unpack-vectorize-1.h"
+
+/* Test if unpack vectorization cases on signed/unsigned short and char
+ run successfully. */
+
+CHECK1 (sh, si)
+CHECK1 (uh, ui)
+CHECK1 (sc, sh)
+CHECK1 (uc, uh)
+
+int
+main ()
+{
+ check1_sh_si ();
+ check1_uh_ui ();
+ check1_sc_sh ();
+ check1_uc_uh ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c
new file mode 100644
index 00000000000..6d243602bbf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-require-effective-target vsx_hw } */
+/* { dg-options "-mdejagnu-cpu=power7 -O2 -ftree-vectorize
-fno-vect-cost-model" } */
+
+#include "unpack-vectorize-2.h"
+
+/* Test if unpack vectorization cases on unsigned int run
successfully. */
+
+CHECK1 (ui, ull)
+
+int
+main ()
+{
+ check1_ui_ull ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c
new file mode 100644
index 00000000000..fec33c46abc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize-run-3.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mdejagnu-cpu=power8 -O2 -ftree-vectorize
-fno-vect-cost-model" } */
+
+#include "unpack-vectorize-3.h"
+
+/* Test if unpack vectorization cases on signed int run successfully. */
+
+CHECK1 (si, sll)
+
+int
+main ()
+{
+ check1_si_sll ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h
b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h
new file mode 100644
index 00000000000..11fa7d4aa6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/unpack-vectorize.h
@@ -0,0 +1,42 @@
+typedef signed long long sll;
+typedef unsigned long long ull;
+typedef signed int si;
+typedef unsigned int ui;
+typedef signed short sh;
+typedef unsigned short uh;
+typedef signed char sc;
+typedef unsigned char uc;
+
+#ifndef ALIGN
+#define ALIGN 32
+#endif
+
+#define ALIGN_ATTR __attribute__((__aligned__(ALIGN)))
+
+#define N 128
+
+#define DEF_ARR(TYPE) \
+ TYPE TYPE##_a[N]
ALIGN_ATTR; \
+ TYPE TYPE##_b[N]
ALIGN_ATTR; \
+ TYPE TYPE##_c[N] ALIGN_ATTR;
+
+#define TEST1(NTYPE,
WTYPE) \
+ __attribute__((noipa)) void test1_##NTYPE##_##WTYPE()
{ \
+ for (int i = 0; i < N;
i++) \
+ WTYPE##_c[i] = NTYPE##_a[i] +
NTYPE##_b[i]; \
+ }
+
+#define CHECK1(NTYPE,
WTYPE) \
+ __attribute__((noipa, optimize(0))) void check1_##NTYPE##_##WTYPE()
{ \
+ for (int i = 0; i < N; i++)
{ \
+ NTYPE##_a[i] = 2 * i * sizeof(NTYPE) +
10; \
+ NTYPE##_b[i] = 7 * i * sizeof(NTYPE) / 5 -
10; \
+ } \
+ test1_##NTYPE##_##WTYPE(); \
+ for (int i = 0; i < N; i++)
{ \
+ WTYPE exp = NTYPE##_a[i] +
NTYPE##_b[i]; \
+ if (WTYPE##_c[i] !=
exp) \
+ __builtin_abort(); \
+ } \
+ }
+
--
2.17.1