Hi:
As indicated in PR98167, this patch is a follow-up to [1].
Bootstrapped and regtested on x86_64-linux-gnu.
Ok for trunk?
gcc/
PR target/98167
* config/i386/i386.c (ix86_gimple_fold_builtin): Handle
IX86_BUILTIN_SHUFPD512, IX86_BUILTIN_SHUFPS512,
IX86_BUILTIN_SHUFPD256, IX86_BUILTIN_SHUFPS,
IX86_BUILTIN_SHUFPS256.
gcc/testsuite/
* gcc.target/i386/avx512f-vshufpd-1.c: Adjust testcase.
* gcc.target/i386/avx512f-vshufps-1.c: Adjust testcase.
[1]. https://gcc.gnu.org/pipermail/gcc-patches/2019-May/521983.html
--
BR,
Hongtao
From 74596b08a91dafcb29441de59544dd857a090564 Mon Sep 17 00:00:00 2001
From: liuhongt <[email protected]>
Date: Fri, 11 Dec 2020 19:02:43 +0800
Subject: [PATCH] [X86] Fold more shuffle builtins to VEC_PERM_EXPR.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
A follow-up to https://gcc.gnu.org/pipermail/gcc-patches/2019-May/521983.html
gcc/
PR target/98167
* config/i386/i386.c (ix86_gimple_fold_builtin): Handle
IX86_BUILTIN_SHUFPD512, IX86_BUILTIN_SHUFPS512,
IX86_BUILTIN_SHUFPD256, IX86_BUILTIN_SHUFPS,
IX86_BUILTIN_SHUFPS256.
gcc/testsuite/
* gcc.target/i386/avx512f-vshufpd-1.c: Adjust testcase.
* gcc.target/i386/avx512f-vshufps-1.c: Adjust testcase.
---
gcc/config/i386/i386.c | 64 ++++++++++++++++---
.../gcc.target/i386/avx512f-vshufpd-1.c | 3 +-
.../gcc.target/i386/avx512f-vshufps-1.c | 3 +-
3 files changed, 59 insertions(+), 11 deletions(-)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 54b7e103ba2..432470a916c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -18187,21 +18187,67 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
}
break;
+ case IX86_BUILTIN_SHUFPD512:
+ case IX86_BUILTIN_SHUFPS512:
+ if (n_args > 2)
+ {
+ /* This is masked shuffle. Only optimize if the mask is all ones. */
+ tree argl = gimple_call_arg (stmt, n_args - 1);
+ arg0 = gimple_call_arg (stmt, 0);
+ if (!tree_fits_uhwi_p (argl))
+ break;
+ unsigned HOST_WIDE_INT mask = tree_to_uhwi (argl);
+ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+ if ((mask | (HOST_WIDE_INT_M1U << elems)) != HOST_WIDE_INT_M1U)
+ break;
+ }
+ /* Fall thru. */
case IX86_BUILTIN_SHUFPD:
+ case IX86_BUILTIN_SHUFPD256:
+ case IX86_BUILTIN_SHUFPS:
+ case IX86_BUILTIN_SHUFPS256:
arg2 = gimple_call_arg (stmt, 2);
if (TREE_CODE (arg2) == INTEGER_CST)
{
- location_t loc = gimple_location (stmt);
- unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
arg0 = gimple_call_arg (stmt, 0);
+ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+ machine_mode imode = GET_MODE_INNER (TYPE_MODE (TREE_TYPE (arg0)));
+ unsigned HOST_WIDE_INT imask = TREE_INT_CST_LOW (arg2);
+
+ /* Check valid imm, refer to gcc.target/i386/testimm-10.c. */
+ if (imask > 255
+ || (imask >= HOST_WIDE_INT_1U << elems
+ && imode == E_DFmode))
+ return false;
+
arg1 = gimple_call_arg (stmt, 1);
- tree itype = long_long_integer_type_node;
- tree vtype = build_vector_type (itype, 2); /* V2DI */
- tree_vector_builder elts (vtype, 2, 1);
- /* Ignore bits other than the lowest 2. */
- elts.quick_push (build_int_cst (itype, imask & 1));
- imask >>= 1;
- elts.quick_push (build_int_cst (itype, 2 + (imask & 1)));
+ location_t loc = gimple_location (stmt);
+ tree itype = imode == E_DFmode
+ ? long_long_integer_type_node : integer_type_node;
+ /* V2DI/V4DI/V8DI/V4SI/V8SI/V16SI */
+ tree vtype = build_vector_type (itype, elems);
+ tree_vector_builder elts (vtype, elems, 1);
+
+ for (unsigned i = 0; i != elems; i++)
+ {
+ unsigned sel_idx;
+ /* Imm[1:0](if VL > 128, then use Imm[3:2],Imm[5:4],Imm[7:6])
+ provide 2 select constrols for each element of the
+ destination. */
+ if (imode == E_DFmode)
+ sel_idx = (i & 1) * elems
+ + (i >> 1 << 1) + ((imask & 1 << i) >> i);
+ else
+ {
+ /* Imm[7:0](if VL > 128, also use Imm[7:0]) provide 4 select
+ controls for each element of the destination. */
+ unsigned j = i % 4;
+ sel_idx = ((i & 2) >> 1) * elems
+ + (i >> 2 << 2) + ((imask & 3 << j << j) >> j >> j);
+ }
+ elts.quick_push (build_int_cst (itype, sel_idx));
+ }
+
tree omask = elts.build ();
gimple *g = gimple_build_assign (gimple_call_lhs (stmt),
VEC_PERM_EXPR,
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c
index d1ac01e1c88..8df5b9d4441 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufpd-1.c
@@ -7,11 +7,12 @@
#include <immintrin.h>
__m512d x;
+__m512d y;
void extern
avx512f_test (void)
{
- x = _mm512_shuffle_pd (x, x, 56);
+ x = _mm512_shuffle_pd (x, y, 56);
x = _mm512_mask_shuffle_pd (x, 2, x, x, 56);
x = _mm512_maskz_shuffle_pd (2, x, x, 56);
}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c b/gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c
index 07a63fca3ff..378ae4b7101 100644
--- a/gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512f-vshufps-1.c
@@ -7,11 +7,12 @@
#include <immintrin.h>
__m512 x;
+__m512 y;
void extern
avx512f_test (void)
{
- x = _mm512_shuffle_ps (x, x, 56);
+ x = _mm512_shuffle_ps (x, y, 56);
x = _mm512_mask_shuffle_ps (x, 2, x, x, 56);
x = _mm512_maskz_shuffle_ps (2, x, x, 56);
}
--
2.18.1