Hello,
this is a follow up to the patch applied after this discussion:
http://gcc.gnu.org/ml/gcc-patches/2012-05/msg00504.html
It handles the -mavx __builtin_shuffle case mentioned there.
It passes bootstrap (languages=c,c++) and regtest on x86_64.
2012-08-04 Marc Glisse <marc.gli...@inria.fr>
gcc/
* simplify-rtx.c (simplify_binary_operation_1): Optimize shuffle of
a concatenation.
gcc/testsuite/
* gcc.target/i386/perm-concat.c: New test.
--
Marc Glisse
Index: gcc/testsuite/gcc.target/i386/perm-concat.c
===================================================================
--- gcc/testsuite/gcc.target/i386/perm-concat.c (revision 0)
+++ gcc/testsuite/gcc.target/i386/perm-concat.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mavx -mfpmath=sse" } */
+
+typedef double v2df __attribute__ ((__vector_size__ (16)));
+
+v2df
+f (double d)
+{
+ v2df x = {-d, d};
+ return __builtin_ia32_vpermilpd (x, 1);
+}
+
+/* { dg-final { scan-assembler-not "\tvpermilpd\[ \t\]" } } */
Property changes on: gcc/testsuite/gcc.target/i386/perm-concat.c
___________________________________________________________________
Added: svn:keywords
+ Author Date Id Revision URL
Added: svn:eol-style
+ native
Index: gcc/simplify-rtx.c
===================================================================
--- gcc/simplify-rtx.c (revision 190127)
+++ gcc/simplify-rtx.c (working copy)
@@ -3235,40 +3235,57 @@ simplify_binary_operation_1 (enum rtx_co
/* If we build {a,b} then permute it, build the result directly. */
if (XVECLEN (trueop1, 0) == 2
&& CONST_INT_P (XVECEXP (trueop1, 0, 0))
&& CONST_INT_P (XVECEXP (trueop1, 0, 1))
&& GET_CODE (trueop0) == VEC_CONCAT
&& GET_CODE (XEXP (trueop0, 0)) == VEC_CONCAT
&& GET_MODE (XEXP (trueop0, 0)) == mode
&& GET_CODE (XEXP (trueop0, 1)) == VEC_CONCAT
&& GET_MODE (XEXP (trueop0, 1)) == mode)
{
unsigned int i0 = INTVAL (XVECEXP (trueop1, 0, 0));
unsigned int i1 = INTVAL (XVECEXP (trueop1, 0, 1));
rtx subop0, subop1;
gcc_assert (i0 < 4 && i1 < 4);
subop0 = XEXP (XEXP (trueop0, i0 / 2), i0 % 2);
subop1 = XEXP (XEXP (trueop0, i1 / 2), i1 % 2);
return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1);
}
+
+ if (XVECLEN (trueop1, 0) == 2
+ && CONST_INT_P (XVECEXP (trueop1, 0, 0))
+ && CONST_INT_P (XVECEXP (trueop1, 0, 1))
+ && GET_CODE (trueop0) == VEC_CONCAT
+ && GET_MODE (trueop0) == mode)
+ {
+ unsigned int i0 = INTVAL (XVECEXP (trueop1, 0, 0));
+ unsigned int i1 = INTVAL (XVECEXP (trueop1, 0, 1));
+ rtx subop0, subop1;
+
+ gcc_assert (i0 < 2 && i1 < 2);
+ subop0 = XEXP (trueop0, i0);
+ subop1 = XEXP (trueop0, i1);
+
+ return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1);
+ }
}
if (XVECLEN (trueop1, 0) == 1
&& CONST_INT_P (XVECEXP (trueop1, 0, 0))
&& GET_CODE (trueop0) == VEC_CONCAT)
{
rtx vec = trueop0;
int offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode);
/* Try to find the element in the VEC_CONCAT. */
while (GET_MODE (vec) != mode
&& GET_CODE (vec) == VEC_CONCAT)
{
HOST_WIDE_INT vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0)));
if (offset < vec_size)
vec = XEXP (vec, 0);
else
{
offset -= vec_size;
vec = XEXP (vec, 1);