While I was there, I decided to improve another bit of VEC_SELECT
simplification (this supersedes the patch in the parent of this message).
Adding Uros in Cc: because I am touching the x86 backend. Sorry to add yet
another insn variant, but I don't see how we can avoid it here.
The x86 and RTL parts are independent, but the testcases need both.
Bootstrapped and regression-tested on x86_64-linux (for the next stage1, I assume).
2012-12-02 Marc Glisse <marc.gli...@inria.fr>
PR target/43147
PR target/44551
gcc/
* simplify-rtx.c (simplify_binary_operation_1) <VEC_SELECT>:
Improve VEC_SELECT and VEC_CONCAT subcases.
* config/i386/sse.md (*sse_shufps_<mode>_single): New.
gcc/testsuite/
* gcc.target/i386/pr43147.c: New testcase.
* gcc.target/i386/pr44551.c: New testcase.
--
Marc Glisse
Index: simplify-rtx.c
===================================================================
--- simplify-rtx.c (revision 194051)
+++ simplify-rtx.c (working copy)
@@ -3317,98 +3317,20 @@ simplify_binary_operation_1 (enum rtx_co
gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0)));
gcc_assert (mode == GET_MODE_INNER (GET_MODE (trueop0)));
gcc_assert (GET_CODE (trueop1) == PARALLEL);
gcc_assert (XVECLEN (trueop1, 0) == 1);
gcc_assert (CONST_INT_P (XVECEXP (trueop1, 0, 0)));
if (GET_CODE (trueop0) == CONST_VECTOR)
return CONST_VECTOR_ELT (trueop0, INTVAL (XVECEXP
(trueop1, 0, 0)));
- /* Extract a scalar element from a nested VEC_SELECT expression
- (with optional nested VEC_CONCAT expression). Some targets
- (i386) extract scalar element from a vector using chain of
- nested VEC_SELECT expressions. When input operand is a memory
- operand, this operation can be simplified to a simple scalar
- load from an offseted memory address. */
- if (GET_CODE (trueop0) == VEC_SELECT)
- {
- rtx op0 = XEXP (trueop0, 0);
- rtx op1 = XEXP (trueop0, 1);
-
- enum machine_mode opmode = GET_MODE (op0);
- int elt_size = GET_MODE_SIZE (GET_MODE_INNER (opmode));
- int n_elts = GET_MODE_SIZE (opmode) / elt_size;
-
- int i = INTVAL (XVECEXP (trueop1, 0, 0));
- int elem;
-
- rtvec vec;
- rtx tmp_op, tmp;
-
- gcc_assert (GET_CODE (op1) == PARALLEL);
- gcc_assert (i < n_elts);
-
- /* Select element, pointed by nested selector. */
- elem = INTVAL (XVECEXP (op1, 0, i));
-
- /* Handle the case when nested VEC_SELECT wraps VEC_CONCAT. */
- if (GET_CODE (op0) == VEC_CONCAT)
- {
- rtx op00 = XEXP (op0, 0);
- rtx op01 = XEXP (op0, 1);
-
- enum machine_mode mode00, mode01;
- int n_elts00, n_elts01;
-
- mode00 = GET_MODE (op00);
- mode01 = GET_MODE (op01);
-
- /* Find out number of elements of each operand. */
- if (VECTOR_MODE_P (mode00))
- {
- elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode00));
- n_elts00 = GET_MODE_SIZE (mode00) / elt_size;
- }
- else
- n_elts00 = 1;
-
- if (VECTOR_MODE_P (mode01))
- {
- elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode01));
- n_elts01 = GET_MODE_SIZE (mode01) / elt_size;
- }
- else
- n_elts01 = 1;
-
- gcc_assert (n_elts == n_elts00 + n_elts01);
-
- /* Select correct operand of VEC_CONCAT
- and adjust selector. */
- if (elem < n_elts01)
- tmp_op = op00;
- else
- {
- tmp_op = op01;
- elem -= n_elts00;
- }
- }
- else
- tmp_op = op0;
-
- vec = rtvec_alloc (1);
- RTVEC_ELT (vec, 0) = GEN_INT (elem);
-
- tmp = gen_rtx_fmt_ee (code, mode,
- tmp_op, gen_rtx_PARALLEL (VOIDmode, vec));
- return tmp;
- }
if (GET_CODE (trueop0) == VEC_DUPLICATE
&& GET_MODE (XEXP (trueop0, 0)) == mode)
return XEXP (trueop0, 0);
}
else
{
gcc_assert (VECTOR_MODE_P (GET_MODE (trueop0)));
gcc_assert (GET_MODE_INNER (mode)
== GET_MODE_INNER (GET_MODE (trueop0)));
gcc_assert (GET_CODE (trueop1) == PARALLEL);
@@ -3482,44 +3404,96 @@ simplify_binary_operation_1 (enum rtx_co
rtx subop0, subop1;
gcc_assert (i0 < 2 && i1 < 2);
subop0 = XEXP (trueop0, i0);
subop1 = XEXP (trueop0, i1);
return simplify_gen_binary (VEC_CONCAT, mode, subop0, subop1);
}
}
- if (XVECLEN (trueop1, 0) == 1
- && CONST_INT_P (XVECEXP (trueop1, 0, 0))
- && GET_CODE (trueop0) == VEC_CONCAT)
+ /* Look through nested VEC_SELECTs. */
+ if (GET_CODE (trueop0) == VEC_SELECT)
{
- rtx vec = trueop0;
- int offset = INTVAL (XVECEXP (trueop1, 0, 0)) * GET_MODE_SIZE (mode);
+ int len = XVECLEN (trueop1, 0);
+ rtvec vec = rtvec_alloc (len);
+ for (int i = 0; i < len; i++)
+ {
+ int j = INTVAL (XVECEXP (trueop1, 0, i));
+ RTVEC_ELT (vec, i) = XVECEXP (XEXP (trueop0, 1), 0, j);
+ }
+ rtx new_op0 = XEXP (trueop0, 0);
+ rtx new_op1 = gen_rtx_PARALLEL (VOIDmode, vec);
+ return simplify_gen_binary (VEC_SELECT, mode, new_op0, new_op1);
+ }
- /* Try to find the element in the VEC_CONCAT. */
- while (GET_MODE (vec) != mode
- && GET_CODE (vec) == VEC_CONCAT)
- {
- HOST_WIDE_INT vec_size = GET_MODE_SIZE (GET_MODE (XEXP (vec, 0)));
- if (offset < vec_size)
- vec = XEXP (vec, 0);
+ /* Detect if all the elements come from the same subpart of a concat. */
+ if (GET_CODE (trueop0) == VEC_CONCAT)
+ {
+ rtx new_op0 = NULL_RTX;
+ rtx new_op1 = NULL_RTX;
+ int first = 0;
+ int second = 0;
+ unsigned nelts_first_half = 1;
+ enum machine_mode mode_first_half = GET_MODE (XEXP (trueop0, 0));
+ if (VECTOR_MODE_P (mode_first_half))
+ {
+ int elt_size = GET_MODE_SIZE (GET_MODE_INNER (mode_first_half));
+ nelts_first_half = (GET_MODE_SIZE (mode_first_half) / elt_size);
+ }
+
+ for (int i = 0; i < XVECLEN (trueop1, 0); i++)
+ {
+ rtx j = XVECEXP (trueop1, 0, i);
+ if (!CONST_INT_P (j))
+ {
+ first++;
+ second++;
+ break;
+ }
+ if (INTVAL (j) < nelts_first_half)
+ first++;
else
+ second++;
+ }
+
+ if (second == 0)
+ {
+ new_op0 = XEXP (trueop0, 0);
+ new_op1 = trueop1;
+ }
+ else if (first == 0
+ || rtx_equal_p (XEXP (trueop0, 0), XEXP (trueop0, 1)))
+ {
+ int len = XVECLEN (trueop1, 0);
+ rtvec vec = rtvec_alloc (len);
+ for (int i = 0; i < len; i++)
{
- offset -= vec_size;
- vec = XEXP (vec, 1);
+ /* All vectors have a power-of-2 size, so both halves of a
+ VEC_CONCAT must have the same size and using '%' instead
+ of '-' is safe. */
+ int j = INTVAL (XVECEXP (trueop1, 0, i)) % nelts_first_half;
+ RTVEC_ELT (vec, i) = GEN_INT (j);
}
- vec = avoid_constant_pool_reference (vec);
+ new_op0 = XEXP (trueop0, 1);
+ new_op1 = gen_rtx_PARALLEL (VOIDmode, vec);
}
- if (GET_MODE (vec) == mode)
- return vec;
+ if (new_op0)
+ {
+ if (VECTOR_MODE_P (GET_MODE (new_op0)))
+ return simplify_gen_binary (VEC_SELECT, mode, new_op0, new_op1);
+ if (VECTOR_MODE_P (mode))
+ return simplify_gen_unary (VEC_DUPLICATE, mode, new_op0,
+ GET_MODE (new_op0));
+ return new_op0;
+ }
}
return 0;
case VEC_CONCAT:
{
enum machine_mode op0_mode = (GET_MODE (trueop0) != VOIDmode
? GET_MODE (trueop0)
: GET_MODE_INNER (mode));
enum machine_mode op1_mode = (GET_MODE (trueop1) != VOIDmode
? GET_MODE (trueop1)
Index: testsuite/gcc.target/i386/pr44551.c
===================================================================
--- testsuite/gcc.target/i386/pr44551.c (revision 0)
+++ testsuite/gcc.target/i386/pr44551.c (revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mavx" } */
+
+#include <immintrin.h>
+
+__m128i
+foo (__m256i x, __m128i y)
+{
+ __m256i r = _mm256_insertf128_si256(x, y, 1);
+ __m128i a = _mm256_extractf128_si256(r, 1);
+ return a;
+}
+
+/* { dg-final { scan-assembler-not "insert" } } */
+/* { dg-final { scan-assembler-not "extract" } } */
Property changes on: testsuite/gcc.target/i386/pr44551.c
___________________________________________________________________
Added: svn:keywords
+ Author Date Id Revision URL
Added: svn:eol-style
+ native
Index: testsuite/gcc.target/i386/pr43147.c
===================================================================
--- testsuite/gcc.target/i386/pr43147.c (revision 0)
+++ testsuite/gcc.target/i386/pr43147.c (revision 0)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O -msse" } */
+
+#include <x86intrin.h>
+
+__m128 f(__m128 m)
+{
+ m = _mm_shuffle_ps(m, m, 0xC9);
+ m = _mm_shuffle_ps(m, m, 0x2D);
+ return m;
+}
+
+/* { dg-final { scan-assembler-times "shufps" 1 } } */
Property changes on: testsuite/gcc.target/i386/pr43147.c
___________________________________________________________________
Added: svn:eol-style
+ native
Added: svn:keywords
+ Author Date Id Revision URL
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md (revision 194051)
+++ config/i386/sse.md (working copy)
@@ -3900,20 +3900,53 @@
default:
gcc_unreachable ();
}
}
[(set_attr "isa" "noavx,avx")
(set_attr "type" "sseshuf")
(set_attr "length_immediate" "1")
(set_attr "prefix" "orig,vex")
(set_attr "mode" "V4SF")])
+(define_insn "*sse_shufps_<mode>_single"
+ [(set (match_operand:VI4F_128 0 "register_operand" "=x,x")
+ (vec_select:VI4F_128
+ (match_operand:VI4F_128 1 "register_operand" "0,x")
+ (parallel [(match_operand 2 "const_0_to_3_operand")
+ (match_operand 3 "const_0_to_3_operand")
+ (match_operand 4 "const_0_to_3_operand")
+ (match_operand 5 "const_0_to_3_operand")])))]
+ "TARGET_SSE"
+{
+ int mask = 0;
+ mask |= INTVAL (operands[2]) << 0;
+ mask |= INTVAL (operands[3]) << 2;
+ mask |= INTVAL (operands[4]) << 4;
+ mask |= INTVAL (operands[5]) << 6;
+ operands[2] = GEN_INT (mask);
+
+ switch (which_alternative)
+ {
+ case 0:
+ return "shufps\t{%2, %0, %0|%0, %0, %2}";
+ case 1:
+ return "vshufps\t{%2, %1, %1, %0|%0, %1, %1, %2}";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sseshuf1")
+ (set_attr "length_immediate" "1")
+ (set_attr "prefix" "orig,vex")
+ (set_attr "mode" "V4SF")])
+
(define_insn "sse_storehps"
[(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
(vec_select:V2SF
(match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
(parallel [(const_int 2) (const_int 3)])))]
"TARGET_SSE"
"@
%vmovhps\t{%1, %0|%0, %1}
%vmovhlps\t{%1, %d0|%d0, %1}
%vmovlps\t{%H1, %d0|%d0, %H1}"