Hi!

VEC_INTERLEAVE_*_EXPR trees are unfortunately dependent on BYTES_BIG_ENDIAN:
what counts as HIGH vs. LOW differs based on endianness.
The only place that creates these in the IL is:
          if (BYTES_BIG_ENDIAN)
            {
              high_code = VEC_INTERLEAVE_HIGH_EXPR;
              low_code = VEC_INTERLEAVE_LOW_EXPR;
            }
          else
            {
              low_code = VEC_INTERLEAVE_HIGH_EXPR;
              high_code = VEC_INTERLEAVE_LOW_EXPR;
            }
          perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
                                                    vect1, vect2);
...
so either folding (and expansion if only vec_perm* is supported) needs to
be adjusted as done in the patch below, or we'd need to rename them
to VEC_INTERLEAVE_{FIRST,SECOND}_EXPR or similar and adjust all the patterns
etc.

Bootstrapped/regtested on x86_64-linux and i686-linux, tested on the
testcase using powerpc cross.  Ok for trunk?

2011-11-22  Jakub Jelinek  <ja...@redhat.com>

        PR tree-optimization/51074
        * fold-const.c (fold_binary_loc): Fix up VEC_INTERLEAVE_*_EXPR
        handling for BYTES_BIG_ENDIAN.
        * optabs.c (can_vec_perm_for_code_p): Likewise.

        * gcc.dg/vect/pr51074.c: New test.

--- gcc/fold-const.c.jj 2011-11-21 16:22:02.000000000 +0100
+++ gcc/fold-const.c    2011-11-22 09:59:15.606739333 +0100
@@ -13483,10 +13483,12 @@ fold_binary_loc (location_t loc,
                sel[i] = i * 2 + 1;
                break;
              case VEC_INTERLEAVE_HIGH_EXPR:
-               sel[i] = (i + nelts) / 2 + ((i & 1) ? nelts : 0);
+               sel[i] = (i + (BYTES_BIG_ENDIAN ? 0 : nelts)) / 2
+                        + ((i & 1) ? nelts : 0);
                break;
              case VEC_INTERLEAVE_LOW_EXPR:
-               sel[i] = i / 2 + ((i & 1) ? nelts : 0);
+               sel[i] = (i + (BYTES_BIG_ENDIAN ? nelts : 0)) / 2
+                        + ((i & 1) ? nelts : 0);
                break;
              default:
                gcc_unreachable ();
--- gcc/optabs.c.jj     2011-11-21 16:22:02.000000000 +0100
+++ gcc/optabs.c        2011-11-22 10:17:04.820399126 +0100
@@ -6932,9 +6932,9 @@ can_vec_perm_for_code_p (enum tree_code
          break;
 
        case VEC_INTERLEAVE_HIGH_EXPR:
-         alt = nelt / 2;
-         /* FALLTHRU */
        case VEC_INTERLEAVE_LOW_EXPR:
+         if ((BYTES_BIG_ENDIAN != 0) ^ (code == VEC_INTERLEAVE_HIGH_EXPR))
+           alt = nelt / 2;
          for (i = 0; i < nelt / 2; ++i)
            {
              data[i * 2] = i + alt;
--- gcc/testsuite/gcc.dg/vect/pr51074.c.jj      2011-11-22 10:22:44.247377928 +0100
+++ gcc/testsuite/gcc.dg/vect/pr51074.c 2011-11-22 10:22:16.000000000 +0100
@@ -0,0 +1,24 @@
+/* PR tree-optimization/51074 */
+
+#include "tree-vect.h"
+
+struct S { int a, b; } s[8];
+
+int
+main ()
+{
+  int i;
+  check_vect ();
+  for (i = 0; i < 8; i++)
+    {
+      s[i].b = 0;
+      s[i].a = i;
+    }
+  asm volatile ("" : : : "memory");
+  for (i = 0; i < 8; i++)
+    if (s[i].b != 0 || s[i].a != i)
+      abort ();
+  return 0;
+}
+
+/* { dg-final { cleanup-tree-dump "vect" } } */

        Jakub

Reply via email to