https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98535

--- Comment #6 from Richard Biener <rguenth at gcc dot gnu.org> ---
So I think the issue is that hi_start isn't halved, or that hi_start isn't
added when (i & 1) != 0.  So probably every duplicate-and-interleave that
needs more than a single round is broken right now.

@@ -5091,13 +5091,14 @@ duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
      a multiple of N * 2, the HI result is the same as the LO.  */
   unsigned int in_start = 0;
   unsigned int out_start = nvectors;
-  unsigned int hi_start = nvectors / 2;
+  unsigned int hi_start = nvectors;
   /* A bound on the number of outputs needed to produce NRESULTS results
      in the final iteration.  */
   unsigned int noutputs_bound = nvectors * nresults;
   for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
     {
       noutputs_bound /= 2;
+      hi_start /= 2;
       unsigned int limit = MIN (noutputs_bound, nvectors);
       for (unsigned int i = 0; i < limit; ++i)
        {

This fixes the crash, but the generated code still seems to lack a permute,
since seq is initially

_99 = {j$b_2(D)};
_100 = VIEW_CONVERT_EXPR<unsigned short>(_99);
_101 = [vec_duplicate_expr] _100;
_102 = {j$c_10(D)};
_103 = VIEW_CONVERT_EXPR<unsigned short>(_102);
_104 = [vec_duplicate_expr] _103;
_105 = {j$d_11(D)};
_106 = VIEW_CONVERT_EXPR<unsigned short>(_105);
_107 = [vec_duplicate_expr] _106;
_108 = {j$e_12(D)};
_109 = VIEW_CONVERT_EXPR<unsigned short>(_108);
_110 = [vec_duplicate_expr] _109;

but we re-use the interleaving of _101 and _107 for the other part
due to

          if ((i & 1) != 0
              && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
                             2 * in_repeat))
            {
              pieces[out_start + i] = pieces[out_start + i - 1];
              continue;
            }

that is, we end up with

  <bb 26> [local count: 73320728]:
  _99 = {j$b_2(D)};
  _100 = VIEW_CONVERT_EXPR<unsigned short>(_99);
  _101 = [vec_duplicate_expr] _100;
  _102 = {j$c_10(D)};
  _103 = VIEW_CONVERT_EXPR<unsigned short>(_102);
  _104 = [vec_duplicate_expr] _103;
  _105 = {j$d_11(D)};
  _106 = VIEW_CONVERT_EXPR<unsigned short>(_105);
  _107 = [vec_duplicate_expr] _106;
  _108 = {j$e_12(D)};
  _109 = VIEW_CONVERT_EXPR<unsigned short>(_108);
  _110 = [vec_duplicate_expr] _109;
  _111 = VEC_PERM_EXPR <_101, _107, { 0, POLY_INT_CST [4, 4], 1, POLY_INT_CST [5, 4], 2, POLY_INT_CST [6, 4], ... }>;
  _112 = VEC_PERM_EXPR <_111, _111, { 0, POLY_INT_CST [4, 4], 1, POLY_INT_CST [5, 4], 2, POLY_INT_CST [6, 4], ... }>;
  _113 = VIEW_CONVERT_EXPR<vector([4,4]) short int>(_112);

seemingly ignoring _104 and _108 entirely ...

Reply via email to