https://gcc.gnu.org/bugzilla/show_bug.cgi?id=117990

--- Comment #5 from Li Pan <pan2.li at intel dot com> ---
The tree-optimized dump looks right up to a point.

   5   │ int main ()
   6   │ {
   7   │   vector(8) int vect__4.8;
   8   │   vector(8) char vect__3.7;
   9   │   vector(8) char D.2823;
  10   │   int _5;
  11   │   int _8;
  12   │   vector(8) char _26(D);
  13   │
  14   │   <bb 2> [local count: 97603129]:
  15   │   d[0] = 9;
  16   │   d[10] = 9;
  17   │   d[20] = 9;
  18   │   d[30] = 9;
  19   │   d[40] = 9;
  20   │   d[50] = 9;
  21   │   d[60] = 9;
  22   │   d[70] = 9;
  23   │   d[80] = 9;
  24   │   d[90] = 9;
  25   │   vect__3.7_27 = .MASK_LEN_STRIDED_LOAD (&MEM <char[225]> [(void *)&d + 30B], 10, { 0, 0, 0, 0, 0, 0, 0, 0 }, { -1, -1, -1, -1, -1, -1, -1, -1 }, _26(D), 7, 0);
  26   │   vect__4.8_28 = (vector(8) int) vect__3.7_27;
  27   │   .MASK_LEN_STORE (&MEM <int[10]> [(void *)&e + 12B], 32B, { -1, -1, -1, -1, -1, -1, -1, -1 }, 7, 0, vect__4.8_28);
  28   │   _5 = e[5];
  29   │   if (_5 != 9)
  30   │     goto <bb 3>; [51.11%]
  31   │   else
  32   │     goto <bb 4>; [48.89%]
  33   │
  34   │   <bb 3> [local count: 49884959]:
  35   │
  36   │   <bb 4> [local count: 97603128]:
  37   │   # _8 = PHI <123(3), 0(2)>
  38   │   return _8;
  39   │
  40   │ }

But the asm dump does not look correct.

main:
        lui     a5,%hi(.LANCHOR0)
        addi    a5,a5,%lo(.LANCHOR0)
        li      a4,9
        sb      a4,30(a5)
        addi    a3,a5,30
        vsetivli        zero,7,e32,m1,ta,ma
        li      a2,10
        vlse8.v v2,0(a3),a2 // depends on the stores to 30(a5), 40(a5), ... 90(a5),
                            // but only the store to 30(a5) has been scheduled
                            // before the vlse8.v; the memory dependencies look
                            // incorrect.
        addi    a3,a5,252
        sb      a4,0(a5)
        sb      a4,10(a5)
        sb      a4,20(a5)
        sb      a4,40(a5)
        vzext.vf4       v1,v2
        sb      a4,50(a5)
        sb      a4,60(a5)
        vse32.v v1,0(a3)
        li      a0,0
        sb      a4,70(a5)
        sb      a4,80(a5)
        sb      a4,90(a5)
        lw      a5,260(a5)
        beq     a5,a4,.L4
        li      a0,123

Reply via email to