On Tue, Sep 22, 2009 at 1:52 PM, Rahul Kharche <ra...@icerasemi.com> wrote:
> The following causes missed loop optimizations in O2 from creating
> unnecessary loop induction variables. Or, is a case of IV opts not
> able to coalesce copies of induction variables. A previous post related
> to this was made in PR41026 which had a type promoted loop index
> variable
> copied. I believe this example makes the problem more obvious.
>
> struct struct_t {
>  int* data;
> };
>
> void testAutoIncStruct (struct struct_t* sp, int start, int end)
> {
>    int i;
>    for (i = 0; i+start < end; i++)
>      {
>        sp->data[i+start] = 0;
>      }
> }
>
> With GCC v4.4.1 release) and gcc -O2 -fdump-tree-all on the above case
> we get the following dump from IVOpts
>
> testAutoIncStruct (struct struct_t * sp, int start, int end)
> {
>  unsigned int D.1283;
>  unsigned int D.1284;
>  int D.1282;
>  unsigned int ivtmp.32;
>  int * pretmp.17;
>  int i;
>  int * D.1245;
>  unsigned int D.1244;
>  unsigned int D.1243;
>
> <bb 2>:
>  if (start_3(D) < end_5(D))
>    goto <bb 3>;
>  else
>    goto <bb 6>;
>
> <bb 3>:
>  pretmp.17_22 = sp_6(D)->data;
>  D.1282_23 = start_3(D) + 1;
>  ivtmp.32_25 = (unsigned int) D.1282_23;
>  D.1283_27 = (unsigned int) end_5(D);
>  D.1284_28 = D.1283_27 + 1;
>
> <bb 4>:
>  # start_20 = PHI <start_4(5), start_3(D)(3)>
>  # ivtmp.32_7 = PHI <ivtmp.32_24(5), ivtmp.32_25(3)>
>  D.1243_9 = (unsigned int) start_20;
>  D.1244_10 = D.1243_9 * 4;
>  D.1245_11 = pretmp.17_22 + D.1244_10;
>  *D.1245_11 = 0;
>  start_26 = (int) ivtmp.32_7;
>  start_4 = start_26;
>  ivtmp.32_24 = ivtmp.32_7 + 1;
>  if (ivtmp.32_24 != D.1284_28)
>    goto <bb 5>;
>  else
>    goto <bb 6>;
>
> <bb 5>:
>  goto <bb 4>;
>
> <bb 6>:
>  return;
>
> }
>
> IVOpts cannot identify start_26, start_4 and ivtmp_32_7 to be copies.
> The root cause is that expression 'i + start' is identified as a common
> expression between the test in the header and the index operation in the
> latch. This is unified by copy propagation or FRE prior to loop
> optimizations
> and creates a new induction variable.
>
> If we disable tree copy propagation and FRE with
> gcc -O2 -fno-tree-copy-prop -fno-tree-fre -fdump-tree-all we get
>
> testAutoIncStruct (struct struct_t * sp, int start, int end)
> {
>  unsigned int D.1287;
>  unsigned int D.1288;
>  unsigned int D.1289;
>  int D.1290;
>  unsigned int D.1284;
>  unsigned int D.1285;
>  unsigned int D.1286;
>  int * pretmp.17;
>  int i;
>  int * D.1245;
>  unsigned int D.1244;
>  unsigned int D.1243;
>  int D.1242;
>  int * D.1241;
>
> <bb 2>:
>  if (start_3(D) < end_5(D))
>    goto <bb 3>;
>  else
>    goto <bb 6>;
>
> <bb 3>:
>  pretmp.17_23 = sp_6(D)->data;
>  D.1287_27 = (unsigned int) end_5(D);
>  D.1288_28 = (unsigned int) start_3(D);
>  D.1289_29 = D.1287_27 - D.1288_28;
>  D.1290_30 = (int) D.1289_29;
>
> <bb 4>:
>  # i_20 = PHI <i_12(5), 0(3)>
>  D.1241_7 = pretmp.17_23;
>  D.1284_26 = (unsigned int) start_3(D);
>  D.1285_25 = (unsigned int) i_20;
>  D.1286_24 = D.1284_26 + D.1285_25;
>  MEM[base: pretmp.17_23, index: D.1286_24, step: 4] = 0;
>  i_12 = i_20 + 1;
>  if (i_12 != D.1290_30)
>    goto <bb 5>;
>  else
>    goto <bb 6>;
>
> <bb 5>:
>  goto <bb 4>;
>
> <bb 6>:
>  return;
>
> }
>
> The correct single induction variable as been identified here. This is
> not
> a loop header copying problem either. If we disable loop header copying,
> we
> still get multiple induction variables created. In fact in the above
> case
> loop header copying correctly enables post-increment mode on our port.
>
> testAutoIncStruct (struct struct_t * sp, int start, int end)
> {
>  unsigned int D.1282;
>  unsigned int ivtmp.31;
>  unsigned int ivtmp.29;
>  int i;
>  int * D.1245;
>  unsigned int D.1244;
>  unsigned int D.1243;
>  int D.1242;
>  int * D.1241;
>
> <bb 2>:
>  ivtmp.29_18 = (unsigned int) start_3(D);
>  D.1282_21 = (unsigned int) start_3(D);
>  ivtmp.31_22 = D.1282_21 * 4;
>  goto <bb 4>;
>
> <bb 3>:
>  D.1241_7 = sp_6(D)->data;
>  D.1244_10 = ivtmp.31_19;
>  D.1245_11 = D.1241_7 + D.1244_10;
>  *D.1245_11 = 0;
>  ivtmp.29_17 = ivtmp.29_8 + 1;
>  ivtmp.31_20 = ivtmp.31_19 + 4;
>
> <bb 4>:
>  # ivtmp.29_8 = PHI <ivtmp.29_18(2), ivtmp.29_17(3)>
>  # ivtmp.31_19 = PHI <ivtmp.31_22(2), ivtmp.31_20(3)>
>  D.1242_23 = (int) ivtmp.29_8;
>  if (D.1242_23 < end_5(D))
>    goto <bb 3>;
>  else
>    goto <bb 5>;
>
> <bb 5>:
>  return;
>
> }
>
> Does this imply we try and not copy propagate or FRE potential induction
> variables? Or is this simply a missed case in IVOpts?

Well, the situation is more complicated.  Initially FRE removes a
full redundancy which loop-header copying turns into a loop
carried dependency.  So for example moving loop header copying
before FRE avoids this issue as well (which would be a good idea
for other reasons as well).  So, at the point of FRE there is no
way to avoid the situation with the current pass ordering.

Richard.

Reply via email to