https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89152

            Bug ID: 89152
           Summary: Wrapping values in structures can make the optimizer
                    blind
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: m...@nieper-wisskirchen.de
  Target Milestone: ---

GCC compiles the following C module

**
typedef void (*Cont) (void *f, int a);

int quux (int a);

static void g (Cont c, Cont d, int a)
{
    if (quux (a))
      g (c, d, a + 1);
    ((Cont) c) (d, a);
}

void bar (Cont, int a);

static void h (Cont d, int a)
{
    if (d != (Cont) bar)
      ((Cont) d) (d, a);
}

void foo (int a)
{
    g ((Cont) h, (Cont) bar, a);
}
**

to

**
h:
        cmpq    $bar, %rdi
        je      .L1
        jmp     *%rdi
.L1:
        ret
g.constprop.0:
        pushq   %rbx
        movl    %edi, %ebx
.L6:
        movl    %ebx, %edi
        call    quux
        testl   %eax, %eax
        jne     .L8
        popq    %rbx
        ret
.L8:
        addl    $1, %ebx
        jmp     .L6
foo:
        jmp     g.constprop.0
**

Apart from the fact that `h' should be eliminated (see bug ipa/89139), the
resulting code looks close to optimal (except perhaps for the unnecessary jump
from `foo' into `g').

However, when I wrap the function pointers that are passed around in a
structure (to avoid having to do typecasts in the code above, for example) as
in the following module

**
typedef struct cont
{
    void (*f) (struct cont, int a);
} Cont;

int quux (int a);

static void g (Cont c, Cont d, int a)
{
    if (quux (a))
      g (c, d, a + 1);
    c.f (d, a);
}

void bar (struct cont, int a);

static void h (Cont d, int a)
{
    if (d.f != bar)
      d.f (d, a);
}

void foo (int a)
{
    g ((Cont) { h }, (Cont) { bar }, a);
}
**

a lot of optimizations are missed:

**
h:
        cmpq    $bar, %rdi
        je      .L1
        jmp     *%rdi
.L1:
        ret
g.constprop.0:
        pushq   %r13
        movq    %rdi, %r13
        movl    %edx, %edi
        pushq   %r12
        movq    %rsi, %r12
        pushq   %rbp
        movl    %edx, %ebp
        call    quux
        testl   %eax, %eax
        jne     .L10
        movl    %ebp, %esi
        movq    %r12, %rdi
        popq    %rbp
        popq    %r12
        popq    %r13
        jmp     h
.L10:
        movq    %r12, %rsi
        movq    %r13, %rdi
        leal    1(%rbp), %edx
        call    g.constprop.0
        movl    %ebp, %esi
        movq    %r12, %rdi
        popq    %rbp
        popq    %r12
        popq    %r13
        jmp     h
foo:
        movl    %edi, %edx
        movl    $bar, %esi
        movl    $h, %edi
        jmp     g.constprop.0
**

Both compilations were done at -O2; -O3 makes things no better.

For comparison, clang also optimizes the latter code, namely to:

**
foo:                                    # @foo
        pushq   %rbx
        movl    %edi, %ebx
.LBB0_1:                                # =>This Inner Loop Header: Depth=1
        movl    %ebx, %edi
        callq   quux
        addl    $1, %ebx
        testl   %eax, %eax
        jne     .LBB0_1
        popq    %rbx
        retq
**

Reply via email to