[Bug tree-optimization/87205] Inefficient code generation for switch

pdimov at gmail dot com Tue, 04 Sep 2018 12:45:01 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87205


--- Comment #9 from Peter Dimov <pdimov at gmail dot com> ---
For more context, see https://godbolt.org/z/SzfpKr

```
#include <type_traits>

// Minimal variant-like aggregate used by the test case: raw storage that is
// large enough and suitably aligned for any of T..., plus the index that
// visit() switches on.
template<class... T> struct variant
{
    // Uninitialized storage whose size and alignment fit every type in T...
    std::aligned_union_t<0, T...> storage_;
    // Selects which case of the switch in visit() is taken.
    unsigned index_;
};

// Calls f on the alternative selected by v.index_, for a variant with exactly
// six alternatives. Each case reinterprets v.storage_ as a reference to the
// corresponding Ti and returns whatever f returns. The return type is deduced,
// so f must yield the same type for every alternative.
template<class T0, class T1, class T2, class T3, class T4, class T5, class F>
auto visit( variant<T0, T1, T2, T3, T4, T5>& v, F f )
{
    switch( v.index_ )
    {
        case 0: return f( (T0&)v.storage_ );
        case 1: return f( (T1&)v.storage_ );
        case 2: return f( (T2&)v.storage_ );
        case 3: return f( (T3&)v.storage_ );
        case 4: return f( (T4&)v.storage_ );
        case 5: return f( (T5&)v.storage_ );
        // Tells the compiler that index_ is never outside 0..5; actually
        // reaching this point is undefined behavior.
        default: __builtin_unreachable();
    }
}

// Common base of every alternative; holds the single int member that the
// visitors in h1, h2 and h4 read.
struct X
{
    int v;
};

// A distinct type for each I that derives from X and adds no members of its
// own, so every Y<I> exposes the same inherited member v.
template<int I> struct Y: X
{
};

// The variant under test: six alternatives, all derived from X.
using V = variant<Y<0>, Y<1>, Y<2>, Y<3>, Y<4>, Y<5>>;

// Declared only; no definitions appear in this snippet. Called from the
// visitors in h3 (f) and h4 (g).
void f( X& );
int g( int );

// Visits with a non-generic lambda taking X const&, so each Y<I>& alternative
// is bound to its X base and the member v is read through X.
int h1( V& v )
{
    return visit( v, [](X const& x){ return x.v; } );
}

// Visits with a generic lambda, so the lambda is instantiated per alternative
// and the member v is read through each Y<I>&.
int h2( V& v )
{
    return visit( v, [](auto&& x){ return x.v; } );
}

// Visits with a generic lambda that passes the alternative to f( X& ); every
// case therefore makes the same call with the Y<I>& converted to X&. The
// lambda returns void, so visit() returns void as well.
void h3( V& v )
{
    return visit( v, [](auto&& x){ f(x); } );
}

// Visits with a generic lambda that reads v through each Y<I>& and forwards it
// to g( int ), returning g's result.
int h4( V& v )
{
    return visit( v, [](auto&& x){ return g(x.v); } );
}
```

This generates

```
h1(variant<Y<0>, Y<1>, Y<2>, Y<3>, Y<4>, Y<5> >&):
  mov eax, DWORD PTR [rdi]
  ret
h2(variant<Y<0>, Y<1>, Y<2>, Y<3>, Y<4>, Y<5> >&):
  mov eax, DWORD PTR [rdi]
  ret
h3(variant<Y<0>, Y<1>, Y<2>, Y<3>, Y<4>, Y<5> >&):
  cmp DWORD PTR [rdi+4], 5
  jbe .L15
.L15:
  jmp f(X&)
h4(variant<Y<0>, Y<1>, Y<2>, Y<3>, Y<4>, Y<5> >&):
  cmp DWORD PTR [rdi+4], 5
  jbe .L19
.L19:
  mov edi, DWORD PTR [rdi]
  jmp g(int)
```

so the member access is folded in both h1 and h2 (which is good!), even though
the first occurs through X& and the second through Y<I>&.

I've been unable to determine what makes the optimizations misfire. This code
should in principle be the same as the simplified one, but it isn't.

[Bug tree-optimization/87205] Inefficient code generation for switch

Reply via email to