https://gcc.gnu.org/bugzilla/show_bug.cgi?id=121600

            Bug ID: 121600
           Summary: repeated function call inside switch-case generates
                    more code
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: federico at kircheis dot it
  Target Milestone: ---

Not a real bug, but a possible area of improvement.

Consider the following example

----
#include <string>
#include <string_view>

enum class MyEnum{
  e1,
  e2,
  e3,
  e4,
  e5,
  e6,
  e7,
};

#define BLIND_PINDIALOG_PARAMS( X )                                    \
/*  e  ,v1 ,v2        ,v3*/     \
  X(e1 ,6  ,"15"      ,6       )\
  X(e2 ,7  ,"13"      ,6       )\
  X(e3 ,7  ,"15"      ,6       )\
  X(e4 ,6  ,"14"      ,5       )\
  X(e5 ,10 ,"normal"  ,10      )\
  X(e6 ,2  ,"13"      ,2       )\
  X(e7 ,12 ,"normal"  ,12      )\
  static_assert( true )

std::string foo();
std::string bar(std::string_view, int, std::string_view, std::string_view,
int);

  std::string baz1( MyEnum en, std::string_view sv ) {
#define CASE_ASSIGN( e, v1, v2, v3 ) case MyEnum::e: return bar( "bar", v1, v2, sv, v3 );

    switch ( en ) {
      BLIND_PINDIALOG_PARAMS( CASE_ASSIGN );
      default: return foo();
    }
#undef CASE_ASSIGN
  }


  std::string baz2( MyEnum en, std::string_view sv ) {
    int v1;
    std::string_view v2;
    int v3;
#define CASE_ASSIGN( e, v1_, v2_, v3_ ) case MyEnum::e: {v1=v1_; v2=v2_; v3=v3_; break;}

    switch ( en ) {
      BLIND_PINDIALOG_PARAMS( CASE_ASSIGN );
      default: return foo();
    }
#undef CASE_ASSIGN
    return bar( "bar", v1, v2, sv, v3 );  }
----

The functions baz1 and baz2 behave the same, but baz1, which IMHO is the better
version of the two (1), generates more code bloat.

Example on godbolt:
https://godbolt.org/z/E8jcd5354


It seems that for baz1, the stack is adjusted in every switch case before
doing the function call to bar, while in baz2, the stack is adjusted only once.
Since at the end of every case there is a jmp to a common tail, I assume the
compiler could adjust the stack only once after the jmp.

In particular, for baz1

----
.L3:
        sub     rsp, 8
        mov     r8d, 6
        mov     r9d, OFFSET FLAT:.LC4
        push    12
        push    rcx
        mov     ecx, 12
        push    rdx
        jmp     .L13
.L10:
        sub     rsp, 8
        mov     r8d, 2
        mov     r9d, OFFSET FLAT:.LC0
        push    6
        push    rcx
        mov     ecx, 6
        push    rdx
        jmp     .L13


.L13:
        mov     rdi, rbx
        mov     esi, 3
        mov     edx, OFFSET FLAT:.LC1
        call    "bar[abi:cxx11](std::basic_string_view<char,
std::char_traits<char> >, int, std::basic_string_view<char,
std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char>
>, int)"
        add     rsp, 32
        mov     rax, rbx
        pop     rbx
        ret
----

while for baz2

----
.L10:
        mov     esi, 2
        mov     r8d, OFFSET FLAT:.LC0
        mov     edi, 6
        mov     eax, 6
        jmp     .L8
.L9:
        mov     esi, 2
        mov     r8d, OFFSET FLAT:.LC3
        mov     edi, 6
        mov     eax, 7
        jmp     .L8


.L8:
        sub     rsp, 8
        mov     r9, r8
        mov     r8, rsi
        mov     esi, 3
        push    rdi
        mov     rdi, rbx
        push    rcx
        mov     ecx, eax
        push    rdx
        mov     edx, OFFSET FLAT:.LC4
        call    "bar[abi:cxx11](std::basic_string_view<char,
std::char_traits<char> >, int, std::basic_string_view<char,
std::char_traits<char> >, std::basic_string_view<char, std::char_traits<char>
>, int)"
        add     rsp, 32
        mov     rax, rbx
        pop     rbx
        ret
----

Since there are "many" switch cases, the duplicated generated code accumulates.
Maybe if there were only a couple of enum values, duplicating some code might
lead to less code.
Ideally the programmer should not think about this, and GCC would produce the
optimal code for both baz1 and baz2.


Note that using "-fno-exceptions" or marking functions as "noexcept" does not
improve the generated code of baz1.


(1) I'm convinced that baz1 is a better version of baz2 because it has no
temporary variables and less code.

In my opinion it is also less error prone (no chance to assign wrong variable,
or forget the break, ...)

Last but not least, the temporary variables are not expensive to create in this
example, but that is in general not the case.

Reply via email to