https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91501

            Bug ID: 91501
           Summary: Stack Optimization bug on function and lambda return
           Product: gcc
           Version: 9.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: baptiste.cartier at ertosgener dot com
  Target Milestone: ---

Created attachment 46735
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=46735&action=edit
Preprocessed test file

When calling a function or a lambda to create function arguments, the stack is
not reused properly.

This bug was discovered using the 8-2019-q3-update of ARM GCC, but it appears
it also exists in GCC.

Discussion about this bug can be found here (for the ARM version of GCC) :
https://community.arm.com/developer/tools-software/tools/f/arm-compilers-forum/13366/arm-gcc-lambda-optimization/159463#159463

Link to online compiler test case : https://godbolt.org/z/hx2cEU

Attached is the preprocessed test case.

The problem with the following example code is:

wrapper2LAMBDA uses 32 bytes more stack than wrapper1LAMBDA (i.e. the size of
the structure), but it should not: the stack space should be reused for the
subsequent structures.

----------------------------------------
/* Plain aggregate of eight uint32_t members, passed and returned by value in
 * this test case.  The report gives its size as 32 bytes, which is the extra
 * stack that wrapper2LAMBDA is observed to use compared to wrapper1LAMBDA. */
struct TestStruct
    {
        uint32_t field1;
        uint32_t field2;
        uint32_t field3;
        uint32_t field4;

        uint32_t field11;
        uint32_t field21;
        uint32_t field31;
        uint32_t field41;
    } ;

    /* Builds a TestStruct and returns it by value.
     *
     * Each argument is copied into the member with the matching suffix:
     * f1..f4 go to field1..field4 and f11..f41 go to field11..field41.
     * This is the plain-function counterpart of MACROLAMBDA; it is not called
     * by the wrapper functions shown in this snippet. */
    struct TestStruct initStructure(uint32_t f1, uint32_t f2, uint32_t f3,
uint32_t f4,uint32_t f11, uint32_t f21, uint32_t f31, uint32_t f41)
    {
        struct TestStruct myStruct;
        myStruct.field1 = f1;
        myStruct.field2 = f2;
        myStruct.field3 = f3;
        myStruct.field4 = f4;

        myStruct.field11 = f11;
        myStruct.field21 = f21;
        myStruct.field31 = f31;
        myStruct.field41 = f41;

        return myStruct;
    }

/* MACROLAMBDA(f1..f8): expands to a lambda that is invoked immediately (note
 * the trailing call parentheses) and yields a TestStruct by value.
 *
 * The lambda captures by reference, declares a local TestStruct named $ with
 * an empty brace initializer, assigns the eight macro arguments to field1,
 * field2, field3, field4, field11, field21, field31, field41 in that order,
 * and returns the local.
 *
 * NOTE(review): the identifier $ relies on the compiler accepting dollar
 * signs in identifiers; confirm this for any other toolchain.
 * The macro arguments are substituted without surrounding parentheses. */
#define MACROLAMBDA(f1,f2,f3,f4,f5,f6,f7,f8) \
    [&]() -> struct TestStruct { struct TestStruct ${}; \
    $.field1 = f1; \
    $.field2 = f2; \
    $.field3 = f3; \
    $.field4 = f4; \
    $.field11 = f5; \
    $.field21 = f6; \
    $.field31 = f7; \
    $.field41 = f8; \
    return $; \
}()

    /* Prints the eight members of myStruct with a single printf call.
     *
     * The structure is taken by value and the function is marked noinline, so
     * each caller has to materialize a TestStruct argument for a real call.
     *
     * The format string uses %d for uint32_t members; this is the mismatch
     * that the -Wformat warning quoted later in the report refers to.  The
     * labels f1..f4 are repeated for field11..field41.
     * NOTE(review): the string literal is split across lines in this text,
     * presumably by mail line wrapping; confirm against the attached
     * preprocessed file. */
    void __attribute__((noinline)) doStuff(struct TestStruct myStruct)
    {
        printf("f1 = %d, f2 = %d, f3 = %d, f4 = %d, f1 = %d, f2 = %d, f3 = %d,
f4 = %d", myStruct.field1, myStruct.field2, myStruct.field3,
myStruct.field4,myStruct.field11, myStruct.field21, myStruct.field31,
myStruct.field41);
    }

    /* Failing case of the report: calls doStuff twice, each time with a fresh
     * TestStruct temporary produced by MACROLAMBDA.
     *
     * The two temporaries are used in separate statements.  According to the
     * assembly quoted in the report, the x86-64 frame is set up with
     * sub rsp, 72 here versus sub rsp, 40 in wrapper1LAMBDA, i.e. the second
     * temporary gets its own stack space instead of reusing the first. */
    void __attribute__((noinline)) wrapper2LAMBDA(void)
    {
        doStuff(MACROLAMBDA(1,2,3,4,5,6,7,8));
        doStuff(MACROLAMBDA(11,22,33,44,55,66,77,88));
    }

    /* Baseline case of the report: a single doStuff call with one TestStruct
     * temporary produced by MACROLAMBDA.  Its stack usage (sub rsp, 40 in the
     * quoted x86-64 assembly) is the reference that wrapper2LAMBDA is
     * compared against. */
    void __attribute__((noinline)) wrapper1LAMBDA(void)
    {
        doStuff(MACROLAMBDA(1,2,3,4,5,6,7,8));
    }

---------------------------

The assembly generated for the two functions is:

----------------------------------------------
_Z14wrapper2LAMBDAv:
        movabs  rax, 8589934593
        sub     rsp, 72
        mov     QWORD PTR [rsp], rax
        movabs  rax, 17179869187
        mov     QWORD PTR [rsp+8], rax
        movabs  rax, 25769803781
        mov     QWORD PTR [rsp+16], rax
        movabs  rax, 34359738375
        mov     QWORD PTR [rsp+24], rax
        push    QWORD PTR [rsp+24]
        push    QWORD PTR [rsp+24]
        push    QWORD PTR [rsp+24]
        push    QWORD PTR [rsp+24]
        call    _Z7doStuff10TestStruct
        movabs  rax, 94489280523
        mov     QWORD PTR [rsp+64], rax
        movabs  rax, 188978561057
        mov     QWORD PTR [rsp+72], rax
        movabs  rax, 283467841591
        mov     QWORD PTR [rsp+80], rax
        movabs  rax, 377957122125
        mov     QWORD PTR [rsp+88], rax
        add     rsp, 32
        push    QWORD PTR [rsp+56]
        push    QWORD PTR [rsp+56]
        push    QWORD PTR [rsp+56]
        push    QWORD PTR [rsp+56]
        call    _Z7doStuff10TestStruct
        add     rsp, 104
        ret
_Z14wrapper1LAMBDAv:
        movabs  rax, 8589934593
        sub     rsp, 40
        mov     QWORD PTR [rsp], rax
        movabs  rax, 17179869187
        mov     QWORD PTR [rsp+8], rax
        movabs  rax, 25769803781
        mov     QWORD PTR [rsp+16], rax
        movabs  rax, 34359738375
        mov     QWORD PTR [rsp+24], rax
        push    QWORD PTR [rsp+24]
        push    QWORD PTR [rsp+24]
        push    QWORD PTR [rsp+24]
        push    QWORD PTR [rsp+24]
        call    _Z7doStuff10TestStruct
        add     rsp, 72
        ret
------------------------------------------

"sub     rsp, 72 " for wrapper2LAMBDA and "sub     rsp, 40" for wrapper1LAMBDA
is what makes me believe stack is not reused, or that there is some dead stack.

The same behavior happens with ARM assembly:

"sub    sp, sp, #80" for wrapper2LAMBDA and "sub        sp, sp, #52" for
wrapper1LAMBDA.

This also happens with function returns.

Compiler versions : GCC 9.2.0 and 8-2019-q3-update
Compiled on Windows 10 (with target STM32L4 in mind)
Command line :
-O2 -x c++ -std=c++17 -D"STM32" -D"STM32L4" -D"STM32L476xx" -D"STM32L476ZG"
-D"ARM_MATH_CM4" -D"RELEASE" -D"NDEBUG" -D"USE_STM32L4XX_NUCLEO_64" -Wall
-Wextra -Wno-comment -Wno-ignored-qualifiers -Wno-implicit-fallthrough
-Wno-missing-field-initializers -Wno-overflow -Wno-sign-compare
-Wno-type-limits -Wno-unused-parameter -Wpointer-arith -save-temps=obj -c
-Wno-register -x c++ -fno-rtti -fno-exceptions -fpermissive -ggdb
-fsingle-precision-constant -fmessage-length=0 -fvar-tracking -O2
-ffreestanding -fdata-sections -ffunction-sections -nostartfiles

with warning :
main.c:159:16: warning: format '%d' expects argument of type 'int', but
argument 3 has type 'uint32_t' {aka 'long unsigned int'} [-Wformat=]

which is to be expected but should not be related to the bug.

Reply via email to