https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112510

--- Comment #7 from Vladimir Sadovnikov <sadko4u at gmail dot com> ---
Disassembly for GCC 11.4.0:

```
00000000000011e9 <_Z13gate_x1_curvePfPKfPKN3dsp11gate_knee_tEm>:
    11e9:       f3 0f 1e fa             endbr64 
    11ed:       55                      push   %rbp
    11ee:       48 89 e5                mov    %rsp,%rbp
    11f1:       41 57                   push   %r15
    11f3:       41 56                   push   %r14
    11f5:       41 55                   push   %r13
    11f7:       41 54                   push   %r12
    11f9:       53                      push   %rbx
    11fa:       48 83 e4 c0             and    $0xffffffffffffffc0,%rsp
    11fe:       48 81 ec c0 03 00 00    sub    $0x3c0,%rsp
    1205:       49 89 fc                mov    %rdi,%r12
    1208:       49 89 f5                mov    %rsi,%r13
    120b:       49 89 d7                mov    %rdx,%r15
    120e:       49 89 ce                mov    %rcx,%r14
    1211:       48 8d 5c 24 20          lea    0x20(%rsp),%rbx
    1216:       48 89 5c 24 18          mov    %rbx,0x18(%rsp)
    121b:       83 3d ee 2d 00 00 00    cmpl   $0x0,0x2dee(%rip)        # 4010
<__asan_option_detect_stack_use_after_return@@Base>
    1222:       0f 85 65 01 00 00       jne    138d
<_Z13gate_x1_curvePfPKfPKN3dsp11gate_knee_tEm+0x1a4>
    1228:       48 c7 03 b3 8a b5 41    movq   $0x41b58ab3,(%rbx)
    122f:       48 8d 05 d2 0d 00 00    lea    0xdd2(%rip),%rax        # 2008
<_IO_stdin_used+0x8>
    1236:       48 89 43 08             mov    %rax,0x8(%rbx)
    123a:       48 8d 05 a8 ff ff ff    lea    -0x58(%rip),%rax        # 11e9
<_Z13gate_x1_curvePfPKfPKN3dsp11gate_knee_tEm>
    1241:       48 89 43 10             mov    %rax,0x10(%rbx)
    1245:       48 89 d8                mov    %rbx,%rax
    1248:       48 c1 e8 03             shr    $0x3,%rax
    124c:       c7 80 00 80 ff 7f f1    movl   $0xf1f1f1f1,0x7fff8000(%rax)
    1253:       f1 f1 f1 
    1256:       c7 80 08 80 ff 7f f2    movl   $0xf2f2f2f2,0x7fff8008(%rax)
    125d:       f2 f2 f2 
    1260:       c7 80 1c 80 ff 7f f2    movl   $0xf2f2f2f2,0x7fff801c(%rax)
    1267:       f2 f2 f2 
    126a:       c7 80 20 80 ff 7f f2    movl   $0xf2f2f2f2,0x7fff8020(%rax)
    1271:       f2 f2 f2 
    1274:       c7 80 64 80 ff 7f f3    movl   $0xf3f3f3f3,0x7fff8064(%rax)
    127b:       f3 f3 f3 
    127e:       c7 80 68 80 ff 7f f3    movl   $0xf3f3f3f3,0x7fff8068(%rax)
    1285:       f3 f3 f3 
    1288:       c7 80 6c 80 ff 7f f3    movl   $0xf3f3f3f3,0x7fff806c(%rax)
    128f:       f3 f3 f3 
    1292:       64 48 8b 14 25 28 00    mov    %fs:0x28,%rdx
    1299:       00 00 
    129b:       48 89 94 24 b8 03 00    mov    %rdx,0x3b8(%rsp)
    12a2:       00 
    12a3:       31 d2                   xor    %edx,%edx
    12a5:       62 d2 7d 48 18 07       vbroadcastss (%r15),%zmm0
    12ab:       62 d2 7d 48 18 4f 01    vbroadcastss 0x4(%r15),%zmm1
    12b2:       62 d2 7d 48 18 57 02    vbroadcastss 0x8(%r15),%zmm2
    12b9:       62 d2 7d 48 18 5f 03    vbroadcastss 0xc(%r15),%zmm3
    12c0:       62 d2 7d 48 18 67 04    vbroadcastss 0x10(%r15),%zmm4
    12c7:       62 d2 7d 48 18 6f 05    vbroadcastss 0x14(%r15),%zmm5
    12ce:       62 d2 7d 48 18 77 06    vbroadcastss 0x18(%r15),%zmm6
    12d5:       62 d2 7d 48 18 7f 07    vbroadcastss 0x1c(%r15),%zmm7
    12dc:       62 f1 7c 48 29 83 20    vmovaps %zmm0,0x120(%rbx)
    12e3:       01 00 00 
    12e6:       62 f1 7c 48 29 8b 60    vmovaps %zmm1,0x160(%rbx)
    12ed:       01 00 00 
    12f0:       62 f1 7c 48 29 93 a0    vmovaps %zmm2,0x1a0(%rbx)
    12f7:       01 00 00 
    12fa:       62 f1 7c 48 29 9b e0    vmovaps %zmm3,0x1e0(%rbx)
    1301:       01 00 00 
    1304:       62 f1 7c 48 29 a3 20    vmovaps %zmm4,0x220(%rbx)
    130b:       02 00 00 
    130e:       62 f1 7c 48 29 ab 60    vmovaps %zmm5,0x260(%rbx)
    1315:       02 00 00 
    1318:       62 f1 7c 48 29 b3 a0    vmovaps %zmm6,0x2a0(%rbx)
    131f:       02 00 00 
    1322:       62 f1 7c 48 29 bb e0    vmovaps %zmm7,0x2e0(%rbx)
    1329:       02 00 00 
```

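Here the offsets relative to %rbx are multiples of 0x20 but not of 0x40, and
the strange reload of %rbx from the stack is absent. The offsets are reasonable:
%rsp is aligned to 0x40 and %rbx is set to %rsp + 0x20, so the resulting
addresses are 0x40-aligned, because of this instruction: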
```
    1211:       48 8d 5c 24 20          lea    0x20(%rsp),%rbx
```

Everything works fine.

gcc --version
gcc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0
Copyright (C) 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Reply via email to