https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110946

--- Comment #1 from Dave Rodgman <dave.rodgman at arm dot com> ---
Disassembly under -Os:

000000000000139c <mbedtls_aes_crypt_xts>:
    139c:       a9b67bfd        stp     x29, x30, [sp, #-160]!
    13a0:       910003fd        mov     x29, sp
    13a4:       a9046bf9        stp     x25, x26, [sp, #64]
    13a8:       aa0003f9        mov     x25, x0
    13ac:       90000000        adrp    x0, 0 <__stack_chk_guard>
    13b0:       a90153f3        stp     x19, x20, [sp, #16]
    13b4:       f9400000        ldr     x0, [x0]
    13b8:       a9025bf5        stp     x21, x22, [sp, #32]
    13bc:       2a0103f6        mov     w22, w1
    13c0:       a90363f7        stp     x23, x24, [sp, #48]
    13c4:       a90573fb        stp     x27, x28, [sp, #80]
    13c8:       f9400001        ldr     x1, [x0]
    13cc:       f9004fe1        str     x1, [sp, #152]
    13d0:       d2800001        mov     x1, #0x0                        // #0
    13d4:       710006df        cmp     w22, #0x1
    13d8:       54000c28        b.hi    155c <mbedtls_aes_crypt_xts+0x1c0>  // b.pmore
    13dc:       d1004041        sub     x1, x2, #0x10
    13e0:       aa0203f3        mov     x19, x2
    13e4:       b27c4fe0        mov     x0, #0xfffff0                   // #16777200
    13e8:       eb00003f        cmp     x1, x0
    13ec:       54000bc8        b.hi    1564 <mbedtls_aes_crypt_xts+0x1c8>  // b.pmore
    13f0:       9101a3f5        add     x21, sp, #0x68
    13f4:       aa0303e2        mov     x2, x3
    13f8:       aa0403f8        mov     x24, x4
    13fc:       aa0503f7        mov     x23, x5
    1400:       aa1503e3        mov     x3, x21
    1404:       91048320        add     x0, x25, #0x120
    1408:       52800021        mov     w1, #0x1                        // #1
    140c:       94000000        bl      1210 <mbedtls_aes_crypt_ecb>
    1410:       2a0003f4        mov     w20, w0
    1414:       35000540        cbnz    w0, 14bc <mbedtls_aes_crypt_xts+0x120>
    1418:       520002db        eor     w27, w22, #0x1
    141c:       d344fe7a        lsr     x26, x19, #4
    1420:       1200037b        and     w27, w27, #0x1
    1424:       92400e73        and     x19, x19, #0xf
    1428:       910223fc        add     x28, sp, #0x88
    142c:       d100075a        sub     x26, x26, #0x1
    1430:       b100075f        cmn     x26, #0x1
    1434:       54000541        b.ne    14dc <mbedtls_aes_crypt_xts+0x140>  // b.any
    1438:       b4000433        cbz     x19, 14bc <mbedtls_aes_crypt_xts+0x120>
    143c:       710002df        cmp     w22, #0x0
    1440:       d10042fb        sub     x27, x23, #0x10
    1444:       9101e3fa        add     x26, sp, #0x78
    1448:       aa1303e2        mov     x2, x19
    144c:       9a95035a        csel    x26, x26, x21, eq  // eq = none
    1450:       aa1b03e1        mov     x1, x27
    1454:       910223f5        add     x21, sp, #0x88
    1458:       aa1703e0        mov     x0, x23
    145c:       94000000        bl      0 <memmove>
    1460:       d2800217        mov     x23, #0x10                      // #16
    1464:       aa1303e3        mov     x3, x19
    1468:       aa1a03e2        mov     x2, x26
    146c:       aa1803e1        mov     x1, x24
    1470:       aa1503e0        mov     x0, x21
    1474:       94000000        bl      0 <mbedtls_xor>
    1478:       cb1302e3        sub     x3, x23, x19
    147c:       8b130342        add     x2, x26, x19
    1480:       8b130361        add     x1, x27, x19
    1484:       8b1302a0        add     x0, x21, x19
    1488:       94000000        bl      0 <mbedtls_xor>
    148c:       aa1503e3        mov     x3, x21
    1490:       aa1503e2        mov     x2, x21
    1494:       2a1603e1        mov     w1, w22
    1498:       aa1903e0        mov     x0, x25
    149c:       94000000        bl      1210 <mbedtls_aes_crypt_ecb>
    14a0:       2a0003f4        mov     w20, w0
    14a4:       350000c0        cbnz    w0, 14bc <mbedtls_aes_crypt_xts+0x120>
    14a8:       aa1703e3        mov     x3, x23
    14ac:       aa1a03e2        mov     x2, x26
    14b0:       aa1503e1        mov     x1, x21
    14b4:       aa1b03e0        mov     x0, x27
    14b8:       94000000        bl      0 <mbedtls_xor>
    14bc:       90000000        adrp    x0, 0 <__stack_chk_guard>
    14c0:       f9400000        ldr     x0, [x0]
    14c4:       f9404fe2        ldr     x2, [sp, #152]
    14c8:       f9400001        ldr     x1, [x0]
    14cc:       eb010042        subs    x2, x2, x1
    14d0:       d2800001        mov     x1, #0x0                        // #0
    14d4:       54000500        b.eq    1574 <mbedtls_aes_crypt_xts+0x1d8>  // b.none
    14d8:       94000000        bl      0 <__stack_chk_fail>
    14dc:       f100027f        cmp     x19, #0x0
    14e0:       1a9f07e0        cset    w0, ne  // ne = any
    14e4:       6a1b001f        tst     w0, w27
    14e8:       540000e0        b.eq    1504 <mbedtls_aes_crypt_xts+0x168>  // b.none
    14ec:       b50000da        cbnz    x26, 1504 <mbedtls_aes_crypt_xts+0x168>
    14f0:       a94687e0        ldp     x0, x1, [sp, #104]
    14f4:       a90787e0        stp     x0, x1, [sp, #120]
    14f8:       aa1503e1        mov     x1, x21
    14fc:       aa1503e0        mov     x0, x21
    1500:       97fffb63        bl      28c <mbedtls_gf128mul_x_ble>
    1504:       aa1503e2        mov     x2, x21
    1508:       aa1803e1        mov     x1, x24
    150c:       aa1c03e0        mov     x0, x28
    1510:       d2800203        mov     x3, #0x10                       // #16
    1514:       94000000        bl      0 <mbedtls_xor>
    1518:       aa1c03e3        mov     x3, x28
    151c:       aa1c03e2        mov     x2, x28
    1520:       2a1603e1        mov     w1, w22
    1524:       aa1903e0        mov     x0, x25
    1528:       94000000        bl      1210 <mbedtls_aes_crypt_ecb>
    152c:       35000200        cbnz    w0, 156c <mbedtls_aes_crypt_xts+0x1d0>
    1530:       aa1503e2        mov     x2, x21
    1534:       d2800203        mov     x3, #0x10                       // #16
    1538:       aa1703e0        mov     x0, x23
    153c:       aa1c03e1        mov     x1, x28
    1540:       94000000        bl      0 <mbedtls_xor>
    1544:       910042f7        add     x23, x23, #0x10
    1548:       aa1503e1        mov     x1, x21
    154c:       aa1503e0        mov     x0, x21
    1550:       91004318        add     x24, x24, #0x10
    1554:       97fffb4e        bl      28c <mbedtls_gf128mul_x_ble>
    1558:       17ffffb5        b       142c <mbedtls_aes_crypt_xts+0x90>
    155c:       12800414        mov     w20, #0xffffffdf                // #-33
    1560:       17ffffd7        b       14bc <mbedtls_aes_crypt_xts+0x120>
    1564:       12800434        mov     w20, #0xffffffde                // #-34
    1568:       17ffffd5        b       14bc <mbedtls_aes_crypt_xts+0x120>
    156c:       2a0003f4        mov     w20, w0
    1570:       17ffffd3        b       14bc <mbedtls_aes_crypt_xts+0x120>
    1574:       2a1403e0        mov     w0, w20
    1578:       a94153f3        ldp     x19, x20, [sp, #16]
    157c:       a9425bf5        ldp     x21, x22, [sp, #32]
    1580:       a94363f7        ldp     x23, x24, [sp, #48]
    1584:       a9446bf9        ldp     x25, x26, [sp, #64]
    1588:       a94573fb        ldp     x27, x28, [sp, #80]
    158c:       a8ca7bfd        ldp     x29, x30, [sp], #160
    1590:       d65f03c0        ret

    Disassembly for mbedtls_gf128mul_x_ble:
000000000000028c <mbedtls_gf128mul_x_ble>:
     28c:       a9be7bfd        stp     x29, x30, [sp, #-32]!
     290:       910003fd        mov     x29, sp
     294:       a90153f3        stp     x19, x20, [sp, #16]
     298:       aa0003f3        mov     x19, x0
     29c:       a9400823        ldp     x3, x2, [x1]
     2a0:       52800101        mov     w1, #0x8                        // #8
     2a4:       93c3fc54        extr    x20, x2, x3, #63
     2a8:       d37ffc42        lsr     x2, x2, #63
     2ac:       4b020c22        sub     w2, w1, w2, lsl #3
     2b0:       528010e1        mov     w1, #0x87                       // #135
     2b4:       1ac22821        asr     w1, w1, w2
     2b8:       93407c21        sxtw    x1, w1
     2bc:       ca030421        eor     x1, x1, x3, lsl #1
     2c0:       94000000        bl      0 <mbedtls_put_unaligned_uint64>
     2c4:       aa1403e1        mov     x1, x20
     2c8:       91002260        add     x0, x19, #0x8
     2cc:       a94153f3        ldp     x19, x20, [sp, #16]
     2d0:       a8c27bfd        ldp     x29, x30, [sp], #32
     2d4:       14000000        b       0 <mbedtls_put_unaligned_uint64>


and under -O2:

    Disassembly for mbedtls_aes_crypt_xts:
0000000000001500 <mbedtls_aes_crypt_xts>:
    1500:       a9b57bfd        stp     x29, x30, [sp, #-176]!
    1504:       90000006        adrp    x6, 0 <__stack_chk_guard>
    1508:       910003fd        mov     x29, sp
    150c:       f94000c6        ldr     x6, [x6]
    1510:       a90153f3        stp     x19, x20, [sp, #16]
    1514:       2a0103f4        mov     w20, w1
    1518:       f94000c1        ldr     x1, [x6]
    151c:       f90057e1        str     x1, [sp, #168]
    1520:       d2800001        mov     x1, #0x0                        // #0
    1524:       7100069f        cmp     w20, #0x1
    1528:       54001348        b.hi    1790 <mbedtls_aes_crypt_xts+0x290>  // b.pmore
    152c:       d1004041        sub     x1, x2, #0x10
    1530:       a9025bf5        stp     x21, x22, [sp, #32]
    1534:       aa0003f5        mov     x21, x0
    1538:       aa0203f6        mov     x22, x2
    153c:       b27c4fe0        mov     x0, #0xfffff0                   // #16777200
    1540:       eb00003f        cmp     x1, x0
    1544:       54001208        b.hi    1784 <mbedtls_aes_crypt_xts+0x284>  // b.pmore
    1548:       aa0503f3        mov     x19, x5
    154c:       a90363f7        stp     x23, x24, [sp, #48]
    1550:       aa0303f7        mov     x23, x3
    1554:       a90573fb        stp     x27, x28, [sp, #80]
    1558:       aa0403fb        mov     x27, x4
    155c:       94000000        bl      0 <mbedtls_aesce_has_support>
    1560:       910482a3        add     x3, x21, #0x120
    1564:       9101e3fc        add     x28, sp, #0x78
    1568:       35000ec0        cbnz    w0, 1740 <mbedtls_aes_crypt_xts+0x240>
    156c:       aa1703e1        mov     x1, x23
    1570:       aa0303e0        mov     x0, x3
    1574:       aa1c03e2        mov     x2, x28
    1578:       94000000        bl      9c0 <mbedtls_internal_aes_encrypt>
    157c:       35000760        cbnz    w0, 1668 <mbedtls_aes_crypt_xts+0x168>
    1580:       f2400ec0        ands    x0, x22, #0xf
    1584:       d344fec4        lsr     x4, x22, #4
    1588:       52000298        eor     w24, w20, #0x1
    158c:       f90037e0        str     x0, [sp, #104]
    1590:       1a9f07e0        cset    w0, ne  // ne = any
    1594:       52800117        mov     w23, #0x8                       // #8
    1598:       0a000318        and     w24, w24, w0
    159c:       528010f6        mov     w22, #0x87                      // #135
    15a0:       a9046bf9        stp     x25, x26, [sp, #64]
    15a4:       d1000499        sub     x25, x4, #0x1
    15a8:       910263fa        add     x26, sp, #0x98
    15ac:       14000014        b       15fc <mbedtls_aes_crypt_xts+0xfc>
    15b0:       3dc00341        ldr     q1, [x26]
    15b4:       aa1303e3        mov     x3, x19
    15b8:       3dc00380        ldr     q0, [x28]
    15bc:       d1000739        sub     x25, x25, #0x1
    15c0:       91004366        add     x6, x27, #0x10
    15c4:       6e211c00        eor     v0.16b, v0.16b, v1.16b
    15c8:       3c810460        str     q0, [x3], #16
    15cc:       a94797e1        ldp     x1, x5, [sp, #120]
    15d0:       d37ffca2        lsr     x2, x5, #63
    15d4:       93c1fca5        extr    x5, x5, x1, #63
    15d8:       4b020ee2        sub     w2, w23, w2, lsl #3
    15dc:       1ac22ac2        asr     w2, w22, w2
    15e0:       93407c42        sxtw    x2, w2
    15e4:       ca010441        eor     x1, x2, x1, lsl #1
    15e8:       a90797e1        stp     x1, x5, [sp, #120]
    15ec:       b100073f        cmn     x25, #0x1
    15f0:       54000440        b.eq    1678 <mbedtls_aes_crypt_xts+0x178>  // b.none
    15f4:       aa0303f3        mov     x19, x3
    15f8:       aa0603fb        mov     x27, x6
    15fc:       f100033f        cmp     x25, #0x0
    1600:       7a400b04        ccmp    w24, #0x0, #0x4, eq  // eq = none
    1604:       54000aa1        b.ne    1758 <mbedtls_aes_crypt_xts+0x258>  // b.any
    1608:       3dc00360        ldr     q0, [x27]
    160c:       aa1a03e3        mov     x3, x26
    1610:       3dc00381        ldr     q1, [x28]
    1614:       aa1a03e2        mov     x2, x26
    1618:       2a1403e1        mov     w1, w20
    161c:       aa1503e0        mov     x0, x21
    1620:       6e211c00        eor     v0.16b, v0.16b, v1.16b
    1624:       3d800340        str     q0, [x26]
    1628:       94000000        bl      12d0 <mbedtls_aes_crypt_ecb>
    162c:       34fffc20        cbz     w0, 15b0 <mbedtls_aes_crypt_xts+0xb0>
    1630:       a9425bf5        ldp     x21, x22, [sp, #32]
    1634:       a94363f7        ldp     x23, x24, [sp, #48]
    1638:       a9446bf9        ldp     x25, x26, [sp, #64]
    163c:       a94573fb        ldp     x27, x28, [sp, #80]
    1640:       90000001        adrp    x1, 0 <__stack_chk_guard>
    1644:       f9400021        ldr     x1, [x1]
    1648:       f94057e3        ldr     x3, [sp, #168]
    164c:       f9400022        ldr     x2, [x1]
    1650:       eb020063        subs    x3, x3, x2
    1654:       d2800002        mov     x2, #0x0                        // #0
    1658:       54000a01        b.ne    1798 <mbedtls_aes_crypt_xts+0x298>  // b.any
    165c:       a94153f3        ldp     x19, x20, [sp, #16]
    1660:       a8cb7bfd        ldp     x29, x30, [sp], #176
    1664:       d65f03c0        ret
    1668:       a9425bf5        ldp     x21, x22, [sp, #32]
    166c:       a94363f7        ldp     x23, x24, [sp, #48]
    1670:       a94573fb        ldp     x27, x28, [sp, #80]
    1674:       17fffff3        b       1640 <mbedtls_aes_crypt_xts+0x140>
    1678:       f94037e2        ldr     x2, [sp, #104]
    167c:       b4fffda2        cbz     x2, 1630 <mbedtls_aes_crypt_xts+0x130>
    1680:       7100029f        cmp     w20, #0x0
    1684:       910223f6        add     x22, sp, #0x88
    1688:       9a9c02d6        csel    x22, x22, x28, eq  // eq = none
    168c:       aa0303e0        mov     x0, x3
    1690:       aa1303e1        mov     x1, x19
    1694:       91003f7b        add     x27, x27, #0xf
    1698:       94000000        bl      0 <memmove>
    169c:       d10006c5        sub     x5, x22, #0x1
    16a0:       d2800020        mov     x0, #0x1                        // #1
    16a4:       d503201f        nop
    16a8:       38606b62        ldrb    w2, [x27, x0]
    16ac:       8b000343        add     x3, x26, x0
    16b0:       386068a4        ldrb    w4, [x5, x0]
    16b4:       aa0003e1        mov     x1, x0
    16b8:       91000400        add     x0, x0, #0x1
    16bc:       4a040042        eor     w2, w2, w4
    16c0:       381ff062        sturb   w2, [x3, #-1]
    16c4:       f94037e2        ldr     x2, [sp, #104]
    16c8:       eb02003f        cmp     x1, x2
    16cc:       54fffee1        b.ne    16a8 <mbedtls_aes_crypt_xts+0x1a8>  // b.any
    16d0:       d2800203        mov     x3, #0x10                       // #16
    16d4:       8b020265        add     x5, x19, x2
    16d8:       cb020063        sub     x3, x3, x2
    16dc:       8b0202c4        add     x4, x22, x2
    16e0:       8b020359        add     x25, x26, x2
    16e4:       d2800000        mov     x0, #0x0                        // #0
    16e8:       386068a1        ldrb    w1, [x5, x0]
    16ec:       38606882        ldrb    w2, [x4, x0]
    16f0:       4a020021        eor     w1, w1, w2
    16f4:       38206b21        strb    w1, [x25, x0]
    16f8:       91000400        add     x0, x0, #0x1
    16fc:       eb00007f        cmp     x3, x0
    1700:       54ffff41        b.ne    16e8 <mbedtls_aes_crypt_xts+0x1e8>  // b.any
    1704:       2a1403e1        mov     w1, w20
    1708:       aa1503e0        mov     x0, x21
    170c:       aa1a03e3        mov     x3, x26
    1710:       aa1a03e2        mov     x2, x26
    1714:       94000000        bl      12d0 <mbedtls_aes_crypt_ecb>
    1718:       35fff8c0        cbnz    w0, 1630 <mbedtls_aes_crypt_xts+0x130>
    171c:       3dc002c0        ldr     q0, [x22]
    1720:       3dc00341        ldr     q1, [x26]
    1724:       6e211c00        eor     v0.16b, v0.16b, v1.16b
    1728:       3d800260        str     q0, [x19]
    172c:       a9425bf5        ldp     x21, x22, [sp, #32]
    1730:       a94363f7        ldp     x23, x24, [sp, #48]
    1734:       a9446bf9        ldp     x25, x26, [sp, #64]
    1738:       a94573fb        ldp     x27, x28, [sp, #80]
    173c:       17ffffc1        b       1640 <mbedtls_aes_crypt_xts+0x140>
    1740:       aa1703e2        mov     x2, x23
    1744:       aa0303e0        mov     x0, x3
    1748:       52800021        mov     w1, #0x1                        // #1
    174c:       aa1c03e3        mov     x3, x28
    1750:       94000000        bl      0 <mbedtls_aesce_crypt_ecb>
    1754:       17ffff8a        b       157c <mbedtls_aes_crypt_xts+0x7c>
    1758:       a94797e1        ldp     x1, x5, [sp, #120]
    175c:       a9478fe2        ldp     x2, x3, [sp, #120]
    1760:       a9088fe2        stp     x2, x3, [sp, #136]
    1764:       d37ffca0        lsr     x0, x5, #63
    1768:       93c1fca5        extr    x5, x5, x1, #63
    176c:       4b000ee0        sub     w0, w23, w0, lsl #3
    1770:       1ac02ac0        asr     w0, w22, w0
    1774:       93407c00        sxtw    x0, w0
    1778:       ca010401        eor     x1, x0, x1, lsl #1
    177c:       a90797e1        stp     x1, x5, [sp, #120]
    1780:       17ffffa2        b       1608 <mbedtls_aes_crypt_xts+0x108>
    1784:       12800420        mov     w0, #0xffffffde                 // #-34
    1788:       a9425bf5        ldp     x21, x22, [sp, #32]
    178c:       17ffffad        b       1640 <mbedtls_aes_crypt_xts+0x140>
    1790:       12800400        mov     w0, #0xffffffdf                 // #-33
    1794:       17ffffab        b       1640 <mbedtls_aes_crypt_xts+0x140>
    1798:       a9025bf5        stp     x21, x22, [sp, #32]
    179c:       a90363f7        stp     x23, x24, [sp, #48]
    17a0:       a9046bf9        stp     x25, x26, [sp, #64]
    17a4:       a90573fb        stp     x27, x28, [sp, #80]
    17a8:       94000000        bl      0 <__stack_chk_fail>

    Disassembly for mbedtls_gf128mul_x_ble: (actually, this gets inlined, but I removed the "static inline" to get this disassembly)
0000000000001500 <mbedtls_gf128mul_x_ble>:
    1500:       a9401023        ldp     x3, x4, [x1]
    1504:       52800105        mov     w5, #0x8                        // #8
    1508:       528010e2        mov     w2, #0x87                       // #135
    150c:       d37ffc81        lsr     x1, x4, #63
    1510:       93c3fc84        extr    x4, x4, x3, #63
    1514:       4b010ca1        sub     w1, w5, w1, lsl #3
    1518:       1ac12841        asr     w1, w2, w1
    151c:       93407c21        sxtw    x1, w1
    1520:       ca030423        eor     x3, x1, x3, lsl #1
    1524:       a9001003        stp     x3, x4, [x0]
    1528:       d65f03c0        ret

Reply via email to