https://gcc.gnu.org/bugzilla/show_bug.cgi?id=110946
--- Comment #1 from Dave Rodgman <dave.rodgman at arm dot com> --- Disassembly under -Os: 000000000000139c <mbedtls_aes_crypt_xts>: 139c: a9b67bfd stp x29, x30, [sp, #-160]! 13a0: 910003fd mov x29, sp 13a4: a9046bf9 stp x25, x26, [sp, #64] 13a8: aa0003f9 mov x25, x0 13ac: 90000000 adrp x0, 0 <__stack_chk_guard> 13b0: a90153f3 stp x19, x20, [sp, #16] 13b4: f9400000 ldr x0, [x0] 13b8: a9025bf5 stp x21, x22, [sp, #32] 13bc: 2a0103f6 mov w22, w1 13c0: a90363f7 stp x23, x24, [sp, #48] 13c4: a90573fb stp x27, x28, [sp, #80] 13c8: f9400001 ldr x1, [x0] 13cc: f9004fe1 str x1, [sp, #152] 13d0: d2800001 mov x1, #0x0 // #0 13d4: 710006df cmp w22, #0x1 13d8: 54000c28 b.hi 155c <mbedtls_aes_crypt_xts+0x1c0> // b.pmore 13dc: d1004041 sub x1, x2, #0x10 13e0: aa0203f3 mov x19, x2 13e4: b27c4fe0 mov x0, #0xfffff0 // #16777200 13e8: eb00003f cmp x1, x0 13ec: 54000bc8 b.hi 1564 <mbedtls_aes_crypt_xts+0x1c8> // b.pmore 13f0: 9101a3f5 add x21, sp, #0x68 13f4: aa0303e2 mov x2, x3 13f8: aa0403f8 mov x24, x4 13fc: aa0503f7 mov x23, x5 1400: aa1503e3 mov x3, x21 1404: 91048320 add x0, x25, #0x120 1408: 52800021 mov w1, #0x1 // #1 140c: 94000000 bl 1210 <mbedtls_aes_crypt_ecb> 1410: 2a0003f4 mov w20, w0 1414: 35000540 cbnz w0, 14bc <mbedtls_aes_crypt_xts+0x120> 1418: 520002db eor w27, w22, #0x1 141c: d344fe7a lsr x26, x19, #4 1420: 1200037b and w27, w27, #0x1 1424: 92400e73 and x19, x19, #0xf 1428: 910223fc add x28, sp, #0x88 142c: d100075a sub x26, x26, #0x1 1430: b100075f cmn x26, #0x1 1434: 54000541 b.ne 14dc <mbedtls_aes_crypt_xts+0x140> // b.any 1438: b4000433 cbz x19, 14bc <mbedtls_aes_crypt_xts+0x120> 143c: 710002df cmp w22, #0x0 1440: d10042fb sub x27, x23, #0x10 1444: 9101e3fa add x26, sp, #0x78 1448: aa1303e2 mov x2, x19 144c: 9a95035a csel x26, x26, x21, eq // eq = none 1450: aa1b03e1 mov x1, x27 1454: 910223f5 add x21, sp, #0x88 1458: aa1703e0 mov x0, x23 145c: 94000000 bl 0 <memmove> 1460: d2800217 mov x23, #0x10 // #16 1464: aa1303e3 mov x3, x19 1468: aa1a03e2 mov x2, x26 
146c: aa1803e1 mov x1, x24 1470: aa1503e0 mov x0, x21 1474: 94000000 bl 0 <mbedtls_xor> 1478: cb1302e3 sub x3, x23, x19 147c: 8b130342 add x2, x26, x19 1480: 8b130361 add x1, x27, x19 1484: 8b1302a0 add x0, x21, x19 1488: 94000000 bl 0 <mbedtls_xor> 148c: aa1503e3 mov x3, x21 1490: aa1503e2 mov x2, x21 1494: 2a1603e1 mov w1, w22 1498: aa1903e0 mov x0, x25 149c: 94000000 bl 1210 <mbedtls_aes_crypt_ecb> 14a0: 2a0003f4 mov w20, w0 14a4: 350000c0 cbnz w0, 14bc <mbedtls_aes_crypt_xts+0x120> 14a8: aa1703e3 mov x3, x23 14ac: aa1a03e2 mov x2, x26 14b0: aa1503e1 mov x1, x21 14b4: aa1b03e0 mov x0, x27 14b8: 94000000 bl 0 <mbedtls_xor> 14bc: 90000000 adrp x0, 0 <__stack_chk_guard> 14c0: f9400000 ldr x0, [x0] 14c4: f9404fe2 ldr x2, [sp, #152] 14c8: f9400001 ldr x1, [x0] 14cc: eb010042 subs x2, x2, x1 14d0: d2800001 mov x1, #0x0 // #0 14d4: 54000500 b.eq 1574 <mbedtls_aes_crypt_xts+0x1d8> // b.none 14d8: 94000000 bl 0 <__stack_chk_fail> 14dc: f100027f cmp x19, #0x0 14e0: 1a9f07e0 cset w0, ne // ne = any 14e4: 6a1b001f tst w0, w27 14e8: 540000e0 b.eq 1504 <mbedtls_aes_crypt_xts+0x168> // b.none 14ec: b50000da cbnz x26, 1504 <mbedtls_aes_crypt_xts+0x168> 14f0: a94687e0 ldp x0, x1, [sp, #104] 14f4: a90787e0 stp x0, x1, [sp, #120] 14f8: aa1503e1 mov x1, x21 14fc: aa1503e0 mov x0, x21 1500: 97fffb63 bl 28c <mbedtls_gf128mul_x_ble> 1504: aa1503e2 mov x2, x21 1508: aa1803e1 mov x1, x24 150c: aa1c03e0 mov x0, x28 1510: d2800203 mov x3, #0x10 // #16 1514: 94000000 bl 0 <mbedtls_xor> 1518: aa1c03e3 mov x3, x28 151c: aa1c03e2 mov x2, x28 1520: 2a1603e1 mov w1, w22 1524: aa1903e0 mov x0, x25 1528: 94000000 bl 1210 <mbedtls_aes_crypt_ecb> 152c: 35000200 cbnz w0, 156c <mbedtls_aes_crypt_xts+0x1d0> 1530: aa1503e2 mov x2, x21 1534: d2800203 mov x3, #0x10 // #16 1538: aa1703e0 mov x0, x23 153c: aa1c03e1 mov x1, x28 1540: 94000000 bl 0 <mbedtls_xor> 1544: 910042f7 add x23, x23, #0x10 1548: aa1503e1 mov x1, x21 154c: aa1503e0 mov x0, x21 1550: 91004318 add x24, x24, #0x10 1554: 97fffb4e bl 28c 
<mbedtls_gf128mul_x_ble>
1558: 17ffffb5 b 142c <mbedtls_aes_crypt_xts+0x90>
155c: 12800414 mov w20, #0xffffffdf // #-33
1560: 17ffffd7 b 14bc <mbedtls_aes_crypt_xts+0x120>
1564: 12800434 mov w20, #0xffffffde // #-34
1568: 17ffffd5 b 14bc <mbedtls_aes_crypt_xts+0x120>
156c: 2a0003f4 mov w20, w0
1570: 17ffffd3 b 14bc <mbedtls_aes_crypt_xts+0x120>
1574: 2a1403e0 mov w0, w20
1578: a94153f3 ldp x19, x20, [sp, #16]
157c: a9425bf5 ldp x21, x22, [sp, #32]
1580: a94363f7 ldp x23, x24, [sp, #48]
1584: a9446bf9 ldp x25, x26, [sp, #64]
1588: a94573fb ldp x27, x28, [sp, #80]
158c: a8ca7bfd ldp x29, x30, [sp], #160
1590: d65f03c0 ret

Disassembly for mbedtls_gf128mul_x_ble:

000000000000028c <mbedtls_gf128mul_x_ble>:
28c: a9be7bfd stp x29, x30, [sp, #-32]!
290: 910003fd mov x29, sp
294: a90153f3 stp x19, x20, [sp, #16]
298: aa0003f3 mov x19, x0
29c: a9400823 ldp x3, x2, [x1]
2a0: 52800101 mov w1, #0x8 // #8
2a4: 93c3fc54 extr x20, x2, x3, #63
2a8: d37ffc42 lsr x2, x2, #63
2ac: 4b020c22 sub w2, w1, w2, lsl #3
2b0: 528010e1 mov w1, #0x87 // #135
2b4: 1ac22821 asr w1, w1, w2
2b8: 93407c21 sxtw x1, w1
2bc: ca030421 eor x1, x1, x3, lsl #1
2c0: 94000000 bl 0 <mbedtls_put_unaligned_uint64>
2c4: aa1403e1 mov x1, x20
2c8: 91002260 add x0, x19, #0x8
2cc: a94153f3 ldp x19, x20, [sp, #16]
2d0: a8c27bfd ldp x29, x30, [sp], #32
2d4: 14000000 b 0 <mbedtls_put_unaligned_uint64>

and under -O2:

Disassembly for mbedtls_aes_crypt_xts:

0000000000001500 <mbedtls_aes_crypt_xts>:
1500: a9b57bfd stp x29, x30, [sp, #-176]!
1504: 90000006 adrp x6, 0 <__stack_chk_guard> 1508: 910003fd mov x29, sp 150c: f94000c6 ldr x6, [x6] 1510: a90153f3 stp x19, x20, [sp, #16] 1514: 2a0103f4 mov w20, w1 1518: f94000c1 ldr x1, [x6] 151c: f90057e1 str x1, [sp, #168] 1520: d2800001 mov x1, #0x0 // #0 1524: 7100069f cmp w20, #0x1 1528: 54001348 b.hi 1790 <mbedtls_aes_crypt_xts+0x290> // b.pmore 152c: d1004041 sub x1, x2, #0x10 1530: a9025bf5 stp x21, x22, [sp, #32] 1534: aa0003f5 mov x21, x0 1538: aa0203f6 mov x22, x2 153c: b27c4fe0 mov x0, #0xfffff0 // #16777200 1540: eb00003f cmp x1, x0 1544: 54001208 b.hi 1784 <mbedtls_aes_crypt_xts+0x284> // b.pmore 1548: aa0503f3 mov x19, x5 154c: a90363f7 stp x23, x24, [sp, #48] 1550: aa0303f7 mov x23, x3 1554: a90573fb stp x27, x28, [sp, #80] 1558: aa0403fb mov x27, x4 155c: 94000000 bl 0 <mbedtls_aesce_has_support> 1560: 910482a3 add x3, x21, #0x120 1564: 9101e3fc add x28, sp, #0x78 1568: 35000ec0 cbnz w0, 1740 <mbedtls_aes_crypt_xts+0x240> 156c: aa1703e1 mov x1, x23 1570: aa0303e0 mov x0, x3 1574: aa1c03e2 mov x2, x28 1578: 94000000 bl 9c0 <mbedtls_internal_aes_encrypt> 157c: 35000760 cbnz w0, 1668 <mbedtls_aes_crypt_xts+0x168> 1580: f2400ec0 ands x0, x22, #0xf 1584: d344fec4 lsr x4, x22, #4 1588: 52000298 eor w24, w20, #0x1 158c: f90037e0 str x0, [sp, #104] 1590: 1a9f07e0 cset w0, ne // ne = any 1594: 52800117 mov w23, #0x8 // #8 1598: 0a000318 and w24, w24, w0 159c: 528010f6 mov w22, #0x87 // #135 15a0: a9046bf9 stp x25, x26, [sp, #64] 15a4: d1000499 sub x25, x4, #0x1 15a8: 910263fa add x26, sp, #0x98 15ac: 14000014 b 15fc <mbedtls_aes_crypt_xts+0xfc> 15b0: 3dc00341 ldr q1, [x26] 15b4: aa1303e3 mov x3, x19 15b8: 3dc00380 ldr q0, [x28] 15bc: d1000739 sub x25, x25, #0x1 15c0: 91004366 add x6, x27, #0x10 15c4: 6e211c00 eor v0.16b, v0.16b, v1.16b 15c8: 3c810460 str q0, [x3], #16 15cc: a94797e1 ldp x1, x5, [sp, #120] 15d0: d37ffca2 lsr x2, x5, #63 15d4: 93c1fca5 extr x5, x5, x1, #63 15d8: 4b020ee2 sub w2, w23, w2, lsl #3 15dc: 1ac22ac2 asr w2, w22, w2 15e0: 
93407c42 sxtw x2, w2 15e4: ca010441 eor x1, x2, x1, lsl #1 15e8: a90797e1 stp x1, x5, [sp, #120] 15ec: b100073f cmn x25, #0x1 15f0: 54000440 b.eq 1678 <mbedtls_aes_crypt_xts+0x178> // b.none 15f4: aa0303f3 mov x19, x3 15f8: aa0603fb mov x27, x6 15fc: f100033f cmp x25, #0x0 1600: 7a400b04 ccmp w24, #0x0, #0x4, eq // eq = none 1604: 54000aa1 b.ne 1758 <mbedtls_aes_crypt_xts+0x258> // b.any 1608: 3dc00360 ldr q0, [x27] 160c: aa1a03e3 mov x3, x26 1610: 3dc00381 ldr q1, [x28] 1614: aa1a03e2 mov x2, x26 1618: 2a1403e1 mov w1, w20 161c: aa1503e0 mov x0, x21 1620: 6e211c00 eor v0.16b, v0.16b, v1.16b 1624: 3d800340 str q0, [x26] 1628: 94000000 bl 12d0 <mbedtls_aes_crypt_ecb> 162c: 34fffc20 cbz w0, 15b0 <mbedtls_aes_crypt_xts+0xb0> 1630: a9425bf5 ldp x21, x22, [sp, #32] 1634: a94363f7 ldp x23, x24, [sp, #48] 1638: a9446bf9 ldp x25, x26, [sp, #64] 163c: a94573fb ldp x27, x28, [sp, #80] 1640: 90000001 adrp x1, 0 <__stack_chk_guard> 1644: f9400021 ldr x1, [x1] 1648: f94057e3 ldr x3, [sp, #168] 164c: f9400022 ldr x2, [x1] 1650: eb020063 subs x3, x3, x2 1654: d2800002 mov x2, #0x0 // #0 1658: 54000a01 b.ne 1798 <mbedtls_aes_crypt_xts+0x298> // b.any 165c: a94153f3 ldp x19, x20, [sp, #16] 1660: a8cb7bfd ldp x29, x30, [sp], #176 1664: d65f03c0 ret 1668: a9425bf5 ldp x21, x22, [sp, #32] 166c: a94363f7 ldp x23, x24, [sp, #48] 1670: a94573fb ldp x27, x28, [sp, #80] 1674: 17fffff3 b 1640 <mbedtls_aes_crypt_xts+0x140> 1678: f94037e2 ldr x2, [sp, #104] 167c: b4fffda2 cbz x2, 1630 <mbedtls_aes_crypt_xts+0x130> 1680: 7100029f cmp w20, #0x0 1684: 910223f6 add x22, sp, #0x88 1688: 9a9c02d6 csel x22, x22, x28, eq // eq = none 168c: aa0303e0 mov x0, x3 1690: aa1303e1 mov x1, x19 1694: 91003f7b add x27, x27, #0xf 1698: 94000000 bl 0 <memmove> 169c: d10006c5 sub x5, x22, #0x1 16a0: d2800020 mov x0, #0x1 // #1 16a4: d503201f nop 16a8: 38606b62 ldrb w2, [x27, x0] 16ac: 8b000343 add x3, x26, x0 16b0: 386068a4 ldrb w4, [x5, x0] 16b4: aa0003e1 mov x1, x0 16b8: 91000400 add x0, x0, #0x1 16bc: 4a040042 
eor w2, w2, w4 16c0: 381ff062 sturb w2, [x3, #-1] 16c4: f94037e2 ldr x2, [sp, #104] 16c8: eb02003f cmp x1, x2 16cc: 54fffee1 b.ne 16a8 <mbedtls_aes_crypt_xts+0x1a8> // b.any 16d0: d2800203 mov x3, #0x10 // #16 16d4: 8b020265 add x5, x19, x2 16d8: cb020063 sub x3, x3, x2 16dc: 8b0202c4 add x4, x22, x2 16e0: 8b020359 add x25, x26, x2 16e4: d2800000 mov x0, #0x0 // #0 16e8: 386068a1 ldrb w1, [x5, x0] 16ec: 38606882 ldrb w2, [x4, x0] 16f0: 4a020021 eor w1, w1, w2 16f4: 38206b21 strb w1, [x25, x0] 16f8: 91000400 add x0, x0, #0x1 16fc: eb00007f cmp x3, x0 1700: 54ffff41 b.ne 16e8 <mbedtls_aes_crypt_xts+0x1e8> // b.any 1704: 2a1403e1 mov w1, w20 1708: aa1503e0 mov x0, x21 170c: aa1a03e3 mov x3, x26 1710: aa1a03e2 mov x2, x26 1714: 94000000 bl 12d0 <mbedtls_aes_crypt_ecb> 1718: 35fff8c0 cbnz w0, 1630 <mbedtls_aes_crypt_xts+0x130> 171c: 3dc002c0 ldr q0, [x22] 1720: 3dc00341 ldr q1, [x26] 1724: 6e211c00 eor v0.16b, v0.16b, v1.16b 1728: 3d800260 str q0, [x19] 172c: a9425bf5 ldp x21, x22, [sp, #32] 1730: a94363f7 ldp x23, x24, [sp, #48] 1734: a9446bf9 ldp x25, x26, [sp, #64] 1738: a94573fb ldp x27, x28, [sp, #80] 173c: 17ffffc1 b 1640 <mbedtls_aes_crypt_xts+0x140> 1740: aa1703e2 mov x2, x23 1744: aa0303e0 mov x0, x3 1748: 52800021 mov w1, #0x1 // #1 174c: aa1c03e3 mov x3, x28 1750: 94000000 bl 0 <mbedtls_aesce_crypt_ecb> 1754: 17ffff8a b 157c <mbedtls_aes_crypt_xts+0x7c> 1758: a94797e1 ldp x1, x5, [sp, #120] 175c: a9478fe2 ldp x2, x3, [sp, #120] 1760: a9088fe2 stp x2, x3, [sp, #136] 1764: d37ffca0 lsr x0, x5, #63 1768: 93c1fca5 extr x5, x5, x1, #63 176c: 4b000ee0 sub w0, w23, w0, lsl #3 1770: 1ac02ac0 asr w0, w22, w0 1774: 93407c00 sxtw x0, w0 1778: ca010401 eor x1, x0, x1, lsl #1 177c: a90797e1 stp x1, x5, [sp, #120] 1780: 17ffffa2 b 1608 <mbedtls_aes_crypt_xts+0x108> 1784: 12800420 mov w0, #0xffffffde // #-34 1788: a9425bf5 ldp x21, x22, [sp, #32] 178c: 17ffffad b 1640 <mbedtls_aes_crypt_xts+0x140> 1790: 12800400 mov w0, #0xffffffdf // #-33 1794: 17ffffab b 1640 
<mbedtls_aes_crypt_xts+0x140>
1798: a9025bf5 stp x21, x22, [sp, #32]
179c: a90363f7 stp x23, x24, [sp, #48]
17a0: a9046bf9 stp x25, x26, [sp, #64]
17a4: a90573fb stp x27, x28, [sp, #80]
17a8: 94000000 bl 0 <__stack_chk_fail>

Disassembly for mbedtls_gf128mul_x_ble:
(actually, this gets inlined, but I removed the "static inline" to get this disassembly)

0000000000001500 <mbedtls_gf128mul_x_ble>:
1500: a9401023 ldp x3, x4, [x1]
1504: 52800105 mov w5, #0x8 // #8
1508: 528010e2 mov w2, #0x87 // #135
150c: d37ffc81 lsr x1, x4, #63
1510: 93c3fc84 extr x4, x4, x3, #63
1514: 4b010ca1 sub w1, w5, w1, lsl #3
1518: 1ac12841 asr w1, w2, w1
151c: 93407c21 sxtw x1, w1
1520: ca030423 eor x3, x1, x3, lsl #1
1524: a9001003 stp x3, x4, [x0]
1528: d65f03c0 ret