https://gcc.gnu.org/bugzilla/show_bug.cgi?id=122871
Torbjorn SVENSSON <azoff at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |azoff at gcc dot gnu.org
--- Comment #11 from Torbjorn SVENSSON <azoff at gcc dot gnu.org> ---
(In reply to GCC Commits from comment #10)
> The master branch has been updated by Roger Sayle <[email protected]>:
>
> https://gcc.gnu.org/g:1a06a37611e3b27889c595a17df13f6d27202a95
>
> commit r17-383-g1a06a37611e3b27889c595a17df13f6d27202a95
> Author: Roger Sayle <[email protected]>
> Date: Thu May 7 18:46:37 2026 +0100
>
> PR middle-end/122871: Doubleword multiplication improvements
>
> This patch resolves PR middle-end/122871 by improving RTL expansion of
> doubleword multiplications. The main change is to synth_mult adding
> support for the case where the constant being multiplied has
> BITS_PER_WORD
> or more trailing zeros. The shift_cost tables in expmed are only
> parameterized for shifts less than BITS_PER_WORD, so doubleword shifts
> by more than this can't use the usual code path. This patch teaches
> synth_mult that for scalar doubleword multiplications, a doubleword shift
> by more than BITS_PER_WORD typically requires two instructions; one to
> set the result lowpart to zero, and the other a wordmode shift to
> calculate the result highpart.
>
> For the testcase given in the PR:
>
> long long ashll_fn (long long a)
> {
> long long c;
>
> c = a << 33;
> c += a;
> return c;
> }
>
> GCC for arm-linux-gnueabihf currently generates with -O2:
>
> ashll_fn:
> lsl r2, r1, #11
> lsl ip, r0, #11
> subs ip, ip, r0
> orr r2, r2, r0, lsr #21
> sbc r2, r2, r1
> lsl r3, ip, #11
> lsl r2, r2, #11
> adds r3, r3, r0
> orr r2, r2, ip, lsr #21
> adc r1, r1, r2
> lsl r2, r1, #11
> lsl r0, r3, #11
> adds r0, r3, r0
> orr r2, r2, r3, lsr #21
> adc r1, r1, r2
> bx lr
>
> with this patch, we instead generate:
>
> ashll_fn:
> add r1, r1, r0, lsl #1
> bx lr
>
> Additionally, this patch includes a clean-up (identified by A. Pinski)
> to prevent RTL expansion of doubleword multiplications from
> initially emitting multiply instructions by immediate constants 0, 1
> or 2. These dubious multiplications eventually get tidied up by later
> RTL optimization passes, but being sensible during RTL expansion
> both speeds up the compiler and reduces unnecessary memory usage.
>
> 2026-05-07 Roger Sayle <[email protected]>
>
> gcc/ChangeLog
> PR middle-end/122871
> * expmed.cc (synth_mult): Handle doubleword left shifts by
> BITS_PER_WORD bits or more, for scalar modes.
> * optabs.cc (expand_doubleword_mult): Avoid generating multiply
> instructions by immediate constants 0, 1 or 2.
>
> gcc/testsuite/ChangeLog
> PR middle-end/122871
> * gcc.target/arm/muldi-1.c: New test case.
The new test case fails for Cortex-M0 and Cortex-M23. Is this a Thumb-2-only
improvement?
For Cortex-M0, I get:
$ /build/r17-409-g8376a674e3564f/bin/arm-none-eabi-gcc
/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c -mthumb -march=armv6s-m
-mtune=cortex-m0 -mfloat-abi=soft -mfpu=auto -fdiagnostics-plain-output -O2
-ffat-lto-objects -fno-ident -S -o - -dP
.arch armv6s-m
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 1
.eabi_attribute 30, 2
.eabi_attribute 34, 0
.eabi_attribute 18, 4
.file "muldi-1.c"
.text
.align 1
.p2align 2,,3
.global ashll_fn
.syntax unified
.code 16
.thumb_func
.type ashll_fn, %function
ashll_fn:
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
@(insn 23 3 38 (set (reg:SI 3 r3 [ a+4 ])
@ (reg:SI 0 r0 [orig:107 a ] [107]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@ (nil))
@ 0x0000
movs r3, r0 @ 23 [c=4 l=2] *thumb1_movsi_insn/0
@(insn 32 38 22 (unspec:SI [
@ (reg/f:SI 13 sp)
@ ] UNSPEC_REGISTER_USE)
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":11:1 403
{force_register_use}
@ (nil))
@ 0x0002
@ sp needed @ 32 [c=8 l=0] force_register_use
@(insn 22 32 11 (set (reg:SI 2 r2 [orig:99 a ] [99])
@ (const_int 0 [0]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@ (nil))
@ 0x0002
movs r2, #0 @ 22 [c=4 l=2] *thumb1_movsi_insn/1
@(insn 11 22 12 (parallel [
@ (set (reg:DI 2 r2 [102])
@ (plus:DI (reg:DI 2 r2 [orig:99 a ] [99])
@ (reg:DI 0 r0 [orig:107 a ] [107])))
@ (clobber (reg:CC 80 cc))
@ ]) "/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 718
{*thumb1_adddi3}
@ (expr_list:REG_UNUSED (reg:CC 80 cc)
@ (expr_list:REG_EQUAL (mult:DI (reg:DI 0 r0 [orig:107 a ] [107])
@ (const_int 4294967297 [0x100000001]))
@ (nil))))
@ 0x0004
adds r2, r2, r0 @ 11 [c=4 l=4] *thumb1_adddi3
adcs r3, r3, r1
@(insn 12 11 13 (parallel [
@ (set (reg:DI 2 r2 [103])
@ (plus:DI (reg:DI 2 r2 [102])
@ (reg:DI 2 r2 [102])))
@ (clobber (reg:CC 80 cc))
@ ]) "/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 718
{*thumb1_adddi3}
@ (expr_list:REG_UNUSED (reg:CC 80 cc)
@ (nil)))
@ 0x0008
adds r2, r2, r2 @ 12 [c=4 l=4] *thumb1_adddi3
adcs r3, r3, r3
@(insn 13 12 28 (parallel [
@ (set (reg:DI 2 r2 [103])
@ (minus:DI (reg:DI 2 r2 [103])
@ (reg:DI 0 r0 [orig:107 a ] [107])))
@ (clobber (reg:CC 80 cc))
@ ]) "/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 720
{*thumb_subdi3}
@ (expr_list:REG_DEAD (reg:DI 0 r0 [orig:107 a ] [107])
@ (expr_list:REG_DEAD (reg:DI 0 r0 [orig:107 a ] [107])
@ (expr_list:REG_UNUSED (reg:CC 80 cc)
@ (expr_list:REG_EQUAL (mult:DI (reg:DI 0 r0 [orig:107 a ]
[107])
@ (const_int 8589934593 [0x200000001]))
@ (nil))))))
@ 0x000c
subs r2, r2, r0 @ 13 [c=4 l=4] *thumb_subdi3
sbcs r3, r3, r1
@(insn 28 13 29 (set (reg:SI 0 r0 [orig:104 _5 ] [104])
@ (reg:SI 2 r2 [103]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@ (expr_list:REG_DEAD (reg:SI 2 r2 [103])
@ (nil)))
@ 0x0010
movs r0, r2 @ 28 [c=4 l=2] *thumb1_movsi_insn/0
@(insn 29 28 19 (set (reg:SI 1 r1 [ _5+4 ])
@ (reg:SI 3 r3 [+4 ]))
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":9:5 743
{*thumb1_movsi_insn}
@ (expr_list:REG_DEAD (reg:SI 3 r3 [+4 ])
@ (nil)))
@ 0x0012
movs r1, r3 @ 29 [c=4 l=2] *thumb1_movsi_insn/0
@(jump_insn 34 33 35 (unspec_volatile [
@ (return)
@ ] VUNSPEC_EPILOGUE)
"/build/gcc_src/gcc/testsuite/gcc.target/arm/muldi-1.c":11:1 779
{*epilogue_insns}
@ (nil)
@ -> return)
@ 0x0014
bx lr
.size ashll_fn, .-ashll_fn