Bizarre inlining type promotion effect

Shaun Jackman Mon, 04 Dec 2006 15:01:10 -0800

In the code snippet below, the function mul_8_8 compiles to exactly
one `mul' instruction on the AVR. The function mul_16_8 calls
mul_8_8 twice. If mul_8_8 is declared static inline and is inlined into
mul_16_8, each inlined call expands to three `mul' instructions! Why
does inlining mul_8_8 cause each 8x8 multiplication to be promoted to a
16x16 multiplication?


It seems that the inlining mechanism has a real bug if inlining can
cause such a major change in the code generated for a given function.

Cheers,
Shaun

$ avr-gcc --version |head -1
avr-gcc (GCC) 4.1.0
$ cat mul.c
#include <stdint.h>

/*
 * Multiply two 8-bit unsigned values and return the product as a
 * 16-bit unsigned value.
 *
 * Both operands undergo the usual integer promotions, so the multiply
 * itself is carried out in int; the result is converted to uint16_t
 * on return.
 *
 * NOTE(review): if int is 16 bits wide on the target (believed to be the
 * case for avr-gcc by default -- confirm), products above INT_MAX such as
 * 255 * 255 overflow the signed multiply. Casting one operand to uint16_t
 * would make the arithmetic unsigned. Left unchanged here because this
 * exact expression is the test case under discussion.
 */
static uint16_t mul_8_8(uint8_t a, uint8_t b)
{
        return a * b;
}

/*
 * Multiply a 16-bit unsigned value by an 8-bit unsigned value and return
 * the result as a 32-bit unsigned value.
 *
 * The 16-bit operand is split into its two bytes, each byte is multiplied
 * by b through mul_8_8, and the two partial products are combined with
 * the high-byte product shifted left by 8.
 */
uint32_t mul_16_8(uint16_t a, uint8_t b)
{
        /* a0 receives the low byte of a (the conversion to uint8_t
         * truncates); a1 receives the high byte. */
        uint8_t a0 = a, a1 = a >> 8;
        /* The high-byte product is widened to 32 bits before the shift so
         * the shift is performed at 32-bit width; the low-byte product is
         * then added to it. */
        return ((uint32_t)mul_8_8(a1, b) << 8) + mul_8_8(a0, b);
}
$ avr-gcc -c -g -O2 -mmcu=avr4 mul.c
$ avr-objdump -d mul.o

mul.o:     file format elf32-avr

Disassembly of section .text:

00000000 <mul_8_8>:
  0:    86 9f           mul     r24, r22
  2:    c0 01           movw    r24, r0
  4:    11 24           eor     r1, r1
  6:    08 95           ret

00000008 <mul_16_8>:
  8:    bf 92           push    r11
  a:    cf 92           push    r12
  c:    df 92           push    r13
  e:    ef 92           push    r14
 10:    ff 92           push    r15
 12:    0f 93           push    r16
 14:    1f 93           push    r17
 16:    6c 01           movw    r12, r24
 18:    b6 2e           mov     r11, r22
 1a:    8d 2d           mov     r24, r13
 1c:    99 27           eor     r25, r25
 1e:    f0 df           rcall   .-32            ; 0x0 <mul_8_8>
 20:    7c 01           movw    r14, r24
 22:    00 27           eor     r16, r16
 24:    11 27           eor     r17, r17
 26:    10 2f           mov     r17, r16
 28:    0f 2d           mov     r16, r15
 2a:    fe 2c           mov     r15, r14
 2c:    ee 24           eor     r14, r14
 2e:    6b 2d           mov     r22, r11
 30:    8c 2d           mov     r24, r12
 32:    e6 df           rcall   .-52            ; 0x0 <mul_8_8>
 34:    aa 27           eor     r26, r26
 36:    bb 27           eor     r27, r27
 38:    e8 0e           add     r14, r24
 3a:    f9 1e           adc     r15, r25
 3c:    0a 1f           adc     r16, r26
 3e:    1b 1f           adc     r17, r27
 40:    c8 01           movw    r24, r16
 42:    b7 01           movw    r22, r14
 44:    1f 91           pop     r17
 46:    0f 91           pop     r16
 48:    ff 90           pop     r15
 4a:    ef 90           pop     r14
 4c:    df 90           pop     r13
 4e:    cf 90           pop     r12
 50:    bf 90           pop     r11
 52:    08 95           ret
$ sed -i 's/static/& inline/' mul.c
$ avr-gcc -c -g -O2 -mmcu=avr4 mul.c
$ avr-objdump -d mul.o

mul.o:     file format elf32-avr

Disassembly of section .text:

00000000 <mul_16_8>:
  0:    ac 01           movw    r20, r24
  2:    26 2f           mov     r18, r22
  4:    33 27           eor     r19, r19
  6:    89 2f           mov     r24, r25
  8:    99 27           eor     r25, r25
  a:    82 9f           mul     r24, r18
  c:    b0 01           movw    r22, r0
  e:    83 9f           mul     r24, r19
 10:    70 0d           add     r23, r0
 12:    92 9f           mul     r25, r18
 14:    70 0d           add     r23, r0
 16:    11 24           eor     r1, r1
 18:    88 27           eor     r24, r24
 1a:    99 27           eor     r25, r25
 1c:    98 2f           mov     r25, r24
 1e:    87 2f           mov     r24, r23
 20:    76 2f           mov     r23, r22
 22:    66 27           eor     r22, r22
 24:    55 27           eor     r21, r21
 26:    f9 01           movw    r30, r18
 28:    e4 9f           mul     r30, r20
 2a:    90 01           movw    r18, r0
 2c:    e5 9f           mul     r30, r21
 2e:    30 0d           add     r19, r0
 30:    f4 9f           mul     r31, r20
 32:    30 0d           add     r19, r0
 34:    11 24           eor     r1, r1
 36:    44 27           eor     r20, r20
 38:    55 27           eor     r21, r21
 3a:    62 0f           add     r22, r18
 3c:    73 1f           adc     r23, r19
 3e:    84 1f           adc     r24, r20
 40:    95 1f           adc     r25, r21
 42:    08 95           ret

Bizarre inlining type promotion effect

Reply via email to