http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50107
--- Comment #12 from H.J. Lu <hjl.tools at gmail dot com> 2011-08-19 01:12:56 UTC --- I changed MULX to (define_insn "bmi2_umul<mode><dwi>3_1" [(set (match_operand:<DWI> 0 "register_operand" "=r") (mult:<DWI> (zero_extend:<DWI> (match_operand:DWIH 1 "register_operand" "d")) (zero_extend:<DWI> (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))] "TARGET_BMI2" { if (<MODE>mode == DImode) return "mulx\t{%2, %q0, %N0|%N0, %q0, %2}"; else return "mulx\t{%2, %k0, %K0|%K0, %k0, %2}"; } [(set_attr "type" "imul") (set_attr "prefix" "vex") (set_attr "mode" "<MODE>")]) Now I got [hjl@gnu-6 pr50107]$ cat udi-2.i unsigned long long test_mul_64 (unsigned long a, unsigned long b) { return (unsigned long long) a * b; } [hjl@gnu-6 pr50107]$ /export/build/gnu/gcc-hsw/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/gcc-hsw/build-x86_64-linux/gcc/ -S -O2 -mbmi2 -dp -m32 udi-2.i [hjl@gnu-6 pr50107]$ cat udi-2.s .file "udi-2.i" .text .p2align 4,,15 .globl test_mul_64 .type test_mul_64, @function test_mul_64: .LFB0: .cfi_startproc movl 8(%esp), %edx # 20 *movsi_internal/1 [length = 4] mulx 4(%esp), %eax, %edx # 9 bmi2_umulsidi3_1 [length = 7] ret # 25 return_internal [length = 1] .cfi_endproc .LFE0: .size test_mul_64, .-test_mul_64 .ident "GCC: (GNU) 4.7.0 20110818 (experimental)" .section .note.GNU-stack,"",@progbits [hjl@gnu-6 pr50107]$ cat uti-2.i unsigned __int128 test_mul_64 (unsigned long long a, unsigned long long b) { return (unsigned __int128) a*b; } [hjl@gnu-6 pr50107]$ /export/build/gnu/gcc-hsw/build-x86_64-linux/gcc/xgcc -B/export/build/gnu/gcc-hsw/build-x86_64-linux/gcc/ -S -O2 -mbmi2 -dp uti-2.i [hjl@gnu-6 pr50107]$ cat uti-2.s .file "uti-2.i" .text .p2align 4,,15 .globl test_mul_64 .type test_mul_64, @function test_mul_64: .LFB0: .cfi_startproc movq %rsi, %rdx # 24 *movdi_internal_rex64/2 [length = 3] mulx %rdi, %rsi, %rdi # 11 bmi2_umulditi3_1 [length = 5] movq %rsi, %rax # 25 *movdi_internal_rex64/2 [length = 3] movq %rdi, %rdx # 26 *movdi_internal_rex64/2 [length = 3] 
ret # 29 return_internal [length = 1] .cfi_endproc .LFE0: .size test_mul_64, .-test_mul_64 .ident "GCC: (GNU) 4.7.0 20110818 (experimental)" .section .note.GNU-stack,"",@progbits [hjl@gnu-6 pr50107]$ Why don't we generate "mulx %rdi, %rax, %rdx" in the 64-bit case? That would place the result directly in %rdx:%rax and avoid the two movq copies after the mulx.