I wrote a very small procedure in D, and the x86-64 asm code generated by GDC 12.3 was excellent, whereas the code from GDC 13.1 was insanely bloated and totally different. Note: the badness is independent of the -On optimisation level (-O3 was used initially).

Here’s the D code and following it, two asm code snippets:

====



/**
 * Minimal wrapper around the x86 `cpuid` instruction for the variant that
 * takes no ecx input.
 *
 * Params:
 *   eax = value placed in the eax register before `cpuid` executes. It must
 *         be a value for which is_ecx_needed( eax ) is false, because ecx is
 *         left unspecified on entry to the instruction.
 *
 * Returns: a cpuid_abcd_t whose eax/ebx/ecx/edx fields hold the four
 *          registers written by `cpuid`.
 */
public
pragma( inline, true )
cpuid_abcd_t
cpuid_insn( in uint32_t eax ) pure nothrow @nogc @trusted
        {
        /* ecx arg omitted; absolutely minimal variant wrapper */

        // Since we are not providing an ecx, we had better not be needing to
        // supply one. (Runtime asserts are compiled out by -frelease.)
        assert( ! is_ecx_needed( eax ) );

        static assert( eax.sizeof * 8 == 32 );  // optional, exact
        static assert( eax.sizeof * 8 >= 32 );  // essential min

        // Really just for type-checking, and constness-assertion.
        const uint32_t in_eax = eax;
        static assert( in_eax.sizeof * 8 == 32 );

        // Undefined until the cpuid insn writes it.
        cpuid_abcd_t ret = void;
        static assert(  ret.eax.sizeof * 8 == 32
                     && ret.ebx.sizeof * 8 == 32
                     && ret.ecx.sizeof * 8 == 32
                     && ret.edx.sizeof * 8 == 32 );

        asm pure nothrow @nogc
            {
            /* `cpuid` takes no explicit operands and is spelled identically in
             * AT&T and Intel syntax, so no .intel_syntax / .att_syntax
             * directives are emitted here. Forcing `.att_syntax` at the end of
             * the template would leave the assembler in the wrong mode when
             * the translation unit is compiled with -masm=intel. */
            "cpuid"

            :   /* outputs : it is guaranteed that all bits 63…32 of
                 * rax/rbx/rcx/rdx are zeroed in output; only bits 31…0 are
                 * significant. All four are write-only lhs refs. */
                "=a" ( ret.eax ),
                "=b" ( ret.ebx ),
                "=c" ( ret.ecx ),
                "=d" ( ret.edx )
            :   /* inputs : */
                "a"  ( in_eax )       // read.
                /* no ecx input - this is the variant with input ecx omitted */
            :   /* no clobbers apart from the outputs already listed; the
                 * Intel SDM lists no flags affected by cpuid, so no "cc"
                 * clobber is required */
            ;
            }
        return ret;
        }

/* ======== */

GDC 12.3:  -O3 -frelease -march=native

# GDC 12.3 output for cpuid_insn (Intel syntax). The argument arrives in edi
# and the result is returned in rax:rdx, which matches the SysV AMD64 ABI.
push    rbx                     # rbx is callee-saved and cpuid overwrites ebx
mov     eax, edi                # eax = the function's eax argument
cpuid
mov     rsi, rdx                # copy cpuid's edx result before rdx is reused
sal             rbx, 32         # move cpuid's ebx result into bits 63..32
mov     eax, eax                # 32-bit write zero-extends: clears rax bits 63..32
mov     edx, ecx                # rdx = cpuid's ecx result, zero-extended
sal             rsi, 32         # move cpuid's edx result into bits 63..32
or              rax, rbx        # rax = ebx:eax
pop     rbx                     # restore caller's rbx
or              rdx, rsi        # rdx = edx:ecx
ret

====
GDC 13.1 (very bad), same switches:  -O3 -frelease -march=native

# GDC 13.1 output for cpuid_insn (Intel syntax). The argument arrives in edi
# and the result is returned in rax:rdx, which matches the SysV AMD64 ABI.
# The four cpuid results are assembled in a vector register, spilled to the
# stack and reloaded into rax/rdx.
push            rbp                             # save caller's frame pointer; restored by `leave`
mov             eax, edi                        # eax = the function's eax argument
mov             rbp, rsp
push            rbx                             # rbx is callee-saved and cpuid overwrites ebx
and             rsp, -32                        # realign rsp down to a 32-byte boundary
cpuid
vmovd           xmm3, eax
vmovd           xmm2, ecx
vpinsrd         xmm1, xmm2, edx, 1              # xmm1 = { ecx, edx }
vpinsrd         xmm0, xmm3, ebx, 1              # xmm0 = { eax, ebx }
vpunpcklqdq     xmm4, xmm0, xmm1                # xmm4 = { eax, ebx, ecx, edx }
vmovdqa         xmmword ptr [rsp-80], xmm4      # spill below rsp (inside the 128-byte red zone)
mov             rax, qword ptr [rsp-80]         # rax = ebx:eax
mov             rdx, qword ptr [rsp-72]         # rdx = edx:ecx
mov             rbx, qword ptr [rbp-8]          # restore caller's rbx from its push slot
leave                                           # mov rsp, rbp / pop rbp
ret
/* ======== */

Reply via email to