http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58897
--- Comment #1 from Marc Glisse <glisse at gcc dot gnu.org> --- Untested, but this shows what the 128/64 division could look like (with obvious variants for mod and divmod). I don't really know how to get gcc to generate anything like that though. I have seen udivmodtidi3 in s390 but that seems to return (ql,r) packed in an __int128, not what I want. And gcc doesn't show ul->ux as a zero_extend and forgets extremely fast the REG_EQUAL note on the call to __udivti3, so I can't rely on combine. typedef unsigned __int128 ux; typedef unsigned long ul; #define udiv_qrnnd(q, r, n1, n0, dx) \ __asm__ ("divq %4" : "=a" (q), "=d" (r) \ : "0" ((ul)(n0)), "1" ((ul)(n1)), "rm" ((ul)(dx))) ux div128by64(ux a, ul b){ ul ah = a >> 64; ul al = a; ul qh, ql, r1, r; qh=ah/b; r1=ah%b; udiv_qrnnd(ql,r,r1,al,b); return (ux)qh << 64 | ql; } /* a=ah*2^64+al ah=qh*b+r1 a=qh*2^64*b+r1*2^64+al r1*2^64+al<2^64*b r1*2^64+al=ql*b+r a=(qh*2^64+ql)*b+r */