> The first few instruction bundles are not 12 bytes long, as required, but 11 bytes, with catastrophic consequences. I will see what can be done about this. Technically, this is not a GAS bug.
Below is a first attempt at a patch. It doesn't pass the test suite (but debian/rules doesn't detect the failure), and some bugs around indirect jumps still remain.

I'm now wondering if it really makes sense to keep yet another bignum library in Debian. We already have GMP, libgcrypt and OpenSSL...

--- add.S 2005/11/16 16:24:16 1.1
+++ add.S 2005/11/16 19:05:50
@@ -993,7 +993,16 @@
         # boucle d addition déroulée pour 16 chiffres
         ALIGN(4)
 L(begin):
-        BODY(-4,0,4); BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
+        # BODY(-4,0,4) est augmenté manuellement parce que GAS choisit
+        # une instruction de trois octets pour "mov 0(%esi,%ecx,4),%edx"
+        # et des instructions similaires.
+        adcl -4(%ebx,%ecx,4), %eax
+        .byte 0x8b, 0x54, 0x8e, 0x00 # movl 0(%esi,%ecx,4), %edx
+        movl %eax, -4(%edi,%ecx,4)
+        .byte 0x13, 0x54, 0x8b, 0x00 # adcl 0(%ebx,%ecx,4), %edx
+        movl 4(%esi,%ecx,4), %eax
+        .byte 0x89, 0x54, 0x8f, 0x00 # movl %edx, 0(%edi,%ecx,4)
+        BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
         BODY(28,32,36); BODY(36,40,44); BODY(44,48,52); BODY(52,56,60)
         leal 15(%ecx), %ecx
@@ -1205,7 +1214,18 @@
         # boucle de soustraction déroulée pour 16 chiffres
         ALIGN(4)
 L(begin):
-        BODY(-4,0,4); BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
+        movl %eax, -4(%edi,%ecx,4)
+        movl 4(%esi,%ecx,4), %eax
+        # BODY(-4,0,4) est augmenté manuellement parce que GAS choisit
+        # une instruction de trois octets pour "mov 0(%esi,%ecx,4),%edx"
+        # et des instructions similaires.
+        sbbl -4(%ebx,%ecx,4), %eax
+        .byte 0x8b, 0x54, 0x8e, 0x00 # movl 0(%esi,%ecx,4), %edx
+        movl %eax, -4(%edi,%ecx,4)
+        .byte 0x1b, 0x54, 0x8b, 0x00 # sbbl 0(%ebx,%ecx,4), %edx
+        movl 4(%esi,%ecx,4), %eax
+        .byte 0x89, 0x54, 0x8f, 0x00 # movl %edx, 0(%edi,%ecx,4)
+        BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
         BODY(28,32,36); BODY(36,40,44); BODY(44,48,52); BODY(52,56,60)
         leal 15(%ecx), %ecx
@@ -1350,7 +1370,16 @@
         # boucle d addition déroulée pour 16 chiffres
         ALIGN(4)
 L(begin):
-        BODY(-4,0,4); BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
+        # BODY(-4,0,4) est augmenté manuellement parce que GAS choisit
+        # une instruction de trois octets pour "mov 0(%esi,%ecx,4),%edx"
+        # et des instructions similaires.
+        adcl -4(%esi,%ecx,4), %eax
+        .byte 0x8b, 0x54, 0x8b, 0x00 # movl 0(%ebx,%ecx,4), %edx
+        movl %eax, -4(%esi,%ecx,4)
+        .byte 0x13, 0x54, 0x8e, 0x00 # adcl 0(%esi,%ecx,4), %edx
+        movl 4(%ebx,%ecx,4), %eax
+        .byte 0x89, 0x54, 0x8e, 0x00 # movl %edx, 0(%esi,%ecx,4)
+        BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
         BODY(28,32,36); BODY(36,40,44); BODY(44,48,52); BODY(52,56,60)
         leal 15(%ecx), %ecx
@@ -1485,7 +1514,16 @@
         # boucle de soustraction déroulée pour 16 chiffres
         ALIGN(4)
 L(begin):
-        BODY(-4,0,4); BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
+        # BODY(-4,0,4) est augmenté manuellement parce que GAS choisit
+        # une instruction de trois octets pour "mov 0(%esi,%ecx,4),%edx"
+        # et des instructions similaires.
+        sbbl -4(%ebx,%ecx,4), %eax
+        .byte 0x8b, 0x54, 0x8e, 0x00 # movl 0(%esi,%ecx,4), %edx
+        movl %eax, -4(%esi,%ecx,4)
+        .byte 0x1b, 0x54, 0x8b, 0x00 # sbbl 0(%ebx,%ecx,4), %edx
+        movl 4(%esi,%ecx,4), %eax
+        .byte 0x89, 0x54, 0x8e, 0x00 # movl %edx, 0(%esi,%ecx,4)
+        BODY(4,8,12); BODY(12,16,20); BODY(20,24,28)
         BODY(28,32,36); BODY(36,40,44); BODY(44,48,52); BODY(52,56,60)
         leal 15(%ecx), %ecx