Re: RSA Keygen problem

Jean-Daniel Thu, 22 Jan 2009 14:22:49 -0800

On Jan 22, 6:46 pm, Nelson B Bolyard <[email protected]> wrote:
> > min: 389 ms, max: 2648
>
> That's more like what's expected.
>
>
>
> > Is there a simple way to test if the generated values are correct ?
>
> Two ways come to mind.
>
> 1) Run NSS's cipher tests.
>    cd mozilla/security/nss/tests/cipher
>    cipher.sh > /tmp/cipher.sh.log 2>&1
>    Then look at results.html in the generated directory
>    mozilla/tests_results/security/HOST.N
>
> 2) Run NSS's FIPS mode self test.
>    cd mozilla/security/nss/tests/fips
>    fips.sh > /tmp/fips.sh.log 2>&1
>    Then look at results.html in the generated directory
>    mozilla/tests_results/security/HOST.N
>
> where HOST is your host name and N is an integer that increments with each
> test run.


Everything is green.

The new asm file does not try to determine at runtime whether SSE2 is
present; it decides at compile time instead.
By default, Apple's version of GCC defines __SSE2__, so SSE2 is used by
default on the Mac.
As mentioned before, all supported Macs have a processor with SSE2
available, so this should be fine.
Otherwise, this is a simple copy/paste of the Linux version (without
the .type lines, as the Darwin assembler does not like them).

diff -Naur mozilla/security/nss/lib/freebl/Makefile mozilla/security/nss/lib/freebl/Makefile
--- mozilla/security/nss/lib/freebl/Makefile    2008-03-28 01:35:26.000000000 +0100
+++ mozilla/security/nss/lib/freebl/Makefile    2009-01-22 20:37:24.000000000 +0100
@@ -147,6 +147,27 @@
 endif
 endif # Linux

+ifeq ($(OS_TARGET),Darwin)
+#ifeq ($(CPU_ARCH),x86_64)
+#    ASFILES  = arcfour-amd64-gas.s mpi_amd64_gas.s
+#    ASFLAGS += -march=opteron -m64 -fPIC
+#    DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+#    DEFINES += -DNSS_USE_COMBA
+#    DEFINES += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN
+#   DEFINES += -DMPI_AMD64_ADD
+#    MPI_SRCS += mpi_amd64.c mp_comba.c
+#endif
+ifeq ($(CPU_ARCH),i386)
+    ASFILES  = mpi_x86-darwin.s
+    DEFINES += -DMP_USE_UINT_DIGIT
+    DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+    DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+    DEFINES += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN
+    # ECL_USE_FP is left disabled here. TODO: confirm whether it should be enabled on Darwin.
+    #ECL_USE_FP = 1
+endif
+endif # Darwin
+
 ifeq ($(OS_TARGET),AIX)
     DEFINES += -DMP_USE_UINT_DIGIT
     ifndef USE_64

diff -Naur mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s
--- mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s        1970-01-01 01:00:00.000000000 +0100
+++ mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s        2009-01-22 22:41:36.000000000 +0100
@@ -0,0 +1,503 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License Version
+# 1.1 (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS" basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+# for the specific language governing rights and limitations under the
+# License.
+#
+# The Original Code is the Netscape security libraries.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2000
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#
+# Alternatively, the contents of this file may be used under the terms of
+# either the GNU General Public License Version 2 or later (the "GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable instead
+# of those above. If you wish to allow use of your version of this file only
+# under the terms of either the GPL or the LGPL, and not to allow others to
+# use your version of this file under the terms of the MPL, indicate your
+# decision by deleting the provisions above and replace them with the notice
+# and other provisions required by the GPL or the LGPL. If you do not delete
+# the provisions above, a recipient may use your version of this file under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+#  $Id: mpi_x86.s,v 1.6 2006/12/11 09:45:32 gerv%gerv.net Exp $
+#
+
+.text
+
+ #  s_mpv_mul_d: c[i] = low word of (a[i] * b + carry) for i < a_len,
+ #  then the last carry is stored in c[a_len].
+ #  Which body is assembled depends on the __SSE2__ conditional below.
+ #
+ #  Without __SSE2__ (after "sub $28,%esp" and the three pushes):
+ #  ebp - 40:  caller's ebx
+ #  ebp - 36:  caller's esi
+ #  ebp - 32:  caller's edi
+ #  ebp - 28 .. ebp - 4:  reserved, never accessed
+ #  With __SSE2__: caller's edi at ebp - 4, caller's esi at ebp - 8.
+ #  ebp + 0:   caller's ebp
+ #  ebp + 4:   return address
+ #  ebp + 8:   a       argument
+ #  ebp + 12:  a_len   argument
+ #  ebp + 16:  b       argument
+ #  ebp + 20:  c       argument
+ #  registers:
+ #     eax, edx: product a[i] * b     (__SSE2__: mm0)
+ #     ebx:    carry                  (__SSE2__: mm2; b is held in mm1)
+ #     ecx:    a_len (loop counter)
+ #     esi:    a ptr
+ #     edi:    c ptr
+.private_extern        _s_mpv_mul_d
+_s_mpv_mul_d:
+#ifndef __SSE2__
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp            # reserve 28 bytes; nothing below reads them
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx             # carry = 0
+    mov    12(%ebp),%ecx       # ecx = a_len
+    mov    20(%ebp),%edi       # edi = c
+    cmp    $0,%ecx
+    je     2f                  # a_len == 0: only the (zero) carry is stored
+    mov    8(%ebp),%esi                # esi = a
+    cld                        # lodsl/stosl below must step upward
+1:
+    lodsl                      # eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx       # edx = b (reloaded: mull overwrites edx)
+    mull   %edx                        # edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax           # add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    %edx,%ebx           # high half of product becomes next carry
+
+    stosl                      # [es:edi] = eax; edi += 4;
+    dec    %ecx                        # --a_len
+    jnz    1b                  # jmp if a_len != 0
+2:
+    mov    %ebx,0(%edi)                # *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#else
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2           # carry = 0
+    mov    12(%ebp),%ecx       # ecx = a_len
+    movd   16(%ebp),%mm1       # mm1 = b
+    mov    20(%ebp),%edi       # edi = c
+    cmp    $0,%ecx
+    je     6f                  # a_len == 0: only the (zero) carry is stored
+    mov    8(%ebp),%esi                # esi = a
+    cld                        # no string instruction follows in this branch
+5:
+    movd   0(%esi),%mm0         # mm0 = *a
+    add    $4,%esi              # a++
+    pmuludq %mm1,%mm0           # mm0 = b * a_i (64-bit product)
+    paddq  %mm0,%mm2            # add the carry
+    movd   %mm2,0(%edi)         # store the 32bit result
+    add    $4,%edi              # c++
+    psrlq  $32, %mm2           # upper 32 bits become the next carry
+    dec    %ecx                        # --a_len
+    jnz    5b                  # jmp if a_len != 0
+6:
+    movd   %mm2,0(%edi)                # *c = carry
+    emms                       # clear MMX state before returning
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#endif
+ #  s_mpv_mul_d_add: c[i] = low word of (a[i] * b + c[i] + carry) for
+ #  i < a_len, then the last carry is stored (not added) in c[a_len].
+ #  Which body is assembled depends on the __SSE2__ conditional below.
+ #
+ #  Without __SSE2__ (after "sub $28,%esp" and the three pushes):
+ #  ebp - 40:  caller's ebx
+ #  ebp - 36:  caller's esi
+ #  ebp - 32:  caller's edi
+ #  ebp - 28 .. ebp - 4:  reserved, never accessed
+ #  With __SSE2__: caller's edi at ebp - 4, caller's esi at ebp - 8.
+ #  ebp + 0:   caller's ebp
+ #  ebp + 4:   return address
+ #  ebp + 8:   a       argument
+ #  ebp + 12:  a_len   argument
+ #  ebp + 16:  b       argument
+ #  ebp + 20:  c       argument
+ #  registers:
+ #     eax, edx: product a[i] * b     (__SSE2__: mm0)
+ #     ebx:    carry / c[i] scratch   (__SSE2__: mm2; b is held in mm1)
+ #     ecx:    a_len (loop counter)
+ #     esi:    a ptr
+ #     edi:    c ptr
+.private_extern        _s_mpv_mul_d_add
+_s_mpv_mul_d_add:
+#ifndef __SSE2__
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp            # reserve 28 bytes; nothing below reads them
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx             # carry = 0
+    mov    12(%ebp),%ecx       # ecx = a_len
+    mov    20(%ebp),%edi       # edi = c
+    cmp    $0,%ecx
+    je     11f                 # a_len == 0: only the (zero) carry is stored
+    mov    8(%ebp),%esi                # esi = a
+    cld                        # lodsl/stosl below must step upward
+10:
+    lodsl                      # eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx       # edx = b (reloaded: mull overwrites edx)
+    mull   %edx                        # edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax           # add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx                # add in current word from *c
+    add    %ebx,%eax
+    adc    $0,%edx
+    mov    %edx,%ebx           # high half of product becomes next carry
+
+    stosl                      # [es:edi] = eax; edi += 4;
+    dec    %ecx                        # --a_len
+    jnz    10b                 # jmp if a_len != 0
+11:
+    mov    %ebx,0(%edi)                # *c = carry
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#else
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2           # carry = 0
+    mov    12(%ebp),%ecx       # ecx = a_len
+    movd   16(%ebp),%mm1       # mm1 = b
+    mov    20(%ebp),%edi       # edi = c
+    cmp    $0,%ecx
+    je     16f                 # a_len == 0: only the (zero) carry is stored
+    mov    8(%ebp),%esi                # esi = a
+    cld                        # no string instruction follows in this branch
+15:
+    movd   0(%esi),%mm0         # mm0 = *a
+    add    $4,%esi              # a++
+    pmuludq %mm1,%mm0           # mm0 = b * a_i (64-bit product)
+    paddq  %mm0,%mm2            # add the carry
+    movd   0(%edi),%mm0         # mm0 = current word from *c
+    paddq  %mm0,%mm2            # add in current word from *c
+    movd   %mm2,0(%edi)         # store the 32bit result
+    add    $4,%edi              # c++
+    psrlq  $32, %mm2           # upper 32 bits become the next carry
+    dec    %ecx                        # --a_len
+    jnz    15b                 # jmp if a_len != 0
+16:
+    movd   %mm2,0(%edi)                # *c = carry
+    emms                       # clear MMX state before returning
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#endif
+
+ #  s_mpv_mul_d_add_prop: c += a * b; a non-zero final carry is added
+ #  into c[a_len] and propagated upward while a carry-out occurs.
+ #  With __SSE2__:    edi at ebp - 4, esi at ebp - 8, ebx at ebp - 12.
+ #  Without __SSE2__: edi at ebp - 32, esi at ebp - 36, ebx at ebp - 40.
+ #  ebp + 0:   caller's ebp
+ #  ebp + 4:   return address
+ #  ebp + 8:   a       argument
+ #  ebp + 12:  a_len   argument
+ #  ebp + 16:  b       argument
+ #  ebp + 20:  c       argument
+ #  registers:
+ #     eax, edx: product / scratch    (__SSE2__: mm0, mm3)
+ #     ebx:    carry                  (__SSE2__: mm2 in the loop; mm1 = b)
+ #     ecx:    a_len (loop counter)
+ #     esi:    a ptr;  edi: c ptr
+.private_extern        _s_mpv_mul_d_add_prop
+_s_mpv_mul_d_add_prop:
+#ifndef __SSE2__
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp            # reserve 28 bytes; nothing below reads them
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx             # carry = 0
+    mov    12(%ebp),%ecx       # ecx = a_len
+    mov    20(%ebp),%edi       # edi = c
+    cmp    $0,%ecx
+    je     21f                 # jmp if a_len == 0
+    cld                        # lodsl/stosl below must step upward
+    mov    8(%ebp),%esi                # esi = a
+20:
+    lodsl                      # eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx       # edx = b (reloaded: mull overwrites edx)
+    mull   %edx                        # edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax           # add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx                # add in current word from *c
+    add    %ebx,%eax
+    adc    $0,%edx
+    mov    %edx,%ebx           # high half of product becomes next carry
+
+    stosl                      # [es:edi] = eax; edi += 4;
+    dec    %ecx                        # --a_len
+    jnz    20b                 # jmp if a_len != 0
+21:
+    cmp    $0,%ebx             # is carry zero?
+    jz     23f                 # nothing left to propagate
+    mov    0(%edi),%eax                # add in current word from *c
+    add           %ebx,%eax
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jnc    23f                 # done unless the add carried out
+22:
+    mov    0(%edi),%eax                # add in current word from *c
+    adc           $0,%eax      # add the pending carry (CF)
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jc     22b                 # keep going while words overflow; no bound check here
+23:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#else
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2           # carry = 0
+    mov    12(%ebp),%ecx       # ecx = a_len
+    movd   16(%ebp),%mm1       # mm1 = b
+    mov    20(%ebp),%edi       # edi = c
+    cmp    $0,%ecx
+    je     26f                 # jmp if a_len == 0
+    mov    8(%ebp),%esi                # esi = a
+    cld                        # the stosl in the tail below must step upward
+25:
+    movd   0(%esi),%mm0         # mm0 = *a
+    movd   0(%edi),%mm3                # fetch the sum
+    add    $4,%esi              # a++
+    pmuludq %mm1,%mm0           # mm0 = b * a_i (64-bit product)
+    paddq  %mm0,%mm2            # add the carry
+    paddq  %mm3,%mm2            # add current word from *c
+    movd   %mm2,0(%edi)         # store the 32bit result
+    add    $4,%edi              # c++
+    psrlq  $32, %mm2           # upper 32 bits become the next carry
+    dec    %ecx                        # --a_len
+    jnz    25b                 # jmp if a_len != 0
+26:
+    movd   %mm2,%ebx           # ebx = final carry
+    cmp    $0,%ebx             # is carry zero?
+    jz     28f                 # nothing left to propagate
+    mov    0(%edi),%eax        # add in current word from *c
+    add    %ebx, %eax
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jnc    28f                 # done unless the add carried out
+27:
+    mov    0(%edi),%eax                # add in current word from *c
+    adc           $0,%eax      # add the pending carry (CF)
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jc     27b                 # keep going while words overflow; no bound check here
+28:
+    emms                       # clear MMX state before returning
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#endif
+
+ #  s_mpv_sqr_add_prop: for each i < a_len, add the two-word square of
+ #  pa[i] into ps[2i], ps[2i+1]; the final carry is propagated up ps.
+ #  Without __SSE2__ (after "sub $12,%esp" and the three pushes):
+ #  ebp - 24:  caller's ebx
+ #  ebp - 20:  caller's esi
+ #  ebp - 16:  caller's edi
+ #  ebp - 12 .. ebp - 4:  reserved, never accessed
+ #  With __SSE2__: edi at ebp - 4, esi at ebp - 8, ebx at ebp - 12.
+ #  ebp + 0:   caller's ebp
+ #  ebp + 4:   return address
+ #  ebp + 8:   pa      argument
+ #  ebp + 12:  a_len   argument
+ #  ebp + 16:  ps      argument
+ #  registers:
+ #     eax, edx: square of pa[i]      (__SSE2__: mm0, mm3)
+ #     ebx:    carry / ps scratch     (__SSE2__: mm2 in the loop)
+ #     ecx:    a_len (loop counter)
+ #     esi:    pa ptr;  edi: ps ptr
+
+.private_extern        _s_mpv_sqr_add_prop
+_s_mpv_sqr_add_prop:
+#ifndef __SSE2__
+     push   %ebp
+     mov    %esp,%ebp
+     sub    $12,%esp           # reserve 12 bytes; nothing below reads them
+     push   %edi
+     push   %esi
+     push   %ebx
+     movl   $0,%ebx            # carry = 0
+     mov    12(%ebp),%ecx      # a_len
+     mov    16(%ebp),%edi      # edi = ps
+     cmp    $0,%ecx
+     je     31f                        # jump if a_len == 0
+     cld                       # lodsl/stosl below must step upward
+     mov    8(%ebp),%esi       # esi = pa
+30:
+     lodsl                     # %eax = [ds:esi]; esi += 4;
+     mull   %eax               # edx:eax = pa_i * pa_i
+
+     add    %ebx,%eax          # add "carry"
+     adc    $0,%edx
+     mov    0(%edi),%ebx       # ebx = low word of the running result
+     add    %ebx,%eax          # add low word from result
+     mov    4(%edi),%ebx       # fetch high word before stosl advances edi (CF unchanged)
+     stosl                     # [es:edi] = %eax; edi += 4; (CF unchanged)
+     adc    %ebx,%edx          # add high word from result
+     movl   $0,%ebx            # clear ebx without disturbing CF
+     mov    %edx,%eax
+     adc    $0,%ebx            # ebx = carry out of the high word
+     stosl                     # [es:edi] = %eax; edi += 4;
+     dec    %ecx               # --a_len
+     jnz    30b                        # jmp if a_len != 0
+31:
+    cmp    $0,%ebx             # is carry zero?
+    jz     34f                 # nothing left to propagate
+    mov    0(%edi),%eax                # add in current word from *ps
+    add           %ebx,%eax
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jnc    34f                 # done unless the add carried out
+32:
+    mov    0(%edi),%eax                # add in current word from *ps
+    adc           $0,%eax      # add the pending carry (CF)
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jc     32b                 # keep going while words overflow; no bound check here
+34:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#else
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2           # carry = 0
+    mov    12(%ebp),%ecx       # ecx = a_len
+    mov    16(%ebp),%edi       # edi = ps
+    cmp    $0,%ecx
+    je     36f                 # jmp if a_len == 0
+    mov    8(%ebp),%esi                # esi = pa
+    cld                        # the stosl in the tail below must step upward
+35:
+    movd   0(%esi),%mm0        # mm0 = *a
+    movd   0(%edi),%mm3               # fetch the sum
+    add           $4,%esi      # pa++
+    pmuludq %mm0,%mm0          # mm0 = sqr(a)
+    paddq  %mm0,%mm2           # add the carry
+    paddq  %mm3,%mm2           # add the low word
+    movd   4(%edi),%mm3        # mm3 = high word of the running result
+    movd   %mm2,0(%edi)        # store the 32bit result
+    psrlq  $32, %mm2           # keep the upper half for the high word
+    paddq  %mm3,%mm2           # add the high word
+    movd   %mm2,4(%edi)        # store the 32bit result
+    psrlq  $32, %mm2          # save the carry.
+    add    $8,%edi             # ps += 2 words
+    dec    %ecx                        # --a_len
+    jnz    35b                 # jmp if a_len != 0
+36:
+    movd   %mm2,%ebx           # ebx = final carry
+    cmp    $0,%ebx             # is carry zero?
+    jz     38f                 # nothing left to propagate
+    mov    0(%edi),%eax        # add in current word from *ps
+    add    %ebx, %eax
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jnc    38f                 # done unless the add carried out
+37:
+    mov    0(%edi),%eax                # add in current word from *ps
+    adc           $0,%eax      # add the pending carry (CF)
+    stosl                      # [es:edi] = eax; edi += 4; (CF unchanged)
+    jc     37b                 # keep going while words overflow; no bound check here
+38:
+    emms                       # clear MMX state before returning
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave
+    ret
+    nop
+#endif
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1.   This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ #                       mp_digit *qp, mp_digit *rp)
+
+ #  esp +  0:  caller's ebx (after the push below)
+ #  esp +  4:  return address
+ #  esp +  8:  Nhi     argument
+ #  esp + 12:  Nlo     argument
+ #  esp + 16:  divisor argument
+ #  esp + 20:  qp      argument
+ #  esp + 24:  rp      argument
+ #  registers:
+ #     eax:    Nlo, then the quotient; zeroed for the return value
+ #     ebx:    divisor, then qp, then rp
+ #     edx:    Nhi, then the remainder
+ #     ecx, esi, edi: not used
+ #  No frame pointer is set up; arguments are addressed from esp.
+ #  The div faults if the quotient does not fit in 32 bits (Nhi >= divisor).
+ #
+
+.private_extern        _s_mpv_div_2dx1d
+_s_mpv_div_2dx1d:
+       push   %ebx
+       mov    8(%esp),%edx             # edx = Nhi
+       mov    12(%esp),%eax            # eax = Nlo
+       mov    16(%esp),%ebx            # ebx = divisor
+       div    %ebx                     # eax = edx:eax / ebx, edx = remainder
+       mov    20(%esp),%ebx            # ebx = qp
+       mov    %eax,0(%ebx)             # *qp = quotient
+       mov    24(%esp),%ebx            # ebx = rp
+       mov    %edx,0(%ebx)             # *rp = remainder
+       xor    %eax,%eax                # return zero
+       pop    %ebx
+       ret
+       nop
--
dev-tech-crypto mailing list
[email protected]
https://lists.mozilla.org/listinfo/dev-tech-crypto

Re: RSA Keygen problem

Reply via email to