On Jan 22, 6:46 pm, Nelson B Bolyard <[email protected]> wrote:
> > min: 389 ms, max: 2648
>
> That's more like what's expected.
>
>
>
> > Is there a simple way to test if the generated values are correct ?
>
> Two ways come to mind.
>
> 1) Run NSS's cipher tests.
> cd mozilla/security/nss/tests/cipher
> cipher.sh > /tmp/cipher.sh.log 2>&1
> Then look at results.html in the generated directory
> mozilla/tests_results/security/HOST.N
>
> 2) Run NSS's FIPS mode self test.
> cd mozilla/security/nss/tests/fips
> fips.sh > /tmp/fips.sh.log 2>&1
> Then look at results.html in the generated directory
> mozilla/tests_results/security/HOST.N
>
> where HOST is your host name and N is an integer that increments with each
> test run.
Everything is green.
The new asm file does not try to determine at runtime if SSE2 is
present, but it does it at compile time.
By default the Apple GCC version define __SSE2__, so the default is to
use it on Mac.
And as mention before, all supported Mac have a processor with SSE2
available, so it should be fine.
Else, this is a simple copy/paste of the Linux version (without
the .type lines, as the Darwin assembler does not like them).
diff -Naur mozilla/security/nss/lib/freebl/Makefile mozilla/security/
nss/lib/freebl/Makefile
--- mozilla/security/nss/lib/freebl/Makefile 2008-03-28
01:35:26.000000000 +0100
+++ mozilla/security/nss/lib/freebl/Makefile 2009-01-22
20:37:24.000000000 +0100
@@ -147,6 +147,27 @@
endif
endif # Linux
+ifeq ($(OS_TARGET),Darwin)
+#ifeq ($(CPU_ARCH),x86_64)
+# ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
+# ASFLAGS += -march=opteron -m64 -fPIC
+# DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -
DMP_ASSEMBLY_MULTIPLY
+# DEFINES += -DNSS_USE_COMBA
+# DEFINES += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN
+# DEFINES += -DMPI_AMD64_ADD
+# MPI_SRCS += mpi_amd64.c mp_comba.c
+#endif
+ifeq ($(CPU_ARCH),i386)
+ ASFILES = mpi_x86-darwin.s
+ DEFINES += -DMP_USE_UINT_DIGIT
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+ DEFINES += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN
+ # Should it be enabled on Darwin ?
+ #ECL_USE_FP = 1
+endif
+endif # Darwin
+
ifeq ($(OS_TARGET),AIX)
DEFINES += -DMP_USE_UINT_DIGIT
ifndef USE_64
diff -Naur mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s
mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s
--- mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s 1970-01-01
01:00:00.000000000 +0100
+++ mozilla/security/nss/lib/freebl/mpi/mpi_x86-darwin.s 2009-01-22
22:41:36.000000000 +0100
@@ -0,0 +1,503 @@
+#
+# ***** BEGIN LICENSE BLOCK *****
+# Version: MPL 1.1/GPL 2.0/LGPL 2.1
+#
+# The contents of this file are subject to the Mozilla Public License
Version
+# 1.1 (the "License"); you may not use this file except in compliance
with
+# the License. You may obtain a copy of the License at
+# http://www.mozilla.org/MPL/
+#
+# Software distributed under the License is distributed on an "AS IS"
basis,
+# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License
+# for the specific language governing rights and limitations under
the
+# License.
+#
+# The Original Code is the Netscape security libraries.
+#
+# The Initial Developer of the Original Code is
+# Netscape Communications Corporation.
+# Portions created by the Initial Developer are Copyright (C) 2000
+# the Initial Developer. All Rights Reserved.
+#
+# Contributor(s):
+#
+# Alternatively, the contents of this file may be used under the
terms of
+# either the GNU General Public License Version 2 or later (the
"GPL"), or
+# the GNU Lesser General Public License Version 2.1 or later (the
"LGPL"),
+# in which case the provisions of the GPL or the LGPL are applicable
instead
+# of those above. If you wish to allow use of your version of this
file only
+# under the terms of either the GPL or the LGPL, and not to allow
others to
+# use your version of this file under the terms of the MPL, indicate
your
+# decision by deleting the provisions above and replace them with the
notice
+# and other provisions required by the GPL or the LGPL. If you do not
delete
+# the provisions above, a recipient may use your version of this file
under
+# the terms of any one of the MPL, the GPL or the LGPL.
+#
+# ***** END LICENSE BLOCK *****
+
+# $Id: mpi_x86.s,v 1.6 2006/12/11 09:45:32 gerv%gerv.net Exp $
+#
+
+.text
+
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.private_extern _s_mpv_mul_d
+_s_mpv_mul_d:
+#ifndef __SSE2__
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#else
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#endif
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.private_extern _s_mpv_mul_d_add
+_s_mpv_mul_d_add:
+#ifndef __SSE2__
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#else
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#endif
+
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+.private_extern _s_mpv_mul_d_add_prop
+_s_mpv_mul_d_add_prop:
+#ifndef __SSE2__
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 23f
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#else
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 28f
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 27b
+28:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#endif
+
+ # ebp - 20: caller's esi
+ # ebp - 16: caller's edi
+ # ebp - 12:
+ # ebp - 8: carry
+ # ebp - 4: a_len local
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # ebp + 20:
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+
+.private_extern _s_mpv_sqr_add_prop
+_s_mpv_sqr_add_prop:
+#ifndef __SSE2__
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:si]; si += 4;
+ mull %eax
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx
+ stosl # [es:di] = %eax; di += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx
+ stosl # [es:di] = %eax; di += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 34f
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#else
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 38f
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 37b
+38:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+#endif
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be
normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit
divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
+ #
+
+.private_extern _s_mpv_div_2dx1d
+_s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx
+ mov 12(%esp),%eax
+ mov 16(%esp),%ebx
+ div %ebx
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx)
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx)
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
--
dev-tech-crypto mailing list
[email protected]
https://lists.mozilla.org/listinfo/dev-tech-crypto