This commit is contained in:
Zhi Guan
2015-08-15 15:02:15 +08:00
parent 06df2fab54
commit 3bdc0ea895
2536 changed files with 417052 additions and 271997 deletions

View File

@@ -26,13 +26,13 @@ LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \
bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
bn_depr.c bn_const.c
bn_depr.c bn_const.c bn_x931p.c
LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \
bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \
bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \
bn_depr.o bn_const.o
bn_depr.o bn_const.o bn_x931p.o
SRC= $(LIBSRC)
@@ -66,6 +66,8 @@ co-586.s: asm/co-586.pl ../perlasm/x86asm.pl
$(PERL) asm/co-586.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
x86-mont.s: asm/x86-mont.pl ../perlasm/x86asm.pl
$(PERL) asm/x86-mont.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
x86-gf2m.s: asm/x86-gf2m.pl ../perlasm/x86asm.pl
$(PERL) asm/x86-gf2m.pl $(PERLASM_SCHEME) $(CFLAGS) $(PROCESSOR) > $@
sparcv8.o: asm/sparcv8.S
$(CC) $(CFLAGS) -c asm/sparcv8.S
@@ -75,6 +77,12 @@ sparcv9a-mont.s: asm/sparcv9a-mont.pl
$(PERL) asm/sparcv9a-mont.pl $(CFLAGS) > $@
sparcv9-mont.s: asm/sparcv9-mont.pl
$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
vis3-mont.s: asm/vis3-mont.pl
$(PERL) asm/vis3-mont.pl $(CFLAGS) > $@
sparct4-mont.S: asm/sparct4-mont.pl
$(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@
sparcv9-gf2m.S: asm/sparcv9-gf2m.pl
$(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@
bn-mips3.o: asm/mips3.s
@if [ "$(CC)" = "gcc" ]; then \
@@ -82,16 +90,33 @@ bn-mips3.o: asm/mips3.s
as -$$ABI -O -o $@ asm/mips3.s; \
else $(CC) -c $(CFLAGS) -o $@ asm/mips3.s; fi
bn-mips.s: asm/mips.pl
$(PERL) asm/mips.pl $(PERLASM_SCHEME) $@
mips-mont.s: asm/mips-mont.pl
$(PERL) asm/mips-mont.pl $(PERLASM_SCHEME) $@
bn-s390x.o: asm/s390x.S
$(CC) $(CFLAGS) -c -o $@ asm/s390x.S
s390x-gf2m.s: asm/s390x-gf2m.pl
$(PERL) asm/s390x-gf2m.pl $(PERLASM_SCHEME) $@
x86_64-gcc.o: asm/x86_64-gcc.c
$(CC) $(CFLAGS) -c -o $@ asm/x86_64-gcc.c
x86_64-mont.s: asm/x86_64-mont.pl
$(PERL) asm/x86_64-mont.pl $(PERLASM_SCHEME) > $@
x86_64-mont5.s: asm/x86_64-mont5.pl
$(PERL) asm/x86_64-mont5.pl $(PERLASM_SCHEME) > $@
x86_64-gf2m.s: asm/x86_64-gf2m.pl
$(PERL) asm/x86_64-gf2m.pl $(PERLASM_SCHEME) > $@
rsaz-x86_64.s: asm/rsaz-x86_64.pl
$(PERL) asm/rsaz-x86_64.pl $(PERLASM_SCHEME) > $@
rsaz-avx2.s: asm/rsaz-avx2.pl
$(PERL) asm/rsaz-avx2.pl $(PERLASM_SCHEME) > $@
bn-ia64.s: asm/ia64.S
$(CC) $(CFLAGS) -E asm/ia64.S > $@
ia64-mont.s: asm/ia64-mont.pl
$(PERL) asm/ia64-mont.pl $@ $(CFLAGS)
# GNU assembler fails to compile PA-RISC2 modules, insist on calling
# vendor assembler...
@@ -99,16 +124,25 @@ pa-risc2W.o: asm/pa-risc2W.s
/usr/ccs/bin/as -o pa-risc2W.o asm/pa-risc2W.s
pa-risc2.o: asm/pa-risc2.s
/usr/ccs/bin/as -o pa-risc2.o asm/pa-risc2.s
parisc-mont.s: asm/parisc-mont.pl
$(PERL) asm/parisc-mont.pl $(PERLASM_SCHEME) $@
# ppc - AIX, Linux, MacOS X...
bn-ppc.s: asm/ppc.pl; $(PERL) asm/ppc.pl $(PERLASM_SCHEME) $@
ppc-mont.s: asm/ppc-mont.pl;$(PERL) asm/ppc-mont.pl $(PERLASM_SCHEME) $@
ppc64-mont.s: asm/ppc64-mont.pl;$(PERL) asm/ppc64-mont.pl $(PERLASM_SCHEME) $@
alpha-mont.s: asm/alpha-mont.pl
$(PERL) $< | $(CC) -E - | tee $@ > /dev/null
(preproc=$$$$.$@.S; trap "rm $$preproc" INT; \
$(PERL) asm/alpha-mont.pl > $$preproc && \
$(CC) -E -P $$preproc > $@ && rm $$preproc)
# GNU make "catch all"
%-mont.s: asm/%-mont.pl; $(PERL) $< $(CFLAGS) > $@
%-mont.S: asm/%-mont.pl; $(PERL) $< $(PERLASM_SCHEME) $@
%-gf2m.S: asm/%-gf2m.pl; $(PERL) $< $(PERLASM_SCHEME) $@
armv4-mont.o: armv4-mont.S
armv4-gf2m.o: armv4-gf2m.S
files:
$(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO
@@ -142,6 +176,8 @@ tests:
lint:
lint -DLINT $(INCLUDES) $(SRC)>fluff
update: bn_prime.h depend
depend:
@[ -n "$(MAKEDEPEND)" ] # should be set by upper Makefile...
$(MAKEDEPEND) -- $(CFLAG) $(INCLUDES) $(DEPFLAG) -- $(PROGS) $(LIBSRC)
@@ -217,6 +253,7 @@ bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp.c bn_lcl.h
bn_exp.o: rsaz_exp.h
bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
@@ -345,3 +382,8 @@ bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c
bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/crypto.h
bn_x931p.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
bn_x931p.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_x931p.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_x931p.o: ../../include/openssl/symhacks.h bn_x931p.c

View File

@@ -41,7 +41,7 @@ $j="s4";
$m1="s5";
$code=<<___;
#indef __linux__
#ifdef __linux__
#include <asm/regdef.h>
#else
#include <asm.h>

289
crypto/bn/asm/armv4-gf2m.pl Normal file
View File

@@ -0,0 +1,289 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# May 2011
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of low-hanging mechanical port from
# C for the time being... Except that it has two code paths: pure
# integer code suitable for any ARMv4 and later CPU and NEON code
# suitable for ARMv7. Pure integer 1x1 multiplication subroutine runs
# in ~45 cycles on dual-issue core such as Cortex A8, which is ~50%
# faster than compiler-generated code. For ECDH and ECDSA verify (but
# not for ECDSA sign) it means 25%-45% improvement depending on key
# length, more for longer keys. Even though NEON 1x1 multiplication
# runs in even less cycles, ~30, improvement is measurable only on
# longer keys. One has to optimize code elsewhere to get NEON glow...
#
# April 2014
#
# Double bn_GF2m_mul_2x2 performance by using algorithm from paper
# referred below, which improves ECDH and ECDSA verify benchmarks
# by 18-40%.
#
# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$code=<<___;
#include "arm_arch.h"
.text
.code 32
___
################
# private interface to mul_1x1_ialu
#
$a="r1";
$b="r0";
($a0,$a1,$a2,$a12,$a4,$a14)=
($hi,$lo,$t0,$t1, $i0,$i1 )=map("r$_",(4..9),12);
$mask="r12";
$code.=<<___;
.type mul_1x1_ialu,%function
.align 5
mul_1x1_ialu:
mov $a0,#0
bic $a1,$a,#3<<30 @ a1=a&0x3fffffff
str $a0,[sp,#0] @ tab[0]=0
add $a2,$a1,$a1 @ a2=a1<<1
str $a1,[sp,#4] @ tab[1]=a1
eor $a12,$a1,$a2 @ a1^a2
str $a2,[sp,#8] @ tab[2]=a2
mov $a4,$a1,lsl#2 @ a4=a1<<2
str $a12,[sp,#12] @ tab[3]=a1^a2
eor $a14,$a1,$a4 @ a1^a4
str $a4,[sp,#16] @ tab[4]=a4
eor $a0,$a2,$a4 @ a2^a4
str $a14,[sp,#20] @ tab[5]=a1^a4
eor $a12,$a12,$a4 @ a1^a2^a4
str $a0,[sp,#24] @ tab[6]=a2^a4
and $i0,$mask,$b,lsl#2
str $a12,[sp,#28] @ tab[7]=a1^a2^a4
and $i1,$mask,$b,lsr#1
ldr $lo,[sp,$i0] @ tab[b & 0x7]
and $i0,$mask,$b,lsr#4
ldr $t1,[sp,$i1] @ tab[b >> 3 & 0x7]
and $i1,$mask,$b,lsr#7
ldr $t0,[sp,$i0] @ tab[b >> 6 & 0x7]
eor $lo,$lo,$t1,lsl#3 @ stall
mov $hi,$t1,lsr#29
ldr $t1,[sp,$i1] @ tab[b >> 9 & 0x7]
and $i0,$mask,$b,lsr#10
eor $lo,$lo,$t0,lsl#6
eor $hi,$hi,$t0,lsr#26
ldr $t0,[sp,$i0] @ tab[b >> 12 & 0x7]
and $i1,$mask,$b,lsr#13
eor $lo,$lo,$t1,lsl#9
eor $hi,$hi,$t1,lsr#23
ldr $t1,[sp,$i1] @ tab[b >> 15 & 0x7]
and $i0,$mask,$b,lsr#16
eor $lo,$lo,$t0,lsl#12
eor $hi,$hi,$t0,lsr#20
ldr $t0,[sp,$i0] @ tab[b >> 18 & 0x7]
and $i1,$mask,$b,lsr#19
eor $lo,$lo,$t1,lsl#15
eor $hi,$hi,$t1,lsr#17
ldr $t1,[sp,$i1] @ tab[b >> 21 & 0x7]
and $i0,$mask,$b,lsr#22
eor $lo,$lo,$t0,lsl#18
eor $hi,$hi,$t0,lsr#14
ldr $t0,[sp,$i0] @ tab[b >> 24 & 0x7]
and $i1,$mask,$b,lsr#25
eor $lo,$lo,$t1,lsl#21
eor $hi,$hi,$t1,lsr#11
ldr $t1,[sp,$i1] @ tab[b >> 27 & 0x7]
tst $a,#1<<30
and $i0,$mask,$b,lsr#28
eor $lo,$lo,$t0,lsl#24
eor $hi,$hi,$t0,lsr#8
ldr $t0,[sp,$i0] @ tab[b >> 30 ]
eorne $lo,$lo,$b,lsl#30
eorne $hi,$hi,$b,lsr#2
tst $a,#1<<31
eor $lo,$lo,$t1,lsl#27
eor $hi,$hi,$t1,lsr#5
eorne $lo,$lo,$b,lsl#31
eorne $hi,$hi,$b,lsr#1
eor $lo,$lo,$t0,lsl#30
eor $hi,$hi,$t0,lsr#2
mov pc,lr
.size mul_1x1_ialu,.-mul_1x1_ialu
___
################
# void bn_GF2m_mul_2x2(BN_ULONG *r,
# BN_ULONG a1,BN_ULONG a0,
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{
$code.=<<___;
.global bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function
.align 5
bn_GF2m_mul_2x2:
#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
.Lpic: ldr r12,[pc,r12]
tst r12,#1
bne .LNEON
#endif
___
$ret="r10"; # reassigned 1st argument
$code.=<<___;
stmdb sp!,{r4-r10,lr}
mov $ret,r0 @ reassign 1st argument
mov $b,r3 @ $b=b1
ldr r3,[sp,#32] @ load b0
mov $mask,#7<<2
sub sp,sp,#32 @ allocate tab[8]
bl mul_1x1_ialu @ a1·b1
str $lo,[$ret,#8]
str $hi,[$ret,#12]
eor $b,$b,r3 @ flip b0 and b1
eor $a,$a,r2 @ flip a0 and a1
eor r3,r3,$b
eor r2,r2,$a
eor $b,$b,r3
eor $a,$a,r2
bl mul_1x1_ialu @ a0·b0
str $lo,[$ret]
str $hi,[$ret,#4]
eor $a,$a,r2
eor $b,$b,r3
bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
___
@r=map("r$_",(6..9));
$code.=<<___;
ldmia $ret,{@r[0]-@r[3]}
eor $lo,$lo,$hi
eor $hi,$hi,@r[1]
eor $lo,$lo,@r[0]
eor $hi,$hi,@r[2]
eor $lo,$lo,@r[3]
eor $hi,$hi,@r[3]
str $hi,[$ret,#8]
eor $lo,$lo,$hi
add sp,sp,#32 @ destroy tab[8]
str $lo,[$ret,#4]
#if __ARM_ARCH__>=5
ldmia sp!,{r4-r10,pc}
#else
ldmia sp!,{r4-r10,lr}
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
___
}
{
my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.align 5
.LNEON:
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
vmov.i64 $k48, #0x0000ffffffffffff
vmov.i64 $k32, #0x00000000ffffffff
vmov.i64 $k16, #0x000000000000ffff
vext.8 $t0#lo, $a, $a, #1 @ A1
vmull.p8 $t0, $t0#lo, $b @ F = A1*B
vext.8 $r#lo, $b, $b, #1 @ B1
vmull.p8 $r, $a, $r#lo @ E = A*B1
vext.8 $t1#lo, $a, $a, #2 @ A2
vmull.p8 $t1, $t1#lo, $b @ H = A2*B
vext.8 $t3#lo, $b, $b, #2 @ B2
vmull.p8 $t3, $a, $t3#lo @ G = A*B2
vext.8 $t2#lo, $a, $a, #3 @ A3
veor $t0, $t0, $r @ L = E + F
vmull.p8 $t2, $t2#lo, $b @ J = A3*B
vext.8 $r#lo, $b, $b, #3 @ B3
veor $t1, $t1, $t3 @ M = G + H
vmull.p8 $r, $a, $r#lo @ I = A*B3
veor $t0#lo, $t0#lo, $t0#hi @ t0 = (L) (P0 + P1) << 8
vand $t0#hi, $t0#hi, $k48
vext.8 $t3#lo, $b, $b, #4 @ B4
veor $t1#lo, $t1#lo, $t1#hi @ t1 = (M) (P2 + P3) << 16
vand $t1#hi, $t1#hi, $k32
vmull.p8 $t3, $a, $t3#lo @ K = A*B4
veor $t2, $t2, $r @ N = I + J
veor $t0#lo, $t0#lo, $t0#hi
veor $t1#lo, $t1#lo, $t1#hi
veor $t2#lo, $t2#lo, $t2#hi @ t2 = (N) (P4 + P5) << 24
vand $t2#hi, $t2#hi, $k16
vext.8 $t0, $t0, $t0, #15
veor $t3#lo, $t3#lo, $t3#hi @ t3 = (K) (P6 + P7) << 32
vmov.i64 $t3#hi, #0
vext.8 $t1, $t1, $t1, #14
veor $t2#lo, $t2#lo, $t2#hi
vmull.p8 $r, $a, $b @ D = A*B
vext.8 $t3, $t3, $t3, #12
vext.8 $t2, $t2, $t2, #13
veor $t0, $t0, $t1
veor $t2, $t2, $t3
veor $r, $r, $t0
veor $r, $r, $t2
vst1.32 {$r}, [r0]
ret @ bx lr
#endif
___
}
$code.=<<___;
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-(.Lpic+8)
#endif
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/geo;
s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
s/\bret\b/bx lr/go or
s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
print $_,"\n";
}
close STDOUT; # enforce flush

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
@@ -23,6 +23,24 @@
# than 1/2KB. Windows CE port would be trivial, as it's exclusively
# about decorations, ABI and instruction syntax are identical.
# November 2013
#
# Add NEON code path, which handles lengths divisible by 8. RSA/DSA
# performance improvement on Cortex-A8 is ~45-100% depending on key
# length, more for longer keys. On Cortex-A15 the span is ~10-105%.
# On Snapdragon S4 improvement was measured to vary from ~70% to
# incredible ~380%, yes, 4.8x faster, for RSA4096 sign. But this is
# rather because original integer-only code seems to perform
# suboptimally on S4. Situation on Cortex-A9 is unfortunately
# different. It's being looked into, but the trouble is that
# performance for vectors longer than 256 bits is actually couple
# of percent worse than for integer-only code. The code is chosen
# for execution on all NEON-capable processors, because gain on
# others outweighs the marginal loss on Cortex-A9.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$num="r0"; # starts as num argument, but holds &tp[num-1]
$ap="r1";
$bp="r2"; $bi="r2"; $rp="r2";
@@ -49,16 +67,40 @@ $_n0="$num,#14*4";
$_num="$num,#15*4"; $_bpend=$_num;
$code=<<___;
#include "arm_arch.h"
.text
.code 32
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-bn_mul_mont
#endif
.global bn_mul_mont
.type bn_mul_mont,%function
.align 2
.align 5
bn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
ldr $num,[sp,#3*4] @ load num
cmp $num,#2
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,bn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
tst r0,#1 @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
cmp ip,#2
mov $num,ip @ load num
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
@@ -89,9 +131,9 @@ bn_mul_mont:
.L1st:
ldr $aj,[$ap],#4 @ ap[j],ap++
mov $alo,$ahi
ldr $nj,[$np],#4 @ np[j],np++
mov $ahi,#0
umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
ldr $nj,[$np],#4 @ np[j],np++
mov $nhi,#0
umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
adds $nlo,$nlo,$alo
@@ -101,21 +143,21 @@ bn_mul_mont:
bne .L1st
adds $nlo,$nlo,$ahi
mov $nhi,#0
adc $nhi,$nhi,#0
ldr $tp,[$_bp] @ restore bp
str $nlo,[$num] @ tp[num-1]=
mov $nhi,#0
ldr $n0,[$_n0] @ restore n0
adc $nhi,$nhi,#0
str $nlo,[$num] @ tp[num-1]=
str $nhi,[$num,#4] @ tp[num]=
.Louter:
sub $tj,$num,sp @ "original" $num-1 value
sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
sub $np,$np,$tj @ "rewind" np to &np[1]
ldr $bi,[$tp,#4]! @ *(++bp)
sub $np,$np,$tj @ "rewind" np to &np[1]
ldr $aj,[$ap,#-4] @ ap[0]
ldr $nj,[$np,#-4] @ np[0]
ldr $alo,[sp] @ tp[0]
ldr $nj,[$np,#-4] @ np[0]
ldr $tj,[sp,#4] @ tp[1]
mov $ahi,#0
@@ -129,13 +171,13 @@ bn_mul_mont:
.Linner:
ldr $aj,[$ap],#4 @ ap[j],ap++
adds $alo,$ahi,$tj @ +=tp[j]
ldr $nj,[$np],#4 @ np[j],np++
mov $ahi,#0
umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
ldr $nj,[$np],#4 @ np[j],np++
mov $nhi,#0
umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
ldr $tj,[$tp,#8] @ tp[j+1]
adc $ahi,$ahi,#0
ldr $tj,[$tp,#8] @ tp[j+1]
adds $nlo,$nlo,$alo
str $nlo,[$tp],#4 @ tp[j-1]=,tp++
adc $nlo,$nhi,#0
@@ -144,13 +186,13 @@ bn_mul_mont:
adds $nlo,$nlo,$ahi
mov $nhi,#0
adc $nhi,$nhi,#0
adds $nlo,$nlo,$tj
adc $nhi,$nhi,#0
ldr $tp,[$_bp] @ restore bp
ldr $tj,[$_bpend] @ restore &bp[num]
str $nlo,[$num] @ tp[num-1]=
adc $nhi,$nhi,#0
ldr $n0,[$_n0] @ restore n0
adds $nlo,$nlo,$tj
ldr $tj,[$_bpend] @ restore &bp[num]
adc $nhi,$nhi,#0
str $nlo,[$num] @ tp[num-1]=
str $nhi,[$num,#4] @ tp[num]=
cmp $tp,$tj
@@ -188,14 +230,447 @@ bn_mul_mont:
ldmia sp!,{r4-r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt: tst lr,#1
.Labrt:
#if __ARM_ARCH__>=5
ret @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
bx lr @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
___
{
sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; }
sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; }
my ($A0,$A1,$A2,$A3)=map("d$_",(0..3));
my ($N0,$N1,$N2,$N3)=map("d$_",(4..7));
my ($Z,$Temp)=("q4","q5");
my ($A0xB,$A1xB,$A2xB,$A3xB,$A4xB,$A5xB,$A6xB,$A7xB)=map("q$_",(6..13));
my ($Bi,$Ni,$M0)=map("d$_",(28..31));
my $zero=&Dlo($Z);
my $temp=&Dlo($Temp);
my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
$code.=<<___;
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4-r11}
vstmdb sp!,{d8-d15} @ ABI specification says so
ldmia ip,{r4-r5} @ load rest of parameter block
sub $toutptr,sp,#16
vld1.32 {${Bi}[0]}, [$bptr,:32]!
sub $toutptr,$toutptr,$num,lsl#4
vld1.32 {$A0-$A3}, [$aptr]! @ can't specify :32 :-(
and $toutptr,$toutptr,#-64
vld1.32 {${M0}[0]}, [$n0,:32]
mov sp,$toutptr @ alloca
veor $zero,$zero,$zero
subs $inner,$num,#8
vzip.16 $Bi,$zero
vmull.u32 $A0xB,$Bi,${A0}[0]
vmull.u32 $A1xB,$Bi,${A0}[1]
vmull.u32 $A2xB,$Bi,${A1}[0]
vshl.i64 $temp,`&Dhi("$A0xB")`,#16
vmull.u32 $A3xB,$Bi,${A1}[1]
vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
veor $zero,$zero,$zero
vmul.u32 $Ni,$temp,$M0
vmull.u32 $A4xB,$Bi,${A2}[0]
vld1.32 {$N0-$N3}, [$nptr]!
vmull.u32 $A5xB,$Bi,${A2}[1]
vmull.u32 $A6xB,$Bi,${A3}[0]
vzip.16 $Ni,$zero
vmull.u32 $A7xB,$Bi,${A3}[1]
bne .LNEON_1st
@ special case for num=8, everything is in register bank...
vmlal.u32 $A0xB,$Ni,${N0}[0]
sub $outer,$num,#1
vmlal.u32 $A1xB,$Ni,${N0}[1]
vmlal.u32 $A2xB,$Ni,${N1}[0]
vmlal.u32 $A3xB,$Ni,${N1}[1]
vmlal.u32 $A4xB,$Ni,${N2}[0]
vmov $Temp,$A0xB
vmlal.u32 $A5xB,$Ni,${N2}[1]
vmov $A0xB,$A1xB
vmlal.u32 $A6xB,$Ni,${N3}[0]
vmov $A1xB,$A2xB
vmlal.u32 $A7xB,$Ni,${N3}[1]
vmov $A2xB,$A3xB
vmov $A3xB,$A4xB
vshr.u64 $temp,$temp,#16
vmov $A4xB,$A5xB
vmov $A5xB,$A6xB
vadd.u64 $temp,$temp,`&Dhi("$Temp")`
vmov $A6xB,$A7xB
veor $A7xB,$A7xB
vshr.u64 $temp,$temp,#16
b .LNEON_outer8
.align 4
.LNEON_outer8:
vld1.32 {${Bi}[0]}, [$bptr,:32]!
veor $zero,$zero,$zero
vzip.16 $Bi,$zero
vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
vmlal.u32 $A0xB,$Bi,${A0}[0]
vmlal.u32 $A1xB,$Bi,${A0}[1]
vmlal.u32 $A2xB,$Bi,${A1}[0]
vshl.i64 $temp,`&Dhi("$A0xB")`,#16
vmlal.u32 $A3xB,$Bi,${A1}[1]
vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
veor $zero,$zero,$zero
subs $outer,$outer,#1
vmul.u32 $Ni,$temp,$M0
vmlal.u32 $A4xB,$Bi,${A2}[0]
vmlal.u32 $A5xB,$Bi,${A2}[1]
vmlal.u32 $A6xB,$Bi,${A3}[0]
vzip.16 $Ni,$zero
vmlal.u32 $A7xB,$Bi,${A3}[1]
vmlal.u32 $A0xB,$Ni,${N0}[0]
vmlal.u32 $A1xB,$Ni,${N0}[1]
vmlal.u32 $A2xB,$Ni,${N1}[0]
vmlal.u32 $A3xB,$Ni,${N1}[1]
vmlal.u32 $A4xB,$Ni,${N2}[0]
vmov $Temp,$A0xB
vmlal.u32 $A5xB,$Ni,${N2}[1]
vmov $A0xB,$A1xB
vmlal.u32 $A6xB,$Ni,${N3}[0]
vmov $A1xB,$A2xB
vmlal.u32 $A7xB,$Ni,${N3}[1]
vmov $A2xB,$A3xB
vmov $A3xB,$A4xB
vshr.u64 $temp,$temp,#16
vmov $A4xB,$A5xB
vmov $A5xB,$A6xB
vadd.u64 $temp,$temp,`&Dhi("$Temp")`
vmov $A6xB,$A7xB
veor $A7xB,$A7xB
vshr.u64 $temp,$temp,#16
bne .LNEON_outer8
vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
mov $toutptr,sp
vshr.u64 $temp,`&Dlo("$A0xB")`,#16
mov $inner,$num
vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
add $tinptr,sp,#16
vshr.u64 $temp,`&Dhi("$A0xB")`,#16
vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
b .LNEON_tail2
.align 4
.LNEON_1st:
vmlal.u32 $A0xB,$Ni,${N0}[0]
vld1.32 {$A0-$A3}, [$aptr]!
vmlal.u32 $A1xB,$Ni,${N0}[1]
subs $inner,$inner,#8
vmlal.u32 $A2xB,$Ni,${N1}[0]
vmlal.u32 $A3xB,$Ni,${N1}[1]
vmlal.u32 $A4xB,$Ni,${N2}[0]
vld1.32 {$N0-$N1}, [$nptr]!
vmlal.u32 $A5xB,$Ni,${N2}[1]
vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
vmlal.u32 $A6xB,$Ni,${N3}[0]
vmlal.u32 $A7xB,$Ni,${N3}[1]
vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
vmull.u32 $A0xB,$Bi,${A0}[0]
vld1.32 {$N2-$N3}, [$nptr]!
vmull.u32 $A1xB,$Bi,${A0}[1]
vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
vmull.u32 $A2xB,$Bi,${A1}[0]
vmull.u32 $A3xB,$Bi,${A1}[1]
vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
vmull.u32 $A4xB,$Bi,${A2}[0]
vmull.u32 $A5xB,$Bi,${A2}[1]
vmull.u32 $A6xB,$Bi,${A3}[0]
vmull.u32 $A7xB,$Bi,${A3}[1]
bne .LNEON_1st
vmlal.u32 $A0xB,$Ni,${N0}[0]
add $tinptr,sp,#16
vmlal.u32 $A1xB,$Ni,${N0}[1]
sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
vmlal.u32 $A2xB,$Ni,${N1}[0]
vld1.64 {$Temp}, [sp,:128]
vmlal.u32 $A3xB,$Ni,${N1}[1]
sub $outer,$num,#1
vmlal.u32 $A4xB,$Ni,${N2}[0]
vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
vmlal.u32 $A5xB,$Ni,${N2}[1]
vshr.u64 $temp,$temp,#16
vld1.64 {$A0xB}, [$tinptr, :128]!
vmlal.u32 $A6xB,$Ni,${N3}[0]
vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
vmlal.u32 $A7xB,$Ni,${N3}[1]
vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
vadd.u64 $temp,$temp,`&Dhi("$Temp")`
veor $Z,$Z,$Z
vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
vst1.64 {$Z}, [$toutptr,:128]
vshr.u64 $temp,$temp,#16
b .LNEON_outer
.align 4
.LNEON_outer:
vld1.32 {${Bi}[0]}, [$bptr,:32]!
sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
vld1.32 {$A0-$A3}, [$aptr]!
veor $zero,$zero,$zero
mov $toutptr,sp
vzip.16 $Bi,$zero
sub $inner,$num,#8
vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
vmlal.u32 $A0xB,$Bi,${A0}[0]
vld1.64 {$A3xB-$A4xB},[$tinptr,:256]!
vmlal.u32 $A1xB,$Bi,${A0}[1]
vmlal.u32 $A2xB,$Bi,${A1}[0]
vld1.64 {$A5xB-$A6xB},[$tinptr,:256]!
vmlal.u32 $A3xB,$Bi,${A1}[1]
vshl.i64 $temp,`&Dhi("$A0xB")`,#16
veor $zero,$zero,$zero
vadd.u64 $temp,$temp,`&Dlo("$A0xB")`
vld1.64 {$A7xB},[$tinptr,:128]!
vmul.u32 $Ni,$temp,$M0
vmlal.u32 $A4xB,$Bi,${A2}[0]
vld1.32 {$N0-$N3}, [$nptr]!
vmlal.u32 $A5xB,$Bi,${A2}[1]
vmlal.u32 $A6xB,$Bi,${A3}[0]
vzip.16 $Ni,$zero
vmlal.u32 $A7xB,$Bi,${A3}[1]
.LNEON_inner:
vmlal.u32 $A0xB,$Ni,${N0}[0]
vld1.32 {$A0-$A3}, [$aptr]!
vmlal.u32 $A1xB,$Ni,${N0}[1]
subs $inner,$inner,#8
vmlal.u32 $A2xB,$Ni,${N1}[0]
vmlal.u32 $A3xB,$Ni,${N1}[1]
vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
vmlal.u32 $A4xB,$Ni,${N2}[0]
vld1.64 {$A0xB}, [$tinptr, :128]!
vmlal.u32 $A5xB,$Ni,${N2}[1]
vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
vmlal.u32 $A6xB,$Ni,${N3}[0]
vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
vmlal.u32 $A7xB,$Ni,${N3}[1]
vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
vmlal.u32 $A0xB,$Bi,${A0}[0]
vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
vmlal.u32 $A1xB,$Bi,${A0}[1]
vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
vmlal.u32 $A2xB,$Bi,${A1}[0]
vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
vmlal.u32 $A3xB,$Bi,${A1}[1]
vld1.32 {$N0-$N3}, [$nptr]!
vmlal.u32 $A4xB,$Bi,${A2}[0]
vld1.64 {$A7xB}, [$tinptr, :128]!
vmlal.u32 $A5xB,$Bi,${A2}[1]
vmlal.u32 $A6xB,$Bi,${A3}[0]
vmlal.u32 $A7xB,$Bi,${A3}[1]
bne .LNEON_inner
vmlal.u32 $A0xB,$Ni,${N0}[0]
add $tinptr,sp,#16
vmlal.u32 $A1xB,$Ni,${N0}[1]
sub $aptr,$aptr,$num,lsl#2 @ rewind $aptr
vmlal.u32 $A2xB,$Ni,${N1}[0]
vld1.64 {$Temp}, [sp,:128]
vmlal.u32 $A3xB,$Ni,${N1}[1]
subs $outer,$outer,#1
vmlal.u32 $A4xB,$Ni,${N2}[0]
vst1.64 {$A0xB-$A1xB}, [$toutptr,:256]!
vmlal.u32 $A5xB,$Ni,${N2}[1]
vld1.64 {$A0xB}, [$tinptr, :128]!
vshr.u64 $temp,$temp,#16
vst1.64 {$A2xB-$A3xB}, [$toutptr,:256]!
vmlal.u32 $A6xB,$Ni,${N3}[0]
vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
vmlal.u32 $A7xB,$Ni,${N3}[1]
vst1.64 {$A4xB-$A5xB}, [$toutptr,:256]!
vadd.u64 $temp,$temp,`&Dhi("$Temp")`
vst1.64 {$A6xB-$A7xB}, [$toutptr,:256]!
vshr.u64 $temp,$temp,#16
bne .LNEON_outer
mov $toutptr,sp
mov $inner,$num
.LNEON_tail:
vadd.u64 `&Dlo("$A0xB")`,`&Dlo("$A0xB")`,$temp
vld1.64 {$A3xB-$A4xB}, [$tinptr, :256]!
vshr.u64 $temp,`&Dlo("$A0xB")`,#16
vadd.u64 `&Dhi("$A0xB")`,`&Dhi("$A0xB")`,$temp
vld1.64 {$A5xB-$A6xB}, [$tinptr, :256]!
vshr.u64 $temp,`&Dhi("$A0xB")`,#16
vld1.64 {$A7xB}, [$tinptr, :128]!
vzip.16 `&Dlo("$A0xB")`,`&Dhi("$A0xB")`
.LNEON_tail2:
vadd.u64 `&Dlo("$A1xB")`,`&Dlo("$A1xB")`,$temp
vst1.32 {`&Dlo("$A0xB")`[0]}, [$toutptr, :32]!
vshr.u64 $temp,`&Dlo("$A1xB")`,#16
vadd.u64 `&Dhi("$A1xB")`,`&Dhi("$A1xB")`,$temp
vshr.u64 $temp,`&Dhi("$A1xB")`,#16
vzip.16 `&Dlo("$A1xB")`,`&Dhi("$A1xB")`
vadd.u64 `&Dlo("$A2xB")`,`&Dlo("$A2xB")`,$temp
vst1.32 {`&Dlo("$A1xB")`[0]}, [$toutptr, :32]!
vshr.u64 $temp,`&Dlo("$A2xB")`,#16
vadd.u64 `&Dhi("$A2xB")`,`&Dhi("$A2xB")`,$temp
vshr.u64 $temp,`&Dhi("$A2xB")`,#16
vzip.16 `&Dlo("$A2xB")`,`&Dhi("$A2xB")`
vadd.u64 `&Dlo("$A3xB")`,`&Dlo("$A3xB")`,$temp
vst1.32 {`&Dlo("$A2xB")`[0]}, [$toutptr, :32]!
vshr.u64 $temp,`&Dlo("$A3xB")`,#16
vadd.u64 `&Dhi("$A3xB")`,`&Dhi("$A3xB")`,$temp
vshr.u64 $temp,`&Dhi("$A3xB")`,#16
vzip.16 `&Dlo("$A3xB")`,`&Dhi("$A3xB")`
vadd.u64 `&Dlo("$A4xB")`,`&Dlo("$A4xB")`,$temp
vst1.32 {`&Dlo("$A3xB")`[0]}, [$toutptr, :32]!
vshr.u64 $temp,`&Dlo("$A4xB")`,#16
vadd.u64 `&Dhi("$A4xB")`,`&Dhi("$A4xB")`,$temp
vshr.u64 $temp,`&Dhi("$A4xB")`,#16
vzip.16 `&Dlo("$A4xB")`,`&Dhi("$A4xB")`
vadd.u64 `&Dlo("$A5xB")`,`&Dlo("$A5xB")`,$temp
vst1.32 {`&Dlo("$A4xB")`[0]}, [$toutptr, :32]!
vshr.u64 $temp,`&Dlo("$A5xB")`,#16
vadd.u64 `&Dhi("$A5xB")`,`&Dhi("$A5xB")`,$temp
vshr.u64 $temp,`&Dhi("$A5xB")`,#16
vzip.16 `&Dlo("$A5xB")`,`&Dhi("$A5xB")`
vadd.u64 `&Dlo("$A6xB")`,`&Dlo("$A6xB")`,$temp
vst1.32 {`&Dlo("$A5xB")`[0]}, [$toutptr, :32]!
vshr.u64 $temp,`&Dlo("$A6xB")`,#16
vadd.u64 `&Dhi("$A6xB")`,`&Dhi("$A6xB")`,$temp
vld1.64 {$A0xB}, [$tinptr, :128]!
vshr.u64 $temp,`&Dhi("$A6xB")`,#16
vzip.16 `&Dlo("$A6xB")`,`&Dhi("$A6xB")`
vadd.u64 `&Dlo("$A7xB")`,`&Dlo("$A7xB")`,$temp
vst1.32 {`&Dlo("$A6xB")`[0]}, [$toutptr, :32]!
vshr.u64 $temp,`&Dlo("$A7xB")`,#16
vadd.u64 `&Dhi("$A7xB")`,`&Dhi("$A7xB")`,$temp
vld1.64 {$A1xB-$A2xB}, [$tinptr, :256]!
vshr.u64 $temp,`&Dhi("$A7xB")`,#16
vzip.16 `&Dlo("$A7xB")`,`&Dhi("$A7xB")`
subs $inner,$inner,#8
vst1.32 {`&Dlo("$A7xB")`[0]}, [$toutptr, :32]!
bne .LNEON_tail
vst1.32 {${temp}[0]}, [$toutptr, :32] @ top-most bit
sub $nptr,$nptr,$num,lsl#2 @ rewind $nptr
subs $aptr,sp,#0 @ clear carry flag
add $bptr,sp,$num,lsl#2
.LNEON_sub:
ldmia $aptr!, {r4-r7}
ldmia $nptr!, {r8-r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq $aptr,$bptr @ preserves carry
stmia $rptr!, {r8-r11}
bne .LNEON_sub
ldr r10, [$aptr] @ load top-most bit
veor q0,q0,q0
sub r11,$bptr,sp @ this is num*4
veor q1,q1,q1
mov $aptr,sp
sub $rptr,$rptr,r11 @ rewind $rptr
mov $nptr,$bptr @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
.LNEON_copy_n_zap:
ldmia $aptr!, {r4-r7}
ldmia $rptr, {r8-r11}
movcc r8, r4
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
movcc r9, r5
movcc r10,r6
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
movcc r11,r7
ldmia $aptr, {r4-r7}
stmia $rptr!, {r8-r11}
sub $aptr,$aptr,#16
ldmia $rptr, {r8-r11}
movcc r8, r4
vst1.64 {q0-q1}, [$aptr,:256]! @ wipe
movcc r9, r5
movcc r10,r6
vst1.64 {q0-q1}, [$nptr,:256]! @ wipe
movcc r11,r7
teq $aptr,$bptr @ preserves carry
stmia $rptr!, {r8-r11}
bne .LNEON_copy_n_zap
sub sp,ip,#96
vldmia sp!,{d8-d15}
ldmia sp!,{r4-r11}
ret @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
___
}
$code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
$code =~ s/\bret\b/bx lr/gm;
print $code;
close STDOUT;

851
crypto/bn/asm/ia64-mont.pl Normal file
View File

@@ -0,0 +1,851 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# January 2010
#
# "Teaser" Montgomery multiplication module for IA-64. There are
# several possibilities for improvement:
#
# - modulo-scheduling outer loop would eliminate quite a number of
# stalls after ldf8, xma and getf.sig outside inner loop and
# improve shorter key performance;
# - shorter vector support [with input vectors being fetched only
# once] should be added;
# - 2x unroll with help of n0[1] would make the code scalable on
# "wider" IA-64, "wider" than Itanium 2 that is, which is not of
# acute interest, because upcoming Tukwila's individual cores are
# reportedly based on Itanium 2 design;
# - dedicated squaring procedure(?);
#
# January 2010
#
# Shorter vector support is implemented by zero-padding ap and np
# vectors up to 8 elements, or 512 bits. This means that 256-bit
# inputs will be processed only 2 times faster than 512-bit inputs,
# not 4 [as one would expect, because algorithm complexity is n^2].
# The reason for padding is that inputs shorter than 512 bits won't
# be processed faster anyway, because minimal critical path of the
# core loop happens to match 512-bit timing. Either way, it resulted
# in >100% improvement of 512-bit RSA sign benchmark and 50% - of
# 1024-bit one [in comparison to original version of *this* module].
#
# So far 'openssl speed rsa dsa' output on 900MHz Itanium 2 *with*
# this module is:
# sign verify sign/s verify/s
# rsa 512 bits 0.000290s 0.000024s 3452.8 42031.4
# rsa 1024 bits 0.000793s 0.000058s 1261.7 17172.0
# rsa 2048 bits 0.005908s 0.000148s 169.3 6754.0
# rsa 4096 bits 0.033456s 0.000469s 29.9 2133.6
# dsa 512 bits 0.000253s 0.000198s 3949.9 5057.0
# dsa 1024 bits 0.000585s 0.000607s 1708.4 1647.4
# dsa 2048 bits 0.001453s 0.001703s 688.1 587.4
#
# ... and *without* (but still with ia64.S):
#
# rsa 512 bits 0.000670s 0.000041s 1491.8 24145.5
# rsa 1024 bits 0.001988s 0.000080s 502.9 12499.3
# rsa 2048 bits 0.008702s 0.000189s 114.9 5293.9
# rsa 4096 bits 0.043860s 0.000533s 22.8 1875.9
# dsa 512 bits 0.000441s 0.000427s 2265.3 2340.6
# dsa 1024 bits 0.000823s 0.000867s 1215.6 1153.2
# dsa 2048 bits 0.001894s 0.002179s 528.1 458.9
#
# As it can be seen, RSA sign performance improves by 130-30%,
# hereafter less for longer keys, while verify - by 74-13%.
# DSA performance improves by 115-30%.
if ($^O eq "hpux") {
$ADDP="addp4";
for (@ARGV) { $ADDP="add" if (/[\+DD|\-mlp]64/); }
} else { $ADDP="add"; }
$code=<<___;
.explicit
.text
// int bn_mul_mont (BN_ULONG *rp,const BN_ULONG *ap,
// const BN_ULONG *bp,const BN_ULONG *np,
// const BN_ULONG *n0p,int num);
.align 64
.global bn_mul_mont#
.proc bn_mul_mont#
bn_mul_mont:
.prologue
.body
{ .mmi; cmp4.le p6,p7=2,r37;;
(p6) cmp4.lt.unc p8,p9=8,r37
mov ret0=r0 };;
{ .bbb;
(p9) br.cond.dptk.many bn_mul_mont_8
(p8) br.cond.dpnt.many bn_mul_mont_general
(p7) br.ret.spnt.many b0 };;
.endp bn_mul_mont#
prevfs=r2; prevpr=r3; prevlc=r10; prevsp=r11;
rptr=r8; aptr=r9; bptr=r14; nptr=r15;
tptr=r16; // &tp[0]
tp_1=r17; // &tp[-1]
num=r18; len=r19; lc=r20;
topbit=r21; // carry bit from tmp[num]
n0=f6;
m0=f7;
bi=f8;
.align 64
.local bn_mul_mont_general#
.proc bn_mul_mont_general#
bn_mul_mont_general:
.prologue
{ .mmi; .save ar.pfs,prevfs
alloc prevfs=ar.pfs,6,2,0,8
$ADDP aptr=0,in1
.save ar.lc,prevlc
mov prevlc=ar.lc }
{ .mmi; .vframe prevsp
mov prevsp=sp
$ADDP bptr=0,in2
.save pr,prevpr
mov prevpr=pr };;
.body
.rotf alo[6],nlo[4],ahi[8],nhi[6]
.rotr a[3],n[3],t[2]
{ .mmi; ldf8 bi=[bptr],8 // (*bp++)
ldf8 alo[4]=[aptr],16 // ap[0]
$ADDP r30=8,in1 };;
{ .mmi; ldf8 alo[3]=[r30],16 // ap[1]
ldf8 alo[2]=[aptr],16 // ap[2]
$ADDP in4=0,in4 };;
{ .mmi; ldf8 alo[1]=[r30] // ap[3]
ldf8 n0=[in4] // n0
$ADDP rptr=0,in0 }
{ .mmi; $ADDP nptr=0,in3
mov r31=16
zxt4 num=in5 };;
{ .mmi; ldf8 nlo[2]=[nptr],8 // np[0]
shladd len=num,3,r0
shladd r31=num,3,r31 };;
{ .mmi; ldf8 nlo[1]=[nptr],8 // np[1]
add lc=-5,num
sub r31=sp,r31 };;
{ .mfb; and sp=-16,r31 // alloca
xmpy.hu ahi[2]=alo[4],bi // ap[0]*bp[0]
nop.b 0 }
{ .mfb; nop.m 0
xmpy.lu alo[4]=alo[4],bi
brp.loop.imp .L1st_ctop,.L1st_cend-16
};;
{ .mfi; nop.m 0
xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[0]
add tp_1=8,sp }
{ .mfi; nop.m 0
xma.lu alo[3]=alo[3],bi,ahi[2]
mov pr.rot=0x20001f<<16
// ------^----- (p40) at first (p23)
// ----------^^ p[16:20]=1
};;
{ .mfi; nop.m 0
xmpy.lu m0=alo[4],n0 // (ap[0]*bp[0])*n0
mov ar.lc=lc }
{ .mfi; nop.m 0
fcvt.fxu.s1 nhi[1]=f0
mov ar.ec=8 };;
.align 32
.L1st_ctop:
.pred.rel "mutex",p40,p42
{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++)
(p18) xma.hu ahi[0]=alo[2],bi,ahi[1]
(p40) add n[2]=n[2],a[2] } // (p23) }
{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)(p16)
(p18) xma.lu alo[2]=alo[2],bi,ahi[1]
(p42) add n[2]=n[2],a[2],1 };; // (p23)
{ .mfi; (p21) getf.sig a[0]=alo[5]
(p20) xma.hu nhi[0]=nlo[2],m0,nhi[1]
(p42) cmp.leu p41,p39=n[2],a[2] } // (p23)
{ .mfi; (p23) st8 [tp_1]=n[2],8
(p20) xma.lu nlo[2]=nlo[2],m0,nhi[1]
(p40) cmp.ltu p41,p39=n[2],a[2] } // (p23)
{ .mmb; (p21) getf.sig n[0]=nlo[3]
(p16) nop.m 0
br.ctop.sptk .L1st_ctop };;
.L1st_cend:
{ .mmi; getf.sig a[0]=ahi[6] // (p24)
getf.sig n[0]=nhi[4]
add num=-1,num };; // num--
{ .mmi; .pred.rel "mutex",p40,p42
(p40) add n[0]=n[0],a[0]
(p42) add n[0]=n[0],a[0],1
sub aptr=aptr,len };; // rewind
{ .mmi; .pred.rel "mutex",p40,p42
(p40) cmp.ltu p41,p39=n[0],a[0]
(p42) cmp.leu p41,p39=n[0],a[0]
sub nptr=nptr,len };;
{ .mmi; .pred.rel "mutex",p39,p41
(p39) add topbit=r0,r0
(p41) add topbit=r0,r0,1
nop.i 0 }
{ .mmi; st8 [tp_1]=n[0]
add tptr=16,sp
add tp_1=8,sp };;
.Louter:
{ .mmi; ldf8 bi=[bptr],8 // (*bp++)
ldf8 ahi[3]=[tptr] // tp[0]
add r30=8,aptr };;
{ .mmi; ldf8 alo[4]=[aptr],16 // ap[0]
ldf8 alo[3]=[r30],16 // ap[1]
add r31=8,nptr };;
{ .mfb; ldf8 alo[2]=[aptr],16 // ap[2]
xma.hu ahi[2]=alo[4],bi,ahi[3] // ap[0]*bp[i]+tp[0]
brp.loop.imp .Linner_ctop,.Linner_cend-16
}
{ .mfb; ldf8 alo[1]=[r30] // ap[3]
xma.lu alo[4]=alo[4],bi,ahi[3]
clrrrb.pr };;
{ .mfi; ldf8 nlo[2]=[nptr],16 // np[0]
xma.hu ahi[1]=alo[3],bi,ahi[2] // ap[1]*bp[i]
nop.i 0 }
{ .mfi; ldf8 nlo[1]=[r31] // np[1]
xma.lu alo[3]=alo[3],bi,ahi[2]
mov pr.rot=0x20101f<<16
// ------^----- (p40) at first (p23)
// --------^--- (p30) at first (p22)
// ----------^^ p[16:20]=1
};;
{ .mfi; st8 [tptr]=r0 // tp[0] is already accounted
xmpy.lu m0=alo[4],n0 // (ap[0]*bp[i]+tp[0])*n0
mov ar.lc=lc }
{ .mfi;
fcvt.fxu.s1 nhi[1]=f0
mov ar.ec=8 };;
// This loop spins in 4*(n+7) ticks on Itanium 2 and should spin in
// 7*(n+7) ticks on Itanium (the one codenamed Merced). Factor of 7
// in latter case accounts for two-tick pipeline stall, which means
// that its performance would be ~20% lower than optimal one. No
// attempt was made to address this, because original Itanium is
// hardly represented out in the wild...
.align 32
.Linner_ctop:
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p30,p32
{ .mfi; (p16) ldf8 alo[0]=[aptr],8 // *(aptr++)
(p18) xma.hu ahi[0]=alo[2],bi,ahi[1]
(p40) add n[2]=n[2],a[2] } // (p23)
{ .mfi; (p16) nop.m 0
(p18) xma.lu alo[2]=alo[2],bi,ahi[1]
(p42) add n[2]=n[2],a[2],1 };; // (p23)
{ .mfi; (p21) getf.sig a[0]=alo[5]
(p16) nop.f 0
(p40) cmp.ltu p41,p39=n[2],a[2] } // (p23)
{ .mfi; (p21) ld8 t[0]=[tptr],8
(p16) nop.f 0
(p42) cmp.leu p41,p39=n[2],a[2] };; // (p23)
{ .mfi; (p18) ldf8 nlo[0]=[nptr],8 // *(nptr++)
(p20) xma.hu nhi[0]=nlo[2],m0,nhi[1]
(p30) add a[1]=a[1],t[1] } // (p22)
{ .mfi; (p16) nop.m 0
(p20) xma.lu nlo[2]=nlo[2],m0,nhi[1]
(p32) add a[1]=a[1],t[1],1 };; // (p22)
{ .mmi; (p21) getf.sig n[0]=nlo[3]
(p16) nop.m 0
(p30) cmp.ltu p31,p29=a[1],t[1] } // (p22)
{ .mmb; (p23) st8 [tp_1]=n[2],8
(p32) cmp.leu p31,p29=a[1],t[1] // (p22)
br.ctop.sptk .Linner_ctop };;
.Linner_cend:
{ .mmi; getf.sig a[0]=ahi[6] // (p24)
getf.sig n[0]=nhi[4]
nop.i 0 };;
{ .mmi; .pred.rel "mutex",p31,p33
(p31) add a[0]=a[0],topbit
(p33) add a[0]=a[0],topbit,1
mov topbit=r0 };;
{ .mfi; .pred.rel "mutex",p31,p33
(p31) cmp.ltu p32,p30=a[0],topbit
(p33) cmp.leu p32,p30=a[0],topbit
}
{ .mfi; .pred.rel "mutex",p40,p42
(p40) add n[0]=n[0],a[0]
(p42) add n[0]=n[0],a[0],1
};;
{ .mmi; .pred.rel "mutex",p44,p46
(p40) cmp.ltu p41,p39=n[0],a[0]
(p42) cmp.leu p41,p39=n[0],a[0]
(p32) add topbit=r0,r0,1 }
{ .mmi; st8 [tp_1]=n[0],8
cmp4.ne p6,p0=1,num
sub aptr=aptr,len };; // rewind
{ .mmi; sub nptr=nptr,len
(p41) add topbit=r0,r0,1
add tptr=16,sp }
{ .mmb; add tp_1=8,sp
add num=-1,num // num--
(p6) br.cond.sptk.many .Louter };;
{ .mbb; add lc=4,lc
brp.loop.imp .Lsub_ctop,.Lsub_cend-16
clrrrb.pr };;
{ .mii; nop.m 0
mov pr.rot=0x10001<<16
// ------^---- (p33) at first (p17)
mov ar.lc=lc }
{ .mii; nop.m 0
mov ar.ec=3
nop.i 0 };;
.Lsub_ctop:
.pred.rel "mutex",p33,p35
{ .mfi; (p16) ld8 t[0]=[tptr],8 // t=*(tp++)
(p16) nop.f 0
(p33) sub n[1]=t[1],n[1] } // (p17)
{ .mfi; (p16) ld8 n[0]=[nptr],8 // n=*(np++)
(p16) nop.f 0
(p35) sub n[1]=t[1],n[1],1 };; // (p17)
{ .mib; (p18) st8 [rptr]=n[2],8 // *(rp++)=r
(p33) cmp.gtu p34,p32=n[1],t[1] // (p17)
(p18) nop.b 0 }
{ .mib; (p18) nop.m 0
(p35) cmp.geu p34,p32=n[1],t[1] // (p17)
br.ctop.sptk .Lsub_ctop };;
.Lsub_cend:
{ .mmb; .pred.rel "mutex",p34,p36
(p34) sub topbit=topbit,r0 // (p19)
(p36) sub topbit=topbit,r0,1
brp.loop.imp .Lcopy_ctop,.Lcopy_cend-16
}
{ .mmb; sub rptr=rptr,len // rewind
sub tptr=tptr,len
clrrrb.pr };;
{ .mmi; and aptr=tptr,topbit
andcm bptr=rptr,topbit
mov pr.rot=1<<16 };;
{ .mii; or nptr=aptr,bptr
mov ar.lc=lc
mov ar.ec=3 };;
.Lcopy_ctop:
{ .mmb; (p16) ld8 n[0]=[nptr],8
(p18) st8 [tptr]=r0,8
(p16) nop.b 0 }
{ .mmb; (p16) nop.m 0
(p18) st8 [rptr]=n[2],8
br.ctop.sptk .Lcopy_ctop };;
.Lcopy_cend:
{ .mmi; mov ret0=1 // signal "handled"
rum 1<<5 // clear um.mfh
mov ar.lc=prevlc }
{ .mib; .restore sp
mov sp=prevsp
mov pr=prevpr,0x1ffff
br.ret.sptk.many b0 };;
.endp bn_mul_mont_general#
a1=r16; a2=r17; a3=r18; a4=r19; a5=r20; a6=r21; a7=r22; a8=r23;
n1=r24; n2=r25; n3=r26; n4=r27; n5=r28; n6=r29; n7=r30; n8=r31;
t0=r15;
ai0=f8; ai1=f9; ai2=f10; ai3=f11; ai4=f12; ai5=f13; ai6=f14; ai7=f15;
ni0=f16; ni1=f17; ni2=f18; ni3=f19; ni4=f20; ni5=f21; ni6=f22; ni7=f23;
.align 64
.skip 48 // aligns loop body
.local bn_mul_mont_8#
.proc bn_mul_mont_8#
bn_mul_mont_8:
.prologue
{ .mmi; .save ar.pfs,prevfs
alloc prevfs=ar.pfs,6,2,0,8
.vframe prevsp
mov prevsp=sp
.save ar.lc,prevlc
mov prevlc=ar.lc }
{ .mmi; add r17=-6*16,sp
add sp=-7*16,sp
.save pr,prevpr
mov prevpr=pr };;
{ .mmi; .save.gf 0,0x10
stf.spill [sp]=f16,-16
.save.gf 0,0x20
stf.spill [r17]=f17,32
add r16=-5*16,prevsp};;
{ .mmi; .save.gf 0,0x40
stf.spill [r16]=f18,32
.save.gf 0,0x80
stf.spill [r17]=f19,32
$ADDP aptr=0,in1 };;
{ .mmi; .save.gf 0,0x100
stf.spill [r16]=f20,32
.save.gf 0,0x200
stf.spill [r17]=f21,32
$ADDP r29=8,in1 };;
{ .mmi; .save.gf 0,0x400
stf.spill [r16]=f22
.save.gf 0,0x800
stf.spill [r17]=f23
$ADDP rptr=0,in0 };;
.body
.rotf bj[8],mj[2],tf[2],alo[10],ahi[10],nlo[10],nhi[10]
.rotr t[8]
// load input vectors padding them to 8 elements
{ .mmi; ldf8 ai0=[aptr],16 // ap[0]
ldf8 ai1=[r29],16 // ap[1]
$ADDP bptr=0,in2 }
{ .mmi; $ADDP r30=8,in2
$ADDP nptr=0,in3
$ADDP r31=8,in3 };;
{ .mmi; ldf8 bj[7]=[bptr],16 // bp[0]
ldf8 bj[6]=[r30],16 // bp[1]
cmp4.le p4,p5=3,in5 }
{ .mmi; ldf8 ni0=[nptr],16 // np[0]
ldf8 ni1=[r31],16 // np[1]
cmp4.le p6,p7=4,in5 };;
{ .mfi; (p4)ldf8 ai2=[aptr],16 // ap[2]
(p5)fcvt.fxu ai2=f0
cmp4.le p8,p9=5,in5 }
{ .mfi; (p6)ldf8 ai3=[r29],16 // ap[3]
(p7)fcvt.fxu ai3=f0
cmp4.le p10,p11=6,in5 }
{ .mfi; (p4)ldf8 bj[5]=[bptr],16 // bp[2]
(p5)fcvt.fxu bj[5]=f0
cmp4.le p12,p13=7,in5 }
{ .mfi; (p6)ldf8 bj[4]=[r30],16 // bp[3]
(p7)fcvt.fxu bj[4]=f0
cmp4.le p14,p15=8,in5 }
{ .mfi; (p4)ldf8 ni2=[nptr],16 // np[2]
(p5)fcvt.fxu ni2=f0
addp4 r28=-1,in5 }
{ .mfi; (p6)ldf8 ni3=[r31],16 // np[3]
(p7)fcvt.fxu ni3=f0
$ADDP in4=0,in4 };;
{ .mfi; ldf8 n0=[in4]
fcvt.fxu tf[1]=f0
nop.i 0 }
{ .mfi; (p8)ldf8 ai4=[aptr],16 // ap[4]
(p9)fcvt.fxu ai4=f0
mov t[0]=r0 }
{ .mfi; (p10)ldf8 ai5=[r29],16 // ap[5]
(p11)fcvt.fxu ai5=f0
mov t[1]=r0 }
{ .mfi; (p8)ldf8 bj[3]=[bptr],16 // bp[4]
(p9)fcvt.fxu bj[3]=f0
mov t[2]=r0 }
{ .mfi; (p10)ldf8 bj[2]=[r30],16 // bp[5]
(p11)fcvt.fxu bj[2]=f0
mov t[3]=r0 }
{ .mfi; (p8)ldf8 ni4=[nptr],16 // np[4]
(p9)fcvt.fxu ni4=f0
mov t[4]=r0 }
{ .mfi; (p10)ldf8 ni5=[r31],16 // np[5]
(p11)fcvt.fxu ni5=f0
mov t[5]=r0 };;
{ .mfi; (p12)ldf8 ai6=[aptr],16 // ap[6]
(p13)fcvt.fxu ai6=f0
mov t[6]=r0 }
{ .mfi; (p14)ldf8 ai7=[r29],16 // ap[7]
(p15)fcvt.fxu ai7=f0
mov t[7]=r0 }
{ .mfi; (p12)ldf8 bj[1]=[bptr],16 // bp[6]
(p13)fcvt.fxu bj[1]=f0
mov ar.lc=r28 }
{ .mfi; (p14)ldf8 bj[0]=[r30],16 // bp[7]
(p15)fcvt.fxu bj[0]=f0
mov ar.ec=1 }
{ .mfi; (p12)ldf8 ni6=[nptr],16 // np[6]
(p13)fcvt.fxu ni6=f0
mov pr.rot=1<<16 }
{ .mfb; (p14)ldf8 ni7=[r31],16 // np[7]
(p15)fcvt.fxu ni7=f0
brp.loop.imp .Louter_8_ctop,.Louter_8_cend-16
};;
// The loop is scheduled for 32*n ticks on Itanium 2. Actual attempt
// to measure with help of Interval Time Counter indicated that the
// factor is a tad higher: 33 or 34, if not 35. Exact measurement and
// addressing the issue is problematic, because I don't have access
// to platform-specific instruction-level profiler. On Itanium it
// should run in 56*n ticks, because of higher xma latency...
.Louter_8_ctop:
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p48,p50
{ .mfi; (p16) nop.m 0 // 0:
(p16) xma.hu ahi[0]=ai0,bj[7],tf[1] // ap[0]*b[i]+t[0]
(p40) add a3=a3,n3 } // (p17) a3+=n3
{ .mfi; (p42) add a3=a3,n3,1
(p16) xma.lu alo[0]=ai0,bj[7],tf[1]
(p16) nop.i 0 };;
{ .mii; (p17) getf.sig a7=alo[8] // 1:
(p48) add t[6]=t[6],a3 // (p17) t[6]+=a3
(p50) add t[6]=t[6],a3,1 };;
{ .mfi; (p17) getf.sig a8=ahi[8] // 2:
(p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0
(p40) cmp.ltu p43,p41=a3,n3 }
{ .mfi; (p42) cmp.leu p43,p41=a3,n3
(p17) xma.lu nlo[7]=ni6,mj[1],nhi[6]
(p16) nop.i 0 };;
{ .mii; (p17) getf.sig n5=nlo[6] // 3:
(p48) cmp.ltu p51,p49=t[6],a3
(p50) cmp.leu p51,p49=t[6],a3 };;
.pred.rel "mutex",p41,p43
.pred.rel "mutex",p49,p51
{ .mfi; (p16) nop.m 0 // 4:
(p16) xma.hu ahi[1]=ai1,bj[7],ahi[0] // ap[1]*b[i]
(p41) add a4=a4,n4 } // (p17) a4+=n4
{ .mfi; (p43) add a4=a4,n4,1
(p16) xma.lu alo[1]=ai1,bj[7],ahi[0]
(p16) nop.i 0 };;
{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4
(p16) xmpy.lu mj[0]=alo[0],n0 // (ap[0]*b[i]+t[0])*n0
(p51) add t[5]=t[5],a4,1 };;
{ .mfi; (p16) nop.m 0 // 6:
(p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0
(p41) cmp.ltu p42,p40=a4,n4 }
{ .mfi; (p43) cmp.leu p42,p40=a4,n4
(p17) xma.lu nlo[8]=ni7,mj[1],nhi[7]
(p16) nop.i 0 };;
{ .mii; (p17) getf.sig n6=nlo[7] // 7:
(p49) cmp.ltu p50,p48=t[5],a4
(p51) cmp.leu p50,p48=t[5],a4 };;
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p48,p50
{ .mfi; (p16) nop.m 0 // 8:
(p16) xma.hu ahi[2]=ai2,bj[7],ahi[1] // ap[2]*b[i]
(p40) add a5=a5,n5 } // (p17) a5+=n5
{ .mfi; (p42) add a5=a5,n5,1
(p16) xma.lu alo[2]=ai2,bj[7],ahi[1]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig a1=alo[1] // 9:
(p48) add t[4]=t[4],a5 // p(17) t[4]+=a5
(p50) add t[4]=t[4],a5,1 };;
{ .mfi; (p16) nop.m 0 // 10:
(p16) xma.hu nhi[0]=ni0,mj[0],alo[0] // np[0]*m0
(p40) cmp.ltu p43,p41=a5,n5 }
{ .mfi; (p42) cmp.leu p43,p41=a5,n5
(p16) xma.lu nlo[0]=ni0,mj[0],alo[0]
(p16) nop.i 0 };;
{ .mii; (p17) getf.sig n7=nlo[8] // 11:
(p48) cmp.ltu p51,p49=t[4],a5
(p50) cmp.leu p51,p49=t[4],a5 };;
.pred.rel "mutex",p41,p43
.pred.rel "mutex",p49,p51
{ .mfi; (p17) getf.sig n8=nhi[8] // 12:
(p16) xma.hu ahi[3]=ai3,bj[7],ahi[2] // ap[3]*b[i]
(p41) add a6=a6,n6 } // (p17) a6+=n6
{ .mfi; (p43) add a6=a6,n6,1
(p16) xma.lu alo[3]=ai3,bj[7],ahi[2]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig a2=alo[2] // 13:
(p49) add t[3]=t[3],a6 // (p17) t[3]+=a6
(p51) add t[3]=t[3],a6,1 };;
{ .mfi; (p16) nop.m 0 // 14:
(p16) xma.hu nhi[1]=ni1,mj[0],nhi[0] // np[1]*m0
(p41) cmp.ltu p42,p40=a6,n6 }
{ .mfi; (p43) cmp.leu p42,p40=a6,n6
(p16) xma.lu nlo[1]=ni1,mj[0],nhi[0]
(p16) nop.i 0 };;
{ .mii; (p16) nop.m 0 // 15:
(p49) cmp.ltu p50,p48=t[3],a6
(p51) cmp.leu p50,p48=t[3],a6 };;
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p48,p50
{ .mfi; (p16) nop.m 0 // 16:
(p16) xma.hu ahi[4]=ai4,bj[7],ahi[3] // ap[4]*b[i]
(p40) add a7=a7,n7 } // (p17) a7+=n7
{ .mfi; (p42) add a7=a7,n7,1
(p16) xma.lu alo[4]=ai4,bj[7],ahi[3]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig a3=alo[3] // 17:
(p48) add t[2]=t[2],a7 // (p17) t[2]+=a7
(p50) add t[2]=t[2],a7,1 };;
{ .mfi; (p16) nop.m 0 // 18:
(p16) xma.hu nhi[2]=ni2,mj[0],nhi[1] // np[2]*m0
(p40) cmp.ltu p43,p41=a7,n7 }
{ .mfi; (p42) cmp.leu p43,p41=a7,n7
(p16) xma.lu nlo[2]=ni2,mj[0],nhi[1]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig n1=nlo[1] // 19:
(p48) cmp.ltu p51,p49=t[2],a7
(p50) cmp.leu p51,p49=t[2],a7 };;
.pred.rel "mutex",p41,p43
.pred.rel "mutex",p49,p51
{ .mfi; (p16) nop.m 0 // 20:
(p16) xma.hu ahi[5]=ai5,bj[7],ahi[4] // ap[5]*b[i]
(p41) add a8=a8,n8 } // (p17) a8+=n8
{ .mfi; (p43) add a8=a8,n8,1
(p16) xma.lu alo[5]=ai5,bj[7],ahi[4]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig a4=alo[4] // 21:
(p49) add t[1]=t[1],a8 // (p17) t[1]+=a8
(p51) add t[1]=t[1],a8,1 };;
{ .mfi; (p16) nop.m 0 // 22:
(p16) xma.hu nhi[3]=ni3,mj[0],nhi[2] // np[3]*m0
(p41) cmp.ltu p42,p40=a8,n8 }
{ .mfi; (p43) cmp.leu p42,p40=a8,n8
(p16) xma.lu nlo[3]=ni3,mj[0],nhi[2]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig n2=nlo[2] // 23:
(p49) cmp.ltu p50,p48=t[1],a8
(p51) cmp.leu p50,p48=t[1],a8 };;
{ .mfi; (p16) nop.m 0 // 24:
(p16) xma.hu ahi[6]=ai6,bj[7],ahi[5] // ap[6]*b[i]
(p16) add a1=a1,n1 } // (p16) a1+=n1
{ .mfi; (p16) nop.m 0
(p16) xma.lu alo[6]=ai6,bj[7],ahi[5]
(p17) mov t[0]=r0 };;
{ .mii; (p16) getf.sig a5=alo[5] // 25:
(p16) add t0=t[7],a1 // (p16) t[7]+=a1
(p42) add t[0]=t[0],r0,1 };;
{ .mfi; (p16) setf.sig tf[0]=t0 // 26:
(p16) xma.hu nhi[4]=ni4,mj[0],nhi[3] // np[4]*m0
(p50) add t[0]=t[0],r0,1 }
{ .mfi; (p16) cmp.ltu.unc p42,p40=a1,n1
(p16) xma.lu nlo[4]=ni4,mj[0],nhi[3]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig n3=nlo[3] // 27:
(p16) cmp.ltu.unc p50,p48=t0,a1
(p16) nop.i 0 };;
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p48,p50
{ .mfi; (p16) nop.m 0 // 28:
(p16) xma.hu ahi[7]=ai7,bj[7],ahi[6] // ap[7]*b[i]
(p40) add a2=a2,n2 } // (p16) a2+=n2
{ .mfi; (p42) add a2=a2,n2,1
(p16) xma.lu alo[7]=ai7,bj[7],ahi[6]
(p16) nop.i 0 };;
{ .mii; (p16) getf.sig a6=alo[6] // 29:
(p48) add t[6]=t[6],a2 // (p16) t[6]+=a2
(p50) add t[6]=t[6],a2,1 };;
{ .mfi; (p16) nop.m 0 // 30:
(p16) xma.hu nhi[5]=ni5,mj[0],nhi[4] // np[5]*m0
(p40) cmp.ltu p41,p39=a2,n2 }
{ .mfi; (p42) cmp.leu p41,p39=a2,n2
(p16) xma.lu nlo[5]=ni5,mj[0],nhi[4]
(p16) nop.i 0 };;
{ .mfi; (p16) getf.sig n4=nlo[4] // 31:
(p16) nop.f 0
(p48) cmp.ltu p49,p47=t[6],a2 }
{ .mfb; (p50) cmp.leu p49,p47=t[6],a2
(p16) nop.f 0
br.ctop.sptk.many .Louter_8_ctop };;
.Louter_8_cend:
// above loop has to execute one more time, without (p16), which is
// replaced with merged move of np[8] to GPR bank
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p48,p50
{ .mmi; (p0) getf.sig n1=ni0 // 0:
(p40) add a3=a3,n3 // (p17) a3+=n3
(p42) add a3=a3,n3,1 };;
{ .mii; (p17) getf.sig a7=alo[8] // 1:
(p48) add t[6]=t[6],a3 // (p17) t[6]+=a3
(p50) add t[6]=t[6],a3,1 };;
{ .mfi; (p17) getf.sig a8=ahi[8] // 2:
(p17) xma.hu nhi[7]=ni6,mj[1],nhi[6] // np[6]*m0
(p40) cmp.ltu p43,p41=a3,n3 }
{ .mfi; (p42) cmp.leu p43,p41=a3,n3
(p17) xma.lu nlo[7]=ni6,mj[1],nhi[6]
(p0) nop.i 0 };;
{ .mii; (p17) getf.sig n5=nlo[6] // 3:
(p48) cmp.ltu p51,p49=t[6],a3
(p50) cmp.leu p51,p49=t[6],a3 };;
.pred.rel "mutex",p41,p43
.pred.rel "mutex",p49,p51
{ .mmi; (p0) getf.sig n2=ni1 // 4:
(p41) add a4=a4,n4 // (p17) a4+=n4
(p43) add a4=a4,n4,1 };;
{ .mfi; (p49) add t[5]=t[5],a4 // 5: (p17) t[5]+=a4
(p0) nop.f 0
(p51) add t[5]=t[5],a4,1 };;
{ .mfi; (p0) getf.sig n3=ni2 // 6:
(p17) xma.hu nhi[8]=ni7,mj[1],nhi[7] // np[7]*m0
(p41) cmp.ltu p42,p40=a4,n4 }
{ .mfi; (p43) cmp.leu p42,p40=a4,n4
(p17) xma.lu nlo[8]=ni7,mj[1],nhi[7]
(p0) nop.i 0 };;
{ .mii; (p17) getf.sig n6=nlo[7] // 7:
(p49) cmp.ltu p50,p48=t[5],a4
(p51) cmp.leu p50,p48=t[5],a4 };;
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p48,p50
{ .mii; (p0) getf.sig n4=ni3 // 8:
(p40) add a5=a5,n5 // (p17) a5+=n5
(p42) add a5=a5,n5,1 };;
{ .mii; (p0) nop.m 0 // 9:
(p48) add t[4]=t[4],a5 // p(17) t[4]+=a5
(p50) add t[4]=t[4],a5,1 };;
{ .mii; (p0) nop.m 0 // 10:
(p40) cmp.ltu p43,p41=a5,n5
(p42) cmp.leu p43,p41=a5,n5 };;
{ .mii; (p17) getf.sig n7=nlo[8] // 11:
(p48) cmp.ltu p51,p49=t[4],a5
(p50) cmp.leu p51,p49=t[4],a5 };;
.pred.rel "mutex",p41,p43
.pred.rel "mutex",p49,p51
{ .mii; (p17) getf.sig n8=nhi[8] // 12:
(p41) add a6=a6,n6 // (p17) a6+=n6
(p43) add a6=a6,n6,1 };;
{ .mii; (p0) getf.sig n5=ni4 // 13:
(p49) add t[3]=t[3],a6 // (p17) t[3]+=a6
(p51) add t[3]=t[3],a6,1 };;
{ .mii; (p0) nop.m 0 // 14:
(p41) cmp.ltu p42,p40=a6,n6
(p43) cmp.leu p42,p40=a6,n6 };;
{ .mii; (p0) getf.sig n6=ni5 // 15:
(p49) cmp.ltu p50,p48=t[3],a6
(p51) cmp.leu p50,p48=t[3],a6 };;
.pred.rel "mutex",p40,p42
.pred.rel "mutex",p48,p50
{ .mii; (p0) nop.m 0 // 16:
(p40) add a7=a7,n7 // (p17) a7+=n7
(p42) add a7=a7,n7,1 };;
{ .mii; (p0) nop.m 0 // 17:
(p48) add t[2]=t[2],a7 // (p17) t[2]+=a7
(p50) add t[2]=t[2],a7,1 };;
{ .mii; (p0) nop.m 0 // 18:
(p40) cmp.ltu p43,p41=a7,n7
(p42) cmp.leu p43,p41=a7,n7 };;
{ .mii; (p0) getf.sig n7=ni6 // 19:
(p48) cmp.ltu p51,p49=t[2],a7
(p50) cmp.leu p51,p49=t[2],a7 };;
.pred.rel "mutex",p41,p43
.pred.rel "mutex",p49,p51
{ .mii; (p0) nop.m 0 // 20:
(p41) add a8=a8,n8 // (p17) a8+=n8
(p43) add a8=a8,n8,1 };;
{ .mmi; (p0) nop.m 0 // 21:
(p49) add t[1]=t[1],a8 // (p17) t[1]+=a8
(p51) add t[1]=t[1],a8,1 }
{ .mmi; (p17) mov t[0]=r0
(p41) cmp.ltu p42,p40=a8,n8
(p43) cmp.leu p42,p40=a8,n8 };;
{ .mmi; (p0) getf.sig n8=ni7 // 22:
(p49) cmp.ltu p50,p48=t[1],a8
(p51) cmp.leu p50,p48=t[1],a8 }
{ .mmi; (p42) add t[0]=t[0],r0,1
(p0) add r16=-7*16,prevsp
(p0) add r17=-6*16,prevsp };;
// subtract np[8] from carrybit|tmp[8]
// carrybit|tmp[8] layout upon exit from above loop is:
// t[0]|t[1]|t[2]|t[3]|t[4]|t[5]|t[6]|t[7]|t0 (least significant)
{ .mmi; (p50)add t[0]=t[0],r0,1
add r18=-5*16,prevsp
sub n1=t0,n1 };;
{ .mmi; cmp.gtu p34,p32=n1,t0;;
.pred.rel "mutex",p32,p34
(p32)sub n2=t[7],n2
(p34)sub n2=t[7],n2,1 };;
{ .mii; (p32)cmp.gtu p35,p33=n2,t[7]
(p34)cmp.geu p35,p33=n2,t[7];;
.pred.rel "mutex",p33,p35
(p33)sub n3=t[6],n3 }
{ .mmi; (p35)sub n3=t[6],n3,1;;
(p33)cmp.gtu p34,p32=n3,t[6]
(p35)cmp.geu p34,p32=n3,t[6] };;
.pred.rel "mutex",p32,p34
{ .mii; (p32)sub n4=t[5],n4
(p34)sub n4=t[5],n4,1;;
(p32)cmp.gtu p35,p33=n4,t[5] }
{ .mmi; (p34)cmp.geu p35,p33=n4,t[5];;
.pred.rel "mutex",p33,p35
(p33)sub n5=t[4],n5
(p35)sub n5=t[4],n5,1 };;
{ .mii; (p33)cmp.gtu p34,p32=n5,t[4]
(p35)cmp.geu p34,p32=n5,t[4];;
.pred.rel "mutex",p32,p34
(p32)sub n6=t[3],n6 }
{ .mmi; (p34)sub n6=t[3],n6,1;;
(p32)cmp.gtu p35,p33=n6,t[3]
(p34)cmp.geu p35,p33=n6,t[3] };;
.pred.rel "mutex",p33,p35
{ .mii; (p33)sub n7=t[2],n7
(p35)sub n7=t[2],n7,1;;
(p33)cmp.gtu p34,p32=n7,t[2] }
{ .mmi; (p35)cmp.geu p34,p32=n7,t[2];;
.pred.rel "mutex",p32,p34
(p32)sub n8=t[1],n8
(p34)sub n8=t[1],n8,1 };;
{ .mii; (p32)cmp.gtu p35,p33=n8,t[1]
(p34)cmp.geu p35,p33=n8,t[1];;
.pred.rel "mutex",p33,p35
(p33)sub a8=t[0],r0 }
{ .mmi; (p35)sub a8=t[0],r0,1;;
(p33)cmp.gtu p34,p32=a8,t[0]
(p35)cmp.geu p34,p32=a8,t[0] };;
// save the result, either tmp[num] or tmp[num]-np[num]
.pred.rel "mutex",p32,p34
{ .mmi; (p32)st8 [rptr]=n1,8
(p34)st8 [rptr]=t0,8
add r19=-4*16,prevsp};;
{ .mmb; (p32)st8 [rptr]=n2,8
(p34)st8 [rptr]=t[7],8
(p5)br.cond.dpnt.few .Ldone };;
{ .mmb; (p32)st8 [rptr]=n3,8
(p34)st8 [rptr]=t[6],8
(p7)br.cond.dpnt.few .Ldone };;
{ .mmb; (p32)st8 [rptr]=n4,8
(p34)st8 [rptr]=t[5],8
(p9)br.cond.dpnt.few .Ldone };;
{ .mmb; (p32)st8 [rptr]=n5,8
(p34)st8 [rptr]=t[4],8
(p11)br.cond.dpnt.few .Ldone };;
{ .mmb; (p32)st8 [rptr]=n6,8
(p34)st8 [rptr]=t[3],8
(p13)br.cond.dpnt.few .Ldone };;
{ .mmb; (p32)st8 [rptr]=n7,8
(p34)st8 [rptr]=t[2],8
(p15)br.cond.dpnt.few .Ldone };;
{ .mmb; (p32)st8 [rptr]=n8,8
(p34)st8 [rptr]=t[1],8
nop.b 0 };;
.Ldone: // epilogue
{ .mmi; ldf.fill f16=[r16],64
ldf.fill f17=[r17],64
nop.i 0 }
{ .mmi; ldf.fill f18=[r18],64
ldf.fill f19=[r19],64
mov pr=prevpr,0x1ffff };;
{ .mmi; ldf.fill f20=[r16]
ldf.fill f21=[r17]
mov ar.lc=prevlc }
{ .mmi; ldf.fill f22=[r18]
ldf.fill f23=[r19]
mov ret0=1 } // signal "handled"
{ .mib; rum 1<<5
.restore sp
mov sp=prevsp
br.ret.sptk.many b0 };;
.endp bn_mul_mont_8#
.type copyright#,\@object
copyright:
stringz "Montgomery multiplication for IA-64, CRYPTOGAMS by <appro\@openssl.org>"
___
$output=shift and open STDOUT,">$output";
print $code;
close STDOUT;

426
crypto/bn/asm/mips-mont.pl Normal file
View File

@@ -0,0 +1,426 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# This module doesn't present direct interest for OpenSSL, because it
# doesn't provide better performance for longer keys, at least not on
# in-order-execution cores. While 512-bit RSA sign operations can be
# 65% faster in 64-bit mode, 1024-bit ones are only 15% faster, and
# 4096-bit ones are up to 15% slower. In 32-bit mode it varies from
# 16% improvement for 512-bit RSA sign to -33% for 4096-bit RSA
# verify:-( All comparisons are against bn_mul_mont-free assembler.
# The module might be of interest to embedded system developers, as
# the code is smaller than 1KB, yet offers >3x improvement on MIPS64
# and 75-30% [less for longer keys] on MIPS32 over compiler-generated
# code.
######################################################################
# There is a number of MIPS ABI in use, O32 and N32/64 are most
# widely used. Then there is a new contender: NUBI. It appears that if
# one picks the latter, it's possible to arrange code in ABI neutral
# manner. Therefore let's stick to NUBI register layout:
#
($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
#
# The return value is placed in $a0. Following coding rules facilitate
# interoperability:
#
# - never ever touch $tp, "thread pointer", former $gp;
# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
# old code];
# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
#
# For reference here is register layout for N32/64 MIPS ABIs:
#
# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
#
$flavour = shift || "o32"; # supported flavours are o32,n32,64,nubi32,nubi64
if ($flavour =~ /64|n32/i) {
$PTR_ADD="dadd"; # incidentally works even on n32
$PTR_SUB="dsub"; # incidentally works even on n32
$REG_S="sd";
$REG_L="ld";
$SZREG=8;
} else {
$PTR_ADD="add";
$PTR_SUB="sub";
$REG_S="sw";
$REG_L="lw";
$SZREG=4;
}
$SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 0x00fff000 : 0x00ff0000;
#
# <appro@openssl.org>
#
######################################################################
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
if ($flavour =~ /64|n32/i) {
$LD="ld";
$ST="sd";
$MULTU="dmultu";
$ADDU="daddu";
$SUBU="dsubu";
$BNSZ=8;
} else {
$LD="lw";
$ST="sw";
$MULTU="multu";
$ADDU="addu";
$SUBU="subu";
$BNSZ=4;
}
# int bn_mul_mont(
$rp=$a0; # BN_ULONG *rp,
$ap=$a1; # const BN_ULONG *ap,
$bp=$a2; # const BN_ULONG *bp,
$np=$a3; # const BN_ULONG *np,
$n0=$a4; # const BN_ULONG *n0,
$num=$a5; # int num);
$lo0=$a6;
$hi0=$a7;
$lo1=$t1;
$hi1=$t2;
$aj=$s0;
$bi=$s1;
$nj=$s2;
$tp=$s3;
$alo=$s4;
$ahi=$s5;
$nlo=$s6;
$nhi=$s7;
$tj=$s8;
$i=$s9;
$j=$s10;
$m1=$s11;
$FRAMESIZE=14;
$code=<<___;
.text
.set noat
.set noreorder
.align 5
.globl bn_mul_mont
.ent bn_mul_mont
bn_mul_mont:
___
$code.=<<___ if ($flavour =~ /o32/i);
lw $n0,16($sp)
lw $num,20($sp)
___
$code.=<<___;
slt $at,$num,4
bnez $at,1f
li $t0,0
slt $at,$num,17 # on in-order CPU
bnez $at,bn_mul_mont_internal
nop
1: jr $ra
li $a0,0
.end bn_mul_mont
.align 5
.ent bn_mul_mont_internal
bn_mul_mont_internal:
.frame $fp,$FRAMESIZE*$SZREG,$ra
.mask 0x40000000|$SAVED_REGS_MASK,-$SZREG
$PTR_SUB $sp,$FRAMESIZE*$SZREG
$REG_S $fp,($FRAMESIZE-1)*$SZREG($sp)
$REG_S $s11,($FRAMESIZE-2)*$SZREG($sp)
$REG_S $s10,($FRAMESIZE-3)*$SZREG($sp)
$REG_S $s9,($FRAMESIZE-4)*$SZREG($sp)
$REG_S $s8,($FRAMESIZE-5)*$SZREG($sp)
$REG_S $s7,($FRAMESIZE-6)*$SZREG($sp)
$REG_S $s6,($FRAMESIZE-7)*$SZREG($sp)
$REG_S $s5,($FRAMESIZE-8)*$SZREG($sp)
$REG_S $s4,($FRAMESIZE-9)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
$REG_S $s3,($FRAMESIZE-10)*$SZREG($sp)
$REG_S $s2,($FRAMESIZE-11)*$SZREG($sp)
$REG_S $s1,($FRAMESIZE-12)*$SZREG($sp)
$REG_S $s0,($FRAMESIZE-13)*$SZREG($sp)
___
$code.=<<___;
move $fp,$sp
.set reorder
$LD $n0,0($n0)
$LD $bi,0($bp) # bp[0]
$LD $aj,0($ap) # ap[0]
$LD $nj,0($np) # np[0]
$PTR_SUB $sp,2*$BNSZ # place for two extra words
sll $num,`log($BNSZ)/log(2)`
li $at,-4096
$PTR_SUB $sp,$num
and $sp,$at
$MULTU $aj,$bi
$LD $alo,$BNSZ($ap)
$LD $nlo,$BNSZ($np)
mflo $lo0
mfhi $hi0
$MULTU $lo0,$n0
mflo $m1
$MULTU $alo,$bi
mflo $alo
mfhi $ahi
$MULTU $nj,$m1
mflo $lo1
mfhi $hi1
$MULTU $nlo,$m1
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$ADDU $hi1,$at
mflo $nlo
mfhi $nhi
move $tp,$sp
li $j,2*$BNSZ
.align 4
.L1st:
.set noreorder
$PTR_ADD $aj,$ap,$j
$PTR_ADD $nj,$np,$j
$LD $aj,($aj)
$LD $nj,($nj)
$MULTU $aj,$bi
$ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0
mflo $alo
mfhi $ahi
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$MULTU $nj,$m1
$ADDU $hi1,$at
addu $j,$BNSZ
$ST $lo1,($tp)
sltu $t0,$j,$num
mflo $nlo
mfhi $nhi
bnez $t0,.L1st
$PTR_ADD $tp,$BNSZ
.set reorder
$ADDU $lo0,$alo,$hi0
sltu $at,$lo0,$hi0
$ADDU $hi0,$ahi,$at
$ADDU $lo1,$nlo,$hi1
sltu $t0,$lo1,$hi1
$ADDU $hi1,$nhi,$t0
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$ADDU $hi1,$at
$ST $lo1,($tp)
$ADDU $hi1,$hi0
sltu $at,$hi1,$hi0
$ST $hi1,$BNSZ($tp)
$ST $at,2*$BNSZ($tp)
li $i,$BNSZ
.align 4
.Louter:
$PTR_ADD $bi,$bp,$i
$LD $bi,($bi)
$LD $aj,($ap)
$LD $alo,$BNSZ($ap)
$LD $tj,($sp)
$MULTU $aj,$bi
$LD $nj,($np)
$LD $nlo,$BNSZ($np)
mflo $lo0
mfhi $hi0
$ADDU $lo0,$tj
$MULTU $lo0,$n0
sltu $at,$lo0,$tj
$ADDU $hi0,$at
mflo $m1
$MULTU $alo,$bi
mflo $alo
mfhi $ahi
$MULTU $nj,$m1
mflo $lo1
mfhi $hi1
$MULTU $nlo,$m1
$ADDU $lo1,$lo0
sltu $at,$lo1,$lo0
$ADDU $hi1,$at
mflo $nlo
mfhi $nhi
move $tp,$sp
li $j,2*$BNSZ
$LD $tj,$BNSZ($tp)
.align 4
.Linner:
.set noreorder
$PTR_ADD $aj,$ap,$j
$PTR_ADD $nj,$np,$j
$LD $aj,($aj)
$LD $nj,($nj)
$MULTU $aj,$bi
$ADDU $lo0,$alo,$hi0
$ADDU $lo1,$nlo,$hi1
sltu $at,$lo0,$hi0
sltu $t0,$lo1,$hi1
$ADDU $hi0,$ahi,$at
$ADDU $hi1,$nhi,$t0
mflo $alo
mfhi $ahi
$ADDU $lo0,$tj
addu $j,$BNSZ
$MULTU $nj,$m1
sltu $at,$lo0,$tj
$ADDU $lo1,$lo0
$ADDU $hi0,$at
sltu $t0,$lo1,$lo0
$LD $tj,2*$BNSZ($tp)
$ADDU $hi1,$t0
sltu $at,$j,$num
mflo $nlo
mfhi $nhi
$ST $lo1,($tp)
bnez $at,.Linner
$PTR_ADD $tp,$BNSZ
.set reorder
$ADDU $lo0,$alo,$hi0
sltu $at,$lo0,$hi0
$ADDU $hi0,$ahi,$at
$ADDU $lo0,$tj
sltu $t0,$lo0,$tj
$ADDU $hi0,$t0
$LD $tj,2*$BNSZ($tp)
$ADDU $lo1,$nlo,$hi1
sltu $at,$lo1,$hi1
$ADDU $hi1,$nhi,$at
$ADDU $lo1,$lo0
sltu $t0,$lo1,$lo0
$ADDU $hi1,$t0
$ST $lo1,($tp)
$ADDU $lo1,$hi1,$hi0
sltu $hi1,$lo1,$hi0
$ADDU $lo1,$tj
sltu $at,$lo1,$tj
$ADDU $hi1,$at
$ST $lo1,$BNSZ($tp)
$ST $hi1,2*$BNSZ($tp)
addu $i,$BNSZ
sltu $t0,$i,$num
bnez $t0,.Louter
.set noreorder
$PTR_ADD $tj,$sp,$num # &tp[num]
move $tp,$sp
move $ap,$sp
li $hi0,0 # clear borrow bit
.align 4
.Lsub: $LD $lo0,($tp)
$LD $lo1,($np)
$PTR_ADD $tp,$BNSZ
$PTR_ADD $np,$BNSZ
$SUBU $lo1,$lo0,$lo1 # tp[i]-np[i]
sgtu $at,$lo1,$lo0
$SUBU $lo0,$lo1,$hi0
sgtu $hi0,$lo0,$lo1
$ST $lo0,($rp)
or $hi0,$at
sltu $at,$tp,$tj
bnez $at,.Lsub
$PTR_ADD $rp,$BNSZ
$SUBU $hi0,$hi1,$hi0 # handle upmost overflow bit
move $tp,$sp
$PTR_SUB $rp,$num # restore rp
not $hi1,$hi0
and $ap,$hi0,$sp
and $bp,$hi1,$rp
or $ap,$ap,$bp # ap=borrow?tp:rp
.align 4
.Lcopy: $LD $aj,($ap)
$PTR_ADD $ap,$BNSZ
$ST $zero,($tp)
$PTR_ADD $tp,$BNSZ
sltu $at,$tp,$tj
$ST $aj,($rp)
bnez $at,.Lcopy
$PTR_ADD $rp,$BNSZ
li $a0,1
li $t0,1
.set noreorder
move $sp,$fp
$REG_L $fp,($FRAMESIZE-1)*$SZREG($sp)
$REG_L $s11,($FRAMESIZE-2)*$SZREG($sp)
$REG_L $s10,($FRAMESIZE-3)*$SZREG($sp)
$REG_L $s9,($FRAMESIZE-4)*$SZREG($sp)
$REG_L $s8,($FRAMESIZE-5)*$SZREG($sp)
$REG_L $s7,($FRAMESIZE-6)*$SZREG($sp)
$REG_L $s6,($FRAMESIZE-7)*$SZREG($sp)
$REG_L $s5,($FRAMESIZE-8)*$SZREG($sp)
$REG_L $s4,($FRAMESIZE-9)*$SZREG($sp)
___
$code.=<<___ if ($flavour =~ /nubi/i);
$REG_L $s3,($FRAMESIZE-10)*$SZREG($sp)
$REG_L $s2,($FRAMESIZE-11)*$SZREG($sp)
$REG_L $s1,($FRAMESIZE-12)*$SZREG($sp)
$REG_L $s0,($FRAMESIZE-13)*$SZREG($sp)
___
$code.=<<___;
jr $ra
$PTR_ADD $sp,$FRAMESIZE*$SZREG
.end bn_mul_mont_internal
.rdata
.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro\@openssl.org>"
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;

2234
crypto/bn/asm/mips.pl Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,995 @@
#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# On PA-7100LC this module performs ~90-50% better, less for longer
# keys, than code generated by gcc 3.2 for PA-RISC 1.1. Latter means
# that compiler utilized xmpyu instruction to perform 32x32=64-bit
# multiplication, which in turn means that "baseline" performance was
# optimal in respect to instruction set capabilities. Fair comparison
# with vendor compiler is problematic, because OpenSSL doesn't define
# BN_LLONG [presumably] for historical reasons, which drives compiler
# toward 4 times 16x16=32-bit multiplicatons [plus complementary
# shifts and additions] instead. This means that you should observe
# several times improvement over code generated by vendor compiler
# for PA-RISC 1.1, but the "baseline" is far from optimal. The actual
# improvement coefficient was never collected on PA-7100LC, or any
# other 1.1 CPU, because I don't have access to such machine with
# vendor compiler. But to give you a taste, PA-RISC 1.1 code path
# reportedly outperformed code generated by cc +DA1.1 +O3 by factor
# of ~5x on PA-8600.
#
# On PA-RISC 2.0 it has to compete with pa-risc2[W].s, which is
# reportedly ~2x faster than vendor compiler generated code [according
# to comment in pa-risc2[W].s]. Here comes a catch. Execution core of
# this implementation is actually 32-bit one, in the sense that it
# operates on 32-bit values. But pa-risc2[W].s operates on arrays of
# 64-bit BN_LONGs... How do they interoperate then? No problem. This
# module picks halves of 64-bit values in reverse order and pretends
# they were 32-bit BN_LONGs. But can 32-bit core compete with "pure"
# 64-bit code such as pa-risc2[W].s then? Well, the thing is that
# 32x32=64-bit multiplication is the best even PA-RISC 2.0 can do,
# i.e. there is no "wider" multiplication like on most other 64-bit
# platforms. This means that even being effectively 32-bit, this
# implementation performs "64-bit" computational task in same amount
# of arithmetic operations, most notably multiplications. It requires
# more memory references, most notably to tp[num], but this doesn't
# seem to exhaust memory port capacity. And indeed, dedicated PA-RISC
# 2.0 code path provides virtually same performance as pa-risc2[W].s:
# it's ~10% better for shortest key length and ~10% worse for longest
# one.
#
# In case it wasn't clear. The module has two distinct code paths:
# PA-RISC 1.1 and PA-RISC 2.0 ones. Latter features carry-free 64-bit
# additions and 64-bit integer loads, not to mention specific
# instruction scheduling. In 64-bit build naturally only 2.0 code path
# is assembled. In 32-bit application context both code paths are
# assembled, PA-RISC 2.0 CPU is detected at run-time and proper path
# is taken automatically. Also, in 32-bit build the module imposes
# couple of limitations: vector lengths has to be even and vector
# addresses has to be 64-bit aligned. Normally neither is a problem:
# most common key lengths are even and vectors are commonly malloc-ed,
# which ensures alignment.
#
# Special thanks to polarhome.com for providing HP-UX account on
# PA-RISC 1.1 machine, and to correspondent who chose to remain
# anonymous for testing the code on PA-RISC 2.0 machine.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
$flavour = shift;
$output = shift;
open STDOUT,">$output";
if ($flavour =~ /64/) {
$LEVEL ="2.0W";
$SIZE_T =8;
$FRAME_MARKER =80;
$SAVED_RP =16;
$PUSH ="std";
$PUSHMA ="std,ma";
$POP ="ldd";
$POPMB ="ldd,mb";
$BN_SZ =$SIZE_T;
} else {
$LEVEL ="1.1"; #$LEVEL.="\n\t.ALLOW\t2.0";
$SIZE_T =4;
$FRAME_MARKER =48;
$SAVED_RP =20;
$PUSH ="stw";
$PUSHMA ="stwm";
$POP ="ldw";
$POPMB ="ldwm";
$BN_SZ =$SIZE_T;
if (open CONF,"<${dir}../../opensslconf.h") {
while(<CONF>) {
if (m/#\s*define\s+SIXTY_FOUR_BIT/) {
$BN_SZ=8;
$LEVEL="2.0";
last;
}
}
close CONF;
}
}
$FRAME=8*$SIZE_T+$FRAME_MARKER; # 8 saved regs + frame marker
# [+ argument transfer]
$LOCALS=$FRAME-$FRAME_MARKER;
$FRAME+=32; # local variables
$tp="%r31";
$ti1="%r29";
$ti0="%r28";
$rp="%r26";
$ap="%r25";
$bp="%r24";
$np="%r23";
$n0="%r22"; # passed through stack in 32-bit
$num="%r21"; # passed through stack in 32-bit
$idx="%r20";
$arrsz="%r19";
$nm1="%r7";
$nm0="%r6";
$ab1="%r5";
$ab0="%r4";
$fp="%r3";
$hi1="%r2";
$hi0="%r1";
$xfer=$n0; # accomodates [-16..15] offset in fld[dw]s
$fm0="%fr4"; $fti=$fm0;
$fbi="%fr5L";
$fn0="%fr5R";
$fai="%fr6"; $fab0="%fr7"; $fab1="%fr8";
$fni="%fr9"; $fnm0="%fr10"; $fnm1="%fr11";
$code=<<___;
.LEVEL $LEVEL
.SPACE \$TEXT\$
.SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
.EXPORT bn_mul_mont,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
.ALIGN 64
bn_mul_mont
.PROC
.CALLINFO FRAME=`$FRAME-8*$SIZE_T`,NO_CALLS,SAVE_RP,SAVE_SP,ENTRY_GR=6
.ENTRY
$PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
$PUSHMA %r3,$FRAME(%sp)
$PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
$PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
$PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
$PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
$PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
$PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
$PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
ldo -$FRAME(%sp),$fp
___
$code.=<<___ if ($SIZE_T==4);
ldw `-$FRAME_MARKER-4`($fp),$n0
ldw `-$FRAME_MARKER-8`($fp),$num
nop
nop ; alignment
___
$code.=<<___ if ($BN_SZ==4);
comiclr,<= 6,$num,%r0 ; are vectors long enough?
b L\$abort
ldi 0,%r28 ; signal "unhandled"
add,ev %r0,$num,$num ; is $num even?
b L\$abort
nop
or $ap,$np,$ti1
extru,= $ti1,31,3,%r0 ; are ap and np 64-bit aligned?
b L\$abort
nop
nop ; alignment
nop
fldws 0($n0),${fn0}
fldws,ma 4($bp),${fbi} ; bp[0]
___
$code.=<<___ if ($BN_SZ==8);
comib,> 3,$num,L\$abort ; are vectors long enough?
ldi 0,%r28 ; signal "unhandled"
addl $num,$num,$num ; I operate on 32-bit values
fldws 4($n0),${fn0} ; only low part of n0
fldws 4($bp),${fbi} ; bp[0] in flipped word order
___
$code.=<<___;
fldds 0($ap),${fai} ; ap[0,1]
fldds 0($np),${fni} ; np[0,1]
sh2addl $num,%r0,$arrsz
ldi 31,$hi0
ldo 36($arrsz),$hi1 ; space for tp[num+1]
andcm $hi1,$hi0,$hi1 ; align
addl $hi1,%sp,%sp
$PUSH $fp,-$SIZE_T(%sp)
ldo `$LOCALS+16`($fp),$xfer
ldo `$LOCALS+32+4`($fp),$tp
xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[0]
xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[0]
xmpyu ${fn0},${fab0}R,${fm0}
addl $arrsz,$ap,$ap ; point at the end
addl $arrsz,$np,$np
subi 0,$arrsz,$idx ; j=0
ldo 8($idx),$idx ; j++++
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
fstds ${fab0},-16($xfer)
fstds ${fnm0},-8($xfer)
fstds ${fab1},0($xfer)
fstds ${fnm1},8($xfer)
flddx $idx($ap),${fai} ; ap[2,3]
flddx $idx($np),${fni} ; np[2,3]
___
$code.=<<___ if ($BN_SZ==4);
mtctl $hi0,%cr11 ; $hi0 still holds 31
extrd,u,*= $hi0,%sar,1,$hi0 ; executes on PA-RISC 1.0
b L\$parisc11
nop
___
$code.=<<___; # PA-RISC 2.0 code-path
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
ldd -16($xfer),$ab0
fstds ${fab0},-16($xfer)
extrd,u $ab0,31,32,$hi0
extrd,u $ab0,63,32,$ab0
ldd -8($xfer),$nm0
fstds ${fnm0},-8($xfer)
ldo 8($idx),$idx ; j++++
addl $ab0,$nm0,$nm0 ; low part is discarded
extrd,u $nm0,31,32,$hi1
L\$1st
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
ldd 0($xfer),$ab1
fstds ${fab1},0($xfer)
addl $hi0,$ab1,$ab1
extrd,u $ab1,31,32,$hi0
ldd 8($xfer),$nm1
fstds ${fnm1},8($xfer)
extrd,u $ab1,63,32,$ab1
addl $hi1,$nm1,$nm1
flddx $idx($ap),${fai} ; ap[j,j+1]
flddx $idx($np),${fni} ; np[j,j+1]
addl $ab1,$nm1,$nm1
extrd,u $nm1,31,32,$hi1
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
ldd -16($xfer),$ab0
fstds ${fab0},-16($xfer)
addl $hi0,$ab0,$ab0
extrd,u $ab0,31,32,$hi0
ldd -8($xfer),$nm0
fstds ${fnm0},-8($xfer)
extrd,u $ab0,63,32,$ab0
addl $hi1,$nm0,$nm0
stw $nm1,-4($tp) ; tp[j-1]
addl $ab0,$nm0,$nm0
stw,ma $nm0,8($tp) ; tp[j-1]
addib,<> 8,$idx,L\$1st ; j++++
extrd,u $nm0,31,32,$hi1
xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
ldd 0($xfer),$ab1
fstds ${fab1},0($xfer)
addl $hi0,$ab1,$ab1
extrd,u $ab1,31,32,$hi0
ldd 8($xfer),$nm1
fstds ${fnm1},8($xfer)
extrd,u $ab1,63,32,$ab1
addl $hi1,$nm1,$nm1
ldd -16($xfer),$ab0
addl $ab1,$nm1,$nm1
ldd -8($xfer),$nm0
extrd,u $nm1,31,32,$hi1
addl $hi0,$ab0,$ab0
extrd,u $ab0,31,32,$hi0
stw $nm1,-4($tp) ; tp[j-1]
extrd,u $ab0,63,32,$ab0
addl $hi1,$nm0,$nm0
ldd 0($xfer),$ab1
addl $ab0,$nm0,$nm0
ldd,mb 8($xfer),$nm1
extrd,u $nm0,31,32,$hi1
stw,ma $nm0,8($tp) ; tp[j-1]
ldo -1($num),$num ; i--
subi 0,$arrsz,$idx ; j=0
___
$code.=<<___ if ($BN_SZ==4);
fldws,ma 4($bp),${fbi} ; bp[1]
___
$code.=<<___ if ($BN_SZ==8);
fldws 0($bp),${fbi} ; bp[1] in flipped word order
___
$code.=<<___;
flddx $idx($ap),${fai} ; ap[0,1]
flddx $idx($np),${fni} ; np[0,1]
fldws 8($xfer),${fti}R ; tp[0]
addl $hi0,$ab1,$ab1
extrd,u $ab1,31,32,$hi0
extrd,u $ab1,63,32,$ab1
ldo 8($idx),$idx ; j++++
xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1]
xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1]
addl $hi1,$nm1,$nm1
addl $ab1,$nm1,$nm1
extrd,u $nm1,31,32,$hi1
fstws,mb ${fab0}L,-8($xfer) ; save high part
stw $nm1,-4($tp) ; tp[j-1]
fcpy,sgl %fr0,${fti}L ; zero high part
fcpy,sgl %fr0,${fab0}L
addl $hi1,$hi0,$hi0
extrd,u $hi0,31,32,$hi1
fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
fcnvxf,dbl,dbl ${fab0},${fab0}
stw $hi0,0($tp)
stw $hi1,4($tp)
fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
xmpyu ${fn0},${fab0}R,${fm0}
ldo `$LOCALS+32+4`($fp),$tp
L\$outer
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
fstds ${fab0},-16($xfer) ; 33-bit value
fstds ${fnm0},-8($xfer)
flddx $idx($ap),${fai} ; ap[2]
flddx $idx($np),${fni} ; np[2]
ldo 8($idx),$idx ; j++++
ldd -16($xfer),$ab0 ; 33-bit value
ldd -8($xfer),$nm0
ldw 0($xfer),$hi0 ; high part
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
extrd,u $ab0,31,32,$ti0 ; carry bit
extrd,u $ab0,63,32,$ab0
fstds ${fab1},0($xfer)
addl $ti0,$hi0,$hi0 ; account carry bit
fstds ${fnm1},8($xfer)
addl $ab0,$nm0,$nm0 ; low part is discarded
ldw 0($tp),$ti1 ; tp[1]
extrd,u $nm0,31,32,$hi1
fstds ${fab0},-16($xfer)
fstds ${fnm0},-8($xfer)
L\$inner
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
ldd 0($xfer),$ab1
fstds ${fab1},0($xfer)
addl $hi0,$ti1,$ti1
addl $ti1,$ab1,$ab1
ldd 8($xfer),$nm1
fstds ${fnm1},8($xfer)
extrd,u $ab1,31,32,$hi0
extrd,u $ab1,63,32,$ab1
flddx $idx($ap),${fai} ; ap[j,j+1]
flddx $idx($np),${fni} ; np[j,j+1]
addl $hi1,$nm1,$nm1
addl $ab1,$nm1,$nm1
ldw 4($tp),$ti0 ; tp[j]
stw $nm1,-4($tp) ; tp[j-1]
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
ldd -16($xfer),$ab0
fstds ${fab0},-16($xfer)
addl $hi0,$ti0,$ti0
addl $ti0,$ab0,$ab0
ldd -8($xfer),$nm0
fstds ${fnm0},-8($xfer)
extrd,u $ab0,31,32,$hi0
extrd,u $nm1,31,32,$hi1
ldw 8($tp),$ti1 ; tp[j]
extrd,u $ab0,63,32,$ab0
addl $hi1,$nm0,$nm0
addl $ab0,$nm0,$nm0
stw,ma $nm0,8($tp) ; tp[j-1]
addib,<> 8,$idx,L\$inner ; j++++
extrd,u $nm0,31,32,$hi1
xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
ldd 0($xfer),$ab1
fstds ${fab1},0($xfer)
addl $hi0,$ti1,$ti1
addl $ti1,$ab1,$ab1
ldd 8($xfer),$nm1
fstds ${fnm1},8($xfer)
extrd,u $ab1,31,32,$hi0
extrd,u $ab1,63,32,$ab1
ldw 4($tp),$ti0 ; tp[j]
addl $hi1,$nm1,$nm1
addl $ab1,$nm1,$nm1
ldd -16($xfer),$ab0
ldd -8($xfer),$nm0
extrd,u $nm1,31,32,$hi1
addl $hi0,$ab0,$ab0
addl $ti0,$ab0,$ab0
stw $nm1,-4($tp) ; tp[j-1]
extrd,u $ab0,31,32,$hi0
ldw 8($tp),$ti1 ; tp[j]
extrd,u $ab0,63,32,$ab0
addl $hi1,$nm0,$nm0
ldd 0($xfer),$ab1
addl $ab0,$nm0,$nm0
ldd,mb 8($xfer),$nm1
extrd,u $nm0,31,32,$hi1
stw,ma $nm0,8($tp) ; tp[j-1]
addib,= -1,$num,L\$outerdone ; i--
subi 0,$arrsz,$idx ; j=0
___
$code.=<<___ if ($BN_SZ==4);
fldws,ma 4($bp),${fbi} ; bp[i]
___
$code.=<<___ if ($BN_SZ==8);
ldi 12,$ti0 ; bp[i] in flipped word order
addl,ev %r0,$num,$num
ldi -4,$ti0
addl $ti0,$bp,$bp
fldws 0($bp),${fbi}
___
$code.=<<___;
flddx $idx($ap),${fai} ; ap[0]
addl $hi0,$ab1,$ab1
flddx $idx($np),${fni} ; np[0]
fldws 8($xfer),${fti}R ; tp[0]
addl $ti1,$ab1,$ab1
extrd,u $ab1,31,32,$hi0
extrd,u $ab1,63,32,$ab1
ldo 8($idx),$idx ; j++++
xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i]
xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i]
ldw 4($tp),$ti0 ; tp[j]
addl $hi1,$nm1,$nm1
fstws,mb ${fab0}L,-8($xfer) ; save high part
addl $ab1,$nm1,$nm1
extrd,u $nm1,31,32,$hi1
fcpy,sgl %fr0,${fti}L ; zero high part
fcpy,sgl %fr0,${fab0}L
stw $nm1,-4($tp) ; tp[j-1]
fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
fcnvxf,dbl,dbl ${fab0},${fab0}
addl $hi1,$hi0,$hi0
fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
addl $ti0,$hi0,$hi0
extrd,u $hi0,31,32,$hi1
fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
stw $hi0,0($tp)
stw $hi1,4($tp)
xmpyu ${fn0},${fab0}R,${fm0}
b L\$outer
ldo `$LOCALS+32+4`($fp),$tp
L\$outerdone
addl $hi0,$ab1,$ab1
addl $ti1,$ab1,$ab1
extrd,u $ab1,31,32,$hi0
extrd,u $ab1,63,32,$ab1
ldw 4($tp),$ti0 ; tp[j]
addl $hi1,$nm1,$nm1
addl $ab1,$nm1,$nm1
extrd,u $nm1,31,32,$hi1
stw $nm1,-4($tp) ; tp[j-1]
addl $hi1,$hi0,$hi0
addl $ti0,$hi0,$hi0
extrd,u $hi0,31,32,$hi1
stw $hi0,0($tp)
stw $hi1,4($tp)
ldo `$LOCALS+32`($fp),$tp
sub %r0,%r0,%r0 ; clear borrow
___
$code.=<<___ if ($BN_SZ==4);
ldws,ma 4($tp),$ti0
extru,= $rp,31,3,%r0 ; is rp 64-bit aligned?
b L\$sub_pa11
addl $tp,$arrsz,$tp
L\$sub
ldwx $idx($np),$hi0
subb $ti0,$hi0,$hi1
ldwx $idx($tp),$ti0
addib,<> 4,$idx,L\$sub
stws,ma $hi1,4($rp)
subb $ti0,%r0,$hi1
ldo -4($tp),$tp
___
$code.=<<___ if ($BN_SZ==8);
ldd,ma 8($tp),$ti0
L\$sub
ldd $idx($np),$hi0
shrpd $ti0,$ti0,32,$ti0 ; flip word order
std $ti0,-8($tp) ; save flipped value
sub,db $ti0,$hi0,$hi1
ldd,ma 8($tp),$ti0
addib,<> 8,$idx,L\$sub
std,ma $hi1,8($rp)
extrd,u $ti0,31,32,$ti0 ; carry in flipped word order
sub,db $ti0,%r0,$hi1
ldo -8($tp),$tp
___
$code.=<<___;
and $tp,$hi1,$ap
andcm $rp,$hi1,$bp
or $ap,$bp,$np
sub $rp,$arrsz,$rp ; rewind rp
subi 0,$arrsz,$idx
ldo `$LOCALS+32`($fp),$tp
L\$copy
ldd $idx($np),$hi0
std,ma %r0,8($tp)
addib,<> 8,$idx,.-8 ; L\$copy
std,ma $hi0,8($rp)
___
if ($BN_SZ==4) { # PA-RISC 1.1 code-path
$ablo=$ab0;
$abhi=$ab1;
$nmlo0=$nm0;
$nmhi0=$nm1;
$nmlo1="%r9";
$nmhi1="%r8";
$code.=<<___;
b L\$done
nop
.ALIGN 8
L\$parisc11
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
ldw -12($xfer),$ablo
ldw -16($xfer),$hi0
ldw -4($xfer),$nmlo0
ldw -8($xfer),$nmhi0
fstds ${fab0},-16($xfer)
fstds ${fnm0},-8($xfer)
ldo 8($idx),$idx ; j++++
add $ablo,$nmlo0,$nmlo0 ; discarded
addc %r0,$nmhi0,$hi1
ldw 4($xfer),$ablo
ldw 0($xfer),$abhi
nop
L\$1st_pa11
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[0]
flddx $idx($ap),${fai} ; ap[j,j+1]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
flddx $idx($np),${fni} ; np[j,j+1]
add $hi0,$ablo,$ablo
ldw 12($xfer),$nmlo1
addc %r0,$abhi,$hi0
ldw 8($xfer),$nmhi1
add $ablo,$nmlo1,$nmlo1
fstds ${fab1},0($xfer)
addc %r0,$nmhi1,$nmhi1
fstds ${fnm1},8($xfer)
add $hi1,$nmlo1,$nmlo1
ldw -12($xfer),$ablo
addc %r0,$nmhi1,$hi1
ldw -16($xfer),$abhi
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[0]
ldw -4($xfer),$nmlo0
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
ldw -8($xfer),$nmhi0
add $hi0,$ablo,$ablo
stw $nmlo1,-4($tp) ; tp[j-1]
addc %r0,$abhi,$hi0
fstds ${fab0},-16($xfer)
add $ablo,$nmlo0,$nmlo0
fstds ${fnm0},-8($xfer)
addc %r0,$nmhi0,$nmhi0
ldw 0($xfer),$abhi
add $hi1,$nmlo0,$nmlo0
ldw 4($xfer),$ablo
stws,ma $nmlo0,8($tp) ; tp[j-1]
addib,<> 8,$idx,L\$1st_pa11 ; j++++
addc %r0,$nmhi0,$hi1
ldw 8($xfer),$nmhi1
ldw 12($xfer),$nmlo1
xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[0]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
add $hi0,$ablo,$ablo
fstds ${fab1},0($xfer)
addc %r0,$abhi,$hi0
fstds ${fnm1},8($xfer)
add $ablo,$nmlo1,$nmlo1
ldw -16($xfer),$abhi
addc %r0,$nmhi1,$nmhi1
ldw -12($xfer),$ablo
add $hi1,$nmlo1,$nmlo1
ldw -8($xfer),$nmhi0
addc %r0,$nmhi1,$hi1
ldw -4($xfer),$nmlo0
add $hi0,$ablo,$ablo
stw $nmlo1,-4($tp) ; tp[j-1]
addc %r0,$abhi,$hi0
ldw 0($xfer),$abhi
add $ablo,$nmlo0,$nmlo0
ldw 4($xfer),$ablo
addc %r0,$nmhi0,$nmhi0
ldws,mb 8($xfer),$nmhi1
add $hi1,$nmlo0,$nmlo0
ldw 4($xfer),$nmlo1
addc %r0,$nmhi0,$hi1
stws,ma $nmlo0,8($tp) ; tp[j-1]
ldo -1($num),$num ; i--
subi 0,$arrsz,$idx ; j=0
fldws,ma 4($bp),${fbi} ; bp[1]
flddx $idx($ap),${fai} ; ap[0,1]
flddx $idx($np),${fni} ; np[0,1]
fldws 8($xfer),${fti}R ; tp[0]
add $hi0,$ablo,$ablo
addc %r0,$abhi,$hi0
ldo 8($idx),$idx ; j++++
xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[1]
xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[1]
add $hi1,$nmlo1,$nmlo1
addc %r0,$nmhi1,$nmhi1
add $ablo,$nmlo1,$nmlo1
addc %r0,$nmhi1,$hi1
fstws,mb ${fab0}L,-8($xfer) ; save high part
stw $nmlo1,-4($tp) ; tp[j-1]
fcpy,sgl %fr0,${fti}L ; zero high part
fcpy,sgl %fr0,${fab0}L
add $hi1,$hi0,$hi0
addc %r0,%r0,$hi1
fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
fcnvxf,dbl,dbl ${fab0},${fab0}
stw $hi0,0($tp)
stw $hi1,4($tp)
fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
xmpyu ${fn0},${fab0}R,${fm0}
ldo `$LOCALS+32+4`($fp),$tp
L\$outer_pa11
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[0]*m
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[1]*m
fstds ${fab0},-16($xfer) ; 33-bit value
fstds ${fnm0},-8($xfer)
flddx $idx($ap),${fai} ; ap[2,3]
flddx $idx($np),${fni} ; np[2,3]
ldw -16($xfer),$abhi ; carry bit actually
ldo 8($idx),$idx ; j++++
ldw -12($xfer),$ablo
ldw -8($xfer),$nmhi0
ldw -4($xfer),$nmlo0
ldw 0($xfer),$hi0 ; high part
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
fstds ${fab1},0($xfer)
addl $abhi,$hi0,$hi0 ; account carry bit
fstds ${fnm1},8($xfer)
add $ablo,$nmlo0,$nmlo0 ; discarded
ldw 0($tp),$ti1 ; tp[1]
addc %r0,$nmhi0,$hi1
fstds ${fab0},-16($xfer)
fstds ${fnm0},-8($xfer)
ldw 4($xfer),$ablo
ldw 0($xfer),$abhi
L\$inner_pa11
xmpyu ${fai}R,${fbi},${fab1} ; ap[j+1]*bp[i]
flddx $idx($ap),${fai} ; ap[j,j+1]
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j+1]*m
flddx $idx($np),${fni} ; np[j,j+1]
add $hi0,$ablo,$ablo
ldw 4($tp),$ti0 ; tp[j]
addc %r0,$abhi,$abhi
ldw 12($xfer),$nmlo1
add $ti1,$ablo,$ablo
ldw 8($xfer),$nmhi1
addc %r0,$abhi,$hi0
fstds ${fab1},0($xfer)
add $ablo,$nmlo1,$nmlo1
fstds ${fnm1},8($xfer)
addc %r0,$nmhi1,$nmhi1
ldw -12($xfer),$ablo
add $hi1,$nmlo1,$nmlo1
ldw -16($xfer),$abhi
addc %r0,$nmhi1,$hi1
xmpyu ${fai}L,${fbi},${fab0} ; ap[j]*bp[i]
ldw 8($tp),$ti1 ; tp[j]
xmpyu ${fni}L,${fm0}R,${fnm0} ; np[j]*m
ldw -4($xfer),$nmlo0
add $hi0,$ablo,$ablo
ldw -8($xfer),$nmhi0
addc %r0,$abhi,$abhi
stw $nmlo1,-4($tp) ; tp[j-1]
add $ti0,$ablo,$ablo
fstds ${fab0},-16($xfer)
addc %r0,$abhi,$hi0
fstds ${fnm0},-8($xfer)
add $ablo,$nmlo0,$nmlo0
ldw 4($xfer),$ablo
addc %r0,$nmhi0,$nmhi0
ldw 0($xfer),$abhi
add $hi1,$nmlo0,$nmlo0
stws,ma $nmlo0,8($tp) ; tp[j-1]
addib,<> 8,$idx,L\$inner_pa11 ; j++++
addc %r0,$nmhi0,$hi1
xmpyu ${fai}R,${fbi},${fab1} ; ap[j]*bp[i]
ldw 12($xfer),$nmlo1
xmpyu ${fni}R,${fm0}R,${fnm1} ; np[j]*m
ldw 8($xfer),$nmhi1
add $hi0,$ablo,$ablo
ldw 4($tp),$ti0 ; tp[j]
addc %r0,$abhi,$abhi
fstds ${fab1},0($xfer)
add $ti1,$ablo,$ablo
fstds ${fnm1},8($xfer)
addc %r0,$abhi,$hi0
ldw -16($xfer),$abhi
add $ablo,$nmlo1,$nmlo1
ldw -12($xfer),$ablo
addc %r0,$nmhi1,$nmhi1
ldw -8($xfer),$nmhi0
add $hi1,$nmlo1,$nmlo1
ldw -4($xfer),$nmlo0
addc %r0,$nmhi1,$hi1
add $hi0,$ablo,$ablo
stw $nmlo1,-4($tp) ; tp[j-1]
addc %r0,$abhi,$abhi
add $ti0,$ablo,$ablo
ldw 8($tp),$ti1 ; tp[j]
addc %r0,$abhi,$hi0
ldw 0($xfer),$abhi
add $ablo,$nmlo0,$nmlo0
ldw 4($xfer),$ablo
addc %r0,$nmhi0,$nmhi0
ldws,mb 8($xfer),$nmhi1
add $hi1,$nmlo0,$nmlo0
ldw 4($xfer),$nmlo1
addc %r0,$nmhi0,$hi1
stws,ma $nmlo0,8($tp) ; tp[j-1]
addib,= -1,$num,L\$outerdone_pa11; i--
subi 0,$arrsz,$idx ; j=0
fldws,ma 4($bp),${fbi} ; bp[i]
flddx $idx($ap),${fai} ; ap[0]
add $hi0,$ablo,$ablo
addc %r0,$abhi,$abhi
flddx $idx($np),${fni} ; np[0]
fldws 8($xfer),${fti}R ; tp[0]
add $ti1,$ablo,$ablo
addc %r0,$abhi,$hi0
ldo 8($idx),$idx ; j++++
xmpyu ${fai}L,${fbi},${fab0} ; ap[0]*bp[i]
xmpyu ${fai}R,${fbi},${fab1} ; ap[1]*bp[i]
ldw 4($tp),$ti0 ; tp[j]
add $hi1,$nmlo1,$nmlo1
addc %r0,$nmhi1,$nmhi1
fstws,mb ${fab0}L,-8($xfer) ; save high part
add $ablo,$nmlo1,$nmlo1
addc %r0,$nmhi1,$hi1
fcpy,sgl %fr0,${fti}L ; zero high part
fcpy,sgl %fr0,${fab0}L
stw $nmlo1,-4($tp) ; tp[j-1]
fcnvxf,dbl,dbl ${fti},${fti} ; 32-bit unsigned int -> double
fcnvxf,dbl,dbl ${fab0},${fab0}
add $hi1,$hi0,$hi0
addc %r0,%r0,$hi1
fadd,dbl ${fti},${fab0},${fab0} ; add tp[0]
add $ti0,$hi0,$hi0
addc %r0,$hi1,$hi1
fcnvfx,dbl,dbl ${fab0},${fab0} ; double -> 33-bit unsigned int
stw $hi0,0($tp)
stw $hi1,4($tp)
xmpyu ${fn0},${fab0}R,${fm0}
b L\$outer_pa11
ldo `$LOCALS+32+4`($fp),$tp
L\$outerdone_pa11
add $hi0,$ablo,$ablo
addc %r0,$abhi,$abhi
add $ti1,$ablo,$ablo
addc %r0,$abhi,$hi0
ldw 4($tp),$ti0 ; tp[j]
add $hi1,$nmlo1,$nmlo1
addc %r0,$nmhi1,$nmhi1
add $ablo,$nmlo1,$nmlo1
addc %r0,$nmhi1,$hi1
stw $nmlo1,-4($tp) ; tp[j-1]
add $hi1,$hi0,$hi0
addc %r0,%r0,$hi1
add $ti0,$hi0,$hi0
addc %r0,$hi1,$hi1
stw $hi0,0($tp)
stw $hi1,4($tp)
ldo `$LOCALS+32+4`($fp),$tp
sub %r0,%r0,%r0 ; clear borrow
ldw -4($tp),$ti0
addl $tp,$arrsz,$tp
L\$sub_pa11
ldwx $idx($np),$hi0
subb $ti0,$hi0,$hi1
ldwx $idx($tp),$ti0
addib,<> 4,$idx,L\$sub_pa11
stws,ma $hi1,4($rp)
subb $ti0,%r0,$hi1
ldo -4($tp),$tp
and $tp,$hi1,$ap
andcm $rp,$hi1,$bp
or $ap,$bp,$np
sub $rp,$arrsz,$rp ; rewind rp
subi 0,$arrsz,$idx
ldo `$LOCALS+32`($fp),$tp
L\$copy_pa11
ldwx $idx($np),$hi0
stws,ma %r0,4($tp)
addib,<> 4,$idx,L\$copy_pa11
stws,ma $hi0,4($rp)
nop ; alignment
L\$done
___
}
$code.=<<___;
ldi 1,%r28 ; signal "handled"
ldo $FRAME($fp),%sp ; destroy tp[num+1]
$POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
$POP `-$FRAME+1*$SIZE_T`(%sp),%r4
$POP `-$FRAME+2*$SIZE_T`(%sp),%r5
$POP `-$FRAME+3*$SIZE_T`(%sp),%r6
$POP `-$FRAME+4*$SIZE_T`(%sp),%r7
$POP `-$FRAME+5*$SIZE_T`(%sp),%r8
$POP `-$FRAME+6*$SIZE_T`(%sp),%r9
$POP `-$FRAME+7*$SIZE_T`(%sp),%r10
L\$abort
bv (%r2)
.EXIT
$POPMB -$FRAME(%sp),%r3
.PROCEND
.STRINGZ "Montgomery Multiplication for PA-RISC, CRYPTOGAMS by <appro\@openssl.org>"
___
# Explicitly encode PA-RISC 2.0 instructions used in this module, so
# that it can be compiled with .LEVEL 1.0. It should be noted that I
# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
# directive...
my $ldd = sub {
my ($mod,$args) = @_;
my $orig = "ldd$mod\t$args";
if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
{ my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
{ my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
$opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
$opcode|=(1<<5) if ($mod =~ /^,m/);
$opcode|=(1<<13) if ($mod =~ /^,mb/);
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
else { "\t".$orig; }
};
my $std = sub {
my ($mod,$args) = @_;
my $orig = "std$mod\t$args";
if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 6
{ my $opcode=(0x03<<26)|($3<<21)|($1<<16)|(1<<12)|(0xB<<6);
$opcode|=(($2&0xF)<<1)|(($2&0x10)>>4); # encode offset
$opcode|=(1<<5) if ($mod =~ /^,m/);
$opcode|=(1<<13) if ($mod =~ /^,mb/);
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
else { "\t".$orig; }
};
my $extrd = sub {
my ($mod,$args) = @_;
my $orig = "extrd$mod\t$args";
# I only have ",u" completer, it's implicitly encoded...
if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
{ my $opcode=(0x36<<26)|($1<<21)|($4<<16);
my $len=32-$3;
$opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
$opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
{ my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
my $len=32-$2;
$opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
$opcode |= (1<<13) if ($mod =~ /,\**=/);
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
else { "\t".$orig; }
};
my $shrpd = sub {
my ($mod,$args) = @_;
my $orig = "shrpd$mod\t$args";
if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
{ my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
my $cpos=63-$3;
$opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
}
else { "\t".$orig; }
};
my $sub = sub {
my ($mod,$args) = @_;
my $orig = "sub$mod\t$args";
if ($mod eq ",db" && $args =~ /%r([0-9]+),%r([0-9]+),%r([0-9]+)/) {
my $opcode=(0x02<<26)|($2<<21)|($1<<16)|$3;
$opcode|=(1<<10); # e1
$opcode|=(1<<8); # e2
$opcode|=(1<<5); # d
sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig
}
else { "\t".$orig; }
};
sub assemble {
my ($mnemonic,$mod,$args)=@_;
my $opcode = eval("\$$mnemonic");
ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
}
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge;
# flip word order in 64-bit mode...
s/(xmpyu\s+)($fai|$fni)([LR])/$1.$2.($3 eq "L"?"R":"L")/e if ($BN_SZ==8);
# assemble 2.0 instructions in 32-bit mode...
s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e if ($BN_SZ==4);
s/\bbv\b/bve/gm if ($SIZE_T==8);
print $_,"\n";
}
close STDOUT;

View File

@@ -31,7 +31,6 @@ if ($flavour =~ /32/) {
$BNSZ= $BITS/8;
$SIZE_T=4;
$RZONE= 224;
$FRAME= $SIZE_T*16;
$LD= "lwz"; # load
$LDU= "lwzu"; # load and update
@@ -51,7 +50,6 @@ if ($flavour =~ /32/) {
$BNSZ= $BITS/8;
$SIZE_T=8;
$RZONE= 288;
$FRAME= $SIZE_T*16;
# same as above, but 64-bit mnemonics...
$LD= "ld"; # load
@@ -69,6 +67,9 @@ if ($flavour =~ /32/) {
$POP= $LD;
} else { die "nonsense $flavour"; }
$FRAME=8*$SIZE_T+$RZONE;
$LOCALS=8*$SIZE_T;
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
@@ -89,18 +90,18 @@ $aj="r10";
$nj="r11";
$tj="r12";
# non-volatile registers
$i="r14";
$j="r15";
$tp="r16";
$m0="r17";
$m1="r18";
$lo0="r19";
$hi0="r20";
$lo1="r21";
$hi1="r22";
$alo="r23";
$ahi="r24";
$nlo="r25";
$i="r20";
$j="r21";
$tp="r22";
$m0="r23";
$m1="r24";
$lo0="r25";
$hi0="r26";
$lo1="r27";
$hi1="r28";
$alo="r29";
$ahi="r30";
$nlo="r31";
#
$nhi="r0";
@@ -108,42 +109,48 @@ $code=<<___;
.machine "any"
.text
.globl .bn_mul_mont
.globl .bn_mul_mont_int
.align 4
.bn_mul_mont:
.bn_mul_mont_int:
cmpwi $num,4
mr $rp,r3 ; $rp is reassigned
li r3,0
bltlr
___
$code.=<<___ if ($BNSZ==4);
cmpwi $num,32 ; longer key performance is not better
bgelr
___
$code.=<<___;
slwi $num,$num,`log($BNSZ)/log(2)`
li $tj,-4096
addi $ovf,$num,`$FRAME+$RZONE`
addi $ovf,$num,$FRAME
subf $ovf,$ovf,$sp ; $sp-$ovf
and $ovf,$ovf,$tj ; minimize TLB usage
subf $ovf,$sp,$ovf ; $ovf-$sp
mr $tj,$sp
srwi $num,$num,`log($BNSZ)/log(2)`
$STUX $sp,$sp,$ovf
$PUSH r14,`4*$SIZE_T`($sp)
$PUSH r15,`5*$SIZE_T`($sp)
$PUSH r16,`6*$SIZE_T`($sp)
$PUSH r17,`7*$SIZE_T`($sp)
$PUSH r18,`8*$SIZE_T`($sp)
$PUSH r19,`9*$SIZE_T`($sp)
$PUSH r20,`10*$SIZE_T`($sp)
$PUSH r21,`11*$SIZE_T`($sp)
$PUSH r22,`12*$SIZE_T`($sp)
$PUSH r23,`13*$SIZE_T`($sp)
$PUSH r24,`14*$SIZE_T`($sp)
$PUSH r25,`15*$SIZE_T`($sp)
$PUSH r20,`-12*$SIZE_T`($tj)
$PUSH r21,`-11*$SIZE_T`($tj)
$PUSH r22,`-10*$SIZE_T`($tj)
$PUSH r23,`-9*$SIZE_T`($tj)
$PUSH r24,`-8*$SIZE_T`($tj)
$PUSH r25,`-7*$SIZE_T`($tj)
$PUSH r26,`-6*$SIZE_T`($tj)
$PUSH r27,`-5*$SIZE_T`($tj)
$PUSH r28,`-4*$SIZE_T`($tj)
$PUSH r29,`-3*$SIZE_T`($tj)
$PUSH r30,`-2*$SIZE_T`($tj)
$PUSH r31,`-1*$SIZE_T`($tj)
$LD $n0,0($n0) ; pull n0[0] value
addi $num,$num,-2 ; adjust $num for counter register
$LD $m0,0($bp) ; m0=bp[0]
$LD $aj,0($ap) ; ap[0]
addi $tp,$sp,$FRAME
addi $tp,$sp,$LOCALS
$UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
$UMULH $hi0,$aj,$m0
@@ -205,8 +212,8 @@ L1st:
Louter:
$LDX $m0,$bp,$i ; m0=bp[i]
$LD $aj,0($ap) ; ap[0]
addi $tp,$sp,$FRAME
$LD $tj,$FRAME($sp) ; tp[0]
addi $tp,$sp,$LOCALS
$LD $tj,$LOCALS($sp); tp[0]
$UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
$UMULH $hi0,$aj,$m0
$LD $aj,$BNSZ($ap) ; ap[1]
@@ -273,7 +280,7 @@ Linner:
addi $num,$num,2 ; restore $num
subfc $j,$j,$j ; j=0 and "clear" XER[CA]
addi $tp,$sp,$FRAME
addi $tp,$sp,$LOCALS
mtctr $num
.align 4
@@ -299,23 +306,28 @@ Lcopy: ; copy or in-place refresh
addi $j,$j,$BNSZ
bdnz- Lcopy
$POP r14,`4*$SIZE_T`($sp)
$POP r15,`5*$SIZE_T`($sp)
$POP r16,`6*$SIZE_T`($sp)
$POP r17,`7*$SIZE_T`($sp)
$POP r18,`8*$SIZE_T`($sp)
$POP r19,`9*$SIZE_T`($sp)
$POP r20,`10*$SIZE_T`($sp)
$POP r21,`11*$SIZE_T`($sp)
$POP r22,`12*$SIZE_T`($sp)
$POP r23,`13*$SIZE_T`($sp)
$POP r24,`14*$SIZE_T`($sp)
$POP r25,`15*$SIZE_T`($sp)
$POP $sp,0($sp)
$POP $tj,0($sp)
li r3,1
$POP r20,`-12*$SIZE_T`($tj)
$POP r21,`-11*$SIZE_T`($tj)
$POP r22,`-10*$SIZE_T`($tj)
$POP r23,`-9*$SIZE_T`($tj)
$POP r24,`-8*$SIZE_T`($tj)
$POP r25,`-7*$SIZE_T`($tj)
$POP r26,`-6*$SIZE_T`($tj)
$POP r27,`-5*$SIZE_T`($tj)
$POP r28,`-4*$SIZE_T`($tj)
$POP r29,`-3*$SIZE_T`($tj)
$POP r30,`-2*$SIZE_T`($tj)
$POP r31,`-1*$SIZE_T`($tj)
mr $sp,$tj
blr
.long 0
.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
.byte 0,12,4,0,0x80,12,6,0
.long 0
.size .bn_mul_mont_int,.-.bn_mul_mont_int
.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@openssl.org>"
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;

View File

@@ -389,7 +389,10 @@ $data=<<EOF;
$ST r9,`6*$BNSZ`(r3) #r[6]=c1
$ST r10,`7*$BNSZ`(r3) #r[7]=c2
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .bn_sqr_comba4,.-.bn_sqr_comba4
#
# NOTE: The following label name should be changed to
@@ -814,8 +817,10 @@ $data=<<EOF;
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .bn_sqr_comba8,.-.bn_sqr_comba8
#
# NOTE: The following label name should be changed to
@@ -949,7 +954,7 @@ $data=<<EOF;
addze r11,r0
#mul_add_c(a[3],b[2],c3,c1,c2);
$LD r6,`3*$BNSZ`(r4)
$LD r7,`2*$BNSZ`(r4)
$LD r7,`2*$BNSZ`(r5)
$UMULL r8,r6,r7
$UMULH r9,r6,r7
addc r12,r8,r12
@@ -966,7 +971,10 @@ $data=<<EOF;
$ST r10,`6*$BNSZ`(r3) #r[6]=c1
$ST r11,`7*$BNSZ`(r3) #r[7]=c2
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .bn_mul_comba4,.-.bn_mul_comba4
#
# NOTE: The following label name should be changed to
@@ -1502,7 +1510,10 @@ $data=<<EOF;
$ST r12,`14*$BNSZ`(r3) #r[14]=c3;
$ST r10,`15*$BNSZ`(r3) #r[15]=c1;
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .bn_mul_comba8,.-.bn_mul_comba8
#
# NOTE: The following label name should be changed to
@@ -1550,8 +1561,10 @@ Lppcasm_sub_adios:
subfze r3,r0 # if carry bit is set then r3 = 0 else -1
andi. r3,r3,1 # keep only last bit.
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.size .bn_sub_words,.-.bn_sub_words
#
# NOTE: The following label name should be changed to
@@ -1594,7 +1607,10 @@ Lppcasm_add_mainloop:
Lppcasm_add_adios:
addze r3,r0 #return carry bit.
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.size .bn_add_words,.-.bn_add_words
#
# NOTE: The following label name should be changed to
@@ -1707,7 +1723,10 @@ Lppcasm_div8:
Lppcasm_div9:
or r3,r8,r0
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .bn_div_words,.-.bn_div_words
#
# NOTE: The following label name should be changed to
@@ -1746,8 +1765,10 @@ Lppcasm_sqr_mainloop:
bdnz- Lppcasm_sqr_mainloop
Lppcasm_sqr_adios:
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,3,0
.long 0
.size .bn_sqr_words,.-.bn_sqr_words
#
# NOTE: The following label name should be changed to
@@ -1850,7 +1871,10 @@ Lppcasm_mw_REM:
Lppcasm_mw_OVER:
addi r3,r12,0
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.size bn_mul_words,.-bn_mul_words
#
# NOTE: The following label name should be changed to
@@ -1973,7 +1997,10 @@ Lppcasm_maw_leftover:
Lppcasm_maw_adios:
addi r3,r12,0
blr
.long 0x00000000
.long 0
.byte 0,12,0x14,0,0,0,4,0
.long 0
.size .bn_mul_add_words,.-.bn_mul_add_words
.align 4
EOF
$data =~ s/\`([^\`]*)\`/eval $1/gem;

File diff suppressed because it is too large Load Diff

1898
crypto/bn/asm/rsaz-avx2.pl Executable file

File diff suppressed because it is too large Load Diff

2144
crypto/bn/asm/rsaz-x86_64.pl Executable file

File diff suppressed because it is too large Load Diff

221
crypto/bn/asm/s390x-gf2m.pl Normal file
View File

@@ -0,0 +1,221 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# May 2011
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... gcc 4.3 appeared to generate poor code, therefore
# the effort. And indeed, the module delivers 55%-90%(*) improvement
# on haviest ECDSA verify and ECDH benchmarks for 163- and 571-bit
# key lengths on z990, 30%-55%(*) - on z10, and 70%-110%(*) - on z196.
# This is for 64-bit build. In 32-bit "highgprs" case improvement is
# even higher, for example on z990 it was measured 80%-150%. ECDSA
# sign is modest 9%-12% faster. Keep in mind that these coefficients
# are not ones for bn_GF2m_mul_2x2 itself, as not all CPU time is
# burnt in it...
#
# (*) gcc 4.1 was observed to deliver better results than gcc 4.3,
# so that improvement coefficients can vary from one specific
# setup to another.
$flavour = shift;
if ($flavour =~ /3[12]/) {
$SIZE_T=4;
$g="";
} else {
$SIZE_T=8;
$g="g";
}
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$stdframe=16*$SIZE_T+4*8;
$rp="%r2";
$a1="%r3";
$a0="%r4";
$b1="%r5";
$b0="%r6";
$ra="%r14";
$sp="%r15";
@T=("%r0","%r1");
@i=("%r12","%r13");
($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(6..11));
($lo,$hi,$b)=map("%r$_",(3..5)); $a=$lo; $mask=$a8;
$code.=<<___;
.text
.type _mul_1x1,\@function
.align 16
_mul_1x1:
lgr $a1,$a
sllg $a2,$a,1
sllg $a4,$a,2
sllg $a8,$a,3
srag $lo,$a1,63 # broadcast 63rd bit
nihh $a1,0x1fff
srag @i[0],$a2,63 # broadcast 62nd bit
nihh $a2,0x3fff
srag @i[1],$a4,63 # broadcast 61st bit
nihh $a4,0x7fff
ngr $lo,$b
ngr @i[0],$b
ngr @i[1],$b
lghi @T[0],0
lgr $a12,$a1
stg @T[0],`$stdframe+0*8`($sp) # tab[0]=0
xgr $a12,$a2
stg $a1,`$stdframe+1*8`($sp) # tab[1]=a1
lgr $a48,$a4
stg $a2,`$stdframe+2*8`($sp) # tab[2]=a2
xgr $a48,$a8
stg $a12,`$stdframe+3*8`($sp) # tab[3]=a1^a2
xgr $a1,$a4
stg $a4,`$stdframe+4*8`($sp) # tab[4]=a4
xgr $a2,$a4
stg $a1,`$stdframe+5*8`($sp) # tab[5]=a1^a4
xgr $a12,$a4
stg $a2,`$stdframe+6*8`($sp) # tab[6]=a2^a4
xgr $a1,$a48
stg $a12,`$stdframe+7*8`($sp) # tab[7]=a1^a2^a4
xgr $a2,$a48
stg $a8,`$stdframe+8*8`($sp) # tab[8]=a8
xgr $a12,$a48
stg $a1,`$stdframe+9*8`($sp) # tab[9]=a1^a8
xgr $a1,$a4
stg $a2,`$stdframe+10*8`($sp) # tab[10]=a2^a8
xgr $a2,$a4
stg $a12,`$stdframe+11*8`($sp) # tab[11]=a1^a2^a8
xgr $a12,$a4
stg $a48,`$stdframe+12*8`($sp) # tab[12]=a4^a8
srlg $hi,$lo,1
stg $a1,`$stdframe+13*8`($sp) # tab[13]=a1^a4^a8
sllg $lo,$lo,63
stg $a2,`$stdframe+14*8`($sp) # tab[14]=a2^a4^a8
srlg @T[0],@i[0],2
stg $a12,`$stdframe+15*8`($sp) # tab[15]=a1^a2^a4^a8
lghi $mask,`0xf<<3`
sllg $a1,@i[0],62
sllg @i[0],$b,3
srlg @T[1],@i[1],3
ngr @i[0],$mask
sllg $a2,@i[1],61
srlg @i[1],$b,4-3
xgr $hi,@T[0]
ngr @i[1],$mask
xgr $lo,$a1
xgr $hi,@T[1]
xgr $lo,$a2
xg $lo,$stdframe(@i[0],$sp)
srlg @i[0],$b,8-3
ngr @i[0],$mask
___
for($n=1;$n<14;$n++) {
$code.=<<___;
lg @T[1],$stdframe(@i[1],$sp)
srlg @i[1],$b,`($n+2)*4`-3
sllg @T[0],@T[1],`$n*4`
ngr @i[1],$mask
srlg @T[1],@T[1],`64-$n*4`
xgr $lo,@T[0]
xgr $hi,@T[1]
___
push(@i,shift(@i)); push(@T,shift(@T));
}
$code.=<<___;
lg @T[1],$stdframe(@i[1],$sp)
sllg @T[0],@T[1],`$n*4`
srlg @T[1],@T[1],`64-$n*4`
xgr $lo,@T[0]
xgr $hi,@T[1]
lg @T[0],$stdframe(@i[0],$sp)
sllg @T[1],@T[0],`($n+1)*4`
srlg @T[0],@T[0],`64-($n+1)*4`
xgr $lo,@T[1]
xgr $hi,@T[0]
br $ra
.size _mul_1x1,.-_mul_1x1
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,\@function
.align 16
bn_GF2m_mul_2x2:
stm${g} %r3,%r15,3*$SIZE_T($sp)
lghi %r1,-$stdframe-128
la %r0,0($sp)
la $sp,0(%r1,$sp) # alloca
st${g} %r0,0($sp) # back chain
___
if ($SIZE_T==8) {
my @r=map("%r$_",(6..9));
$code.=<<___;
bras $ra,_mul_1x1 # a1·b1
stmg $lo,$hi,16($rp)
lg $a,`$stdframe+128+4*$SIZE_T`($sp)
lg $b,`$stdframe+128+6*$SIZE_T`($sp)
bras $ra,_mul_1x1 # a0·b0
stmg $lo,$hi,0($rp)
lg $a,`$stdframe+128+3*$SIZE_T`($sp)
lg $b,`$stdframe+128+5*$SIZE_T`($sp)
xg $a,`$stdframe+128+4*$SIZE_T`($sp)
xg $b,`$stdframe+128+6*$SIZE_T`($sp)
bras $ra,_mul_1x1 # (a0+a1)·(b0+b1)
lmg @r[0],@r[3],0($rp)
xgr $lo,$hi
xgr $hi,@r[1]
xgr $lo,@r[0]
xgr $hi,@r[2]
xgr $lo,@r[3]
xgr $hi,@r[3]
xgr $lo,$hi
stg $hi,16($rp)
stg $lo,8($rp)
___
} else {
$code.=<<___;
sllg %r3,%r3,32
sllg %r5,%r5,32
or %r3,%r4
or %r5,%r6
bras $ra,_mul_1x1
rllg $lo,$lo,32
rllg $hi,$hi,32
stmg $lo,$hi,0($rp)
___
}
$code.=<<___;
lm${g} %r6,%r15,`$stdframe+128+6*$SIZE_T`($sp)
br $ra
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.string "GF(2^m) Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
close STDOUT;

View File

@@ -32,6 +32,33 @@
# Reschedule to minimize/avoid Address Generation Interlock hazard,
# make inner loops counter-based.
# November 2010.
#
# Adapt for -m31 build. If kernel supports what's called "highgprs"
# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
# instructions and achieve "64-bit" performance even in 31-bit legacy
# application context. The feature is not specific to any particular
# processor, as long as it's "z-CPU". Latter implies that the code
# remains z/Architecture specific. Compatibility with 32-bit BN_ULONG
# is achieved by swapping words after 64-bit loads, follow _dswap-s.
# On z990 it was measured to perform 2.6-2.2 times better than
# compiler-generated code, less for longer keys...
$flavour = shift;
if ($flavour =~ /3[12]/) {
$SIZE_T=4;
$g="";
} else {
$SIZE_T=8;
$g="g";
}
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
open STDOUT,">$output";
$stdframe=16*$SIZE_T+4*8;
$mn0="%r0";
$num="%r1";
@@ -60,34 +87,44 @@ $code.=<<___;
.globl bn_mul_mont
.type bn_mul_mont,\@function
bn_mul_mont:
lgf $num,164($sp) # pull $num
sla $num,3 # $num to enumerate bytes
lgf $num,`$stdframe+$SIZE_T-4`($sp) # pull $num
sla $num,`log($SIZE_T)/log(2)` # $num to enumerate bytes
la $bp,0($num,$bp)
stg %r2,16($sp)
st${g} %r2,2*$SIZE_T($sp)
cghi $num,16 #
lghi %r2,0 #
blr %r14 # if($num<16) return 0;
cghi $num,128 #
bhr %r14 # if($num>128) return 0;
___
$code.=<<___ if ($flavour =~ /3[12]/);
tmll $num,4
bnzr %r14 # if ($num&1) return 0;
___
$code.=<<___ if ($flavour !~ /3[12]/);
cghi $num,96 #
bhr %r14 # if($num>96) return 0;
___
$code.=<<___;
stm${g} %r3,%r15,3*$SIZE_T($sp)
stmg %r3,%r15,24($sp)
lghi $rp,-160-8 # leave room for carry bit
lghi $rp,-$stdframe-8 # leave room for carry bit
lcgr $j,$num # -$num
lgr %r0,$sp
la $rp,0($rp,$sp)
la $sp,0($j,$rp) # alloca
stg %r0,0($sp) # back chain
st${g} %r0,0($sp) # back chain
sra $num,3 # restore $num
la $bp,0($j,$bp) # restore $bp
ahi $num,-1 # adjust $num for inner loop
lg $n0,0($n0) # pull n0
_dswap $n0
lg $bi,0($bp)
_dswap $bi
lg $alo,0($ap)
_dswap $alo
mlgr $ahi,$bi # ap[0]*bp[0]
lgr $AHI,$ahi
@@ -95,6 +132,7 @@ bn_mul_mont:
msgr $mn0,$n0
lg $nlo,0($np) #
_dswap $nlo
mlgr $nhi,$mn0 # np[0]*m1
algr $nlo,$alo # +="tp[0]"
lghi $NHI,0
@@ -106,12 +144,14 @@ bn_mul_mont:
.align 16
.L1st:
lg $alo,0($j,$ap)
_dswap $alo
mlgr $ahi,$bi # ap[j]*bp[0]
algr $alo,$AHI
lghi $AHI,0
alcgr $AHI,$ahi
lg $nlo,0($j,$np)
_dswap $nlo
mlgr $nhi,$mn0 # np[j]*m1
algr $nlo,$NHI
lghi $NHI,0
@@ -119,22 +159,24 @@ bn_mul_mont:
algr $nlo,$alo
alcgr $NHI,$nhi
stg $nlo,160-8($j,$sp) # tp[j-1]=
stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
la $j,8($j) # j++
brct $count,.L1st
algr $NHI,$AHI
lghi $AHI,0
alcgr $AHI,$AHI # upmost overflow bit
stg $NHI,160-8($j,$sp)
stg $AHI,160($j,$sp)
stg $NHI,$stdframe-8($j,$sp)
stg $AHI,$stdframe($j,$sp)
la $bp,8($bp) # bp++
.Louter:
lg $bi,0($bp) # bp[i]
_dswap $bi
lg $alo,0($ap)
_dswap $alo
mlgr $ahi,$bi # ap[0]*bp[i]
alg $alo,160($sp) # +=tp[0]
alg $alo,$stdframe($sp) # +=tp[0]
lghi $AHI,0
alcgr $AHI,$ahi
@@ -142,6 +184,7 @@ bn_mul_mont:
msgr $mn0,$n0 # tp[0]*n0
lg $nlo,0($np) # np[0]
_dswap $nlo
mlgr $nhi,$mn0 # np[0]*m1
algr $nlo,$alo # +="tp[0]"
lghi $NHI,0
@@ -153,14 +196,16 @@ bn_mul_mont:
.align 16
.Linner:
lg $alo,0($j,$ap)
_dswap $alo
mlgr $ahi,$bi # ap[j]*bp[i]
algr $alo,$AHI
lghi $AHI,0
alcgr $ahi,$AHI
alg $alo,160($j,$sp)# +=tp[j]
alg $alo,$stdframe($j,$sp)# +=tp[j]
alcgr $AHI,$ahi
lg $nlo,0($j,$np)
_dswap $nlo
mlgr $nhi,$mn0 # np[j]*m1
algr $nlo,$NHI
lghi $NHI,0
@@ -168,31 +213,33 @@ bn_mul_mont:
algr $nlo,$alo # +="tp[j]"
alcgr $NHI,$nhi
stg $nlo,160-8($j,$sp) # tp[j-1]=
stg $nlo,$stdframe-8($j,$sp) # tp[j-1]=
la $j,8($j) # j++
brct $count,.Linner
algr $NHI,$AHI
lghi $AHI,0
alcgr $AHI,$AHI
alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit
alg $NHI,$stdframe($j,$sp)# accumulate previous upmost overflow bit
lghi $ahi,0
alcgr $AHI,$ahi # new upmost overflow bit
stg $NHI,160-8($j,$sp)
stg $AHI,160($j,$sp)
stg $NHI,$stdframe-8($j,$sp)
stg $AHI,$stdframe($j,$sp)
la $bp,8($bp) # bp++
clg $bp,160+8+32($j,$sp) # compare to &bp[num]
cl${g} $bp,`$stdframe+8+4*$SIZE_T`($j,$sp) # compare to &bp[num]
jne .Louter
lg $rp,160+8+16($j,$sp) # reincarnate rp
la $ap,160($sp)
l${g} $rp,`$stdframe+8+2*$SIZE_T`($j,$sp) # reincarnate rp
la $ap,$stdframe($sp)
ahi $num,1 # restore $num, incidentally clears "borrow"
la $j,0(%r0)
lr $count,$num
.Lsub: lg $alo,0($j,$ap)
slbg $alo,0($j,$np)
lg $nlo,0($j,$np)
_dswap $nlo
slbgr $alo,$nlo
stg $alo,0($j,$rp)
la $j,8($j)
brct $count,.Lsub
@@ -207,19 +254,24 @@ bn_mul_mont:
la $j,0(%r0)
lgr $count,$num
.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
stg $j,160($j,$sp) # zap tp
.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
_dswap $alo
stg $j,$stdframe($j,$sp) # zap tp
stg $alo,0($j,$rp)
la $j,8($j)
brct $count,.Lcopy
la %r1,160+8+48($j,$sp)
lmg %r6,%r15,0(%r1)
la %r1,`$stdframe+8+6*$SIZE_T`($j,$sp)
lm${g} %r6,%r15,0(%r1)
lghi %r2,1 # signal "processed"
br %r14
.size bn_mul_mont,.-bn_mul_mont
.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
___
print $code;
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge;
s/_dswap\s+(%r[0-9]+)/sprintf("rllg\t%s,%s,32",$1,$1) if($SIZE_T==4)/e;
print $_,"\n";
}
close STDOUT;

1222
crypto/bn/asm/sparct4-mont.pl Executable file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,190 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. Former
# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.
$locals=16*8;
$tab="%l0";
@T=("%g2","%g3");
@i=("%g4","%g5");
($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;
$code.=<<___;
#include <sparc_arch.h>
#ifdef __arch64__
.register %g2,#scratch
.register %g3,#scratch
#endif
#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif
.globl bn_GF2m_mul_2x2
.align 16
bn_GF2m_mul_2x2:
SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
ld [%g1+0],%g1 ! OPENSSL_sparcv9cap_P[0]
andcc %g1, SPARCV9_VIS3, %g0
bz,pn %icc,.Lsoftware
nop
sllx %o1, 32, %o1
sllx %o3, 32, %o3
or %o2, %o1, %o1
or %o4, %o3, %o3
.word 0x95b262ab ! xmulx %o1, %o3, %o2
.word 0x99b262cb ! xmulxhi %o1, %o3, %o4
srlx %o2, 32, %o1 ! 13 cycles later
st %o2, [%o0+0]
st %o1, [%o0+4]
srlx %o4, 32, %o3
st %o4, [%o0+8]
retl
st %o3, [%o0+12]
.align 16
.Lsoftware:
save %sp,-STACK_FRAME-$locals,%sp
sllx %i1,32,$a
mov -1,$a12
sllx %i3,32,$b
or %i2,$a,$a
srlx $a12,1,$a48 ! 0x7fff...
or %i4,$b,$b
srlx $a12,2,$a12 ! 0x3fff...
add %sp,STACK_BIAS+STACK_FRAME,$tab
sllx $a,2,$a4
mov $a,$a1
sllx $a,1,$a2
srax $a4,63,@i[1] ! broadcast 61st bit
and $a48,$a4,$a4 ! (a<<2)&0x7fff...
srlx $a48,2,$a48
srax $a2,63,@i[0] ! broadcast 62nd bit
and $a12,$a2,$a2 ! (a<<1)&0x3fff...
srax $a1,63,$lo ! broadcast 63rd bit
and $a48,$a1,$a1 ! (a<<0)&0x1fff...
sllx $a1,3,$a8
and $b,$lo,$lo
and $b,@i[0],@i[0]
and $b,@i[1],@i[1]
stx %g0,[$tab+0*8] ! tab[0]=0
xor $a1,$a2,$a12
stx $a1,[$tab+1*8] ! tab[1]=a1
stx $a2,[$tab+2*8] ! tab[2]=a2
xor $a4,$a8,$a48
stx $a12,[$tab+3*8] ! tab[3]=a1^a2
xor $a4,$a1,$a1
stx $a4,[$tab+4*8] ! tab[4]=a4
xor $a4,$a2,$a2
stx $a1,[$tab+5*8] ! tab[5]=a1^a4
xor $a4,$a12,$a12
stx $a2,[$tab+6*8] ! tab[6]=a2^a4
xor $a48,$a1,$a1
stx $a12,[$tab+7*8] ! tab[7]=a1^a2^a4
xor $a48,$a2,$a2
stx $a8,[$tab+8*8] ! tab[8]=a8
xor $a48,$a12,$a12
stx $a1,[$tab+9*8] ! tab[9]=a1^a8
xor $a4,$a1,$a1
stx $a2,[$tab+10*8] ! tab[10]=a2^a8
xor $a4,$a2,$a2
stx $a12,[$tab+11*8] ! tab[11]=a1^a2^a8
xor $a4,$a12,$a12
stx $a48,[$tab+12*8] ! tab[12]=a4^a8
srlx $lo,1,$hi
stx $a1,[$tab+13*8] ! tab[13]=a1^a4^a8
sllx $lo,63,$lo
stx $a2,[$tab+14*8] ! tab[14]=a2^a4^a8
srlx @i[0],2,@T[0]
stx $a12,[$tab+15*8] ! tab[15]=a1^a2^a4^a8
sllx @i[0],62,$a1
sllx $b,3,@i[0]
srlx @i[1],3,@T[1]
and @i[0],`0xf<<3`,@i[0]
sllx @i[1],61,$a2
ldx [$tab+@i[0]],@i[0]
srlx $b,4-3,@i[1]
xor @T[0],$hi,$hi
and @i[1],`0xf<<3`,@i[1]
xor $a1,$lo,$lo
ldx [$tab+@i[1]],@i[1]
xor @T[1],$hi,$hi
xor @i[0],$lo,$lo
srlx $b,8-3,@i[0]
xor $a2,$lo,$lo
and @i[0],`0xf<<3`,@i[0]
___
for($n=1;$n<14;$n++) {
$code.=<<___;
sllx @i[1],`$n*4`,@T[0]
ldx [$tab+@i[0]],@i[0]
srlx @i[1],`64-$n*4`,@T[1]
xor @T[0],$lo,$lo
srlx $b,`($n+2)*4`-3,@i[1]
xor @T[1],$hi,$hi
and @i[1],`0xf<<3`,@i[1]
___
push(@i,shift(@i)); push(@T,shift(@T));
}
$code.=<<___;
sllx @i[1],`$n*4`,@T[0]
ldx [$tab+@i[0]],@i[0]
srlx @i[1],`64-$n*4`,@T[1]
xor @T[0],$lo,$lo
sllx @i[0],`($n+1)*4`,@T[0]
xor @T[1],$hi,$hi
srlx @i[0],`64-($n+1)*4`,@T[1]
xor @T[0],$lo,$lo
xor @T[1],$hi,$hi
srlx $lo,32,%i1
st $lo,[%i0+0]
st %i1,[%i0+4]
srlx $hi,32,%i2
st $hi,[%i0+8]
st %i2,[%i0+12]
ret
restore
.type bn_GF2m_mul_2x2,#function
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz "GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
close STDOUT;

373
crypto/bn/asm/vis3-mont.pl Normal file
View File

@@ -0,0 +1,373 @@
#!/usr/bin/env perl
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
# October 2012.
#
# SPARCv9 VIS3 Montgomery multiplicaion procedure suitable for T3 and
# onward. There are three new instructions used here: umulxhi,
# addxc[cc] and initializing store. On T3 RSA private key operations
# are 1.54/1.87/2.11/2.26 times faster for 512/1024/2048/4096-bit key
# lengths. This is without dedicated squaring procedure. On T4
# corresponding coefficients are 1.47/2.10/2.80/2.90x, which is mostly
# for reference purposes, because T4 has dedicated Montgomery
# multiplication and squaring *instructions* that deliver even more.
$bits=32;
for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
if ($bits==64) { $bias=2047; $frame=192; }
else { $bias=0; $frame=112; }
$code.=<<___ if ($bits==64);
.register %g2,#scratch
.register %g3,#scratch
___
$code.=<<___;
.section ".text",#alloc,#execinstr
___
($n0,$m0,$m1,$lo0,$hi0, $lo1,$hi1,$aj,$alo,$nj,$nlo,$tj)=
(map("%g$_",(1..5)),map("%o$_",(0..5,7)));
# int bn_mul_mont(
$rp="%o0"; # BN_ULONG *rp,
$ap="%o1"; # const BN_ULONG *ap,
$bp="%o2"; # const BN_ULONG *bp,
$np="%o3"; # const BN_ULONG *np,
$n0p="%o4"; # const BN_ULONG *n0,
$num="%o5"; # int num); # caller ensures that num is even
# and >=6
$code.=<<___;
.globl bn_mul_mont_vis3
.align 32
bn_mul_mont_vis3:
add %sp, $bias, %g4 ! real top of stack
sll $num, 2, $num ! size in bytes
add $num, 63, %g5
andn %g5, 63, %g5 ! buffer size rounded up to 64 bytes
add %g5, %g5, %g1
add %g5, %g1, %g1 ! 3*buffer size
sub %g4, %g1, %g1
andn %g1, 63, %g1 ! align at 64 byte
sub %g1, $frame, %g1 ! new top of stack
sub %g1, %g4, %g1
save %sp, %g1, %sp
___
# +-------------------------------+<----- %sp
# . .
# +-------------------------------+<----- aligned at 64 bytes
# | __int64 tmp[0] |
# +-------------------------------+
# . .
# . .
# +-------------------------------+<----- aligned at 64 bytes
# | __int64 ap[1..0] | converted ap[]
# +-------------------------------+
# | __int64 np[1..0] | converted np[]
# +-------------------------------+
# | __int64 ap[3..2] |
# . .
# . .
# +-------------------------------+
($rp,$ap,$bp,$np,$n0p,$num)=map("%i$_",(0..5));
($t0,$t1,$t2,$t3,$cnt,$tp,$bufsz,$anp)=map("%l$_",(0..7));
($ovf,$i)=($t0,$t1);
$code.=<<___;
ld [$n0p+0], $t0 ! pull n0[0..1] value
add %sp, $bias+$frame, $tp
ld [$n0p+4], $t1
add $tp, %g5, $anp
ld [$bp+0], $t2 ! m0=bp[0]
sllx $t1, 32, $n0
ld [$bp+4], $t3
or $t0, $n0, $n0
add $bp, 8, $bp
ld [$ap+0], $t0 ! ap[0]
sllx $t3, 32, $m0
ld [$ap+4], $t1
or $t2, $m0, $m0
ld [$ap+8], $t2 ! ap[1]
sllx $t1, 32, $aj
ld [$ap+12], $t3
or $t0, $aj, $aj
add $ap, 16, $ap
stx $aj, [$anp] ! converted ap[0]
mulx $aj, $m0, $lo0 ! ap[0]*bp[0]
umulxhi $aj, $m0, $hi0
ld [$np+0], $t0 ! np[0]
sllx $t3, 32, $aj
ld [$np+4], $t1
or $t2, $aj, $aj
ld [$np+8], $t2 ! np[1]
sllx $t1, 32, $nj
ld [$np+12], $t3
or $t0, $nj, $nj
add $np, 16, $np
stx $nj, [$anp+8] ! converted np[0]
mulx $lo0, $n0, $m1 ! "tp[0]"*n0
stx $aj, [$anp+16] ! converted ap[1]
mulx $aj, $m0, $alo ! ap[1]*bp[0]
umulxhi $aj, $m0, $aj ! ahi=aj
mulx $nj, $m1, $lo1 ! np[0]*m1
umulxhi $nj, $m1, $hi1
sllx $t3, 32, $nj
or $t2, $nj, $nj
stx $nj, [$anp+24] ! converted np[1]
add $anp, 32, $anp
addcc $lo0, $lo1, $lo1
addxc %g0, $hi1, $hi1
mulx $nj, $m1, $nlo ! np[1]*m1
umulxhi $nj, $m1, $nj ! nhi=nj
ba .L1st
sub $num, 24, $cnt ! cnt=num-3
.align 16
.L1st:
ld [$ap+0], $t0 ! ap[j]
addcc $alo, $hi0, $lo0
ld [$ap+4], $t1
addxc $aj, %g0, $hi0
sllx $t1, 32, $aj
add $ap, 8, $ap
or $t0, $aj, $aj
stx $aj, [$anp] ! converted ap[j]
ld [$np+0], $t2 ! np[j]
addcc $nlo, $hi1, $lo1
ld [$np+4], $t3
addxc $nj, %g0, $hi1 ! nhi=nj
sllx $t3, 32, $nj
add $np, 8, $np
mulx $aj, $m0, $alo ! ap[j]*bp[0]
or $t2, $nj, $nj
umulxhi $aj, $m0, $aj ! ahi=aj
stx $nj, [$anp+8] ! converted np[j]
add $anp, 16, $anp ! anp++
mulx $nj, $m1, $nlo ! np[j]*m1
addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0]
umulxhi $nj, $m1, $nj ! nhi=nj
addxc %g0, $hi1, $hi1
stx $lo1, [$tp] ! tp[j-1]
add $tp, 8, $tp ! tp++
brnz,pt $cnt, .L1st
sub $cnt, 8, $cnt ! j--
!.L1st
addcc $alo, $hi0, $lo0
addxc $aj, %g0, $hi0 ! ahi=aj
addcc $nlo, $hi1, $lo1
addxc $nj, %g0, $hi1
addcc $lo0, $lo1, $lo1 ! np[j]*m1+ap[j]*bp[0]
addxc %g0, $hi1, $hi1
stx $lo1, [$tp] ! tp[j-1]
add $tp, 8, $tp
addcc $hi0, $hi1, $hi1
addxc %g0, %g0, $ovf ! upmost overflow bit
stx $hi1, [$tp]
add $tp, 8, $tp
ba .Louter
sub $num, 16, $i ! i=num-2
.align 16
.Louter:
ld [$bp+0], $t2 ! m0=bp[i]
ld [$bp+4], $t3
sub $anp, $num, $anp ! rewind
sub $tp, $num, $tp
sub $anp, $num, $anp
add $bp, 8, $bp
sllx $t3, 32, $m0
ldx [$anp+0], $aj ! ap[0]
or $t2, $m0, $m0
ldx [$anp+8], $nj ! np[0]
mulx $aj, $m0, $lo0 ! ap[0]*bp[i]
ldx [$tp], $tj ! tp[0]
umulxhi $aj, $m0, $hi0
ldx [$anp+16], $aj ! ap[1]
addcc $lo0, $tj, $lo0 ! ap[0]*bp[i]+tp[0]
mulx $aj, $m0, $alo ! ap[1]*bp[i]
addxc %g0, $hi0, $hi0
mulx $lo0, $n0, $m1 ! tp[0]*n0
umulxhi $aj, $m0, $aj ! ahi=aj
mulx $nj, $m1, $lo1 ! np[0]*m1
umulxhi $nj, $m1, $hi1
ldx [$anp+24], $nj ! np[1]
add $anp, 32, $anp
addcc $lo1, $lo0, $lo1
mulx $nj, $m1, $nlo ! np[1]*m1
addxc %g0, $hi1, $hi1
umulxhi $nj, $m1, $nj ! nhi=nj
ba .Linner
sub $num, 24, $cnt ! cnt=num-3
.align 16
.Linner:
addcc $alo, $hi0, $lo0
ldx [$tp+8], $tj ! tp[j]
addxc $aj, %g0, $hi0 ! ahi=aj
ldx [$anp+0], $aj ! ap[j]
addcc $nlo, $hi1, $lo1
mulx $aj, $m0, $alo ! ap[j]*bp[i]
addxc $nj, %g0, $hi1 ! nhi=nj
ldx [$anp+8], $nj ! np[j]
add $anp, 16, $anp
umulxhi $aj, $m0, $aj ! ahi=aj
addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j]
mulx $nj, $m1, $nlo ! np[j]*m1
addxc %g0, $hi0, $hi0
umulxhi $nj, $m1, $nj ! nhi=nj
addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j]
addxc %g0, $hi1, $hi1
stx $lo1, [$tp] ! tp[j-1]
add $tp, 8, $tp
brnz,pt $cnt, .Linner
sub $cnt, 8, $cnt
!.Linner
ldx [$tp+8], $tj ! tp[j]
addcc $alo, $hi0, $lo0
addxc $aj, %g0, $hi0 ! ahi=aj
addcc $lo0, $tj, $lo0 ! ap[j]*bp[i]+tp[j]
addxc %g0, $hi0, $hi0
addcc $nlo, $hi1, $lo1
addxc $nj, %g0, $hi1 ! nhi=nj
addcc $lo1, $lo0, $lo1 ! np[j]*m1+ap[j]*bp[i]+tp[j]
addxc %g0, $hi1, $hi1
stx $lo1, [$tp] ! tp[j-1]
subcc %g0, $ovf, %g0 ! move upmost overflow to CCR.xcc
addxccc $hi1, $hi0, $hi1
addxc %g0, %g0, $ovf
stx $hi1, [$tp+8]
add $tp, 16, $tp
brnz,pt $i, .Louter
sub $i, 8, $i
sub $anp, $num, $anp ! rewind
sub $tp, $num, $tp
sub $anp, $num, $anp
ba .Lsub
subcc $num, 8, $cnt ! cnt=num-1 and clear CCR.xcc
.align 16
.Lsub:
ldx [$tp], $tj
add $tp, 8, $tp
ldx [$anp+8], $nj
add $anp, 16, $anp
subccc $tj, $nj, $t2 ! tp[j]-np[j]
srlx $tj, 32, $tj
srlx $nj, 32, $nj
subccc $tj, $nj, $t3
add $rp, 8, $rp
st $t2, [$rp-4] ! reverse order
st $t3, [$rp-8]
brnz,pt $cnt, .Lsub
sub $cnt, 8, $cnt
sub $anp, $num, $anp ! rewind
sub $tp, $num, $tp
sub $anp, $num, $anp
sub $rp, $num, $rp
subc $ovf, %g0, $ovf ! handle upmost overflow bit
and $tp, $ovf, $ap
andn $rp, $ovf, $np
or $np, $ap, $ap ! ap=borrow?tp:rp
ba .Lcopy
sub $num, 8, $cnt
.align 16
.Lcopy: ! copy or in-place refresh
ld [$ap+0], $t2
ld [$ap+4], $t3
add $ap, 8, $ap
stx %g0, [$tp] ! zap
add $tp, 8, $tp
stx %g0, [$anp] ! zap
stx %g0, [$anp+8]
add $anp, 16, $anp
st $t3, [$rp+0] ! flip order
st $t2, [$rp+4]
add $rp, 8, $rp
brnz $cnt, .Lcopy
sub $cnt, 8, $cnt
mov 1, %o0
ret
restore
.type bn_mul_mont_vis3, #function
.size bn_mul_mont_vis3, .-bn_mul_mont_vis3
.asciz "Montgomery Multiplication for SPARCv9 VIS3, CRYPTOGAMS by <appro\@openssl.org>"
.align 4
___
# Purpose of these subroutines is to explicitly encode VIS instructions,
# so that one can compile the module without having to specify VIS
# extentions on compiler command line, e.g. -xarch=v9 vs. -xarch=v9a.
# Idea is to reserve for option to produce "universal" binary and let
# programmer detect if current CPU is VIS capable at run-time.
sub unvis3 {
my ($mnemonic,$rs1,$rs2,$rd)=@_;
my %bias = ( "g" => 0, "o" => 8, "l" => 16, "i" => 24 );
my ($ref,$opf);
my %visopf = ( "addxc" => 0x011,
"addxccc" => 0x013,
"umulxhi" => 0x016 );
$ref = "$mnemonic\t$rs1,$rs2,$rd";
if ($opf=$visopf{$mnemonic}) {
foreach ($rs1,$rs2,$rd) {
return $ref if (!/%([goli])([0-9])/);
$_=$bias{$1}+$2;
}
return sprintf ".word\t0x%08x !%s",
0x81b00000|$rd<<25|$rs1<<14|$opf<<5|$rs2,
$ref;
} else {
return $ref;
}
}
foreach (split("\n",$code)) {
s/\`([^\`]*)\`/eval $1/ge;
s/\b(umulxhi|addxc[c]{0,2})\s+(%[goli][0-7]),\s*(%[goli][0-7]),\s*(%[goli][0-7])/
&unvis3($1,$2,$3,$4)
/ge;
print $_,"\n";
}
close STDOUT;

313
crypto/bn/asm/x86-gf2m.pl Normal file
View File

@@ -0,0 +1,313 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# May 2011
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has three code paths: pure integer
# code suitable for any x86 CPU, MMX code suitable for PIII and later
# and PCLMULQDQ suitable for Westmere and later. Improvement varies
# from one benchmark and µ-arch to another. Below are interval values
# for 163- and 571-bit ECDH benchmarks relative to compiler-generated
# code:
#
# PIII 16%-30%
# P4 12%-12%
# Opteron 18%-40%
# Core2 19%-44%
# Atom 38%-64%
# Westmere 53%-121%(PCLMULQDQ)/20%-32%(MMX)
# Sandy Bridge 72%-127%(PCLMULQDQ)/27%-23%(MMX)
#
# Note that above improvement coefficients are not coefficients for
# bn_GF2m_mul_2x2 itself. For example 120% ECDH improvement is result
# of bn_GF2m_mul_2x2 being >4x faster. As it gets faster, benchmark
# is more and more dominated by other subroutines, most notably by
# BN_GF2m_mod[_mul]_arr...
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../perlasm");
require "x86asm.pl";
&asm_init($ARGV[0],$0,$x86only = $ARGV[$#ARGV] eq "386");
$sse2=0;
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&external_label("OPENSSL_ia32cap_P") if ($sse2);
$a="eax";
$b="ebx";
($a1,$a2,$a4)=("ecx","edx","ebp");
$R="mm0";
@T=("mm1","mm2");
($A,$B,$B30,$B31)=("mm2","mm3","mm4","mm5");
@i=("esi","edi");
if (!$x86only) {
&function_begin_B("_mul_1x1_mmx");
&sub ("esp",32+4);
&mov ($a1,$a);
&lea ($a2,&DWP(0,$a,$a));
&and ($a1,0x3fffffff);
&lea ($a4,&DWP(0,$a2,$a2));
&mov (&DWP(0*4,"esp"),0);
&and ($a2,0x7fffffff);
&movd ($A,$a);
&movd ($B,$b);
&mov (&DWP(1*4,"esp"),$a1); # a1
&xor ($a1,$a2); # a1^a2
&pxor ($B31,$B31);
&pxor ($B30,$B30);
&mov (&DWP(2*4,"esp"),$a2); # a2
&xor ($a2,$a4); # a2^a4
&mov (&DWP(3*4,"esp"),$a1); # a1^a2
&pcmpgtd($B31,$A); # broadcast 31st bit
&paddd ($A,$A); # $A<<=1
&xor ($a1,$a2); # a1^a4=a1^a2^a2^a4
&mov (&DWP(4*4,"esp"),$a4); # a4
&xor ($a4,$a2); # a2=a4^a2^a4
&pand ($B31,$B);
&pcmpgtd($B30,$A); # broadcast 30th bit
&mov (&DWP(5*4,"esp"),$a1); # a1^a4
&xor ($a4,$a1); # a1^a2^a4
&psllq ($B31,31);
&pand ($B30,$B);
&mov (&DWP(6*4,"esp"),$a2); # a2^a4
&mov (@i[0],0x7);
&mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4
&mov ($a4,@i[0]);
&and (@i[0],$b);
&shr ($b,3);
&mov (@i[1],$a4);
&psllq ($B30,30);
&and (@i[1],$b);
&shr ($b,3);
&movd ($R,&DWP(0,"esp",@i[0],4));
&mov (@i[0],$a4);
&and (@i[0],$b);
&shr ($b,3);
for($n=1;$n<9;$n++) {
&movd (@T[1],&DWP(0,"esp",@i[1],4));
&mov (@i[1],$a4);
&psllq (@T[1],3*$n);
&and (@i[1],$b);
&shr ($b,3);
&pxor ($R,@T[1]);
push(@i,shift(@i)); push(@T,shift(@T));
}
&movd (@T[1],&DWP(0,"esp",@i[1],4));
&pxor ($R,$B30);
&psllq (@T[1],3*$n++);
&pxor ($R,@T[1]);
&movd (@T[0],&DWP(0,"esp",@i[0],4));
&pxor ($R,$B31);
&psllq (@T[0],3*$n);
&add ("esp",32+4);
&pxor ($R,@T[0]);
&ret ();
&function_end_B("_mul_1x1_mmx");
}
($lo,$hi)=("eax","edx");
@T=("ecx","ebp");
&function_begin_B("_mul_1x1_ialu");
&sub ("esp",32+4);
&mov ($a1,$a);
&lea ($a2,&DWP(0,$a,$a));
&lea ($a4,&DWP(0,"",$a,4));
&and ($a1,0x3fffffff);
&lea (@i[1],&DWP(0,$lo,$lo));
&sar ($lo,31); # broadcast 31st bit
&mov (&DWP(0*4,"esp"),0);
&and ($a2,0x7fffffff);
&mov (&DWP(1*4,"esp"),$a1); # a1
&xor ($a1,$a2); # a1^a2
&mov (&DWP(2*4,"esp"),$a2); # a2
&xor ($a2,$a4); # a2^a4
&mov (&DWP(3*4,"esp"),$a1); # a1^a2
&xor ($a1,$a2); # a1^a4=a1^a2^a2^a4
&mov (&DWP(4*4,"esp"),$a4); # a4
&xor ($a4,$a2); # a2=a4^a2^a4
&mov (&DWP(5*4,"esp"),$a1); # a1^a4
&xor ($a4,$a1); # a1^a2^a4
&sar (@i[1],31); # broardcast 30th bit
&and ($lo,$b);
&mov (&DWP(6*4,"esp"),$a2); # a2^a4
&and (@i[1],$b);
&mov (&DWP(7*4,"esp"),$a4); # a1^a2^a4
&mov ($hi,$lo);
&shl ($lo,31);
&mov (@T[0],@i[1]);
&shr ($hi,1);
&mov (@i[0],0x7);
&shl (@i[1],30);
&and (@i[0],$b);
&shr (@T[0],2);
&xor ($lo,@i[1]);
&shr ($b,3);
&mov (@i[1],0x7); # 5-byte instruction!?
&and (@i[1],$b);
&shr ($b,3);
&xor ($hi,@T[0]);
&xor ($lo,&DWP(0,"esp",@i[0],4));
&mov (@i[0],0x7);
&and (@i[0],$b);
&shr ($b,3);
for($n=1;$n<9;$n++) {
&mov (@T[1],&DWP(0,"esp",@i[1],4));
&mov (@i[1],0x7);
&mov (@T[0],@T[1]);
&shl (@T[1],3*$n);
&and (@i[1],$b);
&shr (@T[0],32-3*$n);
&xor ($lo,@T[1]);
&shr ($b,3);
&xor ($hi,@T[0]);
push(@i,shift(@i)); push(@T,shift(@T));
}
&mov (@T[1],&DWP(0,"esp",@i[1],4));
&mov (@T[0],@T[1]);
&shl (@T[1],3*$n);
&mov (@i[1],&DWP(0,"esp",@i[0],4));
&shr (@T[0],32-3*$n); $n++;
&mov (@i[0],@i[1]);
&xor ($lo,@T[1]);
&shl (@i[1],3*$n);
&xor ($hi,@T[0]);
&shr (@i[0],32-3*$n);
&xor ($lo,@i[1]);
&xor ($hi,@i[0]);
&add ("esp",32+4);
&ret ();
&function_end_B("_mul_1x1_ialu");
# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
&function_begin_B("bn_GF2m_mul_2x2");
if (!$x86only) {
&picmeup("edx","OPENSSL_ia32cap_P");
&mov ("eax",&DWP(0,"edx"));
&mov ("edx",&DWP(4,"edx"));
&test ("eax",1<<23); # check MMX bit
&jz (&label("ialu"));
if ($sse2) {
&test ("eax",1<<24); # check FXSR bit
&jz (&label("mmx"));
&test ("edx",1<<1); # check PCLMULQDQ bit
&jz (&label("mmx"));
&movups ("xmm0",&QWP(8,"esp"));
&shufps ("xmm0","xmm0",0b10110001);
&pclmulqdq ("xmm0","xmm0",1);
&mov ("eax",&DWP(4,"esp"));
&movups (&QWP(0,"eax"),"xmm0");
&ret ();
&set_label("mmx",16);
}
&push ("ebp");
&push ("ebx");
&push ("esi");
&push ("edi");
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
&call ("_mul_1x1_mmx"); # a1·b1
&movq ("mm7",$R);
&mov ($a,&wparam(2));
&mov ($b,&wparam(4));
&call ("_mul_1x1_mmx"); # a0·b0
&movq ("mm6",$R);
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
&xor ($a,&wparam(2));
&xor ($b,&wparam(4));
&call ("_mul_1x1_mmx"); # (a0+a1)·(b0+b1)
&pxor ($R,"mm7");
&mov ($a,&wparam(0));
&pxor ($R,"mm6"); # (a0+a1)·(b0+b1)-a1·b1-a0·b0
&movq ($A,$R);
&psllq ($R,32);
&pop ("edi");
&psrlq ($A,32);
&pop ("esi");
&pxor ($R,"mm6");
&pop ("ebx");
&pxor ($A,"mm7");
&movq (&QWP(0,$a),$R);
&pop ("ebp");
&movq (&QWP(8,$a),$A);
&emms ();
&ret ();
&set_label("ialu",16);
}
&push ("ebp");
&push ("ebx");
&push ("esi");
&push ("edi");
&stack_push(4+1);
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
&call ("_mul_1x1_ialu"); # a1·b1
&mov (&DWP(8,"esp"),$lo);
&mov (&DWP(12,"esp"),$hi);
&mov ($a,&wparam(2));
&mov ($b,&wparam(4));
&call ("_mul_1x1_ialu"); # a0·b0
&mov (&DWP(0,"esp"),$lo);
&mov (&DWP(4,"esp"),$hi);
&mov ($a,&wparam(1));
&mov ($b,&wparam(3));
&xor ($a,&wparam(2));
&xor ($b,&wparam(4));
&call ("_mul_1x1_ialu"); # (a0+a1)·(b0+b1)
&mov ("ebp",&wparam(0));
@r=("ebx","ecx","edi","esi");
&mov (@r[0],&DWP(0,"esp"));
&mov (@r[1],&DWP(4,"esp"));
&mov (@r[2],&DWP(8,"esp"));
&mov (@r[3],&DWP(12,"esp"));
&xor ($lo,$hi);
&xor ($hi,@r[1]);
&xor ($lo,@r[0]);
&mov (&DWP(0,"ebp"),@r[0]);
&xor ($hi,@r[2]);
&mov (&DWP(12,"ebp"),@r[3]);
&xor ($lo,@r[3]);
&stack_pop(4+1);
&xor ($hi,@r[3]);
&pop ("edi");
&xor ($lo,$hi);
&pop ("esi");
&mov (&DWP(8,"ebp"),$hi);
&pop ("ebx");
&mov (&DWP(4,"ebp"),$lo);
&pop ("ebp");
&ret ();
&function_end_B("bn_GF2m_mul_2x2");
&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();

View File

@@ -527,8 +527,10 @@ $sbit=$num;
&jle (&label("sqradd"));
&mov ($carry,"edx");
&lea ("edx",&DWP(0,$sbit,"edx",2));
&add ("edx","edx");
&shr ($carry,31);
&add ("edx",$sbit);
&adc ($carry,0);
&set_label("sqrlast");
&mov ($word,$_n0);
&mov ($inp,$_np);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,390 @@
#!/usr/bin/env perl
#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# May 2011
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: code suitable
# for any x86_64 CPU and PCLMULQDQ one suitable for Westmere and
# later. Improvement varies from one benchmark and µ-arch to another.
# Vanilla code path is at most 20% faster than compiler-generated code
# [not very impressive], while PCLMULQDQ - whole 85%-160% better on
# 163- and 571-bit ECDH benchmarks on Intel CPUs. Keep in mind that
# these coefficients are not ones for bn_GF2m_mul_2x2 itself, as not
# all CPU time is burnt in it...
$flavour = shift;
$output = shift;
if ($flavour =~ /\./) { $output = $flavour; undef $flavour; }
$win64=0; $win64=1 if ($flavour =~ /[nm]asm|mingw64/ || $output =~ /\.asm$/);
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}x86_64-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
open OUT,"| \"$^X\" $xlate $flavour $output";
*STDOUT=*OUT;
($lo,$hi)=("%rax","%rdx"); $a=$lo;
($i0,$i1)=("%rsi","%rdi");
($t0,$t1)=("%rbx","%rcx");
($b,$mask)=("%rbp","%r8");
($a1,$a2,$a4,$a8,$a12,$a48)=map("%r$_",(9..15));
($R,$Tx)=("%xmm0","%xmm1");
$code.=<<___;
.text
.type _mul_1x1,\@abi-omnipotent
.align 16
_mul_1x1:
sub \$128+8,%rsp
mov \$-1,$a1
lea ($a,$a),$i0
shr \$3,$a1
lea (,$a,4),$i1
and $a,$a1 # a1=a&0x1fffffffffffffff
lea (,$a,8),$a8
sar \$63,$a # broadcast 63rd bit
lea ($a1,$a1),$a2
sar \$63,$i0 # broadcast 62nd bit
lea (,$a1,4),$a4
and $b,$a
sar \$63,$i1 # boardcast 61st bit
mov $a,$hi # $a is $lo
shl \$63,$lo
and $b,$i0
shr \$1,$hi
mov $i0,$t1
shl \$62,$i0
and $b,$i1
shr \$2,$t1
xor $i0,$lo
mov $i1,$t0
shl \$61,$i1
xor $t1,$hi
shr \$3,$t0
xor $i1,$lo
xor $t0,$hi
mov $a1,$a12
movq \$0,0(%rsp) # tab[0]=0
xor $a2,$a12 # a1^a2
mov $a1,8(%rsp) # tab[1]=a1
mov $a4,$a48
mov $a2,16(%rsp) # tab[2]=a2
xor $a8,$a48 # a4^a8
mov $a12,24(%rsp) # tab[3]=a1^a2
xor $a4,$a1
mov $a4,32(%rsp) # tab[4]=a4
xor $a4,$a2
mov $a1,40(%rsp) # tab[5]=a1^a4
xor $a4,$a12
mov $a2,48(%rsp) # tab[6]=a2^a4
xor $a48,$a1 # a1^a4^a4^a8=a1^a8
mov $a12,56(%rsp) # tab[7]=a1^a2^a4
xor $a48,$a2 # a2^a4^a4^a8=a1^a8
mov $a8,64(%rsp) # tab[8]=a8
xor $a48,$a12 # a1^a2^a4^a4^a8=a1^a2^a8
mov $a1,72(%rsp) # tab[9]=a1^a8
xor $a4,$a1 # a1^a8^a4
mov $a2,80(%rsp) # tab[10]=a2^a8
xor $a4,$a2 # a2^a8^a4
mov $a12,88(%rsp) # tab[11]=a1^a2^a8
xor $a4,$a12 # a1^a2^a8^a4
mov $a48,96(%rsp) # tab[12]=a4^a8
mov $mask,$i0
mov $a1,104(%rsp) # tab[13]=a1^a4^a8
and $b,$i0
mov $a2,112(%rsp) # tab[14]=a2^a4^a8
shr \$4,$b
mov $a12,120(%rsp) # tab[15]=a1^a2^a4^a8
mov $mask,$i1
and $b,$i1
shr \$4,$b
movq (%rsp,$i0,8),$R # half of calculations is done in SSE2
mov $mask,$i0
and $b,$i0
shr \$4,$b
___
for ($n=1;$n<8;$n++) {
$code.=<<___;
mov (%rsp,$i1,8),$t1
mov $mask,$i1
mov $t1,$t0
shl \$`8*$n-4`,$t1
and $b,$i1
movq (%rsp,$i0,8),$Tx
shr \$`64-(8*$n-4)`,$t0
xor $t1,$lo
pslldq \$$n,$Tx
mov $mask,$i0
shr \$4,$b
xor $t0,$hi
and $b,$i0
shr \$4,$b
pxor $Tx,$R
___
}
$code.=<<___;
mov (%rsp,$i1,8),$t1
mov $t1,$t0
shl \$`8*$n-4`,$t1
movq $R,$i0
shr \$`64-(8*$n-4)`,$t0
xor $t1,$lo
psrldq \$8,$R
xor $t0,$hi
movq $R,$i1
xor $i0,$lo
xor $i1,$hi
add \$128+8,%rsp
ret
.Lend_mul_1x1:
.size _mul_1x1,.-_mul_1x1
___
($rp,$a1,$a0,$b1,$b0) = $win64? ("%rcx","%rdx","%r8", "%r9","%r10") : # Win64 order
("%rdi","%rsi","%rdx","%rcx","%r8"); # Unix order
$code.=<<___;
.extern OPENSSL_ia32cap_P
.globl bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,\@abi-omnipotent
.align 16
bn_GF2m_mul_2x2:
mov OPENSSL_ia32cap_P(%rip),%rax
bt \$33,%rax
jnc .Lvanilla_mul_2x2
movq $a1,%xmm0
movq $b1,%xmm1
movq $a0,%xmm2
___
$code.=<<___ if ($win64);
movq 40(%rsp),%xmm3
___
$code.=<<___ if (!$win64);
movq $b0,%xmm3
___
$code.=<<___;
movdqa %xmm0,%xmm4
movdqa %xmm1,%xmm5
pclmulqdq \$0,%xmm1,%xmm0 # a1·b1
pxor %xmm2,%xmm4
pxor %xmm3,%xmm5
pclmulqdq \$0,%xmm3,%xmm2 # a0·b0
pclmulqdq \$0,%xmm5,%xmm4 # (a0+a1)·(b0+b1)
xorps %xmm0,%xmm4
xorps %xmm2,%xmm4 # (a0+a1)·(b0+b1)-a0·b0-a1·b1
movdqa %xmm4,%xmm5
pslldq \$8,%xmm4
psrldq \$8,%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm0
movdqu %xmm2,0($rp)
movdqu %xmm0,16($rp)
ret
.align 16
.Lvanilla_mul_2x2:
lea -8*17(%rsp),%rsp
___
$code.=<<___ if ($win64);
mov `8*17+40`(%rsp),$b0
mov %rdi,8*15(%rsp)
mov %rsi,8*16(%rsp)
___
$code.=<<___;
mov %r14,8*10(%rsp)
mov %r13,8*11(%rsp)
mov %r12,8*12(%rsp)
mov %rbp,8*13(%rsp)
mov %rbx,8*14(%rsp)
.Lbody_mul_2x2:
mov $rp,32(%rsp) # save the arguments
mov $a1,40(%rsp)
mov $a0,48(%rsp)
mov $b1,56(%rsp)
mov $b0,64(%rsp)
mov \$0xf,$mask
mov $a1,$a
mov $b1,$b
call _mul_1x1 # a1·b1
mov $lo,16(%rsp)
mov $hi,24(%rsp)
mov 48(%rsp),$a
mov 64(%rsp),$b
call _mul_1x1 # a0·b0
mov $lo,0(%rsp)
mov $hi,8(%rsp)
mov 40(%rsp),$a
mov 56(%rsp),$b
xor 48(%rsp),$a
xor 64(%rsp),$b
call _mul_1x1 # (a0+a1)·(b0+b1)
___
@r=("%rbx","%rcx","%rdi","%rsi");
$code.=<<___;
mov 0(%rsp),@r[0]
mov 8(%rsp),@r[1]
mov 16(%rsp),@r[2]
mov 24(%rsp),@r[3]
mov 32(%rsp),%rbp
xor $hi,$lo
xor @r[1],$hi
xor @r[0],$lo
mov @r[0],0(%rbp)
xor @r[2],$hi
mov @r[3],24(%rbp)
xor @r[3],$lo
xor @r[3],$hi
xor $hi,$lo
mov $hi,16(%rbp)
mov $lo,8(%rbp)
mov 8*10(%rsp),%r14
mov 8*11(%rsp),%r13
mov 8*12(%rsp),%r12
mov 8*13(%rsp),%rbp
mov 8*14(%rsp),%rbx
___
$code.=<<___ if ($win64);
mov 8*15(%rsp),%rdi
mov 8*16(%rsp),%rsi
___
$code.=<<___;
lea 8*17(%rsp),%rsp
ret
.Lend_mul_2x2:
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz "GF(2^m) Multiplication for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
.align 16
___
# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
# CONTEXT *context,DISPATCHER_CONTEXT *disp)
if ($win64) {
$rec="%rcx";
$frame="%rdx";
$context="%r8";
$disp="%r9";
$code.=<<___;
.extern __imp_RtlVirtualUnwind
.type se_handler,\@abi-omnipotent
.align 16
se_handler:
push %rsi
push %rdi
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
pushfq
sub \$64,%rsp
mov 152($context),%rax # pull context->Rsp
mov 248($context),%rbx # pull context->Rip
lea .Lbody_mul_2x2(%rip),%r10
cmp %r10,%rbx # context->Rip<"prologue" label
jb .Lin_prologue
mov 8*10(%rax),%r14 # mimic epilogue
mov 8*11(%rax),%r13
mov 8*12(%rax),%r12
mov 8*13(%rax),%rbp
mov 8*14(%rax),%rbx
mov 8*15(%rax),%rdi
mov 8*16(%rax),%rsi
mov %rbx,144($context) # restore context->Rbx
mov %rbp,160($context) # restore context->Rbp
mov %rsi,168($context) # restore context->Rsi
mov %rdi,176($context) # restore context->Rdi
mov %r12,216($context) # restore context->R12
mov %r13,224($context) # restore context->R13
mov %r14,232($context) # restore context->R14
.Lin_prologue:
lea 8*17(%rax),%rax
mov %rax,152($context) # restore context->Rsp
mov 40($disp),%rdi # disp->ContextRecord
mov $context,%rsi # context
mov \$154,%ecx # sizeof(CONTEXT)
.long 0xa548f3fc # cld; rep movsq
mov $disp,%rsi
xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
mov 8(%rsi),%rdx # arg2, disp->ImageBase
mov 0(%rsi),%r8 # arg3, disp->ControlPc
mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
mov 40(%rsi),%r10 # disp->ContextRecord
lea 56(%rsi),%r11 # &disp->HandlerData
lea 24(%rsi),%r12 # &disp->EstablisherFrame
mov %r10,32(%rsp) # arg5
mov %r11,40(%rsp) # arg6
mov %r12,48(%rsp) # arg7
mov %rcx,56(%rsp) # arg8, (NULL)
call *__imp_RtlVirtualUnwind(%rip)
mov \$1,%eax # ExceptionContinueSearch
add \$64,%rsp
popfq
pop %r15
pop %r14
pop %r13
pop %r12
pop %rbp
pop %rbx
pop %rdi
pop %rsi
ret
.size se_handler,.-se_handler
.section .pdata
.align 4
.rva _mul_1x1
.rva .Lend_mul_1x1
.rva .LSEH_info_1x1
.rva .Lvanilla_mul_2x2
.rva .Lend_mul_2x2
.rva .LSEH_info_2x2
.section .xdata
.align 8
.LSEH_info_1x1:
.byte 0x01,0x07,0x02,0x00
.byte 0x07,0x01,0x11,0x00 # sub rsp,128+8
.LSEH_info_2x2:
.byte 9,0,0,0
.rva se_handler
___
}
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
close STDOUT;

File diff suppressed because it is too large Load Diff

3519
crypto/bn/asm/x86_64-mont5.pl Executable file

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -62,252 +62,252 @@
/* r can == a or b */
int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
const BIGNUM *tmp;
int a_neg = a->neg, ret;
{
const BIGNUM *tmp;
int a_neg = a->neg, ret;
bn_check_top(a);
bn_check_top(b);
bn_check_top(a);
bn_check_top(b);
/* a + b a+b
* a + -b a-b
* -a + b b-a
* -a + -b -(a+b)
*/
if (a_neg ^ b->neg)
{
/* only one is negative */
if (a_neg)
{ tmp=a; a=b; b=tmp; }
/*-
* a + b a+b
* a + -b a-b
* -a + b b-a
* -a + -b -(a+b)
*/
if (a_neg ^ b->neg) {
/* only one is negative */
if (a_neg) {
tmp = a;
a = b;
b = tmp;
}
/* we are now a - b */
/* we are now a - b */
if (BN_ucmp(a,b) < 0)
{
if (!BN_usub(r,b,a)) return(0);
r->neg=1;
}
else
{
if (!BN_usub(r,a,b)) return(0);
r->neg=0;
}
return(1);
}
if (BN_ucmp(a, b) < 0) {
if (!BN_usub(r, b, a))
return (0);
r->neg = 1;
} else {
if (!BN_usub(r, a, b))
return (0);
r->neg = 0;
}
return (1);
}
ret = BN_uadd(r,a,b);
r->neg = a_neg;
bn_check_top(r);
return ret;
}
ret = BN_uadd(r, a, b);
r->neg = a_neg;
bn_check_top(r);
return ret;
}
/* unsigned add of b to a */
int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
int max,min,dif;
BN_ULONG *ap,*bp,*rp,carry,t1,t2;
const BIGNUM *tmp;
{
int max, min, dif;
BN_ULONG *ap, *bp, *rp, carry, t1, t2;
const BIGNUM *tmp;
bn_check_top(a);
bn_check_top(b);
bn_check_top(a);
bn_check_top(b);
if (a->top < b->top)
{ tmp=a; a=b; b=tmp; }
max = a->top;
min = b->top;
dif = max - min;
if (a->top < b->top) {
tmp = a;
a = b;
b = tmp;
}
max = a->top;
min = b->top;
dif = max - min;
if (bn_wexpand(r,max+1) == NULL)
return 0;
if (bn_wexpand(r, max + 1) == NULL)
return 0;
r->top=max;
r->top = max;
ap = a->d;
bp = b->d;
rp = r->d;
ap=a->d;
bp=b->d;
rp=r->d;
carry = bn_add_words(rp, ap, bp, min);
rp += min;
ap += min;
bp += min;
carry=bn_add_words(rp,ap,bp,min);
rp+=min;
ap+=min;
bp+=min;
if (carry)
{
while (dif)
{
dif--;
t1 = *(ap++);
t2 = (t1+1) & BN_MASK2;
*(rp++) = t2;
if (t2)
{
carry=0;
break;
}
}
if (carry)
{
/* carry != 0 => dif == 0 */
*rp = 1;
r->top++;
}
}
if (dif && rp != ap)
while (dif--)
/* copy remaining words if ap != rp */
*(rp++) = *(ap++);
r->neg = 0;
bn_check_top(r);
return 1;
}
if (carry) {
while (dif) {
dif--;
t1 = *(ap++);
t2 = (t1 + 1) & BN_MASK2;
*(rp++) = t2;
if (t2) {
carry = 0;
break;
}
}
if (carry) {
/* carry != 0 => dif == 0 */
*rp = 1;
r->top++;
}
}
if (dif && rp != ap)
while (dif--)
/* copy remaining words if ap != rp */
*(rp++) = *(ap++);
r->neg = 0;
bn_check_top(r);
return 1;
}
/* unsigned subtraction of b from a, a must be larger than b. */
int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
int max,min,dif;
register BN_ULONG t1,t2,*ap,*bp,*rp;
int i,carry;
{
int max, min, dif;
register BN_ULONG t1, t2, *ap, *bp, *rp;
int i, carry;
#if defined(IRIX_CC_BUG) && !defined(LINT)
int dummy;
int dummy;
#endif
bn_check_top(a);
bn_check_top(b);
bn_check_top(a);
bn_check_top(b);
max = a->top;
min = b->top;
dif = max - min;
max = a->top;
min = b->top;
dif = max - min;
if (dif < 0) /* hmm... should not be happening */
{
BNerr(BN_F_BN_USUB,BN_R_ARG2_LT_ARG3);
return(0);
}
if (dif < 0) { /* hmm... should not be happening */
BNerr(BN_F_BN_USUB, BN_R_ARG2_LT_ARG3);
return (0);
}
if (bn_wexpand(r,max) == NULL) return(0);
if (bn_wexpand(r, max) == NULL)
return (0);
ap=a->d;
bp=b->d;
rp=r->d;
ap = a->d;
bp = b->d;
rp = r->d;
#if 1
carry=0;
for (i = min; i != 0; i--)
{
t1= *(ap++);
t2= *(bp++);
if (carry)
{
carry=(t1 <= t2);
t1=(t1-t2-1)&BN_MASK2;
}
else
{
carry=(t1 < t2);
t1=(t1-t2)&BN_MASK2;
}
#if defined(IRIX_CC_BUG) && !defined(LINT)
dummy=t1;
#endif
*(rp++)=t1&BN_MASK2;
}
carry = 0;
for (i = min; i != 0; i--) {
t1 = *(ap++);
t2 = *(bp++);
if (carry) {
carry = (t1 <= t2);
t1 = (t1 - t2 - 1) & BN_MASK2;
} else {
carry = (t1 < t2);
t1 = (t1 - t2) & BN_MASK2;
}
# if defined(IRIX_CC_BUG) && !defined(LINT)
dummy = t1;
# endif
*(rp++) = t1 & BN_MASK2;
}
#else
carry=bn_sub_words(rp,ap,bp,min);
ap+=min;
bp+=min;
rp+=min;
carry = bn_sub_words(rp, ap, bp, min);
ap += min;
bp += min;
rp += min;
#endif
if (carry) /* subtracted */
{
if (!dif)
/* error: a < b */
return 0;
while (dif)
{
dif--;
t1 = *(ap++);
t2 = (t1-1)&BN_MASK2;
*(rp++) = t2;
if (t1)
break;
}
}
if (carry) { /* subtracted */
if (!dif)
/* error: a < b */
return 0;
while (dif) {
dif--;
t1 = *(ap++);
t2 = (t1 - 1) & BN_MASK2;
*(rp++) = t2;
if (t1)
break;
}
}
#if 0
memcpy(rp,ap,sizeof(*rp)*(max-i));
memcpy(rp, ap, sizeof(*rp) * (max - i));
#else
if (rp != ap)
{
for (;;)
{
if (!dif--) break;
rp[0]=ap[0];
if (!dif--) break;
rp[1]=ap[1];
if (!dif--) break;
rp[2]=ap[2];
if (!dif--) break;
rp[3]=ap[3];
rp+=4;
ap+=4;
}
}
if (rp != ap) {
for (;;) {
if (!dif--)
break;
rp[0] = ap[0];
if (!dif--)
break;
rp[1] = ap[1];
if (!dif--)
break;
rp[2] = ap[2];
if (!dif--)
break;
rp[3] = ap[3];
rp += 4;
ap += 4;
}
}
#endif
r->top=max;
r->neg=0;
bn_correct_top(r);
return(1);
}
r->top = max;
r->neg = 0;
bn_correct_top(r);
return (1);
}
int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b)
{
int max;
int add=0,neg=0;
const BIGNUM *tmp;
{
int max;
int add = 0, neg = 0;
const BIGNUM *tmp;
bn_check_top(a);
bn_check_top(b);
bn_check_top(a);
bn_check_top(b);
/* a - b a-b
* a - -b a+b
* -a - b -(a+b)
* -a - -b b-a
*/
if (a->neg)
{
if (b->neg)
{ tmp=a; a=b; b=tmp; }
else
{ add=1; neg=1; }
}
else
{
if (b->neg) { add=1; neg=0; }
}
/*-
* a - b a-b
* a - -b a+b
* -a - b -(a+b)
* -a - -b b-a
*/
if (a->neg) {
if (b->neg) {
tmp = a;
a = b;
b = tmp;
} else {
add = 1;
neg = 1;
}
} else {
if (b->neg) {
add = 1;
neg = 0;
}
}
if (add)
{
if (!BN_uadd(r,a,b)) return(0);
r->neg=neg;
return(1);
}
if (add) {
if (!BN_uadd(r, a, b))
return (0);
r->neg = neg;
return (1);
}
/* We are actually doing a - b :-) */
max=(a->top > b->top)?a->top:b->top;
if (bn_wexpand(r,max) == NULL) return(0);
if (BN_ucmp(a,b) < 0)
{
if (!BN_usub(r,b,a)) return(0);
r->neg=1;
}
else
{
if (!BN_usub(r,a,b)) return(0);
r->neg=0;
}
bn_check_top(r);
return(1);
}
/* We are actually doing a - b :-) */
max = (a->top > b->top) ? a->top : b->top;
if (bn_wexpand(r, max) == NULL)
return (0);
if (BN_ucmp(a, b) < 0) {
if (!BN_usub(r, b, a))
return (0);
r->neg = 1;
} else {
if (!BN_usub(r, a, b))
return (0);
r->neg = 0;
}
bn_check_top(r);
return (1);
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -7,7 +7,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -58,21 +58,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -87,10 +87,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -102,7 +102,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -113,264 +113,273 @@
#include "cryptlib.h"
#include "bn_lcl.h"
#define BN_BLINDING_COUNTER 32
#define BN_BLINDING_COUNTER 32
struct bn_blinding_st
{
BIGNUM *A;
BIGNUM *Ai;
BIGNUM *e;
BIGNUM *mod; /* just a reference */
struct bn_blinding_st {
BIGNUM *A;
BIGNUM *Ai;
BIGNUM *e;
BIGNUM *mod; /* just a reference */
#ifndef OPENSSL_NO_DEPRECATED
unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b;
* used only by crypto/rsa/rsa_eay.c, rsa_lib.c */
unsigned long thread_id; /* added in OpenSSL 0.9.6j and 0.9.7b; used
* only by crypto/rsa/rsa_eay.c, rsa_lib.c */
#endif
CRYPTO_THREADID tid;
unsigned int counter;
unsigned long flags;
BN_MONT_CTX *m_ctx;
int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx,
BN_MONT_CTX *m_ctx);
};
CRYPTO_THREADID tid;
int counter;
unsigned long flags;
BN_MONT_CTX *m_ctx;
int (*bn_mod_exp) (BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx);
};
BN_BLINDING *BN_BLINDING_new(const BIGNUM *A, const BIGNUM *Ai, BIGNUM *mod)
{
BN_BLINDING *ret=NULL;
{
BN_BLINDING *ret = NULL;
bn_check_top(mod);
bn_check_top(mod);
if ((ret=(BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL)
{
BNerr(BN_F_BN_BLINDING_NEW,ERR_R_MALLOC_FAILURE);
return(NULL);
}
memset(ret,0,sizeof(BN_BLINDING));
if (A != NULL)
{
if ((ret->A = BN_dup(A)) == NULL) goto err;
}
if (Ai != NULL)
{
if ((ret->Ai = BN_dup(Ai)) == NULL) goto err;
}
if ((ret = (BN_BLINDING *)OPENSSL_malloc(sizeof(BN_BLINDING))) == NULL) {
BNerr(BN_F_BN_BLINDING_NEW, ERR_R_MALLOC_FAILURE);
return (NULL);
}
memset(ret, 0, sizeof(BN_BLINDING));
if (A != NULL) {
if ((ret->A = BN_dup(A)) == NULL)
goto err;
}
if (Ai != NULL) {
if ((ret->Ai = BN_dup(Ai)) == NULL)
goto err;
}
/* save a copy of mod in the BN_BLINDING structure */
if ((ret->mod = BN_dup(mod)) == NULL) goto err;
if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
/* save a copy of mod in the BN_BLINDING structure */
if ((ret->mod = BN_dup(mod)) == NULL)
goto err;
if (BN_get_flags(mod, BN_FLG_CONSTTIME) != 0)
BN_set_flags(ret->mod, BN_FLG_CONSTTIME);
ret->counter = BN_BLINDING_COUNTER;
CRYPTO_THREADID_current(&ret->tid);
return(ret);
err:
if (ret != NULL) BN_BLINDING_free(ret);
return(NULL);
}
/*
* Set the counter to the special value -1 to indicate that this is
* never-used fresh blinding that does not need updating before first
* use.
*/
ret->counter = -1;
CRYPTO_THREADID_current(&ret->tid);
return (ret);
err:
if (ret != NULL)
BN_BLINDING_free(ret);
return (NULL);
}
void BN_BLINDING_free(BN_BLINDING *r)
{
if(r == NULL)
return;
{
if (r == NULL)
return;
if (r->A != NULL) BN_free(r->A );
if (r->Ai != NULL) BN_free(r->Ai);
if (r->e != NULL) BN_free(r->e );
if (r->mod != NULL) BN_free(r->mod);
OPENSSL_free(r);
}
if (r->A != NULL)
BN_free(r->A);
if (r->Ai != NULL)
BN_free(r->Ai);
if (r->e != NULL)
BN_free(r->e);
if (r->mod != NULL)
BN_free(r->mod);
OPENSSL_free(r);
}
int BN_BLINDING_update(BN_BLINDING *b, BN_CTX *ctx)
{
int ret=0;
{
int ret = 0;
if ((b->A == NULL) || (b->Ai == NULL))
{
BNerr(BN_F_BN_BLINDING_UPDATE,BN_R_NOT_INITIALIZED);
goto err;
}
if ((b->A == NULL) || (b->Ai == NULL)) {
BNerr(BN_F_BN_BLINDING_UPDATE, BN_R_NOT_INITIALIZED);
goto err;
}
if (--(b->counter) == 0 && b->e != NULL &&
!(b->flags & BN_BLINDING_NO_RECREATE))
{
/* re-create blinding parameters */
if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL))
goto err;
}
else if (!(b->flags & BN_BLINDING_NO_UPDATE))
{
if (!BN_mod_mul(b->A,b->A,b->A,b->mod,ctx)) goto err;
if (!BN_mod_mul(b->Ai,b->Ai,b->Ai,b->mod,ctx)) goto err;
}
if (b->counter == -1)
b->counter = 0;
ret=1;
err:
if (b->counter == 0)
b->counter = BN_BLINDING_COUNTER;
return(ret);
}
if (++b->counter == BN_BLINDING_COUNTER && b->e != NULL &&
!(b->flags & BN_BLINDING_NO_RECREATE)) {
/* re-create blinding parameters */
if (!BN_BLINDING_create_param(b, NULL, NULL, ctx, NULL, NULL))
goto err;
} else if (!(b->flags & BN_BLINDING_NO_UPDATE)) {
if (!BN_mod_mul(b->A, b->A, b->A, b->mod, ctx))
goto err;
if (!BN_mod_mul(b->Ai, b->Ai, b->Ai, b->mod, ctx))
goto err;
}
ret = 1;
err:
if (b->counter == BN_BLINDING_COUNTER)
b->counter = 0;
return (ret);
}
int BN_BLINDING_convert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
{
return BN_BLINDING_convert_ex(n, NULL, b, ctx);
}
{
return BN_BLINDING_convert_ex(n, NULL, b, ctx);
}
int BN_BLINDING_convert_ex(BIGNUM *n, BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
{
int ret = 1;
{
int ret = 1;
bn_check_top(n);
bn_check_top(n);
if ((b->A == NULL) || (b->Ai == NULL))
{
BNerr(BN_F_BN_BLINDING_CONVERT_EX,BN_R_NOT_INITIALIZED);
return(0);
}
if ((b->A == NULL) || (b->Ai == NULL)) {
BNerr(BN_F_BN_BLINDING_CONVERT_EX, BN_R_NOT_INITIALIZED);
return (0);
}
if (r != NULL)
{
if (!BN_copy(r, b->Ai)) ret=0;
}
if (b->counter == -1)
/* Fresh blinding, doesn't need updating. */
b->counter = 0;
else if (!BN_BLINDING_update(b, ctx))
return (0);
if (!BN_mod_mul(n,n,b->A,b->mod,ctx)) ret=0;
return ret;
}
if (r != NULL) {
if (!BN_copy(r, b->Ai))
ret = 0;
}
if (!BN_mod_mul(n, n, b->A, b->mod, ctx))
ret = 0;
return ret;
}
int BN_BLINDING_invert(BIGNUM *n, BN_BLINDING *b, BN_CTX *ctx)
{
return BN_BLINDING_invert_ex(n, NULL, b, ctx);
}
{
return BN_BLINDING_invert_ex(n, NULL, b, ctx);
}
int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b, BN_CTX *ctx)
{
int ret;
int BN_BLINDING_invert_ex(BIGNUM *n, const BIGNUM *r, BN_BLINDING *b,
BN_CTX *ctx)
{
int ret;
bn_check_top(n);
if ((b->A == NULL) || (b->Ai == NULL))
{
BNerr(BN_F_BN_BLINDING_INVERT_EX,BN_R_NOT_INITIALIZED);
return(0);
}
bn_check_top(n);
if (r != NULL)
ret = BN_mod_mul(n, n, r, b->mod, ctx);
else
ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx);
if (r != NULL)
ret = BN_mod_mul(n, n, r, b->mod, ctx);
else {
if (b->Ai == NULL) {
BNerr(BN_F_BN_BLINDING_INVERT_EX, BN_R_NOT_INITIALIZED);
return (0);
}
ret = BN_mod_mul(n, n, b->Ai, b->mod, ctx);
}
if (ret >= 0)
{
if (!BN_BLINDING_update(b,ctx))
return(0);
}
bn_check_top(n);
return(ret);
}
bn_check_top(n);
return (ret);
}
#ifndef OPENSSL_NO_DEPRECATED
unsigned long BN_BLINDING_get_thread_id(const BN_BLINDING *b)
{
return b->thread_id;
}
{
return b->thread_id;
}
void BN_BLINDING_set_thread_id(BN_BLINDING *b, unsigned long n)
{
b->thread_id = n;
}
{
b->thread_id = n;
}
#endif
CRYPTO_THREADID *BN_BLINDING_thread_id(BN_BLINDING *b)
{
return &b->tid;
}
{
return &b->tid;
}
unsigned long BN_BLINDING_get_flags(const BN_BLINDING *b)
{
return b->flags;
}
{
return b->flags;
}
void BN_BLINDING_set_flags(BN_BLINDING *b, unsigned long flags)
{
b->flags = flags;
}
{
b->flags = flags;
}
BN_BLINDING *BN_BLINDING_create_param(BN_BLINDING *b,
const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
int (*bn_mod_exp)(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *m_ctx),
BN_MONT_CTX *m_ctx)
const BIGNUM *e, BIGNUM *m, BN_CTX *ctx,
int (*bn_mod_exp) (BIGNUM *r,
const BIGNUM *a,
const BIGNUM *p,
const BIGNUM *m,
BN_CTX *ctx,
BN_MONT_CTX *m_ctx),
BN_MONT_CTX *m_ctx)
{
int retry_counter = 32;
BN_BLINDING *ret = NULL;
int retry_counter = 32;
BN_BLINDING *ret = NULL;
if (b == NULL)
ret = BN_BLINDING_new(NULL, NULL, m);
else
ret = b;
if (b == NULL)
ret = BN_BLINDING_new(NULL, NULL, m);
else
ret = b;
if (ret == NULL)
goto err;
if (ret == NULL)
goto err;
if (ret->A == NULL && (ret->A = BN_new()) == NULL)
goto err;
if (ret->Ai == NULL && (ret->Ai = BN_new()) == NULL)
goto err;
if (ret->A == NULL && (ret->A = BN_new()) == NULL)
goto err;
if (ret->Ai == NULL && (ret->Ai = BN_new()) == NULL)
goto err;
if (e != NULL)
{
if (ret->e != NULL)
BN_free(ret->e);
ret->e = BN_dup(e);
}
if (ret->e == NULL)
goto err;
if (e != NULL) {
if (ret->e != NULL)
BN_free(ret->e);
ret->e = BN_dup(e);
}
if (ret->e == NULL)
goto err;
if (bn_mod_exp != NULL)
ret->bn_mod_exp = bn_mod_exp;
if (m_ctx != NULL)
ret->m_ctx = m_ctx;
if (bn_mod_exp != NULL)
ret->bn_mod_exp = bn_mod_exp;
if (m_ctx != NULL)
ret->m_ctx = m_ctx;
do {
if (!BN_rand_range(ret->A, ret->mod)) goto err;
if (BN_mod_inverse(ret->Ai, ret->A, ret->mod, ctx) == NULL)
{
/* this should almost never happen for good RSA keys */
unsigned long error = ERR_peek_last_error();
if (ERR_GET_REASON(error) == BN_R_NO_INVERSE)
{
if (retry_counter-- == 0)
{
BNerr(BN_F_BN_BLINDING_CREATE_PARAM,
BN_R_TOO_MANY_ITERATIONS);
goto err;
}
ERR_clear_error();
}
else
goto err;
}
else
break;
} while (1);
do {
if (!BN_rand_range(ret->A, ret->mod))
goto err;
if (BN_mod_inverse(ret->Ai, ret->A, ret->mod, ctx) == NULL) {
/*
* this should almost never happen for good RSA keys
*/
unsigned long error = ERR_peek_last_error();
if (ERR_GET_REASON(error) == BN_R_NO_INVERSE) {
if (retry_counter-- == 0) {
BNerr(BN_F_BN_BLINDING_CREATE_PARAM,
BN_R_TOO_MANY_ITERATIONS);
goto err;
}
ERR_clear_error();
} else
goto err;
} else
break;
} while (1);
if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL)
{
if (!ret->bn_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
goto err;
}
else
{
if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx))
goto err;
}
if (ret->bn_mod_exp != NULL && ret->m_ctx != NULL) {
if (!ret->bn_mod_exp
(ret->A, ret->A, ret->e, ret->mod, ctx, ret->m_ctx))
goto err;
} else {
if (!BN_mod_exp(ret->A, ret->A, ret->e, ret->mod, ctx))
goto err;
}
return ret;
err:
if (b == NULL && ret != NULL)
{
BN_BLINDING_free(ret);
ret = NULL;
}
return ret;
err:
if (b == NULL && ret != NULL) {
BN_BLINDING_free(ret);
ret = NULL;
}
return ret;
return ret;
}

Binary file not shown.

View File

@@ -3,7 +3,8 @@
#include "bn.h"
/* "First Oakley Default Group" from RFC2409, section 6.1.
/*-
* "First Oakley Default Group" from RFC2409, section 6.1.
*
* The prime is: 2^768 - 2 ^704 - 1 + 2^64 * { [2^638 pi] + 149686 }
*
@@ -12,21 +13,26 @@
*/
BIGNUM *get_rfc2409_prime_768(BIGNUM *bn)
{
static const unsigned char RFC2409_PRIME_768[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x3A,0x36,0x20,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC2409_PRIME_768,sizeof(RFC2409_PRIME_768),bn);
}
{
static const unsigned char RFC2409_PRIME_768[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x3A, 0x36, 0x20,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC2409_PRIME_768, sizeof(RFC2409_PRIME_768), bn);
}
/* "Second Oakley Default Group" from RFC2409, section 6.2.
/*-
* "Second Oakley Default Group" from RFC2409, section 6.2.
*
* The prime is: 2^1024 - 2^960 - 1 + 2^64 * { [2^894 pi] + 129093 }.
*
@@ -35,24 +41,30 @@ BIGNUM *get_rfc2409_prime_768(BIGNUM *bn)
*/
BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn)
{
static const unsigned char RFC2409_PRIME_1024[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE6,0x53,0x81,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC2409_PRIME_1024,sizeof(RFC2409_PRIME_1024),bn);
}
{
static const unsigned char RFC2409_PRIME_1024[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE6, 0x53, 0x81,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC2409_PRIME_1024, sizeof(RFC2409_PRIME_1024), bn);
}
/* "1536-bit MODP Group" from RFC3526, Section 2.
/*-
* "1536-bit MODP Group" from RFC3526, Section 2.
*
* The prime is: 2^1536 - 2^1472 - 1 + 2^64 * { [2^1406 pi] + 741804 }
*
@@ -61,29 +73,38 @@ BIGNUM *get_rfc2409_prime_1024(BIGNUM *bn)
*/
BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn)
{
static const unsigned char RFC3526_PRIME_1536[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
0xCA,0x23,0x73,0x27,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC3526_PRIME_1536,sizeof(RFC3526_PRIME_1536),bn);
}
{
static const unsigned char RFC3526_PRIME_1536[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x23, 0x73, 0x27,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC3526_PRIME_1536, sizeof(RFC3526_PRIME_1536), bn);
}
/* "2048-bit MODP Group" from RFC3526, Section 3.
/*-
* "2048-bit MODP Group" from RFC3526, Section 3.
*
* The prime is: 2^2048 - 2^1984 - 1 + 2^64 * { [2^1918 pi] + 124476 }
*
@@ -91,35 +112,46 @@ BIGNUM *get_rfc3526_prime_1536(BIGNUM *bn)
*/
BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn)
{
static const unsigned char RFC3526_PRIME_2048[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
0x15,0x72,0x8E,0x5A,0x8A,0xAC,0xAA,0x68,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC3526_PRIME_2048,sizeof(RFC3526_PRIME_2048),bn);
}
{
static const unsigned char RFC3526_PRIME_2048[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAC, 0xAA, 0x68,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC3526_PRIME_2048, sizeof(RFC3526_PRIME_2048), bn);
}
/* "3072-bit MODP Group" from RFC3526, Section 4.
/*-
* "3072-bit MODP Group" from RFC3526, Section 4.
*
* The prime is: 2^3072 - 2^3008 - 1 + 2^64 * { [2^2942 pi] + 1690314 }
*
@@ -127,45 +159,62 @@ BIGNUM *get_rfc3526_prime_2048(BIGNUM *bn)
*/
BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn)
{
static const unsigned char RFC3526_PRIME_3072[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
0xA9,0x3A,0xD2,0xCA,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC3526_PRIME_3072,sizeof(RFC3526_PRIME_3072),bn);
}
{
static const unsigned char RFC3526_PRIME_3072[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x3A, 0xD2, 0xCA,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC3526_PRIME_3072, sizeof(RFC3526_PRIME_3072), bn);
}
/* "4096-bit MODP Group" from RFC3526, Section 5.
/*-
* "4096-bit MODP Group" from RFC3526, Section 5.
*
* The prime is: 2^4096 - 2^4032 - 1 + 2^64 * { [2^3966 pi] + 240904 }
*
@@ -173,56 +222,78 @@ BIGNUM *get_rfc3526_prime_3072(BIGNUM *bn)
*/
BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn)
{
static const unsigned char RFC3526_PRIME_4096[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x06,0x31,0x99,
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC3526_PRIME_4096,sizeof(RFC3526_PRIME_4096),bn);
}
{
static const unsigned char RFC3526_PRIME_4096[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x06, 0x31, 0x99,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC3526_PRIME_4096, sizeof(RFC3526_PRIME_4096), bn);
}
/* "6144-bit MODP Group" from RFC3526, Section 6.
/*-
* "6144-bit MODP Group" from RFC3526, Section 6.
*
* The prime is: 2^6144 - 2^6080 - 1 + 2^64 * { [2^6014 pi] + 929484 }
*
@@ -230,77 +301,110 @@ BIGNUM *get_rfc3526_prime_4096(BIGNUM *bn)
*/
BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn)
{
static const unsigned char RFC3526_PRIME_6144[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92,
0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2,
0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD,
0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F,
0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31,
0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB,
0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B,
0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51,
0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF,
0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15,
0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6,
0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31,
0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3,
0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7,
0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA,
0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2,
0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28,
0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D,
0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C,
0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7,
0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE,
0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E,
0x6D,0xCC,0x40,0x24,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC3526_PRIME_6144,sizeof(RFC3526_PRIME_6144),bn);
}
{
static const unsigned char RFC3526_PRIME_6144[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26,
0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE,
0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E,
0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE,
0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18,
0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED,
0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B,
0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42,
0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC,
0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03,
0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82,
0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E,
0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE,
0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5,
0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8,
0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0,
0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76,
0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0,
0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32,
0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68,
0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6,
0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xCC, 0x40, 0x24,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC3526_PRIME_6144, sizeof(RFC3526_PRIME_6144), bn);
}
/* "8192-bit MODP Group" from RFC3526, Section 7.
/*-
* "8192-bit MODP Group" from RFC3526, Section 7.
*
* The prime is: 2^8192 - 2^8128 - 1 + 2^64 * { [2^8062 pi] + 4743158 }
*
@@ -308,95 +412,136 @@ BIGNUM *get_rfc3526_prime_6144(BIGNUM *bn)
*/
BIGNUM *get_rfc3526_prime_8192(BIGNUM *bn)
{
static const unsigned char RFC3526_PRIME_8192[]={
0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xC9,0x0F,0xDA,0xA2,
0x21,0x68,0xC2,0x34,0xC4,0xC6,0x62,0x8B,0x80,0xDC,0x1C,0xD1,
0x29,0x02,0x4E,0x08,0x8A,0x67,0xCC,0x74,0x02,0x0B,0xBE,0xA6,
0x3B,0x13,0x9B,0x22,0x51,0x4A,0x08,0x79,0x8E,0x34,0x04,0xDD,
0xEF,0x95,0x19,0xB3,0xCD,0x3A,0x43,0x1B,0x30,0x2B,0x0A,0x6D,
0xF2,0x5F,0x14,0x37,0x4F,0xE1,0x35,0x6D,0x6D,0x51,0xC2,0x45,
0xE4,0x85,0xB5,0x76,0x62,0x5E,0x7E,0xC6,0xF4,0x4C,0x42,0xE9,
0xA6,0x37,0xED,0x6B,0x0B,0xFF,0x5C,0xB6,0xF4,0x06,0xB7,0xED,
0xEE,0x38,0x6B,0xFB,0x5A,0x89,0x9F,0xA5,0xAE,0x9F,0x24,0x11,
0x7C,0x4B,0x1F,0xE6,0x49,0x28,0x66,0x51,0xEC,0xE4,0x5B,0x3D,
0xC2,0x00,0x7C,0xB8,0xA1,0x63,0xBF,0x05,0x98,0xDA,0x48,0x36,
0x1C,0x55,0xD3,0x9A,0x69,0x16,0x3F,0xA8,0xFD,0x24,0xCF,0x5F,
0x83,0x65,0x5D,0x23,0xDC,0xA3,0xAD,0x96,0x1C,0x62,0xF3,0x56,
0x20,0x85,0x52,0xBB,0x9E,0xD5,0x29,0x07,0x70,0x96,0x96,0x6D,
0x67,0x0C,0x35,0x4E,0x4A,0xBC,0x98,0x04,0xF1,0x74,0x6C,0x08,
0xCA,0x18,0x21,0x7C,0x32,0x90,0x5E,0x46,0x2E,0x36,0xCE,0x3B,
0xE3,0x9E,0x77,0x2C,0x18,0x0E,0x86,0x03,0x9B,0x27,0x83,0xA2,
0xEC,0x07,0xA2,0x8F,0xB5,0xC5,0x5D,0xF0,0x6F,0x4C,0x52,0xC9,
0xDE,0x2B,0xCB,0xF6,0x95,0x58,0x17,0x18,0x39,0x95,0x49,0x7C,
0xEA,0x95,0x6A,0xE5,0x15,0xD2,0x26,0x18,0x98,0xFA,0x05,0x10,
0x15,0x72,0x8E,0x5A,0x8A,0xAA,0xC4,0x2D,0xAD,0x33,0x17,0x0D,
0x04,0x50,0x7A,0x33,0xA8,0x55,0x21,0xAB,0xDF,0x1C,0xBA,0x64,
0xEC,0xFB,0x85,0x04,0x58,0xDB,0xEF,0x0A,0x8A,0xEA,0x71,0x57,
0x5D,0x06,0x0C,0x7D,0xB3,0x97,0x0F,0x85,0xA6,0xE1,0xE4,0xC7,
0xAB,0xF5,0xAE,0x8C,0xDB,0x09,0x33,0xD7,0x1E,0x8C,0x94,0xE0,
0x4A,0x25,0x61,0x9D,0xCE,0xE3,0xD2,0x26,0x1A,0xD2,0xEE,0x6B,
0xF1,0x2F,0xFA,0x06,0xD9,0x8A,0x08,0x64,0xD8,0x76,0x02,0x73,
0x3E,0xC8,0x6A,0x64,0x52,0x1F,0x2B,0x18,0x17,0x7B,0x20,0x0C,
0xBB,0xE1,0x17,0x57,0x7A,0x61,0x5D,0x6C,0x77,0x09,0x88,0xC0,
0xBA,0xD9,0x46,0xE2,0x08,0xE2,0x4F,0xA0,0x74,0xE5,0xAB,0x31,
0x43,0xDB,0x5B,0xFC,0xE0,0xFD,0x10,0x8E,0x4B,0x82,0xD1,0x20,
0xA9,0x21,0x08,0x01,0x1A,0x72,0x3C,0x12,0xA7,0x87,0xE6,0xD7,
0x88,0x71,0x9A,0x10,0xBD,0xBA,0x5B,0x26,0x99,0xC3,0x27,0x18,
0x6A,0xF4,0xE2,0x3C,0x1A,0x94,0x68,0x34,0xB6,0x15,0x0B,0xDA,
0x25,0x83,0xE9,0xCA,0x2A,0xD4,0x4C,0xE8,0xDB,0xBB,0xC2,0xDB,
0x04,0xDE,0x8E,0xF9,0x2E,0x8E,0xFC,0x14,0x1F,0xBE,0xCA,0xA6,
0x28,0x7C,0x59,0x47,0x4E,0x6B,0xC0,0x5D,0x99,0xB2,0x96,0x4F,
0xA0,0x90,0xC3,0xA2,0x23,0x3B,0xA1,0x86,0x51,0x5B,0xE7,0xED,
0x1F,0x61,0x29,0x70,0xCE,0xE2,0xD7,0xAF,0xB8,0x1B,0xDD,0x76,
0x21,0x70,0x48,0x1C,0xD0,0x06,0x91,0x27,0xD5,0xB0,0x5A,0xA9,
0x93,0xB4,0xEA,0x98,0x8D,0x8F,0xDD,0xC1,0x86,0xFF,0xB7,0xDC,
0x90,0xA6,0xC0,0x8F,0x4D,0xF4,0x35,0xC9,0x34,0x02,0x84,0x92,
0x36,0xC3,0xFA,0xB4,0xD2,0x7C,0x70,0x26,0xC1,0xD4,0xDC,0xB2,
0x60,0x26,0x46,0xDE,0xC9,0x75,0x1E,0x76,0x3D,0xBA,0x37,0xBD,
0xF8,0xFF,0x94,0x06,0xAD,0x9E,0x53,0x0E,0xE5,0xDB,0x38,0x2F,
0x41,0x30,0x01,0xAE,0xB0,0x6A,0x53,0xED,0x90,0x27,0xD8,0x31,
0x17,0x97,0x27,0xB0,0x86,0x5A,0x89,0x18,0xDA,0x3E,0xDB,0xEB,
0xCF,0x9B,0x14,0xED,0x44,0xCE,0x6C,0xBA,0xCE,0xD4,0xBB,0x1B,
0xDB,0x7F,0x14,0x47,0xE6,0xCC,0x25,0x4B,0x33,0x20,0x51,0x51,
0x2B,0xD7,0xAF,0x42,0x6F,0xB8,0xF4,0x01,0x37,0x8C,0xD2,0xBF,
0x59,0x83,0xCA,0x01,0xC6,0x4B,0x92,0xEC,0xF0,0x32,0xEA,0x15,
0xD1,0x72,0x1D,0x03,0xF4,0x82,0xD7,0xCE,0x6E,0x74,0xFE,0xF6,
0xD5,0x5E,0x70,0x2F,0x46,0x98,0x0C,0x82,0xB5,0xA8,0x40,0x31,
0x90,0x0B,0x1C,0x9E,0x59,0xE7,0xC9,0x7F,0xBE,0xC7,0xE8,0xF3,
0x23,0xA9,0x7A,0x7E,0x36,0xCC,0x88,0xBE,0x0F,0x1D,0x45,0xB7,
0xFF,0x58,0x5A,0xC5,0x4B,0xD4,0x07,0xB2,0x2B,0x41,0x54,0xAA,
0xCC,0x8F,0x6D,0x7E,0xBF,0x48,0xE1,0xD8,0x14,0xCC,0x5E,0xD2,
0x0F,0x80,0x37,0xE0,0xA7,0x97,0x15,0xEE,0xF2,0x9B,0xE3,0x28,
0x06,0xA1,0xD5,0x8B,0xB7,0xC5,0xDA,0x76,0xF5,0x50,0xAA,0x3D,
0x8A,0x1F,0xBF,0xF0,0xEB,0x19,0xCC,0xB1,0xA3,0x13,0xD5,0x5C,
0xDA,0x56,0xC9,0xEC,0x2E,0xF2,0x96,0x32,0x38,0x7F,0xE8,0xD7,
0x6E,0x3C,0x04,0x68,0x04,0x3E,0x8F,0x66,0x3F,0x48,0x60,0xEE,
0x12,0xBF,0x2D,0x5B,0x0B,0x74,0x74,0xD6,0xE6,0x94,0xF9,0x1E,
0x6D,0xBE,0x11,0x59,0x74,0xA3,0x92,0x6F,0x12,0xFE,0xE5,0xE4,
0x38,0x77,0x7C,0xB6,0xA9,0x32,0xDF,0x8C,0xD8,0xBE,0xC4,0xD0,
0x73,0xB9,0x31,0xBA,0x3B,0xC8,0x32,0xB6,0x8D,0x9D,0xD3,0x00,
0x74,0x1F,0xA7,0xBF,0x8A,0xFC,0x47,0xED,0x25,0x76,0xF6,0x93,
0x6B,0xA4,0x24,0x66,0x3A,0xAB,0x63,0x9C,0x5A,0xE4,0xF5,0x68,
0x34,0x23,0xB4,0x74,0x2B,0xF1,0xC9,0x78,0x23,0x8F,0x16,0xCB,
0xE3,0x9D,0x65,0x2D,0xE3,0xFD,0xB8,0xBE,0xFC,0x84,0x8A,0xD9,
0x22,0x22,0x2E,0x04,0xA4,0x03,0x7C,0x07,0x13,0xEB,0x57,0xA8,
0x1A,0x23,0xF0,0xC7,0x34,0x73,0xFC,0x64,0x6C,0xEA,0x30,0x6B,
0x4B,0xCB,0xC8,0x86,0x2F,0x83,0x85,0xDD,0xFA,0x9D,0x4B,0x7F,
0xA2,0xC0,0x87,0xE8,0x79,0x68,0x33,0x03,0xED,0x5B,0xDD,0x3A,
0x06,0x2B,0x3C,0xF5,0xB3,0xA2,0x78,0xA6,0x6D,0x2A,0x13,0xF8,
0x3F,0x44,0xF8,0x2D,0xDF,0x31,0x0E,0xE0,0x74,0xAB,0x6A,0x36,
0x45,0x97,0xE8,0x99,0xA0,0x25,0x5D,0xC1,0x64,0xF3,0x1C,0xC5,
0x08,0x46,0x85,0x1D,0xF9,0xAB,0x48,0x19,0x5D,0xED,0x7E,0xA1,
0xB1,0xD5,0x10,0xBD,0x7E,0xE7,0x4D,0x73,0xFA,0xF3,0x6B,0xC3,
0x1E,0xCF,0xA2,0x68,0x35,0x90,0x46,0xF4,0xEB,0x87,0x9F,0x92,
0x40,0x09,0x43,0x8B,0x48,0x1C,0x6C,0xD7,0x88,0x9A,0x00,0x2E,
0xD5,0xEE,0x38,0x2B,0xC9,0x19,0x0D,0xA6,0xFC,0x02,0x6E,0x47,
0x95,0x58,0xE4,0x47,0x56,0x77,0xE9,0xAA,0x9E,0x30,0x50,0xE2,
0x76,0x56,0x94,0xDF,0xC8,0x1F,0x56,0xE8,0x80,0xB9,0x6E,0x71,
0x60,0xC9,0x80,0xDD,0x98,0xED,0xD3,0xDF,0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF,
};
return BN_bin2bn(RFC3526_PRIME_8192,sizeof(RFC3526_PRIME_8192),bn);
}
{
static const unsigned char RFC3526_PRIME_8192[] = {
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xC9, 0x0F, 0xDA, 0xA2, 0x21, 0x68, 0xC2, 0x34,
0xC4, 0xC6, 0x62, 0x8B, 0x80, 0xDC, 0x1C, 0xD1,
0x29, 0x02, 0x4E, 0x08, 0x8A, 0x67, 0xCC, 0x74,
0x02, 0x0B, 0xBE, 0xA6, 0x3B, 0x13, 0x9B, 0x22,
0x51, 0x4A, 0x08, 0x79, 0x8E, 0x34, 0x04, 0xDD,
0xEF, 0x95, 0x19, 0xB3, 0xCD, 0x3A, 0x43, 0x1B,
0x30, 0x2B, 0x0A, 0x6D, 0xF2, 0x5F, 0x14, 0x37,
0x4F, 0xE1, 0x35, 0x6D, 0x6D, 0x51, 0xC2, 0x45,
0xE4, 0x85, 0xB5, 0x76, 0x62, 0x5E, 0x7E, 0xC6,
0xF4, 0x4C, 0x42, 0xE9, 0xA6, 0x37, 0xED, 0x6B,
0x0B, 0xFF, 0x5C, 0xB6, 0xF4, 0x06, 0xB7, 0xED,
0xEE, 0x38, 0x6B, 0xFB, 0x5A, 0x89, 0x9F, 0xA5,
0xAE, 0x9F, 0x24, 0x11, 0x7C, 0x4B, 0x1F, 0xE6,
0x49, 0x28, 0x66, 0x51, 0xEC, 0xE4, 0x5B, 0x3D,
0xC2, 0x00, 0x7C, 0xB8, 0xA1, 0x63, 0xBF, 0x05,
0x98, 0xDA, 0x48, 0x36, 0x1C, 0x55, 0xD3, 0x9A,
0x69, 0x16, 0x3F, 0xA8, 0xFD, 0x24, 0xCF, 0x5F,
0x83, 0x65, 0x5D, 0x23, 0xDC, 0xA3, 0xAD, 0x96,
0x1C, 0x62, 0xF3, 0x56, 0x20, 0x85, 0x52, 0xBB,
0x9E, 0xD5, 0x29, 0x07, 0x70, 0x96, 0x96, 0x6D,
0x67, 0x0C, 0x35, 0x4E, 0x4A, 0xBC, 0x98, 0x04,
0xF1, 0x74, 0x6C, 0x08, 0xCA, 0x18, 0x21, 0x7C,
0x32, 0x90, 0x5E, 0x46, 0x2E, 0x36, 0xCE, 0x3B,
0xE3, 0x9E, 0x77, 0x2C, 0x18, 0x0E, 0x86, 0x03,
0x9B, 0x27, 0x83, 0xA2, 0xEC, 0x07, 0xA2, 0x8F,
0xB5, 0xC5, 0x5D, 0xF0, 0x6F, 0x4C, 0x52, 0xC9,
0xDE, 0x2B, 0xCB, 0xF6, 0x95, 0x58, 0x17, 0x18,
0x39, 0x95, 0x49, 0x7C, 0xEA, 0x95, 0x6A, 0xE5,
0x15, 0xD2, 0x26, 0x18, 0x98, 0xFA, 0x05, 0x10,
0x15, 0x72, 0x8E, 0x5A, 0x8A, 0xAA, 0xC4, 0x2D,
0xAD, 0x33, 0x17, 0x0D, 0x04, 0x50, 0x7A, 0x33,
0xA8, 0x55, 0x21, 0xAB, 0xDF, 0x1C, 0xBA, 0x64,
0xEC, 0xFB, 0x85, 0x04, 0x58, 0xDB, 0xEF, 0x0A,
0x8A, 0xEA, 0x71, 0x57, 0x5D, 0x06, 0x0C, 0x7D,
0xB3, 0x97, 0x0F, 0x85, 0xA6, 0xE1, 0xE4, 0xC7,
0xAB, 0xF5, 0xAE, 0x8C, 0xDB, 0x09, 0x33, 0xD7,
0x1E, 0x8C, 0x94, 0xE0, 0x4A, 0x25, 0x61, 0x9D,
0xCE, 0xE3, 0xD2, 0x26, 0x1A, 0xD2, 0xEE, 0x6B,
0xF1, 0x2F, 0xFA, 0x06, 0xD9, 0x8A, 0x08, 0x64,
0xD8, 0x76, 0x02, 0x73, 0x3E, 0xC8, 0x6A, 0x64,
0x52, 0x1F, 0x2B, 0x18, 0x17, 0x7B, 0x20, 0x0C,
0xBB, 0xE1, 0x17, 0x57, 0x7A, 0x61, 0x5D, 0x6C,
0x77, 0x09, 0x88, 0xC0, 0xBA, 0xD9, 0x46, 0xE2,
0x08, 0xE2, 0x4F, 0xA0, 0x74, 0xE5, 0xAB, 0x31,
0x43, 0xDB, 0x5B, 0xFC, 0xE0, 0xFD, 0x10, 0x8E,
0x4B, 0x82, 0xD1, 0x20, 0xA9, 0x21, 0x08, 0x01,
0x1A, 0x72, 0x3C, 0x12, 0xA7, 0x87, 0xE6, 0xD7,
0x88, 0x71, 0x9A, 0x10, 0xBD, 0xBA, 0x5B, 0x26,
0x99, 0xC3, 0x27, 0x18, 0x6A, 0xF4, 0xE2, 0x3C,
0x1A, 0x94, 0x68, 0x34, 0xB6, 0x15, 0x0B, 0xDA,
0x25, 0x83, 0xE9, 0xCA, 0x2A, 0xD4, 0x4C, 0xE8,
0xDB, 0xBB, 0xC2, 0xDB, 0x04, 0xDE, 0x8E, 0xF9,
0x2E, 0x8E, 0xFC, 0x14, 0x1F, 0xBE, 0xCA, 0xA6,
0x28, 0x7C, 0x59, 0x47, 0x4E, 0x6B, 0xC0, 0x5D,
0x99, 0xB2, 0x96, 0x4F, 0xA0, 0x90, 0xC3, 0xA2,
0x23, 0x3B, 0xA1, 0x86, 0x51, 0x5B, 0xE7, 0xED,
0x1F, 0x61, 0x29, 0x70, 0xCE, 0xE2, 0xD7, 0xAF,
0xB8, 0x1B, 0xDD, 0x76, 0x21, 0x70, 0x48, 0x1C,
0xD0, 0x06, 0x91, 0x27, 0xD5, 0xB0, 0x5A, 0xA9,
0x93, 0xB4, 0xEA, 0x98, 0x8D, 0x8F, 0xDD, 0xC1,
0x86, 0xFF, 0xB7, 0xDC, 0x90, 0xA6, 0xC0, 0x8F,
0x4D, 0xF4, 0x35, 0xC9, 0x34, 0x02, 0x84, 0x92,
0x36, 0xC3, 0xFA, 0xB4, 0xD2, 0x7C, 0x70, 0x26,
0xC1, 0xD4, 0xDC, 0xB2, 0x60, 0x26, 0x46, 0xDE,
0xC9, 0x75, 0x1E, 0x76, 0x3D, 0xBA, 0x37, 0xBD,
0xF8, 0xFF, 0x94, 0x06, 0xAD, 0x9E, 0x53, 0x0E,
0xE5, 0xDB, 0x38, 0x2F, 0x41, 0x30, 0x01, 0xAE,
0xB0, 0x6A, 0x53, 0xED, 0x90, 0x27, 0xD8, 0x31,
0x17, 0x97, 0x27, 0xB0, 0x86, 0x5A, 0x89, 0x18,
0xDA, 0x3E, 0xDB, 0xEB, 0xCF, 0x9B, 0x14, 0xED,
0x44, 0xCE, 0x6C, 0xBA, 0xCE, 0xD4, 0xBB, 0x1B,
0xDB, 0x7F, 0x14, 0x47, 0xE6, 0xCC, 0x25, 0x4B,
0x33, 0x20, 0x51, 0x51, 0x2B, 0xD7, 0xAF, 0x42,
0x6F, 0xB8, 0xF4, 0x01, 0x37, 0x8C, 0xD2, 0xBF,
0x59, 0x83, 0xCA, 0x01, 0xC6, 0x4B, 0x92, 0xEC,
0xF0, 0x32, 0xEA, 0x15, 0xD1, 0x72, 0x1D, 0x03,
0xF4, 0x82, 0xD7, 0xCE, 0x6E, 0x74, 0xFE, 0xF6,
0xD5, 0x5E, 0x70, 0x2F, 0x46, 0x98, 0x0C, 0x82,
0xB5, 0xA8, 0x40, 0x31, 0x90, 0x0B, 0x1C, 0x9E,
0x59, 0xE7, 0xC9, 0x7F, 0xBE, 0xC7, 0xE8, 0xF3,
0x23, 0xA9, 0x7A, 0x7E, 0x36, 0xCC, 0x88, 0xBE,
0x0F, 0x1D, 0x45, 0xB7, 0xFF, 0x58, 0x5A, 0xC5,
0x4B, 0xD4, 0x07, 0xB2, 0x2B, 0x41, 0x54, 0xAA,
0xCC, 0x8F, 0x6D, 0x7E, 0xBF, 0x48, 0xE1, 0xD8,
0x14, 0xCC, 0x5E, 0xD2, 0x0F, 0x80, 0x37, 0xE0,
0xA7, 0x97, 0x15, 0xEE, 0xF2, 0x9B, 0xE3, 0x28,
0x06, 0xA1, 0xD5, 0x8B, 0xB7, 0xC5, 0xDA, 0x76,
0xF5, 0x50, 0xAA, 0x3D, 0x8A, 0x1F, 0xBF, 0xF0,
0xEB, 0x19, 0xCC, 0xB1, 0xA3, 0x13, 0xD5, 0x5C,
0xDA, 0x56, 0xC9, 0xEC, 0x2E, 0xF2, 0x96, 0x32,
0x38, 0x7F, 0xE8, 0xD7, 0x6E, 0x3C, 0x04, 0x68,
0x04, 0x3E, 0x8F, 0x66, 0x3F, 0x48, 0x60, 0xEE,
0x12, 0xBF, 0x2D, 0x5B, 0x0B, 0x74, 0x74, 0xD6,
0xE6, 0x94, 0xF9, 0x1E, 0x6D, 0xBE, 0x11, 0x59,
0x74, 0xA3, 0x92, 0x6F, 0x12, 0xFE, 0xE5, 0xE4,
0x38, 0x77, 0x7C, 0xB6, 0xA9, 0x32, 0xDF, 0x8C,
0xD8, 0xBE, 0xC4, 0xD0, 0x73, 0xB9, 0x31, 0xBA,
0x3B, 0xC8, 0x32, 0xB6, 0x8D, 0x9D, 0xD3, 0x00,
0x74, 0x1F, 0xA7, 0xBF, 0x8A, 0xFC, 0x47, 0xED,
0x25, 0x76, 0xF6, 0x93, 0x6B, 0xA4, 0x24, 0x66,
0x3A, 0xAB, 0x63, 0x9C, 0x5A, 0xE4, 0xF5, 0x68,
0x34, 0x23, 0xB4, 0x74, 0x2B, 0xF1, 0xC9, 0x78,
0x23, 0x8F, 0x16, 0xCB, 0xE3, 0x9D, 0x65, 0x2D,
0xE3, 0xFD, 0xB8, 0xBE, 0xFC, 0x84, 0x8A, 0xD9,
0x22, 0x22, 0x2E, 0x04, 0xA4, 0x03, 0x7C, 0x07,
0x13, 0xEB, 0x57, 0xA8, 0x1A, 0x23, 0xF0, 0xC7,
0x34, 0x73, 0xFC, 0x64, 0x6C, 0xEA, 0x30, 0x6B,
0x4B, 0xCB, 0xC8, 0x86, 0x2F, 0x83, 0x85, 0xDD,
0xFA, 0x9D, 0x4B, 0x7F, 0xA2, 0xC0, 0x87, 0xE8,
0x79, 0x68, 0x33, 0x03, 0xED, 0x5B, 0xDD, 0x3A,
0x06, 0x2B, 0x3C, 0xF5, 0xB3, 0xA2, 0x78, 0xA6,
0x6D, 0x2A, 0x13, 0xF8, 0x3F, 0x44, 0xF8, 0x2D,
0xDF, 0x31, 0x0E, 0xE0, 0x74, 0xAB, 0x6A, 0x36,
0x45, 0x97, 0xE8, 0x99, 0xA0, 0x25, 0x5D, 0xC1,
0x64, 0xF3, 0x1C, 0xC5, 0x08, 0x46, 0x85, 0x1D,
0xF9, 0xAB, 0x48, 0x19, 0x5D, 0xED, 0x7E, 0xA1,
0xB1, 0xD5, 0x10, 0xBD, 0x7E, 0xE7, 0x4D, 0x73,
0xFA, 0xF3, 0x6B, 0xC3, 0x1E, 0xCF, 0xA2, 0x68,
0x35, 0x90, 0x46, 0xF4, 0xEB, 0x87, 0x9F, 0x92,
0x40, 0x09, 0x43, 0x8B, 0x48, 0x1C, 0x6C, 0xD7,
0x88, 0x9A, 0x00, 0x2E, 0xD5, 0xEE, 0x38, 0x2B,
0xC9, 0x19, 0x0D, 0xA6, 0xFC, 0x02, 0x6E, 0x47,
0x95, 0x58, 0xE4, 0x47, 0x56, 0x77, 0xE9, 0xAA,
0x9E, 0x30, 0x50, 0xE2, 0x76, 0x56, 0x94, 0xDF,
0xC8, 0x1F, 0x56, 0xE8, 0x80, 0xB9, 0x6E, 0x71,
0x60, 0xC9, 0x80, 0xDD, 0x98, 0xED, 0xD3, 0xDF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
return BN_bin2bn(RFC3526_PRIME_8192, sizeof(RFC3526_PRIME_8192), bn);
}

Binary file not shown.

View File

@@ -8,7 +8,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -55,9 +55,9 @@
*/
#if !defined(BN_CTX_DEBUG) && !defined(BN_DEBUG)
#ifndef NDEBUG
#define NDEBUG
#endif
# ifndef NDEBUG
# define NDEBUG
# endif
#endif
#include <stdio.h>
@@ -66,7 +66,8 @@
#include "cryptlib.h"
#include "bn_lcl.h"
/* TODO list
/*-
* TODO list
*
* 1. Check a bunch of "(words+1)" type hacks in various bignum functions and
* check they can be safely removed.
@@ -79,376 +80,369 @@
*/
/* How many bignums are in each "pool item"; */
#define BN_CTX_POOL_SIZE 16
#define BN_CTX_POOL_SIZE 16
/* The stack frame info is resizing, set a first-time expansion size; */
#define BN_CTX_START_FRAMES 32
#define BN_CTX_START_FRAMES 32
/***********/
/* BN_POOL */
/***********/
/* A bundle of bignums that can be linked with other bundles */
typedef struct bignum_pool_item
{
/* The bignum values */
BIGNUM vals[BN_CTX_POOL_SIZE];
/* Linked-list admin */
struct bignum_pool_item *prev, *next;
} BN_POOL_ITEM;
typedef struct bignum_pool_item {
/* The bignum values */
BIGNUM vals[BN_CTX_POOL_SIZE];
/* Linked-list admin */
struct bignum_pool_item *prev, *next;
} BN_POOL_ITEM;
/* A linked-list of bignums grouped in bundles */
typedef struct bignum_pool
{
/* Linked-list admin */
BN_POOL_ITEM *head, *current, *tail;
/* Stack depth and allocation size */
unsigned used, size;
} BN_POOL;
static void BN_POOL_init(BN_POOL *);
static void BN_POOL_finish(BN_POOL *);
typedef struct bignum_pool {
/* Linked-list admin */
BN_POOL_ITEM *head, *current, *tail;
/* Stack depth and allocation size */
unsigned used, size;
} BN_POOL;
static void BN_POOL_init(BN_POOL *);
static void BN_POOL_finish(BN_POOL *);
#ifndef OPENSSL_NO_DEPRECATED
static void BN_POOL_reset(BN_POOL *);
static void BN_POOL_reset(BN_POOL *);
#endif
static BIGNUM * BN_POOL_get(BN_POOL *);
static void BN_POOL_release(BN_POOL *, unsigned int);
static BIGNUM *BN_POOL_get(BN_POOL *);
static void BN_POOL_release(BN_POOL *, unsigned int);
/************/
/* BN_STACK */
/************/
/* A wrapper to manage the "stack frames" */
typedef struct bignum_ctx_stack
{
/* Array of indexes into the bignum stack */
unsigned int *indexes;
/* Number of stack frames, and the size of the allocated array */
unsigned int depth, size;
} BN_STACK;
static void BN_STACK_init(BN_STACK *);
static void BN_STACK_finish(BN_STACK *);
typedef struct bignum_ctx_stack {
/* Array of indexes into the bignum stack */
unsigned int *indexes;
/* Number of stack frames, and the size of the allocated array */
unsigned int depth, size;
} BN_STACK;
static void BN_STACK_init(BN_STACK *);
static void BN_STACK_finish(BN_STACK *);
#ifndef OPENSSL_NO_DEPRECATED
static void BN_STACK_reset(BN_STACK *);
static void BN_STACK_reset(BN_STACK *);
#endif
static int BN_STACK_push(BN_STACK *, unsigned int);
static unsigned int BN_STACK_pop(BN_STACK *);
static int BN_STACK_push(BN_STACK *, unsigned int);
static unsigned int BN_STACK_pop(BN_STACK *);
/**********/
/* BN_CTX */
/**********/
/* The opaque BN_CTX type */
struct bignum_ctx
{
/* The bignum bundles */
BN_POOL pool;
/* The "stack frames", if you will */
BN_STACK stack;
/* The number of bignums currently assigned */
unsigned int used;
/* Depth of stack overflow */
int err_stack;
/* Block "gets" until an "end" (compatibility behaviour) */
int too_many;
};
struct bignum_ctx {
/* The bignum bundles */
BN_POOL pool;
/* The "stack frames", if you will */
BN_STACK stack;
/* The number of bignums currently assigned */
unsigned int used;
/* Depth of stack overflow */
int err_stack;
/* Block "gets" until an "end" (compatibility behaviour) */
int too_many;
};
/* Enable this to find BN_CTX bugs */
#ifdef BN_CTX_DEBUG
static const char *ctxdbg_cur = NULL;
static void ctxdbg(BN_CTX *ctx)
{
unsigned int bnidx = 0, fpidx = 0;
BN_POOL_ITEM *item = ctx->pool.head;
BN_STACK *stack = &ctx->stack;
fprintf(stderr,"(%08x): ", (unsigned int)ctx);
while(bnidx < ctx->used)
{
fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
if(!(bnidx % BN_CTX_POOL_SIZE))
item = item->next;
}
fprintf(stderr,"\n");
bnidx = 0;
fprintf(stderr," : ");
while(fpidx < stack->depth)
{
while(bnidx++ < stack->indexes[fpidx])
fprintf(stderr," ");
fprintf(stderr,"^^^ ");
bnidx++;
fpidx++;
}
fprintf(stderr,"\n");
}
#define CTXDBG_ENTRY(str, ctx) do { \
ctxdbg_cur = (str); \
fprintf(stderr,"Starting %s\n", ctxdbg_cur); \
ctxdbg(ctx); \
} while(0)
#define CTXDBG_EXIT(ctx) do { \
fprintf(stderr,"Ending %s\n", ctxdbg_cur); \
ctxdbg(ctx); \
} while(0)
#define CTXDBG_RET(ctx,ret)
{
unsigned int bnidx = 0, fpidx = 0;
BN_POOL_ITEM *item = ctx->pool.head;
BN_STACK *stack = &ctx->stack;
fprintf(stderr, "(%16p): ", ctx);
while (bnidx < ctx->used) {
fprintf(stderr, "%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax);
if (!(bnidx % BN_CTX_POOL_SIZE))
item = item->next;
}
fprintf(stderr, "\n");
bnidx = 0;
fprintf(stderr, " : ");
while (fpidx < stack->depth) {
while (bnidx++ < stack->indexes[fpidx])
fprintf(stderr, " ");
fprintf(stderr, "^^^ ");
bnidx++;
fpidx++;
}
fprintf(stderr, "\n");
}
# define CTXDBG_ENTRY(str, ctx) do { \
ctxdbg_cur = (str); \
fprintf(stderr,"Starting %s\n", ctxdbg_cur); \
ctxdbg(ctx); \
} while(0)
# define CTXDBG_EXIT(ctx) do { \
fprintf(stderr,"Ending %s\n", ctxdbg_cur); \
ctxdbg(ctx); \
} while(0)
# define CTXDBG_RET(ctx,ret)
#else
#define CTXDBG_ENTRY(str, ctx)
#define CTXDBG_EXIT(ctx)
#define CTXDBG_RET(ctx,ret)
# define CTXDBG_ENTRY(str, ctx)
# define CTXDBG_EXIT(ctx)
# define CTXDBG_RET(ctx,ret)
#endif
/* This function is an evil legacy and should not be used. This implementation
* is WYSIWYG, though I've done my best. */
/*
* This function is an evil legacy and should not be used. This
* implementation is WYSIWYG, though I've done my best.
*/
#ifndef OPENSSL_NO_DEPRECATED
void BN_CTX_init(BN_CTX *ctx)
{
/* Assume the caller obtained the context via BN_CTX_new() and so is
* trying to reset it for use. Nothing else makes sense, least of all
* binary compatibility from a time when they could declare a static
* variable. */
BN_POOL_reset(&ctx->pool);
BN_STACK_reset(&ctx->stack);
ctx->used = 0;
ctx->err_stack = 0;
ctx->too_many = 0;
}
{
/*
* Assume the caller obtained the context via BN_CTX_new() and so is
* trying to reset it for use. Nothing else makes sense, least of all
* binary compatibility from a time when they could declare a static
* variable.
*/
BN_POOL_reset(&ctx->pool);
BN_STACK_reset(&ctx->stack);
ctx->used = 0;
ctx->err_stack = 0;
ctx->too_many = 0;
}
#endif
BN_CTX *BN_CTX_new(void)
{
BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX));
if(!ret)
{
BNerr(BN_F_BN_CTX_NEW,ERR_R_MALLOC_FAILURE);
return NULL;
}
/* Initialise the structure */
BN_POOL_init(&ret->pool);
BN_STACK_init(&ret->stack);
ret->used = 0;
ret->err_stack = 0;
ret->too_many = 0;
return ret;
}
{
BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX));
if (!ret) {
BNerr(BN_F_BN_CTX_NEW, ERR_R_MALLOC_FAILURE);
return NULL;
}
/* Initialise the structure */
BN_POOL_init(&ret->pool);
BN_STACK_init(&ret->stack);
ret->used = 0;
ret->err_stack = 0;
ret->too_many = 0;
return ret;
}
void BN_CTX_free(BN_CTX *ctx)
{
if (ctx == NULL)
return;
{
if (ctx == NULL)
return;
#ifdef BN_CTX_DEBUG
{
BN_POOL_ITEM *pool = ctx->pool.head;
fprintf(stderr,"BN_CTX_free, stack-size=%d, pool-bignums=%d\n",
ctx->stack.size, ctx->pool.size);
fprintf(stderr,"dmaxs: ");
while(pool) {
unsigned loop = 0;
while(loop < BN_CTX_POOL_SIZE)
fprintf(stderr,"%02x ", pool->vals[loop++].dmax);
pool = pool->next;
}
fprintf(stderr,"\n");
}
{
BN_POOL_ITEM *pool = ctx->pool.head;
fprintf(stderr, "BN_CTX_free, stack-size=%d, pool-bignums=%d\n",
ctx->stack.size, ctx->pool.size);
fprintf(stderr, "dmaxs: ");
while (pool) {
unsigned loop = 0;
while (loop < BN_CTX_POOL_SIZE)
fprintf(stderr, "%02x ", pool->vals[loop++].dmax);
pool = pool->next;
}
fprintf(stderr, "\n");
}
#endif
BN_STACK_finish(&ctx->stack);
BN_POOL_finish(&ctx->pool);
OPENSSL_free(ctx);
}
BN_STACK_finish(&ctx->stack);
BN_POOL_finish(&ctx->pool);
OPENSSL_free(ctx);
}
void BN_CTX_start(BN_CTX *ctx)
{
CTXDBG_ENTRY("BN_CTX_start", ctx);
/* If we're already overflowing ... */
if(ctx->err_stack || ctx->too_many)
ctx->err_stack++;
/* (Try to) get a new frame pointer */
else if(!BN_STACK_push(&ctx->stack, ctx->used))
{
BNerr(BN_F_BN_CTX_START,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
ctx->err_stack++;
}
CTXDBG_EXIT(ctx);
}
{
CTXDBG_ENTRY("BN_CTX_start", ctx);
/* If we're already overflowing ... */
if (ctx->err_stack || ctx->too_many)
ctx->err_stack++;
/* (Try to) get a new frame pointer */
else if (!BN_STACK_push(&ctx->stack, ctx->used)) {
BNerr(BN_F_BN_CTX_START, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
ctx->err_stack++;
}
CTXDBG_EXIT(ctx);
}
void BN_CTX_end(BN_CTX *ctx)
{
CTXDBG_ENTRY("BN_CTX_end", ctx);
if(ctx->err_stack)
ctx->err_stack--;
else
{
unsigned int fp = BN_STACK_pop(&ctx->stack);
/* Does this stack frame have anything to release? */
if(fp < ctx->used)
BN_POOL_release(&ctx->pool, ctx->used - fp);
ctx->used = fp;
/* Unjam "too_many" in case "get" had failed */
ctx->too_many = 0;
}
CTXDBG_EXIT(ctx);
}
{
CTXDBG_ENTRY("BN_CTX_end", ctx);
if (ctx->err_stack)
ctx->err_stack--;
else {
unsigned int fp = BN_STACK_pop(&ctx->stack);
/* Does this stack frame have anything to release? */
if (fp < ctx->used)
BN_POOL_release(&ctx->pool, ctx->used - fp);
ctx->used = fp;
/* Unjam "too_many" in case "get" had failed */
ctx->too_many = 0;
}
CTXDBG_EXIT(ctx);
}
BIGNUM *BN_CTX_get(BN_CTX *ctx)
{
BIGNUM *ret;
CTXDBG_ENTRY("BN_CTX_get", ctx);
if(ctx->err_stack || ctx->too_many) return NULL;
if((ret = BN_POOL_get(&ctx->pool)) == NULL)
{
/* Setting too_many prevents repeated "get" attempts from
* cluttering the error stack. */
ctx->too_many = 1;
BNerr(BN_F_BN_CTX_GET,BN_R_TOO_MANY_TEMPORARY_VARIABLES);
return NULL;
}
/* OK, make sure the returned bignum is "zero" */
BN_zero(ret);
ctx->used++;
CTXDBG_RET(ctx, ret);
return ret;
}
{
BIGNUM *ret;
CTXDBG_ENTRY("BN_CTX_get", ctx);
if (ctx->err_stack || ctx->too_many)
return NULL;
if ((ret = BN_POOL_get(&ctx->pool)) == NULL) {
/*
* Setting too_many prevents repeated "get" attempts from cluttering
* the error stack.
*/
ctx->too_many = 1;
BNerr(BN_F_BN_CTX_GET, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
return NULL;
}
/* OK, make sure the returned bignum is "zero" */
BN_zero(ret);
ctx->used++;
CTXDBG_RET(ctx, ret);
return ret;
}
/************/
/* BN_STACK */
/************/
static void BN_STACK_init(BN_STACK *st)
{
st->indexes = NULL;
st->depth = st->size = 0;
}
{
st->indexes = NULL;
st->depth = st->size = 0;
}
static void BN_STACK_finish(BN_STACK *st)
{
if(st->size) OPENSSL_free(st->indexes);
}
{
if (st->size)
OPENSSL_free(st->indexes);
}
#ifndef OPENSSL_NO_DEPRECATED
static void BN_STACK_reset(BN_STACK *st)
{
st->depth = 0;
}
{
st->depth = 0;
}
#endif
static int BN_STACK_push(BN_STACK *st, unsigned int idx)
{
if(st->depth == st->size)
/* Need to expand */
{
unsigned int newsize = (st->size ?
(st->size * 3 / 2) : BN_CTX_START_FRAMES);
unsigned int *newitems = OPENSSL_malloc(newsize *
sizeof(unsigned int));
if(!newitems) return 0;
if(st->depth)
memcpy(newitems, st->indexes, st->depth *
sizeof(unsigned int));
if(st->size) OPENSSL_free(st->indexes);
st->indexes = newitems;
st->size = newsize;
}
st->indexes[(st->depth)++] = idx;
return 1;
}
{
if (st->depth == st->size)
/* Need to expand */
{
unsigned int newsize = (st->size ?
(st->size * 3 / 2) : BN_CTX_START_FRAMES);
unsigned int *newitems = OPENSSL_malloc(newsize *
sizeof(unsigned int));
if (!newitems)
return 0;
if (st->depth)
memcpy(newitems, st->indexes, st->depth * sizeof(unsigned int));
if (st->size)
OPENSSL_free(st->indexes);
st->indexes = newitems;
st->size = newsize;
}
st->indexes[(st->depth)++] = idx;
return 1;
}
static unsigned int BN_STACK_pop(BN_STACK *st)
{
return st->indexes[--(st->depth)];
}
{
return st->indexes[--(st->depth)];
}
/***********/
/* BN_POOL */
/***********/
static void BN_POOL_init(BN_POOL *p)
{
p->head = p->current = p->tail = NULL;
p->used = p->size = 0;
}
{
p->head = p->current = p->tail = NULL;
p->used = p->size = 0;
}
static void BN_POOL_finish(BN_POOL *p)
{
while(p->head)
{
unsigned int loop = 0;
BIGNUM *bn = p->head->vals;
while(loop++ < BN_CTX_POOL_SIZE)
{
if(bn->d) BN_clear_free(bn);
bn++;
}
p->current = p->head->next;
OPENSSL_free(p->head);
p->head = p->current;
}
}
{
while (p->head) {
unsigned int loop = 0;
BIGNUM *bn = p->head->vals;
while (loop++ < BN_CTX_POOL_SIZE) {
if (bn->d)
BN_clear_free(bn);
bn++;
}
p->current = p->head->next;
OPENSSL_free(p->head);
p->head = p->current;
}
}
#ifndef OPENSSL_NO_DEPRECATED
static void BN_POOL_reset(BN_POOL *p)
{
BN_POOL_ITEM *item = p->head;
while(item)
{
unsigned int loop = 0;
BIGNUM *bn = item->vals;
while(loop++ < BN_CTX_POOL_SIZE)
{
if(bn->d) BN_clear(bn);
bn++;
}
item = item->next;
}
p->current = p->head;
p->used = 0;
}
{
BN_POOL_ITEM *item = p->head;
while (item) {
unsigned int loop = 0;
BIGNUM *bn = item->vals;
while (loop++ < BN_CTX_POOL_SIZE) {
if (bn->d)
BN_clear(bn);
bn++;
}
item = item->next;
}
p->current = p->head;
p->used = 0;
}
#endif
static BIGNUM *BN_POOL_get(BN_POOL *p)
{
if(p->used == p->size)
{
BIGNUM *bn;
unsigned int loop = 0;
BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(BN_POOL_ITEM));
if(!item) return NULL;
/* Initialise the structure */
bn = item->vals;
while(loop++ < BN_CTX_POOL_SIZE)
BN_init(bn++);
item->prev = p->tail;
item->next = NULL;
/* Link it in */
if(!p->head)
p->head = p->current = p->tail = item;
else
{
p->tail->next = item;
p->tail = item;
p->current = item;
}
p->size += BN_CTX_POOL_SIZE;
p->used++;
/* Return the first bignum from the new pool */
return item->vals;
}
if(!p->used)
p->current = p->head;
else if((p->used % BN_CTX_POOL_SIZE) == 0)
p->current = p->current->next;
return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE);
}
{
if (p->used == p->size) {
BIGNUM *bn;
unsigned int loop = 0;
BN_POOL_ITEM *item = OPENSSL_malloc(sizeof(BN_POOL_ITEM));
if (!item)
return NULL;
/* Initialise the structure */
bn = item->vals;
while (loop++ < BN_CTX_POOL_SIZE)
BN_init(bn++);
item->prev = p->tail;
item->next = NULL;
/* Link it in */
if (!p->head)
p->head = p->current = p->tail = item;
else {
p->tail->next = item;
p->tail = item;
p->current = item;
}
p->size += BN_CTX_POOL_SIZE;
p->used++;
/* Return the first bignum from the new pool */
return item->vals;
}
if (!p->used)
p->current = p->head;
else if ((p->used % BN_CTX_POOL_SIZE) == 0)
p->current = p->current->next;
return p->current->vals + ((p->used++) % BN_CTX_POOL_SIZE);
}
static void BN_POOL_release(BN_POOL *p, unsigned int num)
{
unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
p->used -= num;
while(num--)
{
bn_check_top(p->current->vals + offset);
if(!offset)
{
offset = BN_CTX_POOL_SIZE - 1;
p->current = p->current->prev;
}
else
offset--;
}
}
{
unsigned int offset = (p->used - 1) % BN_CTX_POOL_SIZE;
p->used -= num;
while (num--) {
bn_check_top(p->current->vals + offset);
if (!offset) {
offset = BN_CTX_POOL_SIZE - 1;
p->current = p->current->prev;
} else
offset--;
}
}

Binary file not shown.

View File

@@ -7,7 +7,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -53,8 +53,10 @@
*
*/
/* Support for deprecated functions goes here - static linkage will only slurp
* this code if applications are using them directly. */
/*
* Support for deprecated functions goes here - static linkage will only
* slurp this code if applications are using them directly.
*/
#include <stdio.h>
#include <time.h>
@@ -62,51 +64,52 @@
#include "bn_lcl.h"
#include <openssl/rand.h>
static void *dummy=&dummy;
static void *dummy = &dummy;
#ifndef OPENSSL_NO_DEPRECATED
BIGNUM *BN_generate_prime(BIGNUM *ret, int bits, int safe,
const BIGNUM *add, const BIGNUM *rem,
void (*callback)(int,int,void *), void *cb_arg)
{
BN_GENCB cb;
BIGNUM *rnd=NULL;
int found = 0;
const BIGNUM *add, const BIGNUM *rem,
void (*callback) (int, int, void *), void *cb_arg)
{
BN_GENCB cb;
BIGNUM *rnd = NULL;
int found = 0;
BN_GENCB_set_old(&cb, callback, cb_arg);
BN_GENCB_set_old(&cb, callback, cb_arg);
if (ret == NULL)
{
if ((rnd=BN_new()) == NULL) goto err;
}
else
rnd=ret;
if(!BN_generate_prime_ex(rnd, bits, safe, add, rem, &cb))
goto err;
if (ret == NULL) {
if ((rnd = BN_new()) == NULL)
goto err;
} else
rnd = ret;
if (!BN_generate_prime_ex(rnd, bits, safe, add, rem, &cb))
goto err;
/* we have a prime :-) */
found = 1;
err:
if (!found && (ret == NULL) && (rnd != NULL)) BN_free(rnd);
return(found ? rnd : NULL);
}
/* we have a prime :-) */
found = 1;
err:
if (!found && (ret == NULL) && (rnd != NULL))
BN_free(rnd);
return (found ? rnd : NULL);
}
int BN_is_prime(const BIGNUM *a, int checks, void (*callback)(int,int,void *),
BN_CTX *ctx_passed, void *cb_arg)
{
BN_GENCB cb;
BN_GENCB_set_old(&cb, callback, cb_arg);
return BN_is_prime_ex(a, checks, ctx_passed, &cb);
}
int BN_is_prime(const BIGNUM *a, int checks,
void (*callback) (int, int, void *), BN_CTX *ctx_passed,
void *cb_arg)
{
BN_GENCB cb;
BN_GENCB_set_old(&cb, callback, cb_arg);
return BN_is_prime_ex(a, checks, ctx_passed, &cb);
}
int BN_is_prime_fasttest(const BIGNUM *a, int checks,
void (*callback)(int,int,void *),
BN_CTX *ctx_passed, void *cb_arg,
int do_trial_division)
{
BN_GENCB cb;
BN_GENCB_set_old(&cb, callback, cb_arg);
return BN_is_prime_fasttest_ex(a, checks, ctx_passed,
do_trial_division, &cb);
}
void (*callback) (int, int, void *),
BN_CTX *ctx_passed, void *cb_arg,
int do_trial_division)
{
BN_GENCB cb;
BN_GENCB_set_old(&cb, callback, cb_arg);
return BN_is_prime_fasttest_ex(a, checks, ctx_passed,
do_trial_division, &cb);
}
#endif

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -61,77 +61,86 @@
#include "cryptlib.h"
#include "bn_lcl.h"
/* The old slow way */
#if 0
int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
BN_CTX *ctx)
{
int i,nm,nd;
int ret = 0;
BIGNUM *D;
BN_CTX *ctx)
{
int i, nm, nd;
int ret = 0;
BIGNUM *D;
bn_check_top(m);
bn_check_top(d);
if (BN_is_zero(d))
{
BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
return(0);
}
bn_check_top(m);
bn_check_top(d);
if (BN_is_zero(d)) {
BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
return (0);
}
if (BN_ucmp(m,d) < 0)
{
if (rem != NULL)
{ if (BN_copy(rem,m) == NULL) return(0); }
if (dv != NULL) BN_zero(dv);
return(1);
}
if (BN_ucmp(m, d) < 0) {
if (rem != NULL) {
if (BN_copy(rem, m) == NULL)
return (0);
}
if (dv != NULL)
BN_zero(dv);
return (1);
}
BN_CTX_start(ctx);
D = BN_CTX_get(ctx);
if (dv == NULL) dv = BN_CTX_get(ctx);
if (rem == NULL) rem = BN_CTX_get(ctx);
if (D == NULL || dv == NULL || rem == NULL)
goto end;
BN_CTX_start(ctx);
D = BN_CTX_get(ctx);
if (dv == NULL)
dv = BN_CTX_get(ctx);
if (rem == NULL)
rem = BN_CTX_get(ctx);
if (D == NULL || dv == NULL || rem == NULL)
goto end;
nd=BN_num_bits(d);
nm=BN_num_bits(m);
if (BN_copy(D,d) == NULL) goto end;
if (BN_copy(rem,m) == NULL) goto end;
nd = BN_num_bits(d);
nm = BN_num_bits(m);
if (BN_copy(D, d) == NULL)
goto end;
if (BN_copy(rem, m) == NULL)
goto end;
/* The next 2 are needed so we can do a dv->d[0]|=1 later
* since BN_lshift1 will only work once there is a value :-) */
BN_zero(dv);
if(bn_wexpand(dv,1) == NULL) goto end;
dv->top=1;
/*
* The next 2 are needed so we can do a dv->d[0]|=1 later since
* BN_lshift1 will only work once there is a value :-)
*/
BN_zero(dv);
if (bn_wexpand(dv, 1) == NULL)
goto end;
dv->top = 1;
if (!BN_lshift(D,D,nm-nd)) goto end;
for (i=nm-nd; i>=0; i--)
{
if (!BN_lshift1(dv,dv)) goto end;
if (BN_ucmp(rem,D) >= 0)
{
dv->d[0]|=1;
if (!BN_usub(rem,rem,D)) goto end;
}
if (!BN_lshift(D, D, nm - nd))
goto end;
for (i = nm - nd; i >= 0; i--) {
if (!BN_lshift1(dv, dv))
goto end;
if (BN_ucmp(rem, D) >= 0) {
dv->d[0] |= 1;
if (!BN_usub(rem, rem, D))
goto end;
}
/* CAN IMPROVE (and have now :=) */
if (!BN_rshift1(D,D)) goto end;
}
rem->neg=BN_is_zero(rem)?0:m->neg;
dv->neg=m->neg^d->neg;
ret = 1;
if (!BN_rshift1(D, D))
goto end;
}
rem->neg = BN_is_zero(rem) ? 0 : m->neg;
dv->neg = m->neg ^ d->neg;
ret = 1;
end:
BN_CTX_end(ctx);
return(ret);
}
BN_CTX_end(ctx);
return (ret);
}
#else
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) \
&& !defined(PEDANTIC) && !defined(BN_DIV3W)
# if defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined (__i386__)
/*
# if defined(__GNUC__) && __GNUC__>=2
# if defined(__i386) || defined (__i386__)
/*-
* There were two reasons for implementing this template:
* - GNU C generates a call to a function (__udivdi3 to be exact)
* in reply to ((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0 (I fail to
@@ -139,512 +148,330 @@ int BN_div(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, const BIGNUM *d,
* - divl doesn't only calculate quotient, but also leaves
* remainder in %edx which we can definitely use here:-)
*
* <appro@fy.chalmers.se>
* <appro@fy.chalmers.se>
*/
# define bn_div_words(n0,n1,d0) \
({ asm volatile ( \
"divl %4" \
: "=a"(q), "=d"(rem) \
: "a"(n1), "d"(n0), "g"(d0) \
: "cc"); \
q; \
})
# define REMAINDER_IS_ALREADY_CALCULATED
# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
# undef bn_div_words
# define bn_div_words(n0,n1,d0) \
({ asm volatile ( \
"divl %4" \
: "=a"(q), "=d"(rem) \
: "a"(n1), "d"(n0), "g"(d0) \
: "cc"); \
q; \
})
# define REMAINDER_IS_ALREADY_CALCULATED
# elif defined(__x86_64) && defined(SIXTY_FOUR_BIT_LONG)
/*
* Same story here, but it's 128-bit by 64-bit division. Wow!
* <appro@fy.chalmers.se>
* <appro@fy.chalmers.se>
*/
# define bn_div_words(n0,n1,d0) \
({ asm volatile ( \
"divq %4" \
: "=a"(q), "=d"(rem) \
: "a"(n1), "d"(n0), "g"(d0) \
: "cc"); \
q; \
})
# define REMAINDER_IS_ALREADY_CALCULATED
# endif /* __<cpu> */
# endif /* __GNUC__ */
#endif /* OPENSSL_NO_ASM */
# undef bn_div_words
# define bn_div_words(n0,n1,d0) \
({ asm volatile ( \
"divq %4" \
: "=a"(q), "=d"(rem) \
: "a"(n1), "d"(n0), "g"(d0) \
: "cc"); \
q; \
})
# define REMAINDER_IS_ALREADY_CALCULATED
# endif /* __<cpu> */
# endif /* __GNUC__ */
# endif /* OPENSSL_NO_ASM */
/* BN_div[_no_branch] computes dv := num / divisor, rounding towards
/*-
* BN_div computes dv := num / divisor, rounding towards
* zero, and sets up rm such that dv*divisor + rm = num holds.
* Thus:
* dv->neg == num->neg ^ divisor->neg (unless the result is zero)
* rm->neg == num->neg (unless the remainder is zero)
* If 'dv' or 'rm' is NULL, the respective value is not returned.
*/
static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
const BIGNUM *divisor, BN_CTX *ctx);
int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor,
BN_CTX *ctx)
{
int norm_shift,i,loop;
BIGNUM *tmp,wnum,*snum,*sdiv,*res;
BN_ULONG *resp,*wnump;
BN_ULONG d0,d1;
int num_n,div_n;
BN_CTX *ctx)
{
int norm_shift, i, loop;
BIGNUM *tmp, wnum, *snum, *sdiv, *res;
BN_ULONG *resp, *wnump;
BN_ULONG d0, d1;
int num_n, div_n;
int no_branch = 0;
/* Invalid zero-padding would have particularly bad consequences
* in the case of 'num', so don't just rely on bn_check_top() for this one
* (bn_check_top() works only for BN_DEBUG builds) */
if (num->top > 0 && num->d[num->top - 1] == 0)
{
BNerr(BN_F_BN_DIV,BN_R_NOT_INITIALIZED);
return 0;
}
/*
* Invalid zero-padding would have particularly bad consequences so don't
* just rely on bn_check_top() here (bn_check_top() works only for
* BN_DEBUG builds)
*/
if ((num->top > 0 && num->d[num->top - 1] == 0) ||
(divisor->top > 0 && divisor->d[divisor->top - 1] == 0)) {
BNerr(BN_F_BN_DIV, BN_R_NOT_INITIALIZED);
return 0;
}
bn_check_top(num);
bn_check_top(num);
bn_check_top(divisor);
if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0))
{
return BN_div_no_branch(dv, rm, num, divisor, ctx);
}
if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0)
|| (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) {
no_branch = 1;
}
bn_check_top(dv);
bn_check_top(rm);
/* bn_check_top(num); */ /* 'num' has been checked already */
bn_check_top(divisor);
bn_check_top(dv);
bn_check_top(rm);
/*- bn_check_top(num); *//*
* 'num' has been checked already
*/
/*- bn_check_top(divisor); *//*
* 'divisor' has been checked already
*/
if (BN_is_zero(divisor))
{
BNerr(BN_F_BN_DIV,BN_R_DIV_BY_ZERO);
return(0);
}
if (BN_is_zero(divisor)) {
BNerr(BN_F_BN_DIV, BN_R_DIV_BY_ZERO);
return (0);
}
if (BN_ucmp(num,divisor) < 0)
{
if (rm != NULL)
{ if (BN_copy(rm,num) == NULL) return(0); }
if (dv != NULL) BN_zero(dv);
return(1);
}
if (!no_branch && BN_ucmp(num, divisor) < 0) {
if (rm != NULL) {
if (BN_copy(rm, num) == NULL)
return (0);
}
if (dv != NULL)
BN_zero(dv);
return (1);
}
BN_CTX_start(ctx);
tmp=BN_CTX_get(ctx);
snum=BN_CTX_get(ctx);
sdiv=BN_CTX_get(ctx);
if (dv == NULL)
res=BN_CTX_get(ctx);
else res=dv;
if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
goto err;
BN_CTX_start(ctx);
tmp = BN_CTX_get(ctx);
snum = BN_CTX_get(ctx);
sdiv = BN_CTX_get(ctx);
if (dv == NULL)
res = BN_CTX_get(ctx);
else
res = dv;
if (sdiv == NULL || res == NULL || tmp == NULL || snum == NULL)
goto err;
/* First we normalise the numbers */
norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
sdiv->neg=0;
norm_shift+=BN_BITS2;
if (!(BN_lshift(snum,num,norm_shift))) goto err;
snum->neg=0;
div_n=sdiv->top;
num_n=snum->top;
loop=num_n-div_n;
/* Lets setup a 'window' into snum
* This is the part that corresponds to the current
* 'area' being divided */
wnum.neg = 0;
wnum.d = &(snum->d[loop]);
wnum.top = div_n;
/* only needed when BN_ucmp messes up the values between top and max */
wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
/* First we normalise the numbers */
norm_shift = BN_BITS2 - ((BN_num_bits(divisor)) % BN_BITS2);
if (!(BN_lshift(sdiv, divisor, norm_shift)))
goto err;
sdiv->neg = 0;
norm_shift += BN_BITS2;
if (!(BN_lshift(snum, num, norm_shift)))
goto err;
snum->neg = 0;
/* Get the top 2 words of sdiv */
/* div_n=sdiv->top; */
d0=sdiv->d[div_n-1];
d1=(div_n == 1)?0:sdiv->d[div_n-2];
if (no_branch) {
/*
* Since we don't know whether snum is larger than sdiv, we pad snum
* with enough zeroes without changing its value.
*/
if (snum->top <= sdiv->top + 1) {
if (bn_wexpand(snum, sdiv->top + 2) == NULL)
goto err;
for (i = snum->top; i < sdiv->top + 2; i++)
snum->d[i] = 0;
snum->top = sdiv->top + 2;
} else {
if (bn_wexpand(snum, snum->top + 1) == NULL)
goto err;
snum->d[snum->top] = 0;
snum->top++;
}
}
/* pointer to the 'top' of snum */
wnump= &(snum->d[num_n-1]);
div_n = sdiv->top;
num_n = snum->top;
loop = num_n - div_n;
/*
* Lets setup a 'window' into snum This is the part that corresponds to
* the current 'area' being divided
*/
wnum.neg = 0;
wnum.d = &(snum->d[loop]);
wnum.top = div_n;
/*
* only needed when BN_ucmp messes up the values between top and max
*/
wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
/* Setup to 'res' */
res->neg= (num->neg^divisor->neg);
if (!bn_wexpand(res,(loop+1))) goto err;
res->top=loop;
resp= &(res->d[loop-1]);
/* Get the top 2 words of sdiv */
/* div_n=sdiv->top; */
d0 = sdiv->d[div_n - 1];
d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
/* space for temp */
if (!bn_wexpand(tmp,(div_n+1))) goto err;
/* pointer to the 'top' of snum */
wnump = &(snum->d[num_n - 1]);
if (BN_ucmp(&wnum,sdiv) >= 0)
{
/* If BN_DEBUG_RAND is defined BN_ucmp changes (via
* bn_pollute) the const bignum arguments =>
* clean the values between top and max again */
bn_clear_top2max(&wnum);
bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
*resp=1;
}
else
res->top--;
/* if res->top == 0 then clear the neg value otherwise decrease
* the resp pointer */
if (res->top == 0)
res->neg = 0;
else
resp--;
/* Setup to 'res' */
res->neg = (num->neg ^ divisor->neg);
if (!bn_wexpand(res, (loop + 1)))
goto err;
res->top = loop - no_branch;
resp = &(res->d[loop - 1]);
for (i=0; i<loop-1; i++, wnump--, resp--)
{
BN_ULONG q,l0;
/* the first part of the loop uses the top two words of
* snum and sdiv to calculate a BN_ULONG q such that
* | wnum - sdiv * q | < sdiv */
#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
q=bn_div_3_words(wnump,d1,d0);
#else
BN_ULONG n0,n1,rem=0;
/* space for temp */
if (!bn_wexpand(tmp, (div_n + 1)))
goto err;
n0=wnump[0];
n1=wnump[-1];
if (n0 == d0)
q=BN_MASK2;
else /* n0 < d0 */
{
#ifdef BN_LLONG
BN_ULLONG t2;
#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
#else
q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
n0, n1, d0, q);
#endif
#endif
#ifndef REMAINDER_IS_ALREADY_CALCULATED
/*
* rem doesn't have to be BN_ULLONG. The least we
* know it's less that d0, isn't it?
*/
rem=(n1-q*d0)&BN_MASK2;
#endif
t2=(BN_ULLONG)d1*q;
for (;;)
{
if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
break;
q--;
rem += d0;
if (rem < d0) break; /* don't let rem overflow */
t2 -= d1;
}
#else /* !BN_LLONG */
BN_ULONG t2l,t2h;
q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
n0, n1, d0, q);
#endif
#ifndef REMAINDER_IS_ALREADY_CALCULATED
rem=(n1-q*d0)&BN_MASK2;
#endif
#if defined(BN_UMULT_LOHI)
BN_UMULT_LOHI(t2l,t2h,d1,q);
#elif defined(BN_UMULT_HIGH)
t2l = d1 * q;
t2h = BN_UMULT_HIGH(d1,q);
#else
{
BN_ULONG ql, qh;
t2l=LBITS(d1); t2h=HBITS(d1);
ql =LBITS(q); qh =HBITS(q);
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
}
#endif
for (;;)
{
if ((t2h < rem) ||
((t2h == rem) && (t2l <= wnump[-2])))
break;
q--;
rem += d0;
if (rem < d0) break; /* don't let rem overflow */
if (t2l < d1) t2h--; t2l -= d1;
}
#endif /* !BN_LLONG */
}
#endif /* !BN_DIV3W */
l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
tmp->d[div_n]=l0;
wnum.d--;
/* ingore top values of the bignums just sub the two
* BN_ULONG arrays with bn_sub_words */
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
{
/* Note: As we have considered only the leading
* two BN_ULONGs in the calculation of q, sdiv * q
* might be greater than wnum (but then (q-1) * sdiv
* is less or equal than wnum)
*/
q--;
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
/* we can't have an overflow here (assuming
* that q != 0, but if q == 0 then tmp is
* zero anyway) */
(*wnump)++;
}
/* store part of the result */
*resp = q;
}
bn_correct_top(snum);
if (rm != NULL)
{
/* Keep a copy of the neg flag in num because if rm==num
* BN_rshift() will overwrite it.
*/
int neg = num->neg;
BN_rshift(rm,snum,norm_shift);
if (!BN_is_zero(rm))
rm->neg = neg;
bn_check_top(rm);
}
BN_CTX_end(ctx);
return(1);
err:
bn_check_top(rm);
BN_CTX_end(ctx);
return(0);
}
/* BN_div_no_branch is a special version of BN_div. It does not contain
* branches that may leak sensitive information.
*/
static int BN_div_no_branch(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num,
const BIGNUM *divisor, BN_CTX *ctx)
{
int norm_shift,i,loop;
BIGNUM *tmp,wnum,*snum,*sdiv,*res;
BN_ULONG *resp,*wnump;
BN_ULONG d0,d1;
int num_n,div_n;
bn_check_top(dv);
bn_check_top(rm);
/* bn_check_top(num); */ /* 'num' has been checked in BN_div() */
bn_check_top(divisor);
if (BN_is_zero(divisor))
{
BNerr(BN_F_BN_DIV_NO_BRANCH,BN_R_DIV_BY_ZERO);
return(0);
}
BN_CTX_start(ctx);
tmp=BN_CTX_get(ctx);
snum=BN_CTX_get(ctx);
sdiv=BN_CTX_get(ctx);
if (dv == NULL)
res=BN_CTX_get(ctx);
else res=dv;
if (sdiv == NULL || res == NULL) goto err;
/* First we normalise the numbers */
norm_shift=BN_BITS2-((BN_num_bits(divisor))%BN_BITS2);
if (!(BN_lshift(sdiv,divisor,norm_shift))) goto err;
sdiv->neg=0;
norm_shift+=BN_BITS2;
if (!(BN_lshift(snum,num,norm_shift))) goto err;
snum->neg=0;
/* Since we don't know whether snum is larger than sdiv,
* we pad snum with enough zeroes without changing its
* value.
*/
if (snum->top <= sdiv->top+1)
{
if (bn_wexpand(snum, sdiv->top + 2) == NULL) goto err;
for (i = snum->top; i < sdiv->top + 2; i++) snum->d[i] = 0;
snum->top = sdiv->top + 2;
}
else
{
if (bn_wexpand(snum, snum->top + 1) == NULL) goto err;
snum->d[snum->top] = 0;
snum->top ++;
}
div_n=sdiv->top;
num_n=snum->top;
loop=num_n-div_n;
/* Lets setup a 'window' into snum
* This is the part that corresponds to the current
* 'area' being divided */
wnum.neg = 0;
wnum.d = &(snum->d[loop]);
wnum.top = div_n;
/* only needed when BN_ucmp messes up the values between top and max */
wnum.dmax = snum->dmax - loop; /* so we don't step out of bounds */
/* Get the top 2 words of sdiv */
/* div_n=sdiv->top; */
d0=sdiv->d[div_n-1];
d1=(div_n == 1)?0:sdiv->d[div_n-2];
/* pointer to the 'top' of snum */
wnump= &(snum->d[num_n-1]);
/* Setup to 'res' */
res->neg= (num->neg^divisor->neg);
if (!bn_wexpand(res,(loop+1))) goto err;
res->top=loop-1;
resp= &(res->d[loop-1]);
/* space for temp */
if (!bn_wexpand(tmp,(div_n+1))) goto err;
/* if res->top == 0 then clear the neg value otherwise decrease
* the resp pointer */
if (res->top == 0)
res->neg = 0;
else
resp--;
for (i=0; i<loop-1; i++, wnump--, resp--)
{
BN_ULONG q,l0;
/* the first part of the loop uses the top two words of
* snum and sdiv to calculate a BN_ULONG q such that
* | wnum - sdiv * q | < sdiv */
#if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
BN_ULONG bn_div_3_words(BN_ULONG*,BN_ULONG,BN_ULONG);
q=bn_div_3_words(wnump,d1,d0);
#else
BN_ULONG n0,n1,rem=0;
n0=wnump[0];
n1=wnump[-1];
if (n0 == d0)
q=BN_MASK2;
else /* n0 < d0 */
{
#ifdef BN_LLONG
BN_ULLONG t2;
#if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
q=(BN_ULONG)(((((BN_ULLONG)n0)<<BN_BITS2)|n1)/d0);
#else
q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
n0, n1, d0, q);
#endif
#endif
#ifndef REMAINDER_IS_ALREADY_CALCULATED
/*
* rem doesn't have to be BN_ULLONG. The least we
* know it's less that d0, isn't it?
*/
rem=(n1-q*d0)&BN_MASK2;
#endif
t2=(BN_ULLONG)d1*q;
for (;;)
{
if (t2 <= ((((BN_ULLONG)rem)<<BN_BITS2)|wnump[-2]))
break;
q--;
rem += d0;
if (rem < d0) break; /* don't let rem overflow */
t2 -= d1;
}
#else /* !BN_LLONG */
BN_ULONG t2l,t2h;
q=bn_div_words(n0,n1,d0);
#ifdef BN_DEBUG_LEVITTE
fprintf(stderr,"DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n",
n0, n1, d0, q);
#endif
#ifndef REMAINDER_IS_ALREADY_CALCULATED
rem=(n1-q*d0)&BN_MASK2;
#endif
#if defined(BN_UMULT_LOHI)
BN_UMULT_LOHI(t2l,t2h,d1,q);
#elif defined(BN_UMULT_HIGH)
t2l = d1 * q;
t2h = BN_UMULT_HIGH(d1,q);
#else
{
BN_ULONG ql, qh;
t2l=LBITS(d1); t2h=HBITS(d1);
ql =LBITS(q); qh =HBITS(q);
mul64(t2l,t2h,ql,qh); /* t2=(BN_ULLONG)d1*q; */
}
#endif
for (;;)
{
if ((t2h < rem) ||
((t2h == rem) && (t2l <= wnump[-2])))
break;
q--;
rem += d0;
if (rem < d0) break; /* don't let rem overflow */
if (t2l < d1) t2h--; t2l -= d1;
}
#endif /* !BN_LLONG */
}
#endif /* !BN_DIV3W */
l0=bn_mul_words(tmp->d,sdiv->d,div_n,q);
tmp->d[div_n]=l0;
wnum.d--;
/* ingore top values of the bignums just sub the two
* BN_ULONG arrays with bn_sub_words */
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n+1))
{
/* Note: As we have considered only the leading
* two BN_ULONGs in the calculation of q, sdiv * q
* might be greater than wnum (but then (q-1) * sdiv
* is less or equal than wnum)
*/
q--;
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
/* we can't have an overflow here (assuming
* that q != 0, but if q == 0 then tmp is
* zero anyway) */
(*wnump)++;
}
/* store part of the result */
*resp = q;
}
bn_correct_top(snum);
if (rm != NULL)
{
/* Keep a copy of the neg flag in num because if rm==num
* BN_rshift() will overwrite it.
*/
int neg = num->neg;
BN_rshift(rm,snum,norm_shift);
if (!BN_is_zero(rm))
rm->neg = neg;
bn_check_top(rm);
}
bn_correct_top(res);
BN_CTX_end(ctx);
return(1);
err:
bn_check_top(rm);
BN_CTX_end(ctx);
return(0);
}
if (!no_branch) {
if (BN_ucmp(&wnum, sdiv) >= 0) {
/*
* If BN_DEBUG_RAND is defined BN_ucmp changes (via bn_pollute)
* the const bignum arguments => clean the values between top and
* max again
*/
bn_clear_top2max(&wnum);
bn_sub_words(wnum.d, wnum.d, sdiv->d, div_n);
*resp = 1;
} else
res->top--;
}
/*
* if res->top == 0 then clear the neg value otherwise decrease the resp
* pointer
*/
if (res->top == 0)
res->neg = 0;
else
resp--;
for (i = 0; i < loop - 1; i++, wnump--, resp--) {
BN_ULONG q, l0;
/*
* the first part of the loop uses the top two words of snum and sdiv
* to calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
*/
# if defined(BN_DIV3W) && !defined(OPENSSL_NO_ASM)
BN_ULONG bn_div_3_words(BN_ULONG *, BN_ULONG, BN_ULONG);
q = bn_div_3_words(wnump, d1, d0);
# else
BN_ULONG n0, n1, rem = 0;
n0 = wnump[0];
n1 = wnump[-1];
if (n0 == d0)
q = BN_MASK2;
else { /* n0 < d0 */
# ifdef BN_LLONG
BN_ULLONG t2;
# if defined(BN_LLONG) && defined(BN_DIV2W) && !defined(bn_div_words)
q = (BN_ULONG)(((((BN_ULLONG) n0) << BN_BITS2) | n1) / d0);
# else
q = bn_div_words(n0, n1, d0);
# ifdef BN_DEBUG_LEVITTE
fprintf(stderr, "DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n", n0, n1, d0, q);
# endif
# endif
# ifndef REMAINDER_IS_ALREADY_CALCULATED
/*
* rem doesn't have to be BN_ULLONG. The least we
* know it's less that d0, isn't it?
*/
rem = (n1 - q * d0) & BN_MASK2;
# endif
t2 = (BN_ULLONG) d1 *q;
for (;;) {
if (t2 <= ((((BN_ULLONG) rem) << BN_BITS2) | wnump[-2]))
break;
q--;
rem += d0;
if (rem < d0)
break; /* don't let rem overflow */
t2 -= d1;
}
# else /* !BN_LLONG */
BN_ULONG t2l, t2h;
q = bn_div_words(n0, n1, d0);
# ifdef BN_DEBUG_LEVITTE
fprintf(stderr, "DEBUG: bn_div_words(0x%08X,0x%08X,0x%08\
X) -> 0x%08X\n", n0, n1, d0, q);
# endif
# ifndef REMAINDER_IS_ALREADY_CALCULATED
rem = (n1 - q * d0) & BN_MASK2;
# endif
# if defined(BN_UMULT_LOHI)
BN_UMULT_LOHI(t2l, t2h, d1, q);
# elif defined(BN_UMULT_HIGH)
t2l = d1 * q;
t2h = BN_UMULT_HIGH(d1, q);
# else
{
BN_ULONG ql, qh;
t2l = LBITS(d1);
t2h = HBITS(d1);
ql = LBITS(q);
qh = HBITS(q);
mul64(t2l, t2h, ql, qh); /* t2=(BN_ULLONG)d1*q; */
}
# endif
for (;;) {
if ((t2h < rem) || ((t2h == rem) && (t2l <= wnump[-2])))
break;
q--;
rem += d0;
if (rem < d0)
break; /* don't let rem overflow */
if (t2l < d1)
t2h--;
t2l -= d1;
}
# endif /* !BN_LLONG */
}
# endif /* !BN_DIV3W */
l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
tmp->d[div_n] = l0;
wnum.d--;
/*
* ingore top values of the bignums just sub the two BN_ULONG arrays
* with bn_sub_words
*/
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n + 1)) {
/*
* Note: As we have considered only the leading two BN_ULONGs in
* the calculation of q, sdiv * q might be greater than wnum (but
* then (q-1) * sdiv is less or equal than wnum)
*/
q--;
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n))
/*
* we can't have an overflow here (assuming that q != 0, but
* if q == 0 then tmp is zero anyway)
*/
(*wnump)++;
}
/* store part of the result */
*resp = q;
}
bn_correct_top(snum);
if (rm != NULL) {
/*
* Keep a copy of the neg flag in num because if rm==num BN_rshift()
* will overwrite it.
*/
int neg = num->neg;
BN_rshift(rm, snum, norm_shift);
if (!BN_is_zero(rm))
rm->neg = neg;
bn_check_top(rm);
}
if (no_branch)
bn_correct_top(res);
BN_CTX_end(ctx);
return (1);
err:
bn_check_top(rm);
BN_CTX_end(ctx);
return (0);
}
#endif

Binary file not shown.

View File

@@ -1,13 +1,13 @@
/* crypto/bn/bn_err.c */
/* ====================================================================
* Copyright (c) 1999-2007 The OpenSSL Project. All rights reserved.
* Copyright (c) 1999-2015 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -53,7 +53,8 @@
*
*/
/* NOTE: this file was auto generated by the mkerr.pl script: any changes
/*
* NOTE: this file was auto generated by the mkerr.pl script: any changes
* made to it will be overwritten when the script next updates this file,
* only reason strings will be preserved.
*/
@@ -65,86 +66,89 @@
/* BEGIN ERROR CODES */
#ifndef OPENSSL_NO_ERR
#define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
#define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
# define ERR_FUNC(func) ERR_PACK(ERR_LIB_BN,func,0)
# define ERR_REASON(reason) ERR_PACK(ERR_LIB_BN,0,reason)
static ERR_STRING_DATA BN_str_functs[]=
{
{ERR_FUNC(BN_F_BNRAND), "BNRAND"},
{ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"},
{ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"},
{ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"},
{ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"},
{ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"},
{ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"},
{ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"},
{ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"},
{ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
{ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"},
{ERR_FUNC(BN_F_BN_DIV), "BN_div"},
{ERR_FUNC(BN_F_BN_DIV_NO_BRANCH), "BN_div_no_branch"},
{ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"},
{ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
{ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"},
{ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"},
{ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"},
{ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"},
{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"},
{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"},
{ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
{ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
{ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
{ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"},
{ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
{ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"},
{ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
{ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"},
{ERR_FUNC(BN_F_BN_NEW), "BN_new"},
{ERR_FUNC(BN_F_BN_RAND), "BN_rand"},
{ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"},
{ERR_FUNC(BN_F_BN_USUB), "BN_usub"},
{0,NULL}
};
static ERR_STRING_DATA BN_str_functs[] = {
{ERR_FUNC(BN_F_BNRAND), "BNRAND"},
{ERR_FUNC(BN_F_BN_BLINDING_CONVERT_EX), "BN_BLINDING_convert_ex"},
{ERR_FUNC(BN_F_BN_BLINDING_CREATE_PARAM), "BN_BLINDING_create_param"},
{ERR_FUNC(BN_F_BN_BLINDING_INVERT_EX), "BN_BLINDING_invert_ex"},
{ERR_FUNC(BN_F_BN_BLINDING_NEW), "BN_BLINDING_new"},
{ERR_FUNC(BN_F_BN_BLINDING_UPDATE), "BN_BLINDING_update"},
{ERR_FUNC(BN_F_BN_BN2DEC), "BN_bn2dec"},
{ERR_FUNC(BN_F_BN_BN2HEX), "BN_bn2hex"},
{ERR_FUNC(BN_F_BN_CTX_GET), "BN_CTX_get"},
{ERR_FUNC(BN_F_BN_CTX_NEW), "BN_CTX_new"},
{ERR_FUNC(BN_F_BN_CTX_START), "BN_CTX_start"},
{ERR_FUNC(BN_F_BN_DIV), "BN_div"},
{ERR_FUNC(BN_F_BN_DIV_NO_BRANCH), "BN_div_no_branch"},
{ERR_FUNC(BN_F_BN_DIV_RECP), "BN_div_recp"},
{ERR_FUNC(BN_F_BN_EXP), "BN_exp"},
{ERR_FUNC(BN_F_BN_EXPAND2), "bn_expand2"},
{ERR_FUNC(BN_F_BN_EXPAND_INTERNAL), "BN_EXPAND_INTERNAL"},
{ERR_FUNC(BN_F_BN_GF2M_MOD), "BN_GF2m_mod"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_EXP), "BN_GF2m_mod_exp"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_MUL), "BN_GF2m_mod_mul"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD), "BN_GF2m_mod_solve_quad"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SOLVE_QUAD_ARR), "BN_GF2m_mod_solve_quad_arr"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SQR), "BN_GF2m_mod_sqr"},
{ERR_FUNC(BN_F_BN_GF2M_MOD_SQRT), "BN_GF2m_mod_sqrt"},
{ERR_FUNC(BN_F_BN_LSHIFT), "BN_lshift"},
{ERR_FUNC(BN_F_BN_MOD_EXP2_MONT), "BN_mod_exp2_mont"},
{ERR_FUNC(BN_F_BN_MOD_EXP_MONT), "BN_mod_exp_mont"},
{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_CONSTTIME), "BN_mod_exp_mont_consttime"},
{ERR_FUNC(BN_F_BN_MOD_EXP_MONT_WORD), "BN_mod_exp_mont_word"},
{ERR_FUNC(BN_F_BN_MOD_EXP_RECP), "BN_mod_exp_recp"},
{ERR_FUNC(BN_F_BN_MOD_EXP_SIMPLE), "BN_mod_exp_simple"},
{ERR_FUNC(BN_F_BN_MOD_INVERSE), "BN_mod_inverse"},
{ERR_FUNC(BN_F_BN_MOD_INVERSE_NO_BRANCH), "BN_mod_inverse_no_branch"},
{ERR_FUNC(BN_F_BN_MOD_LSHIFT_QUICK), "BN_mod_lshift_quick"},
{ERR_FUNC(BN_F_BN_MOD_MUL_RECIPROCAL), "BN_mod_mul_reciprocal"},
{ERR_FUNC(BN_F_BN_MOD_SQRT), "BN_mod_sqrt"},
{ERR_FUNC(BN_F_BN_MPI2BN), "BN_mpi2bn"},
{ERR_FUNC(BN_F_BN_NEW), "BN_new"},
{ERR_FUNC(BN_F_BN_RAND), "BN_rand"},
{ERR_FUNC(BN_F_BN_RAND_RANGE), "BN_rand_range"},
{ERR_FUNC(BN_F_BN_RSHIFT), "BN_rshift"},
{ERR_FUNC(BN_F_BN_USUB), "BN_usub"},
{0, NULL}
};
static ERR_STRING_DATA BN_str_reasons[]=
{
{ERR_REASON(BN_R_ARG2_LT_ARG3) ,"arg2 lt arg3"},
{ERR_REASON(BN_R_BAD_RECIPROCAL) ,"bad reciprocal"},
{ERR_REASON(BN_R_BIGNUM_TOO_LONG) ,"bignum too long"},
{ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS),"called with even modulus"},
{ERR_REASON(BN_R_DIV_BY_ZERO) ,"div by zero"},
{ERR_REASON(BN_R_ENCODING_ERROR) ,"encoding error"},
{ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),"expand on static bignum data"},
{ERR_REASON(BN_R_INPUT_NOT_REDUCED) ,"input not reduced"},
{ERR_REASON(BN_R_INVALID_LENGTH) ,"invalid length"},
{ERR_REASON(BN_R_INVALID_RANGE) ,"invalid range"},
{ERR_REASON(BN_R_NOT_A_SQUARE) ,"not a square"},
{ERR_REASON(BN_R_NOT_INITIALIZED) ,"not initialized"},
{ERR_REASON(BN_R_NO_INVERSE) ,"no inverse"},
{ERR_REASON(BN_R_NO_SOLUTION) ,"no solution"},
{ERR_REASON(BN_R_P_IS_NOT_PRIME) ,"p is not prime"},
{ERR_REASON(BN_R_TOO_MANY_ITERATIONS) ,"too many iterations"},
{ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),"too many temporary variables"},
{0,NULL}
};
static ERR_STRING_DATA BN_str_reasons[] = {
{ERR_REASON(BN_R_ARG2_LT_ARG3), "arg2 lt arg3"},
{ERR_REASON(BN_R_BAD_RECIPROCAL), "bad reciprocal"},
{ERR_REASON(BN_R_BIGNUM_TOO_LONG), "bignum too long"},
{ERR_REASON(BN_R_BITS_TOO_SMALL), "bits too small"},
{ERR_REASON(BN_R_CALLED_WITH_EVEN_MODULUS), "called with even modulus"},
{ERR_REASON(BN_R_DIV_BY_ZERO), "div by zero"},
{ERR_REASON(BN_R_ENCODING_ERROR), "encoding error"},
{ERR_REASON(BN_R_EXPAND_ON_STATIC_BIGNUM_DATA),
"expand on static bignum data"},
{ERR_REASON(BN_R_INPUT_NOT_REDUCED), "input not reduced"},
{ERR_REASON(BN_R_INVALID_LENGTH), "invalid length"},
{ERR_REASON(BN_R_INVALID_RANGE), "invalid range"},
{ERR_REASON(BN_R_INVALID_SHIFT), "invalid shift"},
{ERR_REASON(BN_R_NOT_A_SQUARE), "not a square"},
{ERR_REASON(BN_R_NOT_INITIALIZED), "not initialized"},
{ERR_REASON(BN_R_NO_INVERSE), "no inverse"},
{ERR_REASON(BN_R_NO_SOLUTION), "no solution"},
{ERR_REASON(BN_R_P_IS_NOT_PRIME), "p is not prime"},
{ERR_REASON(BN_R_TOO_MANY_ITERATIONS), "too many iterations"},
{ERR_REASON(BN_R_TOO_MANY_TEMPORARY_VARIABLES),
"too many temporary variables"},
{0, NULL}
};
#endif
void ERR_load_BN_strings(void)
{
{
#ifndef OPENSSL_NO_ERR
if (ERR_func_error_string(BN_str_functs[0].error) == NULL)
{
ERR_load_strings(0,BN_str_functs);
ERR_load_strings(0,BN_str_reasons);
}
if (ERR_func_error_string(BN_str_functs[0].error) == NULL) {
ERR_load_strings(0, BN_str_functs);
ERR_load_strings(0, BN_str_reasons);
}
#endif
}
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -63,7 +63,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -113,200 +113,191 @@
#include "cryptlib.h"
#include "bn_lcl.h"
#define TABLE_SIZE 32
#define TABLE_SIZE 32
int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
BN_CTX *ctx, BN_MONT_CTX *in_mont)
{
int i,j,bits,b,bits1,bits2,ret=0,wpos1,wpos2,window1,window2,wvalue1,wvalue2;
int r_is_one=1;
BIGNUM *d,*r;
const BIGNUM *a_mod_m;
/* Tables of variables obtained from 'ctx' */
BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
BN_MONT_CTX *mont=NULL;
const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
BN_CTX *ctx, BN_MONT_CTX *in_mont)
{
int i, j, bits, b, bits1, bits2, ret =
0, wpos1, wpos2, window1, window2, wvalue1, wvalue2;
int r_is_one = 1;
BIGNUM *d, *r;
const BIGNUM *a_mod_m;
/* Tables of variables obtained from 'ctx' */
BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE];
BN_MONT_CTX *mont = NULL;
bn_check_top(a1);
bn_check_top(p1);
bn_check_top(a2);
bn_check_top(p2);
bn_check_top(m);
bn_check_top(a1);
bn_check_top(p1);
bn_check_top(a2);
bn_check_top(p2);
bn_check_top(m);
if (!(m->d[0] & 1))
{
BNerr(BN_F_BN_MOD_EXP2_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
return(0);
}
bits1=BN_num_bits(p1);
bits2=BN_num_bits(p2);
if ((bits1 == 0) && (bits2 == 0))
{
ret = BN_one(rr);
return ret;
}
bits=(bits1 > bits2)?bits1:bits2;
if (!(m->d[0] & 1)) {
BNerr(BN_F_BN_MOD_EXP2_MONT, BN_R_CALLED_WITH_EVEN_MODULUS);
return (0);
}
bits1 = BN_num_bits(p1);
bits2 = BN_num_bits(p2);
if ((bits1 == 0) && (bits2 == 0)) {
ret = BN_one(rr);
return ret;
}
BN_CTX_start(ctx);
d = BN_CTX_get(ctx);
r = BN_CTX_get(ctx);
val1[0] = BN_CTX_get(ctx);
val2[0] = BN_CTX_get(ctx);
if(!d || !r || !val1[0] || !val2[0]) goto err;
bits = (bits1 > bits2) ? bits1 : bits2;
if (in_mont != NULL)
mont=in_mont;
else
{
if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
}
BN_CTX_start(ctx);
d = BN_CTX_get(ctx);
r = BN_CTX_get(ctx);
val1[0] = BN_CTX_get(ctx);
val2[0] = BN_CTX_get(ctx);
if (!d || !r || !val1[0] || !val2[0])
goto err;
window1 = BN_window_bits_for_exponent_size(bits1);
window2 = BN_window_bits_for_exponent_size(bits2);
if (in_mont != NULL)
mont = in_mont;
else {
if ((mont = BN_MONT_CTX_new()) == NULL)
goto err;
if (!BN_MONT_CTX_set(mont, m, ctx))
goto err;
}
/*
* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
*/
if (a1->neg || BN_ucmp(a1,m) >= 0)
{
if (!BN_mod(val1[0],a1,m,ctx))
goto err;
a_mod_m = val1[0];
}
else
a_mod_m = a1;
if (BN_is_zero(a_mod_m))
{
BN_zero(rr);
ret = 1;
goto err;
}
window1 = BN_window_bits_for_exponent_size(bits1);
window2 = BN_window_bits_for_exponent_size(bits2);
if (!BN_to_montgomery(val1[0],a_mod_m,mont,ctx)) goto err;
if (window1 > 1)
{
if (!BN_mod_mul_montgomery(d,val1[0],val1[0],mont,ctx)) goto err;
/*
* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 2^(window1-1)
*/
if (a1->neg || BN_ucmp(a1, m) >= 0) {
if (!BN_mod(val1[0], a1, m, ctx))
goto err;
a_mod_m = val1[0];
} else
a_mod_m = a1;
if (BN_is_zero(a_mod_m)) {
BN_zero(rr);
ret = 1;
goto err;
}
j=1<<(window1-1);
for (i=1; i<j; i++)
{
if(((val1[i] = BN_CTX_get(ctx)) == NULL) ||
!BN_mod_mul_montgomery(val1[i],val1[i-1],
d,mont,ctx))
goto err;
}
}
if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx))
goto err;
if (window1 > 1) {
if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx))
goto err;
j = 1 << (window1 - 1);
for (i = 1; i < j; i++) {
if (((val1[i] = BN_CTX_get(ctx)) == NULL) ||
!BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx))
goto err;
}
}
/*
* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
*/
if (a2->neg || BN_ucmp(a2,m) >= 0)
{
if (!BN_mod(val2[0],a2,m,ctx))
goto err;
a_mod_m = val2[0];
}
else
a_mod_m = a2;
if (BN_is_zero(a_mod_m))
{
BN_zero(rr);
ret = 1;
goto err;
}
if (!BN_to_montgomery(val2[0],a_mod_m,mont,ctx)) goto err;
if (window2 > 1)
{
if (!BN_mod_mul_montgomery(d,val2[0],val2[0],mont,ctx)) goto err;
/*
* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 2^(window2-1)
*/
if (a2->neg || BN_ucmp(a2, m) >= 0) {
if (!BN_mod(val2[0], a2, m, ctx))
goto err;
a_mod_m = val2[0];
} else
a_mod_m = a2;
if (BN_is_zero(a_mod_m)) {
BN_zero(rr);
ret = 1;
goto err;
}
if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx))
goto err;
if (window2 > 1) {
if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx))
goto err;
j=1<<(window2-1);
for (i=1; i<j; i++)
{
if(((val2[i] = BN_CTX_get(ctx)) == NULL) ||
!BN_mod_mul_montgomery(val2[i],val2[i-1],
d,mont,ctx))
goto err;
}
}
j = 1 << (window2 - 1);
for (i = 1; i < j; i++) {
if (((val2[i] = BN_CTX_get(ctx)) == NULL) ||
!BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx))
goto err;
}
}
/* Now compute the power product, using independent windows. */
r_is_one = 1;
wvalue1 = 0; /* The 'value' of the first window */
wvalue2 = 0; /* The 'value' of the second window */
wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the
* first window */
wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the
* second window */
/* Now compute the power product, using independent windows. */
r_is_one=1;
wvalue1=0; /* The 'value' of the first window */
wvalue2=0; /* The 'value' of the second window */
wpos1=0; /* If wvalue1 > 0, the bottom bit of the first window */
wpos2=0; /* If wvalue2 > 0, the bottom bit of the second window */
if (!BN_to_montgomery(r, BN_value_one(), mont, ctx))
goto err;
for (b = bits - 1; b >= 0; b--) {
if (!r_is_one) {
if (!BN_mod_mul_montgomery(r, r, r, mont, ctx))
goto err;
}
if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
for (b=bits-1; b>=0; b--)
{
if (!r_is_one)
{
if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
goto err;
}
if (!wvalue1)
if (BN_is_bit_set(p1, b))
{
/* consider bits b-window1+1 .. b for this window */
i = b-window1+1;
while (!BN_is_bit_set(p1, i)) /* works for i<0 */
i++;
wpos1 = i;
wvalue1 = 1;
for (i = b-1; i >= wpos1; i--)
{
wvalue1 <<= 1;
if (BN_is_bit_set(p1, i))
wvalue1++;
}
}
if (!wvalue2)
if (BN_is_bit_set(p2, b))
{
/* consider bits b-window2+1 .. b for this window */
i = b-window2+1;
while (!BN_is_bit_set(p2, i))
i++;
wpos2 = i;
wvalue2 = 1;
for (i = b-1; i >= wpos2; i--)
{
wvalue2 <<= 1;
if (BN_is_bit_set(p2, i))
wvalue2++;
}
}
if (!wvalue1)
if (BN_is_bit_set(p1, b)) {
/*
* consider bits b-window1+1 .. b for this window
*/
i = b - window1 + 1;
while (!BN_is_bit_set(p1, i)) /* works for i<0 */
i++;
wpos1 = i;
wvalue1 = 1;
for (i = b - 1; i >= wpos1; i--) {
wvalue1 <<= 1;
if (BN_is_bit_set(p1, i))
wvalue1++;
}
}
if (wvalue1 && b == wpos1)
{
/* wvalue1 is odd and < 2^window1 */
if (!BN_mod_mul_montgomery(r,r,val1[wvalue1>>1],mont,ctx))
goto err;
wvalue1 = 0;
r_is_one = 0;
}
if (wvalue2 && b == wpos2)
{
/* wvalue2 is odd and < 2^window2 */
if (!BN_mod_mul_montgomery(r,r,val2[wvalue2>>1],mont,ctx))
goto err;
wvalue2 = 0;
r_is_one = 0;
}
}
if (!BN_from_montgomery(rr,r,mont,ctx))
goto err;
ret=1;
err:
if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
BN_CTX_end(ctx);
bn_check_top(rr);
return(ret);
}
if (!wvalue2)
if (BN_is_bit_set(p2, b)) {
/*
* consider bits b-window2+1 .. b for this window
*/
i = b - window2 + 1;
while (!BN_is_bit_set(p2, i))
i++;
wpos2 = i;
wvalue2 = 1;
for (i = b - 1; i >= wpos2; i--) {
wvalue2 <<= 1;
if (BN_is_bit_set(p2, i))
wvalue2++;
}
}
if (wvalue1 && b == wpos1) {
/* wvalue1 is odd and < 2^window1 */
if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx))
goto err;
wvalue1 = 0;
r_is_one = 0;
}
if (wvalue2 && b == wpos2) {
/* wvalue2 is odd and < 2^window2 */
if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx))
goto err;
wvalue2 = 0;
r_is_one = 0;
}
}
if (!BN_from_montgomery(rr, r, mont, ctx))
goto err;
ret = 1;
err:
if ((in_mont == NULL) && (mont != NULL))
BN_MONT_CTX_free(mont);
BN_CTX_end(ctx);
bn_check_top(rr);
return (ret);
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -7,7 +7,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -61,124 +61,126 @@
/* Returns -2 for errors because both -1 and 0 are valid results. */
int BN_kronecker(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
{
int i;
int ret = -2; /* avoid 'uninitialized' warning */
int err = 0;
BIGNUM *A, *B, *tmp;
/* In 'tab', only odd-indexed entries are relevant:
* For any odd BIGNUM n,
* tab[BN_lsw(n) & 7]
* is $(-1)^{(n^2-1)/8}$ (using TeX notation).
* Note that the sign of n does not matter.
*/
static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
{
int i;
int ret = -2; /* avoid 'uninitialized' warning */
int err = 0;
BIGNUM *A, *B, *tmp;
/*-
* In 'tab', only odd-indexed entries are relevant:
* For any odd BIGNUM n,
* tab[BN_lsw(n) & 7]
* is $(-1)^{(n^2-1)/8}$ (using TeX notation).
* Note that the sign of n does not matter.
*/
static const int tab[8] = { 0, 1, 0, -1, 0, -1, 0, 1 };
bn_check_top(a);
bn_check_top(b);
bn_check_top(a);
bn_check_top(b);
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
B = BN_CTX_get(ctx);
if (B == NULL) goto end;
err = !BN_copy(A, a);
if (err) goto end;
err = !BN_copy(B, b);
if (err) goto end;
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
B = BN_CTX_get(ctx);
if (B == NULL)
goto end;
/*
* Kronecker symbol, imlemented according to Henri Cohen,
* "A Course in Computational Algebraic Number Theory"
* (algorithm 1.4.10).
*/
err = !BN_copy(A, a);
if (err)
goto end;
err = !BN_copy(B, b);
if (err)
goto end;
/* Cohen's step 1: */
/*
* Kronecker symbol, imlemented according to Henri Cohen,
* "A Course in Computational Algebraic Number Theory"
* (algorithm 1.4.10).
*/
if (BN_is_zero(B))
{
ret = BN_abs_is_word(A, 1);
goto end;
}
/* Cohen's step 2: */
/* Cohen's step 1: */
if (!BN_is_odd(A) && !BN_is_odd(B))
{
ret = 0;
goto end;
}
if (BN_is_zero(B)) {
ret = BN_abs_is_word(A, 1);
goto end;
}
/* now B is non-zero */
i = 0;
while (!BN_is_bit_set(B, i))
i++;
err = !BN_rshift(B, B, i);
if (err) goto end;
if (i & 1)
{
/* i is odd */
/* (thus B was even, thus A must be odd!) */
/* Cohen's step 2: */
/* set 'ret' to $(-1)^{(A^2-1)/8}$ */
ret = tab[BN_lsw(A) & 7];
}
else
{
/* i is even */
ret = 1;
}
if (B->neg)
{
B->neg = 0;
if (A->neg)
ret = -ret;
}
if (!BN_is_odd(A) && !BN_is_odd(B)) {
ret = 0;
goto end;
}
/* now B is positive and odd, so what remains to be done is
* to compute the Jacobi symbol (A/B) and multiply it by 'ret' */
/* now B is non-zero */
i = 0;
while (!BN_is_bit_set(B, i))
i++;
err = !BN_rshift(B, B, i);
if (err)
goto end;
if (i & 1) {
/* i is odd */
/* (thus B was even, thus A must be odd!) */
while (1)
{
/* Cohen's step 3: */
/* set 'ret' to $(-1)^{(A^2-1)/8}$ */
ret = tab[BN_lsw(A) & 7];
} else {
/* i is even */
ret = 1;
}
/* B is positive and odd */
if (B->neg) {
B->neg = 0;
if (A->neg)
ret = -ret;
}
if (BN_is_zero(A))
{
ret = BN_is_one(B) ? ret : 0;
goto end;
}
/*
* now B is positive and odd, so what remains to be done is to compute
* the Jacobi symbol (A/B) and multiply it by 'ret'
*/
/* now A is non-zero */
i = 0;
while (!BN_is_bit_set(A, i))
i++;
err = !BN_rshift(A, A, i);
if (err) goto end;
if (i & 1)
{
/* i is odd */
/* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
ret = ret * tab[BN_lsw(B) & 7];
}
/* Cohen's step 4: */
/* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
ret = -ret;
/* (A, B) := (B mod |A|, |A|) */
err = !BN_nnmod(B, B, A, ctx);
if (err) goto end;
tmp = A; A = B; B = tmp;
tmp->neg = 0;
}
end:
BN_CTX_end(ctx);
if (err)
return -2;
else
return ret;
}
while (1) {
/* Cohen's step 3: */
/* B is positive and odd */
if (BN_is_zero(A)) {
ret = BN_is_one(B) ? ret : 0;
goto end;
}
/* now A is non-zero */
i = 0;
while (!BN_is_bit_set(A, i))
i++;
err = !BN_rshift(A, A, i);
if (err)
goto end;
if (i & 1) {
/* i is odd */
/* multiply 'ret' by $(-1)^{(B^2-1)/8}$ */
ret = ret * tab[BN_lsw(B) & 7];
}
/* Cohen's step 4: */
/* multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$ */
if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2)
ret = -ret;
/* (A, B) := (B mod |A|, |A|) */
err = !BN_nnmod(B, B, A, ctx);
if (err)
goto end;
tmp = A;
A = B;
B = tmp;
tmp->neg = 0;
}
end:
BN_CTX_end(ctx);
if (err)
return -2;
else
return ret;
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -63,7 +63,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -110,16 +110,15 @@
*/
#ifndef HEADER_BN_LCL_H
#define HEADER_BN_LCL_H
# define HEADER_BN_LCL_H
#include <openssl/bn.h>
# include <openssl/bn.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
/*-
* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp functions
*
*
@@ -144,73 +143,86 @@ extern "C" {
* (with draws in between). Very small exponents are often selected
* with low Hamming weight, so we use w = 1 for b <= 23.
*/
#if 1
#define BN_window_bits_for_exponent_size(b) \
((b) > 671 ? 6 : \
(b) > 239 ? 5 : \
(b) > 79 ? 4 : \
(b) > 23 ? 3 : 1)
#else
/* Old SSLeay/OpenSSL table.
* Maximum window size was 5, so this table differs for b==1024;
* but it coincides for other interesting values (b==160, b==512).
# if 1
# define BN_window_bits_for_exponent_size(b) \
((b) > 671 ? 6 : \
(b) > 239 ? 5 : \
(b) > 79 ? 4 : \
(b) > 23 ? 3 : 1)
# else
/*
* Old SSLeay/OpenSSL table. Maximum window size was 5, so this table differs
* for b==1024; but it coincides for other interesting values (b==160,
* b==512).
*/
#define BN_window_bits_for_exponent_size(b) \
((b) > 255 ? 5 : \
(b) > 127 ? 4 : \
(b) > 17 ? 3 : 1)
#endif
# define BN_window_bits_for_exponent_size(b) \
((b) > 255 ? 5 : \
(b) > 127 ? 4 : \
(b) > 17 ? 3 : 1)
# endif
/* BN_mod_exp_mont_conttime is based on the assumption that the
* L1 data cache line width of the target processor is at least
* the following value.
/*
* BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
* line width of the target processor is at least the following value.
*/
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
#define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
# define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH ( 64 )
# define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
/* Window sizes optimized for fixed window size modular exponentiation
* algorithm (BN_mod_exp_mont_consttime).
*
* To achieve the security goals of BN_mode_exp_mont_consttime, the
* maximum size of the window must not exceed
* log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
*
* Window size thresholds are defined for cache line sizes of 32 and 64,
* cache line sizes where log_2(32)=5 and log_2(64)=6 respectively. A
* window size of 7 should only be used on processors that have a 128
* byte or greater cache line size.
/*
* Window sizes optimized for fixed window size modular exponentiation
* algorithm (BN_mod_exp_mont_consttime). To achieve the security goals of
* BN_mode_exp_mont_consttime, the maximum size of the window must not exceed
* log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). Window size thresholds are
* defined for cache line sizes of 32 and 64, cache line sizes where
* log_2(32)=5 and log_2(64)=6 respectively. A window size of 7 should only be
* used on processors that have a 128 byte or greater cache line size.
*/
#if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
# if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
# define BN_window_bits_for_ctime_exponent_size(b) \
((b) > 937 ? 6 : \
(b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
((b) > 937 ? 6 : \
(b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
#elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
# elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
# define BN_window_bits_for_ctime_exponent_size(b) \
((b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
#endif
((b) > 306 ? 5 : \
(b) > 89 ? 4 : \
(b) > 22 ? 3 : 1)
# define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
# endif
/* Pentium pro 16,16,16,32,64 */
/* Alpha 16,16,16,16.64 */
#define BN_MULL_SIZE_NORMAL (16) /* 32 */
#define BN_MUL_RECURSIVE_SIZE_NORMAL (16) /* 32 less than */
#define BN_SQR_RECURSIVE_SIZE_NORMAL (16) /* 32 */
#define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32) /* 32 */
#define BN_MONT_CTX_SET_SIZE_WORD (64) /* 32 */
# define BN_MULL_SIZE_NORMAL (16)/* 32 */
# define BN_MUL_RECURSIVE_SIZE_NORMAL (16)/* 32 less than */
# define BN_SQR_RECURSIVE_SIZE_NORMAL (16)/* 32 */
# define BN_MUL_LOW_RECURSIVE_SIZE_NORMAL (32)/* 32 */
# define BN_MONT_CTX_SET_SIZE_WORD (64)/* 32 */
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
/*
* 2011-02-22 SMS. In various places, a size_t variable or a type cast to
* size_t was used to perform integer-only operations on pointers. This
* failed on VMS with 64-bit pointers (CC /POINTER_SIZE = 64) because size_t
* is still only 32 bits. What's needed in these cases is an integer type
* with the same size as a pointer, which size_t is not certain to be. The
* only fix here is VMS-specific.
*/
# if defined(OPENSSL_SYS_VMS)
# if __INITIAL_POINTER_SIZE == 64
# define PTR_SIZE_INT long long
# else /* __INITIAL_POINTER_SIZE == 64 */
# define PTR_SIZE_INT int
# endif /* __INITIAL_POINTER_SIZE == 64 [else] */
# elif !defined(PTR_SIZE_INT) /* defined(OPENSSL_SYS_VMS) */
# define PTR_SIZE_INT size_t
# endif /* defined(OPENSSL_SYS_VMS) [else] */
# if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_NO_INLINE_ASM) && !defined(PEDANTIC)
/*
* BN_UMULT_HIGH section.
*
@@ -232,257 +244,291 @@ extern "C" {
* exhibiting "native" performance in C. That's what BN_UMULT_HIGH
* macro is about:-)
*
* <appro@fy.chalmers.se>
* <appro@fy.chalmers.se>
*/
# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
# if defined(__DECC)
# include <c_asm.h>
# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
# elif defined(__GNUC__)
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("umulh %1,%2,%0" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
ret; })
# endif /* compiler */
# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
# if defined(__GNUC__)
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("mulhdu %0,%1,%2" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
ret; })
# endif /* compiler */
# elif (defined(__x86_64) || defined(__x86_64__)) && \
# if defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
# if defined(__DECC)
# include <c_asm.h>
# define BN_UMULT_HIGH(a,b) (BN_ULONG)asm("umulh %a0,%a1,%v0",(a),(b))
# elif defined(__GNUC__) && __GNUC__>=2
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("umulh %1,%2,%0" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
ret; })
# endif /* compiler */
# elif defined(_ARCH_PPC) && defined(__64BIT__) && defined(SIXTY_FOUR_BIT_LONG)
# if defined(__GNUC__) && __GNUC__>=2
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("mulhdu %0,%1,%2" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
ret; })
# endif /* compiler */
# elif (defined(__x86_64) || defined(__x86_64__)) && \
(defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
# if defined(__GNUC__)
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret,discard; \
asm ("mulq %3" \
: "=a"(discard),"=d"(ret) \
: "a"(a), "g"(b) \
: "cc"); \
ret; })
# define BN_UMULT_LOHI(low,high,a,b) \
asm ("mulq %3" \
: "=a"(low),"=d"(high) \
: "a"(a),"g"(b) \
: "cc");
# endif
# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
# if defined(_MSC_VER) && _MSC_VER>=1400
unsigned __int64 __umulh (unsigned __int64 a,unsigned __int64 b);
unsigned __int64 _umul128 (unsigned __int64 a,unsigned __int64 b,
unsigned __int64 *h);
# pragma intrinsic(__umulh,_umul128)
# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
# endif
# endif /* cpu */
#endif /* OPENSSL_NO_ASM */
# if defined(__GNUC__) && __GNUC__>=2
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret,discard; \
asm ("mulq %3" \
: "=a"(discard),"=d"(ret) \
: "a"(a), "g"(b) \
: "cc"); \
ret; })
# define BN_UMULT_LOHI(low,high,a,b) \
asm ("mulq %3" \
: "=a"(low),"=d"(high) \
: "a"(a),"g"(b) \
: "cc");
# endif
# elif (defined(_M_AMD64) || defined(_M_X64)) && defined(SIXTY_FOUR_BIT)
# if defined(_MSC_VER) && _MSC_VER>=1400
unsigned __int64 __umulh(unsigned __int64 a, unsigned __int64 b);
unsigned __int64 _umul128(unsigned __int64 a, unsigned __int64 b,
unsigned __int64 *h);
# pragma intrinsic(__umulh,_umul128)
# define BN_UMULT_HIGH(a,b) __umulh((a),(b))
# define BN_UMULT_LOHI(low,high,a,b) ((low)=_umul128((a),(b),&(high)))
# endif
# elif defined(__mips) && (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
# if defined(__GNUC__) && __GNUC__>=2
# if __GNUC__>4 || (__GNUC__>=4 && __GNUC_MINOR__>=4)
/* "h" constraint is no more since 4.4 */
# define BN_UMULT_HIGH(a,b) (((__uint128_t)(a)*(b))>>64)
# define BN_UMULT_LOHI(low,high,a,b) ({ \
__uint128_t ret=(__uint128_t)(a)*(b); \
(high)=ret>>64; (low)=ret; })
# else
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("dmultu %1,%2" \
: "=h"(ret) \
: "r"(a), "r"(b) : "l"); \
ret; })
# define BN_UMULT_LOHI(low,high,a,b)\
asm ("dmultu %2,%3" \
: "=l"(low),"=h"(high) \
: "r"(a), "r"(b));
# endif
# endif
# elif defined(__aarch64__) && defined(SIXTY_FOUR_BIT_LONG)
# if defined(__GNUC__) && __GNUC__>=2
# define BN_UMULT_HIGH(a,b) ({ \
register BN_ULONG ret; \
asm ("umulh %0,%1,%2" \
: "=r"(ret) \
: "r"(a), "r"(b)); \
ret; })
# endif
# endif /* cpu */
# endif /* OPENSSL_NO_ASM */
/*************************************************************
* Using the long long type
*/
#define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
#define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
# define Lw(t) (((BN_ULONG)(t))&BN_MASK2)
# define Hw(t) (((BN_ULONG)((t)>>BN_BITS2))&BN_MASK2)
#ifdef BN_DEBUG_RAND
#define bn_clear_top2max(a) \
{ \
int ind = (a)->dmax - (a)->top; \
BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
for (; ind != 0; ind--) \
*(++ftl) = 0x0; \
}
#else
#define bn_clear_top2max(a)
#endif
# ifdef BN_DEBUG_RAND
# define bn_clear_top2max(a) \
{ \
int ind = (a)->dmax - (a)->top; \
BN_ULONG *ftl = &(a)->d[(a)->top-1]; \
for (; ind != 0; ind--) \
*(++ftl) = 0x0; \
}
# else
# define bn_clear_top2max(a)
# endif
#ifdef BN_LLONG
#define mul_add(r,a,w,c) { \
BN_ULLONG t; \
t=(BN_ULLONG)w * (a) + (r) + (c); \
(r)= Lw(t); \
(c)= Hw(t); \
}
# ifdef BN_LLONG
# define mul_add(r,a,w,c) { \
BN_ULLONG t; \
t=(BN_ULLONG)w * (a) + (r) + (c); \
(r)= Lw(t); \
(c)= Hw(t); \
}
#define mul(r,a,w,c) { \
BN_ULLONG t; \
t=(BN_ULLONG)w * (a) + (c); \
(r)= Lw(t); \
(c)= Hw(t); \
}
# define mul(r,a,w,c) { \
BN_ULLONG t; \
t=(BN_ULLONG)w * (a) + (c); \
(r)= Lw(t); \
(c)= Hw(t); \
}
#define sqr(r0,r1,a) { \
BN_ULLONG t; \
t=(BN_ULLONG)(a)*(a); \
(r0)=Lw(t); \
(r1)=Hw(t); \
}
# define sqr(r0,r1,a) { \
BN_ULLONG t; \
t=(BN_ULLONG)(a)*(a); \
(r0)=Lw(t); \
(r1)=Hw(t); \
}
#elif defined(BN_UMULT_LOHI)
#define mul_add(r,a,w,c) { \
BN_ULONG high,low,ret,tmp=(a); \
ret = (r); \
BN_UMULT_LOHI(low,high,w,tmp); \
ret += (c); \
(c) = (ret<(c))?1:0; \
(c) += high; \
ret += low; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
# elif defined(BN_UMULT_LOHI)
# define mul_add(r,a,w,c) { \
BN_ULONG high,low,ret,tmp=(a); \
ret = (r); \
BN_UMULT_LOHI(low,high,w,tmp); \
ret += (c); \
(c) = (ret<(c))?1:0; \
(c) += high; \
ret += low; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define mul(r,a,w,c) { \
BN_ULONG high,low,ret,ta=(a); \
BN_UMULT_LOHI(low,high,w,ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
# define mul(r,a,w,c) { \
BN_ULONG high,low,ret,ta=(a); \
BN_UMULT_LOHI(low,high,w,ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define sqr(r0,r1,a) { \
BN_ULONG tmp=(a); \
BN_UMULT_LOHI(r0,r1,tmp,tmp); \
}
# define sqr(r0,r1,a) { \
BN_ULONG tmp=(a); \
BN_UMULT_LOHI(r0,r1,tmp,tmp); \
}
#elif defined(BN_UMULT_HIGH)
#define mul_add(r,a,w,c) { \
BN_ULONG high,low,ret,tmp=(a); \
ret = (r); \
high= BN_UMULT_HIGH(w,tmp); \
ret += (c); \
low = (w) * tmp; \
(c) = (ret<(c))?1:0; \
(c) += high; \
ret += low; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
# elif defined(BN_UMULT_HIGH)
# define mul_add(r,a,w,c) { \
BN_ULONG high,low,ret,tmp=(a); \
ret = (r); \
high= BN_UMULT_HIGH(w,tmp); \
ret += (c); \
low = (w) * tmp; \
(c) = (ret<(c))?1:0; \
(c) += high; \
ret += low; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define mul(r,a,w,c) { \
BN_ULONG high,low,ret,ta=(a); \
low = (w) * ta; \
high= BN_UMULT_HIGH(w,ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
# define mul(r,a,w,c) { \
BN_ULONG high,low,ret,ta=(a); \
low = (w) * ta; \
high= BN_UMULT_HIGH(w,ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret<low)?1:0; \
(r) = ret; \
}
#define sqr(r0,r1,a) { \
BN_ULONG tmp=(a); \
(r0) = tmp * tmp; \
(r1) = BN_UMULT_HIGH(tmp,tmp); \
}
# define sqr(r0,r1,a) { \
BN_ULONG tmp=(a); \
(r0) = tmp * tmp; \
(r1) = BN_UMULT_HIGH(tmp,tmp); \
}
#else
# else
/*************************************************************
* No long long type
*/
#define LBITS(a) ((a)&BN_MASK2l)
#define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
#define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
# define LBITS(a) ((a)&BN_MASK2l)
# define HBITS(a) (((a)>>BN_BITS4)&BN_MASK2l)
# define L2HBITS(a) (((a)<<BN_BITS4)&BN_MASK2)
#define LLBITS(a) ((a)&BN_MASKl)
#define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
#define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
# define LLBITS(a) ((a)&BN_MASKl)
# define LHBITS(a) (((a)>>BN_BITS2)&BN_MASKl)
# define LL2HBITS(a) ((BN_ULLONG)((a)&BN_MASKl)<<BN_BITS2)
#define mul64(l,h,bl,bh) \
{ \
BN_ULONG m,m1,lt,ht; \
# define mul64(l,h,bl,bh) \
{ \
BN_ULONG m,m1,lt,ht; \
\
lt=l; \
ht=h; \
m =(bh)*(lt); \
lt=(bl)*(lt); \
m1=(bl)*(ht); \
ht =(bh)*(ht); \
m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
ht+=HBITS(m); \
m1=L2HBITS(m); \
lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
(l)=lt; \
(h)=ht; \
}
lt=l; \
ht=h; \
m =(bh)*(lt); \
lt=(bl)*(lt); \
m1=(bl)*(ht); \
ht =(bh)*(ht); \
m=(m+m1)&BN_MASK2; if (m < m1) ht+=L2HBITS((BN_ULONG)1); \
ht+=HBITS(m); \
m1=L2HBITS(m); \
lt=(lt+m1)&BN_MASK2; if (lt < m1) ht++; \
(l)=lt; \
(h)=ht; \
}
#define sqr64(lo,ho,in) \
{ \
BN_ULONG l,h,m; \
# define sqr64(lo,ho,in) \
{ \
BN_ULONG l,h,m; \
\
h=(in); \
l=LBITS(h); \
h=HBITS(h); \
m =(l)*(h); \
l*=l; \
h*=h; \
h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
m =(m&BN_MASK2l)<<(BN_BITS4+1); \
l=(l+m)&BN_MASK2; if (l < m) h++; \
(lo)=l; \
(ho)=h; \
}
h=(in); \
l=LBITS(h); \
h=HBITS(h); \
m =(l)*(h); \
l*=l; \
h*=h; \
h+=(m&BN_MASK2h1)>>(BN_BITS4-1); \
m =(m&BN_MASK2l)<<(BN_BITS4+1); \
l=(l+m)&BN_MASK2; if (l < m) h++; \
(lo)=l; \
(ho)=h; \
}
#define mul_add(r,a,bl,bh,c) { \
BN_ULONG l,h; \
# define mul_add(r,a,bl,bh,c) { \
BN_ULONG l,h; \
\
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
\
/* non-multiply part */ \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=(r); \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l; \
}
/* non-multiply part */ \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=(r); \
l=(l+(c))&BN_MASK2; if (l < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l; \
}
#define mul(r,a,bl,bh,c) { \
BN_ULONG l,h; \
# define mul(r,a,bl,bh,c) { \
BN_ULONG l,h; \
\
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
h= (a); \
l=LBITS(h); \
h=HBITS(h); \
mul64(l,h,(bl),(bh)); \
\
/* non-multiply part */ \
l+=(c); if ((l&BN_MASK2) < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l&BN_MASK2; \
}
#endif /* !BN_LLONG */
/* non-multiply part */ \
l+=(c); if ((l&BN_MASK2) < (c)) h++; \
(c)=h&BN_MASK2; \
(r)=l&BN_MASK2; \
}
# endif /* !BN_LLONG */
void bn_mul_normal(BN_ULONG *r,BN_ULONG *a,int na,BN_ULONG *b,int nb);
void bn_mul_comba8(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
void bn_mul_comba4(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b);
# if defined(OPENSSL_DOING_MAKEDEPEND) && defined(OPENSSL_FIPS)
# undef bn_div_words
# endif
void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb);
void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b);
void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp);
void bn_sqr_comba8(BN_ULONG *r,const BN_ULONG *a);
void bn_sqr_comba4(BN_ULONG *r,const BN_ULONG *a);
int bn_cmp_words(const BN_ULONG *a,const BN_ULONG *b,int n);
int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
void bn_mul_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
int dna,int dnb,BN_ULONG *t);
void bn_mul_part_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,
int n,int tna,int tnb,BN_ULONG *t);
void bn_sqr_recursive(BN_ULONG *r,const BN_ULONG *a, int n2, BN_ULONG *t);
void bn_mul_low_normal(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b, int n);
void bn_mul_low_recursive(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,int n2,
BN_ULONG *t);
void bn_mul_high(BN_ULONG *r,BN_ULONG *a,BN_ULONG *b,BN_ULONG *l,int n2,
BN_ULONG *t);
void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a);
void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a);
int bn_cmp_words(const BN_ULONG *a, const BN_ULONG *b, int n);
int bn_cmp_part_words(const BN_ULONG *a, const BN_ULONG *b, int cl, int dl);
void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
int dna, int dnb, BN_ULONG *t);
void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b,
int n, int tna, int tnb, BN_ULONG *t);
void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t);
void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n);
void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
BN_ULONG *t);
void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
BN_ULONG *t);
BN_ULONG bn_add_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
int cl, int dl);
BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int cl, int dl);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
int cl, int dl);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, int num);
#ifdef __cplusplus
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -1,6 +1,8 @@
/* crypto/bn/bn_mod.c */
/* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
* for the OpenSSL project. */
/*
* Includes code written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
* for the OpenSSL project.
*/
/* ====================================================================
* Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
*
@@ -9,7 +11,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -60,21 +62,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -89,10 +91,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -104,7 +106,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -114,188 +116,201 @@
#include "cryptlib.h"
#include "bn_lcl.h"
#if 0 /* now just a #define */
#if 0 /* now just a #define */
int BN_mod(BIGNUM *rem, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
{
return(BN_div(NULL,rem,m,d,ctx));
/* note that rem->neg == m->neg (unless the remainder is zero) */
}
{
return (BN_div(NULL, rem, m, d, ctx));
/* note that rem->neg == m->neg (unless the remainder is zero) */
}
#endif
int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx)
{
/* like BN_mod, but returns non-negative remainder
* (i.e., 0 <= r < |d| always holds) */
{
/*
* like BN_mod, but returns non-negative remainder (i.e., 0 <= r < |d|
* always holds)
*/
if (!(BN_mod(r,m,d,ctx)))
return 0;
if (!r->neg)
return 1;
/* now -|d| < r < 0, so we have to set r := r + |d| */
return (d->neg ? BN_sub : BN_add)(r, r, d);
if (!(BN_mod(r, m, d, ctx)))
return 0;
if (!r->neg)
return 1;
/* now -|d| < r < 0, so we have to set r := r + |d| */
return (d->neg ? BN_sub : BN_add) (r, r, d);
}
int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx)
{
if (!BN_add(r, a, b))
return 0;
return BN_nnmod(r, r, m, ctx);
}
int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
{
if (!BN_add(r, a, b)) return 0;
return BN_nnmod(r, r, m, ctx);
}
/*
* BN_mod_add variant that may be used if both a and b are non-negative and
* less than m
*/
int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m)
{
if (!BN_uadd(r, a, b))
return 0;
if (BN_ucmp(r, m) >= 0)
return BN_usub(r, r, m);
return 1;
}
int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx)
{
if (!BN_sub(r, a, b))
return 0;
return BN_nnmod(r, r, m, ctx);
}
/* BN_mod_add variant that may be used if both a and b are non-negative
* and less than m */
int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
{
if (!BN_uadd(r, a, b)) return 0;
if (BN_ucmp(r, m) >= 0)
return BN_usub(r, r, m);
return 1;
}
int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m, BN_CTX *ctx)
{
if (!BN_sub(r, a, b)) return 0;
return BN_nnmod(r, r, m, ctx);
}
/* BN_mod_sub variant that may be used if both a and b are non-negative
* and less than m */
int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m)
{
if (!BN_sub(r, a, b)) return 0;
if (r->neg)
return BN_add(r, r, m);
return 1;
}
/*
* BN_mod_sub variant that may be used if both a and b are non-negative and
* less than m
*/
int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m)
{
if (!BN_sub(r, a, b))
return 0;
if (r->neg)
return BN_add(r, r, m);
return 1;
}
/* slow but works */
int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx)
{
BIGNUM *t;
int ret=0;
BN_CTX *ctx)
{
BIGNUM *t;
int ret = 0;
bn_check_top(a);
bn_check_top(b);
bn_check_top(m);
BN_CTX_start(ctx);
if ((t = BN_CTX_get(ctx)) == NULL) goto err;
if (a == b)
{ if (!BN_sqr(t,a,ctx)) goto err; }
else
{ if (!BN_mul(t,a,b,ctx)) goto err; }
if (!BN_nnmod(r,t,m,ctx)) goto err;
bn_check_top(r);
ret=1;
err:
BN_CTX_end(ctx);
return(ret);
}
bn_check_top(a);
bn_check_top(b);
bn_check_top(m);
BN_CTX_start(ctx);
if ((t = BN_CTX_get(ctx)) == NULL)
goto err;
if (a == b) {
if (!BN_sqr(t, a, ctx))
goto err;
} else {
if (!BN_mul(t, a, b, ctx))
goto err;
}
if (!BN_nnmod(r, t, m, ctx))
goto err;
bn_check_top(r);
ret = 1;
err:
BN_CTX_end(ctx);
return (ret);
}
int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
{
if (!BN_sqr(r, a, ctx)) return 0;
/* r->neg == 0, thus we don't need BN_nnmod */
return BN_mod(r, r, m, ctx);
}
{
if (!BN_sqr(r, a, ctx))
return 0;
/* r->neg == 0, thus we don't need BN_nnmod */
return BN_mod(r, r, m, ctx);
}
int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx)
{
if (!BN_lshift1(r, a)) return 0;
bn_check_top(r);
return BN_nnmod(r, r, m, ctx);
}
{
if (!BN_lshift1(r, a))
return 0;
bn_check_top(r);
return BN_nnmod(r, r, m, ctx);
}
/* BN_mod_lshift1 variant that may be used if a is non-negative
* and less than m */
/*
* BN_mod_lshift1 variant that may be used if a is non-negative and less than
* m
*/
int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m)
{
if (!BN_lshift1(r, a)) return 0;
bn_check_top(r);
if (BN_cmp(r, m) >= 0)
return BN_sub(r, r, m);
return 1;
}
{
if (!BN_lshift1(r, a))
return 0;
bn_check_top(r);
if (BN_cmp(r, m) >= 0)
return BN_sub(r, r, m);
return 1;
}
int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx)
{
BIGNUM *abs_m = NULL;
int ret;
int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m, BN_CTX *ctx)
{
BIGNUM *abs_m = NULL;
int ret;
if (!BN_nnmod(r, a, m, ctx))
return 0;
if (!BN_nnmod(r, a, m, ctx)) return 0;
if (m->neg) {
abs_m = BN_dup(m);
if (abs_m == NULL)
return 0;
abs_m->neg = 0;
}
if (m->neg)
{
abs_m = BN_dup(m);
if (abs_m == NULL) return 0;
abs_m->neg = 0;
}
ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
bn_check_top(r);
ret = BN_mod_lshift_quick(r, r, n, (abs_m ? abs_m : m));
bn_check_top(r);
if (abs_m)
BN_free(abs_m);
return ret;
}
if (abs_m)
BN_free(abs_m);
return ret;
}
/* BN_mod_lshift variant that may be used if a is non-negative
* and less than m */
/*
* BN_mod_lshift variant that may be used if a is non-negative and less than
* m
*/
int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m)
{
if (r != a)
{
if (BN_copy(r, a) == NULL) return 0;
}
{
if (r != a) {
if (BN_copy(r, a) == NULL)
return 0;
}
while (n > 0)
{
int max_shift;
/* 0 < r < m */
max_shift = BN_num_bits(m) - BN_num_bits(r);
/* max_shift >= 0 */
while (n > 0) {
int max_shift;
if (max_shift < 0)
{
BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
return 0;
}
/* 0 < r < m */
max_shift = BN_num_bits(m) - BN_num_bits(r);
/* max_shift >= 0 */
if (max_shift > n)
max_shift = n;
if (max_shift < 0) {
BNerr(BN_F_BN_MOD_LSHIFT_QUICK, BN_R_INPUT_NOT_REDUCED);
return 0;
}
if (max_shift)
{
if (!BN_lshift(r, r, max_shift)) return 0;
n -= max_shift;
}
else
{
if (!BN_lshift1(r, r)) return 0;
--n;
}
if (max_shift > n)
max_shift = n;
/* BN_num_bits(r) <= BN_num_bits(m) */
if (max_shift) {
if (!BN_lshift(r, r, max_shift))
return 0;
n -= max_shift;
} else {
if (!BN_lshift1(r, r))
return 0;
--n;
}
if (BN_cmp(r, m) >= 0)
{
if (!BN_sub(r, r, m)) return 0;
}
}
bn_check_top(r);
return 1;
}
/* BN_num_bits(r) <= BN_num_bits(m) */
if (BN_cmp(r, m) >= 0) {
if (!BN_sub(r, r, m))
return 0;
}
}
bn_check_top(r);
return 1;
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -63,7 +63,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -120,448 +120,436 @@
#include "cryptlib.h"
#include "bn_lcl.h"
#define MONT_WORD /* use the faster word-based algorithm */
#define MONT_WORD /* use the faster word-based algorithm */
#ifdef MONT_WORD
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont);
#endif
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_MONT_CTX *mont, BN_CTX *ctx)
{
BIGNUM *tmp;
int ret=0;
BN_MONT_CTX *mont, BN_CTX *ctx)
{
BIGNUM *tmp;
int ret = 0;
#if defined(OPENSSL_BN_ASM_MONT) && defined(MONT_WORD)
int num = mont->N.top;
int num = mont->N.top;
if (num>1 && a->top==num && b->top==num)
{
if (bn_wexpand(r,num) == NULL) return(0);
if (bn_mul_mont(r->d,a->d,b->d,mont->N.d,mont->n0,num))
{
r->neg = a->neg^b->neg;
r->top = num;
bn_correct_top(r);
return(1);
}
}
if (num > 1 && a->top == num && b->top == num) {
if (bn_wexpand(r, num) == NULL)
return (0);
if (bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
r->neg = a->neg ^ b->neg;
r->top = num;
bn_correct_top(r);
return (1);
}
}
#endif
BN_CTX_start(ctx);
tmp = BN_CTX_get(ctx);
if (tmp == NULL) goto err;
BN_CTX_start(ctx);
tmp = BN_CTX_get(ctx);
if (tmp == NULL)
goto err;
bn_check_top(tmp);
if (a == b)
{
if (!BN_sqr(tmp,a,ctx)) goto err;
}
else
{
if (!BN_mul(tmp,a,b,ctx)) goto err;
}
/* reduce from aRR to aR */
bn_check_top(tmp);
if (a == b) {
if (!BN_sqr(tmp, a, ctx))
goto err;
} else {
if (!BN_mul(tmp, a, b, ctx))
goto err;
}
/* reduce from aRR to aR */
#ifdef MONT_WORD
if (!BN_from_montgomery_word(r,tmp,mont)) goto err;
if (!BN_from_montgomery_word(r, tmp, mont))
goto err;
#else
if (!BN_from_montgomery(r,tmp,mont,ctx)) goto err;
if (!BN_from_montgomery(r, tmp, mont, ctx))
goto err;
#endif
bn_check_top(r);
ret=1;
err:
BN_CTX_end(ctx);
return(ret);
}
bn_check_top(r);
ret = 1;
err:
BN_CTX_end(ctx);
return (ret);
}
#ifdef MONT_WORD
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
{
BIGNUM *n;
BN_ULONG *ap,*np,*rp,n0,v,*nrp;
int al,nl,max,i,x,ri;
{
BIGNUM *n;
BN_ULONG *ap, *np, *rp, n0, v, carry;
int nl, max, i;
n= &(mont->N);
/* mont->ri is the size of mont->N in bits (rounded up
to the word size) */
al=ri=mont->ri/BN_BITS2;
n = &(mont->N);
nl = n->top;
if (nl == 0) {
ret->top = 0;
return (1);
}
nl=n->top;
if ((al == 0) || (nl == 0)) { ret->top=0; return(1); }
max = (2 * nl); /* carry is stored separately */
if (bn_wexpand(r, max) == NULL)
return (0);
max=(nl+al+1); /* allow for overflow (no?) XXX */
if (bn_wexpand(r,max) == NULL) return(0);
r->neg ^= n->neg;
np = n->d;
rp = r->d;
r->neg^=n->neg;
np=n->d;
rp=r->d;
nrp= &(r->d[nl]);
/* clear the top words of T */
# if 1
for (i = r->top; i < max; i++) /* memset? XXX */
rp[i] = 0;
# else
memset(&(rp[r->top]), 0, (max - r->top) * sizeof(BN_ULONG));
# endif
/* clear the top words of T */
#if 1
for (i=r->top; i<max; i++) /* memset? XXX */
r->d[i]=0;
#else
memset(&(r->d[r->top]),0,(max-r->top)*sizeof(BN_ULONG));
#endif
r->top = max;
n0 = mont->n0[0];
r->top=max;
n0=mont->n0[0];
# ifdef BN_COUNT
fprintf(stderr, "word BN_from_montgomery_word %d * %d\n", nl, nl);
# endif
for (carry = 0, i = 0; i < nl; i++, rp++) {
# ifdef __TANDEM
{
long long t1;
long long t2;
long long t3;
t1 = rp[0] * (n0 & 0177777);
t2 = 037777600000l;
t2 = n0 & t2;
t3 = rp[0] & 0177777;
t2 = (t3 * t2) & BN_MASK2;
t1 = t1 + t2;
v = bn_mul_add_words(rp, np, nl, (BN_ULONG)t1);
}
# else
v = bn_mul_add_words(rp, np, nl, (rp[0] * n0) & BN_MASK2);
# endif
v = (v + carry + rp[nl]) & BN_MASK2;
carry |= (v != rp[nl]);
carry &= (v <= rp[nl]);
rp[nl] = v;
}
#ifdef BN_COUNT
fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
#endif
for (i=0; i<nl; i++)
{
#ifdef __TANDEM
{
long long t1;
long long t2;
long long t3;
t1 = rp[0] * (n0 & 0177777);
t2 = 037777600000l;
t2 = n0 & t2;
t3 = rp[0] & 0177777;
t2 = (t3 * t2) & BN_MASK2;
t1 = t1 + t2;
v=bn_mul_add_words(rp,np,nl,(BN_ULONG) t1);
}
#else
v=bn_mul_add_words(rp,np,nl,(rp[0]*n0)&BN_MASK2);
#endif
nrp++;
rp++;
if (((nrp[-1]+=v)&BN_MASK2) >= v)
continue;
else
{
if (((++nrp[0])&BN_MASK2) != 0) continue;
if (((++nrp[1])&BN_MASK2) != 0) continue;
for (x=2; (((++nrp[x])&BN_MASK2) == 0); x++) ;
}
}
bn_correct_top(r);
if (bn_wexpand(ret, nl) == NULL)
return (0);
ret->top = nl;
ret->neg = r->neg;
/* mont->ri will be a multiple of the word size and below code
* is kind of BN_rshift(ret,r,mont->ri) equivalent */
if (r->top <= ri)
{
ret->top=0;
return(1);
}
al=r->top-ri;
rp = ret->d;
ap = &(r->d[nl]);
#define BRANCH_FREE 1
#if BRANCH_FREE
if (bn_wexpand(ret,ri) == NULL) return(0);
x=0-(((al-ri)>>(sizeof(al)*8-1))&1);
ret->top=x=(ri&~x)|(al&x); /* min(ri,al) */
ret->neg=r->neg;
# define BRANCH_FREE 1
# if BRANCH_FREE
{
BN_ULONG *nrp;
size_t m;
rp=ret->d;
ap=&(r->d[ri]);
v = bn_sub_words(rp, ap, np, nl) - carry;
/*
* if subtraction result is real, then trick unconditional memcpy
* below to perform in-place "refresh" instead of actual copy.
*/
m = (0 - (size_t)v);
nrp =
(BN_ULONG *)(((PTR_SIZE_INT) rp & ~m) | ((PTR_SIZE_INT) ap & m));
{
size_t m1,m2;
for (i = 0, nl -= 4; i < nl; i += 4) {
BN_ULONG t1, t2, t3, t4;
v=bn_sub_words(rp,ap,np,ri);
/* this ----------------^^ works even in al<ri case
* thanks to zealous zeroing of top of the vector in the
* beginning. */
t1 = nrp[i + 0];
t2 = nrp[i + 1];
t3 = nrp[i + 2];
ap[i + 0] = 0;
t4 = nrp[i + 3];
ap[i + 1] = 0;
rp[i + 0] = t1;
ap[i + 2] = 0;
rp[i + 1] = t2;
ap[i + 3] = 0;
rp[i + 2] = t3;
rp[i + 3] = t4;
}
for (nl += 4; i < nl; i++)
rp[i] = nrp[i], ap[i] = 0;
}
# else
if (bn_sub_words(rp, ap, np, nl) - carry)
memcpy(rp, ap, nl * sizeof(BN_ULONG));
# endif
bn_correct_top(r);
bn_correct_top(ret);
bn_check_top(ret);
/* if (al==ri && !v) || al>ri) nrp=rp; else nrp=ap; */
/* in other words if subtraction result is real, then
* trick unconditional memcpy below to perform in-place
* "refresh" instead of actual copy. */
m1=0-(size_t)(((al-ri)>>(sizeof(al)*8-1))&1); /* al<ri */
m2=0-(size_t)(((ri-al)>>(sizeof(al)*8-1))&1); /* al>ri */
m1|=m2; /* (al!=ri) */
m1|=(0-(size_t)v); /* (al!=ri || v) */
m1&=~m2; /* (al!=ri || v) && !al>ri */
nrp=(BN_ULONG *)(((size_t)rp&~m1)|((size_t)ap&m1));
}
/* 'i<ri' is chosen to eliminate dependency on input data, even
* though it results in redundant copy in al<ri case. */
for (i=0,ri-=4; i<ri; i+=4)
{
BN_ULONG t1,t2,t3,t4;
t1=nrp[i+0];
t2=nrp[i+1];
t3=nrp[i+2]; ap[i+0]=0;
t4=nrp[i+3]; ap[i+1]=0;
rp[i+0]=t1; ap[i+2]=0;
rp[i+1]=t2; ap[i+3]=0;
rp[i+2]=t3;
rp[i+3]=t4;
}
for (ri+=4; i<ri; i++)
rp[i]=nrp[i], ap[i]=0;
bn_correct_top(r);
bn_correct_top(ret);
#else
if (bn_wexpand(ret,al) == NULL) return(0);
ret->top=al;
ret->neg=r->neg;
rp=ret->d;
ap=&(r->d[ri]);
al-=4;
for (i=0; i<al; i+=4)
{
BN_ULONG t1,t2,t3,t4;
t1=ap[i+0];
t2=ap[i+1];
t3=ap[i+2];
t4=ap[i+3];
rp[i+0]=t1;
rp[i+1]=t2;
rp[i+2]=t3;
rp[i+3]=t4;
}
al+=4;
for (; i<al; i++)
rp[i]=ap[i];
if (BN_ucmp(ret, &(mont->N)) >= 0)
{
if (!BN_usub(ret,ret,&(mont->N))) return(0);
}
#endif
bn_check_top(ret);
return(1);
}
#endif /* MONT_WORD */
return (1);
}
#endif /* MONT_WORD */
int BN_from_montgomery(BIGNUM *ret, const BIGNUM *a, BN_MONT_CTX *mont,
BN_CTX *ctx)
{
int retn=0;
BN_CTX *ctx)
{
int retn = 0;
#ifdef MONT_WORD
BIGNUM *t;
BIGNUM *t;
BN_CTX_start(ctx);
if ((t = BN_CTX_get(ctx)) && BN_copy(t,a))
retn = BN_from_montgomery_word(ret,t,mont);
BN_CTX_end(ctx);
#else /* !MONT_WORD */
BIGNUM *t1,*t2;
BN_CTX_start(ctx);
if ((t = BN_CTX_get(ctx)) && BN_copy(t, a))
retn = BN_from_montgomery_word(ret, t, mont);
BN_CTX_end(ctx);
#else /* !MONT_WORD */
BIGNUM *t1, *t2;
BN_CTX_start(ctx);
t1 = BN_CTX_get(ctx);
t2 = BN_CTX_get(ctx);
if (t1 == NULL || t2 == NULL) goto err;
if (!BN_copy(t1,a)) goto err;
BN_mask_bits(t1,mont->ri);
BN_CTX_start(ctx);
t1 = BN_CTX_get(ctx);
t2 = BN_CTX_get(ctx);
if (t1 == NULL || t2 == NULL)
goto err;
if (!BN_mul(t2,t1,&mont->Ni,ctx)) goto err;
BN_mask_bits(t2,mont->ri);
if (!BN_copy(t1, a))
goto err;
BN_mask_bits(t1, mont->ri);
if (!BN_mul(t1,t2,&mont->N,ctx)) goto err;
if (!BN_add(t2,a,t1)) goto err;
if (!BN_rshift(ret,t2,mont->ri)) goto err;
if (!BN_mul(t2, t1, &mont->Ni, ctx))
goto err;
BN_mask_bits(t2, mont->ri);
if (BN_ucmp(ret, &(mont->N)) >= 0)
{
if (!BN_usub(ret,ret,&(mont->N))) goto err;
}
retn=1;
bn_check_top(ret);
if (!BN_mul(t1, t2, &mont->N, ctx))
goto err;
if (!BN_add(t2, a, t1))
goto err;
if (!BN_rshift(ret, t2, mont->ri))
goto err;
if (BN_ucmp(ret, &(mont->N)) >= 0) {
if (!BN_usub(ret, ret, &(mont->N)))
goto err;
}
retn = 1;
bn_check_top(ret);
err:
BN_CTX_end(ctx);
#endif /* MONT_WORD */
return(retn);
}
BN_CTX_end(ctx);
#endif /* MONT_WORD */
return (retn);
}
BN_MONT_CTX *BN_MONT_CTX_new(void)
{
BN_MONT_CTX *ret;
{
BN_MONT_CTX *ret;
if ((ret=(BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
return(NULL);
if ((ret = (BN_MONT_CTX *)OPENSSL_malloc(sizeof(BN_MONT_CTX))) == NULL)
return (NULL);
BN_MONT_CTX_init(ret);
ret->flags=BN_FLG_MALLOCED;
return(ret);
}
BN_MONT_CTX_init(ret);
ret->flags = BN_FLG_MALLOCED;
return (ret);
}
void BN_MONT_CTX_init(BN_MONT_CTX *ctx)
{
ctx->ri=0;
BN_init(&(ctx->RR));
BN_init(&(ctx->N));
BN_init(&(ctx->Ni));
ctx->n0[0] = ctx->n0[1] = 0;
ctx->flags=0;
}
{
ctx->ri = 0;
BN_init(&(ctx->RR));
BN_init(&(ctx->N));
BN_init(&(ctx->Ni));
ctx->n0[0] = ctx->n0[1] = 0;
ctx->flags = 0;
}
void BN_MONT_CTX_free(BN_MONT_CTX *mont)
{
if(mont == NULL)
return;
{
if (mont == NULL)
return;
BN_free(&(mont->RR));
BN_free(&(mont->N));
BN_free(&(mont->Ni));
if (mont->flags & BN_FLG_MALLOCED)
OPENSSL_free(mont);
}
BN_free(&(mont->RR));
BN_free(&(mont->N));
BN_free(&(mont->Ni));
if (mont->flags & BN_FLG_MALLOCED)
OPENSSL_free(mont);
}
int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
{
int ret = 0;
BIGNUM *Ri,*R;
{
int ret = 0;
BIGNUM *Ri, *R;
BN_CTX_start(ctx);
if((Ri = BN_CTX_get(ctx)) == NULL) goto err;
R= &(mont->RR); /* grab RR as a temp */
if (!BN_copy(&(mont->N),mod)) goto err; /* Set N */
mont->N.neg = 0;
BN_CTX_start(ctx);
if ((Ri = BN_CTX_get(ctx)) == NULL)
goto err;
R = &(mont->RR); /* grab RR as a temp */
if (!BN_copy(&(mont->N), mod))
goto err; /* Set N */
mont->N.neg = 0;
#ifdef MONT_WORD
{
BIGNUM tmod;
BN_ULONG buf[2];
{
BIGNUM tmod;
BN_ULONG buf[2];
BN_init(&tmod);
tmod.d=buf;
tmod.dmax=2;
tmod.neg=0;
BN_init(&tmod);
tmod.d = buf;
tmod.dmax = 2;
tmod.neg = 0;
mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
mont->ri = (BN_num_bits(mod) + (BN_BITS2 - 1)) / BN_BITS2 * BN_BITS2;
#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
/* Only certain BN_BITS2<=32 platforms actually make use of
* n0[1], and we could use the #else case (with a shorter R
* value) for the others. However, currently only the assembler
* files do know which is which. */
# if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
/*
* Only certain BN_BITS2<=32 platforms actually make use of n0[1],
* and we could use the #else case (with a shorter R value) for the
* others. However, currently only the assembler files do know which
* is which.
*/
BN_zero(R);
if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
BN_zero(R);
if (!(BN_set_bit(R, 2 * BN_BITS2)))
goto err;
tmod.top=0;
if ((buf[0] = mod->d[0])) tmod.top=1;
if ((buf[1] = mod->top>1 ? mod->d[1] : 0)) tmod.top=2;
tmod.top = 0;
if ((buf[0] = mod->d[0]))
tmod.top = 1;
if ((buf[1] = mod->top > 1 ? mod->d[1] : 0))
tmod.top = 2;
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
if (!BN_is_zero(Ri))
{
if (!BN_sub_word(Ri,1)) goto err;
}
else /* if N mod word size == 1 */
{
if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
goto err;
/* Ri-- (mod double word size) */
Ri->neg=0;
Ri->d[0]=BN_MASK2;
Ri->d[1]=BN_MASK2;
Ri->top=2;
}
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only couple of least significant words: */
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
#else
BN_zero(R);
if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
goto err;
if (!BN_lshift(Ri, Ri, 2 * BN_BITS2))
goto err; /* R*Ri */
if (!BN_is_zero(Ri)) {
if (!BN_sub_word(Ri, 1))
goto err;
} else { /* if N mod word size == 1 */
buf[0]=mod->d[0]; /* tmod = N mod word size */
buf[1]=0;
tmod.top = buf[0] != 0 ? 1 : 0;
/* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
if (!BN_lshift(Ri,Ri,BN_BITS2)) goto err; /* R*Ri */
if (!BN_is_zero(Ri))
{
if (!BN_sub_word(Ri,1)) goto err;
}
else /* if N mod word size == 1 */
{
if (!BN_set_word(Ri,BN_MASK2)) goto err; /* Ri-- (mod word size) */
}
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only least significant word: */
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = 0;
#endif
}
#else /* !MONT_WORD */
{ /* bignum version */
mont->ri=BN_num_bits(&mont->N);
BN_zero(R);
if (!BN_set_bit(R,mont->ri)) goto err; /* R = 2^ri */
/* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&mont->N,ctx)) == NULL)
goto err;
if (!BN_lshift(Ri,Ri,mont->ri)) goto err; /* R*Ri */
if (!BN_sub_word(Ri,1)) goto err;
/* Ni = (R*Ri-1) / N */
if (!BN_div(&(mont->Ni),NULL,Ri,&mont->N,ctx)) goto err;
}
if (bn_expand(Ri, (int)sizeof(BN_ULONG) * 2) == NULL)
goto err;
/* Ri-- (mod double word size) */
Ri->neg = 0;
Ri->d[0] = BN_MASK2;
Ri->d[1] = BN_MASK2;
Ri->top = 2;
}
if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
goto err;
/*
* Ni = (R*Ri-1)/N, keep only couple of least significant words:
*/
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
# else
BN_zero(R);
if (!(BN_set_bit(R, BN_BITS2)))
goto err; /* R */
buf[0] = mod->d[0]; /* tmod = N mod word size */
buf[1] = 0;
tmod.top = buf[0] != 0 ? 1 : 0;
/* Ri = R^-1 mod N */
if ((BN_mod_inverse(Ri, R, &tmod, ctx)) == NULL)
goto err;
if (!BN_lshift(Ri, Ri, BN_BITS2))
goto err; /* R*Ri */
if (!BN_is_zero(Ri)) {
if (!BN_sub_word(Ri, 1))
goto err;
} else { /* if N mod word size == 1 */
if (!BN_set_word(Ri, BN_MASK2))
goto err; /* Ri-- (mod word size) */
}
if (!BN_div(Ri, NULL, Ri, &tmod, ctx))
goto err;
/*
* Ni = (R*Ri-1)/N, keep only least significant word:
*/
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = 0;
# endif
}
#else /* !MONT_WORD */
{ /* bignum version */
mont->ri = BN_num_bits(&mont->N);
BN_zero(R);
if (!BN_set_bit(R, mont->ri))
goto err; /* R = 2^ri */
/* Ri = R^-1 mod N */
if ((BN_mod_inverse(Ri, R, &mont->N, ctx)) == NULL)
goto err;
if (!BN_lshift(Ri, Ri, mont->ri))
goto err; /* R*Ri */
if (!BN_sub_word(Ri, 1))
goto err;
/*
* Ni = (R*Ri-1) / N
*/
if (!BN_div(&(mont->Ni), NULL, Ri, &mont->N, ctx))
goto err;
}
#endif
/* setup RR for conversions */
BN_zero(&(mont->RR));
if (!BN_set_bit(&(mont->RR),mont->ri*2)) goto err;
if (!BN_mod(&(mont->RR),&(mont->RR),&(mont->N),ctx)) goto err;
/* setup RR for conversions */
BN_zero(&(mont->RR));
if (!BN_set_bit(&(mont->RR), mont->ri * 2))
goto err;
if (!BN_mod(&(mont->RR), &(mont->RR), &(mont->N), ctx))
goto err;
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
{
if (to == from) return(to);
{
if (to == from)
return (to);
if (!BN_copy(&(to->RR),&(from->RR))) return NULL;
if (!BN_copy(&(to->N),&(from->N))) return NULL;
if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
to->ri=from->ri;
to->n0[0]=from->n0[0];
to->n0[1]=from->n0[1];
return(to);
}
if (!BN_copy(&(to->RR), &(from->RR)))
return NULL;
if (!BN_copy(&(to->N), &(from->N)))
return NULL;
if (!BN_copy(&(to->Ni), &(from->Ni)))
return NULL;
to->ri = from->ri;
to->n0[0] = from->n0[0];
to->n0[1] = from->n0[1];
return (to);
}
BN_MONT_CTX *BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, int lock,
const BIGNUM *mod, BN_CTX *ctx)
{
int got_write_lock = 0;
BN_MONT_CTX *ret;
const BIGNUM *mod, BN_CTX *ctx)
{
BN_MONT_CTX *ret;
CRYPTO_r_lock(lock);
if (!*pmont)
{
CRYPTO_r_unlock(lock);
CRYPTO_w_lock(lock);
got_write_lock = 1;
CRYPTO_r_lock(lock);
ret = *pmont;
CRYPTO_r_unlock(lock);
if (ret)
return ret;
if (!*pmont)
{
ret = BN_MONT_CTX_new();
if (ret && !BN_MONT_CTX_set(ret, mod, ctx))
BN_MONT_CTX_free(ret);
else
*pmont = ret;
}
}
ret = *pmont;
if (got_write_lock)
CRYPTO_w_unlock(lock);
else
CRYPTO_r_unlock(lock);
return ret;
}
/*
* We don't want to serialise globally while doing our lazy-init math in
* BN_MONT_CTX_set. That punishes threads that are doing independent
* things. Instead, punish the case where more than one thread tries to
* lazy-init the same 'pmont', by having each do the lazy-init math work
* independently and only use the one from the thread that wins the race
* (the losers throw away the work they've done).
*/
ret = BN_MONT_CTX_new();
if (!ret)
return NULL;
if (!BN_MONT_CTX_set(ret, mod, ctx)) {
BN_MONT_CTX_free(ret);
return NULL;
}
/* The locked compare-and-set, after the local work is done. */
CRYPTO_w_lock(lock);
if (*pmont) {
BN_MONT_CTX_free(ret);
ret = *pmont;
} else
*pmont = ret;
CRYPTO_w_unlock(lock);
return ret;
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -61,70 +61,68 @@
#include "bn_lcl.h"
int BN_bn2mpi(const BIGNUM *a, unsigned char *d)
{
int bits;
int num=0;
int ext=0;
long l;
{
int bits;
int num = 0;
int ext = 0;
long l;
bits=BN_num_bits(a);
num=(bits+7)/8;
if (bits > 0)
{
ext=((bits & 0x07) == 0);
}
if (d == NULL)
return(num+4+ext);
bits = BN_num_bits(a);
num = (bits + 7) / 8;
if (bits > 0) {
ext = ((bits & 0x07) == 0);
}
if (d == NULL)
return (num + 4 + ext);
l=num+ext;
d[0]=(unsigned char)(l>>24)&0xff;
d[1]=(unsigned char)(l>>16)&0xff;
d[2]=(unsigned char)(l>> 8)&0xff;
d[3]=(unsigned char)(l )&0xff;
if (ext) d[4]=0;
num=BN_bn2bin(a,&(d[4+ext]));
if (a->neg)
d[4]|=0x80;
return(num+4+ext);
}
l = num + ext;
d[0] = (unsigned char)(l >> 24) & 0xff;
d[1] = (unsigned char)(l >> 16) & 0xff;
d[2] = (unsigned char)(l >> 8) & 0xff;
d[3] = (unsigned char)(l) & 0xff;
if (ext)
d[4] = 0;
num = BN_bn2bin(a, &(d[4 + ext]));
if (a->neg)
d[4] |= 0x80;
return (num + 4 + ext);
}
BIGNUM *BN_mpi2bn(const unsigned char *d, int n, BIGNUM *a)
{
long len;
int neg=0;
{
long len;
int neg = 0;
if (n < 4)
{
BNerr(BN_F_BN_MPI2BN,BN_R_INVALID_LENGTH);
return(NULL);
}
len=((long)d[0]<<24)|((long)d[1]<<16)|((int)d[2]<<8)|(int)d[3];
if ((len+4) != n)
{
BNerr(BN_F_BN_MPI2BN,BN_R_ENCODING_ERROR);
return(NULL);
}
if (n < 4) {
BNerr(BN_F_BN_MPI2BN, BN_R_INVALID_LENGTH);
return (NULL);
}
len = ((long)d[0] << 24) | ((long)d[1] << 16) | ((int)d[2] << 8) | (int)
d[3];
if ((len + 4) != n) {
BNerr(BN_F_BN_MPI2BN, BN_R_ENCODING_ERROR);
return (NULL);
}
if (a == NULL) a=BN_new();
if (a == NULL) return(NULL);
if (len == 0)
{
a->neg=0;
a->top=0;
return(a);
}
d+=4;
if ((*d) & 0x80)
neg=1;
if (BN_bin2bn(d,(int)len,a) == NULL)
return(NULL);
a->neg=neg;
if (neg)
{
BN_clear_bit(a,BN_num_bits(a)-1);
}
bn_check_top(a);
return(a);
}
if (a == NULL)
a = BN_new();
if (a == NULL)
return (NULL);
if (len == 0) {
a->neg = 0;
a->top = 0;
return (a);
}
d += 4;
if ((*d) & 0x80)
neg = 1;
if (BN_bin2bn(d, (int)len, a) == NULL)
return (NULL);
a->neg = neg;
if (neg) {
BN_clear_bit(a, BN_num_bits(a) - 1);
}
bn_check_top(a);
return (a);
}

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -63,7 +63,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -115,380 +115,401 @@
#include "bn_lcl.h"
#include <openssl/rand.h>
/* NB: these functions have been "upgraded", the deprecated versions (which are
* compatibility wrappers using these functions) are in bn_depr.c.
* - Geoff
/*
* NB: these functions have been "upgraded", the deprecated versions (which
* are compatibility wrappers using these functions) are in bn_depr.c. -
* Geoff
*/
/* The quick sieve algorithm approach to weeding out primes is
* Philip Zimmermann's, as implemented in PGP. I have had a read of
* his comments and implemented my own version.
/*
* The quick sieve algorithm approach to weeding out primes is Philip
* Zimmermann's, as implemented in PGP. I have had a read of his comments
* and implemented my own version.
*/
#include "bn_prime.h"
static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont);
const BIGNUM *a1_odd, int k, BN_CTX *ctx,
BN_MONT_CTX *mont);
static int probable_prime(BIGNUM *rnd, int bits);
static int probable_prime_dh(BIGNUM *rnd, int bits,
const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
static int probable_prime_dh_safe(BIGNUM *rnd, int bits,
const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx);
const BIGNUM *add, const BIGNUM *rem,
BN_CTX *ctx);
static int probable_prime_dh_safe(BIGNUM *rnd, int bits, const BIGNUM *add,
const BIGNUM *rem, BN_CTX *ctx);
int BN_GENCB_call(BN_GENCB *cb, int a, int b)
{
/* No callback means continue */
if(!cb) return 1;
switch(cb->ver)
{
case 1:
/* Deprecated-style callbacks */
if(!cb->cb.cb_1)
return 1;
cb->cb.cb_1(a, b, cb->arg);
return 1;
case 2:
/* New-style callbacks */
return cb->cb.cb_2(a, b, cb);
default:
break;
}
/* Unrecognised callback type */
return 0;
}
{
/* No callback means continue */
if (!cb)
return 1;
switch (cb->ver) {
case 1:
/* Deprecated-style callbacks */
if (!cb->cb.cb_1)
return 1;
cb->cb.cb_1(a, b, cb->arg);
return 1;
case 2:
/* New-style callbacks */
return cb->cb.cb_2(a, b, cb);
default:
break;
}
/* Unrecognised callback type */
return 0;
}
int BN_generate_prime_ex(BIGNUM *ret, int bits, int safe,
const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb)
{
BIGNUM *t;
int found=0;
int i,j,c1=0;
BN_CTX *ctx;
int checks = BN_prime_checks_for_size(bits);
const BIGNUM *add, const BIGNUM *rem, BN_GENCB *cb)
{
BIGNUM *t;
int found = 0;
int i, j, c1 = 0;
BN_CTX *ctx;
int checks = BN_prime_checks_for_size(bits);
ctx=BN_CTX_new();
if (ctx == NULL) goto err;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
if(!t) goto err;
loop:
/* make a random number and set the top and bottom bits */
if (add == NULL)
{
if (!probable_prime(ret,bits)) goto err;
}
else
{
if (safe)
{
if (!probable_prime_dh_safe(ret,bits,add,rem,ctx))
goto err;
}
else
{
if (!probable_prime_dh(ret,bits,add,rem,ctx))
goto err;
}
}
/* if (BN_mod_word(ret,(BN_ULONG)3) == 1) goto loop; */
if(!BN_GENCB_call(cb, 0, c1++))
/* aborted */
goto err;
ctx = BN_CTX_new();
if (ctx == NULL)
goto err;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
if (!t)
goto err;
loop:
/* make a random number and set the top and bottom bits */
if (add == NULL) {
if (!probable_prime(ret, bits))
goto err;
} else {
if (safe) {
if (!probable_prime_dh_safe(ret, bits, add, rem, ctx))
goto err;
} else {
if (!probable_prime_dh(ret, bits, add, rem, ctx))
goto err;
}
}
/* if (BN_mod_word(ret,(BN_ULONG)3) == 1) goto loop; */
if (!BN_GENCB_call(cb, 0, c1++))
/* aborted */
goto err;
if (!safe)
{
i=BN_is_prime_fasttest_ex(ret,checks,ctx,0,cb);
if (i == -1) goto err;
if (i == 0) goto loop;
}
else
{
/* for "safe prime" generation,
* check that (p-1)/2 is prime.
* Since a prime is odd, We just
* need to divide by 2 */
if (!BN_rshift1(t,ret)) goto err;
if (!safe) {
i = BN_is_prime_fasttest_ex(ret, checks, ctx, 0, cb);
if (i == -1)
goto err;
if (i == 0)
goto loop;
} else {
/*
* for "safe prime" generation, check that (p-1)/2 is prime. Since a
* prime is odd, We just need to divide by 2
*/
if (!BN_rshift1(t, ret))
goto err;
for (i=0; i<checks; i++)
{
j=BN_is_prime_fasttest_ex(ret,1,ctx,0,cb);
if (j == -1) goto err;
if (j == 0) goto loop;
for (i = 0; i < checks; i++) {
j = BN_is_prime_fasttest_ex(ret, 1, ctx, 0, cb);
if (j == -1)
goto err;
if (j == 0)
goto loop;
j=BN_is_prime_fasttest_ex(t,1,ctx,0,cb);
if (j == -1) goto err;
if (j == 0) goto loop;
j = BN_is_prime_fasttest_ex(t, 1, ctx, 0, cb);
if (j == -1)
goto err;
if (j == 0)
goto loop;
if(!BN_GENCB_call(cb, 2, c1-1))
goto err;
/* We have a safe prime test pass */
}
}
/* we have a prime :-) */
found = 1;
err:
if (ctx != NULL)
{
BN_CTX_end(ctx);
BN_CTX_free(ctx);
}
bn_check_top(ret);
return found;
}
if (!BN_GENCB_call(cb, 2, c1 - 1))
goto err;
/* We have a safe prime test pass */
}
}
/* we have a prime :-) */
found = 1;
err:
if (ctx != NULL) {
BN_CTX_end(ctx);
BN_CTX_free(ctx);
}
bn_check_top(ret);
return found;
}
int BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed, BN_GENCB *cb)
{
return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb);
}
int BN_is_prime_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
BN_GENCB *cb)
{
return BN_is_prime_fasttest_ex(a, checks, ctx_passed, 0, cb);
}
int BN_is_prime_fasttest_ex(const BIGNUM *a, int checks, BN_CTX *ctx_passed,
int do_trial_division, BN_GENCB *cb)
{
int i, j, ret = -1;
int k;
BN_CTX *ctx = NULL;
BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
BN_MONT_CTX *mont = NULL;
const BIGNUM *A = NULL;
int do_trial_division, BN_GENCB *cb)
{
int i, j, ret = -1;
int k;
BN_CTX *ctx = NULL;
BIGNUM *A1, *A1_odd, *check; /* taken from ctx */
BN_MONT_CTX *mont = NULL;
const BIGNUM *A = NULL;
if (BN_cmp(a, BN_value_one()) <= 0)
return 0;
if (checks == BN_prime_checks)
checks = BN_prime_checks_for_size(BN_num_bits(a));
if (BN_cmp(a, BN_value_one()) <= 0)
return 0;
/* first look for small factors */
if (!BN_is_odd(a))
/* a is even => a is prime if and only if a == 2 */
return BN_is_word(a, 2);
if (do_trial_division)
{
for (i = 1; i < NUMPRIMES; i++)
if (BN_mod_word(a, primes[i]) == 0)
return 0;
if(!BN_GENCB_call(cb, 1, -1))
goto err;
}
if (checks == BN_prime_checks)
checks = BN_prime_checks_for_size(BN_num_bits(a));
if (ctx_passed != NULL)
ctx = ctx_passed;
else
if ((ctx=BN_CTX_new()) == NULL)
goto err;
BN_CTX_start(ctx);
/* first look for small factors */
if (!BN_is_odd(a))
/* a is even => a is prime if and only if a == 2 */
return BN_is_word(a, 2);
if (do_trial_division) {
for (i = 1; i < NUMPRIMES; i++)
if (BN_mod_word(a, primes[i]) == 0)
return 0;
if (!BN_GENCB_call(cb, 1, -1))
goto err;
}
/* A := abs(a) */
if (a->neg)
{
BIGNUM *t;
if ((t = BN_CTX_get(ctx)) == NULL) goto err;
BN_copy(t, a);
t->neg = 0;
A = t;
}
else
A = a;
A1 = BN_CTX_get(ctx);
A1_odd = BN_CTX_get(ctx);
check = BN_CTX_get(ctx);
if (check == NULL) goto err;
if (ctx_passed != NULL)
ctx = ctx_passed;
else if ((ctx = BN_CTX_new()) == NULL)
goto err;
BN_CTX_start(ctx);
/* compute A1 := A - 1 */
if (!BN_copy(A1, A))
goto err;
if (!BN_sub_word(A1, 1))
goto err;
if (BN_is_zero(A1))
{
ret = 0;
goto err;
}
/* A := abs(a) */
if (a->neg) {
BIGNUM *t;
if ((t = BN_CTX_get(ctx)) == NULL)
goto err;
BN_copy(t, a);
t->neg = 0;
A = t;
} else
A = a;
A1 = BN_CTX_get(ctx);
A1_odd = BN_CTX_get(ctx);
check = BN_CTX_get(ctx);
if (check == NULL)
goto err;
/* write A1 as A1_odd * 2^k */
k = 1;
while (!BN_is_bit_set(A1, k))
k++;
if (!BN_rshift(A1_odd, A1, k))
goto err;
/* compute A1 := A - 1 */
if (!BN_copy(A1, A))
goto err;
if (!BN_sub_word(A1, 1))
goto err;
if (BN_is_zero(A1)) {
ret = 0;
goto err;
}
/* Montgomery setup for computations mod A */
mont = BN_MONT_CTX_new();
if (mont == NULL)
goto err;
if (!BN_MONT_CTX_set(mont, A, ctx))
goto err;
for (i = 0; i < checks; i++)
{
if (!BN_pseudo_rand_range(check, A1))
goto err;
if (!BN_add_word(check, 1))
goto err;
/* now 1 <= check < A */
/* write A1 as A1_odd * 2^k */
k = 1;
while (!BN_is_bit_set(A1, k))
k++;
if (!BN_rshift(A1_odd, A1, k))
goto err;
j = witness(check, A, A1, A1_odd, k, ctx, mont);
if (j == -1) goto err;
if (j)
{
ret=0;
goto err;
}
if(!BN_GENCB_call(cb, 1, i))
goto err;
}
ret=1;
err:
if (ctx != NULL)
{
BN_CTX_end(ctx);
if (ctx_passed == NULL)
BN_CTX_free(ctx);
}
if (mont != NULL)
BN_MONT_CTX_free(mont);
/* Montgomery setup for computations mod A */
mont = BN_MONT_CTX_new();
if (mont == NULL)
goto err;
if (!BN_MONT_CTX_set(mont, A, ctx))
goto err;
return(ret);
}
for (i = 0; i < checks; i++) {
if (!BN_pseudo_rand_range(check, A1))
goto err;
if (!BN_add_word(check, 1))
goto err;
/* now 1 <= check < A */
j = witness(check, A, A1, A1_odd, k, ctx, mont);
if (j == -1)
goto err;
if (j) {
ret = 0;
goto err;
}
if (!BN_GENCB_call(cb, 1, i))
goto err;
}
ret = 1;
err:
if (ctx != NULL) {
BN_CTX_end(ctx);
if (ctx_passed == NULL)
BN_CTX_free(ctx);
}
if (mont != NULL)
BN_MONT_CTX_free(mont);
return (ret);
}
static int witness(BIGNUM *w, const BIGNUM *a, const BIGNUM *a1,
const BIGNUM *a1_odd, int k, BN_CTX *ctx, BN_MONT_CTX *mont)
{
if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
return -1;
if (BN_is_one(w))
return 0; /* probably prime */
if (BN_cmp(w, a1) == 0)
return 0; /* w == -1 (mod a), 'a' is probably prime */
while (--k)
{
if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
return -1;
if (BN_is_one(w))
return 1; /* 'a' is composite, otherwise a previous 'w' would
* have been == -1 (mod 'a') */
if (BN_cmp(w, a1) == 0)
return 0; /* w == -1 (mod a), 'a' is probably prime */
}
/* If we get here, 'w' is the (a-1)/2-th power of the original 'w',
* and it is neither -1 nor +1 -- so 'a' cannot be prime */
bn_check_top(w);
return 1;
}
const BIGNUM *a1_odd, int k, BN_CTX *ctx,
BN_MONT_CTX *mont)
{
if (!BN_mod_exp_mont(w, w, a1_odd, a, ctx, mont)) /* w := w^a1_odd mod a */
return -1;
if (BN_is_one(w))
return 0; /* probably prime */
if (BN_cmp(w, a1) == 0)
return 0; /* w == -1 (mod a), 'a' is probably prime */
while (--k) {
if (!BN_mod_mul(w, w, w, a, ctx)) /* w := w^2 mod a */
return -1;
if (BN_is_one(w))
return 1; /* 'a' is composite, otherwise a previous 'w'
* would have been == -1 (mod 'a') */
if (BN_cmp(w, a1) == 0)
return 0; /* w == -1 (mod a), 'a' is probably prime */
}
/*
* If we get here, 'w' is the (a-1)/2-th power of the original 'w', and
* it is neither -1 nor +1 -- so 'a' cannot be prime
*/
bn_check_top(w);
return 1;
}
static int probable_prime(BIGNUM *rnd, int bits)
{
int i;
prime_t mods[NUMPRIMES];
BN_ULONG delta,maxdelta;
{
int i;
prime_t mods[NUMPRIMES];
BN_ULONG delta, maxdelta;
again:
if (!BN_rand(rnd,bits,1,1)) return(0);
/* we now have a random number 'rand' to test. */
for (i=1; i<NUMPRIMES; i++)
mods[i]=(prime_t)BN_mod_word(rnd,(BN_ULONG)primes[i]);
maxdelta=BN_MASK2 - primes[NUMPRIMES-1];
delta=0;
loop: for (i=1; i<NUMPRIMES; i++)
{
/* check that rnd is not a prime and also
* that gcd(rnd-1,primes) == 1 (except for 2) */
if (((mods[i]+delta)%primes[i]) <= 1)
{
delta+=2;
if (delta > maxdelta) goto again;
goto loop;
}
}
if (!BN_add_word(rnd,delta)) return(0);
bn_check_top(rnd);
return(1);
}
again:
if (!BN_rand(rnd, bits, 1, 1))
return (0);
/* we now have a random number 'rand' to test. */
for (i = 1; i < NUMPRIMES; i++)
mods[i] = (prime_t) BN_mod_word(rnd, (BN_ULONG)primes[i]);
maxdelta = BN_MASK2 - primes[NUMPRIMES - 1];
delta = 0;
loop:for (i = 1; i < NUMPRIMES; i++) {
/*
* check that rnd is not a prime and also that gcd(rnd-1,primes) == 1
* (except for 2)
*/
if (((mods[i] + delta) % primes[i]) <= 1) {
delta += 2;
if (delta > maxdelta)
goto again;
goto loop;
}
}
if (!BN_add_word(rnd, delta))
return (0);
bn_check_top(rnd);
return (1);
}
static int probable_prime_dh(BIGNUM *rnd, int bits,
const BIGNUM *add, const BIGNUM *rem, BN_CTX *ctx)
{
int i,ret=0;
BIGNUM *t1;
const BIGNUM *add, const BIGNUM *rem,
BN_CTX *ctx)
{
int i, ret = 0;
BIGNUM *t1;
BN_CTX_start(ctx);
if ((t1 = BN_CTX_get(ctx)) == NULL) goto err;
BN_CTX_start(ctx);
if ((t1 = BN_CTX_get(ctx)) == NULL)
goto err;
if (!BN_rand(rnd,bits,0,1)) goto err;
if (!BN_rand(rnd, bits, 0, 1))
goto err;
/* we need ((rnd-rem) % add) == 0 */
/* we need ((rnd-rem) % add) == 0 */
if (!BN_mod(t1,rnd,add,ctx)) goto err;
if (!BN_sub(rnd,rnd,t1)) goto err;
if (rem == NULL)
{ if (!BN_add_word(rnd,1)) goto err; }
else
{ if (!BN_add(rnd,rnd,rem)) goto err; }
if (!BN_mod(t1, rnd, add, ctx))
goto err;
if (!BN_sub(rnd, rnd, t1))
goto err;
if (rem == NULL) {
if (!BN_add_word(rnd, 1))
goto err;
} else {
if (!BN_add(rnd, rnd, rem))
goto err;
}
/* we now have a random number 'rand' to test. */
/* we now have a random number 'rand' to test. */
loop: for (i=1; i<NUMPRIMES; i++)
{
/* check that rnd is a prime */
if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
{
if (!BN_add(rnd,rnd,add)) goto err;
goto loop;
}
}
ret=1;
err:
BN_CTX_end(ctx);
bn_check_top(rnd);
return(ret);
}
loop:for (i = 1; i < NUMPRIMES; i++) {
/* check that rnd is a prime */
if (BN_mod_word(rnd, (BN_ULONG)primes[i]) <= 1) {
if (!BN_add(rnd, rnd, add))
goto err;
goto loop;
}
}
ret = 1;
err:
BN_CTX_end(ctx);
bn_check_top(rnd);
return (ret);
}
static int probable_prime_dh_safe(BIGNUM *p, int bits, const BIGNUM *padd,
const BIGNUM *rem, BN_CTX *ctx)
{
int i,ret=0;
BIGNUM *t1,*qadd,*q;
const BIGNUM *rem, BN_CTX *ctx)
{
int i, ret = 0;
BIGNUM *t1, *qadd, *q;
bits--;
BN_CTX_start(ctx);
t1 = BN_CTX_get(ctx);
q = BN_CTX_get(ctx);
qadd = BN_CTX_get(ctx);
if (qadd == NULL) goto err;
bits--;
BN_CTX_start(ctx);
t1 = BN_CTX_get(ctx);
q = BN_CTX_get(ctx);
qadd = BN_CTX_get(ctx);
if (qadd == NULL)
goto err;
if (!BN_rshift1(qadd,padd)) goto err;
if (!BN_rand(q,bits,0,1)) goto err;
if (!BN_rshift1(qadd, padd))
goto err;
/* we need ((rnd-rem) % add) == 0 */
if (!BN_mod(t1,q,qadd,ctx)) goto err;
if (!BN_sub(q,q,t1)) goto err;
if (rem == NULL)
{ if (!BN_add_word(q,1)) goto err; }
else
{
if (!BN_rshift1(t1,rem)) goto err;
if (!BN_add(q,q,t1)) goto err;
}
if (!BN_rand(q, bits, 0, 1))
goto err;
/* we now have a random number 'rand' to test. */
if (!BN_lshift1(p,q)) goto err;
if (!BN_add_word(p,1)) goto err;
/* we need ((rnd-rem) % add) == 0 */
if (!BN_mod(t1, q, qadd, ctx))
goto err;
if (!BN_sub(q, q, t1))
goto err;
if (rem == NULL) {
if (!BN_add_word(q, 1))
goto err;
} else {
if (!BN_rshift1(t1, rem))
goto err;
if (!BN_add(q, q, t1))
goto err;
}
loop: for (i=1; i<NUMPRIMES; i++)
{
/* check that p and q are prime */
/* check that for p and q
* gcd(p-1,primes) == 1 (except for 2) */
if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
(BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
{
if (!BN_add(p,p,padd)) goto err;
if (!BN_add(q,q,qadd)) goto err;
goto loop;
}
}
ret=1;
err:
BN_CTX_end(ctx);
bn_check_top(p);
return(ret);
}
/* we now have a random number 'rand' to test. */
if (!BN_lshift1(p, q))
goto err;
if (!BN_add_word(p, 1))
goto err;
loop:for (i = 1; i < NUMPRIMES; i++) {
/* check that p and q are prime */
/*
* check that for p and q gcd(p-1,primes) == 1 (except for 2)
*/
if ((BN_mod_word(p, (BN_ULONG)primes[i]) == 0) ||
(BN_mod_word(q, (BN_ULONG)primes[i]) == 0)) {
if (!BN_add(p, p, padd))
goto err;
if (!BN_add(q, q, qadd))
goto err;
goto loop;
}
}
ret = 1;
err:
BN_CTX_end(ctx);
bn_check_top(p);
return (ret);
}

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -57,271 +57,270 @@
*/
#ifndef EIGHT_BIT
#define NUMPRIMES 2048
# define NUMPRIMES 2048
typedef unsigned short prime_t;
#else
#define NUMPRIMES 54
# define NUMPRIMES 54
typedef unsigned char prime_t;
#endif
static const prime_t primes[NUMPRIMES]=
{
2, 3, 5, 7, 11, 13, 17, 19,
23, 29, 31, 37, 41, 43, 47, 53,
59, 61, 67, 71, 73, 79, 83, 89,
97, 101, 103, 107, 109, 113, 127, 131,
137, 139, 149, 151, 157, 163, 167, 173,
179, 181, 191, 193, 197, 199, 211, 223,
227, 229, 233, 239, 241, 251,
static const prime_t primes[NUMPRIMES] = {
2, 3, 5, 7, 11, 13, 17, 19,
23, 29, 31, 37, 41, 43, 47, 53,
59, 61, 67, 71, 73, 79, 83, 89,
97, 101, 103, 107, 109, 113, 127, 131,
137, 139, 149, 151, 157, 163, 167, 173,
179, 181, 191, 193, 197, 199, 211, 223,
227, 229, 233, 239, 241, 251,
#ifndef EIGHT_BIT
257, 263,
269, 271, 277, 281, 283, 293, 307, 311,
313, 317, 331, 337, 347, 349, 353, 359,
367, 373, 379, 383, 389, 397, 401, 409,
419, 421, 431, 433, 439, 443, 449, 457,
461, 463, 467, 479, 487, 491, 499, 503,
509, 521, 523, 541, 547, 557, 563, 569,
571, 577, 587, 593, 599, 601, 607, 613,
617, 619, 631, 641, 643, 647, 653, 659,
661, 673, 677, 683, 691, 701, 709, 719,
727, 733, 739, 743, 751, 757, 761, 769,
773, 787, 797, 809, 811, 821, 823, 827,
829, 839, 853, 857, 859, 863, 877, 881,
883, 887, 907, 911, 919, 929, 937, 941,
947, 953, 967, 971, 977, 983, 991, 997,
1009,1013,1019,1021,1031,1033,1039,1049,
1051,1061,1063,1069,1087,1091,1093,1097,
1103,1109,1117,1123,1129,1151,1153,1163,
1171,1181,1187,1193,1201,1213,1217,1223,
1229,1231,1237,1249,1259,1277,1279,1283,
1289,1291,1297,1301,1303,1307,1319,1321,
1327,1361,1367,1373,1381,1399,1409,1423,
1427,1429,1433,1439,1447,1451,1453,1459,
1471,1481,1483,1487,1489,1493,1499,1511,
1523,1531,1543,1549,1553,1559,1567,1571,
1579,1583,1597,1601,1607,1609,1613,1619,
1621,1627,1637,1657,1663,1667,1669,1693,
1697,1699,1709,1721,1723,1733,1741,1747,
1753,1759,1777,1783,1787,1789,1801,1811,
1823,1831,1847,1861,1867,1871,1873,1877,
1879,1889,1901,1907,1913,1931,1933,1949,
1951,1973,1979,1987,1993,1997,1999,2003,
2011,2017,2027,2029,2039,2053,2063,2069,
2081,2083,2087,2089,2099,2111,2113,2129,
2131,2137,2141,2143,2153,2161,2179,2203,
2207,2213,2221,2237,2239,2243,2251,2267,
2269,2273,2281,2287,2293,2297,2309,2311,
2333,2339,2341,2347,2351,2357,2371,2377,
2381,2383,2389,2393,2399,2411,2417,2423,
2437,2441,2447,2459,2467,2473,2477,2503,
2521,2531,2539,2543,2549,2551,2557,2579,
2591,2593,2609,2617,2621,2633,2647,2657,
2659,2663,2671,2677,2683,2687,2689,2693,
2699,2707,2711,2713,2719,2729,2731,2741,
2749,2753,2767,2777,2789,2791,2797,2801,
2803,2819,2833,2837,2843,2851,2857,2861,
2879,2887,2897,2903,2909,2917,2927,2939,
2953,2957,2963,2969,2971,2999,3001,3011,
3019,3023,3037,3041,3049,3061,3067,3079,
3083,3089,3109,3119,3121,3137,3163,3167,
3169,3181,3187,3191,3203,3209,3217,3221,
3229,3251,3253,3257,3259,3271,3299,3301,
3307,3313,3319,3323,3329,3331,3343,3347,
3359,3361,3371,3373,3389,3391,3407,3413,
3433,3449,3457,3461,3463,3467,3469,3491,
3499,3511,3517,3527,3529,3533,3539,3541,
3547,3557,3559,3571,3581,3583,3593,3607,
3613,3617,3623,3631,3637,3643,3659,3671,
3673,3677,3691,3697,3701,3709,3719,3727,
3733,3739,3761,3767,3769,3779,3793,3797,
3803,3821,3823,3833,3847,3851,3853,3863,
3877,3881,3889,3907,3911,3917,3919,3923,
3929,3931,3943,3947,3967,3989,4001,4003,
4007,4013,4019,4021,4027,4049,4051,4057,
4073,4079,4091,4093,4099,4111,4127,4129,
4133,4139,4153,4157,4159,4177,4201,4211,
4217,4219,4229,4231,4241,4243,4253,4259,
4261,4271,4273,4283,4289,4297,4327,4337,
4339,4349,4357,4363,4373,4391,4397,4409,
4421,4423,4441,4447,4451,4457,4463,4481,
4483,4493,4507,4513,4517,4519,4523,4547,
4549,4561,4567,4583,4591,4597,4603,4621,
4637,4639,4643,4649,4651,4657,4663,4673,
4679,4691,4703,4721,4723,4729,4733,4751,
4759,4783,4787,4789,4793,4799,4801,4813,
4817,4831,4861,4871,4877,4889,4903,4909,
4919,4931,4933,4937,4943,4951,4957,4967,
4969,4973,4987,4993,4999,5003,5009,5011,
5021,5023,5039,5051,5059,5077,5081,5087,
5099,5101,5107,5113,5119,5147,5153,5167,
5171,5179,5189,5197,5209,5227,5231,5233,
5237,5261,5273,5279,5281,5297,5303,5309,
5323,5333,5347,5351,5381,5387,5393,5399,
5407,5413,5417,5419,5431,5437,5441,5443,
5449,5471,5477,5479,5483,5501,5503,5507,
5519,5521,5527,5531,5557,5563,5569,5573,
5581,5591,5623,5639,5641,5647,5651,5653,
5657,5659,5669,5683,5689,5693,5701,5711,
5717,5737,5741,5743,5749,5779,5783,5791,
5801,5807,5813,5821,5827,5839,5843,5849,
5851,5857,5861,5867,5869,5879,5881,5897,
5903,5923,5927,5939,5953,5981,5987,6007,
6011,6029,6037,6043,6047,6053,6067,6073,
6079,6089,6091,6101,6113,6121,6131,6133,
6143,6151,6163,6173,6197,6199,6203,6211,
6217,6221,6229,6247,6257,6263,6269,6271,
6277,6287,6299,6301,6311,6317,6323,6329,
6337,6343,6353,6359,6361,6367,6373,6379,
6389,6397,6421,6427,6449,6451,6469,6473,
6481,6491,6521,6529,6547,6551,6553,6563,
6569,6571,6577,6581,6599,6607,6619,6637,
6653,6659,6661,6673,6679,6689,6691,6701,
6703,6709,6719,6733,6737,6761,6763,6779,
6781,6791,6793,6803,6823,6827,6829,6833,
6841,6857,6863,6869,6871,6883,6899,6907,
6911,6917,6947,6949,6959,6961,6967,6971,
6977,6983,6991,6997,7001,7013,7019,7027,
7039,7043,7057,7069,7079,7103,7109,7121,
7127,7129,7151,7159,7177,7187,7193,7207,
7211,7213,7219,7229,7237,7243,7247,7253,
7283,7297,7307,7309,7321,7331,7333,7349,
7351,7369,7393,7411,7417,7433,7451,7457,
7459,7477,7481,7487,7489,7499,7507,7517,
7523,7529,7537,7541,7547,7549,7559,7561,
7573,7577,7583,7589,7591,7603,7607,7621,
7639,7643,7649,7669,7673,7681,7687,7691,
7699,7703,7717,7723,7727,7741,7753,7757,
7759,7789,7793,7817,7823,7829,7841,7853,
7867,7873,7877,7879,7883,7901,7907,7919,
7927,7933,7937,7949,7951,7963,7993,8009,
8011,8017,8039,8053,8059,8069,8081,8087,
8089,8093,8101,8111,8117,8123,8147,8161,
8167,8171,8179,8191,8209,8219,8221,8231,
8233,8237,8243,8263,8269,8273,8287,8291,
8293,8297,8311,8317,8329,8353,8363,8369,
8377,8387,8389,8419,8423,8429,8431,8443,
8447,8461,8467,8501,8513,8521,8527,8537,
8539,8543,8563,8573,8581,8597,8599,8609,
8623,8627,8629,8641,8647,8663,8669,8677,
8681,8689,8693,8699,8707,8713,8719,8731,
8737,8741,8747,8753,8761,8779,8783,8803,
8807,8819,8821,8831,8837,8839,8849,8861,
8863,8867,8887,8893,8923,8929,8933,8941,
8951,8963,8969,8971,8999,9001,9007,9011,
9013,9029,9041,9043,9049,9059,9067,9091,
9103,9109,9127,9133,9137,9151,9157,9161,
9173,9181,9187,9199,9203,9209,9221,9227,
9239,9241,9257,9277,9281,9283,9293,9311,
9319,9323,9337,9341,9343,9349,9371,9377,
9391,9397,9403,9413,9419,9421,9431,9433,
9437,9439,9461,9463,9467,9473,9479,9491,
9497,9511,9521,9533,9539,9547,9551,9587,
9601,9613,9619,9623,9629,9631,9643,9649,
9661,9677,9679,9689,9697,9719,9721,9733,
9739,9743,9749,9767,9769,9781,9787,9791,
9803,9811,9817,9829,9833,9839,9851,9857,
9859,9871,9883,9887,9901,9907,9923,9929,
9931,9941,9949,9967,9973,10007,10009,10037,
10039,10061,10067,10069,10079,10091,10093,10099,
10103,10111,10133,10139,10141,10151,10159,10163,
10169,10177,10181,10193,10211,10223,10243,10247,
10253,10259,10267,10271,10273,10289,10301,10303,
10313,10321,10331,10333,10337,10343,10357,10369,
10391,10399,10427,10429,10433,10453,10457,10459,
10463,10477,10487,10499,10501,10513,10529,10531,
10559,10567,10589,10597,10601,10607,10613,10627,
10631,10639,10651,10657,10663,10667,10687,10691,
10709,10711,10723,10729,10733,10739,10753,10771,
10781,10789,10799,10831,10837,10847,10853,10859,
10861,10867,10883,10889,10891,10903,10909,10937,
10939,10949,10957,10973,10979,10987,10993,11003,
11027,11047,11057,11059,11069,11071,11083,11087,
11093,11113,11117,11119,11131,11149,11159,11161,
11171,11173,11177,11197,11213,11239,11243,11251,
11257,11261,11273,11279,11287,11299,11311,11317,
11321,11329,11351,11353,11369,11383,11393,11399,
11411,11423,11437,11443,11447,11467,11471,11483,
11489,11491,11497,11503,11519,11527,11549,11551,
11579,11587,11593,11597,11617,11621,11633,11657,
11677,11681,11689,11699,11701,11717,11719,11731,
11743,11777,11779,11783,11789,11801,11807,11813,
11821,11827,11831,11833,11839,11863,11867,11887,
11897,11903,11909,11923,11927,11933,11939,11941,
11953,11959,11969,11971,11981,11987,12007,12011,
12037,12041,12043,12049,12071,12073,12097,12101,
12107,12109,12113,12119,12143,12149,12157,12161,
12163,12197,12203,12211,12227,12239,12241,12251,
12253,12263,12269,12277,12281,12289,12301,12323,
12329,12343,12347,12373,12377,12379,12391,12401,
12409,12413,12421,12433,12437,12451,12457,12473,
12479,12487,12491,12497,12503,12511,12517,12527,
12539,12541,12547,12553,12569,12577,12583,12589,
12601,12611,12613,12619,12637,12641,12647,12653,
12659,12671,12689,12697,12703,12713,12721,12739,
12743,12757,12763,12781,12791,12799,12809,12821,
12823,12829,12841,12853,12889,12893,12899,12907,
12911,12917,12919,12923,12941,12953,12959,12967,
12973,12979,12983,13001,13003,13007,13009,13033,
13037,13043,13049,13063,13093,13099,13103,13109,
13121,13127,13147,13151,13159,13163,13171,13177,
13183,13187,13217,13219,13229,13241,13249,13259,
13267,13291,13297,13309,13313,13327,13331,13337,
13339,13367,13381,13397,13399,13411,13417,13421,
13441,13451,13457,13463,13469,13477,13487,13499,
13513,13523,13537,13553,13567,13577,13591,13597,
13613,13619,13627,13633,13649,13669,13679,13681,
13687,13691,13693,13697,13709,13711,13721,13723,
13729,13751,13757,13759,13763,13781,13789,13799,
13807,13829,13831,13841,13859,13873,13877,13879,
13883,13901,13903,13907,13913,13921,13931,13933,
13963,13967,13997,13999,14009,14011,14029,14033,
14051,14057,14071,14081,14083,14087,14107,14143,
14149,14153,14159,14173,14177,14197,14207,14221,
14243,14249,14251,14281,14293,14303,14321,14323,
14327,14341,14347,14369,14387,14389,14401,14407,
14411,14419,14423,14431,14437,14447,14449,14461,
14479,14489,14503,14519,14533,14537,14543,14549,
14551,14557,14561,14563,14591,14593,14621,14627,
14629,14633,14639,14653,14657,14669,14683,14699,
14713,14717,14723,14731,14737,14741,14747,14753,
14759,14767,14771,14779,14783,14797,14813,14821,
14827,14831,14843,14851,14867,14869,14879,14887,
14891,14897,14923,14929,14939,14947,14951,14957,
14969,14983,15013,15017,15031,15053,15061,15073,
15077,15083,15091,15101,15107,15121,15131,15137,
15139,15149,15161,15173,15187,15193,15199,15217,
15227,15233,15241,15259,15263,15269,15271,15277,
15287,15289,15299,15307,15313,15319,15329,15331,
15349,15359,15361,15373,15377,15383,15391,15401,
15413,15427,15439,15443,15451,15461,15467,15473,
15493,15497,15511,15527,15541,15551,15559,15569,
15581,15583,15601,15607,15619,15629,15641,15643,
15647,15649,15661,15667,15671,15679,15683,15727,
15731,15733,15737,15739,15749,15761,15767,15773,
15787,15791,15797,15803,15809,15817,15823,15859,
15877,15881,15887,15889,15901,15907,15913,15919,
15923,15937,15959,15971,15973,15991,16001,16007,
16033,16057,16061,16063,16067,16069,16073,16087,
16091,16097,16103,16111,16127,16139,16141,16183,
16187,16189,16193,16217,16223,16229,16231,16249,
16253,16267,16273,16301,16319,16333,16339,16349,
16361,16363,16369,16381,16411,16417,16421,16427,
16433,16447,16451,16453,16477,16481,16487,16493,
16519,16529,16547,16553,16561,16567,16573,16603,
16607,16619,16631,16633,16649,16651,16657,16661,
16673,16691,16693,16699,16703,16729,16741,16747,
16759,16763,16787,16811,16823,16829,16831,16843,
16871,16879,16883,16889,16901,16903,16921,16927,
16931,16937,16943,16963,16979,16981,16987,16993,
17011,17021,17027,17029,17033,17041,17047,17053,
17077,17093,17099,17107,17117,17123,17137,17159,
17167,17183,17189,17191,17203,17207,17209,17231,
17239,17257,17291,17293,17299,17317,17321,17327,
17333,17341,17351,17359,17377,17383,17387,17389,
17393,17401,17417,17419,17431,17443,17449,17467,
17471,17477,17483,17489,17491,17497,17509,17519,
17539,17551,17569,17573,17579,17581,17597,17599,
17609,17623,17627,17657,17659,17669,17681,17683,
17707,17713,17729,17737,17747,17749,17761,17783,
17789,17791,17807,17827,17837,17839,17851,17863,
257, 263,
269, 271, 277, 281, 283, 293, 307, 311,
313, 317, 331, 337, 347, 349, 353, 359,
367, 373, 379, 383, 389, 397, 401, 409,
419, 421, 431, 433, 439, 443, 449, 457,
461, 463, 467, 479, 487, 491, 499, 503,
509, 521, 523, 541, 547, 557, 563, 569,
571, 577, 587, 593, 599, 601, 607, 613,
617, 619, 631, 641, 643, 647, 653, 659,
661, 673, 677, 683, 691, 701, 709, 719,
727, 733, 739, 743, 751, 757, 761, 769,
773, 787, 797, 809, 811, 821, 823, 827,
829, 839, 853, 857, 859, 863, 877, 881,
883, 887, 907, 911, 919, 929, 937, 941,
947, 953, 967, 971, 977, 983, 991, 997,
1009, 1013, 1019, 1021, 1031, 1033, 1039, 1049,
1051, 1061, 1063, 1069, 1087, 1091, 1093, 1097,
1103, 1109, 1117, 1123, 1129, 1151, 1153, 1163,
1171, 1181, 1187, 1193, 1201, 1213, 1217, 1223,
1229, 1231, 1237, 1249, 1259, 1277, 1279, 1283,
1289, 1291, 1297, 1301, 1303, 1307, 1319, 1321,
1327, 1361, 1367, 1373, 1381, 1399, 1409, 1423,
1427, 1429, 1433, 1439, 1447, 1451, 1453, 1459,
1471, 1481, 1483, 1487, 1489, 1493, 1499, 1511,
1523, 1531, 1543, 1549, 1553, 1559, 1567, 1571,
1579, 1583, 1597, 1601, 1607, 1609, 1613, 1619,
1621, 1627, 1637, 1657, 1663, 1667, 1669, 1693,
1697, 1699, 1709, 1721, 1723, 1733, 1741, 1747,
1753, 1759, 1777, 1783, 1787, 1789, 1801, 1811,
1823, 1831, 1847, 1861, 1867, 1871, 1873, 1877,
1879, 1889, 1901, 1907, 1913, 1931, 1933, 1949,
1951, 1973, 1979, 1987, 1993, 1997, 1999, 2003,
2011, 2017, 2027, 2029, 2039, 2053, 2063, 2069,
2081, 2083, 2087, 2089, 2099, 2111, 2113, 2129,
2131, 2137, 2141, 2143, 2153, 2161, 2179, 2203,
2207, 2213, 2221, 2237, 2239, 2243, 2251, 2267,
2269, 2273, 2281, 2287, 2293, 2297, 2309, 2311,
2333, 2339, 2341, 2347, 2351, 2357, 2371, 2377,
2381, 2383, 2389, 2393, 2399, 2411, 2417, 2423,
2437, 2441, 2447, 2459, 2467, 2473, 2477, 2503,
2521, 2531, 2539, 2543, 2549, 2551, 2557, 2579,
2591, 2593, 2609, 2617, 2621, 2633, 2647, 2657,
2659, 2663, 2671, 2677, 2683, 2687, 2689, 2693,
2699, 2707, 2711, 2713, 2719, 2729, 2731, 2741,
2749, 2753, 2767, 2777, 2789, 2791, 2797, 2801,
2803, 2819, 2833, 2837, 2843, 2851, 2857, 2861,
2879, 2887, 2897, 2903, 2909, 2917, 2927, 2939,
2953, 2957, 2963, 2969, 2971, 2999, 3001, 3011,
3019, 3023, 3037, 3041, 3049, 3061, 3067, 3079,
3083, 3089, 3109, 3119, 3121, 3137, 3163, 3167,
3169, 3181, 3187, 3191, 3203, 3209, 3217, 3221,
3229, 3251, 3253, 3257, 3259, 3271, 3299, 3301,
3307, 3313, 3319, 3323, 3329, 3331, 3343, 3347,
3359, 3361, 3371, 3373, 3389, 3391, 3407, 3413,
3433, 3449, 3457, 3461, 3463, 3467, 3469, 3491,
3499, 3511, 3517, 3527, 3529, 3533, 3539, 3541,
3547, 3557, 3559, 3571, 3581, 3583, 3593, 3607,
3613, 3617, 3623, 3631, 3637, 3643, 3659, 3671,
3673, 3677, 3691, 3697, 3701, 3709, 3719, 3727,
3733, 3739, 3761, 3767, 3769, 3779, 3793, 3797,
3803, 3821, 3823, 3833, 3847, 3851, 3853, 3863,
3877, 3881, 3889, 3907, 3911, 3917, 3919, 3923,
3929, 3931, 3943, 3947, 3967, 3989, 4001, 4003,
4007, 4013, 4019, 4021, 4027, 4049, 4051, 4057,
4073, 4079, 4091, 4093, 4099, 4111, 4127, 4129,
4133, 4139, 4153, 4157, 4159, 4177, 4201, 4211,
4217, 4219, 4229, 4231, 4241, 4243, 4253, 4259,
4261, 4271, 4273, 4283, 4289, 4297, 4327, 4337,
4339, 4349, 4357, 4363, 4373, 4391, 4397, 4409,
4421, 4423, 4441, 4447, 4451, 4457, 4463, 4481,
4483, 4493, 4507, 4513, 4517, 4519, 4523, 4547,
4549, 4561, 4567, 4583, 4591, 4597, 4603, 4621,
4637, 4639, 4643, 4649, 4651, 4657, 4663, 4673,
4679, 4691, 4703, 4721, 4723, 4729, 4733, 4751,
4759, 4783, 4787, 4789, 4793, 4799, 4801, 4813,
4817, 4831, 4861, 4871, 4877, 4889, 4903, 4909,
4919, 4931, 4933, 4937, 4943, 4951, 4957, 4967,
4969, 4973, 4987, 4993, 4999, 5003, 5009, 5011,
5021, 5023, 5039, 5051, 5059, 5077, 5081, 5087,
5099, 5101, 5107, 5113, 5119, 5147, 5153, 5167,
5171, 5179, 5189, 5197, 5209, 5227, 5231, 5233,
5237, 5261, 5273, 5279, 5281, 5297, 5303, 5309,
5323, 5333, 5347, 5351, 5381, 5387, 5393, 5399,
5407, 5413, 5417, 5419, 5431, 5437, 5441, 5443,
5449, 5471, 5477, 5479, 5483, 5501, 5503, 5507,
5519, 5521, 5527, 5531, 5557, 5563, 5569, 5573,
5581, 5591, 5623, 5639, 5641, 5647, 5651, 5653,
5657, 5659, 5669, 5683, 5689, 5693, 5701, 5711,
5717, 5737, 5741, 5743, 5749, 5779, 5783, 5791,
5801, 5807, 5813, 5821, 5827, 5839, 5843, 5849,
5851, 5857, 5861, 5867, 5869, 5879, 5881, 5897,
5903, 5923, 5927, 5939, 5953, 5981, 5987, 6007,
6011, 6029, 6037, 6043, 6047, 6053, 6067, 6073,
6079, 6089, 6091, 6101, 6113, 6121, 6131, 6133,
6143, 6151, 6163, 6173, 6197, 6199, 6203, 6211,
6217, 6221, 6229, 6247, 6257, 6263, 6269, 6271,
6277, 6287, 6299, 6301, 6311, 6317, 6323, 6329,
6337, 6343, 6353, 6359, 6361, 6367, 6373, 6379,
6389, 6397, 6421, 6427, 6449, 6451, 6469, 6473,
6481, 6491, 6521, 6529, 6547, 6551, 6553, 6563,
6569, 6571, 6577, 6581, 6599, 6607, 6619, 6637,
6653, 6659, 6661, 6673, 6679, 6689, 6691, 6701,
6703, 6709, 6719, 6733, 6737, 6761, 6763, 6779,
6781, 6791, 6793, 6803, 6823, 6827, 6829, 6833,
6841, 6857, 6863, 6869, 6871, 6883, 6899, 6907,
6911, 6917, 6947, 6949, 6959, 6961, 6967, 6971,
6977, 6983, 6991, 6997, 7001, 7013, 7019, 7027,
7039, 7043, 7057, 7069, 7079, 7103, 7109, 7121,
7127, 7129, 7151, 7159, 7177, 7187, 7193, 7207,
7211, 7213, 7219, 7229, 7237, 7243, 7247, 7253,
7283, 7297, 7307, 7309, 7321, 7331, 7333, 7349,
7351, 7369, 7393, 7411, 7417, 7433, 7451, 7457,
7459, 7477, 7481, 7487, 7489, 7499, 7507, 7517,
7523, 7529, 7537, 7541, 7547, 7549, 7559, 7561,
7573, 7577, 7583, 7589, 7591, 7603, 7607, 7621,
7639, 7643, 7649, 7669, 7673, 7681, 7687, 7691,
7699, 7703, 7717, 7723, 7727, 7741, 7753, 7757,
7759, 7789, 7793, 7817, 7823, 7829, 7841, 7853,
7867, 7873, 7877, 7879, 7883, 7901, 7907, 7919,
7927, 7933, 7937, 7949, 7951, 7963, 7993, 8009,
8011, 8017, 8039, 8053, 8059, 8069, 8081, 8087,
8089, 8093, 8101, 8111, 8117, 8123, 8147, 8161,
8167, 8171, 8179, 8191, 8209, 8219, 8221, 8231,
8233, 8237, 8243, 8263, 8269, 8273, 8287, 8291,
8293, 8297, 8311, 8317, 8329, 8353, 8363, 8369,
8377, 8387, 8389, 8419, 8423, 8429, 8431, 8443,
8447, 8461, 8467, 8501, 8513, 8521, 8527, 8537,
8539, 8543, 8563, 8573, 8581, 8597, 8599, 8609,
8623, 8627, 8629, 8641, 8647, 8663, 8669, 8677,
8681, 8689, 8693, 8699, 8707, 8713, 8719, 8731,
8737, 8741, 8747, 8753, 8761, 8779, 8783, 8803,
8807, 8819, 8821, 8831, 8837, 8839, 8849, 8861,
8863, 8867, 8887, 8893, 8923, 8929, 8933, 8941,
8951, 8963, 8969, 8971, 8999, 9001, 9007, 9011,
9013, 9029, 9041, 9043, 9049, 9059, 9067, 9091,
9103, 9109, 9127, 9133, 9137, 9151, 9157, 9161,
9173, 9181, 9187, 9199, 9203, 9209, 9221, 9227,
9239, 9241, 9257, 9277, 9281, 9283, 9293, 9311,
9319, 9323, 9337, 9341, 9343, 9349, 9371, 9377,
9391, 9397, 9403, 9413, 9419, 9421, 9431, 9433,
9437, 9439, 9461, 9463, 9467, 9473, 9479, 9491,
9497, 9511, 9521, 9533, 9539, 9547, 9551, 9587,
9601, 9613, 9619, 9623, 9629, 9631, 9643, 9649,
9661, 9677, 9679, 9689, 9697, 9719, 9721, 9733,
9739, 9743, 9749, 9767, 9769, 9781, 9787, 9791,
9803, 9811, 9817, 9829, 9833, 9839, 9851, 9857,
9859, 9871, 9883, 9887, 9901, 9907, 9923, 9929,
9931, 9941, 9949, 9967, 9973, 10007, 10009, 10037,
10039, 10061, 10067, 10069, 10079, 10091, 10093, 10099,
10103, 10111, 10133, 10139, 10141, 10151, 10159, 10163,
10169, 10177, 10181, 10193, 10211, 10223, 10243, 10247,
10253, 10259, 10267, 10271, 10273, 10289, 10301, 10303,
10313, 10321, 10331, 10333, 10337, 10343, 10357, 10369,
10391, 10399, 10427, 10429, 10433, 10453, 10457, 10459,
10463, 10477, 10487, 10499, 10501, 10513, 10529, 10531,
10559, 10567, 10589, 10597, 10601, 10607, 10613, 10627,
10631, 10639, 10651, 10657, 10663, 10667, 10687, 10691,
10709, 10711, 10723, 10729, 10733, 10739, 10753, 10771,
10781, 10789, 10799, 10831, 10837, 10847, 10853, 10859,
10861, 10867, 10883, 10889, 10891, 10903, 10909, 10937,
10939, 10949, 10957, 10973, 10979, 10987, 10993, 11003,
11027, 11047, 11057, 11059, 11069, 11071, 11083, 11087,
11093, 11113, 11117, 11119, 11131, 11149, 11159, 11161,
11171, 11173, 11177, 11197, 11213, 11239, 11243, 11251,
11257, 11261, 11273, 11279, 11287, 11299, 11311, 11317,
11321, 11329, 11351, 11353, 11369, 11383, 11393, 11399,
11411, 11423, 11437, 11443, 11447, 11467, 11471, 11483,
11489, 11491, 11497, 11503, 11519, 11527, 11549, 11551,
11579, 11587, 11593, 11597, 11617, 11621, 11633, 11657,
11677, 11681, 11689, 11699, 11701, 11717, 11719, 11731,
11743, 11777, 11779, 11783, 11789, 11801, 11807, 11813,
11821, 11827, 11831, 11833, 11839, 11863, 11867, 11887,
11897, 11903, 11909, 11923, 11927, 11933, 11939, 11941,
11953, 11959, 11969, 11971, 11981, 11987, 12007, 12011,
12037, 12041, 12043, 12049, 12071, 12073, 12097, 12101,
12107, 12109, 12113, 12119, 12143, 12149, 12157, 12161,
12163, 12197, 12203, 12211, 12227, 12239, 12241, 12251,
12253, 12263, 12269, 12277, 12281, 12289, 12301, 12323,
12329, 12343, 12347, 12373, 12377, 12379, 12391, 12401,
12409, 12413, 12421, 12433, 12437, 12451, 12457, 12473,
12479, 12487, 12491, 12497, 12503, 12511, 12517, 12527,
12539, 12541, 12547, 12553, 12569, 12577, 12583, 12589,
12601, 12611, 12613, 12619, 12637, 12641, 12647, 12653,
12659, 12671, 12689, 12697, 12703, 12713, 12721, 12739,
12743, 12757, 12763, 12781, 12791, 12799, 12809, 12821,
12823, 12829, 12841, 12853, 12889, 12893, 12899, 12907,
12911, 12917, 12919, 12923, 12941, 12953, 12959, 12967,
12973, 12979, 12983, 13001, 13003, 13007, 13009, 13033,
13037, 13043, 13049, 13063, 13093, 13099, 13103, 13109,
13121, 13127, 13147, 13151, 13159, 13163, 13171, 13177,
13183, 13187, 13217, 13219, 13229, 13241, 13249, 13259,
13267, 13291, 13297, 13309, 13313, 13327, 13331, 13337,
13339, 13367, 13381, 13397, 13399, 13411, 13417, 13421,
13441, 13451, 13457, 13463, 13469, 13477, 13487, 13499,
13513, 13523, 13537, 13553, 13567, 13577, 13591, 13597,
13613, 13619, 13627, 13633, 13649, 13669, 13679, 13681,
13687, 13691, 13693, 13697, 13709, 13711, 13721, 13723,
13729, 13751, 13757, 13759, 13763, 13781, 13789, 13799,
13807, 13829, 13831, 13841, 13859, 13873, 13877, 13879,
13883, 13901, 13903, 13907, 13913, 13921, 13931, 13933,
13963, 13967, 13997, 13999, 14009, 14011, 14029, 14033,
14051, 14057, 14071, 14081, 14083, 14087, 14107, 14143,
14149, 14153, 14159, 14173, 14177, 14197, 14207, 14221,
14243, 14249, 14251, 14281, 14293, 14303, 14321, 14323,
14327, 14341, 14347, 14369, 14387, 14389, 14401, 14407,
14411, 14419, 14423, 14431, 14437, 14447, 14449, 14461,
14479, 14489, 14503, 14519, 14533, 14537, 14543, 14549,
14551, 14557, 14561, 14563, 14591, 14593, 14621, 14627,
14629, 14633, 14639, 14653, 14657, 14669, 14683, 14699,
14713, 14717, 14723, 14731, 14737, 14741, 14747, 14753,
14759, 14767, 14771, 14779, 14783, 14797, 14813, 14821,
14827, 14831, 14843, 14851, 14867, 14869, 14879, 14887,
14891, 14897, 14923, 14929, 14939, 14947, 14951, 14957,
14969, 14983, 15013, 15017, 15031, 15053, 15061, 15073,
15077, 15083, 15091, 15101, 15107, 15121, 15131, 15137,
15139, 15149, 15161, 15173, 15187, 15193, 15199, 15217,
15227, 15233, 15241, 15259, 15263, 15269, 15271, 15277,
15287, 15289, 15299, 15307, 15313, 15319, 15329, 15331,
15349, 15359, 15361, 15373, 15377, 15383, 15391, 15401,
15413, 15427, 15439, 15443, 15451, 15461, 15467, 15473,
15493, 15497, 15511, 15527, 15541, 15551, 15559, 15569,
15581, 15583, 15601, 15607, 15619, 15629, 15641, 15643,
15647, 15649, 15661, 15667, 15671, 15679, 15683, 15727,
15731, 15733, 15737, 15739, 15749, 15761, 15767, 15773,
15787, 15791, 15797, 15803, 15809, 15817, 15823, 15859,
15877, 15881, 15887, 15889, 15901, 15907, 15913, 15919,
15923, 15937, 15959, 15971, 15973, 15991, 16001, 16007,
16033, 16057, 16061, 16063, 16067, 16069, 16073, 16087,
16091, 16097, 16103, 16111, 16127, 16139, 16141, 16183,
16187, 16189, 16193, 16217, 16223, 16229, 16231, 16249,
16253, 16267, 16273, 16301, 16319, 16333, 16339, 16349,
16361, 16363, 16369, 16381, 16411, 16417, 16421, 16427,
16433, 16447, 16451, 16453, 16477, 16481, 16487, 16493,
16519, 16529, 16547, 16553, 16561, 16567, 16573, 16603,
16607, 16619, 16631, 16633, 16649, 16651, 16657, 16661,
16673, 16691, 16693, 16699, 16703, 16729, 16741, 16747,
16759, 16763, 16787, 16811, 16823, 16829, 16831, 16843,
16871, 16879, 16883, 16889, 16901, 16903, 16921, 16927,
16931, 16937, 16943, 16963, 16979, 16981, 16987, 16993,
17011, 17021, 17027, 17029, 17033, 17041, 17047, 17053,
17077, 17093, 17099, 17107, 17117, 17123, 17137, 17159,
17167, 17183, 17189, 17191, 17203, 17207, 17209, 17231,
17239, 17257, 17291, 17293, 17299, 17317, 17321, 17327,
17333, 17341, 17351, 17359, 17377, 17383, 17387, 17389,
17393, 17401, 17417, 17419, 17431, 17443, 17449, 17467,
17471, 17477, 17483, 17489, 17491, 17497, 17509, 17519,
17539, 17551, 17569, 17573, 17579, 17581, 17597, 17599,
17609, 17623, 17627, 17657, 17659, 17669, 17681, 17683,
17707, 17713, 17729, 17737, 17747, 17749, 17761, 17783,
17789, 17791, 17807, 17827, 17837, 17839, 17851, 17863,
#endif
};
};

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -62,298 +62,327 @@
#include <openssl/buffer.h>
#include "bn_lcl.h"
static const char Hex[]="0123456789ABCDEF";
static const char Hex[] = "0123456789ABCDEF";
/* Must 'OPENSSL_free' the returned data */
char *BN_bn2hex(const BIGNUM *a)
{
int i,j,v,z=0;
char *buf;
char *p;
{
int i, j, v, z = 0;
char *buf;
char *p;
buf=(char *)OPENSSL_malloc(a->top*BN_BYTES*2+2);
if (buf == NULL)
{
BNerr(BN_F_BN_BN2HEX,ERR_R_MALLOC_FAILURE);
goto err;
}
p=buf;
if (a->neg) *(p++)='-';
if (BN_is_zero(a)) *(p++)='0';
for (i=a->top-1; i >=0; i--)
{
for (j=BN_BITS2-8; j >= 0; j-=8)
{
/* strip leading zeros */
v=((int)(a->d[i]>>(long)j))&0xff;
if (z || (v != 0))
{
*(p++)=Hex[v>>4];
*(p++)=Hex[v&0x0f];
z=1;
}
}
}
*p='\0';
err:
return(buf);
}
if (a->neg && BN_is_zero(a)) {
/* "-0" == 3 bytes including NULL terminator */
buf = OPENSSL_malloc(3);
} else {
buf = OPENSSL_malloc(a->top * BN_BYTES * 2 + 2);
}
if (buf == NULL) {
BNerr(BN_F_BN_BN2HEX, ERR_R_MALLOC_FAILURE);
goto err;
}
p = buf;
if (a->neg)
*(p++) = '-';
if (BN_is_zero(a))
*(p++) = '0';
for (i = a->top - 1; i >= 0; i--) {
for (j = BN_BITS2 - 8; j >= 0; j -= 8) {
/* strip leading zeros */
v = ((int)(a->d[i] >> (long)j)) & 0xff;
if (z || (v != 0)) {
*(p++) = Hex[v >> 4];
*(p++) = Hex[v & 0x0f];
z = 1;
}
}
}
*p = '\0';
err:
return (buf);
}
/* Must 'OPENSSL_free' the returned data */
char *BN_bn2dec(const BIGNUM *a)
{
int i=0,num, ok = 0;
char *buf=NULL;
char *p;
BIGNUM *t=NULL;
BN_ULONG *bn_data=NULL,*lp;
{
int i = 0, num, ok = 0;
char *buf = NULL;
char *p;
BIGNUM *t = NULL;
BN_ULONG *bn_data = NULL, *lp;
/* get an upper bound for the length of the decimal integer
* num <= (BN_num_bits(a) + 1) * log(2)
* <= 3 * BN_num_bits(a) * 0.1001 + log(2) + 1 (rounding error)
* <= BN_num_bits(a)/10 + BN_num_bits/1000 + 1 + 1
*/
i=BN_num_bits(a)*3;
num=(i/10+i/1000+1)+1;
bn_data=(BN_ULONG *)OPENSSL_malloc((num/BN_DEC_NUM+1)*sizeof(BN_ULONG));
buf=(char *)OPENSSL_malloc(num+3);
if ((buf == NULL) || (bn_data == NULL))
{
BNerr(BN_F_BN_BN2DEC,ERR_R_MALLOC_FAILURE);
goto err;
}
if ((t=BN_dup(a)) == NULL) goto err;
/*-
* get an upper bound for the length of the decimal integer
* num <= (BN_num_bits(a) + 1) * log(2)
* <= 3 * BN_num_bits(a) * 0.1001 + log(2) + 1 (rounding error)
* <= BN_num_bits(a)/10 + BN_num_bits/1000 + 1 + 1
*/
i = BN_num_bits(a) * 3;
num = (i / 10 + i / 1000 + 1) + 1;
bn_data =
(BN_ULONG *)OPENSSL_malloc((num / BN_DEC_NUM + 1) * sizeof(BN_ULONG));
buf = (char *)OPENSSL_malloc(num + 3);
if ((buf == NULL) || (bn_data == NULL)) {
BNerr(BN_F_BN_BN2DEC, ERR_R_MALLOC_FAILURE);
goto err;
}
if ((t = BN_dup(a)) == NULL)
goto err;
#define BUF_REMAIN (num+3 - (size_t)(p - buf))
p=buf;
lp=bn_data;
if (BN_is_zero(t))
{
*(p++)='0';
*(p++)='\0';
}
else
{
if (BN_is_negative(t))
*p++ = '-';
p = buf;
lp = bn_data;
if (BN_is_zero(t)) {
*(p++) = '0';
*(p++) = '\0';
} else {
if (BN_is_negative(t))
*p++ = '-';
i=0;
while (!BN_is_zero(t))
{
*lp=BN_div_word(t,BN_DEC_CONV);
lp++;
}
lp--;
/* We now have a series of blocks, BN_DEC_NUM chars
* in length, where the last one needs truncation.
* The blocks need to be reversed in order. */
BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT1,*lp);
while (*p) p++;
while (lp != bn_data)
{
lp--;
BIO_snprintf(p,BUF_REMAIN,BN_DEC_FMT2,*lp);
while (*p) p++;
}
}
ok = 1;
err:
if (bn_data != NULL) OPENSSL_free(bn_data);
if (t != NULL) BN_free(t);
if (!ok && buf)
{
OPENSSL_free(buf);
buf = NULL;
}
i = 0;
while (!BN_is_zero(t)) {
*lp = BN_div_word(t, BN_DEC_CONV);
lp++;
}
lp--;
/*
* We now have a series of blocks, BN_DEC_NUM chars in length, where
* the last one needs truncation. The blocks need to be reversed in
* order.
*/
BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT1, *lp);
while (*p)
p++;
while (lp != bn_data) {
lp--;
BIO_snprintf(p, BUF_REMAIN, BN_DEC_FMT2, *lp);
while (*p)
p++;
}
}
ok = 1;
err:
if (bn_data != NULL)
OPENSSL_free(bn_data);
if (t != NULL)
BN_free(t);
if (!ok && buf) {
OPENSSL_free(buf);
buf = NULL;
}
return(buf);
}
return (buf);
}
int BN_hex2bn(BIGNUM **bn, const char *a)
{
BIGNUM *ret=NULL;
BN_ULONG l=0;
int neg=0,h,m,i,j,k,c;
int num;
{
BIGNUM *ret = NULL;
BN_ULONG l = 0;
int neg = 0, h, m, i, j, k, c;
int num;
if ((a == NULL) || (*a == '\0')) return(0);
if ((a == NULL) || (*a == '\0'))
return (0);
if (*a == '-') { neg=1; a++; }
if (*a == '-') {
neg = 1;
a++;
}
for (i=0; isxdigit((unsigned char) a[i]); i++)
;
for (i = 0; isxdigit((unsigned char)a[i]); i++) ;
num=i+neg;
if (bn == NULL) return(num);
num = i + neg;
if (bn == NULL)
return (num);
/* a is the start of the hex digits, and it is 'i' long */
if (*bn == NULL)
{
if ((ret=BN_new()) == NULL) return(0);
}
else
{
ret= *bn;
BN_zero(ret);
}
/* a is the start of the hex digits, and it is 'i' long */
if (*bn == NULL) {
if ((ret = BN_new()) == NULL)
return (0);
} else {
ret = *bn;
BN_zero(ret);
}
/* i is the number of hex digests; */
if (bn_expand(ret,i*4) == NULL) goto err;
/* i is the number of hex digests; */
if (bn_expand(ret, i * 4) == NULL)
goto err;
j=i; /* least significant 'hex' */
m=0;
h=0;
while (j > 0)
{
m=((BN_BYTES*2) <= j)?(BN_BYTES*2):j;
l=0;
for (;;)
{
c=a[j-m];
if ((c >= '0') && (c <= '9')) k=c-'0';
else if ((c >= 'a') && (c <= 'f')) k=c-'a'+10;
else if ((c >= 'A') && (c <= 'F')) k=c-'A'+10;
else k=0; /* paranoia */
l=(l<<4)|k;
j = i; /* least significant 'hex' */
m = 0;
h = 0;
while (j > 0) {
m = ((BN_BYTES * 2) <= j) ? (BN_BYTES * 2) : j;
l = 0;
for (;;) {
c = a[j - m];
if ((c >= '0') && (c <= '9'))
k = c - '0';
else if ((c >= 'a') && (c <= 'f'))
k = c - 'a' + 10;
else if ((c >= 'A') && (c <= 'F'))
k = c - 'A' + 10;
else
k = 0; /* paranoia */
l = (l << 4) | k;
if (--m <= 0)
{
ret->d[h++]=l;
break;
}
}
j-=(BN_BYTES*2);
}
ret->top=h;
bn_correct_top(ret);
ret->neg=neg;
if (--m <= 0) {
ret->d[h++] = l;
break;
}
}
j -= (BN_BYTES * 2);
}
ret->top = h;
bn_correct_top(ret);
ret->neg = neg;
*bn=ret;
bn_check_top(ret);
return(num);
err:
if (*bn == NULL) BN_free(ret);
return(0);
}
*bn = ret;
bn_check_top(ret);
return (num);
err:
if (*bn == NULL)
BN_free(ret);
return (0);
}
int BN_dec2bn(BIGNUM **bn, const char *a)
{
BIGNUM *ret=NULL;
BN_ULONG l=0;
int neg=0,i,j;
int num;
{
BIGNUM *ret = NULL;
BN_ULONG l = 0;
int neg = 0, i, j;
int num;
if ((a == NULL) || (*a == '\0')) return(0);
if (*a == '-') { neg=1; a++; }
if ((a == NULL) || (*a == '\0'))
return (0);
if (*a == '-') {
neg = 1;
a++;
}
for (i=0; isdigit((unsigned char) a[i]); i++)
;
for (i = 0; isdigit((unsigned char)a[i]); i++) ;
num=i+neg;
if (bn == NULL) return(num);
num = i + neg;
if (bn == NULL)
return (num);
/* a is the start of the digits, and it is 'i' long.
* We chop it into BN_DEC_NUM digits at a time */
if (*bn == NULL)
{
if ((ret=BN_new()) == NULL) return(0);
}
else
{
ret= *bn;
BN_zero(ret);
}
/*
* a is the start of the digits, and it is 'i' long. We chop it into
* BN_DEC_NUM digits at a time
*/
if (*bn == NULL) {
if ((ret = BN_new()) == NULL)
return (0);
} else {
ret = *bn;
BN_zero(ret);
}
/* i is the number of digests, a bit of an over expand; */
if (bn_expand(ret,i*4) == NULL) goto err;
/* i is the number of digests, a bit of an over expand; */
if (bn_expand(ret, i * 4) == NULL)
goto err;
j=BN_DEC_NUM-(i%BN_DEC_NUM);
if (j == BN_DEC_NUM) j=0;
l=0;
while (*a)
{
l*=10;
l+= *a-'0';
a++;
if (++j == BN_DEC_NUM)
{
BN_mul_word(ret,BN_DEC_CONV);
BN_add_word(ret,l);
l=0;
j=0;
}
}
ret->neg=neg;
j = BN_DEC_NUM - (i % BN_DEC_NUM);
if (j == BN_DEC_NUM)
j = 0;
l = 0;
while (*a) {
l *= 10;
l += *a - '0';
a++;
if (++j == BN_DEC_NUM) {
BN_mul_word(ret, BN_DEC_CONV);
BN_add_word(ret, l);
l = 0;
j = 0;
}
}
ret->neg = neg;
bn_correct_top(ret);
*bn=ret;
bn_check_top(ret);
return(num);
err:
if (*bn == NULL) BN_free(ret);
return(0);
}
bn_correct_top(ret);
*bn = ret;
bn_check_top(ret);
return (num);
err:
if (*bn == NULL)
BN_free(ret);
return (0);
}
int BN_asc2bn(BIGNUM **bn, const char *a)
{
const char *p = a;
if (*p == '-')
p++;
{
const char *p = a;
if (*p == '-')
p++;
if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x'))
{
if (!BN_hex2bn(bn, p + 2))
return 0;
}
else
{
if (!BN_dec2bn(bn, p))
return 0;
}
if (*a == '-')
(*bn)->neg = 1;
return 1;
}
if (p[0] == '0' && (p[1] == 'X' || p[1] == 'x')) {
if (!BN_hex2bn(bn, p + 2))
return 0;
} else {
if (!BN_dec2bn(bn, p))
return 0;
}
if (*a == '-')
(*bn)->neg = 1;
return 1;
}
#ifndef OPENSSL_NO_BIO
#ifndef OPENSSL_NO_FP_API
# ifndef OPENSSL_NO_FP_API
int BN_print_fp(FILE *fp, const BIGNUM *a)
{
BIO *b;
int ret;
{
BIO *b;
int ret;
if ((b=BIO_new(BIO_s_file())) == NULL)
return(0);
BIO_set_fp(b,fp,BIO_NOCLOSE);
ret=BN_print(b,a);
BIO_free(b);
return(ret);
}
#endif
if ((b = BIO_new(BIO_s_file())) == NULL)
return (0);
BIO_set_fp(b, fp, BIO_NOCLOSE);
ret = BN_print(b, a);
BIO_free(b);
return (ret);
}
# endif
int BN_print(BIO *bp, const BIGNUM *a)
{
int i,j,v,z=0;
int ret=0;
{
int i, j, v, z = 0;
int ret = 0;
if ((a->neg) && (BIO_write(bp,"-",1) != 1)) goto end;
if (BN_is_zero(a) && (BIO_write(bp,"0",1) != 1)) goto end;
for (i=a->top-1; i >=0; i--)
{
for (j=BN_BITS2-4; j >= 0; j-=4)
{
/* strip leading zeros */
v=((int)(a->d[i]>>(long)j))&0x0f;
if (z || (v != 0))
{
if (BIO_write(bp,&(Hex[v]),1) != 1)
goto end;
z=1;
}
}
}
ret=1;
end:
return(ret);
}
if ((a->neg) && (BIO_write(bp, "-", 1) != 1))
goto end;
if (BN_is_zero(a) && (BIO_write(bp, "0", 1) != 1))
goto end;
for (i = a->top - 1; i >= 0; i--) {
for (j = BN_BITS2 - 4; j >= 0; j -= 4) {
/* strip leading zeros */
v = ((int)(a->d[i] >> (long)j)) & 0x0f;
if (z || (v != 0)) {
if (BIO_write(bp, &(Hex[v]), 1) != 1)
goto end;
z = 1;
}
}
}
ret = 1;
end:
return (ret);
}
#endif
char *BN_options(void)
{
static int init = 0;
static char data[16];
if (!init) {
init++;
#ifdef BN_LLONG
BIO_snprintf(data, sizeof data, "bn(%d,%d)",
(int)sizeof(BN_ULLONG) * 8, (int)sizeof(BN_ULONG) * 8);
#else
BIO_snprintf(data, sizeof data, "bn(%d,%d)",
(int)sizeof(BN_ULONG) * 8, (int)sizeof(BN_ULONG) * 8);
#endif
}
return (data);
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -63,7 +63,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -116,190 +116,180 @@
#include <openssl/rand.h>
static int bnrand(int pseudorand, BIGNUM *rnd, int bits, int top, int bottom)
{
unsigned char *buf=NULL;
int ret=0,bit,bytes,mask;
time_t tim;
{
unsigned char *buf = NULL;
int ret = 0, bit, bytes, mask;
time_t tim;
if (bits == 0)
{
BN_zero(rnd);
return 1;
}
if (bits < 0 || (bits == 1 && top > 0)) {
BNerr(BN_F_BNRAND, BN_R_BITS_TOO_SMALL);
return 0;
}
bytes=(bits+7)/8;
bit=(bits-1)%8;
mask=0xff<<(bit+1);
if (bits == 0) {
BN_zero(rnd);
return 1;
}
buf=(unsigned char *)OPENSSL_malloc(bytes);
if (buf == NULL)
{
BNerr(BN_F_BNRAND,ERR_R_MALLOC_FAILURE);
goto err;
}
bytes = (bits + 7) / 8;
bit = (bits - 1) % 8;
mask = 0xff << (bit + 1);
/* make a random number and set the top and bottom bits */
time(&tim);
RAND_add(&tim,sizeof(tim),0.0);
buf = (unsigned char *)OPENSSL_malloc(bytes);
if (buf == NULL) {
BNerr(BN_F_BNRAND, ERR_R_MALLOC_FAILURE);
goto err;
}
if (pseudorand)
{
if (RAND_pseudo_bytes(buf, bytes) == -1)
goto err;
}
else
{
if (RAND_bytes(buf, bytes) <= 0)
goto err;
}
/* make a random number and set the top and bottom bits */
time(&tim);
RAND_add(&tim, sizeof(tim), 0.0);
if (pseudorand) {
if (RAND_pseudo_bytes(buf, bytes) == -1)
goto err;
} else {
if (RAND_bytes(buf, bytes) <= 0)
goto err;
}
#if 1
if (pseudorand == 2)
{
/* generate patterns that are more likely to trigger BN
library bugs */
int i;
unsigned char c;
if (pseudorand == 2) {
/*
* generate patterns that are more likely to trigger BN library bugs
*/
int i;
unsigned char c;
for (i = 0; i < bytes; i++)
{
RAND_pseudo_bytes(&c, 1);
if (c >= 128 && i > 0)
buf[i] = buf[i-1];
else if (c < 42)
buf[i] = 0;
else if (c < 84)
buf[i] = 255;
}
}
for (i = 0; i < bytes; i++) {
if (RAND_pseudo_bytes(&c, 1) < 0)
goto err;
if (c >= 128 && i > 0)
buf[i] = buf[i - 1];
else if (c < 42)
buf[i] = 0;
else if (c < 84)
buf[i] = 255;
}
}
#endif
if (top != -1)
{
if (top)
{
if (bit == 0)
{
buf[0]=1;
buf[1]|=0x80;
}
else
{
buf[0]|=(3<<(bit-1));
}
}
else
{
buf[0]|=(1<<bit);
}
}
buf[0] &= ~mask;
if (bottom) /* set bottom bit if requested */
buf[bytes-1]|=1;
if (!BN_bin2bn(buf,bytes,rnd)) goto err;
ret=1;
err:
if (buf != NULL)
{
OPENSSL_cleanse(buf,bytes);
OPENSSL_free(buf);
}
bn_check_top(rnd);
return(ret);
}
if (top >= 0) {
if (top) {
if (bit == 0) {
buf[0] = 1;
buf[1] |= 0x80;
} else {
buf[0] |= (3 << (bit - 1));
}
} else {
buf[0] |= (1 << bit);
}
}
buf[0] &= ~mask;
if (bottom) /* set bottom bit if requested */
buf[bytes - 1] |= 1;
if (!BN_bin2bn(buf, bytes, rnd))
goto err;
ret = 1;
err:
if (buf != NULL) {
OPENSSL_cleanse(buf, bytes);
OPENSSL_free(buf);
}
bn_check_top(rnd);
return (ret);
}
int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
return bnrand(0, rnd, bits, top, bottom);
}
int BN_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
return bnrand(0, rnd, bits, top, bottom);
}
int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
return bnrand(1, rnd, bits, top, bottom);
}
int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
return bnrand(1, rnd, bits, top, bottom);
}
#if 1
int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
return bnrand(2, rnd, bits, top, bottom);
}
int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
{
return bnrand(2, rnd, bits, top, bottom);
}
#endif
/* random number r: 0 <= r < range */
static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
{
int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
int n;
int count = 100;
{
int (*bn_rand) (BIGNUM *, int, int, int) =
pseudo ? BN_pseudo_rand : BN_rand;
int n;
int count = 100;
if (range->neg || BN_is_zero(range))
{
BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
return 0;
}
if (range->neg || BN_is_zero(range)) {
BNerr(BN_F_BN_RAND_RANGE, BN_R_INVALID_RANGE);
return 0;
}
n = BN_num_bits(range); /* n > 0 */
n = BN_num_bits(range); /* n > 0 */
/* BN_is_bit_set(range, n - 1) always holds */
/* BN_is_bit_set(range, n - 1) always holds */
if (n == 1)
BN_zero(r);
else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3))
{
/* range = 100..._2,
* so 3*range (= 11..._2) is exactly one bit longer than range */
do
{
if (!bn_rand(r, n + 1, -1, 0)) return 0;
/* If r < 3*range, use r := r MOD range
* (which is either r, r - range, or r - 2*range).
* Otherwise, iterate once more.
* Since 3*range = 11..._2, each iteration succeeds with
* probability >= .75. */
if (BN_cmp(r ,range) >= 0)
{
if (!BN_sub(r, r, range)) return 0;
if (BN_cmp(r, range) >= 0)
if (!BN_sub(r, r, range)) return 0;
}
if (n == 1)
BN_zero(r);
else if (!BN_is_bit_set(range, n - 2) && !BN_is_bit_set(range, n - 3)) {
/*
* range = 100..._2, so 3*range (= 11..._2) is exactly one bit longer
* than range
*/
do {
if (!bn_rand(r, n + 1, -1, 0))
return 0;
/*
* If r < 3*range, use r := r MOD range (which is either r, r -
* range, or r - 2*range). Otherwise, iterate once more. Since
* 3*range = 11..._2, each iteration succeeds with probability >=
* .75.
*/
if (BN_cmp(r, range) >= 0) {
if (!BN_sub(r, r, range))
return 0;
if (BN_cmp(r, range) >= 0)
if (!BN_sub(r, r, range))
return 0;
}
if (!--count)
{
BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
}
while (BN_cmp(r, range) >= 0);
}
else
{
do
{
/* range = 11..._2 or range = 101..._2 */
if (!bn_rand(r, n, -1, 0)) return 0;
if (!--count) {
BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
if (!--count)
{
BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
}
while (BN_cmp(r, range) >= 0);
}
}
while (BN_cmp(r, range) >= 0);
} else {
do {
/* range = 11..._2 or range = 101..._2 */
if (!bn_rand(r, n, -1, 0))
return 0;
bn_check_top(r);
return 1;
}
if (!--count) {
BNerr(BN_F_BN_RAND_RANGE, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
}
while (BN_cmp(r, range) >= 0);
}
bn_check_top(r);
return 1;
}
int BN_rand_range(BIGNUM *r, const BIGNUM *range)
{
return bn_rand_range(0, r, range);
}
int BN_rand_range(BIGNUM *r, const BIGNUM *range)
{
return bn_rand_range(0, r, range);
}
int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
{
return bn_rand_range(1, r, range);
}
int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
{
return bn_rand_range(1, r, range);
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -61,174 +61,189 @@
#include "bn_lcl.h"
void BN_RECP_CTX_init(BN_RECP_CTX *recp)
{
BN_init(&(recp->N));
BN_init(&(recp->Nr));
recp->num_bits=0;
recp->flags=0;
}
{
BN_init(&(recp->N));
BN_init(&(recp->Nr));
recp->num_bits = 0;
recp->flags = 0;
}
BN_RECP_CTX *BN_RECP_CTX_new(void)
{
BN_RECP_CTX *ret;
{
BN_RECP_CTX *ret;
if ((ret=(BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL)
return(NULL);
if ((ret = (BN_RECP_CTX *)OPENSSL_malloc(sizeof(BN_RECP_CTX))) == NULL)
return (NULL);
BN_RECP_CTX_init(ret);
ret->flags=BN_FLG_MALLOCED;
return(ret);
}
BN_RECP_CTX_init(ret);
ret->flags = BN_FLG_MALLOCED;
return (ret);
}
void BN_RECP_CTX_free(BN_RECP_CTX *recp)
{
if(recp == NULL)
return;
{
if (recp == NULL)
return;
BN_free(&(recp->N));
BN_free(&(recp->Nr));
if (recp->flags & BN_FLG_MALLOCED)
OPENSSL_free(recp);
}
BN_free(&(recp->N));
BN_free(&(recp->Nr));
if (recp->flags & BN_FLG_MALLOCED)
OPENSSL_free(recp);
}
int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx)
{
if (!BN_copy(&(recp->N),d)) return 0;
BN_zero(&(recp->Nr));
recp->num_bits=BN_num_bits(d);
recp->shift=0;
return(1);
}
{
if (!BN_copy(&(recp->N), d))
return 0;
BN_zero(&(recp->Nr));
recp->num_bits = BN_num_bits(d);
recp->shift = 0;
return (1);
}
int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
BN_RECP_CTX *recp, BN_CTX *ctx)
{
int ret=0;
BIGNUM *a;
const BIGNUM *ca;
BN_RECP_CTX *recp, BN_CTX *ctx)
{
int ret = 0;
BIGNUM *a;
const BIGNUM *ca;
BN_CTX_start(ctx);
if ((a = BN_CTX_get(ctx)) == NULL) goto err;
if (y != NULL)
{
if (x == y)
{ if (!BN_sqr(a,x,ctx)) goto err; }
else
{ if (!BN_mul(a,x,y,ctx)) goto err; }
ca = a;
}
else
ca=x; /* Just do the mod */
BN_CTX_start(ctx);
if ((a = BN_CTX_get(ctx)) == NULL)
goto err;
if (y != NULL) {
if (x == y) {
if (!BN_sqr(a, x, ctx))
goto err;
} else {
if (!BN_mul(a, x, y, ctx))
goto err;
}
ca = a;
} else
ca = x; /* Just do the mod */
ret = BN_div_recp(NULL,r,ca,recp,ctx);
err:
BN_CTX_end(ctx);
bn_check_top(r);
return(ret);
}
ret = BN_div_recp(NULL, r, ca, recp, ctx);
err:
BN_CTX_end(ctx);
bn_check_top(r);
return (ret);
}
int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
BN_RECP_CTX *recp, BN_CTX *ctx)
{
int i,j,ret=0;
BIGNUM *a,*b,*d,*r;
BN_RECP_CTX *recp, BN_CTX *ctx)
{
int i, j, ret = 0;
BIGNUM *a, *b, *d, *r;
BN_CTX_start(ctx);
a=BN_CTX_get(ctx);
b=BN_CTX_get(ctx);
if (dv != NULL)
d=dv;
else
d=BN_CTX_get(ctx);
if (rem != NULL)
r=rem;
else
r=BN_CTX_get(ctx);
if (a == NULL || b == NULL || d == NULL || r == NULL) goto err;
BN_CTX_start(ctx);
a = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);
if (dv != NULL)
d = dv;
else
d = BN_CTX_get(ctx);
if (rem != NULL)
r = rem;
else
r = BN_CTX_get(ctx);
if (a == NULL || b == NULL || d == NULL || r == NULL)
goto err;
if (BN_ucmp(m,&(recp->N)) < 0)
{
BN_zero(d);
if (!BN_copy(r,m)) return 0;
BN_CTX_end(ctx);
return(1);
}
if (BN_ucmp(m, &(recp->N)) < 0) {
BN_zero(d);
if (!BN_copy(r, m))
return 0;
BN_CTX_end(ctx);
return (1);
}
/* We want the remainder
* Given input of ABCDEF / ab
* we need multiply ABCDEF by 3 digests of the reciprocal of ab
*
*/
/*
* We want the remainder Given input of ABCDEF / ab we need multiply
* ABCDEF by 3 digests of the reciprocal of ab
*/
/* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
i=BN_num_bits(m);
j=recp->num_bits<<1;
if (j>i) i=j;
/* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
i = BN_num_bits(m);
j = recp->num_bits << 1;
if (j > i)
i = j;
/* Nr := round(2^i / N) */
if (i != recp->shift)
recp->shift=BN_reciprocal(&(recp->Nr),&(recp->N),
i,ctx); /* BN_reciprocal returns i, or -1 for an error */
if (recp->shift == -1) goto err;
/* Nr := round(2^i / N) */
if (i != recp->shift)
recp->shift = BN_reciprocal(&(recp->Nr), &(recp->N), i, ctx);
/* BN_reciprocal could have returned -1 for an error */
if (recp->shift == -1)
goto err;
/* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
* = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
* <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
* = |m/N|
*/
if (!BN_rshift(a,m,recp->num_bits)) goto err;
if (!BN_mul(b,a,&(recp->Nr),ctx)) goto err;
if (!BN_rshift(d,b,i-recp->num_bits)) goto err;
d->neg=0;
/*-
* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - BN_num_bits(N)))|
* = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - BN_num_bits(N)))|
* <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
* = |m/N|
*/
if (!BN_rshift(a, m, recp->num_bits))
goto err;
if (!BN_mul(b, a, &(recp->Nr), ctx))
goto err;
if (!BN_rshift(d, b, i - recp->num_bits))
goto err;
d->neg = 0;
if (!BN_mul(b,&(recp->N),d,ctx)) goto err;
if (!BN_usub(r,m,b)) goto err;
r->neg=0;
if (!BN_mul(b, &(recp->N), d, ctx))
goto err;
if (!BN_usub(r, m, b))
goto err;
r->neg = 0;
#if 1
j=0;
while (BN_ucmp(r,&(recp->N)) >= 0)
{
if (j++ > 2)
{
BNerr(BN_F_BN_DIV_RECP,BN_R_BAD_RECIPROCAL);
goto err;
}
if (!BN_usub(r,r,&(recp->N))) goto err;
if (!BN_add_word(d,1)) goto err;
}
j = 0;
while (BN_ucmp(r, &(recp->N)) >= 0) {
if (j++ > 2) {
BNerr(BN_F_BN_DIV_RECP, BN_R_BAD_RECIPROCAL);
goto err;
}
if (!BN_usub(r, r, &(recp->N)))
goto err;
if (!BN_add_word(d, 1))
goto err;
}
#endif
r->neg=BN_is_zero(r)?0:m->neg;
d->neg=m->neg^recp->N.neg;
ret=1;
err:
BN_CTX_end(ctx);
bn_check_top(dv);
bn_check_top(rem);
return(ret);
}
r->neg = BN_is_zero(r) ? 0 : m->neg;
d->neg = m->neg ^ recp->N.neg;
ret = 1;
err:
BN_CTX_end(ctx);
bn_check_top(dv);
bn_check_top(rem);
return (ret);
}
/* len is the expected size of the result
* We actually calculate with an extra word of precision, so
* we can do faster division if the remainder is not required.
/*
* len is the expected size of the result We actually calculate with an extra
* word of precision, so we can do faster division if the remainder is not
* required.
*/
/* r := 2^len / m */
int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx)
{
int ret= -1;
BIGNUM *t;
{
int ret = -1;
BIGNUM *t;
BN_CTX_start(ctx);
if((t = BN_CTX_get(ctx)) == NULL) goto err;
BN_CTX_start(ctx);
if ((t = BN_CTX_get(ctx)) == NULL)
goto err;
if (!BN_set_bit(t,len)) goto err;
if (!BN_set_bit(t, len))
goto err;
if (!BN_div(r,NULL,t,m,ctx)) goto err;
if (!BN_div(r, NULL, t, m, ctx))
goto err;
ret=len;
err:
bn_check_top(r);
BN_CTX_end(ctx);
return(ret);
}
ret = len;
err:
bn_check_top(r);
BN_CTX_end(ctx);
return (ret);
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -61,160 +61,164 @@
#include "bn_lcl.h"
int BN_lshift1(BIGNUM *r, const BIGNUM *a)
{
register BN_ULONG *ap,*rp,t,c;
int i;
{
register BN_ULONG *ap, *rp, t, c;
int i;
bn_check_top(r);
bn_check_top(a);
bn_check_top(r);
bn_check_top(a);
if (r != a)
{
r->neg=a->neg;
if (bn_wexpand(r,a->top+1) == NULL) return(0);
r->top=a->top;
}
else
{
if (bn_wexpand(r,a->top+1) == NULL) return(0);
}
ap=a->d;
rp=r->d;
c=0;
for (i=0; i<a->top; i++)
{
t= *(ap++);
*(rp++)=((t<<1)|c)&BN_MASK2;
c=(t & BN_TBIT)?1:0;
}
if (c)
{
*rp=1;
r->top++;
}
bn_check_top(r);
return(1);
}
if (r != a) {
r->neg = a->neg;
if (bn_wexpand(r, a->top + 1) == NULL)
return (0);
r->top = a->top;
} else {
if (bn_wexpand(r, a->top + 1) == NULL)
return (0);
}
ap = a->d;
rp = r->d;
c = 0;
for (i = 0; i < a->top; i++) {
t = *(ap++);
*(rp++) = ((t << 1) | c) & BN_MASK2;
c = (t & BN_TBIT) ? 1 : 0;
}
if (c) {
*rp = 1;
r->top++;
}
bn_check_top(r);
return (1);
}
int BN_rshift1(BIGNUM *r, const BIGNUM *a)
{
BN_ULONG *ap,*rp,t,c;
int i;
{
BN_ULONG *ap, *rp, t, c;
int i, j;
bn_check_top(r);
bn_check_top(a);
bn_check_top(r);
bn_check_top(a);
if (BN_is_zero(a))
{
BN_zero(r);
return(1);
}
if (a != r)
{
if (bn_wexpand(r,a->top) == NULL) return(0);
r->top=a->top;
r->neg=a->neg;
}
ap=a->d;
rp=r->d;
c=0;
for (i=a->top-1; i>=0; i--)
{
t=ap[i];
rp[i]=((t>>1)&BN_MASK2)|c;
c=(t&1)?BN_TBIT:0;
}
bn_correct_top(r);
bn_check_top(r);
return(1);
}
if (BN_is_zero(a)) {
BN_zero(r);
return (1);
}
i = a->top;
ap = a->d;
j = i - (ap[i - 1] == 1);
if (a != r) {
if (bn_wexpand(r, j) == NULL)
return (0);
r->neg = a->neg;
}
rp = r->d;
t = ap[--i];
c = (t & 1) ? BN_TBIT : 0;
if (t >>= 1)
rp[i] = t;
while (i > 0) {
t = ap[--i];
rp[i] = ((t >> 1) & BN_MASK2) | c;
c = (t & 1) ? BN_TBIT : 0;
}
r->top = j;
bn_check_top(r);
return (1);
}
int BN_lshift(BIGNUM *r, const BIGNUM *a, int n)
{
int i,nw,lb,rb;
BN_ULONG *t,*f;
BN_ULONG l;
{
int i, nw, lb, rb;
BN_ULONG *t, *f;
BN_ULONG l;
bn_check_top(r);
bn_check_top(a);
bn_check_top(r);
bn_check_top(a);
r->neg=a->neg;
nw=n/BN_BITS2;
if (bn_wexpand(r,a->top+nw+1) == NULL) return(0);
lb=n%BN_BITS2;
rb=BN_BITS2-lb;
f=a->d;
t=r->d;
t[a->top+nw]=0;
if (lb == 0)
for (i=a->top-1; i>=0; i--)
t[nw+i]=f[i];
else
for (i=a->top-1; i>=0; i--)
{
l=f[i];
t[nw+i+1]|=(l>>rb)&BN_MASK2;
t[nw+i]=(l<<lb)&BN_MASK2;
}
memset(t,0,nw*sizeof(t[0]));
/* for (i=0; i<nw; i++)
t[i]=0;*/
r->top=a->top+nw+1;
bn_correct_top(r);
bn_check_top(r);
return(1);
}
if (n < 0) {
BNerr(BN_F_BN_LSHIFT, BN_R_INVALID_SHIFT);
return 0;
}
r->neg = a->neg;
nw = n / BN_BITS2;
if (bn_wexpand(r, a->top + nw + 1) == NULL)
return (0);
lb = n % BN_BITS2;
rb = BN_BITS2 - lb;
f = a->d;
t = r->d;
t[a->top + nw] = 0;
if (lb == 0)
for (i = a->top - 1; i >= 0; i--)
t[nw + i] = f[i];
else
for (i = a->top - 1; i >= 0; i--) {
l = f[i];
t[nw + i + 1] |= (l >> rb) & BN_MASK2;
t[nw + i] = (l << lb) & BN_MASK2;
}
memset(t, 0, nw * sizeof(t[0]));
/*
* for (i=0; i<nw; i++) t[i]=0;
*/
r->top = a->top + nw + 1;
bn_correct_top(r);
bn_check_top(r);
return (1);
}
int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
{
int i,j,nw,lb,rb;
BN_ULONG *t,*f;
BN_ULONG l,tmp;
{
int i, j, nw, lb, rb;
BN_ULONG *t, *f;
BN_ULONG l, tmp;
bn_check_top(r);
bn_check_top(a);
bn_check_top(r);
bn_check_top(a);
nw=n/BN_BITS2;
rb=n%BN_BITS2;
lb=BN_BITS2-rb;
if (nw >= a->top || a->top == 0)
{
BN_zero(r);
return(1);
}
if (r != a)
{
r->neg=a->neg;
if (bn_wexpand(r,a->top-nw+1) == NULL) return(0);
}
else
{
if (n == 0)
return 1; /* or the copying loop will go berserk */
}
if (n < 0) {
BNerr(BN_F_BN_RSHIFT, BN_R_INVALID_SHIFT);
return 0;
}
f= &(a->d[nw]);
t=r->d;
j=a->top-nw;
r->top=j;
nw = n / BN_BITS2;
rb = n % BN_BITS2;
lb = BN_BITS2 - rb;
if (nw >= a->top || a->top == 0) {
BN_zero(r);
return (1);
}
i = (BN_num_bits(a) - n + (BN_BITS2 - 1)) / BN_BITS2;
if (r != a) {
r->neg = a->neg;
if (bn_wexpand(r, i) == NULL)
return (0);
} else {
if (n == 0)
return 1; /* or the copying loop will go berserk */
}
if (rb == 0)
{
for (i=j; i != 0; i--)
*(t++)= *(f++);
}
else
{
l= *(f++);
for (i=j-1; i != 0; i--)
{
tmp =(l>>rb)&BN_MASK2;
l= *(f++);
*(t++) =(tmp|(l<<lb))&BN_MASK2;
}
*(t++) =(l>>rb)&BN_MASK2;
}
bn_correct_top(r);
bn_check_top(r);
return(1);
}
f = &(a->d[nw]);
t = r->d;
j = a->top - nw;
r->top = i;
if (rb == 0) {
for (i = j; i != 0; i--)
*(t++) = *(f++);
} else {
l = *(f++);
for (i = j - 1; i != 0; i--) {
tmp = (l >> rb) & BN_MASK2;
l = *(f++);
*(t++) = (tmp | (l << lb)) & BN_MASK2;
}
if ((l = (l >> rb) & BN_MASK2))
*(t) = l;
}
bn_check_top(r);
return (1);
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -61,139 +61,137 @@
#include "bn_lcl.h"
/* r must not be a */
/* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96 */
/*
* I've just gone over this and it is now %20 faster on x86 - eay - 27 Jun 96
*/
int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx)
{
int max,al;
int ret = 0;
BIGNUM *tmp,*rr;
{
int max, al;
int ret = 0;
BIGNUM *tmp, *rr;
#ifdef BN_COUNT
fprintf(stderr,"BN_sqr %d * %d\n",a->top,a->top);
fprintf(stderr, "BN_sqr %d * %d\n", a->top, a->top);
#endif
bn_check_top(a);
bn_check_top(a);
al=a->top;
if (al <= 0)
{
r->top=0;
return 1;
}
al = a->top;
if (al <= 0) {
r->top = 0;
r->neg = 0;
return 1;
}
BN_CTX_start(ctx);
rr=(a != r) ? r : BN_CTX_get(ctx);
tmp=BN_CTX_get(ctx);
if (!rr || !tmp) goto err;
BN_CTX_start(ctx);
rr = (a != r) ? r : BN_CTX_get(ctx);
tmp = BN_CTX_get(ctx);
if (!rr || !tmp)
goto err;
max = 2 * al; /* Non-zero (from above) */
if (bn_wexpand(rr,max) == NULL) goto err;
max = 2 * al; /* Non-zero (from above) */
if (bn_wexpand(rr, max) == NULL)
goto err;
if (al == 4)
{
if (al == 4) {
#ifndef BN_SQR_COMBA
BN_ULONG t[8];
bn_sqr_normal(rr->d,a->d,4,t);
BN_ULONG t[8];
bn_sqr_normal(rr->d, a->d, 4, t);
#else
bn_sqr_comba4(rr->d,a->d);
bn_sqr_comba4(rr->d, a->d);
#endif
}
else if (al == 8)
{
} else if (al == 8) {
#ifndef BN_SQR_COMBA
BN_ULONG t[16];
bn_sqr_normal(rr->d,a->d,8,t);
BN_ULONG t[16];
bn_sqr_normal(rr->d, a->d, 8, t);
#else
bn_sqr_comba8(rr->d,a->d);
bn_sqr_comba8(rr->d, a->d);
#endif
}
else
{
} else {
#if defined(BN_RECURSION)
if (al < BN_SQR_RECURSIVE_SIZE_NORMAL)
{
BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL*2];
bn_sqr_normal(rr->d,a->d,al,t);
}
else
{
int j,k;
if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
bn_sqr_normal(rr->d, a->d, al, t);
} else {
int j, k;
j=BN_num_bits_word((BN_ULONG)al);
j=1<<(j-1);
k=j+j;
if (al == j)
{
if (bn_wexpand(tmp,k*2) == NULL) goto err;
bn_sqr_recursive(rr->d,a->d,al,tmp->d);
}
else
{
if (bn_wexpand(tmp,max) == NULL) goto err;
bn_sqr_normal(rr->d,a->d,al,tmp->d);
}
}
j = BN_num_bits_word((BN_ULONG)al);
j = 1 << (j - 1);
k = j + j;
if (al == j) {
if (bn_wexpand(tmp, k * 2) == NULL)
goto err;
bn_sqr_recursive(rr->d, a->d, al, tmp->d);
} else {
if (bn_wexpand(tmp, max) == NULL)
goto err;
bn_sqr_normal(rr->d, a->d, al, tmp->d);
}
}
#else
if (bn_wexpand(tmp,max) == NULL) goto err;
bn_sqr_normal(rr->d,a->d,al,tmp->d);
if (bn_wexpand(tmp, max) == NULL)
goto err;
bn_sqr_normal(rr->d, a->d, al, tmp->d);
#endif
}
}
rr->neg=0;
/* If the most-significant half of the top word of 'a' is zero, then
* the square of 'a' will max-1 words. */
if(a->d[al - 1] == (a->d[al - 1] & BN_MASK2l))
rr->top = max - 1;
else
rr->top = max;
if (rr != r) BN_copy(r,rr);
ret = 1;
rr->neg = 0;
/*
* If the most-significant half of the top word of 'a' is zero, then the
* square of 'a' will max-1 words.
*/
if (a->d[al - 1] == (a->d[al - 1] & BN_MASK2l))
rr->top = max - 1;
else
rr->top = max;
if (rr != r)
BN_copy(r, rr);
ret = 1;
err:
bn_check_top(rr);
bn_check_top(tmp);
BN_CTX_end(ctx);
return(ret);
}
bn_check_top(rr);
bn_check_top(tmp);
BN_CTX_end(ctx);
return (ret);
}
/* tmp must have 2*n words */
void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
{
int i,j,max;
const BN_ULONG *ap;
BN_ULONG *rp;
{
int i, j, max;
const BN_ULONG *ap;
BN_ULONG *rp;
max=n*2;
ap=a;
rp=r;
rp[0]=rp[max-1]=0;
rp++;
j=n;
max = n * 2;
ap = a;
rp = r;
rp[0] = rp[max - 1] = 0;
rp++;
j = n;
if (--j > 0)
{
ap++;
rp[j]=bn_mul_words(rp,ap,j,ap[-1]);
rp+=2;
}
if (--j > 0) {
ap++;
rp[j] = bn_mul_words(rp, ap, j, ap[-1]);
rp += 2;
}
for (i=n-2; i>0; i--)
{
j--;
ap++;
rp[j]=bn_mul_add_words(rp,ap,j,ap[-1]);
rp+=2;
}
for (i = n - 2; i > 0; i--) {
j--;
ap++;
rp[j] = bn_mul_add_words(rp, ap, j, ap[-1]);
rp += 2;
}
bn_add_words(r,r,r,max);
bn_add_words(r, r, r, max);
/* There will not be a carry */
/* There will not be a carry */
bn_sqr_words(tmp,a,n);
bn_sqr_words(tmp, a, n);
bn_add_words(r,r,tmp,max);
}
bn_add_words(r, r, tmp, max);
}
#ifdef BN_RECURSION
/* r is 2*n words in size,
/*-
* r is 2*n words in size,
* a and b are both n words in size. (There's not actually a 'b' here ...)
* n must be a power of 2.
* We multiply and return the result.
@@ -204,91 +202,89 @@ void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp)
* a[1]*b[1]
*/
void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t)
{
int n=n2/2;
int zero,c1;
BN_ULONG ln,lo,*p;
{
int n = n2 / 2;
int zero, c1;
BN_ULONG ln, lo, *p;
#ifdef BN_COUNT
fprintf(stderr," bn_sqr_recursive %d * %d\n",n2,n2);
#endif
if (n2 == 4)
{
#ifndef BN_SQR_COMBA
bn_sqr_normal(r,a,4,t);
#else
bn_sqr_comba4(r,a);
#endif
return;
}
else if (n2 == 8)
{
#ifndef BN_SQR_COMBA
bn_sqr_normal(r,a,8,t);
#else
bn_sqr_comba8(r,a);
#endif
return;
}
if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL)
{
bn_sqr_normal(r,a,n2,t);
return;
}
/* r=(a[0]-a[1])*(a[1]-a[0]) */
c1=bn_cmp_words(a,&(a[n]),n);
zero=0;
if (c1 > 0)
bn_sub_words(t,a,&(a[n]),n);
else if (c1 < 0)
bn_sub_words(t,&(a[n]),a,n);
else
zero=1;
/* The result will always be negative unless it is zero */
p= &(t[n2*2]);
if (!zero)
bn_sqr_recursive(&(t[n2]),t,n,p);
else
memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
bn_sqr_recursive(r,a,n,p);
bn_sqr_recursive(&(r[n2]),&(a[n]),n,p);
/* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
* r[10] holds (a[0]*b[0])
* r[32] holds (b[1]*b[1])
*/
c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
/* t[32] is negative */
c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
/* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
* r[10] holds (a[0]*a[0])
* r[32] holds (a[1]*a[1])
* c1 holds the carry bits
*/
c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
if (c1)
{
p= &(r[n+n2]);
lo= *p;
ln=(lo+c1)&BN_MASK2;
*p=ln;
/* The overflow will stop before we over write
* words we should not overwrite */
if (ln < (BN_ULONG)c1)
{
do {
p++;
lo= *p;
ln=(lo+1)&BN_MASK2;
*p=ln;
} while (ln == 0);
}
}
}
# ifdef BN_COUNT
fprintf(stderr, " bn_sqr_recursive %d * %d\n", n2, n2);
# endif
if (n2 == 4) {
# ifndef BN_SQR_COMBA
bn_sqr_normal(r, a, 4, t);
# else
bn_sqr_comba4(r, a);
# endif
return;
} else if (n2 == 8) {
# ifndef BN_SQR_COMBA
bn_sqr_normal(r, a, 8, t);
# else
bn_sqr_comba8(r, a);
# endif
return;
}
if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
bn_sqr_normal(r, a, n2, t);
return;
}
/* r=(a[0]-a[1])*(a[1]-a[0]) */
c1 = bn_cmp_words(a, &(a[n]), n);
zero = 0;
if (c1 > 0)
bn_sub_words(t, a, &(a[n]), n);
else if (c1 < 0)
bn_sub_words(t, &(a[n]), a, n);
else
zero = 1;
/* The result will always be negative unless it is zero */
p = &(t[n2 * 2]);
if (!zero)
bn_sqr_recursive(&(t[n2]), t, n, p);
else
memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
bn_sqr_recursive(r, a, n, p);
bn_sqr_recursive(&(r[n2]), &(a[n]), n, p);
/*-
* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
* r[10] holds (a[0]*b[0])
* r[32] holds (b[1]*b[1])
*/
c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
/* t[32] is negative */
c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
/*-
* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
* r[10] holds (a[0]*a[0])
* r[32] holds (a[1]*a[1])
* c1 holds the carry bits
*/
c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
if (c1) {
p = &(r[n + n2]);
lo = *p;
ln = (lo + c1) & BN_MASK2;
*p = ln;
/*
* The overflow will stop before we over write words we should not
* overwrite
*/
if (ln < (BN_ULONG)c1) {
do {
p++;
lo = *p;
ln = (lo + 1) & BN_MASK2;
*p = ln;
} while (ln == 0);
}
}
}
#endif

Binary file not shown.

View File

@@ -1,6 +1,8 @@
/* crypto/bn/bn_sqrt.c */
/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
* and Bodo Moeller for the OpenSSL project. */
/*
* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de> and Bodo
* Moeller for the OpenSSL project.
*/
/* ====================================================================
* Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
*
@@ -9,7 +11,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -58,336 +60,350 @@
#include "cryptlib.h"
#include "bn_lcl.h"
BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
/* Returns 'ret' such that
* ret^2 == a (mod p),
* using the Tonelli/Shanks algorithm (cf. Henri Cohen, "A Course
* in Algebraic Computational Number Theory", algorithm 1.5.1).
* 'p' must be prime!
BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
/*
* Returns 'ret' such that ret^2 == a (mod p), using the Tonelli/Shanks
* algorithm (cf. Henri Cohen, "A Course in Algebraic Computational Number
* Theory", algorithm 1.5.1). 'p' must be prime!
*/
{
BIGNUM *ret = in;
int err = 1;
int r;
BIGNUM *A, *b, *q, *t, *x, *y;
int e, i, j;
if (!BN_is_odd(p) || BN_abs_is_word(p, 1))
{
if (BN_abs_is_word(p, 2))
{
if (ret == NULL)
ret = BN_new();
if (ret == NULL)
goto end;
if (!BN_set_word(ret, BN_is_bit_set(a, 0)))
{
if (ret != in)
BN_free(ret);
return NULL;
}
bn_check_top(ret);
return ret;
}
{
BIGNUM *ret = in;
int err = 1;
int r;
BIGNUM *A, *b, *q, *t, *x, *y;
int e, i, j;
BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
return(NULL);
}
if (!BN_is_odd(p) || BN_abs_is_word(p, 1)) {
if (BN_abs_is_word(p, 2)) {
if (ret == NULL)
ret = BN_new();
if (ret == NULL)
goto end;
if (!BN_set_word(ret, BN_is_bit_set(a, 0))) {
if (ret != in)
BN_free(ret);
return NULL;
}
bn_check_top(ret);
return ret;
}
if (BN_is_zero(a) || BN_is_one(a))
{
if (ret == NULL)
ret = BN_new();
if (ret == NULL)
goto end;
if (!BN_set_word(ret, BN_is_one(a)))
{
if (ret != in)
BN_free(ret);
return NULL;
}
bn_check_top(ret);
return ret;
}
BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
return (NULL);
}
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);
q = BN_CTX_get(ctx);
t = BN_CTX_get(ctx);
x = BN_CTX_get(ctx);
y = BN_CTX_get(ctx);
if (y == NULL) goto end;
if (ret == NULL)
ret = BN_new();
if (ret == NULL) goto end;
if (BN_is_zero(a) || BN_is_one(a)) {
if (ret == NULL)
ret = BN_new();
if (ret == NULL)
goto end;
if (!BN_set_word(ret, BN_is_one(a))) {
if (ret != in)
BN_free(ret);
return NULL;
}
bn_check_top(ret);
return ret;
}
/* A = a mod p */
if (!BN_nnmod(A, a, p, ctx)) goto end;
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);
q = BN_CTX_get(ctx);
t = BN_CTX_get(ctx);
x = BN_CTX_get(ctx);
y = BN_CTX_get(ctx);
if (y == NULL)
goto end;
/* now write |p| - 1 as 2^e*q where q is odd */
e = 1;
while (!BN_is_bit_set(p, e))
e++;
/* we'll set q later (if needed) */
if (ret == NULL)
ret = BN_new();
if (ret == NULL)
goto end;
if (e == 1)
{
/* The easy case: (|p|-1)/2 is odd, so 2 has an inverse
* modulo (|p|-1)/2, and square roots can be computed
* directly by modular exponentiation.
* We have
* 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
* so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
*/
if (!BN_rshift(q, p, 2)) goto end;
q->neg = 0;
if (!BN_add_word(q, 1)) goto end;
if (!BN_mod_exp(ret, A, q, p, ctx)) goto end;
err = 0;
goto vrfy;
}
if (e == 2)
{
/* |p| == 5 (mod 8)
*
* In this case 2 is always a non-square since
* Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
* So if a really is a square, then 2*a is a non-square.
* Thus for
* b := (2*a)^((|p|-5)/8),
* i := (2*a)*b^2
* we have
* i^2 = (2*a)^((1 + (|p|-5)/4)*2)
* = (2*a)^((p-1)/2)
* = -1;
* so if we set
* x := a*b*(i-1),
* then
* x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
* = a^2 * b^2 * (-2*i)
* = a*(-i)*(2*a*b^2)
* = a*(-i)*i
* = a.
*
* (This is due to A.O.L. Atkin,
* <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
* November 1992.)
*/
/* A = a mod p */
if (!BN_nnmod(A, a, p, ctx))
goto end;
/* t := 2*a */
if (!BN_mod_lshift1_quick(t, A, p)) goto end;
/* now write |p| - 1 as 2^e*q where q is odd */
e = 1;
while (!BN_is_bit_set(p, e))
e++;
/* we'll set q later (if needed) */
/* b := (2*a)^((|p|-5)/8) */
if (!BN_rshift(q, p, 3)) goto end;
q->neg = 0;
if (!BN_mod_exp(b, t, q, p, ctx)) goto end;
if (e == 1) {
/*-
* The easy case: (|p|-1)/2 is odd, so 2 has an inverse
* modulo (|p|-1)/2, and square roots can be computed
* directly by modular exponentiation.
* We have
* 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
* so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
*/
if (!BN_rshift(q, p, 2))
goto end;
q->neg = 0;
if (!BN_add_word(q, 1))
goto end;
if (!BN_mod_exp(ret, A, q, p, ctx))
goto end;
err = 0;
goto vrfy;
}
/* y := b^2 */
if (!BN_mod_sqr(y, b, p, ctx)) goto end;
if (e == 2) {
/*-
* |p| == 5 (mod 8)
*
* In this case 2 is always a non-square since
* Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
* So if a really is a square, then 2*a is a non-square.
* Thus for
* b := (2*a)^((|p|-5)/8),
* i := (2*a)*b^2
* we have
* i^2 = (2*a)^((1 + (|p|-5)/4)*2)
* = (2*a)^((p-1)/2)
* = -1;
* so if we set
* x := a*b*(i-1),
* then
* x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
* = a^2 * b^2 * (-2*i)
* = a*(-i)*(2*a*b^2)
* = a*(-i)*i
* = a.
*
* (This is due to A.O.L. Atkin,
* <URL: http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
* November 1992.)
*/
/* t := (2*a)*b^2 - 1*/
if (!BN_mod_mul(t, t, y, p, ctx)) goto end;
if (!BN_sub_word(t, 1)) goto end;
/* t := 2*a */
if (!BN_mod_lshift1_quick(t, A, p))
goto end;
/* x = a*b*t */
if (!BN_mod_mul(x, A, b, p, ctx)) goto end;
if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
/* b := (2*a)^((|p|-5)/8) */
if (!BN_rshift(q, p, 3))
goto end;
q->neg = 0;
if (!BN_mod_exp(b, t, q, p, ctx))
goto end;
if (!BN_copy(ret, x)) goto end;
err = 0;
goto vrfy;
}
/* e > 2, so we really have to use the Tonelli/Shanks algorithm.
* First, find some y that is not a square. */
if (!BN_copy(q, p)) goto end; /* use 'q' as temp */
q->neg = 0;
i = 2;
do
{
/* For efficiency, try small numbers first;
* if this fails, try random numbers.
*/
if (i < 22)
{
if (!BN_set_word(y, i)) goto end;
}
else
{
if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) goto end;
if (BN_ucmp(y, p) >= 0)
{
if (!(p->neg ? BN_add : BN_sub)(y, y, p)) goto end;
}
/* now 0 <= y < |p| */
if (BN_is_zero(y))
if (!BN_set_word(y, i)) goto end;
}
r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
if (r < -1) goto end;
if (r == 0)
{
/* m divides p */
BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
goto end;
}
}
while (r == 1 && ++i < 82);
if (r != -1)
{
/* Many rounds and still no non-square -- this is more likely
* a bug than just bad luck.
* Even if p is not prime, we should have found some y
* such that r == -1.
*/
BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
goto end;
}
/* y := b^2 */
if (!BN_mod_sqr(y, b, p, ctx))
goto end;
/* Here's our actual 'q': */
if (!BN_rshift(q, q, e)) goto end;
/* t := (2*a)*b^2 - 1 */
if (!BN_mod_mul(t, t, y, p, ctx))
goto end;
if (!BN_sub_word(t, 1))
goto end;
/* Now that we have some non-square, we can find an element
* of order 2^e by computing its q'th power. */
if (!BN_mod_exp(y, y, q, p, ctx)) goto end;
if (BN_is_one(y))
{
BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
goto end;
}
/* x = a*b*t */
if (!BN_mod_mul(x, A, b, p, ctx))
goto end;
if (!BN_mod_mul(x, x, t, p, ctx))
goto end;
/* Now we know that (if p is indeed prime) there is an integer
* k, 0 <= k < 2^e, such that
*
* a^q * y^k == 1 (mod p).
*
* As a^q is a square and y is not, k must be even.
* q+1 is even, too, so there is an element
*
* X := a^((q+1)/2) * y^(k/2),
*
* and it satisfies
*
* X^2 = a^q * a * y^k
* = a,
*
* so it is the square root that we are looking for.
*/
/* t := (q-1)/2 (note that q is odd) */
if (!BN_rshift1(t, q)) goto end;
/* x := a^((q-1)/2) */
if (BN_is_zero(t)) /* special case: p = 2^e + 1 */
{
if (!BN_nnmod(t, A, p, ctx)) goto end;
if (BN_is_zero(t))
{
/* special case: a == 0 (mod p) */
BN_zero(ret);
err = 0;
goto end;
}
else
if (!BN_one(x)) goto end;
}
else
{
if (!BN_mod_exp(x, A, t, p, ctx)) goto end;
if (BN_is_zero(x))
{
/* special case: a == 0 (mod p) */
BN_zero(ret);
err = 0;
goto end;
}
}
if (!BN_copy(ret, x))
goto end;
err = 0;
goto vrfy;
}
/* b := a*x^2 (= a^q) */
if (!BN_mod_sqr(b, x, p, ctx)) goto end;
if (!BN_mod_mul(b, b, A, p, ctx)) goto end;
/* x := a*x (= a^((q+1)/2)) */
if (!BN_mod_mul(x, x, A, p, ctx)) goto end;
/*
* e > 2, so we really have to use the Tonelli/Shanks algorithm. First,
* find some y that is not a square.
*/
if (!BN_copy(q, p))
goto end; /* use 'q' as temp */
q->neg = 0;
i = 2;
do {
/*
* For efficiency, try small numbers first; if this fails, try random
* numbers.
*/
if (i < 22) {
if (!BN_set_word(y, i))
goto end;
} else {
if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0))
goto end;
if (BN_ucmp(y, p) >= 0) {
if (!(p->neg ? BN_add : BN_sub) (y, y, p))
goto end;
}
/* now 0 <= y < |p| */
if (BN_is_zero(y))
if (!BN_set_word(y, i))
goto end;
}
while (1)
{
/* Now b is a^q * y^k for some even k (0 <= k < 2^E
* where E refers to the original value of e, which we
* don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
*
* We have a*b = x^2,
* y^2^(e-1) = -1,
* b^2^(e-1) = 1.
*/
r = BN_kronecker(y, q, ctx); /* here 'q' is |p| */
if (r < -1)
goto end;
if (r == 0) {
/* m divides p */
BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
goto end;
}
}
while (r == 1 && ++i < 82);
if (BN_is_one(b))
{
if (!BN_copy(ret, x)) goto end;
err = 0;
goto vrfy;
}
if (r != -1) {
/*
* Many rounds and still no non-square -- this is more likely a bug
* than just bad luck. Even if p is not prime, we should have found
* some y such that r == -1.
*/
BNerr(BN_F_BN_MOD_SQRT, BN_R_TOO_MANY_ITERATIONS);
goto end;
}
/* Here's our actual 'q': */
if (!BN_rshift(q, q, e))
goto end;
/* find smallest i such that b^(2^i) = 1 */
i = 1;
if (!BN_mod_sqr(t, b, p, ctx)) goto end;
while (!BN_is_one(t))
{
i++;
if (i == e)
{
BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
goto end;
}
if (!BN_mod_mul(t, t, t, p, ctx)) goto end;
}
/*
* Now that we have some non-square, we can find an element of order 2^e
* by computing its q'th power.
*/
if (!BN_mod_exp(y, y, q, p, ctx))
goto end;
if (BN_is_one(y)) {
BNerr(BN_F_BN_MOD_SQRT, BN_R_P_IS_NOT_PRIME);
goto end;
}
/* t := y^2^(e - i - 1) */
if (!BN_copy(t, y)) goto end;
for (j = e - i - 1; j > 0; j--)
{
if (!BN_mod_sqr(t, t, p, ctx)) goto end;
}
if (!BN_mod_mul(y, t, t, p, ctx)) goto end;
if (!BN_mod_mul(x, x, t, p, ctx)) goto end;
if (!BN_mod_mul(b, b, y, p, ctx)) goto end;
e = i;
}
/*-
* Now we know that (if p is indeed prime) there is an integer
* k, 0 <= k < 2^e, such that
*
* a^q * y^k == 1 (mod p).
*
* As a^q is a square and y is not, k must be even.
* q+1 is even, too, so there is an element
*
* X := a^((q+1)/2) * y^(k/2),
*
* and it satisfies
*
* X^2 = a^q * a * y^k
* = a,
*
* so it is the square root that we are looking for.
*/
/* t := (q-1)/2 (note that q is odd) */
if (!BN_rshift1(t, q))
goto end;
/* x := a^((q-1)/2) */
if (BN_is_zero(t)) { /* special case: p = 2^e + 1 */
if (!BN_nnmod(t, A, p, ctx))
goto end;
if (BN_is_zero(t)) {
/* special case: a == 0 (mod p) */
BN_zero(ret);
err = 0;
goto end;
} else if (!BN_one(x))
goto end;
} else {
if (!BN_mod_exp(x, A, t, p, ctx))
goto end;
if (BN_is_zero(x)) {
/* special case: a == 0 (mod p) */
BN_zero(ret);
err = 0;
goto end;
}
}
/* b := a*x^2 (= a^q) */
if (!BN_mod_sqr(b, x, p, ctx))
goto end;
if (!BN_mod_mul(b, b, A, p, ctx))
goto end;
/* x := a*x (= a^((q+1)/2)) */
if (!BN_mod_mul(x, x, A, p, ctx))
goto end;
while (1) {
/*-
* Now b is a^q * y^k for some even k (0 <= k < 2^E
* where E refers to the original value of e, which we
* don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
*
* We have a*b = x^2,
* y^2^(e-1) = -1,
* b^2^(e-1) = 1.
*/
if (BN_is_one(b)) {
if (!BN_copy(ret, x))
goto end;
err = 0;
goto vrfy;
}
/* find smallest i such that b^(2^i) = 1 */
i = 1;
if (!BN_mod_sqr(t, b, p, ctx))
goto end;
while (!BN_is_one(t)) {
i++;
if (i == e) {
BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
goto end;
}
if (!BN_mod_mul(t, t, t, p, ctx))
goto end;
}
/* t := y^2^(e - i - 1) */
if (!BN_copy(t, y))
goto end;
for (j = e - i - 1; j > 0; j--) {
if (!BN_mod_sqr(t, t, p, ctx))
goto end;
}
if (!BN_mod_mul(y, t, t, p, ctx))
goto end;
if (!BN_mod_mul(x, x, t, p, ctx))
goto end;
if (!BN_mod_mul(b, b, y, p, ctx))
goto end;
e = i;
}
vrfy:
if (!err)
{
/* verify the result -- the input might have been not a square
* (test added in 0.9.8) */
if (!BN_mod_sqr(x, ret, p, ctx))
err = 1;
if (!err && 0 != BN_cmp(x, A))
{
BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
err = 1;
}
}
if (!err) {
/*
* verify the result -- the input might have been not a square (test
* added in 0.9.8)
*/
if (!BN_mod_sqr(x, ret, p, ctx))
err = 1;
if (!err && 0 != BN_cmp(x, A)) {
BNerr(BN_F_BN_MOD_SQRT, BN_R_NOT_A_SQUARE);
err = 1;
}
}
end:
if (err)
{
if (ret != NULL && ret != in)
{
BN_clear_free(ret);
}
ret = NULL;
}
BN_CTX_end(ctx);
bn_check_top(ret);
return ret;
}
if (err) {
if (ret != NULL && ret != in) {
BN_clear_free(ret);
}
ret = NULL;
}
BN_CTX_end(ctx);
bn_check_top(ret);
return ret;
}

Binary file not shown.

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -61,187 +61,167 @@
#include "bn_lcl.h"
BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w)
{
{
#ifndef BN_LLONG
BN_ULONG ret=0;
BN_ULONG ret = 0;
#else
BN_ULLONG ret=0;
BN_ULLONG ret = 0;
#endif
int i;
int i;
if (w == 0)
return (BN_ULONG)-1;
if (w == 0)
return (BN_ULONG)-1;
bn_check_top(a);
w&=BN_MASK2;
for (i=a->top-1; i>=0; i--)
{
bn_check_top(a);
w &= BN_MASK2;
for (i = a->top - 1; i >= 0; i--) {
#ifndef BN_LLONG
ret=((ret<<BN_BITS4)|((a->d[i]>>BN_BITS4)&BN_MASK2l))%w;
ret=((ret<<BN_BITS4)|(a->d[i]&BN_MASK2l))%w;
ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w;
ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
#else
ret=(BN_ULLONG)(((ret<<(BN_ULLONG)BN_BITS2)|a->d[i])%
(BN_ULLONG)w);
ret = (BN_ULLONG) (((ret << (BN_ULLONG) BN_BITS2) | a->d[i]) %
(BN_ULLONG) w);
#endif
}
return((BN_ULONG)ret);
}
}
return ((BN_ULONG)ret);
}
BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w)
{
BN_ULONG ret = 0;
int i, j;
{
BN_ULONG ret = 0;
int i, j;
bn_check_top(a);
w &= BN_MASK2;
bn_check_top(a);
w &= BN_MASK2;
if (!w)
/* actually this an error (division by zero) */
return (BN_ULONG)-1;
if (a->top == 0)
return 0;
if (!w)
/* actually this an error (division by zero) */
return (BN_ULONG)-1;
if (a->top == 0)
return 0;
/* normalize input (so bn_div_words doesn't complain) */
j = BN_BITS2 - BN_num_bits_word(w);
w <<= j;
if (!BN_lshift(a, a, j))
return (BN_ULONG)-1;
/* normalize input (so bn_div_words doesn't complain) */
j = BN_BITS2 - BN_num_bits_word(w);
w <<= j;
if (!BN_lshift(a, a, j))
return (BN_ULONG)-1;
for (i=a->top-1; i>=0; i--)
{
BN_ULONG l,d;
l=a->d[i];
d=bn_div_words(ret,l,w);
ret=(l-((d*w)&BN_MASK2))&BN_MASK2;
a->d[i]=d;
}
if ((a->top > 0) && (a->d[a->top-1] == 0))
a->top--;
ret >>= j;
bn_check_top(a);
return(ret);
}
for (i = a->top - 1; i >= 0; i--) {
BN_ULONG l, d;
l = a->d[i];
d = bn_div_words(ret, l, w);
ret = (l - ((d * w) & BN_MASK2)) & BN_MASK2;
a->d[i] = d;
}
if ((a->top > 0) && (a->d[a->top - 1] == 0))
a->top--;
ret >>= j;
bn_check_top(a);
return (ret);
}
int BN_add_word(BIGNUM *a, BN_ULONG w)
{
BN_ULONG l;
int i;
{
BN_ULONG l;
int i;
bn_check_top(a);
w &= BN_MASK2;
bn_check_top(a);
w &= BN_MASK2;
/* degenerate case: w is zero */
if (!w) return 1;
/* degenerate case: a is zero */
if(BN_is_zero(a)) return BN_set_word(a, w);
/* handle 'a' when negative */
if (a->neg)
{
a->neg=0;
i=BN_sub_word(a,w);
if (!BN_is_zero(a))
a->neg=!(a->neg);
return(i);
}
/* Only expand (and risk failing) if it's possibly necessary */
if (((BN_ULONG)(a->d[a->top - 1] + 1) == 0) &&
(bn_wexpand(a,a->top+1) == NULL))
return(0);
i=0;
for (;;)
{
if (i >= a->top)
l=w;
else
l=(a->d[i]+w)&BN_MASK2;
a->d[i]=l;
if (w > l)
w=1;
else
break;
i++;
}
if (i >= a->top)
a->top++;
bn_check_top(a);
return(1);
}
/* degenerate case: w is zero */
if (!w)
return 1;
/* degenerate case: a is zero */
if (BN_is_zero(a))
return BN_set_word(a, w);
/* handle 'a' when negative */
if (a->neg) {
a->neg = 0;
i = BN_sub_word(a, w);
if (!BN_is_zero(a))
a->neg = !(a->neg);
return (i);
}
for (i = 0; w != 0 && i < a->top; i++) {
a->d[i] = l = (a->d[i] + w) & BN_MASK2;
w = (w > l) ? 1 : 0;
}
if (w && i == a->top) {
if (bn_wexpand(a, a->top + 1) == NULL)
return 0;
a->top++;
a->d[i] = w;
}
bn_check_top(a);
return (1);
}
int BN_sub_word(BIGNUM *a, BN_ULONG w)
{
int i;
{
int i;
bn_check_top(a);
w &= BN_MASK2;
bn_check_top(a);
w &= BN_MASK2;
/* degenerate case: w is zero */
if (!w) return 1;
/* degenerate case: a is zero */
if(BN_is_zero(a))
{
i = BN_set_word(a,w);
if (i != 0)
BN_set_negative(a, 1);
return i;
}
/* handle 'a' when negative */
if (a->neg)
{
a->neg=0;
i=BN_add_word(a,w);
a->neg=1;
return(i);
}
/* degenerate case: w is zero */
if (!w)
return 1;
/* degenerate case: a is zero */
if (BN_is_zero(a)) {
i = BN_set_word(a, w);
if (i != 0)
BN_set_negative(a, 1);
return i;
}
/* handle 'a' when negative */
if (a->neg) {
a->neg = 0;
i = BN_add_word(a, w);
a->neg = 1;
return (i);
}
if ((a->top == 1) && (a->d[0] < w))
{
a->d[0]=w-a->d[0];
a->neg=1;
return(1);
}
i=0;
for (;;)
{
if (a->d[i] >= w)
{
a->d[i]-=w;
break;
}
else
{
a->d[i]=(a->d[i]-w)&BN_MASK2;
i++;
w=1;
}
}
if ((a->d[i] == 0) && (i == (a->top-1)))
a->top--;
bn_check_top(a);
return(1);
}
if ((a->top == 1) && (a->d[0] < w)) {
a->d[0] = w - a->d[0];
a->neg = 1;
return (1);
}
i = 0;
for (;;) {
if (a->d[i] >= w) {
a->d[i] -= w;
break;
} else {
a->d[i] = (a->d[i] - w) & BN_MASK2;
i++;
w = 1;
}
}
if ((a->d[i] == 0) && (i == (a->top - 1)))
a->top--;
bn_check_top(a);
return (1);
}
int BN_mul_word(BIGNUM *a, BN_ULONG w)
{
BN_ULONG ll;
bn_check_top(a);
w&=BN_MASK2;
if (a->top)
{
if (w == 0)
BN_zero(a);
else
{
ll=bn_mul_words(a->d,a->d,a->top,w);
if (ll)
{
if (bn_wexpand(a,a->top+1) == NULL) return(0);
a->d[a->top++]=ll;
}
}
}
bn_check_top(a);
return(1);
}
{
BN_ULONG ll;
bn_check_top(a);
w &= BN_MASK2;
if (a->top) {
if (w == 0)
BN_zero(a);
else {
ll = bn_mul_words(a->d, a->d, a->top, w);
if (ll) {
if (bn_wexpand(a, a->top + 1) == NULL)
return (0);
a->d[a->top++] = ll;
}
}
}
bn_check_top(a);
return (1);
}

Binary file not shown.

274
crypto/bn/bn_x931p.c Normal file
View File

@@ -0,0 +1,274 @@
/* bn_x931p.c */
/*
* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
* 2005.
*/
/* ====================================================================
* Copyright (c) 2005 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* licensing@OpenSSL.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com).
*
*/
#include <stdio.h>
#include <openssl/bn.h>
/* X9.31 routines for prime derivation */
/*
* X9.31 prime derivation. This is used to generate the primes pi (p1, p2,
* q1, q2) from a parameter Xpi by checking successive odd integers.
*/
static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx,
BN_GENCB *cb)
{
int i = 0;
if (!BN_copy(pi, Xpi))
return 0;
if (!BN_is_odd(pi) && !BN_add_word(pi, 1))
return 0;
for (;;) {
i++;
BN_GENCB_call(cb, 0, i);
/* NB 27 MR is specificed in X9.31 */
if (BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb))
break;
if (!BN_add_word(pi, 2))
return 0;
}
BN_GENCB_call(cb, 2, i);
return 1;
}
/*
* This is the main X9.31 prime derivation function. From parameters Xp1, Xp2
* and Xp derive the prime p. If the parameters p1 or p2 are not NULL they
* will be returned too: this is needed for testing.
*/
int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
const BIGNUM *Xp, const BIGNUM *Xp1,
const BIGNUM *Xp2, const BIGNUM *e, BN_CTX *ctx,
BN_GENCB *cb)
{
int ret = 0;
BIGNUM *t, *p1p2, *pm1;
/* Only even e supported */
if (!BN_is_odd(e))
return 0;
BN_CTX_start(ctx);
if (!p1)
p1 = BN_CTX_get(ctx);
if (!p2)
p2 = BN_CTX_get(ctx);
t = BN_CTX_get(ctx);
p1p2 = BN_CTX_get(ctx);
pm1 = BN_CTX_get(ctx);
if (!bn_x931_derive_pi(p1, Xp1, ctx, cb))
goto err;
if (!bn_x931_derive_pi(p2, Xp2, ctx, cb))
goto err;
if (!BN_mul(p1p2, p1, p2, ctx))
goto err;
/* First set p to value of Rp */
if (!BN_mod_inverse(p, p2, p1, ctx))
goto err;
if (!BN_mul(p, p, p2, ctx))
goto err;
if (!BN_mod_inverse(t, p1, p2, ctx))
goto err;
if (!BN_mul(t, t, p1, ctx))
goto err;
if (!BN_sub(p, p, t))
goto err;
if (p->neg && !BN_add(p, p, p1p2))
goto err;
/* p now equals Rp */
if (!BN_mod_sub(p, p, Xp, p1p2, ctx))
goto err;
if (!BN_add(p, p, Xp))
goto err;
/* p now equals Yp0 */
for (;;) {
int i = 1;
BN_GENCB_call(cb, 0, i++);
if (!BN_copy(pm1, p))
goto err;
if (!BN_sub_word(pm1, 1))
goto err;
if (!BN_gcd(t, pm1, e, ctx))
goto err;
if (BN_is_one(t)
/*
* X9.31 specifies 8 MR and 1 Lucas test or any prime test
* offering similar or better guarantees 50 MR is considerably
* better.
*/
&& BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb))
break;
if (!BN_add(p, p, p1p2))
goto err;
}
BN_GENCB_call(cb, 3, 0);
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
/*
* Generate pair of paramters Xp, Xq for X9.31 prime generation. Note: nbits
* paramter is sum of number of bits in both.
*/
int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
{
BIGNUM *t;
int i;
/*
* Number of bits for each prime is of the form 512+128s for s = 0, 1,
* ...
*/
if ((nbits < 1024) || (nbits & 0xff))
return 0;
nbits >>= 1;
/*
* The random value Xp must be between sqrt(2) * 2^(nbits-1) and 2^nbits
* - 1. By setting the top two bits we ensure that the lower bound is
* exceeded.
*/
if (!BN_rand(Xp, nbits, 1, 0))
return 0;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
for (i = 0; i < 1000; i++) {
if (!BN_rand(Xq, nbits, 1, 0))
return 0;
/* Check that |Xp - Xq| > 2^(nbits - 100) */
BN_sub(t, Xp, Xq);
if (BN_num_bits(t) > (nbits - 100))
break;
}
BN_CTX_end(ctx);
if (i < 1000)
return 1;
return 0;
}
/*
* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1 and
* Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL the
* relevant parameter will be stored in it. Due to the fact that |Xp - Xq| >
* 2^(nbits - 100) must be satisfied Xp and Xq are generated using the
* previous function and supplied as input.
*/
int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
BIGNUM *Xp1, BIGNUM *Xp2,
const BIGNUM *Xp,
const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb)
{
int ret = 0;
BN_CTX_start(ctx);
if (!Xp1)
Xp1 = BN_CTX_get(ctx);
if (!Xp2)
Xp2 = BN_CTX_get(ctx);
if (!BN_rand(Xp1, 101, 0, 0))
goto error;
if (!BN_rand(Xp2, 101, 0, 0))
goto error;
if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
goto error;
ret = 1;
error:
BN_CTX_end(ctx);
return ret;
}

View File

@@ -7,21 +7,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -36,10 +36,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -51,7 +51,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -60,7 +60,7 @@
/* most of this code has been pilfered from my libdes speed.c program */
#define BASENUM 1000000
#define BASENUM 1000000
#undef PROG
#define PROG bnspeed_main
@@ -72,33 +72,35 @@
#include <openssl/err.h>
#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
#define TIMES
# define TIMES
#endif
#ifndef _IRIX
#include <time.h>
# include <time.h>
#endif
#ifdef TIMES
#include <sys/types.h>
#include <sys/times.h>
# include <sys/types.h>
# include <sys/times.h>
#endif
/* Depending on the VMS version, the tms structure is perhaps defined.
The __TMS macro will show if it was. If it wasn't defined, we should
undefine TIMES, since that tells the rest of the program how things
should be handled. -- Richard Levitte */
/*
* Depending on the VMS version, the tms structure is perhaps defined. The
* __TMS macro will show if it was. If it wasn't defined, we should undefine
* TIMES, since that tells the rest of the program how things should be
* handled. -- Richard Levitte
*/
#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
#undef TIMES
# undef TIMES
#endif
#ifndef TIMES
#include <sys/timeb.h>
# include <sys/timeb.h>
#endif
#if defined(sun) || defined(__ultrix)
#define _POSIX_SOURCE
#include <limits.h>
#include <sys/param.h>
# define _POSIX_SOURCE
# include <limits.h>
# include <sys/param.h>
#endif
#include <openssl/bn.h>
@@ -107,127 +109,124 @@
/* The following if from times(3) man page. It may need to be changed */
#ifndef HZ
# ifndef CLK_TCK
# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
# define HZ 100.0
# else /* _BSD_CLK_TCK_ */
# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
# define HZ 100.0
# else /* _BSD_CLK_TCK_ */
# define HZ ((double)_BSD_CLK_TCK_)
# endif
# else /* CLK_TCK */
# else /* CLK_TCK */
# define HZ ((double)CLK_TCK)
# endif
#endif
#undef BUFSIZE
#define BUFSIZE ((long)1024*8)
int run=0;
#define BUFSIZE ((long)1024*8)
int run = 0;
static double Time_F(int s);
#define START 0
#define STOP 1
#define START 0
#define STOP 1
static double Time_F(int s)
{
double ret;
{
double ret;
#ifdef TIMES
static struct tms tstart,tend;
static struct tms tstart, tend;
if (s == START)
{
times(&tstart);
return(0);
}
else
{
times(&tend);
ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
return((ret < 1e-3)?1e-3:ret);
}
#else /* !times() */
static struct timeb tstart,tend;
long i;
if (s == START) {
times(&tstart);
return (0);
} else {
times(&tend);
ret = ((double)(tend.tms_utime - tstart.tms_utime)) / HZ;
return ((ret < 1e-3) ? 1e-3 : ret);
}
#else /* !times() */
static struct timeb tstart, tend;
long i;
if (s == START)
{
ftime(&tstart);
return(0);
}
else
{
ftime(&tend);
i=(long)tend.millitm-(long)tstart.millitm;
ret=((double)(tend.time-tstart.time))+((double)i)/1000.0;
return((ret < 0.001)?0.001:ret);
}
if (s == START) {
ftime(&tstart);
return (0);
} else {
ftime(&tend);
i = (long)tend.millitm - (long)tstart.millitm;
ret = ((double)(tend.time - tstart.time)) + ((double)i) / 1000.0;
return ((ret < 0.001) ? 0.001 : ret);
}
#endif
}
}
#define NUM_SIZES 5
static int sizes[NUM_SIZES]={128,256,512,1024,2048};
/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */
#define NUM_SIZES 5
static int sizes[NUM_SIZES] = { 128, 256, 512, 1024, 2048 };
void do_mul(BIGNUM *r,BIGNUM *a,BIGNUM *b,BN_CTX *ctx);
/*
* static int sizes[NUM_SIZES]={59,179,299,419,539};
*/
void do_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx);
int main(int argc, char **argv)
{
BN_CTX *ctx;
BIGNUM a,b,c;
{
BN_CTX *ctx;
BIGNUM a, b, c;
ctx=BN_CTX_new();
BN_init(&a);
BN_init(&b);
BN_init(&c);
ctx = BN_CTX_new();
BN_init(&a);
BN_init(&b);
BN_init(&c);
do_mul(&a,&b,&c,ctx);
}
do_mul(&a, &b, &c, ctx);
}
void do_mul(BIGNUM *r, BIGNUM *a, BIGNUM *b, BN_CTX *ctx)
{
int i,j,k;
double tm;
long num;
{
int i, j, k;
double tm;
long num;
for (i=0; i<NUM_SIZES; i++)
{
num=BASENUM;
if (i) num/=(i*3);
BN_rand(a,sizes[i],1,0);
for (j=i; j<NUM_SIZES; j++)
{
BN_rand(b,sizes[j],1,0);
Time_F(START);
for (k=0; k<num; k++)
BN_mul(r,b,a,ctx);
tm=Time_F(STOP);
printf("mul %4d x %4d -> %8.3fms\n",sizes[i],sizes[j],tm*1000.0/num);
}
}
for (i = 0; i < NUM_SIZES; i++) {
num = BASENUM;
if (i)
num /= (i * 3);
BN_rand(a, sizes[i], 1, 0);
for (j = i; j < NUM_SIZES; j++) {
BN_rand(b, sizes[j], 1, 0);
Time_F(START);
for (k = 0; k < num; k++)
BN_mul(r, b, a, ctx);
tm = Time_F(STOP);
printf("mul %4d x %4d -> %8.3fms\n", sizes[i], sizes[j],
tm * 1000.0 / num);
}
}
for (i=0; i<NUM_SIZES; i++)
{
num=BASENUM;
if (i) num/=(i*3);
BN_rand(a,sizes[i],1,0);
Time_F(START);
for (k=0; k<num; k++)
BN_sqr(r,a,ctx);
tm=Time_F(STOP);
printf("sqr %4d x %4d -> %8.3fms\n",sizes[i],sizes[i],tm*1000.0/num);
}
for (i=0; i<NUM_SIZES; i++)
{
num=BASENUM/10;
if (i) num/=(i*3);
BN_rand(a,sizes[i]-1,1,0);
for (j=i; j<NUM_SIZES; j++)
{
BN_rand(b,sizes[j],1,0);
Time_F(START);
for (k=0; k<100000; k++)
BN_div(r, NULL, b, a,ctx);
tm=Time_F(STOP);
printf("div %4d / %4d -> %8.3fms\n",sizes[j],sizes[i]-1,tm*1000.0/num);
}
}
}
for (i = 0; i < NUM_SIZES; i++) {
num = BASENUM;
if (i)
num /= (i * 3);
BN_rand(a, sizes[i], 1, 0);
Time_F(START);
for (k = 0; k < num; k++)
BN_sqr(r, a, ctx);
tm = Time_F(STOP);
printf("sqr %4d x %4d -> %8.3fms\n", sizes[i], sizes[i],
tm * 1000.0 / num);
}
for (i = 0; i < NUM_SIZES; i++) {
num = BASENUM / 10;
if (i)
num /= (i * 3);
BN_rand(a, sizes[i] - 1, 1, 0);
for (j = i; j < NUM_SIZES; j++) {
BN_rand(b, sizes[j], 1, 0);
Time_F(START);
for (k = 0; k < 100000; k++)
BN_div(r, NULL, b, a, ctx);
tm = Time_F(STOP);
printf("div %4d / %4d -> %8.3fms\n", sizes[j], sizes[i] - 1,
tm * 1000.0 / num);
}
}
}

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@@ -4,13 +4,13 @@
static int Rand(n)
{
unsigned char x[2];
RAND_pseudo_bytes(x,2);
return (x[0] + 2*x[1]);
RAND_pseudo_bytes(x, 2);
return (x[0] + 2 * x[1]);
}
static void bug(char *m, BIGNUM *a, BIGNUM *b)
{
printf("%s!\na=",m);
printf("%s!\na=", m);
BN_print_fp(stdout, a);
printf("\nb=");
BN_print_fp(stdout, b);
@@ -20,22 +20,23 @@ static void bug(char *m, BIGNUM *a, BIGNUM *b)
main()
{
BIGNUM *a=BN_new(), *b=BN_new(), *c=BN_new(), *d=BN_new(),
*C=BN_new(), *D=BN_new();
BN_RECP_CTX *recp=BN_RECP_CTX_new();
BN_CTX *ctx=BN_CTX_new();
BIGNUM *a = BN_new(), *b = BN_new(), *c = BN_new(), *d = BN_new(),
*C = BN_new(), *D = BN_new();
BN_RECP_CTX *recp = BN_RECP_CTX_new();
BN_CTX *ctx = BN_CTX_new();
for(;;) {
BN_pseudo_rand(a,Rand(),0,0);
BN_pseudo_rand(b,Rand(),0,0);
if (BN_is_zero(b)) continue;
for (;;) {
BN_pseudo_rand(a, Rand(), 0, 0);
BN_pseudo_rand(b, Rand(), 0, 0);
if (BN_is_zero(b))
continue;
BN_RECP_CTX_set(recp,b,ctx);
if (BN_div(C,D,a,b,ctx) != 1)
bug("BN_div failed",a,b);
if (BN_div_recp(c,d,a,recp,ctx) != 1)
bug("BN_div_recp failed",a,b);
else if (BN_cmp(c,C) != 0 || BN_cmp(c,C) != 0)
bug("mismatch",a,b);
BN_RECP_CTX_set(recp, b, ctx);
if (BN_div(C, D, a, b, ctx) != 1)
bug("BN_div failed", a, b);
if (BN_div_recp(c, d, a, recp, ctx) != 1)
bug("BN_div_recp failed", a, b);
else if (BN_cmp(c, C) != 0 || BN_cmp(c, C) != 0)
bug("mismatch", a, b);
}
}

View File

@@ -4,59 +4,58 @@
#include <openssl/tmdiff.h>
#include "bn_lcl.h"
#define SIZE 256
#define NUM (8*8*8)
#define MOD (8*8*8*8*8)
#define SIZE 256
#define NUM (8*8*8)
#define MOD (8*8*8*8*8)
main(argc,argv)
main(argc, argv)
int argc;
char *argv[];
{
BN_CTX ctx;
BIGNUM a,b,c,r,rr,t,l;
int j,i,size=SIZE,num=NUM,mod=MOD;
char *start,*end;
BN_MONT_CTX mont;
double d,md;
{
BN_CTX ctx;
BIGNUM a, b, c, r, rr, t, l;
int j, i, size = SIZE, num = NUM, mod = MOD;
char *start, *end;
BN_MONT_CTX mont;
double d, md;
BN_MONT_CTX_init(&mont);
BN_CTX_init(&ctx);
BN_init(&a);
BN_init(&b);
BN_init(&c);
BN_init(&r);
BN_MONT_CTX_init(&mont);
BN_CTX_init(&ctx);
BN_init(&a);
BN_init(&b);
BN_init(&c);
BN_init(&r);
start=ms_time_new();
end=ms_time_new();
while (size <= 1024*8)
{
BN_rand(&a,size,0,0);
BN_rand(&b,size,1,0);
BN_rand(&c,size,0,1);
start = ms_time_new();
end = ms_time_new();
while (size <= 1024 * 8) {
BN_rand(&a, size, 0, 0);
BN_rand(&b, size, 1, 0);
BN_rand(&c, size, 0, 1);
BN_mod(&a,&a,&c,&ctx);
BN_mod(&a, &a, &c, &ctx);
ms_time_get(start);
for (i=0; i<10; i++)
BN_MONT_CTX_set(&mont,&c,&ctx);
ms_time_get(end);
md=ms_time_diff(start,end);
ms_time_get(start);
for (i = 0; i < 10; i++)
BN_MONT_CTX_set(&mont, &c, &ctx);
ms_time_get(end);
md = ms_time_diff(start, end);
ms_time_get(start);
for (i=0; i<num; i++)
{
/* bn_mull(&r,&a,&b,&ctx); */
/* BN_sqr(&r,&a,&ctx); */
BN_mod_exp_mont(&r,&a,&b,&c,&ctx,&mont);
}
ms_time_get(end);
d=ms_time_diff(start,end)/* *50/33 */;
printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n",size,
d,num,d/num,(int)((d/num)*mod),md/10.0);
num/=8;
mod/=8;
if (num <= 0) num=1;
size*=2;
}
ms_time_get(start);
for (i = 0; i < num; i++) {
/* bn_mull(&r,&a,&b,&ctx); */
/* BN_sqr(&r,&a,&ctx); */
BN_mod_exp_mont(&r, &a, &b, &c, &ctx, &mont);
}
ms_time_get(end);
d = ms_time_diff(start, end) /* *50/33 */ ;
printf("%5d bit:%6.2f %6d %6.4f %4d m_set(%5.4f)\n", size,
d, num, d / num, (int)((d / num) * mod), md / 10.0);
num /= 8;
mod /= 8;
if (num <= 0)
num = 1;
size *= 2;
}
}
}

View File

@@ -7,21 +7,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -36,10 +36,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -51,7 +51,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -60,12 +60,13 @@
/* most of this code has been pilfered from my libdes speed.c program */
#define BASENUM 5000
#define BASENUM 5000
#define NUM_START 0
/* determine timings for modexp, modmul, modsqr, gcd, Kronecker symbol,
* modular inverse, or modular square roots */
/*
* determine timings for modexp, modmul, modsqr, gcd, Kronecker symbol,
* modular inverse, or modular square roots
*/
#define TEST_EXP
#undef TEST_MUL
#undef TEST_SQR
@@ -73,19 +74,18 @@
#undef TEST_KRON
#undef TEST_INV
#undef TEST_SQRT
#define P_MOD_64 9 /* least significant 6 bits for prime to be used for BN_sqrt timings */
#define P_MOD_64 9 /* least significant 6 bits for prime to be
* used for BN_sqrt timings */
#if defined(TEST_EXP) + defined(TEST_MUL) + defined(TEST_SQR) + defined(TEST_GCD) + defined(TEST_KRON) + defined(TEST_INV) +defined(TEST_SQRT) != 1
# error "choose one test"
# error "choose one test"
#endif
#if defined(TEST_INV) || defined(TEST_SQRT)
# define C_PRIME
# define C_PRIME
static void genprime_cb(int p, int n, void *arg);
#endif
#undef PROG
#define PROG bnspeed_main
@@ -98,33 +98,35 @@ static void genprime_cb(int p, int n, void *arg);
#include <openssl/rand.h>
#if !defined(OPENSSL_SYS_MSDOS) && (!defined(OPENSSL_SYS_VMS) || defined(__DECC)) && !defined(OPENSSL_SYS_MACOSX)
#define TIMES
# define TIMES
#endif
#ifndef _IRIX
#include <time.h>
# include <time.h>
#endif
#ifdef TIMES
#include <sys/types.h>
#include <sys/times.h>
# include <sys/types.h>
# include <sys/times.h>
#endif
/* Depending on the VMS version, the tms structure is perhaps defined.
The __TMS macro will show if it was. If it wasn't defined, we should
undefine TIMES, since that tells the rest of the program how things
should be handled. -- Richard Levitte */
/*
* Depending on the VMS version, the tms structure is perhaps defined. The
* __TMS macro will show if it was. If it wasn't defined, we should undefine
* TIMES, since that tells the rest of the program how things should be
* handled. -- Richard Levitte
*/
#if defined(OPENSSL_SYS_VMS_DECC) && !defined(__TMS)
#undef TIMES
# undef TIMES
#endif
#ifndef TIMES
#include <sys/timeb.h>
# include <sys/timeb.h>
#endif
#if defined(sun) || defined(__ultrix)
#define _POSIX_SOURCE
#include <limits.h>
#include <sys/param.h>
# define _POSIX_SOURCE
# include <limits.h>
# include <sys/param.h>
#endif
#include <openssl/bn.h>
@@ -133,221 +135,247 @@ static void genprime_cb(int p, int n, void *arg);
/* The following if from times(3) man page. It may need to be changed */
#ifndef HZ
# ifndef CLK_TCK
# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
# define HZ 100.0
# else /* _BSD_CLK_TCK_ */
# ifndef _BSD_CLK_TCK_ /* FreeBSD hack */
# define HZ 100.0
# else /* _BSD_CLK_TCK_ */
# define HZ ((double)_BSD_CLK_TCK_)
# endif
# else /* CLK_TCK */
# else /* CLK_TCK */
# define HZ ((double)CLK_TCK)
# endif
#endif
#undef BUFSIZE
#define BUFSIZE ((long)1024*8)
int run=0;
#define BUFSIZE ((long)1024*8)
int run = 0;
static double Time_F(int s);
#define START 0
#define STOP 1
#define START 0
#define STOP 1
static double Time_F(int s)
{
double ret;
{
double ret;
#ifdef TIMES
static struct tms tstart,tend;
static struct tms tstart, tend;
if (s == START)
{
times(&tstart);
return(0);
}
else
{
times(&tend);
ret=((double)(tend.tms_utime-tstart.tms_utime))/HZ;
return((ret < 1e-3)?1e-3:ret);
}
#else /* !times() */
static struct timeb tstart,tend;
long i;
if (s == START) {
times(&tstart);
return (0);
} else {
times(&tend);
ret = ((double)(tend.tms_utime - tstart.tms_utime)) / HZ;
return ((ret < 1e-3) ? 1e-3 : ret);
}
#else /* !times() */
static struct timeb tstart, tend;
long i;
if (s == START)
{
ftime(&tstart);
return(0);
}
else
{
ftime(&tend);
i=(long)tend.millitm-(long)tstart.millitm;
ret=((double)(tend.time-tstart.time))+((double)i)/1000.0;
return((ret < 0.001)?0.001:ret);
}
if (s == START) {
ftime(&tstart);
return (0);
} else {
ftime(&tend);
i = (long)tend.millitm - (long)tstart.millitm;
ret = ((double)(tend.time - tstart.time)) + ((double)i) / 1000.0;
return ((ret < 0.001) ? 0.001 : ret);
}
#endif
}
}
#define NUM_SIZES 7
#define NUM_SIZES 7
#if NUM_START > NUM_SIZES
# error "NUM_START > NUM_SIZES"
# error "NUM_START > NUM_SIZES"
#endif
static int sizes[NUM_SIZES]={128,256,512,1024,2048,4096,8192};
static int mul_c[NUM_SIZES]={8*8*8*8*8*8,8*8*8*8*8,8*8*8*8,8*8*8,8*8,8,1};
/*static int sizes[NUM_SIZES]={59,179,299,419,539}; */
static int sizes[NUM_SIZES] = { 128, 256, 512, 1024, 2048, 4096, 8192 };
static int mul_c[NUM_SIZES] =
{ 8 * 8 * 8 * 8 * 8 * 8, 8 * 8 * 8 * 8 * 8, 8 * 8 * 8 * 8, 8 * 8 * 8,
8 * 8, 8, 1
};
/*
* static int sizes[NUM_SIZES]={59,179,299,419,539};
*/
#define RAND_SEED(string) { const char str[] = string; RAND_seed(string, sizeof str); }
void do_mul_exp(BIGNUM *r,BIGNUM *a,BIGNUM *b,BIGNUM *c,BN_CTX *ctx);
void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx);
int main(int argc, char **argv)
{
BN_CTX *ctx;
BIGNUM *a,*b,*c,*r;
{
BN_CTX *ctx;
BIGNUM *a, *b, *c, *r;
#if 1
if (!CRYPTO_set_mem_debug_functions(0,0,0,0,0))
abort();
if (!CRYPTO_set_mem_debug_functions(0, 0, 0, 0, 0))
abort();
#endif
ctx=BN_CTX_new();
a=BN_new();
b=BN_new();
c=BN_new();
r=BN_new();
ctx = BN_CTX_new();
a = BN_new();
b = BN_new();
c = BN_new();
r = BN_new();
while (!RAND_status())
/* not enough bits */
RAND_SEED("I demand a manual recount!");
while (!RAND_status())
/* not enough bits */
RAND_SEED("I demand a manual recount!");
do_mul_exp(r,a,b,c,ctx);
return 0;
}
do_mul_exp(r, a, b, c, ctx);
return 0;
}
void do_mul_exp(BIGNUM *r, BIGNUM *a, BIGNUM *b, BIGNUM *c, BN_CTX *ctx)
{
int i,k;
double tm;
long num;
{
int i, k;
double tm;
long num;
num=BASENUM;
for (i=NUM_START; i<NUM_SIZES; i++)
{
num = BASENUM;
for (i = NUM_START; i < NUM_SIZES; i++) {
#ifdef C_PRIME
# ifdef TEST_SQRT
if (!BN_set_word(a, 64)) goto err;
if (!BN_set_word(b, P_MOD_64)) goto err;
# define ADD a
# define REM b
# else
# define ADD NULL
# define REM NULL
# endif
if (!BN_generate_prime(c,sizes[i],0,ADD,REM,genprime_cb,NULL)) goto err;
putc('\n', stderr);
fflush(stderr);
# ifdef TEST_SQRT
if (!BN_set_word(a, 64))
goto err;
if (!BN_set_word(b, P_MOD_64))
goto err;
# define ADD a
# define REM b
# else
# define ADD NULL
# define REM NULL
# endif
if (!BN_generate_prime(c, sizes[i], 0, ADD, REM, genprime_cb, NULL))
goto err;
putc('\n', stderr);
fflush(stderr);
#endif
for (k=0; k<num; k++)
{
if (k%50 == 0) /* Average over num/50 different choices of random numbers. */
{
if (!BN_pseudo_rand(a,sizes[i],1,0)) goto err;
for (k = 0; k < num; k++) {
if (k % 50 == 0) { /* Average over num/50 different choices of
* random numbers. */
if (!BN_pseudo_rand(a, sizes[i], 1, 0))
goto err;
if (!BN_pseudo_rand(b,sizes[i],1,0)) goto err;
if (!BN_pseudo_rand(b, sizes[i], 1, 0))
goto err;
#ifndef C_PRIME
if (!BN_pseudo_rand(c,sizes[i],1,1)) goto err;
if (!BN_pseudo_rand(c, sizes[i], 1, 1))
goto err;
#endif
#ifdef TEST_SQRT
if (!BN_mod_sqr(a,a,c,ctx)) goto err;
if (!BN_mod_sqr(b,b,c,ctx)) goto err;
#else
if (!BN_nnmod(a,a,c,ctx)) goto err;
if (!BN_nnmod(b,b,c,ctx)) goto err;
#endif
if (k == 0)
Time_F(START);
}
#if defined(TEST_EXP)
if (!BN_mod_exp(r,a,b,c,ctx)) goto err;
#elif defined(TEST_MUL)
{
int i = 0;
for (i = 0; i < 50; i++)
if (!BN_mod_mul(r,a,b,c,ctx)) goto err;
}
#elif defined(TEST_SQR)
{
int i = 0;
for (i = 0; i < 50; i++)
{
if (!BN_mod_sqr(r,a,c,ctx)) goto err;
if (!BN_mod_sqr(r,b,c,ctx)) goto err;
}
}
#elif defined(TEST_GCD)
if (!BN_gcd(r,a,b,ctx)) goto err;
if (!BN_gcd(r,b,c,ctx)) goto err;
if (!BN_gcd(r,c,a,ctx)) goto err;
#elif defined(TEST_KRON)
if (-2 == BN_kronecker(a,b,ctx)) goto err;
if (-2 == BN_kronecker(b,c,ctx)) goto err;
if (-2 == BN_kronecker(c,a,ctx)) goto err;
#elif defined(TEST_INV)
if (!BN_mod_inverse(r,a,c,ctx)) goto err;
if (!BN_mod_inverse(r,b,c,ctx)) goto err;
#else /* TEST_SQRT */
if (!BN_mod_sqrt(r,a,c,ctx)) goto err;
if (!BN_mod_sqrt(r,b,c,ctx)) goto err;
#endif
}
tm=Time_F(STOP);
printf(
#if defined(TEST_EXP)
"modexp %4d ^ %4d %% %4d"
#elif defined(TEST_MUL)
"50*modmul %4d %4d %4d"
#elif defined(TEST_SQR)
"100*modsqr %4d %4d %4d"
#elif defined(TEST_GCD)
"3*gcd %4d %4d %4d"
#elif defined(TEST_KRON)
"3*kronecker %4d %4d %4d"
#elif defined(TEST_INV)
"2*inv %4d %4d mod %4d"
#else /* TEST_SQRT */
"2*sqrt [prime == %d (mod 64)] %4d %4d mod %4d"
#endif
" -> %8.6fms %5.1f (%ld)\n",
#ifdef TEST_SQRT
P_MOD_64,
if (!BN_mod_sqr(a, a, c, ctx))
goto err;
if (!BN_mod_sqr(b, b, c, ctx))
goto err;
#else
if (!BN_nnmod(a, a, c, ctx))
goto err;
if (!BN_nnmod(b, b, c, ctx))
goto err;
#endif
sizes[i],sizes[i],sizes[i],tm*1000.0/num,tm*mul_c[i]/num, num);
num/=7;
if (num <= 0) num=1;
}
return;
if (k == 0)
Time_F(START);
}
#if defined(TEST_EXP)
if (!BN_mod_exp(r, a, b, c, ctx))
goto err;
#elif defined(TEST_MUL)
{
int i = 0;
for (i = 0; i < 50; i++)
if (!BN_mod_mul(r, a, b, c, ctx))
goto err;
}
#elif defined(TEST_SQR)
{
int i = 0;
for (i = 0; i < 50; i++) {
if (!BN_mod_sqr(r, a, c, ctx))
goto err;
if (!BN_mod_sqr(r, b, c, ctx))
goto err;
}
}
#elif defined(TEST_GCD)
if (!BN_gcd(r, a, b, ctx))
goto err;
if (!BN_gcd(r, b, c, ctx))
goto err;
if (!BN_gcd(r, c, a, ctx))
goto err;
#elif defined(TEST_KRON)
if (-2 == BN_kronecker(a, b, ctx))
goto err;
if (-2 == BN_kronecker(b, c, ctx))
goto err;
if (-2 == BN_kronecker(c, a, ctx))
goto err;
#elif defined(TEST_INV)
if (!BN_mod_inverse(r, a, c, ctx))
goto err;
if (!BN_mod_inverse(r, b, c, ctx))
goto err;
#else /* TEST_SQRT */
if (!BN_mod_sqrt(r, a, c, ctx))
goto err;
if (!BN_mod_sqrt(r, b, c, ctx))
goto err;
#endif
}
tm = Time_F(STOP);
printf(
#if defined(TEST_EXP)
"modexp %4d ^ %4d %% %4d"
#elif defined(TEST_MUL)
"50*modmul %4d %4d %4d"
#elif defined(TEST_SQR)
"100*modsqr %4d %4d %4d"
#elif defined(TEST_GCD)
"3*gcd %4d %4d %4d"
#elif defined(TEST_KRON)
"3*kronecker %4d %4d %4d"
#elif defined(TEST_INV)
"2*inv %4d %4d mod %4d"
#else /* TEST_SQRT */
"2*sqrt [prime == %d (mod 64)] %4d %4d mod %4d"
#endif
" -> %8.6fms %5.1f (%ld)\n",
#ifdef TEST_SQRT
P_MOD_64,
#endif
sizes[i], sizes[i], sizes[i], tm * 1000.0 / num,
tm * mul_c[i] / num, num);
num /= 7;
if (num <= 0)
num = 1;
}
return;
err:
ERR_print_errors_fp(stderr);
}
ERR_print_errors_fp(stderr);
}
#ifdef C_PRIME
static void genprime_cb(int p, int n, void *arg)
{
char c='*';
{
char c = '*';
if (p == 0) c='.';
if (p == 1) c='+';
if (p == 2) c='*';
if (p == 3) c='\n';
putc(c, stderr);
fflush(stderr);
(void)n;
(void)arg;
}
if (p == 0)
c = '.';
if (p == 1)
c = '+';
if (p == 2)
c = '*';
if (p == 3)
c = '\n';
putc(c, stderr);
fflush(stderr);
(void)n;
(void)arg;
}
#endif

View File

@@ -5,21 +5,21 @@
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -34,10 +34,10 @@
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -49,7 +49,7 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
@@ -67,138 +67,183 @@
#include <openssl/rand.h>
#include <openssl/err.h>
#define NUM_BITS (BN_BITS*2)
#define NUM_BITS (BN_BITS*2)
static const char rnd_seed[] = "string to make the random number generator think it has entropy";
static const char rnd_seed[] =
"string to make the random number generator think it has entropy";
/*
* test_exp_mod_zero tests that x**0 mod 1 == 0. It returns zero on success.
*/
static int test_exp_mod_zero()
{
BIGNUM a, p, m;
BIGNUM r;
BN_CTX *ctx = BN_CTX_new();
int ret = 1;
BN_init(&m);
BN_one(&m);
BN_init(&a);
BN_one(&a);
BN_init(&p);
BN_zero(&p);
BN_init(&r);
BN_mod_exp(&r, &a, &p, &m, ctx);
BN_CTX_free(ctx);
if (BN_is_zero(&r))
ret = 0;
else {
printf("1**0 mod 1 = ");
BN_print_fp(stdout, &r);
printf(", should be 0\n");
}
BN_free(&r);
BN_free(&a);
BN_free(&p);
BN_free(&m);
return ret;
}
int main(int argc, char *argv[])
{
BN_CTX *ctx;
BIO *out=NULL;
int i,ret;
unsigned char c;
BIGNUM *r_mont,*r_mont_const,*r_recp,*r_simple,*a,*b,*m;
{
BN_CTX *ctx;
BIO *out = NULL;
int i, ret;
unsigned char c;
BIGNUM *r_mont, *r_mont_const, *r_recp, *r_simple, *a, *b, *m;
RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_rand may fail, and we don't
* even check its return value
* (which we should) */
RAND_seed(rnd_seed, sizeof rnd_seed); /* or BN_rand may fail, and we
* don't even check its return
* value (which we should) */
ERR_load_BN_strings();
ERR_load_BN_strings();
ctx=BN_CTX_new();
if (ctx == NULL) EXIT(1);
r_mont=BN_new();
r_mont_const=BN_new();
r_recp=BN_new();
r_simple=BN_new();
a=BN_new();
b=BN_new();
m=BN_new();
if ( (r_mont == NULL) || (r_recp == NULL) ||
(a == NULL) || (b == NULL))
goto err;
ctx = BN_CTX_new();
if (ctx == NULL)
EXIT(1);
r_mont = BN_new();
r_mont_const = BN_new();
r_recp = BN_new();
r_simple = BN_new();
a = BN_new();
b = BN_new();
m = BN_new();
if ((r_mont == NULL) || (r_recp == NULL) || (a == NULL) || (b == NULL))
goto err;
out=BIO_new(BIO_s_file());
out = BIO_new(BIO_s_file());
if (out == NULL) EXIT(1);
BIO_set_fp(out,stdout,BIO_NOCLOSE);
if (out == NULL)
EXIT(1);
BIO_set_fp(out, stdout, BIO_NOCLOSE);
for (i=0; i<200; i++)
{
RAND_bytes(&c,1);
c=(c%BN_BITS)-BN_BITS2;
BN_rand(a,NUM_BITS+c,0,0);
for (i = 0; i < 200; i++) {
RAND_bytes(&c, 1);
c = (c % BN_BITS) - BN_BITS2;
BN_rand(a, NUM_BITS + c, 0, 0);
RAND_bytes(&c,1);
c=(c%BN_BITS)-BN_BITS2;
BN_rand(b,NUM_BITS+c,0,0);
RAND_bytes(&c, 1);
c = (c % BN_BITS) - BN_BITS2;
BN_rand(b, NUM_BITS + c, 0, 0);
RAND_bytes(&c,1);
c=(c%BN_BITS)-BN_BITS2;
BN_rand(m,NUM_BITS+c,0,1);
RAND_bytes(&c, 1);
c = (c % BN_BITS) - BN_BITS2;
BN_rand(m, NUM_BITS + c, 0, 1);
BN_mod(a,a,m,ctx);
BN_mod(b,b,m,ctx);
BN_mod(a, a, m, ctx);
BN_mod(b, b, m, ctx);
ret=BN_mod_exp_mont(r_mont,a,b,m,ctx,NULL);
if (ret <= 0)
{
printf("BN_mod_exp_mont() problems\n");
ERR_print_errors(out);
EXIT(1);
}
ret = BN_mod_exp_mont(r_mont, a, b, m, ctx, NULL);
if (ret <= 0) {
printf("BN_mod_exp_mont() problems\n");
ERR_print_errors(out);
EXIT(1);
}
ret=BN_mod_exp_recp(r_recp,a,b,m,ctx);
if (ret <= 0)
{
printf("BN_mod_exp_recp() problems\n");
ERR_print_errors(out);
EXIT(1);
}
ret = BN_mod_exp_recp(r_recp, a, b, m, ctx);
if (ret <= 0) {
printf("BN_mod_exp_recp() problems\n");
ERR_print_errors(out);
EXIT(1);
}
ret=BN_mod_exp_simple(r_simple,a,b,m,ctx);
if (ret <= 0)
{
printf("BN_mod_exp_simple() problems\n");
ERR_print_errors(out);
EXIT(1);
}
ret = BN_mod_exp_simple(r_simple, a, b, m, ctx);
if (ret <= 0) {
printf("BN_mod_exp_simple() problems\n");
ERR_print_errors(out);
EXIT(1);
}
ret=BN_mod_exp_mont_consttime(r_mont_const,a,b,m,ctx,NULL);
if (ret <= 0)
{
printf("BN_mod_exp_mont_consttime() problems\n");
ERR_print_errors(out);
EXIT(1);
}
ret = BN_mod_exp_mont_consttime(r_mont_const, a, b, m, ctx, NULL);
if (ret <= 0) {
printf("BN_mod_exp_mont_consttime() problems\n");
ERR_print_errors(out);
EXIT(1);
}
if (BN_cmp(r_simple, r_mont) == 0
&& BN_cmp(r_simple,r_recp) == 0
&& BN_cmp(r_simple,r_mont_const) == 0)
{
printf(".");
fflush(stdout);
}
else
{
if (BN_cmp(r_simple,r_mont) != 0)
printf("\nsimple and mont results differ\n");
if (BN_cmp(r_simple,r_mont_const) != 0)
printf("\nsimple and mont const time results differ\n");
if (BN_cmp(r_simple,r_recp) != 0)
printf("\nsimple and recp results differ\n");
if (BN_cmp(r_simple, r_mont) == 0
&& BN_cmp(r_simple, r_recp) == 0
&& BN_cmp(r_simple, r_mont_const) == 0) {
printf(".");
fflush(stdout);
} else {
if (BN_cmp(r_simple, r_mont) != 0)
printf("\nsimple and mont results differ\n");
if (BN_cmp(r_simple, r_mont_const) != 0)
printf("\nsimple and mont const time results differ\n");
if (BN_cmp(r_simple, r_recp) != 0)
printf("\nsimple and recp results differ\n");
printf("a (%3d) = ",BN_num_bits(a)); BN_print(out,a);
printf("\nb (%3d) = ",BN_num_bits(b)); BN_print(out,b);
printf("\nm (%3d) = ",BN_num_bits(m)); BN_print(out,m);
printf("\nsimple ="); BN_print(out,r_simple);
printf("\nrecp ="); BN_print(out,r_recp);
printf("\nmont ="); BN_print(out,r_mont);
printf("\nmont_ct ="); BN_print(out,r_mont_const);
printf("\n");
EXIT(1);
}
}
BN_free(r_mont);
BN_free(r_mont_const);
BN_free(r_recp);
BN_free(r_simple);
BN_free(a);
BN_free(b);
BN_free(m);
BN_CTX_free(ctx);
ERR_remove_thread_state(NULL);
CRYPTO_mem_leaks(out);
BIO_free(out);
printf(" done\n");
EXIT(0);
err:
ERR_load_crypto_strings();
ERR_print_errors(out);
printf("a (%3d) = ", BN_num_bits(a));
BN_print(out, a);
printf("\nb (%3d) = ", BN_num_bits(b));
BN_print(out, b);
printf("\nm (%3d) = ", BN_num_bits(m));
BN_print(out, m);
printf("\nsimple =");
BN_print(out, r_simple);
printf("\nrecp =");
BN_print(out, r_recp);
printf("\nmont =");
BN_print(out, r_mont);
printf("\nmont_ct =");
BN_print(out, r_mont_const);
printf("\n");
EXIT(1);
}
}
BN_free(r_mont);
BN_free(r_mont_const);
BN_free(r_recp);
BN_free(r_simple);
BN_free(a);
BN_free(b);
BN_free(m);
BN_CTX_free(ctx);
ERR_remove_thread_state(NULL);
CRYPTO_mem_leaks(out);
BIO_free(out);
printf("\n");
if (test_exp_mod_zero() != 0)
goto err;
printf("done\n");
EXIT(0);
err:
ERR_load_crypto_strings();
ERR_print_errors(out);
#ifdef OPENSSL_SYS_NETWARE
printf("ERROR\n");
#endif
EXIT(1);
return(1);
}
EXIT(1);
return (1);
}

View File

346
crypto/bn/rsaz_exp.c Normal file
View File

@@ -0,0 +1,346 @@
/*****************************************************************************
* *
* Copyright (c) 2012, Intel Corporation *
* *
* All rights reserved. *
* *
* Redistribution and use in source and binary forms, with or without *
* modification, are permitted provided that the following conditions are *
* met: *
* *
* * Redistributions of source code must retain the above copyright *
* notice, this list of conditions and the following disclaimer. *
* *
* * Redistributions in binary form must reproduce the above copyright *
* notice, this list of conditions and the following disclaimer in the *
* documentation and/or other materials provided with the *
* distribution. *
* *
* * Neither the name of the Intel Corporation nor the names of its *
* contributors may be used to endorse or promote products derived from *
* this software without specific prior written permission. *
* *
* *
* THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY *
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE *
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR *
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR *
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, *
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, *
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR *
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF *
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING *
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS *
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *
* *
******************************************************************************
* Developers and authors: *
* Shay Gueron (1, 2), and Vlad Krasnov (1) *
* (1) Intel Corporation, Israel Development Center, Haifa, Israel *
* (2) University of Haifa, Israel *
*****************************************************************************/
#include "rsaz_exp.h"
#ifdef RSAZ_ENABLED
/*
* See crypto/bn/asm/rsaz-avx2.pl for further details.
*/
void rsaz_1024_norm2red_avx2(void *red, const void *norm);
void rsaz_1024_mul_avx2(void *ret, const void *a, const void *b,
const void *n, BN_ULONG k);
void rsaz_1024_sqr_avx2(void *ret, const void *a, const void *n, BN_ULONG k,
int cnt);
void rsaz_1024_scatter5_avx2(void *tbl, const void *val, int i);
void rsaz_1024_gather5_avx2(void *val, const void *tbl, int i);
void rsaz_1024_red2norm_avx2(void *norm, const void *red);
#if defined(__GNUC__)
# define ALIGN64 __attribute__((aligned(64)))
#elif defined(_MSC_VER)
# define ALIGN64 __declspec(align(64))
#elif defined(__SUNPRO_C)
# define ALIGN64
# pragma align 64(one,two80)
#else
/* not fatal, might hurt performance a little */
# define ALIGN64
#endif
ALIGN64 static const BN_ULONG one[40] = {
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
ALIGN64 static const BN_ULONG two80[40] = {
0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
const BN_ULONG base_norm[16],
const BN_ULONG exponent[16],
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
BN_ULONG k0)
{
unsigned char storage[320 * 3 + 32 * 9 * 16 + 64]; /* 5.5KB */
unsigned char *p_str = storage + (64 - ((size_t)storage % 64));
unsigned char *a_inv, *m, *result;
unsigned char *table_s = p_str + 320 * 3;
unsigned char *R2 = table_s; /* borrow */
int index;
int wvalue;
if ((((size_t)p_str & 4095) + 320) >> 12) {
result = p_str;
a_inv = p_str + 320;
m = p_str + 320 * 2; /* should not cross page */
} else {
m = p_str; /* should not cross page */
result = p_str + 320;
a_inv = p_str + 320 * 2;
}
rsaz_1024_norm2red_avx2(m, m_norm);
rsaz_1024_norm2red_avx2(a_inv, base_norm);
rsaz_1024_norm2red_avx2(R2, RR);
rsaz_1024_mul_avx2(R2, R2, R2, m, k0);
rsaz_1024_mul_avx2(R2, R2, two80, m, k0);
/* table[0] = 1 */
rsaz_1024_mul_avx2(result, R2, one, m, k0);
/* table[1] = a_inv^1 */
rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 0);
rsaz_1024_scatter5_avx2(table_s, a_inv, 1);
/* table[2] = a_inv^2 */
rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 2);
#if 0
/* this is almost 2x smaller and less than 1% slower */
for (index = 3; index < 32; index++) {
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, index);
}
#else
/* table[4] = a_inv^4 */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 4);
/* table[8] = a_inv^8 */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 8);
/* table[16] = a_inv^16 */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 16);
/* table[17] = a_inv^17 */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 17);
/* table[3] */
rsaz_1024_gather5_avx2(result, table_s, 2);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 3);
/* table[6] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 6);
/* table[12] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 12);
/* table[24] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 24);
/* table[25] */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 25);
/* table[5] */
rsaz_1024_gather5_avx2(result, table_s, 4);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 5);
/* table[10] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 10);
/* table[20] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 20);
/* table[21] */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 21);
/* table[7] */
rsaz_1024_gather5_avx2(result, table_s, 6);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 7);
/* table[14] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 14);
/* table[28] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 28);
/* table[29] */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 29);
/* table[9] */
rsaz_1024_gather5_avx2(result, table_s, 8);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 9);
/* table[18] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 18);
/* table[19] */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 19);
/* table[11] */
rsaz_1024_gather5_avx2(result, table_s, 10);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 11);
/* table[22] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 22);
/* table[23] */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 23);
/* table[13] */
rsaz_1024_gather5_avx2(result, table_s, 12);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 13);
/* table[26] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 26);
/* table[27] */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 27);
/* table[15] */
rsaz_1024_gather5_avx2(result, table_s, 14);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 15);
/* table[30] */
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 30);
/* table[31] */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 31);
#endif
/* load first window */
p_str = (unsigned char *)exponent;
wvalue = p_str[127] >> 3;
rsaz_1024_gather5_avx2(result, table_s, wvalue);
index = 1014;
while (index > -1) { /* loop for the remaining 127 windows */
rsaz_1024_sqr_avx2(result, result, m, k0, 5);
wvalue = *((unsigned short *)&p_str[index / 8]);
wvalue = (wvalue >> (index % 8)) & 31;
index -= 5;
rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
}
/* square four times */
rsaz_1024_sqr_avx2(result, result, m, k0, 4);
wvalue = p_str[0] & 15;
rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); /* borrow a_inv */
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
/* from Montgomery */
rsaz_1024_mul_avx2(result, result, one, m, k0);
rsaz_1024_red2norm_avx2(result_norm, result);
OPENSSL_cleanse(storage, sizeof(storage));
}
/*
* See crypto/bn/rsaz-x86_64.pl for further details.
*/
void rsaz_512_mul(void *ret, const void *a, const void *b, const void *n,
BN_ULONG k);
void rsaz_512_mul_scatter4(void *ret, const void *a, const void *n,
BN_ULONG k, const void *tbl, unsigned int power);
void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl,
const void *n, BN_ULONG k, unsigned int power);
void rsaz_512_mul_by_one(void *ret, const void *a, const void *n, BN_ULONG k);
void rsaz_512_sqr(void *ret, const void *a, const void *n, BN_ULONG k,
int cnt);
void rsaz_512_scatter4(void *tbl, const BN_ULONG *val, int power);
void rsaz_512_gather4(BN_ULONG *val, const void *tbl, int power);
void RSAZ_512_mod_exp(BN_ULONG result[8],
const BN_ULONG base[8], const BN_ULONG exponent[8],
const BN_ULONG m[8], BN_ULONG k0, const BN_ULONG RR[8])
{
unsigned char storage[16 * 8 * 8 + 64 * 2 + 64]; /* 1.2KB */
unsigned char *table = storage + (64 - ((size_t)storage % 64));
BN_ULONG *a_inv = (BN_ULONG *)(table + 16 * 8 * 8);
BN_ULONG *temp = (BN_ULONG *)(table + 16 * 8 * 8 + 8 * 8);
unsigned char *p_str = (unsigned char *)exponent;
int index;
unsigned int wvalue;
/* table[0] = 1_inv */
temp[0] = 0 - m[0];
temp[1] = ~m[1];
temp[2] = ~m[2];
temp[3] = ~m[3];
temp[4] = ~m[4];
temp[5] = ~m[5];
temp[6] = ~m[6];
temp[7] = ~m[7];
rsaz_512_scatter4(table, temp, 0);
/* table [1] = a_inv^1 */
rsaz_512_mul(a_inv, base, RR, m, k0);
rsaz_512_scatter4(table, a_inv, 1);
/* table [2] = a_inv^2 */
rsaz_512_sqr(temp, a_inv, m, k0, 1);
rsaz_512_scatter4(table, temp, 2);
for (index = 3; index < 16; index++)
rsaz_512_mul_scatter4(temp, a_inv, m, k0, table, index);
/* load first window */
wvalue = p_str[63];
rsaz_512_gather4(temp, table, wvalue >> 4);
rsaz_512_sqr(temp, temp, m, k0, 4);
rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0xf);
for (index = 62; index >= 0; index--) {
wvalue = p_str[index];
rsaz_512_sqr(temp, temp, m, k0, 4);
rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue >> 4);
rsaz_512_sqr(temp, temp, m, k0, 4);
rsaz_512_mul_gather4(temp, temp, table, m, k0, wvalue & 0x0f);
}
/* from Montgomery */
rsaz_512_mul_by_one(result, temp, m, k0);
OPENSSL_cleanse(storage, sizeof(storage));
}
#else
# if defined(PEDANTIC) || defined(__DECC) || defined(__clang__)
static void *dummy = &dummy;
# endif
#endif

56
crypto/bn/rsaz_exp.h Normal file
View File

@@ -0,0 +1,56 @@
/******************************************************************************
* Copyright(c) 2012, Intel Corp.
* Developers and authors:
* Shay Gueron (1, 2), and Vlad Krasnov (1)
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
* (2) University of Haifa, Israel
******************************************************************************
* LICENSE:
* This submission to OpenSSL is to be made available under the OpenSSL
* license, and only to the OpenSSL project, in order to allow integration
* into the publicly distributed code.
* The use of this code, or portions of this code, or concepts embedded in
* this code, or modification of this code and/or algorithm(s) in it, or the
* use of this code for any other purpose than stated above, requires special
* licensing.
******************************************************************************
* DISCLAIMER:
* THIS SOFTWARE IS PROVIDED BY THE CONTRIBUTORS AND THE COPYRIGHT OWNERS
* ``AS IS''. ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS OR THE COPYRIGHT
* OWNERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
******************************************************************************/
#ifndef RSAZ_EXP_H
# define RSAZ_EXP_H
# undef RSAZ_ENABLED
# if defined(OPENSSL_BN_ASM_MONT) && \
(defined(__x86_64) || defined(__x86_64__) || \
defined(_M_AMD64) || defined(_M_X64))
# define RSAZ_ENABLED
# include <openssl/bn.h>
void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16],
const BN_ULONG base_norm[16],
const BN_ULONG exponent[16],
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
BN_ULONG k0);
int rsaz_avx2_eligible();
void RSAZ_512_mod_exp(BN_ULONG result[8],
const BN_ULONG base_norm[8], const BN_ULONG exponent[8],
const BN_ULONG m_norm[8], BN_ULONG k0,
const BN_ULONG RR[8]);
# endif
#endif

View File

@@ -7,7 +7,7 @@
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
@@ -60,9 +60,9 @@
bn_div_words_abort(int i)
{
#ifdef BN_DEBUG
#if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16)
fprintf(stderr,"Division would overflow (%d)\n",i);
#endif
abort();
# if !defined(OPENSSL_NO_STDIO) && !defined(OPENSSL_SYS_WIN16)
fprintf(stderr, "Division would overflow (%d)\n", i);
# endif
abort();
#endif
}

Binary file not shown.

View File

@@ -1,452 +0,0 @@
.file "asm/x86-mont.s"
.text
.globl _bn_mul_mont
.align 4
_bn_mul_mont:
L_bn_mul_mont_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
movl 40(%esp),%edi
cmpl $4,%edi
jl L000just_leave
leal 20(%esp),%esi
leal 24(%esp),%edx
movl %esp,%ebp
addl $2,%edi
negl %edi
leal -32(%esp,%edi,4),%esp
negl %edi
movl %esp,%eax
subl %edx,%eax
andl $2047,%eax
subl %eax,%esp
xorl %esp,%edx
andl $2048,%edx
xorl $2048,%edx
subl %edx,%esp
andl $-64,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl 16(%esi),%esi
movl (%esi),%esi
movl %eax,4(%esp)
movl %ebx,8(%esp)
movl %ecx,12(%esp)
movl %edx,16(%esp)
movl %esi,20(%esp)
leal -3(%edi),%ebx
movl %ebp,24(%esp)
leal _OPENSSL_ia32cap_P,%eax
btl $26,(%eax)
jnc L001non_sse2
movl $-1,%eax
movd %eax,%mm7
movl 8(%esp),%esi
movl 12(%esp),%edi
movl 16(%esp),%ebp
xorl %edx,%edx
xorl %ecx,%ecx
movd (%edi),%mm4
movd (%esi),%mm5
movd (%ebp),%mm3
pmuludq %mm4,%mm5
movq %mm5,%mm2
movq %mm5,%mm0
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
incl %ecx
.align 4,0x90
L0021st:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
leal 1(%ecx),%ecx
cmpl %ebx,%ecx
jl L0021st
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm2,%mm3
movq %mm3,32(%esp,%ebx,4)
incl %edx
L003outer:
xorl %ecx,%ecx
movd (%edi,%edx,4),%mm4
movd (%esi),%mm5
movd 32(%esp),%mm6
movd (%ebp),%mm3
pmuludq %mm4,%mm5
paddq %mm6,%mm5
movq %mm5,%mm0
movq %mm5,%mm2
pand %mm7,%mm0
pmuludq 20(%esp),%mm5
pmuludq %mm5,%mm3
paddq %mm0,%mm3
movd 36(%esp),%mm6
movd 4(%ebp),%mm1
movd 4(%esi),%mm0
psrlq $32,%mm2
psrlq $32,%mm3
paddq %mm6,%mm2
incl %ecx
decl %ebx
L004inner:
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
movd 36(%esp,%ecx,4),%mm6
pand %mm7,%mm0
movd 4(%ebp,%ecx,4),%mm1
paddq %mm0,%mm3
movd 4(%esi,%ecx,4),%mm0
psrlq $32,%mm2
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm3
paddq %mm6,%mm2
decl %ebx
leal 1(%ecx),%ecx
jnz L004inner
movl %ecx,%ebx
pmuludq %mm4,%mm0
pmuludq %mm5,%mm1
paddq %mm0,%mm2
paddq %mm1,%mm3
movq %mm2,%mm0
pand %mm7,%mm0
paddq %mm0,%mm3
movd %mm3,28(%esp,%ecx,4)
psrlq $32,%mm2
psrlq $32,%mm3
movd 36(%esp,%ebx,4),%mm6
paddq %mm2,%mm3
paddq %mm6,%mm3
movq %mm3,32(%esp,%ebx,4)
leal 1(%edx),%edx
cmpl %ebx,%edx
jle L003outer
emms
jmp L005common_tail
.align 4,0x90
L001non_sse2:
movl 8(%esp),%esi
leal 1(%ebx),%ebp
movl 12(%esp),%edi
xorl %ecx,%ecx
movl %esi,%edx
andl $1,%ebp
subl %edi,%edx
leal 4(%edi,%ebx,4),%eax
orl %edx,%ebp
movl (%edi),%edi
jz L006bn_sqr_mont
movl %eax,28(%esp)
movl (%esi),%eax
xorl %edx,%edx
.align 4,0x90
L007mull:
movl %edx,%ebp
mull %edi
addl %eax,%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
movl (%esi,%ecx,4),%eax
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L007mull
movl %edx,%ebp
mull %edi
movl 20(%esp),%edi
addl %ebp,%eax
movl 16(%esp),%esi
adcl $0,%edx
imull 32(%esp),%edi
movl %eax,32(%esp,%ebx,4)
xorl %ecx,%ecx
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
movl (%esi),%eax
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
incl %ecx
jmp L0082ndmadd
.align 4,0x90
L0091stmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,28(%esp,%ecx,4)
jl L0091stmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
addl %eax,%ebp
adcl $0,%edx
imull 32(%esp),%edi
xorl %ecx,%ecx
addl 36(%esp,%ebx,4),%edx
movl %ebp,32(%esp,%ebx,4)
adcl $0,%ecx
movl (%esi),%eax
movl %edx,36(%esp,%ebx,4)
movl %ecx,40(%esp,%ebx,4)
mull %edi
addl 32(%esp),%eax
movl 4(%esi),%eax
adcl $0,%edx
movl $1,%ecx
.align 4,0x90
L0082ndmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0082ndmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
xorl %eax,%eax
movl 12(%esp),%ecx
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
leal 4(%ecx),%ecx
movl %edx,32(%esp,%ebx,4)
cmpl 28(%esp),%ecx
movl %eax,36(%esp,%ebx,4)
je L005common_tail
movl (%ecx),%edi
movl 8(%esp),%esi
movl %ecx,12(%esp)
xorl %ecx,%ecx
xorl %edx,%edx
movl (%esi),%eax
jmp L0091stmadd
.align 4,0x90
L006bn_sqr_mont:
movl %ebx,(%esp)
movl %ecx,12(%esp)
movl %edi,%eax
mull %edi
movl %eax,32(%esp)
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
incl %ecx
.align 4,0x90
L010sqr:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal 1(%ecx),%ecx
adcl $0,%edx
leal (%ebx,%eax,2),%ebp
shrl $31,%eax
cmpl (%esp),%ecx
movl %eax,%ebx
movl %ebp,28(%esp,%ecx,4)
jl L010sqr
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
movl 20(%esp),%edi
adcl $0,%edx
movl 16(%esp),%esi
leal (%ebx,%eax,2),%ebp
imull 32(%esp),%edi
shrl $31,%eax
movl %ebp,32(%esp,%ecx,4)
leal (%eax,%edx,2),%ebp
movl (%esi),%eax
shrl $31,%edx
movl %ebp,36(%esp,%ecx,4)
movl %edx,40(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
movl %ecx,%ebx
adcl $0,%edx
movl 4(%esi),%eax
movl $1,%ecx
.align 4,0x90
L0113rdmadd:
movl %edx,%ebp
mull %edi
addl 32(%esp,%ecx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
movl 4(%esi,%ecx,4),%eax
adcl $0,%edx
movl %ebp,28(%esp,%ecx,4)
movl %edx,%ebp
mull %edi
addl 36(%esp,%ecx,4),%ebp
leal 2(%ecx),%ecx
adcl $0,%edx
addl %eax,%ebp
movl (%esi,%ecx,4),%eax
adcl $0,%edx
cmpl %ebx,%ecx
movl %ebp,24(%esp,%ecx,4)
jl L0113rdmadd
movl %edx,%ebp
mull %edi
addl 32(%esp,%ebx,4),%ebp
adcl $0,%edx
addl %eax,%ebp
adcl $0,%edx
movl %ebp,28(%esp,%ebx,4)
movl 12(%esp),%ecx
xorl %eax,%eax
movl 8(%esp),%esi
addl 36(%esp,%ebx,4),%edx
adcl 40(%esp,%ebx,4),%eax
movl %edx,32(%esp,%ebx,4)
cmpl %ebx,%ecx
movl %eax,36(%esp,%ebx,4)
je L005common_tail
movl 4(%esi,%ecx,4),%edi
leal 1(%ecx),%ecx
movl %edi,%eax
movl %ecx,12(%esp)
mull %edi
addl 32(%esp,%ecx,4),%eax
adcl $0,%edx
movl %eax,32(%esp,%ecx,4)
xorl %ebp,%ebp
cmpl %ebx,%ecx
leal 1(%ecx),%ecx
je L012sqrlast
movl %edx,%ebx
shrl $1,%edx
andl $1,%ebx
.align 4,0x90
L013sqradd:
movl (%esi,%ecx,4),%eax
movl %edx,%ebp
mull %edi
addl %ebp,%eax
leal (%eax,%eax,1),%ebp
adcl $0,%edx
shrl $31,%eax
addl 32(%esp,%ecx,4),%ebp
leal 1(%ecx),%ecx
adcl $0,%eax
addl %ebx,%ebp
adcl $0,%eax
cmpl (%esp),%ecx
movl %ebp,28(%esp,%ecx,4)
movl %eax,%ebx
jle L013sqradd
movl %edx,%ebp
leal (%ebx,%edx,2),%edx
shrl $31,%ebp
L012sqrlast:
movl 20(%esp),%edi
movl 16(%esp),%esi
imull 32(%esp),%edi
addl 32(%esp,%ecx,4),%edx
movl (%esi),%eax
adcl $0,%ebp
movl %edx,32(%esp,%ecx,4)
movl %ebp,36(%esp,%ecx,4)
mull %edi
addl 32(%esp),%eax
leal -1(%ecx),%ebx
adcl $0,%edx
movl $1,%ecx
movl 4(%esi),%eax
jmp L0113rdmadd
.align 4,0x90
L005common_tail:
movl 16(%esp),%ebp
movl 4(%esp),%edi
leal 32(%esp),%esi
movl (%esi),%eax
movl %ebx,%ecx
xorl %edx,%edx
.align 4,0x90
L014sub:
sbbl (%ebp,%edx,4),%eax
movl %eax,(%edi,%edx,4)
decl %ecx
movl 4(%esi,%edx,4),%eax
leal 1(%edx),%edx
jge L014sub
sbbl $0,%eax
andl %eax,%esi
notl %eax
movl %edi,%ebp
andl %eax,%ebp
orl %ebp,%esi
.align 4,0x90
L015copy:
movl (%esi,%ebx,4),%eax
movl %eax,(%edi,%ebx,4)
movl %ecx,32(%esp,%ebx,4)
decl %ebx
jge L015copy
movl 24(%esp),%esp
movl $1,%eax
L000just_leave:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte 111,114,103,62,0
.comm _OPENSSL_ia32cap_P,4