提交 4d524040 编写于 作者: A Andy Polyakov

Change bn_mul_mont declaration and BN_MONT_CTX. Update CHANGES.

上级 0fe120ba
...@@ -4,6 +4,12 @@ ...@@ -4,6 +4,12 @@
Changes between 0.9.8a and 0.9.9 [xx XXX xxxx] Changes between 0.9.8a and 0.9.9 [xx XXX xxxx]
*) New candidate for BIGNUM assembler implementation, bn_mul_mont,
dedicated Montgomery multiplication procedure, is introduced.
BN_MONT_CTX is modified to allow bn_mul_mont to reach for higher
"64-bit" performance on certain 32-bit targets.
[Andy Polyakov]
*) New option SSL_OP_NO_COMP to disable use of compression selectively *) New option SSL_OP_NO_COMP to disable use of compression selectively
in SSL structures. New SSL ctrl to set maximum send fragment size. in SSL structures. New SSL ctrl to set maximum send fragment size.
Save memory by setting the I/O buffer sizes dynamically instead of Save memory by setting the I/O buffer sizes dynamically instead of
......
...@@ -70,7 +70,7 @@ $rp="%i0"; # BN_ULONG *rp, ...@@ -70,7 +70,7 @@ $rp="%i0"; # BN_ULONG *rp,
$ap="%i1"; # const BN_ULONG *ap, $ap="%i1"; # const BN_ULONG *ap,
$bp="%i2"; # const BN_ULONG *bp, $bp="%i2"; # const BN_ULONG *bp,
$np="%i3"; # const BN_ULONG *np, $np="%i3"; # const BN_ULONG *np,
$n0="%i4"; # BN_ULONG n0, $n0="%i4"; # const BN_ULONG *n0,
$num="%i5"; # int num); $num="%i5"; # int num);
$tp="%l0"; $tp="%l0";
...@@ -125,7 +125,7 @@ $fname: ...@@ -125,7 +125,7 @@ $fname:
sethi %hi(0xffff),$mask sethi %hi(0xffff),$mask
sll $num,3,$num ! num*=8 sll $num,3,$num ! num*=8
or $mask,%lo(0xffff),$mask or $mask,%lo(0xffff),$mask
mov %i4,$n0 ! reassigned, remember? ldx [%i4],$n0 ! reassigned, remember?
add %sp,$bias,%o0 ! real top of stack add %sp,$bias,%o0 ! real top of stack
sll $num,2,%o1 sll $num,2,%o1
......
...@@ -66,7 +66,7 @@ if($sse2) { ...@@ -66,7 +66,7 @@ if($sse2) {
&mov ("ebx",&wparam(1)); # const BN_ULONG *ap &mov ("ebx",&wparam(1)); # const BN_ULONG *ap
&mov ("ecx",&wparam(2)); # const BN_ULONG *bp &mov ("ecx",&wparam(2)); # const BN_ULONG *bp
&mov ("edx",&wparam(3)); # const BN_ULONG *np &mov ("edx",&wparam(3)); # const BN_ULONG *np
&mov ("esi",&wparam(4)); # BN_ULONG n0 &mov ("esi",&wparam(4)); # const BN_ULONG *n0
&mov ($num,&wparam(5)); # int num &mov ($num,&wparam(5)); # int num
&mov ("edi","esp"); # saved stack pointer! &mov ("edi","esp"); # saved stack pointer!
...@@ -78,6 +78,7 @@ if($sse2) { ...@@ -78,6 +78,7 @@ if($sse2) {
&sub ($num,1); # num is restored to its original value &sub ($num,1); # num is restored to its original value
# and will remain constant from now... # and will remain constant from now...
&mov ("esi",&DWP(0,"esi")); # pull n0[0]
&mov ($_rp,"eax"); # ... save a copy of argument block &mov ($_rp,"eax"); # ... save a copy of argument block
&mov ($_ap,"ebx"); &mov ($_ap,"ebx");
&mov ($_bp,"ecx"); &mov ($_bp,"ecx");
......
...@@ -22,7 +22,7 @@ $rp="%rdi"; # BN_ULONG *rp, ...@@ -22,7 +22,7 @@ $rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap, $ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp, $bp="%rdx"; # const BN_ULONG *bp,
$np="%rcx"; # const BN_ULONG *np, $np="%rcx"; # const BN_ULONG *np,
$n0="%r8"; # BN_ULONG n0, $n0="%r8"; # const BN_ULONG *n0,
$num="%r9"; # int num); $num="%r9"; # int num);
$lo0="%r10"; $lo0="%r10";
$hi0="%r11"; $hi0="%r11";
...@@ -55,6 +55,8 @@ bn_mul_mont: ...@@ -55,6 +55,8 @@ bn_mul_mont:
mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp
mov %rdx,$bp # $bp reassigned, remember? mov %rdx,$bp # $bp reassigned, remember?
mov ($n0),$n0 # pull n0[0] value
xor $i,$i # i=0 xor $i,$i # i=0
xor $j,$j # j=0 xor $j,$j # j=0
......
...@@ -295,7 +295,7 @@ struct bn_mont_ctx_st ...@@ -295,7 +295,7 @@ struct bn_mont_ctx_st
BIGNUM N; /* The modulus */ BIGNUM N; /* The modulus */
BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
* (Ni is only stored for bignum algorithm) */ * (Ni is only stored for bignum algorithm) */
BN_ULONG n0; /* least significant word of Ni */ BN_ULONG n0[2];/* least significant word(s) of Ni */
int flags; int flags;
}; };
...@@ -729,7 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); ...@@ -729,7 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num);
bn_pollute(a); \ bn_pollute(a); \
} }
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
......
...@@ -842,9 +842,9 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) ...@@ -842,9 +842,9 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
* versions. Assembler vs. assembler improvement coefficients can * versions. Assembler vs. assembler improvement coefficients can
* [and are known to] differ and are to be documented elsewhere. * [and are known to] differ and are to be documented elsewhere.
*/ */
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
{ {
BN_ULONG c0,c1,ml,*tp; BN_ULONG c0,c1,ml,*tp,n0;
#ifdef mul64 #ifdef mul64
BN_ULONG mh; BN_ULONG mh;
#endif #endif
...@@ -852,10 +852,12 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U ...@@ -852,10 +852,12 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
int i=0,j; int i=0,j;
#if 0 /* template for platform-specific implementation */ #if 0 /* template for platform-specific implementation */
if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num); if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num);
#endif #endif
vp = tp = alloca((num+2)*sizeof(BN_ULONG)); vp = tp = alloca((num+2)*sizeof(BN_ULONG));
n0 = *n0p;
tp[num] = bn_mul_words(tp,ap,num,bp[0]); tp[num] = bn_mul_words(tp,ap,num,bp[0]);
tp[num+1] = 0; tp[num+1] = 0;
goto enter; goto enter;
......
...@@ -152,7 +152,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) ...@@ -152,7 +152,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
#endif #endif
r->top=max; r->top=max;
n0=mont->n0; n0=mont->n0[0];
#ifdef BN_COUNT #ifdef BN_COUNT
fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
...@@ -323,16 +323,49 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) ...@@ -323,16 +323,49 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
BIGNUM tmod; BIGNUM tmod;
BN_ULONG buf[2]; BN_ULONG buf[2];
tmod.d=buf;
tmod.dmax=2;
tmod.neg=0;
mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
BN_zero(R);
if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
tmod.top=0;
if (buf[0] = mod->d[0]) tmod.top=1;
if (buf[1] = mod->top>1 ? mod->d[1] : 0) tmod.top=2;
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
if (!BN_is_zero(Ri))
{
if (!BN_sub_word(Ri,1)) goto err;
}
else /* if N mod word size == 1 */
{
if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
goto err;
/* Ri-- (mod double word size) */
Ri->neg=0;
Ri->d[0]=BN_MASK2;
Ri->d[1]=BN_MASK2;
Ri->top=2;
}
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only couple of least significant words: */
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
#else
BN_zero(R); BN_zero(R);
if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
buf[0]=mod->d[0]; /* tmod = N mod word size */ buf[0]=mod->d[0]; /* tmod = N mod word size */
buf[1]=0; buf[1]=0;
tmod.d=buf;
tmod.top = buf[0] != 0 ? 1 : 0; tmod.top = buf[0] != 0 ? 1 : 0;
tmod.dmax=2;
tmod.neg=0;
/* Ri = R^-1 mod N*/ /* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err; goto err;
...@@ -348,7 +381,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) ...@@ -348,7 +381,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N, /* Ni = (R*Ri-1)/N,
* keep only least significant word: */ * keep only least significant word: */
mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = 0;
#endif
} }
#else /* !MONT_WORD */ #else /* !MONT_WORD */
{ /* bignum version */ { /* bignum version */
...@@ -384,7 +419,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) ...@@ -384,7 +419,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
if (!BN_copy(&(to->N),&(from->N))) return NULL; if (!BN_copy(&(to->N),&(from->N))) return NULL;
if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL; if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
to->ri=from->ri; to->ri=from->ri;
to->n0=from->n0; to->n0[0]=from->n0[0];
to->n0[1]=from->n0[1];
return(to); return(to);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册