提交 4d524040 编写于 作者: A Andy Polyakov

Change bn_mul_mont declaration and BN_MONT_CTX. Update CHANGES.

上级 0fe120ba
......@@ -4,6 +4,12 @@
Changes between 0.9.8a and 0.9.9 [xx XXX xxxx]
*) New candidate for BIGNUM assembler implementation, bn_mul_mont,
dedicated Montgomery multiplication procedure, is introduced.
BN_MONT_CTX is modified to allow bn_mul_mont to reach for higher
"64-bit" performance on certain 32-bit targets.
[Andy Polyakov]
*) New option SSL_OP_NO_COMP to disable use of compression selectively
in SSL structures. New SSL ctrl to set maximum send fragment size.
Save memory by seeting the I/O buffer sizes dynamically instead of
......
......@@ -70,7 +70,7 @@ $rp="%i0"; # BN_ULONG *rp,
$ap="%i1"; # const BN_ULONG *ap,
$bp="%i2"; # const BN_ULONG *bp,
$np="%i3"; # const BN_ULONG *np,
$n0="%i4"; # BN_ULONG n0,
$n0="%i4"; # const BN_ULONG *n0,
$num="%i5"; # int num);
$tp="%l0";
......@@ -125,7 +125,7 @@ $fname:
sethi %hi(0xffff),$mask
sll $num,3,$num ! num*=8
or $mask,%lo(0xffff),$mask
mov %i4,$n0 ! reassigned, remember?
ldx [%i4],$n0 ! reassigned, remember?
add %sp,$bias,%o0 ! real top of stack
sll $num,2,%o1
......
......@@ -66,7 +66,7 @@ if($sse2) {
&mov ("ebx",&wparam(1)); # const BN_ULONG *ap
&mov ("ecx",&wparam(2)); # const BN_ULONG *bp
&mov ("edx",&wparam(3)); # const BN_ULONG *np
&mov ("esi",&wparam(4)); # BN_ULONG n0
&mov ("esi",&wparam(4)); # const BN_ULONG *n0
&mov ($num,&wparam(5)); # int num
&mov ("edi","esp"); # saved stack pointer!
......@@ -78,6 +78,7 @@ if($sse2) {
&sub ($num,1); # num is restored to its original value
# and will remain constant from now...
&mov ("esi",&DWP(0,"esi")); # pull n0[0]
&mov ($_rp,"eax"); # ... save a copy of argument block
&mov ($_ap,"ebx");
&mov ($_bp,"ecx");
......
......@@ -22,7 +22,7 @@ $rp="%rdi"; # BN_ULONG *rp,
$ap="%rsi"; # const BN_ULONG *ap,
$bp="%rdx"; # const BN_ULONG *bp,
$np="%rcx"; # const BN_ULONG *np,
$n0="%r8"; # BN_ULONG n0,
$n0="%r8"; # const BN_ULONG *n0,
$num="%r9"; # int num);
$lo0="%r10";
$hi0="%r11";
......@@ -55,6 +55,8 @@ bn_mul_mont:
mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp
mov %rdx,$bp # $bp reassigned, remember?
mov ($n0),$n0 # pull n0[0] value
xor $i,$i # i=0
xor $j,$j # j=0
......
......@@ -295,7 +295,7 @@ struct bn_mont_ctx_st
BIGNUM N; /* The modulus */
BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1
* (Ni is only stored for bignum algorithm) */
BN_ULONG n0; /* least significant word of Ni */
BN_ULONG n0[2];/* least significant word(s) of Ni */
int flags;
};
......@@ -729,7 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num);
bn_pollute(a); \
}
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num);
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w);
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num);
......
......@@ -842,9 +842,9 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
* versions. Assembler vs. assembler improvement coefficients can
* [and are known to] differ and are to be documented elsewhere.
*/
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num)
{
BN_ULONG c0,c1,ml,*tp;
BN_ULONG c0,c1,ml,*tp,n0;
#ifdef mul64
BN_ULONG mh;
#endif
......@@ -852,10 +852,12 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
int i=0,j;
#if 0 /* template for platform-specific implementation */
if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num);
if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num);
#endif
vp = tp = alloca((num+2)*sizeof(BN_ULONG));
n0 = *n0p;
tp[num] = bn_mul_words(tp,ap,num,bp[0]);
tp[num+1] = 0;
goto enter;
......
......@@ -152,7 +152,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont)
#endif
r->top=max;
n0=mont->n0;
n0=mont->n0[0];
#ifdef BN_COUNT
fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl);
......@@ -323,16 +323,49 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
BIGNUM tmod;
BN_ULONG buf[2];
tmod.d=buf;
tmod.dmax=2;
tmod.neg=0;
mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2;
#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32)
BN_zero(R);
if (!(BN_set_bit(R,2*BN_BITS2))) goto err;
tmod.top=0;
if (buf[0] = mod->d[0]) tmod.top=1;
if (buf[1] = mod->top>1 ? mod->d[1] : 0) tmod.top=2;
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */
if (!BN_is_zero(Ri))
{
if (!BN_sub_word(Ri,1)) goto err;
}
else /* if N mod word size == 1 */
{
if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL)
goto err;
/* Ri-- (mod double word size) */
Ri->neg=0;
Ri->d[0]=BN_MASK2;
Ri->d[1]=BN_MASK2;
Ri->top=2;
}
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only couple of least significant words: */
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0;
#else
BN_zero(R);
if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */
buf[0]=mod->d[0]; /* tmod = N mod word size */
buf[1]=0;
tmod.d=buf;
tmod.top = buf[0] != 0 ? 1 : 0;
tmod.dmax=2;
tmod.neg=0;
/* Ri = R^-1 mod N*/
if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL)
goto err;
......@@ -348,7 +381,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx)
if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err;
/* Ni = (R*Ri-1)/N,
* keep only least significant word: */
mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0;
mont->n0[1] = 0;
#endif
}
#else /* !MONT_WORD */
{ /* bignum version */
......@@ -384,7 +419,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from)
if (!BN_copy(&(to->N),&(from->N))) return NULL;
if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL;
to->ri=from->ri;
to->n0=from->n0;
to->n0[0]=from->n0[0];
to->n0[1]=from->n0[1];
return(to);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册