提交 e14d4443 编写于 作者: U Ulf Möller

Bignum library bug fix. IRIX 6 passes "make test" now!

This also avoids the problems with SC4.2 and unpatched SC5.

Submitted by: Andy Polyakov <appro@fy.chalmers.se>
上级 257e206d
...@@ -10,6 +10,10 @@ ...@@ -10,6 +10,10 @@
[23-Dec-1998] down below; but in later [23-Dec-1998] down below; but in later
versions, these hyphens are gone.] versions, these hyphens are gone.]
*) Bignum library bug fix. IRIX 6 passes "make test" now!
This also avoids the problems with SC4.2 and unpatched SC5.
[Andy Polyakov <appro@fy.chalmers.se>]
*) New functions sk_num, sk_value and sk_set to replace the previous macros. *) New functions sk_num, sk_value and sk_set to replace the previous macros.
These are required because the typesafe stack would otherwise break These are required because the typesafe stack would otherwise break
existing code. If old code used a structure member which used to be STACK existing code. If old code used a structure member which used to be STACK
......
...@@ -112,15 +112,12 @@ my %table=( ...@@ -112,15 +112,12 @@ my %table=(
"debug-solaris-usparc-gcc","gcc:-O3 -g -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::", "debug-solaris-usparc-gcc","gcc:-O3 -g -mcpu=ultrasparc -Wall -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_UNROLL BF_PTR:asm/sparcv8plus-gcc.o::",
# DO NOT use /xO[34] on sparc with SC3.0. It is broken, and will not pass the tests # DO NOT use /xO[34] on sparc with SC3.0. It is broken, and will not pass the tests
"solaris-sparc-cc","cc:-fast -O -Xa -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_UNROLL BF_PTR:::", "solaris-sparc-sc3","cc:-fast -O -Xa -DB_ENDIAN:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_UNROLL BF_PTR:::",
# SC4 is ok, better than gcc even on bn as long as you tell it -xarch=v8 # SC4 is ok, better than gcc even on bn as long as you tell it -xarch=v8
# -fast slows things like DES down quite a lot # -fast slows things like DES down quite a lot
# Don't use -xtarget=ultra with SC4.2. It is broken, and will break exptest. "solaris-sparc-cc","cc:-xarch=v8 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::",
"solaris-sparc-sc4","cc:-xarch=v8 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8.o::", "solaris-usparc-cc","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o:::asm/md5-sparcv8plus.o:",
"solaris-usparc-sc4","cc:-xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o::", "solaris64-usparc-cc","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::::asm/md5-sparcv9.o:",
# SC5.0 note: Compiler common patch 107357-01 or later is required!
"solaris-usparc-sc5","cc:-xtarget=ultra -xarch=v8plus -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC -DBN_DIV2W:-D_REENTRANT:-lsocket -lnsl:BN_LLONG RC4_CHAR DES_PTR DES_RISC1 DES_UNROLL BF_PTR:asm/sparcv8plus.o:::asm/md5-sparcv8plus.o:",
"solaris64-usparc-sc5","cc:-xtarget=ultra -xarch=v9 -xstrconst -xO5 -xdepend -Xa -DB_ENDIAN -DULTRASPARC:-D_REENTRANT:-lsocket -lnsl:SIXTY_FOUR_BIT_LONG RC4_CHAR DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR::::asm/md5-sparcv9.o:",
# Sunos configs, assuming sparc for the gcc one. # Sunos configs, assuming sparc for the gcc one.
##"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST:(unknown)::DES_UNROLL:::", ##"sunos-cc", "cc:-O4 -DNOPROTO -DNOCONST:(unknown)::DES_UNROLL:::",
...@@ -133,12 +130,12 @@ my %table=( ...@@ -133,12 +130,12 @@ my %table=(
# 3 times faster, use if at all possible. # 3 times faster, use if at all possible.
#"irix-gcc","gcc:-O2 -mips2::SIXTY_FOUR_BIT BN_LLONG RC4_INDEX RC4_CHAR:::", #"irix-gcc","gcc:-O2 -mips2::SIXTY_FOUR_BIT BN_LLONG RC4_INDEX RC4_CHAR:::",
"irix-gcc","gcc:-O2 -DTERMIOS -DB_ENDIAN:(unknown)::BN_LLONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC2 DES_PTR BF_PTR:::", "irix-gcc","gcc:-O2 -DTERMIOS -DB_ENDIAN:(unknown)::BN_LLONG MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC2 DES_PTR BF_PTR:::",
"irix64-gcc","gcc:-mips3 -O2 -DTERMIOS -DB_ENDIAN:(unknown)::MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC2 DES_PTR BF_PTR SIXTY_FOUR_BIT:::",
"irix-cc", "cc:-O2 -use_readonly_const -DTERMIOS -DB_ENDIAN:(unknown)::BN_LLONG DES_PTR DES_RISC2 DES_UNROLL BF_PTR:::", "irix-cc", "cc:-O2 -use_readonly_const -DTERMIOS -DB_ENDIAN:(unknown)::BN_LLONG DES_PTR DES_RISC2 DES_UNROLL BF_PTR:::",
"irix64-cc", "cc:-O2 -use_readonly_const -DTERMIOS -DB_ENDIAN:(unknown)::DES_PTR DES_RISC2 DES_UNROLL BF_PTR SIXTY_FOUR_BIT:::", "irix-mips3-gcc","gcc:-mips3 -O2 -DTERMIOS -DB_ENDIAN:(unknown)::MD2_CHAR RC4_INDEX RC4_CHAR DES_UNROLL DES_RISC2 DES_PTR BF_PTR SIXTY_FOUR_BIT:::",
"irix-mips3-cc", "cc:-n32 -mips3 -O2 -use_readonly_const -DTERMIOS -DB_ENDIAN:(unknown)::DES_PTR DES_RISC2 DES_UNROLL BF_PTR SIXTY_FOUR_BIT:::",
"debug-irix-cc", "cc:-w2 -g -DCRYPTO_MDEBUG -DTERMIOS -DB_ENDIAN:(unknown):::::", "debug-irix-cc", "cc:-w2 -g -DCRYPTO_MDEBUG -DTERMIOS -DB_ENDIAN:(unknown):::::",
# This is the n64 mode build. # This is the n64 mode build. (Untested!)
"irix-n64-cc", "cc:-64 -O2 -use_readonly_const -DTERMIOS:(unknown)::DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT:::", "irix64-mips4-cc", "cc:-64 -mips4 -O2 -use_readonly_const -DTERMIOS:(unknown)::DES_RISC2 DES_UNROLL SIXTY_FOUR_BIT:::",
# HPUX 9.X config. # HPUX 9.X config.
# Don't use the bundled cc. It is broken. Use HP ANSI C if possible, or gcc. # Don't use the bundled cc. It is broken. Use HP ANSI C if possible, or gcc.
......
OpenSSL STATUS Last modified at OpenSSL STATUS Last modified at
______________ $Date: 1999/05/18 08:52:01 $ ______________ $Date: 1999/05/20 01:42:57 $
DEVELOPMENT STATE DEVELOPMENT STATE
...@@ -14,18 +14,9 @@ ...@@ -14,18 +14,9 @@
o OpenSSL 0.9.2b: Released on March 22nd, 1999 o OpenSSL 0.9.2b: Released on March 22nd, 1999
o OpenSSL 0.9.1c: Released on December 23rd, 1998 o OpenSSL 0.9.1c: Released on December 23rd, 1998
[ Proposed new numbering scheme: <major>.<minor>[<patchlevel>]
0.9.1c is 0913
1.0 is 010000
1.0 a is 010001
1.8 z is 01081a ]
RELEASE SHOWSTOPPERS RELEASE SHOWSTOPPERS
o BSD/OS: assembler functions must not have leading underscores o BSD/OS: assembler functions must not have leading underscores
o exptest and rsa_oaep_test fail with irix64-*
(Don Badrak <dbadrak@geo.census.gov>: "Re: Problems to compile openssl
on IRIX 6.2", openssl-users)
AVAILABLE PATCHES AVAILABLE PATCHES
......
...@@ -286,9 +286,9 @@ else ...@@ -286,9 +286,9 @@ else
if [ "$SYSTEM" = "SunOS" ] if [ "$SYSTEM" = "SunOS" ]
then then
case `cc -V 2>&1` in case `cc -V 2>&1` in
*4*) CC=sc4;; *4*) CC=cc;;
*5*) CC=sc5;; *5*) CC=cc;;
*) CC=cc;; *) CC=sc3;;
esac esac
fi fi
fi fi
......
...@@ -119,11 +119,11 @@ extern "C" { ...@@ -119,11 +119,11 @@ extern "C" {
/* This is where the long long data type is 64 bits, but long is 32. /* This is where the long long data type is 64 bits, but long is 32.
* For machines where there are 64bit registers, this is the mode to use. * For machines where there are 64bit registers, this is the mode to use.
* IRIX, on R4000 and above should use this mode, along with the relevant * IRIX, on R4000 and above should use this mode, along with the relevant
* assembler code :-). Do NOT define BN_ULLONG. * assembler code :-). Do NOT define BN_LLONG.
*/ */
#ifdef SIXTY_FOUR_BIT #ifdef SIXTY_FOUR_BIT
#define BN_LLONG #undef BN_LLONG
/* #define BN_ULLONG unsigned long long */ #undef BN_ULLONG
#define BN_ULONG unsigned long long #define BN_ULONG unsigned long long
#define BN_LONG long long #define BN_LONG long long
#define BN_BITS 128 #define BN_BITS 128
......
...@@ -150,7 +150,7 @@ char *BN_options(void) ...@@ -150,7 +150,7 @@ char *BN_options(void)
int BN_num_bits_word(BN_ULONG l) int BN_num_bits_word(BN_ULONG l)
{ {
static char bits[256]={ static const char bits[256]={
0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4, 0,1,2,2,3,3,3,3,4,4,4,4,4,4,4,4,
5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
...@@ -343,8 +343,9 @@ void BN_CTX_free(BN_CTX *c) ...@@ -343,8 +343,9 @@ void BN_CTX_free(BN_CTX *c)
BIGNUM *bn_expand2(BIGNUM *b, int words) BIGNUM *bn_expand2(BIGNUM *b, int words)
{ {
BN_ULONG *A,*B,*a; BN_ULONG *A,*a;
int i,j; const BN_ULONG *B;
int i;
bn_check_top(b); bn_check_top(b);
...@@ -362,15 +363,38 @@ BIGNUM *bn_expand2(BIGNUM *b, int words) ...@@ -362,15 +363,38 @@ BIGNUM *bn_expand2(BIGNUM *b, int words)
BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE); BNerr(BN_F_BN_EXPAND2,ERR_R_MALLOC_FAILURE);
return(NULL); return(NULL);
} }
memset(A,0x5c,sizeof(BN_ULONG)*(words+1));
#if 1 #if 1
B=b->d; B=b->d;
/* Check if the previous number needs to be copied */ /* Check if the previous number needs to be copied */
if (B != NULL) if (B != NULL)
{ {
#if 0
/* This lot is an unrolled loop to copy b->top /* This lot is an unrolled loop to copy b->top
* BN_ULONGs from B to A * BN_ULONGs from B to A
*/ */
/*
* I have nothing against unrolling but it's usually done for
* several reasons, namely:
* - minimize percentage of decision making code, i.e. branches;
* - avoid cache thrashing;
* - make it possible to schedule loads earlier;
* Now let's examine the code below. The cornerstone of C is
* "the programmer is always right" and that's what we love it for:-)
* For this very reason C compilers have to be paranoid when it
* comes to data aliasing and assume the worst. Yeah, but what
* does it mean in real life? It means that the loop body below will
* be compiled to a sequence of loads immediately followed by stores,
* as the compiler assumes the worst, something in A==B+1 style. As a
* result the CPU pipeline is going to starve for incoming data. Secondly,
* if A and B happen to share the same cache line, such code is going to
* cause severe cache thrashing. Both factors have a severe impact on
* the performance of modern CPUs, and this is the reason why this
* particular piece of code is #ifdefed away and replaced by the more
* "friendly" version found in the #else section below. This comment
* also applies to the BN_copy function.
*
* <appro@fy.chalmers.se>
*/
for (i=b->top&(~7); i>0; i-=8) for (i=b->top&(~7); i>0; i-=8)
{ {
A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3]; A[0]=B[0]; A[1]=B[1]; A[2]=B[2]; A[3]=B[3];
...@@ -407,6 +431,30 @@ memset(A,0x5c,sizeof(BN_ULONG)*(words+1)); ...@@ -407,6 +431,30 @@ memset(A,0x5c,sizeof(BN_ULONG)*(words+1));
*/ */
; ;
} }
#else
for (i=b->top>>2; i>0; i--,A+=4,B+=4)
{
/*
* The fact that the loop is unrolled
* 4-wise is a tribute to Intel. It's
* the one that doesn't have enough
* registers to accommodate more data.
* I'd unroll it 8-wise otherwise:-)
*
* <appro@fy.chalmers.se>
*/
BN_ULONG a0,a1,a2,a3;
a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
}
switch (b->top&3)
{
case 3: A[2]=B[2];
case 2: A[1]=B[1];
case 1: A[0]=B[0];
case 0: ; /* ultrix cc workaround, see above */
}
#endif
Free(b->d); Free(b->d);
} }
...@@ -415,22 +463,19 @@ memset(A,0x5c,sizeof(BN_ULONG)*(words+1)); ...@@ -415,22 +463,19 @@ memset(A,0x5c,sizeof(BN_ULONG)*(words+1));
/* Now need to zero any data between b->top and b->max */ /* Now need to zero any data between b->top and b->max */
B= &(b->d[b->top]); A= &(b->d[b->top]);
j=(b->max - b->top) & ~7; for (i=(b->max - b->top)>>3; i>0; i--,A+=8)
for (i=0; i<j; i+=8)
{ {
B[0]=0; B[1]=0; B[2]=0; B[3]=0; A[0]=0; A[1]=0; A[2]=0; A[3]=0;
B[4]=0; B[5]=0; B[6]=0; B[7]=0; A[4]=0; A[5]=0; A[6]=0; A[7]=0;
B+=8;
}
j=(b->max - b->top) & 7;
for (i=0; i<j; i++)
{
B[0]=0;
B++;
} }
for (i=(b->max - b->top)&7; i>0; i--,A++)
A[0]=0;
#else #else
memcpy(a->d,b->d,sizeof(b->d[0])*b->top); memset(A,0,sizeof(BN_ULONG)*(words+1));
memcpy(A,b->d,sizeof(b->d[0])*b->top);
b->d=a;
b->max=words;
#endif #endif
/* memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); */ /* memset(&(p[b->max]),0,((words+1)-b->max)*sizeof(BN_ULONG)); */
...@@ -454,7 +499,8 @@ BIGNUM *BN_dup(BIGNUM *a) ...@@ -454,7 +499,8 @@ BIGNUM *BN_dup(BIGNUM *a)
BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b) BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b)
{ {
int i; int i;
BN_ULONG *A,*B; BN_ULONG *A;
const BN_ULONG *B;
bn_check_top(b); bn_check_top(b);
...@@ -464,47 +510,18 @@ BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b) ...@@ -464,47 +510,18 @@ BIGNUM *BN_copy(BIGNUM *a, BIGNUM *b)
#if 1 #if 1
A=a->d; A=a->d;
B=b->d; B=b->d;
for (i=b->top&(~7); i>0; i-=8) for (i=b->top>>2; i>0; i--,A+=4,B+=4)
{ {
A[0]=B[0]; BN_ULONG a0,a1,a2,a3;
A[1]=B[1]; a0=B[0]; a1=B[1]; a2=B[2]; a3=B[3];
A[2]=B[2]; A[0]=a0; A[1]=a1; A[2]=a2; A[3]=a3;
A[3]=B[3];
A[4]=B[4];
A[5]=B[5];
A[6]=B[6];
A[7]=B[7];
A+=8;
B+=8;
} }
switch (b->top&7) switch (b->top&3)
{ {
case 7: case 3: A[2]=B[2];
A[6]=B[6]; case 2: A[1]=B[1];
case 6: case 1: A[0]=B[0];
A[5]=B[5]; case 0: ; /* ultrix cc workaround, see comments in bn_expand2 */
case 5:
A[4]=B[4];
case 4:
A[3]=B[3];
case 3:
A[2]=B[2];
case 2:
A[1]=B[1];
case 1:
A[0]=B[0];
case 0:
/* I need the 'case 0' entry for utrix cc.
* If the optimiser is turned on, it does the
* switch table by doing
* a=top&7
* a--;
* goto jump_table[a];
* If top is 0, this makes us jump to 0xffffffc which is
* rather bad :-(.
* eric 23-Apr-1998
*/
;
} }
#else #else
memcpy(a->d,b->d,sizeof(b->d[0])*b->top); memcpy(a->d,b->d,sizeof(b->d[0])*b->top);
...@@ -539,6 +556,8 @@ BN_ULONG BN_get_word(BIGNUM *a) ...@@ -539,6 +556,8 @@ BN_ULONG BN_get_word(BIGNUM *a)
#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */ #ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
ret<<=BN_BITS4; /* stops the compiler complaining */ ret<<=BN_BITS4; /* stops the compiler complaining */
ret<<=BN_BITS4; ret<<=BN_BITS4;
#else
ret=0;
#endif #endif
ret|=a->d[i]; ret|=a->d[i];
} }
...@@ -563,6 +582,8 @@ int BN_set_word(BIGNUM *a, BN_ULONG w) ...@@ -563,6 +582,8 @@ int BN_set_word(BIGNUM *a, BN_ULONG w)
#ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */ #ifndef SIXTY_FOUR_BIT /* the data item > unsigned long */
w>>=BN_BITS4; w>>=BN_BITS4;
w>>=BN_BITS4; w>>=BN_BITS4;
#else
w=0;
#endif #endif
a->d[i]=(BN_ULONG)w&BN_MASK2; a->d[i]=(BN_ULONG)w&BN_MASK2;
if (a->d[i] != 0) a->top=i+1; if (a->d[i] != 0) a->top=i+1;
...@@ -699,7 +720,7 @@ int BN_set_bit(BIGNUM *a, int n) ...@@ -699,7 +720,7 @@ int BN_set_bit(BIGNUM *a, int n)
a->top=i+1; a->top=i+1;
} }
a->d[i]|=(1L<<j); a->d[i]|=(((BN_ULONG)1)<<j);
return(1); return(1);
} }
...@@ -711,7 +732,7 @@ int BN_clear_bit(BIGNUM *a, int n) ...@@ -711,7 +732,7 @@ int BN_clear_bit(BIGNUM *a, int n)
j=n%BN_BITS2; j=n%BN_BITS2;
if (a->top <= i) return(0); if (a->top <= i) return(0);
a->d[i]&=(~(1L<<j)); a->d[i]&=(~(((BN_ULONG)1)<<j));
bn_fix_top(a); bn_fix_top(a);
return(1); return(1);
} }
......
...@@ -319,7 +319,7 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, BIGNUM *add, BIGNUM *rem, ...@@ -319,7 +319,7 @@ static int probable_prime_dh(BIGNUM *rnd, int bits, BIGNUM *add, BIGNUM *rem,
loop: for (i=1; i<NUMPRIMES; i++) loop: for (i=1; i<NUMPRIMES; i++)
{ {
/* check that rnd is a prime */ /* check that rnd is a prime */
if (BN_mod_word(rnd,(BN_LONG)primes[i]) <= 1) if (BN_mod_word(rnd,(BN_ULONG)primes[i]) <= 1)
{ {
if (!BN_add(rnd,rnd,add)) goto err; if (!BN_add(rnd,rnd,add)) goto err;
goto loop; goto loop;
...@@ -366,8 +366,8 @@ static int probable_prime_dh_strong(BIGNUM *p, int bits, BIGNUM *padd, ...@@ -366,8 +366,8 @@ static int probable_prime_dh_strong(BIGNUM *p, int bits, BIGNUM *padd,
/* check that p and q are prime */ /* check that p and q are prime */
/* check that for p and q /* check that for p and q
* gcd(p-1,primes) == 1 (except for 2) */ * gcd(p-1,primes) == 1 (except for 2) */
if ( (BN_mod_word(p,(BN_LONG)primes[i]) == 0) || if ( (BN_mod_word(p,(BN_ULONG)primes[i]) == 0) ||
(BN_mod_word(q,(BN_LONG)primes[i]) == 0)) (BN_mod_word(q,(BN_ULONG)primes[i]) == 0))
{ {
if (!BN_add(p,p,padd)) goto err; if (!BN_add(p,p,padd)) goto err;
if (!BN_add(q,q,qadd)) goto err; if (!BN_add(q,q,qadd)) goto err;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册