Why is my C bigint modexp so slow, or, why is it so fast in python? -
as learning experience decided cook own bigint routines taking modexp, in c. code below, summarize: it's repeated squaring montgomery multiplication. , takes forever run. on virtual machines has 1gb ram, bigger around 128-bit prime modulus causes program kill itself, , smaller ones still take appreciable amount of time. meanwhile, in python, did
def modexp ( g, u, p ): s = 1 while u != 0: if u & 1: s = (s * g)%p u >>= 1 g = (g * g)%p; return s p = 0xffffffffffffffffc90fdaa22168c234c4c6628b80dc1cd129024e088a67cc74020bbea63b139b22514a08798e3404ddef9519b3cd3a431b302b0a6df25f14374fe1356d6d51c245e485b576625e7ec6f44c42e9a637ed6b0bff5cb6f406b7edee386bfb5a899fa5ae9f24117c4b1fe649286651ece45b3dc2007cb8a163bf0598da48361c55d39a69163fa8fd24cf5f83655d23dca3ad961c62f356208552bb9ed529077096966d670c354e4abc9804f1746c08ca237327ffffffffffffffff g = 0x02 = int(urandom(1000).encode('hex'),base=16) % p modexp(g,a,p)
and it's, like, instant. python have ton of stuff precomputed dealing bigints, or there deep going on, or outcome once again, say, have no idea i'm doing?
here's of relevant c code, not i'm asking read through all, may include it...
bigints stored in typedef struct string
wrapper char *
len
, sign
fields.
string __modexpodd(string a, string e, string n){//assumes modulus odd. repeated squaring mongtomery multiplication. string r = chartos(0x01); int rpow=0; int ebits = countsignificantbits(e); while(__bigintcomp(r,n)==-1){ r = stringleftshift(r,1); rpow++; } string nprime = extendedeuclidean(r,n,chartos(0x01))[1]; nprime.sign = 1; string abar = bigintdivide(bigintmultiply(a,r),n)[1]; string xbar = bigintdivide(r,n)[1]; for(int = ebits-1; i>=0; i--){ xbar = __monpro(xbar, xbar, n, nprime, rpow); if(bigintparity(stringrightshift(e,i))){ xbar = __monpro(abar, xbar, n, nprime, rpow); } } return __monpro(xbar, chartos(0x01), n, nprime, rpow); }
debugging tells me worst bottleneck division step in main loop of extended euclidean algorithm:
string *extendedeuclidean(string a, string b, string c){ //returns x, y, 0 <= x < b such ax + = c string s,s,t,t,r,r,q,temp; s=chartos(0x00); s=chartos(0x01); t=chartos(0x01); t=chartos(0x00); r=bigintcopy(b); r=bigintcopy(a); int revflag = 0; if(bigintcomp(a,b)==-1){// need have a>b work. temp = r; r = r; r = temp; revflag = 1; } while(bigintcomp(r,chartos(0x00))!=0){ q = bigintdivide(r,r)[0]; memcpy(&temp, &r, sizeof(string)); r = bigintsubtract(r, bigintmultiply(q, r)); memcpy(&r, &temp, sizeof(string)); memcpy(&temp, &s, sizeof(string)); s = bigintsubtract(s, bigintmultiply(q, s)); memcpy(&s, &temp, sizeof(string)); memcpy(&temp, &t, sizeof(string)); t = bigintsubtract(t, bigintmultiply(q, t)); memcpy(&t, &temp, sizeof(string)); } //at point, s*a + t*b = r = gcd(a,b), , t,s quotients of a,b gcd. string *qr = bigintdivide(c,r); string ratio = bigintcopy(qr[0]); if(bigintcomp(qr[1],chartos(0x00))!=0) return null; if(revflag==1){ temp = s; s = t; t = temp; } //normalise s, t 0 <= s < b if(s.sign==-1){ qr = bigintdivide(bigintabs(s),bigintabs(b)); string q = (bigintcomp(qr[1],chartos(0x00))==0) ? qr[0] : bigintincr(qr[0]); s = bigintadd(s,bigintmultiply(q,bigintabs(b))); t = bigintsubtract(t, bigintmultiply(q,bigintabs(a))); } //multiply correct coefficients s = bigintmultiply(s,ratio); t = bigintmultiply(t,ratio); string *ret = calloc(2,sizeof(string)); ret[0] = s; ret[1] = t; return ret; }
and might curious see division routine, long division:
string *__bigintdivide(string a, string b){ //assumes a, b positive string *qr = calloc(2, sizeof(string)); if(__bigintcomp(b,chartos(0x00))==0){ return null; } if(__bigintcomp(a,b)==-1){ qr[0] = chartos(0x00); qr[1] = bigintcopy(b); return qr; } if(__bigintcomp(a,b)==0){ qr[0] = chartos(0x01); qr[1] = chartos(0x00); return qr; } int pow = ispowerof2(b); if(pow!=-1){ qr[0] = stringrightshift(a,pow); qr[1] = getsignificantbits(a,pow); qr[0].sign=1; qr[1].sign=1; return qr; } //long division, in base-256 string divisor = chartos(a.c[0]); string quotient = nullstring; int i=1; do{ while(__bigintcomp(divisor,b)==-1 && i<a.len){ divisor = stringcat(divisor, chartos(a.c[i])); i++; } if(i == a.len && __bigintcomp(divisor, b) == -1){ break; } qr = __findquotient(divisor, b, chartos(0x00), chartos(0xff)); quotient = stringcat(quotient, qr[0]); divisor = qr[1]; }while(i<a.len); qr[0] = bigintcopy(quotient); qr[1] = bigintcopy(divisor); return qr; } string *__findquotient(string divisor, string dividend, string minq, string maxq){ //an o(log n) division routine, finds q binary search instead of linear string q = stringrightshift(bigintadd(minq,maxq),1); string diff = stringrightshift(bigintsubtract(maxq,minq),1); string p = bigintmultiply(dividend, q); string *qr=calloc(2,sizeof(string)); while( __bigintcomp(diff, chartos(0x00)) == 1){ fflush(stdout); if(__bigintcomp(divisor, p) == 1){ // if divisor > dividend *q, q small. q = bigintadd(q, diff); } if(__bigintcomp(divisor, p) == -1){ // if divisor < dividend * q, q big. q = bigintsubtract(q, diff); } if(__bigintcomp(divisor, p)==0){ qr[0] = q; qr[1] = chartos(0x00); return qr; } p = bigintmultiply(dividend, q); diff = stringrightshift(diff,1); } while(__bigintcomp(divisor, p)==1){ // if > b*q, q small, increment it. afterwards, 1 big. q = bigintincr(q); p = bigintadd(p, dividend); } while(__bigintcomp(divisor, p) == -1){ // while < b*q, decrement q q = bigintdecr(q); p = bigintsubtract(p,dividend); } qr[0] = q; qr[1] = bigintsubtract(divisor, p); return qr; }
and montgomery product function, directly paper first described:
string __monpro(string abar, string bbar, string n, string nprime, int rpow){ string t = bigintmultiply(abar, bbar); string m = getsignificantbits(bigintmultiply(t,nprime),rpow); string u = stringrightshift(bigintadd(t,bigintmultiply(m,n)),rpow); if(bigintcomp(u,n)>=0) return bigintsubtract(u,n); return u; }
so i'm curious if it's precomputed in python, or if should looking memory leak? haven't tested of c bigint libraries because i've been lazy, , because assume python using 1 of internally anyway. taking look.
Comments
Post a Comment