Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members

integer.cpp

00001 // integer.cpp - written and placed in the public domain by Wei Dai
00002 // contains public domain code contributed by Alister Lee and Leonard Janke
00003 
00004 #include "pch.h"
00005 
00006 #ifndef CRYPTOPP_IMPORTS
00007 
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"             // for P1363_KDF2
00016 #include "sha.h"
00017 
00018 #include <iostream>
00019 
00020 #ifdef _M_X64
00021 #include <Intrin.h>
00022 #endif
00023 
00024 #ifdef SSE2_INTRINSICS_AVAILABLE
00025         #ifdef __GNUC__
00026                 #include <xmmintrin.h>
00027                 #include <signal.h>
00028                 #include <setjmp.h>
00029                 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00030                         #include <malloc.h>
00031                 #else
00032                         #include <stdlib.h>
00033                 #endif
00034         #else
00035                 #include <emmintrin.h>
00036         #endif
00037 #elif defined(_MSC_VER) && defined(_M_IX86)
00038         #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00039 #elif defined(__GNUC__) && defined(__i386__)
00040         #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00041 #endif
00042 
00043 NAMESPACE_BEGIN(CryptoPP)
00044 
00045 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00046 {
00047         if (valueType != typeid(Integer))
00048                 return false;
00049         *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00050         return true;
00051 }
00052 
00053 static const char s_RunAtStartup = (g_pAssignIntToInteger = AssignIntToInteger, 0);
00054 
00055 #ifdef SSE2_INTRINSICS_AVAILABLE
00056 template <class T>
00057 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00058 {
00059         CheckSize(n);
00060         if (n == 0)
00061                 return NULL;
00062         if (n >= 4)
00063         {
00064                 void *p;
00065         #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00066                 while (!(p = _mm_malloc(sizeof(T)*n, 16)))
00067         #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00068                 while (!(p = memalign(16, sizeof(T)*n)))
00069         #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00070                 while (!(p = malloc(sizeof(T)*n)))
00071         #else
00072                 while (!(p = (byte *)malloc(sizeof(T)*n + 8)))  // assume malloc alignment is at least 8
00073         #endif
00074                         CallNewHandler();
00075 
00076         #ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00077                 assert(m_pBlock == NULL);
00078                 m_pBlock = p;
00079                 if (!IsAlignedOn(p, 16))
00080                 {
00081                         assert(IsAlignedOn(p, 8));
00082                         p = (byte *)p + 8;
00083                 }
00084         #endif
00085 
00086                 assert(IsAlignedOn(p, 16));
00087                 return (T*)p;
00088         }
00089         return new T[n];
00090 }
00091 
00092 template <class T>
00093 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00094 {
00095         memset(p, 0, n*sizeof(T));
00096         if (n >= 4)
00097         {
00098                 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00099                         _mm_free(p);
00100                 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00101                         assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00102                         free(m_pBlock);
00103                         m_pBlock = NULL;
00104                 #else
00105                         free(p);
00106                 #endif
00107         }
00108         else
00109                 delete [] (T *)p;
00110 }
00111 #endif
00112 
00113 static int Compare(const word *A, const word *B, size_t N)
00114 {
00115         while (N--)
00116                 if (A[N] > B[N])
00117                         return 1;
00118                 else if (A[N] < B[N])
00119                         return -1;
00120 
00121         return 0;
00122 }
00123 
00124 static word Increment(word *A, size_t N, word B=1)
00125 {
00126         assert(N);
00127         word t = A[0];
00128         A[0] = t+B;
00129         if (A[0] >= t)
00130                 return 0;
00131         for (unsigned i=1; i<N; i++)
00132                 if (++A[i])
00133                         return 0;
00134         return 1;
00135 }
00136 
00137 static word Decrement(word *A, size_t N, word B=1)
00138 {
00139         assert(N);
00140         word t = A[0];
00141         A[0] = t-B;
00142         if (A[0] <= t)
00143                 return 0;
00144         for (unsigned i=1; i<N; i++)
00145                 if (A[i]--)
00146                         return 0;
00147         return 1;
00148 }
00149 
00150 static void TwosComplement(word *A, size_t N)
00151 {
00152         Decrement(A, N);
00153         for (unsigned i=0; i<N; i++)
00154                 A[i] = ~A[i];
00155 }
00156 
00157 static word AtomicInverseModPower2(word A)
00158 {
00159         assert(A%2==1);
00160 
00161         word R=A%8;
00162 
00163         for (unsigned i=3; i<WORD_BITS; i*=2)
00164                 R = R*(2-R*A);
00165 
00166         assert(R*A==1);
00167         return R;
00168 }
00169 
00170 // ********************************************************
00171 
00172 class DWord
00173 {
00174 public:
00175         DWord() {}
00176 
00177 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00178         explicit DWord(word low)
00179         {
00180                 m_whole = low;
00181         }
00182 #else
00183         explicit DWord(word low)
00184         {
00185                 m_halfs.low = low;
00186                 m_halfs.high = 0;
00187         }
00188 #endif
00189 
00190         DWord(word low, word high)
00191         {
00192                 m_halfs.low = low;
00193                 m_halfs.high = high;
00194         }
00195 
00196         static DWord Multiply(word a, word b)
00197         {
00198                 DWord r;
00199                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00200                         r.m_whole = (dword)a * b;
00201                 #elif defined(__alpha__)
00202                         r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b));
00203                 #elif defined(__ia64__)
00204                         r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b));
00205                 #elif defined(_ARCH_PPC64)
00206                         r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc");
00207                 #elif defined(__x86_64__)
00208                         __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc");
00209                 #elif defined(__mips64)
00210                         __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b));
00211                 #elif defined(_M_X64)
00212                         r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00213                 #elif defined(_M_IX86)
00214                         // for testing
00215                         word64 t = (word64)a * b;
00216                         r.m_halfs.high = ((word32 *)(&t))[1];
00217                         r.m_halfs.low = (word32)t;
00218                 #else
00219                         #error can not implement DWord
00220                 #endif
00221                 return r;
00222         }
00223 
00224         static DWord MultiplyAndAdd(word a, word b, word c)
00225         {
00226                 DWord r = Multiply(a, b);
00227                 return r += c;
00228         }
00229 
00230         DWord & operator+=(word a)
00231         {
00232                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00233                         m_whole = m_whole + a;
00234                 #else
00235                         m_halfs.low += a;
00236                         m_halfs.high += (m_halfs.low < a);
00237                 #endif
00238                 return *this;
00239         }
00240 
00241         DWord operator+(word a)
00242         {
00243                 DWord r;
00244                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00245                         r.m_whole = m_whole + a;
00246                 #else
00247                         r.m_halfs.low = m_halfs.low + a;
00248                         r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00249                 #endif
00250                 return r;
00251         }
00252 
00253         DWord operator-(DWord a)
00254         {
00255                 DWord r;
00256                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00257                         r.m_whole = m_whole - a.m_whole;
00258                 #else
00259                         r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00260                         r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00261                 #endif
00262                 return r;
00263         }
00264 
00265         DWord operator-(word a)
00266         {
00267                 DWord r;
00268                 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00269                         r.m_whole = m_whole - a;
00270                 #else
00271                         r.m_halfs.low = m_halfs.low - a;
00272                         r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00273                 #endif
00274                 return r;
00275         }
00276 
00277         // returns quotient, which must fit in a word
00278         word operator/(word divisor);
00279 
00280         word operator%(word a);
00281 
00282         bool operator!() const
00283         {
00284         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00285                 return !m_whole;
00286         #else
00287                 return !m_halfs.high && !m_halfs.low;
00288         #endif
00289         }
00290 
00291         word GetLowHalf() const {return m_halfs.low;}
00292         word GetHighHalf() const {return m_halfs.high;}
00293         word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00294 
00295 private:
00296         union
00297         {
00298         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00299                 dword m_whole;
00300         #endif
00301                 struct
00302                 {
00303                 #ifdef IS_LITTLE_ENDIAN
00304                         word low;
00305                         word high;
00306                 #else
00307                         word high;
00308                         word low;
00309                 #endif
00310                 } m_halfs;
00311         };
00312 };
00313 
00314 class Word
00315 {
00316 public:
00317         Word() {}
00318 
00319         Word(word value)
00320         {
00321                 m_whole = value;
00322         }
00323 
00324         Word(hword low, hword high)
00325         {
00326                 m_whole = low | (word(high) << (WORD_BITS/2));
00327         }
00328 
00329         static Word Multiply(hword a, hword b)
00330         {
00331                 Word r;
00332                 r.m_whole = (word)a * b;
00333                 return r;
00334         }
00335 
00336         Word operator-(Word a)
00337         {
00338                 Word r;
00339                 r.m_whole = m_whole - a.m_whole;
00340                 return r;
00341         }
00342 
00343         Word operator-(hword a)
00344         {
00345                 Word r;
00346                 r.m_whole = m_whole - a;
00347                 return r;
00348         }
00349 
00350         // returns quotient, which must fit in a word
00351         hword operator/(hword divisor)
00352         {
00353                 return hword(m_whole / divisor);
00354         }
00355 
00356         bool operator!() const
00357         {
00358                 return !m_whole;
00359         }
00360 
00361         word GetWhole() const {return m_whole;}
00362         hword GetLowHalf() const {return hword(m_whole);}
00363         hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00364         hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00365         
00366 private:
00367         word m_whole;
00368 };
00369 
00370 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
00371 template <class S, class D>
00372 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00373 {
00374         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
00375         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00376 
00377         // estimate the quotient: do a 2 S by 1 S divide
00378         S Q;
00379         if (S(B1+1) == 0)
00380                 Q = A[2];
00381         else
00382                 Q = D(A[1], A[2]) / S(B1+1);
00383 
00384         // now subtract Q*B from A
00385         D p = D::Multiply(B0, Q);
00386         D u = (D) A[0] - p.GetLowHalf();
00387         A[0] = u.GetLowHalf();
00388         u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00389         A[1] = u.GetLowHalf();
00390         A[2] += u.GetHighHalf();
00391 
00392         // Q <= actual quotient, so fix it
00393         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00394         {
00395                 u = (D) A[0] - B0;
00396                 A[0] = u.GetLowHalf();
00397                 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00398                 A[1] = u.GetLowHalf();
00399                 A[2] += u.GetHighHalf();
00400                 Q++;
00401                 assert(Q);      // shouldn't overflow
00402         }
00403 
00404         return Q;
00405 }
00406 
00407 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
00408 template <class S, class D>
00409 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00410 {
00411         if (!B) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
00412                 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00413         else
00414         {
00415                 S Q[2];
00416                 T[0] = Al.GetLowHalf();
00417                 T[1] = Al.GetHighHalf(); 
00418                 T[2] = Ah.GetLowHalf();
00419                 T[3] = Ah.GetHighHalf();
00420                 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00421                 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00422                 return D(Q[0], Q[1]);
00423         }
00424 }
00425 
00426 // returns quotient, which must fit in a word
00427 inline word DWord::operator/(word a)
00428 {
00429         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00430                 return word(m_whole / a);
00431         #else
00432                 hword r[4];
00433                 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00434         #endif
00435 }
00436 
00437 inline word DWord::operator%(word a)
00438 {
00439         #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00440                 return word(m_whole % a);
00441         #else
00442                 if (a < (word(1) << (WORD_BITS/2)))
00443                 {
00444                         hword h = hword(a);
00445                         word r = m_halfs.high % h;
00446                         r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00447                         return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00448                 }
00449                 else
00450                 {
00451                         hword r[4];
00452                         DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00453                         return Word(r[0], r[1]).GetWhole();
00454                 }
00455         #endif
00456 }
00457 
00458 // ********************************************************
00459 
00460 class Portable
00461 {
00462 public:
00463         static word Add(word *C, const word *A, const word *B, size_t N);
00464         static word Subtract(word *C, const word *A, const word *B, size_t N);
00465 
00466         static inline void Multiply2(word *C, const word *A, const word *B);
00467         static inline word Multiply2Add(word *C, const word *A, const word *B);
00468         static void Multiply4(word *C, const word *A, const word *B);
00469         static void Multiply8(word *C, const word *A, const word *B);
00470         static inline unsigned int MultiplyRecursionLimit() {return 8;}
00471 
00472         static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00473         static void Multiply4Bottom(word *C, const word *A, const word *B);
00474         static void Multiply8Bottom(word *C, const word *A, const word *B);
00475         static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00476 
00477         static void Square2(word *R, const word *A);
00478         static void Square4(word *R, const word *A);
00479         static void Square8(word *R, const word *A) {assert(false);}
00480         static inline unsigned int SquareRecursionLimit() {return 4;}
00481 };
00482 
00483 word Portable::Add(word *C, const word *A, const word *B, size_t N)
00484 {
00485         assert (N%2 == 0);
00486 
00487         DWord u(0, 0);
00488         for (unsigned int i = 0; i < N; i+=2)
00489         {
00490                 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00491                 C[i] = u.GetLowHalf();
00492                 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00493                 C[i+1] = u.GetLowHalf();
00494         }
00495         return u.GetHighHalf();
00496 }
00497 
00498 word Portable::Subtract(word *C, const word *A, const word *B, size_t N)
00499 {
00500         assert (N%2 == 0);
00501 
00502         DWord u(0, 0);
00503         for (unsigned int i = 0; i < N; i+=2)
00504         {
00505                 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00506                 C[i] = u.GetLowHalf();
00507                 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00508                 C[i+1] = u.GetLowHalf();
00509         }
00510         return 0-u.GetHighHalf();
00511 }
00512 
00513 void Portable::Multiply2(word *C, const word *A, const word *B)
00514 {
00515 /*
00516         word s;
00517         dword d;
00518 
00519         if (A1 >= A0)
00520                 if (B0 >= B1)
00521                 {
00522                         s = 0;
00523                         d = (dword)(A1-A0)*(B0-B1);
00524                 }
00525                 else
00526                 {
00527                         s = (A1-A0);
00528                         d = (dword)s*(word)(B0-B1);
00529                 }
00530         else
00531                 if (B0 > B1)
00532                 {
00533                         s = (B0-B1);
00534                         d = (word)(A1-A0)*(dword)s;
00535                 }
00536                 else
00537                 {
00538                         s = 0;
00539                         d = (dword)(A0-A1)*(B1-B0);
00540                 }
00541 */
00542         // this segment is the branchless equivalent of above
00543         word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00544         unsigned int ai = A[1] < A[0];
00545         unsigned int bi = B[0] < B[1];
00546         unsigned int di = ai & bi;
00547         DWord d = DWord::Multiply(D[di], D[di+2]);
00548         D[1] = D[3] = 0;
00549         unsigned int si = ai + !bi;
00550         word s = D[si];
00551 
00552         DWord A0B0 = DWord::Multiply(A[0], B[0]);
00553         C[0] = A0B0.GetLowHalf();
00554 
00555         DWord A1B1 = DWord::Multiply(A[1], B[1]);
00556         DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00557         C[1] = t.GetLowHalf();
00558 
00559         t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00560         C[2] = t.GetLowHalf();
00561         C[3] = t.GetHighHalf();
00562 }
00563 
00564 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00565 {
00566         DWord t = DWord::Multiply(A[0], B[0]);
00567         C[0] = t.GetLowHalf();
00568         C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00569 }
00570 
00571 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00572 {
00573         word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00574         unsigned int ai = A[1] < A[0];
00575         unsigned int bi = B[0] < B[1];
00576         unsigned int di = ai & bi;
00577         DWord d = DWord::Multiply(D[di], D[di+2]);
00578         D[1] = D[3] = 0;
00579         unsigned int si = ai + !bi;
00580         word s = D[si];
00581 
00582         DWord A0B0 = DWord::Multiply(A[0], B[0]);
00583         DWord t = A0B0 + C[0];
00584         C[0] = t.GetLowHalf();
00585 
00586         DWord A1B1 = DWord::Multiply(A[1], B[1]);
00587         t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00588         C[1] = t.GetLowHalf();
00589 
00590         t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00591         C[2] = t.GetLowHalf();
00592 
00593         t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00594         C[3] = t.GetLowHalf();
00595         return t.GetHighHalf();
00596 }
00597 
00598 #define MulAcc(x, y)                                                            \
00599         p = DWord::MultiplyAndAdd(A[x], B[y], c);               \
00600         c = p.GetLowHalf();                                                             \
00601         p = (DWord) d + p.GetHighHalf();                                        \
00602         d = p.GetLowHalf();                                                             \
00603         e += p.GetHighHalf();
00604 
00605 #define SaveMulAcc(s, x, y)                                             \
00606         R[s] = c;                                                                               \
00607         p = DWord::MultiplyAndAdd(A[x], B[y], d);                               \
00608         c = p.GetLowHalf();                                                             \
00609         p = (DWord) e + p.GetHighHalf();                                        \
00610         d = p.GetLowHalf();                                                             \
00611         e = p.GetHighHalf();
00612 
00613 #define SquAcc(x, y)                                                            \
00614         q = DWord::Multiply(A[x], A[y]);        \
00615         p = q + c;                                      \
00616         c = p.GetLowHalf();                                                             \
00617         p = (DWord) d + p.GetHighHalf();                                        \
00618         d = p.GetLowHalf();                                                             \
00619         e += p.GetHighHalf();                   \
00620         p = q + c;                                      \
00621         c = p.GetLowHalf();                                                             \
00622         p = (DWord) d + p.GetHighHalf();                                        \
00623         d = p.GetLowHalf();                                                             \
00624         e += p.GetHighHalf();
00625 
00626 #define SaveSquAcc(s, x, y)                                             \
00627         R[s] = c;                                                                               \
00628         q = DWord::Multiply(A[x], A[y]);        \
00629         p = q + d;                                      \
00630         c = p.GetLowHalf();                                                             \
00631         p = (DWord) e + p.GetHighHalf();                                        \
00632         d = p.GetLowHalf();                                                             \
00633         e = p.GetHighHalf();                    \
00634         p = q + c;                                      \
00635         c = p.GetLowHalf();                                                             \
00636         p = (DWord) d + p.GetHighHalf();                                        \
00637         d = p.GetLowHalf();                                                             \
00638         e += p.GetHighHalf();
00639 
00640 void Portable::Multiply4(word *R, const word *A, const word *B)
00641 {
00642         DWord p;
00643         word c, d, e;
00644 
00645         p = DWord::Multiply(A[0], B[0]);
00646         R[0] = p.GetLowHalf();
00647         c = p.GetHighHalf();
00648         d = e = 0;
00649 
00650         MulAcc(0, 1);
00651         MulAcc(1, 0);
00652 
00653         SaveMulAcc(1, 2, 0);
00654         MulAcc(1, 1);
00655         MulAcc(0, 2);
00656 
00657         SaveMulAcc(2, 0, 3);
00658         MulAcc(1, 2);
00659         MulAcc(2, 1);
00660         MulAcc(3, 0);
00661 
00662         SaveMulAcc(3, 3, 1);
00663         MulAcc(2, 2);
00664         MulAcc(1, 3);
00665 
00666         SaveMulAcc(4, 2, 3);
00667         MulAcc(3, 2);
00668 
00669         R[5] = c;
00670         p = DWord::MultiplyAndAdd(A[3], B[3], d);
00671         R[6] = p.GetLowHalf();
00672         R[7] = e + p.GetHighHalf();
00673 }
00674 
00675 void Portable::Square2(word *R, const word *A)
00676 {
00677         DWord p, q;
00678         word c, d, e;
00679 
00680         p = DWord::Multiply(A[0], A[0]);
00681         R[0] = p.GetLowHalf();
00682         c = p.GetHighHalf();
00683         d = e = 0;
00684 
00685         SquAcc(0, 1);
00686 
00687         R[1] = c;
00688         p = DWord::MultiplyAndAdd(A[1], A[1], d);
00689         R[2] = p.GetLowHalf();
00690         R[3] = e + p.GetHighHalf();
00691 }
00692 
00693 void Portable::Square4(word *R, const word *A)
00694 {
00695 #ifdef _MSC_VER
00696         // VC60 workaround: MSVC 6.0 has an optimization bug that makes
00697         // (dword)A*B where either A or B has been cast to a dword before
00698         // very expensive. Revisit this function when this
00699         // bug is fixed.
00700         Multiply4(R, A, A);
00701 #else
00702         const word *B = A;
00703         DWord p, q;
00704         word c, d, e;
00705 
00706         p = DWord::Multiply(A[0], A[0]);
00707         R[0] = p.GetLowHalf();
00708         c = p.GetHighHalf();
00709         d = e = 0;
00710 
00711         SquAcc(0, 1);
00712 
00713         SaveSquAcc(1, 2, 0);
00714         MulAcc(1, 1);
00715 
00716         SaveSquAcc(2, 0, 3);
00717         SquAcc(1, 2);
00718 
00719         SaveSquAcc(3, 3, 1);
00720         MulAcc(2, 2);
00721 
00722         SaveSquAcc(4, 2, 3);
00723 
00724         R[5] = c;
00725         p = DWord::MultiplyAndAdd(A[3], A[3], d);
00726         R[6] = p.GetLowHalf();
00727         R[7] = e + p.GetHighHalf();
00728 #endif
00729 }
00730 
00731 void Portable::Multiply8(word *R, const word *A, const word *B)
00732 {
00733         DWord p;
00734         word c, d, e;
00735 
00736         p = DWord::Multiply(A[0], B[0]);
00737         R[0] = p.GetLowHalf();
00738         c = p.GetHighHalf();
00739         d = e = 0;
00740 
00741         MulAcc(0, 1);
00742         MulAcc(1, 0);
00743 
00744         SaveMulAcc(1, 2, 0);
00745         MulAcc(1, 1);
00746         MulAcc(0, 2);
00747 
00748         SaveMulAcc(2, 0, 3);
00749         MulAcc(1, 2);
00750         MulAcc(2, 1);
00751         MulAcc(3, 0);
00752 
00753         SaveMulAcc(3, 0, 4);
00754         MulAcc(1, 3);
00755         MulAcc(2, 2);
00756         MulAcc(3, 1);
00757         MulAcc(4, 0);
00758 
00759         SaveMulAcc(4, 0, 5);
00760         MulAcc(1, 4);
00761         MulAcc(2, 3);
00762         MulAcc(3, 2);
00763         MulAcc(4, 1);
00764         MulAcc(5, 0);
00765 
00766         SaveMulAcc(5, 0, 6);
00767         MulAcc(1, 5);
00768         MulAcc(2, 4);
00769         MulAcc(3, 3);
00770         MulAcc(4, 2);
00771         MulAcc(5, 1);
00772         MulAcc(6, 0);
00773 
00774         SaveMulAcc(6, 0, 7);
00775         MulAcc(1, 6);
00776         MulAcc(2, 5);
00777         MulAcc(3, 4);
00778         MulAcc(4, 3);
00779         MulAcc(5, 2);
00780         MulAcc(6, 1);
00781         MulAcc(7, 0);
00782 
00783         SaveMulAcc(7, 1, 7);
00784         MulAcc(2, 6);
00785         MulAcc(3, 5);
00786         MulAcc(4, 4);
00787         MulAcc(5, 3);
00788         MulAcc(6, 2);
00789         MulAcc(7, 1);
00790 
00791         SaveMulAcc(8, 2, 7);
00792         MulAcc(3, 6);
00793         MulAcc(4, 5);
00794         MulAcc(5, 4);
00795         MulAcc(6, 3);
00796         MulAcc(7, 2);
00797 
00798         SaveMulAcc(9, 3, 7);
00799         MulAcc(4, 6);
00800         MulAcc(5, 5);
00801         MulAcc(6, 4);
00802         MulAcc(7, 3);
00803 
00804         SaveMulAcc(10, 4, 7);
00805         MulAcc(5, 6);
00806         MulAcc(6, 5);
00807         MulAcc(7, 4);
00808 
00809         SaveMulAcc(11, 5, 7);
00810         MulAcc(6, 6);
00811         MulAcc(7, 5);
00812 
00813         SaveMulAcc(12, 6, 7);
00814         MulAcc(7, 6);
00815 
00816         R[13] = c;
00817         p = DWord::MultiplyAndAdd(A[7], B[7], d);
00818         R[14] = p.GetLowHalf();
00819         R[15] = e + p.GetHighHalf();
00820 }
00821 
00822 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00823 {
00824         DWord p;
00825         word c, d, e;
00826 
00827         p = DWord::Multiply(A[0], B[0]);
00828         R[0] = p.GetLowHalf();
00829         c = p.GetHighHalf();
00830         d = e = 0;
00831 
00832         MulAcc(0, 1);
00833         MulAcc(1, 0);
00834 
00835         SaveMulAcc(1, 2, 0);
00836         MulAcc(1, 1);
00837         MulAcc(0, 2);
00838 
00839         R[2] = c;
00840         R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00841 }
00842 
00843 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00844 {
00845         DWord p;
00846         word c, d, e;
00847 
00848         p = DWord::Multiply(A[0], B[0]);
00849         R[0] = p.GetLowHalf();
00850         c = p.GetHighHalf();
00851         d = e = 0;
00852 
00853         MulAcc(0, 1);
00854         MulAcc(1, 0);
00855 
00856         SaveMulAcc(1, 2, 0);
00857         MulAcc(1, 1);
00858         MulAcc(0, 2);
00859 
00860         SaveMulAcc(2, 0, 3);
00861         MulAcc(1, 2);
00862         MulAcc(2, 1);
00863         MulAcc(3, 0);
00864 
00865         SaveMulAcc(3, 0, 4);
00866         MulAcc(1, 3);
00867         MulAcc(2, 2);
00868         MulAcc(3, 1);
00869         MulAcc(4, 0);
00870 
00871         SaveMulAcc(4, 0, 5);
00872         MulAcc(1, 4);
00873         MulAcc(2, 3);
00874         MulAcc(3, 2);
00875         MulAcc(4, 1);
00876         MulAcc(5, 0);
00877 
00878         SaveMulAcc(5, 0, 6);
00879         MulAcc(1, 5);
00880         MulAcc(2, 4);
00881         MulAcc(3, 3);
00882         MulAcc(4, 2);
00883         MulAcc(5, 1);
00884         MulAcc(6, 0);
00885 
00886         R[6] = c;
00887         R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00888                                 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00889 }
00890 
00891 #undef MulAcc
00892 #undef SaveMulAcc
00893 #undef SquAcc
00894 #undef SaveSquAcc
00895 
00896 #ifdef CRYPTOPP_X86ASM_AVAILABLE
00897 
00898 // ************** x86 feature detection ***************
00899 
00900 static bool s_sse2Enabled = true;
00901 
00902 static void CpuId(word32 input, word32 *output)
00903 {
00904 #ifdef __GNUC__
00905         __asm__
00906         (
00907                 // save ebx in case -fPIC is being used
00908                 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00909                 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
00910                 : "a" (input)
00911         );
00912 #else
00913         __asm
00914         {
00915                 mov eax, input
00916                 cpuid
00917                 mov edi, output
00918                 mov [edi], eax
00919                 mov [edi+4], ebx
00920                 mov [edi+8], ecx
00921                 mov [edi+12], edx
00922         }
00923 #endif
00924 }
00925 
00926 #ifdef SSE2_INTRINSICS_AVAILABLE
00927 #ifndef _MSC_VER
00928 static jmp_buf s_env;
00929 static void SigIllHandler(int)
00930 {
00931         longjmp(s_env, 1);
00932 }
00933 #endif
00934 
00935 static bool HasSSE2()
00936 {
00937         if (!s_sse2Enabled)
00938                 return false;
00939 
00940         word32 cpuid[4];
00941         CpuId(1, cpuid);
00942         if ((cpuid[3] & (1 << 26)) == 0)
00943                 return false;
00944 
00945 #ifdef _MSC_VER
00946     __try
00947         {
00948         __asm xorpd xmm0, xmm0        // executing SSE2 instruction
00949         }
00950     __except (1)
00951         {
00952                 return false;
00953     }
00954         return true;
00955 #else
00956         typedef void (*SigHandler)(int);
00957 
00958         SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00959         if (oldHandler == SIG_ERR)
00960                 return false;
00961 
00962         bool result = true;
00963         if (setjmp(s_env))
00964                 result = false;
00965         else
00966                 __asm __volatile ("xorps %xmm0, %xmm0");
00967 
00968         signal(SIGILL, oldHandler);
00969         return result;
00970 #endif
00971 }
00972 #endif
00973 
00974 static bool IsP4()
00975 {
00976         word32 cpuid[4];
00977 
00978         CpuId(0, cpuid);
00979         std::swap(cpuid[2], cpuid[3]);
00980         if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
00981                 return false;
00982 
00983         CpuId(1, cpuid);
00984         return ((cpuid[0] >> 8) & 0xf) == 0xf;
00985 }
00986 
00987 // ************** Pentium/P4 optimizations ***************
00988 
00989 class PentiumOptimized : public Portable
00990 {
00991 public:
00992         static word Add(word *C, const word *A, const word *B, size_t N);
00993         static word Subtract(word *C, const word *A, const word *B, size_t N);
00994         static void Multiply4(word *C, const word *A, const word *B);
00995         static void Multiply8(word *C, const word *A, const word *B);
00996         static void Multiply8Bottom(word *C, const word *A, const word *B);
00997 };
00998 
00999 class P4Optimized
01000 {
01001 public:
01002         static word Add(word *C, const word *A, const word *B, size_t N);
01003         static word Subtract(word *C, const word *A, const word *B, size_t N);
01004 #ifdef SSE2_INTRINSICS_AVAILABLE
01005         static void Multiply4(word *C, const word *A, const word *B);
01006         static void Multiply8(word *C, const word *A, const word *B);
01007         static void Multiply8Bottom(word *C, const word *A, const word *B);
01008 #endif
01009 };
01010 
01011 typedef word (* PAddSub)(word *C, const word *A, const word *B, size_t N);
01012 typedef void (* PMul)(word *C, const word *A, const word *B);
01013 
01014 static PAddSub s_pAdd, s_pSub;
01015 #ifdef SSE2_INTRINSICS_AVAILABLE
01016 static PMul s_pMul4, s_pMul8, s_pMul8B;
01017 #endif
01018 
01019 static void SetPentiumFunctionPointers()
01020 {
01021         if (IsP4())
01022         {
01023                 s_pAdd = &P4Optimized::Add;
01024                 s_pSub = &P4Optimized::Subtract;
01025         }
01026         else
01027         {
01028                 s_pAdd = &PentiumOptimized::Add;
01029                 s_pSub = &PentiumOptimized::Subtract;
01030         }
01031 
01032 #ifdef SSE2_INTRINSICS_AVAILABLE
01033         if (HasSSE2())
01034         {
01035                 s_pMul4 = &P4Optimized::Multiply4;
01036                 s_pMul8 = &P4Optimized::Multiply8;
01037                 s_pMul8B = &P4Optimized::Multiply8Bottom;
01038         }
01039         else
01040         {
01041                 s_pMul4 = &PentiumOptimized::Multiply4;
01042                 s_pMul8 = &PentiumOptimized::Multiply8;
01043                 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01044         }
01045 #endif
01046 }
01047 
01048 static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);
01049 
01050 void DisableSSE2()
01051 {
01052         s_sse2Enabled = false;
01053         SetPentiumFunctionPointers();
01054 }
01055 
01056 class LowLevel : public PentiumOptimized
01057 {
01058 public:
01059         inline static word Add(word *C, const word *A, const word *B, size_t N)
01060                 {return s_pAdd(C, A, B, N);}
01061         inline static word Subtract(word *C, const word *A, const word *B, size_t N)
01062                 {return s_pSub(C, A, B, N);}
01063         inline static void Square4(word *R, const word *A)
01064                 {Multiply4(R, A, A);}
01065 #ifdef SSE2_INTRINSICS_AVAILABLE
01066         inline static void Multiply4(word *C, const word *A, const word *B)
01067                 {s_pMul4(C, A, B);}
01068         inline static void Multiply8(word *C, const word *A, const word *B)
01069                 {s_pMul8(C, A, B);}
01070         inline static void Multiply8Bottom(word *C, const word *A, const word *B)
01071                 {s_pMul8B(C, A, B);}
01072 #endif
01073 };
01074 
01075 // use some tricks to share assembly code between MSVC and GCC
01076 #ifdef _MSC_VER
01077         #define CRYPTOPP_NAKED __declspec(naked)
01078         #define AS1(x) __asm x
01079         #define AS2(x, y) __asm x, y
01080         #define AddPrologue \
01081                 __asm   push ebp \
01082                 __asm   push ebx \
01083                 __asm   push esi \
01084                 __asm   push edi \
01085                 __asm   mov             ecx, [esp+20] \
01086                 __asm   mov             edx, [esp+24] \
01087                 __asm   mov             ebx, [esp+28] \
01088                 __asm   mov             esi, [esp+32]
01089         #define AddEpilogue \
01090                 __asm   pop edi \
01091                 __asm   pop esi \
01092                 __asm   pop ebx \
01093                 __asm   pop ebp \
01094                 __asm   ret
01095         #define MulPrologue \
01096                 __asm   push ebp \
01097                 __asm   push ebx \
01098                 __asm   push esi \
01099                 __asm   push edi \
01100                 __asm   mov ecx, [esp+28] \
01101                 __asm   mov esi, [esp+24] \
01102                 __asm   push [esp+20]
01103         #define MulEpilogue \
01104                 __asm   add esp, 4 \
01105                 __asm   pop edi \
01106                 __asm   pop esi \
01107                 __asm   pop ebx \
01108                 __asm   pop ebp \
01109                 __asm   ret
01110 #else
01111         #define CRYPTOPP_NAKED
01112         #define AS1(x) #x ";"
01113         #define AS2(x, y) #x ", " #y ";"
01114         #define AddPrologue \
01115                 __asm__ __volatile__ \
01116                 ( \
01117                         "push %%ebx;"   /* save this manually, in case of -fPIC */ \
01118                         "mov %2, %%ebx;" \
01119                         ".intel_syntax noprefix;" \
01120                         "push ebp;"
01121         #define AddEpilogue \
01122                         "pop ebp;" \
01123                         ".att_syntax prefix;" \
01124                         "pop %%ebx;" \
01125                                         : \
01126                                         : "c" (C), "d" (A), "m" (B), "S" (N) \
01127                                         : "%edi", "memory", "cc" \
01128                 );
01129         #define MulPrologue \
01130                 __asm__ __volatile__ \
01131                 ( \
01132                         "push %%ebx;"   /* save this manually, in case of -fPIC */ \
01133                         "push %%ebp;" \
01134                         "push %0;" \
01135                         ".intel_syntax noprefix;"
01136         #define MulEpilogue \
01137                         "add esp, 4;" \
01138                         "pop ebp;" \
01139                         "pop ebx;" \
01140                         ".att_syntax prefix;" \
01141                         : \
01142                         : "rm" (Z), "S" (X), "c" (Y) \
01143                         : "%eax", "%edx", "%edi", "memory", "cc" \
01144                 );
01145 #endif
01146 
01147 CRYPTOPP_NAKED word PentiumOptimized::Add(word *C, const word *A, const word *B, size_t N)
01148 {
01149         AddPrologue
01150 
01151         // now: ebx = B, ecx = C, edx = A, esi = N
01152         AS2(    sub ecx, edx)   // hold the distance between C & A so we can add this to A to get C
01153         AS2(    xor eax, eax)   // clear eax
01154 
01155         AS2(    sub eax, esi)   // eax is a negative index from end of B
01156         AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
01157 
01158         AS2(    sar eax, 1)             // unit of eax is now dwords; this also clears the carry flag
01159         AS1(    jz      loopendAdd)             // if no dwords then nothing to do
01160 
01161         AS1(loopstartAdd:)
01162         AS2(    mov    esi,[edx])                       // load lower word of A
01163         AS2(    mov    ebp,[edx+4])                     // load higher word of A
01164 
01165         AS2(    mov    edi,[ebx+8*eax])         // load lower word of B
01166         AS2(    lea    edx,[edx+8])                     // advance A and C
01167 
01168         AS2(    adc    esi,edi)                         // add lower words
01169         AS2(    mov    edi,[ebx+8*eax+4])       // load higher word of B
01170 
01171         AS2(    adc    ebp,edi)                         // add higher words
01172         AS1(    inc    eax)                                     // advance B
01173 
01174         AS2(    mov    [edx+ecx-8],esi)         // store lower word result
01175         AS2(    mov    [edx+ecx-4],ebp)         // store higher word result
01176 
01177         AS1(    jnz    loopstartAdd)                    // loop until eax overflows and becomes zero
01178 
01179         AS1(loopendAdd:)
01180         AS2(    adc eax, 0)             // store carry into eax (return result register)
01181 
01182         AddEpilogue
01183 }
01184 
01185 CRYPTOPP_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B, size_t N)
01186 {
01187         AddPrologue
01188 
01189         // now: ebx = B, ecx = C, edx = A, esi = N
01190         AS2(    sub ecx, edx)   // hold the distance between C & A so we can add this to A to get C
01191         AS2(    xor eax, eax)   // clear eax
01192 
01193         AS2(    sub eax, esi)   // eax is a negative index from end of B
01194         AS2(    lea ebx, [ebx+4*esi])   // ebx is end of B
01195 
01196         AS2(    sar eax, 1)             // unit of eax is now dwords; this also clears the carry flag
01197         AS1(    jz      loopendSub)             // if no dwords then nothing to do
01198 
01199         AS1(loopstartSub:)
01200         AS2(    mov    esi,[edx])                       // load lower word of A
01201         AS2(    mov    ebp,[edx+4])                     // load higher word of A
01202 
01203         AS2(    mov    edi,[ebx+8*eax])         // load lower word of B
01204         AS2(    lea    edx,[edx+8])                     // advance A and C
01205 
01206         AS2(    sbb    esi,edi)                         // subtract lower words
01207         AS2(    mov    edi,[ebx+8*eax+4])       // load higher word of B
01208 
01209         AS2(    sbb    ebp,edi)                         // subtract higher words
01210         AS1(    inc    eax)                                     // advance B
01211 
01212         AS2(    mov    [edx+ecx-8],esi)         // store lower word result
01213         AS2(    mov    [edx+ecx-4],ebp)         // store higher word result
01214 
01215         AS1(    jnz    loopstartSub)                    // loop until eax overflows and becomes zero
01216 
01217         AS1(loopendSub:)
01218         AS2(    adc eax, 0)             // store carry into eax (return result register)
01219 
01220         AddEpilogue
01221 }
01222 
01223 // On Pentium 4, the adc and sbb instructions are very expensive, so avoid them.
01224 
01225 CRYPTOPP_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, size_t N)
01226 {
01227         AddPrologue
01228 
01229         // now: ebx = B, ecx = C, edx = A, esi = N
01230         AS2(    xor             eax, eax)
01231         AS1(    neg             esi)
01232         AS1(    jz              loopendAddP4)           // if no dwords then nothing to do
01233 
01234         AS2(    mov             edi, [edx])
01235         AS2(    mov             ebp, [ebx])
01236         AS1(    jmp             carry1AddP4)
01237 
01238         AS1(loopstartAddP4:)
01239         AS2(    mov             edi, [edx+8])
01240         AS2(    add             ecx, 8)
01241         AS2(    add             edx, 8)
01242         AS2(    mov             ebp, [ebx])
01243         AS2(    add             edi, eax)
01244         AS1(    jc              carry1AddP4)
01245         AS2(    xor             eax, eax)
01246 
01247         AS1(carry1AddP4:)
01248         AS2(    add             edi, ebp)
01249         AS2(    mov             ebp, 1)
01250         AS2(    mov             [ecx], edi)
01251         AS2(    mov             edi, [edx+4])
01252         AS2(    cmovc   eax, ebp)
01253         AS2(    mov             ebp, [ebx+4])
01254         AS2(    add             ebx, 8)
01255         AS2(    add             edi, eax)
01256         AS1(    jc              carry2AddP4)
01257         AS2(    xor             eax, eax)
01258 
01259         AS1(carry2AddP4:)
01260         AS2(    add             edi, ebp)
01261         AS2(    mov             ebp, 1)
01262         AS2(    cmovc   eax, ebp)
01263         AS2(    mov             [ecx+4], edi)
01264         AS2(    add             esi, 2)
01265         AS1(    jnz             loopstartAddP4)
01266 
01267         AS1(loopendAddP4:)
01268 
01269         AddEpilogue
01270 }
01271 
01272 CRYPTOPP_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B, size_t N)
01273 {
01274         AddPrologue
01275 
01276         // now: ebx = B, ecx = C, edx = A, esi = N
01277         AS2(    xor             eax, eax)
01278         AS1(    neg             esi)
01279         AS1(    jz              loopendSubP4)           // if no dwords then nothing to do
01280 
01281         AS2(    mov             edi, [edx])
01282         AS2(    mov             ebp, [ebx])
01283         AS1(    jmp             carry1SubP4)
01284 
01285         AS1(loopstartSubP4:)
01286         AS2(    mov             edi, [edx+8])
01287         AS2(    add             edx, 8)
01288         AS2(    add             ecx, 8)
01289         AS2(    mov             ebp, [ebx])
01290         AS2(    sub             edi, eax)
01291         AS1(    jc              carry1SubP4)
01292         AS2(    xor             eax, eax)
01293 
01294         AS1(carry1SubP4:)
01295         AS2(    sub             edi, ebp)
01296         AS2(    mov             ebp, 1)
01297         AS2(    mov             [ecx], edi)
01298         AS2(    mov             edi, [edx+4])
01299         AS2(    cmovc   eax, ebp)
01300         AS2(    mov             ebp, [ebx+4])
01301         AS2(    add             ebx, 8)
01302         AS2(    sub             edi, eax)
01303         AS1(    jc              carry2SubP4)
01304         AS2(    xor             eax, eax)
01305 
01306         AS1(carry2SubP4:)
01307         AS2(    sub             edi, ebp)
01308         AS2(    mov             ebp, 1)
01309         AS2(    cmovc   eax, ebp)
01310         AS2(    mov             [ecx+4], edi)
01311         AS2(    add             esi, 2)
01312         AS1(    jnz             loopstartSubP4)
01313 
01314         AS1(loopendSubP4:)
01315 
01316         AddEpilogue
01317 }
01318 
01319 // multiply assembly code originally contributed by Leonard Janke
01320 
01321 #define MulStartup \
01322         AS2(xor ebp, ebp) \
01323         AS2(xor edi, edi) \
01324         AS2(xor ebx, ebx) 
01325 
01326 #define MulShiftCarry \
01327         AS2(mov ebp, edx) \
01328         AS2(mov edi, ebx) \
01329         AS2(xor ebx, ebx)
01330 
01331 #define MulAccumulateBottom(i,j) \
01332         AS2(mov eax, [ecx+4*j]) \
01333         AS2(imul eax, dword ptr [esi+4*i]) \
01334         AS2(add ebp, eax)
01335 
01336 #define MulAccumulate(i,j) \
01337         AS2(mov eax, [ecx+4*j]) \
01338         AS1(mul dword ptr [esi+4*i]) \
01339         AS2(add ebp, eax) \
01340         AS2(adc edi, edx) \
01341         AS2(adc bl, bh)
01342 
01343 #define MulStoreDigit(i)  \
01344         AS2(mov edx, edi) \
01345         AS2(mov edi, [esp]) \
01346         AS2(mov [edi+4*i], ebp)
01347 
01348 #define MulLastDiagonal(digits) \
01349         AS2(mov eax, [ecx+4*(digits-1)]) \
01350         AS1(mul dword ptr [esi+4*(digits-1)]) \
01351         AS2(add ebp, eax) \
01352         AS2(adc edx, edi) \
01353         AS2(mov edi, [esp]) \
01354         AS2(mov [edi+4*(2*digits-2)], ebp) \
01355         AS2(mov [edi+4*(2*digits-1)], edx)
01356 
01357 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01358 {
01359         MulPrologue
01360         // now: [esp] = Z, esi = X, ecx = Y
01361         MulStartup
01362         MulAccumulate(0,0)
01363         MulStoreDigit(0)
01364         MulShiftCarry
01365 
01366         MulAccumulate(1,0)
01367         MulAccumulate(0,1)
01368         MulStoreDigit(1)
01369         MulShiftCarry
01370 
01371         MulAccumulate(2,0)
01372         MulAccumulate(1,1)
01373         MulAccumulate(0,2)
01374         MulStoreDigit(2)
01375         MulShiftCarry
01376 
01377         MulAccumulate(3,0)
01378         MulAccumulate(2,1)
01379         MulAccumulate(1,2)
01380         MulAccumulate(0,3)
01381         MulStoreDigit(3)
01382         MulShiftCarry
01383 
01384         MulAccumulate(3,1)
01385         MulAccumulate(2,2)
01386         MulAccumulate(1,3)
01387         MulStoreDigit(4)
01388         MulShiftCarry
01389 
01390         MulAccumulate(3,2)
01391         MulAccumulate(2,3)
01392         MulStoreDigit(5)
01393         MulShiftCarry
01394 
01395         MulLastDiagonal(4)
01396         MulEpilogue
01397 }
01398 
01399 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01400 {
01401         MulPrologue
01402         // now: [esp] = Z, esi = X, ecx = Y
01403         MulStartup
01404         MulAccumulate(0,0)
01405         MulStoreDigit(0)
01406         MulShiftCarry
01407 
01408         MulAccumulate(1,0)
01409         MulAccumulate(0,1)
01410         MulStoreDigit(1)
01411         MulShiftCarry
01412 
01413         MulAccumulate(2,0)
01414         MulAccumulate(1,1)
01415         MulAccumulate(0,2)
01416         MulStoreDigit(2)
01417         MulShiftCarry
01418 
01419         MulAccumulate(3,0)
01420         MulAccumulate(2,1)
01421         MulAccumulate(1,2)
01422         MulAccumulate(0,3)
01423         MulStoreDigit(3)
01424         MulShiftCarry
01425 
01426         MulAccumulate(4,0)
01427         MulAccumulate(3,1)
01428         MulAccumulate(2,2)
01429         MulAccumulate(1,3)
01430         MulAccumulate(0,4)
01431         MulStoreDigit(4)
01432         MulShiftCarry
01433 
01434         MulAccumulate(5,0)
01435         MulAccumulate(4,1)
01436         MulAccumulate(3,2)
01437         MulAccumulate(2,3)
01438         MulAccumulate(1,4)
01439         MulAccumulate(0,5)
01440         MulStoreDigit(5)
01441         MulShiftCarry
01442 
01443         MulAccumulate(6,0)
01444         MulAccumulate(5,1)
01445         MulAccumulate(4,2)
01446         MulAccumulate(3,3)
01447         MulAccumulate(2,4)
01448         MulAccumulate(1,5)
01449         MulAccumulate(0,6)
01450         MulStoreDigit(6)
01451         MulShiftCarry
01452 
01453         MulAccumulate(7,0)
01454         MulAccumulate(6,1)
01455         MulAccumulate(5,2)
01456         MulAccumulate(4,3)
01457         MulAccumulate(3,4)
01458         MulAccumulate(2,5)
01459         MulAccumulate(1,6)
01460         MulAccumulate(0,7)
01461         MulStoreDigit(7)
01462         MulShiftCarry
01463 
01464         MulAccumulate(7,1)
01465         MulAccumulate(6,2)
01466         MulAccumulate(5,3)
01467         MulAccumulate(4,4)
01468         MulAccumulate(3,5)
01469         MulAccumulate(2,6)
01470         MulAccumulate(1,7)
01471         MulStoreDigit(8)
01472         MulShiftCarry
01473 
01474         MulAccumulate(7,2)
01475         MulAccumulate(6,3)
01476         MulAccumulate(5,4)
01477         MulAccumulate(4,5)
01478         MulAccumulate(3,6)
01479         MulAccumulate(2,7)
01480         MulStoreDigit(9)
01481         MulShiftCarry
01482 
01483         MulAccumulate(7,3)
01484         MulAccumulate(6,4)
01485         MulAccumulate(5,5)
01486         MulAccumulate(4,6)
01487         MulAccumulate(3,7)
01488         MulStoreDigit(10)
01489         MulShiftCarry
01490 
01491         MulAccumulate(7,4)
01492         MulAccumulate(6,5)
01493         MulAccumulate(5,6)
01494         MulAccumulate(4,7)
01495         MulStoreDigit(11)
01496         MulShiftCarry
01497 
01498         MulAccumulate(7,5)
01499         MulAccumulate(6,6)
01500         MulAccumulate(5,7)
01501         MulStoreDigit(12)
01502         MulShiftCarry
01503 
01504         MulAccumulate(7,6)
01505         MulAccumulate(6,7)
01506         MulStoreDigit(13)
01507         MulShiftCarry
01508 
01509         MulLastDiagonal(8)
01510         MulEpilogue
01511 }
01512 
01513 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01514 {
01515         MulPrologue
01516         // now: [esp] = Z, esi = X, ecx = Y
01517         MulStartup
01518         MulAccumulate(0,0)
01519         MulStoreDigit(0)
01520         MulShiftCarry
01521 
01522         MulAccumulate(1,0)
01523         MulAccumulate(0,1)
01524         MulStoreDigit(1)
01525         MulShiftCarry
01526 
01527         MulAccumulate(2,0)
01528         MulAccumulate(1,1)
01529         MulAccumulate(0,2)
01530         MulStoreDigit(2)
01531         MulShiftCarry
01532 
01533         MulAccumulate(3,0)
01534         MulAccumulate(2,1)
01535         MulAccumulate(1,2)
01536         MulAccumulate(0,3)
01537         MulStoreDigit(3)
01538         MulShiftCarry
01539 
01540         MulAccumulate(4,0)
01541         MulAccumulate(3,1)
01542         MulAccumulate(2,2)
01543         MulAccumulate(1,3)
01544         MulAccumulate(0,4)
01545         MulStoreDigit(4)
01546         MulShiftCarry
01547 
01548         MulAccumulate(5,0)
01549         MulAccumulate(4,1)
01550         MulAccumulate(3,2)
01551         MulAccumulate(2,3)
01552         MulAccumulate(1,4)
01553         MulAccumulate(0,5)
01554         MulStoreDigit(5)
01555         MulShiftCarry
01556 
01557         MulAccumulate(6,0)
01558         MulAccumulate(5,1)
01559         MulAccumulate(4,2)
01560         MulAccumulate(3,3)
01561         MulAccumulate(2,4)
01562         MulAccumulate(1,5)
01563         MulAccumulate(0,6)
01564         MulStoreDigit(6)
01565         MulShiftCarry
01566 
01567         MulAccumulateBottom(7,0)
01568         MulAccumulateBottom(6,1)
01569         MulAccumulateBottom(5,2)
01570         MulAccumulateBottom(4,3)
01571         MulAccumulateBottom(3,4)
01572         MulAccumulateBottom(2,5)
01573         MulAccumulateBottom(1,6)
01574         MulAccumulateBottom(0,7)
01575         MulStoreDigit(7)
01576         MulEpilogue
01577 }
01578 
01579 #undef AS1
01580 #undef AS2
01581 
01582 #else   // not x86 - no processor specific code at this layer
01583 
01584 typedef Portable LowLevel;
01585 
01586 #endif
01587 
01588 #ifdef SSE2_INTRINSICS_AVAILABLE
01589 
01590 #ifdef __GNUC__
01591 #define CRYPTOPP_FASTCALL
01592 #else
01593 #define CRYPTOPP_FASTCALL __fastcall
01594 #endif
01595 
01596 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01597 {
01598         __m128i a3210 = _mm_load_si128(A);
01599         __m128i b3210 = _mm_load_si128(B);
01600 
01601         __m128i sum;
01602 
01603         __m128i z = _mm_setzero_si128();
01604         __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01605         C[0] = a2b2_a0b0;
01606 
01607         __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01608         __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01609         __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01610         __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01611         __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01612         C[1] = _mm_add_epi64(a1b0, a0b1);
01613 
01614         __m128i a31 = _mm_srli_epi64(a3210, 32);
01615         __m128i b31 = _mm_srli_epi64(b3210, 32);
01616         __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01617         C[6] = a3b3_a1b1;
01618 
01619         __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01620         __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01621         __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01622         __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01623         __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01624         sum = _mm_add_epi64(a1b1, a0b2);
01625         C[2] = _mm_add_epi64(sum, a2b0);
01626 
01627         __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01628         __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01629         __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01630         __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01631         __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01632         __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01633         __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01634         __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01635         __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01636         sum = _mm_add_epi64(a2b1, a0b3);
01637         C[3] = _mm_add_epi64(sum, sum1);
01638 
01639         __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01640         __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01641         __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01642         __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01643         sum = _mm_add_epi64(a2b2, a3b1);
01644         C[4] = _mm_add_epi64(sum, a1b3);
01645 
01646         __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01647         __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01648         __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01649         __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01650         __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01651         C[5] = _mm_add_epi64(a3b2, a2b3);
01652 }
01653 
01654 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
01655 {
01656         __m128i temp[7];
01657         const word *w = (word *)temp;
01658         const __m64 *mw = (__m64 *)w;
01659 
01660         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01661 
01662         C[0] = w[0];
01663 
01664         __m64 s1, s2;
01665 
01666         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01667         __m64 w4 = mw[2];
01668         __m64 w6 = mw[3];
01669         __m64 w8 = mw[4];
01670         __m64 w10 = mw[5];
01671         __m64 w12 = mw[6];
01672         __m64 w14 = mw[7];
01673         __m64 w16 = mw[8];
01674         __m64 w18 = mw[9];
01675         __m64 w20 = mw[10];
01676         __m64 w22 = mw[11];
01677         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01678 
01679         s1 = _mm_add_si64(w1, w4);
01680         C[1] = _mm_cvtsi64_si32(s1);
01681         s1 = _mm_srli_si64(s1, 32);
01682 
01683         s2 = _mm_add_si64(w6, w8);
01684         s1 = _mm_add_si64(s1, s2);
01685         C[2] = _mm_cvtsi64_si32(s1);
01686         s1 = _mm_srli_si64(s1, 32);
01687 
01688         s2 = _mm_add_si64(w10, w12);
01689         s1 = _mm_add_si64(s1, s2);
01690         C[3] = _mm_cvtsi64_si32(s1);
01691         s1 = _mm_srli_si64(s1, 32);
01692 
01693         s2 = _mm_add_si64(w14, w16);
01694         s1 = _mm_add_si64(s1, s2);
01695         C[4] = _mm_cvtsi64_si32(s1);
01696         s1 = _mm_srli_si64(s1, 32);
01697 
01698         s2 = _mm_add_si64(w18, w20);
01699         s1 = _mm_add_si64(s1, s2);
01700         C[5] = _mm_cvtsi64_si32(s1);
01701         s1 = _mm_srli_si64(s1, 32);
01702 
01703         s2 = _mm_add_si64(w22, w26);
01704         s1 = _mm_add_si64(s1, s2);
01705         C[6] = _mm_cvtsi64_si32(s1);
01706         s1 = _mm_srli_si64(s1, 32);
01707 
01708         C[7] = _mm_cvtsi64_si32(s1) + w[27];
01709         _mm_empty();
01710 }
01711 
01712 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
01713 {
01714         __m128i temp[28];
01715         const word *w = (word *)temp;
01716         const __m64 *mw = (__m64 *)w;
01717         const word *x = (word *)temp+7*4;
01718         const __m64 *mx = (__m64 *)x;
01719         const word *y = (word *)temp+7*4*2;
01720         const __m64 *my = (__m64 *)y;
01721         const word *z = (word *)temp+7*4*3;
01722         const __m64 *mz = (__m64 *)z;
01723 
01724         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01725 
01726         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01727 
01728         P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01729 
01730         P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01731 
01732         C[0] = w[0];
01733 
01734         __m64 s1, s2, s3, s4;
01735 
01736         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01737         __m64 w4 = mw[2];
01738         __m64 w6 = mw[3];
01739         __m64 w8 = mw[4];
01740         __m64 w10 = mw[5];
01741         __m64 w12 = mw[6];
01742         __m64 w14 = mw[7];
01743         __m64 w16 = mw[8];
01744         __m64 w18 = mw[9];
01745         __m64 w20 = mw[10];
01746         __m64 w22 = mw[11];
01747         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01748         __m64 w27 = _mm_cvtsi32_si64(w[27]);
01749 
01750         __m64 x0 = _mm_cvtsi32_si64(x[0]);
01751         __m64 x1 = _mm_cvtsi32_si64(x[1]);
01752         __m64 x4 = mx[2];
01753         __m64 x6 = mx[3];
01754         __m64 x8 = mx[4];
01755         __m64 x10 = mx[5];
01756         __m64 x12 = mx[6];
01757         __m64 x14 = mx[7];
01758         __m64 x16 = mx[8];
01759         __m64 x18 = mx[9];
01760         __m64 x20 = mx[10];
01761         __m64 x22 = mx[11];
01762         __m64 x26 = _mm_cvtsi32_si64(x[26]);
01763         __m64 x27 = _mm_cvtsi32_si64(x[27]);
01764 
01765         __m64 y0 = _mm_cvtsi32_si64(y[0]);
01766         __m64 y1 = _mm_cvtsi32_si64(y[1]);
01767         __m64 y4 = my[2];
01768         __m64 y6 = my[3];
01769         __m64 y8 = my[4];
01770         __m64 y10 = my[5];
01771         __m64 y12 = my[6];
01772         __m64 y14 = my[7];
01773         __m64 y16 = my[8];
01774         __m64 y18 = my[9];
01775         __m64 y20 = my[10];
01776         __m64 y22 = my[11];
01777         __m64 y26 = _mm_cvtsi32_si64(y[26]);
01778         __m64 y27 = _mm_cvtsi32_si64(y[27]);
01779 
01780         __m64 z0 = _mm_cvtsi32_si64(z[0]);
01781         __m64 z1 = _mm_cvtsi32_si64(z[1]);
01782         __m64 z4 = mz[2];
01783         __m64 z6 = mz[3];
01784         __m64 z8 = mz[4];
01785         __m64 z10 = mz[5];
01786         __m64 z12 = mz[6];
01787         __m64 z14 = mz[7];
01788         __m64 z16 = mz[8];
01789         __m64 z18 = mz[9];
01790         __m64 z20 = mz[10];
01791         __m64 z22 = mz[11];
01792         __m64 z26 = _mm_cvtsi32_si64(z[26]);
01793 
01794         s1 = _mm_add_si64(w1, w4);
01795         C[1] = _mm_cvtsi64_si32(s1);
01796         s1 = _mm_srli_si64(s1, 32);
01797 
01798         s2 = _mm_add_si64(w6, w8);
01799         s1 = _mm_add_si64(s1, s2);
01800         C[2] = _mm_cvtsi64_si32(s1);
01801         s1 = _mm_srli_si64(s1, 32);
01802 
01803         s2 = _mm_add_si64(w10, w12);
01804         s1 = _mm_add_si64(s1, s2);
01805         C[3] = _mm_cvtsi64_si32(s1);
01806         s1 = _mm_srli_si64(s1, 32);
01807 
01808         s3 = _mm_add_si64(x0, y0);
01809         s2 = _mm_add_si64(w14, w16);
01810         s1 = _mm_add_si64(s1, s3);
01811         s1 = _mm_add_si64(s1, s2);
01812         C[4] = _mm_cvtsi64_si32(s1);
01813         s1 = _mm_srli_si64(s1, 32);
01814 
01815         s3 = _mm_add_si64(x1, y1);
01816         s4 = _mm_add_si64(x4, y4);
01817         s1 = _mm_add_si64(s1, w18);
01818         s3 = _mm_add_si64(s3, s4);
01819         s1 = _mm_add_si64(s1, w20);
01820         s1 = _mm_add_si64(s1, s3);
01821         C[5] = _mm_cvtsi64_si32(s1);
01822         s1 = _mm_srli_si64(s1, 32);
01823 
01824         s3 = _mm_add_si64(x6, y6);
01825         s4 = _mm_add_si64(x8, y8);
01826         s1 = _mm_add_si64(s1, w22);
01827         s3 = _mm_add_si64(s3, s4);
01828         s1 = _mm_add_si64(s1, w26);
01829         s1 = _mm_add_si64(s1, s3);
01830         C[6] = _mm_cvtsi64_si32(s1);
01831         s1 = _mm_srli_si64(s1, 32);
01832 
01833         s3 = _mm_add_si64(x10, y10);
01834         s4 = _mm_add_si64(x12, y12);
01835         s1 = _mm_add_si64(s1, w27);
01836         s3 = _mm_add_si64(s3, s4);
01837         s1 = _mm_add_si64(s1, s3);
01838         C[7] = _mm_cvtsi64_si32(s1);
01839         s1 = _mm_srli_si64(s1, 32);
01840 
01841         s3 = _mm_add_si64(x14, y14);
01842         s4 = _mm_add_si64(x16, y16);
01843         s1 = _mm_add_si64(s1, z0);
01844         s3 = _mm_add_si64(s3, s4);
01845         s1 = _mm_add_si64(s1, s3);
01846         C[8] = _mm_cvtsi64_si32(s1);
01847         s1 = _mm_srli_si64(s1, 32);
01848 
01849         s3 = _mm_add_si64(x18, y18);
01850         s4 = _mm_add_si64(x20, y20);
01851         s1 = _mm_add_si64(s1, z1);
01852         s3 = _mm_add_si64(s3, s4);
01853         s1 = _mm_add_si64(s1, z4);
01854         s1 = _mm_add_si64(s1, s3);
01855         C[9] = _mm_cvtsi64_si32(s1);
01856         s1 = _mm_srli_si64(s1, 32);
01857 
01858         s3 = _mm_add_si64(x22, y22);
01859         s4 = _mm_add_si64(x26, y26);
01860         s1 = _mm_add_si64(s1, z6);
01861         s3 = _mm_add_si64(s3, s4);
01862         s1 = _mm_add_si64(s1, z8);
01863         s1 = _mm_add_si64(s1, s3);
01864         C[10] = _mm_cvtsi64_si32(s1);
01865         s1 = _mm_srli_si64(s1, 32);
01866 
01867         s3 = _mm_add_si64(x27, y27);
01868         s1 = _mm_add_si64(s1, z10);
01869         s1 = _mm_add_si64(s1, z12);
01870         s1 = _mm_add_si64(s1, s3);
01871         C[11] = _mm_cvtsi64_si32(s1);
01872         s1 = _mm_srli_si64(s1, 32);
01873 
01874         s3 = _mm_add_si64(z14, z16);
01875         s1 = _mm_add_si64(s1, s3);
01876         C[12] = _mm_cvtsi64_si32(s1);
01877         s1 = _mm_srli_si64(s1, 32);
01878 
01879         s3 = _mm_add_si64(z18, z20);
01880         s1 = _mm_add_si64(s1, s3);
01881         C[13] = _mm_cvtsi64_si32(s1);
01882         s1 = _mm_srli_si64(s1, 32);
01883 
01884         s3 = _mm_add_si64(z22, z26);
01885         s1 = _mm_add_si64(s1, s3);
01886         C[14] = _mm_cvtsi64_si32(s1);
01887         s1 = _mm_srli_si64(s1, 32);
01888 
01889         C[15] = z[27] + _mm_cvtsi64_si32(s1);
01890         _mm_empty();
01891 }
01892 
01893 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01894 {
01895         __m128i temp[21];
01896         const word *w = (word *)temp;
01897         const __m64 *mw = (__m64 *)w;
01898         const word *x = (word *)temp+7*4;
01899         const __m64 *mx = (__m64 *)x;
01900         const word *y = (word *)temp+7*4*2;
01901         const __m64 *my = (__m64 *)y;
01902 
01903         P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01904 
01905         P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01906 
01907         P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01908 
01909         C[0] = w[0];
01910 
01911         __m64 s1, s2, s3, s4;
01912 
01913         __m64 w1 = _mm_cvtsi32_si64(w[1]);
01914         __m64 w4 = mw[2];
01915         __m64 w6 = mw[3];
01916         __m64 w8 = mw[4];
01917         __m64 w10 = mw[5];
01918         __m64 w12 = mw[6];
01919         __m64 w14 = mw[7];
01920         __m64 w16 = mw[8];
01921         __m64 w18 = mw[9];
01922         __m64 w20 = mw[10];
01923         __m64 w22 = mw[11];
01924         __m64 w26 = _mm_cvtsi32_si64(w[26]);
01925 
01926         __m64 x0 = _mm_cvtsi32_si64(x[0]);
01927         __m64 x1 = _mm_cvtsi32_si64(x[1]);
01928         __m64 x4 = mx[2];
01929         __m64 x6 = mx[3];
01930         __m64 x8 = mx[4];
01931 
01932         __m64 y0 = _mm_cvtsi32_si64(y[0]);
01933         __m64 y1 = _mm_cvtsi32_si64(y[1]);
01934         __m64 y4 = my[2];
01935         __m64 y6 = my[3];
01936         __m64 y8 = my[4];
01937 
01938         s1 = _mm_add_si64(w1, w4);
01939         C[1] = _mm_cvtsi64_si32(s1);
01940         s1 = _mm_srli_si64(s1, 32);
01941 
01942         s2 = _mm_add_si64(w6, w8);
01943         s1 = _mm_add_si64(s1, s2);
01944         C[2] = _mm_cvtsi64_si32(s1);
01945         s1 = _mm_srli_si64(s1, 32);
01946 
01947         s2 = _mm_add_si64(w10, w12);
01948         s1 = _mm_add_si64(s1, s2);
01949         C[3] = _mm_cvtsi64_si32(s1);
01950         s1 = _mm_srli_si64(s1, 32);
01951 
01952         s3 = _mm_add_si64(x0, y0);
01953         s2 = _mm_add_si64(w14, w16);
01954         s1 = _mm_add_si64(s1, s3);
01955         s1 = _mm_add_si64(s1, s2);
01956         C[4] = _mm_cvtsi64_si32(s1);
01957         s1 = _mm_srli_si64(s1, 32);
01958 
01959         s3 = _mm_add_si64(x1, y1);
01960         s4 = _mm_add_si64(x4, y4);
01961         s1 = _mm_add_si64(s1, w18);
01962         s3 = _mm_add_si64(s3, s4);
01963         s1 = _mm_add_si64(s1, w20);
01964         s1 = _mm_add_si64(s1, s3);
01965         C[5] = _mm_cvtsi64_si32(s1);
01966         s1 = _mm_srli_si64(s1, 32);
01967 
01968         s3 = _mm_add_si64(x6, y6);
01969         s4 = _mm_add_si64(x8, y8);
01970         s1 = _mm_add_si64(s1, w22);
01971         s3 = _mm_add_si64(s3, s4);
01972         s1 = _mm_add_si64(s1, w26);
01973         s1 = _mm_add_si64(s1, s3);
01974         C[6] = _mm_cvtsi64_si32(s1);
01975         s1 = _mm_srli_si64(s1, 32);
01976 
01977         C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01978         _mm_empty();
01979 }
01980 
01981 #endif  // #ifdef SSE2_INTRINSICS_AVAILABLE
01982 
01983 // ********************************************************
01984 
01985 #define A0              A
01986 #define A1              (A+N2)
01987 #define B0              B
01988 #define B1              (B+N2)
01989 
01990 #define T0              T
01991 #define T1              (T+N2)
01992 #define T2              (T+N)
01993 #define T3              (T+N+N2)
01994 
01995 #define R0              R
01996 #define R1              (R+N2)
01997 #define R2              (R+N)
01998 #define R3              (R+N+N2)
01999 
02000 // R[2*N] - result = A*B
02001 // T[2*N] - temporary work space
02002 // A[N] --- multiplier
02003 // B[N] --- multiplicant
02004 
02005 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02006 {
02007         assert(N>=2 && N%2==0);
02008 
02009         if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02010                 LowLevel::Multiply8(R, A, B);
02011         else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02012                 LowLevel::Multiply4(R, A, B);
02013         else if (N==2)
02014                 LowLevel::Multiply2(R, A, B);
02015         else
02016         {
02017                 const size_t N2 = N/2;
02018                 int carry;
02019 
02020                 int aComp = Compare(A0, A1, N2);
02021                 int bComp = Compare(B0, B1, N2);
02022 
02023                 switch (2*aComp + aComp + bComp)
02024                 {
02025                 case -4:
02026                         LowLevel::Subtract(R0, A1, A0, N2);
02027                         LowLevel::Subtract(R1, B0, B1, N2);
02028                         RecursiveMultiply(T0, T2, R0, R1, N2);
02029                         LowLevel::Subtract(T1, T1, R0, N2);
02030                         carry = -1;
02031                         break;
02032                 case -2:
02033                         LowLevel::Subtract(R0, A1, A0, N2);
02034                         LowLevel::Subtract(R1, B0, B1, N2);
02035                         RecursiveMultiply(T0, T2, R0, R1, N2);
02036                         carry = 0;
02037                         break;
02038                 case 2:
02039                         LowLevel::Subtract(R0, A0, A1, N2);
02040                         LowLevel::Subtract(R1, B1, B0, N2);
02041                         RecursiveMultiply(T0, T2, R0, R1, N2);
02042                         carry = 0;
02043                         break;
02044                 case 4:
02045                         LowLevel::Subtract(R0, A1, A0, N2);
02046                         LowLevel::Subtract(R1, B0, B1, N2);
02047                         RecursiveMultiply(T0, T2, R0, R1, N2);
02048                         LowLevel::Subtract(T1, T1, R1, N2);
02049                         carry = -1;
02050                         break;
02051                 default:
02052                         SetWords(T0, 0, N);
02053                         carry = 0;
02054                 }
02055 
02056                 RecursiveMultiply(R0, T2, A0, B0, N2);
02057                 RecursiveMultiply(R2, T2, A1, B1, N2);
02058 
02059                 // now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
02060 
02061                 carry += LowLevel::Add(T0, T0, R0, N);
02062                 carry += LowLevel::Add(T0, T0, R2, N);
02063                 carry += LowLevel::Add(R1, R1, T0, N);
02064 
02065                 assert (carry >= 0 && carry <= 2);
02066                 Increment(R3, N2, carry);
02067         }
02068 }
02069 
02070 // R[2*N] - result = A*A
02071 // T[2*N] - temporary work space
02072 // A[N] --- number to be squared
02073 
02074 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02075 {
02076         assert(N && N%2==0);
02077         if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02078                 LowLevel::Square8(R, A);
02079         if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02080                 LowLevel::Square4(R, A);
02081         else if (N==2)
02082                 LowLevel::Square2(R, A);
02083         else
02084         {
02085                 const size_t N2 = N/2;
02086 
02087                 RecursiveSquare(R0, T2, A0, N2);
02088                 RecursiveSquare(R2, T2, A1, N2);
02089                 RecursiveMultiply(T0, T2, A0, A1, N2);
02090 
02091                 word carry = LowLevel::Add(R1, R1, T0, N);
02092                 carry += LowLevel::Add(R1, R1, T0, N);
02093                 Increment(R3, N2, carry);
02094         }
02095 }
02096 
02097 // R[N] - bottom half of A*B
02098 // T[N] - temporary work space
02099 // A[N] - multiplier
02100 // B[N] - multiplicant
02101 
02102 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02103 {
02104         assert(N>=2 && N%2==0);
02105         if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02106                 LowLevel::Multiply8Bottom(R, A, B);
02107         else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02108                 LowLevel::Multiply4Bottom(R, A, B);
02109         else if (N==2)
02110                 LowLevel::Multiply2Bottom(R, A, B);
02111         else
02112         {
02113                 const size_t N2 = N/2;
02114 
02115                 RecursiveMultiply(R, T, A0, B0, N2);
02116                 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02117                 LowLevel::Add(R1, R1, T0, N2);
02118                 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02119                 LowLevel::Add(R1, R1, T0, N2);
02120         }
02121 }
02122 
02123 // R[N] --- upper half of A*B
02124 // T[2*N] - temporary work space
02125 // L[N] --- lower half of A*B
02126 // A[N] --- multiplier
02127 // B[N] --- multiplicant
02128 
02129 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02130 {
02131         assert(N>=2 && N%2==0);
02132 
02133         if (N==4)
02134         {
02135                 LowLevel::Multiply4(T, A, B);
02136                 memcpy(R, T+4, 4*WORD_SIZE);
02137         }
02138         else if (N==2)
02139         {
02140                 LowLevel::Multiply2(T, A, B);
02141                 memcpy(R, T+2, 2*WORD_SIZE);
02142         }
02143         else
02144         {
02145                 const size_t N2 = N/2;
02146                 int carry;
02147 
02148                 int aComp = Compare(A0, A1, N2);
02149                 int bComp = Compare(B0, B1, N2);
02150 
02151                 switch (2*aComp + aComp + bComp)
02152                 {
02153                 case -4:
02154                         LowLevel::Subtract(R0, A1, A0, N2);
02155                         LowLevel::Subtract(R1, B0, B1, N2);
02156                         RecursiveMultiply(T0, T2, R0, R1, N2);
02157                         LowLevel::Subtract(T1, T1, R0, N2);
02158                         carry = -1;
02159                         break;
02160                 case -2:
02161                         LowLevel::Subtract(R0, A1, A0, N2);
02162                         LowLevel::Subtract(R1, B0, B1, N2);
02163                         RecursiveMultiply(T0, T2, R0, R1, N2);
02164                         carry = 0;
02165                         break;
02166                 case 2:
02167                         LowLevel::Subtract(R0, A0, A1, N2);
02168                         LowLevel::Subtract(R1, B1, B0, N2);
02169                         RecursiveMultiply(T0, T2, R0, R1, N2);
02170                         carry = 0;
02171                         break;
02172                 case 4:
02173                         LowLevel::Subtract(R0, A1, A0, N2);
02174                         LowLevel::Subtract(R1, B0, B1, N2);
02175                         RecursiveMultiply(T0, T2, R0, R1, N2);
02176                         LowLevel::Subtract(T1, T1, R1, N2);
02177                         carry = -1;
02178                         break;
02179                 default:
02180                         SetWords(T0, 0, N);
02181                         carry = 0;
02182                 }
02183 
02184                 RecursiveMultiply(T2, R0, A1, B1, N2);
02185 
02186                 // now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1
02187 
02188                 word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02189                 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02190                 word t = (Compare(R0, T2, N2) == -1);
02191 
02192                 carry += t;
02193                 carry += Increment(R0, N2, c2+t);
02194                 carry += LowLevel::Add(R0, R0, T1, N2);
02195                 carry += LowLevel::Add(R0, R0, T3, N2);
02196                 assert (carry >= 0 && carry <= 2);
02197 
02198                 CopyWords(R1, T3, N2);
02199                 Increment(R1, N2, carry);
02200         }
02201 }
02202 
02203 inline word Add(word *C, const word *A, const word *B, size_t N)
02204 {
02205         return LowLevel::Add(C, A, B, N);
02206 }
02207 
02208 inline word Subtract(word *C, const word *A, const word *B, size_t N)
02209 {
02210         return LowLevel::Subtract(C, A, B, N);
02211 }
02212 
02213 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02214 {
02215         RecursiveMultiply(R, T, A, B, N);
02216 }
02217 
02218 inline void Square(word *R, word *T, const word *A, size_t N)
02219 {
02220         RecursiveSquare(R, T, A, N);
02221 }
02222 
02223 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02224 {
02225         RecursiveMultiplyBottom(R, T, A, B, N);
02226 }
02227 
02228 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02229 {
02230         RecursiveMultiplyTop(R, T, L, A, B, N);
02231 }
02232 
02233 static word LinearMultiply(word *C, const word *A, word B, size_t N)
02234 {
02235         word carry=0;
02236         for(unsigned i=0; i<N; i++)
02237         {
02238                 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02239                 C[i] = p.GetLowHalf();
02240                 carry = p.GetHighHalf();
02241         }
02242         return carry;
02243 }
02244 
02245 // R[NA+NB] - result = A*B
02246 // T[NA+NB] - temporary work space
02247 // A[NA] ---- multiplier
02248 // B[NB] ---- multiplicant
02249 
02250 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02251 {
02252         if (NA == NB)
02253         {
02254                 if (A == B)
02255                         Square(R, T, A, NA);
02256                 else
02257                         Multiply(R, T, A, B, NA);
02258 
02259                 return;
02260         }
02261 
02262         if (NA > NB)
02263         {
02264                 std::swap(A, B);
02265                 std::swap(NA, NB);
02266         }
02267 
02268         assert(NB % NA == 0);
02269         assert((NB/NA)%2 == 0);         // NB is an even multiple of NA
02270 
02271         if (NA==2 && !A[1])
02272         {
02273                 switch (A[0])
02274                 {
02275                 case 0:
02276                         SetWords(R, 0, NB+2);
02277                         return;
02278                 case 1:
02279                         CopyWords(R, B, NB);
02280                         R[NB] = R[NB+1] = 0;
02281                         return;
02282                 default:
02283                         R[NB] = LinearMultiply(R, B, A[0], NB);
02284                         R[NB+1] = 0;
02285                         return;
02286                 }
02287         }
02288 
02289         Multiply(R, T, A, B, NA);
02290         CopyWords(T+2*NA, R+NA, NA);
02291 
02292         size_t i;
02293 
02294         for (i=2*NA; i<NB; i+=2*NA)
02295                 Multiply(T+NA+i, T, A, B+i, NA);
02296         for (i=NA; i<NB; i+=2*NA)
02297                 Multiply(R+i, T, A, B+i, NA);
02298 
02299         if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02300                 Increment(R+NB, NA);
02301 }
02302 
02303 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
02304 // T[3*N/2] - temporary work space
02305 // A[N] ----- an odd number as input
02306 
02307 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02308 {
02309         if (N==2)
02310         {
02311                 T[0] = AtomicInverseModPower2(A[0]);
02312                 T[1] = 0;
02313                 LowLevel::Multiply2Bottom(T+2, T, A);
02314                 TwosComplement(T+2, 2);
02315                 Increment(T+2, 2, 2);
02316                 LowLevel::Multiply2Bottom(R, T, T+2);
02317         }
02318         else
02319         {
02320                 const size_t N2 = N/2;
02321                 RecursiveInverseModPower2(R0, T0, A0, N2);
02322                 T0[0] = 1;
02323                 SetWords(T0+1, 0, N2-1);
02324                 MultiplyTop(R1, T1, T0, R0, A0, N2);
02325                 MultiplyBottom(T0, T1, R0, A1, N2);
02326                 Add(T0, R1, T0, N2);
02327                 TwosComplement(T0, N2);
02328                 MultiplyBottom(R1, T1, R0, T0, N2);
02329         }
02330 }
02331 
02332 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
02333 // T[3*N] - temporary work space
02334 // X[2*N] - number to be reduced
02335 // M[N] --- modulus
02336 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
02337 
02338 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, size_t N)
02339 {
02340         MultiplyBottom(R, T, X, U, N);
02341         MultiplyTop(T, T+N, X, R, M, N);
02342         word borrow = Subtract(T, X+N, T, N);
02343         // defend against timing attack by doing this Add even when not needed
02344         word carry = Add(T+N, T, M, N);
02345         assert(carry || !borrow);
02346         CopyWords(R, T + (borrow ? N : 0), N);
02347 }
02348 
02349 // R[N] --- result = X/(2**(WORD_BITS*N/2)) mod M
02350 // T[2*N] - temporary work space
02351 // X[2*N] - number to be reduced
02352 // M[N] --- modulus
02353 // U[N/2] - multiplicative inverse of M mod 2**(WORD_BITS*N/2)
02354 // V[N] --- 2**(WORD_BITS*3*N/2) mod M
02355 
02356 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02357 {
02358         assert(N%2==0 && N>=4);
02359 
02360 #define M0              M
02361 #define M1              (M+N2)
02362 #define V0              V
02363 #define V1              (V+N2)
02364 
02365 #define X0              X
02366 #define X1              (X+N2)
02367 #define X2              (X+N)
02368 #define X3              (X+N+N2)
02369 
02370         const size_t N2 = N/2;
02371         Multiply(T0, T2, V0, X3, N2);
02372         int c2 = Add(T0, T0, X0, N);
02373         MultiplyBottom(T3, T2, T0, U, N2);
02374         MultiplyTop(T2, R, T0, T3, M0, N2);
02375         c2 -= Subtract(T2, T1, T2, N2);
02376         Multiply(T0, R, T3, M1, N2);
02377         c2 -= Subtract(T0, T2, T0, N2);
02378         int c3 = -(int)Subtract(T1, X2, T1, N2);
02379         Multiply(R0, T2, V1, X3, N2);
02380         c3 += Add(R, R, T, N);
02381 
02382         if (c2>0)
02383                 c3 += Increment(R1, N2);
02384         else if (c2<0)
02385                 c3 -= Decrement(R1, N2, -c2);
02386 
02387         assert(c3>=-1 && c3<=1);
02388         if (c3>0)
02389                 Subtract(R, R, M, N);
02390         else if (c3<0)
02391                 Add(R, R, M, N);
02392 
02393 #undef M0
02394 #undef M1
02395 #undef V0
02396 #undef V1
02397 
02398 #undef X0
02399 #undef X1
02400 #undef X2
02401 #undef X3
02402 }
02403 
02404 #undef A0
02405 #undef A1
02406 #undef B0
02407 #undef B1
02408 
02409 #undef T0
02410 #undef T1
02411 #undef T2
02412 #undef T3
02413 
02414 #undef R0
02415 #undef R1
02416 #undef R2
02417 #undef R3
02418 
02419 /*
02420 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
02421 static word SubatomicDivide(word *A, word B0, word B1)
02422 {
02423         // assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
02424         assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
02425 
02426         // estimate the quotient: do a 2 word by 1 word divide
02427         word Q;
02428         if (B1+1 == 0)
02429                 Q = A[2];
02430         else
02431                 Q = DWord(A[1], A[2]).DividedBy(B1+1);
02432 
02433         // now subtract Q*B from A
02434         DWord p = DWord::Multiply(B0, Q);
02435         DWord u = (DWord) A[0] - p.GetLowHalf();
02436         A[0] = u.GetLowHalf();
02437         u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
02438         A[1] = u.GetLowHalf();
02439         A[2] += u.GetHighHalf();
02440 
02441         // Q <= actual quotient, so fix it
02442         while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
02443         {
02444                 u = (DWord) A[0] - B0;
02445                 A[0] = u.GetLowHalf();
02446                 u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
02447                 A[1] = u.GetLowHalf();
02448                 A[2] += u.GetHighHalf();
02449                 Q++;
02450                 assert(Q);      // shouldn't overflow
02451         }
02452 
02453         return Q;
02454 }
02455 
02456 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
02457 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02458 {
02459         if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
02460         {
02461                 Q[0] = A[2];
02462                 Q[1] = A[3];
02463         }
02464         else
02465         {
02466                 word T[4];
02467                 T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
02468                 Q[1] = SubatomicDivide(T+1, B[0], B[1]);
02469                 Q[0] = SubatomicDivide(T, B[0], B[1]);
02470 
02471 #ifndef NDEBUG
02472                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02473                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02474                 word P[4];
02475                 LowLevel::Multiply2(P, Q, B);
02476                 Add(P, P, T, 4);
02477                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02478 #endif
02479         }
02480 }
02481 */
02482 
02483 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02484 {
02485         word T[4];
02486         DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02487         Q[0] = q.GetLowHalf();
02488         Q[1] = q.GetHighHalf();
02489 
02490 #ifndef NDEBUG
02491         if (B[0] || B[1])
02492         {
02493                 // multiply quotient and divisor and add remainder, make sure it equals dividend
02494                 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02495                 word P[4];
02496                 Portable::Multiply2(P, Q, B);
02497                 Add(P, P, T, 4);
02498                 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02499         }
02500 #endif
02501 }
02502 
02503 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
02504 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02505 {
02506         assert(N && N%2==0);
02507 
02508         if (Q[1])
02509         {
02510                 T[N] = T[N+1] = 0;
02511                 unsigned i;
02512                 for (i=0; i<N; i+=4)
02513                         LowLevel::Multiply2(T+i, Q, B+i);
02514                 for (i=2; i<N; i+=4)
02515                         if (LowLevel::Multiply2Add(T+i, Q, B+i))
02516                                 T[i+5] += (++T[i+4]==0);
02517         }
02518         else
02519         {
02520                 T[N] = LinearMultiply(T, B, Q[0], N);
02521                 T[N+1] = 0;
02522         }
02523 
02524         word borrow = Subtract(R, R, T, N+2);
02525         assert(!borrow && !R[N+1]);
02526 
02527         while (R[N] || Compare(R, B, N) >= 0)
02528         {
02529                 R[N] -= Subtract(R, R, B, N);
02530                 Q[1] += (++Q[0]==0);
02531                 assert(Q[0] || Q[1]); // no overflow
02532         }
02533 }
02534 
02535 // R[NB] -------- remainder = A%B
02536 // Q[NA-NB+2] --- quotient      = A/B
02537 // T[NA+2*NB+4] - temp work space
02538 // A[NA] -------- dividend
02539 // B[NB] -------- divisor
02540 
02541 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02542 {
02543         assert(NA && NB && NA%2==0 && NB%2==0);
02544         assert(B[NB-1] || B[NB-2]);
02545         assert(NB <= NA);
02546 
02547         // set up temporary work space
02548         word *const TA=T;
02549         word *const TB=T+NA+2;
02550         word *const TP=T+NA+2+NB;
02551 
02552         // copy B into TB and normalize it so that TB has highest bit set to 1
02553         unsigned shiftWords = (B[NB-1]==0);
02554         TB[0] = TB[NB-1] = 0;
02555         CopyWords(TB+shiftWords, B, NB-shiftWords);
02556         unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02557         assert(shiftBits < WORD_BITS);
02558         ShiftWordsLeftByBits(TB, NB, shiftBits);
02559 
02560         // copy A into TA and normalize it
02561         TA[0] = TA[NA] = TA[NA+1] = 0;
02562         CopyWords(TA+shiftWords, A, NA);
02563         ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02564 
02565         if (TA[NA+1]==0 && TA[NA] <= 1)
02566         {
02567                 Q[NA-NB+1] = Q[NA-NB] = 0;
02568                 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02569                 {
02570                         TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02571                         ++Q[NA-NB];
02572                 }
02573         }
02574         else
02575         {
02576                 NA+=2;
02577                 assert(Compare(TA+NA-NB, TB, NB) < 0);
02578         }
02579 
02580         word BT[2];
02581         BT[0] = TB[NB-2] + 1;
02582         BT[1] = TB[NB-1] + (BT[0]==0);
02583 
02584         // start reducing TA mod TB, 2 words at a time
02585         for (size_t i=NA-2; i>=NB; i-=2)
02586         {
02587                 AtomicDivide(Q+i-NB, TA+i-2, BT);
02588                 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02589         }
02590 
02591         // copy TA into R, and denormalize it
02592         CopyWords(R, TA+shiftWords, NB);
02593         ShiftWordsRightByBits(R, NB, shiftBits);
02594 }
02595 
02596 static inline size_t EvenWordCount(const word *X, size_t N)
02597 {
02598         while (N && X[N-2]==0 && X[N-1]==0)
02599                 N-=2;
02600         return N;
02601 }
02602 
02603 // return k
02604 // R[N] --- result = A^(-1) * 2^k mod M
02605 // T[4*N] - temporary work space
02606 // A[NA] -- number to take inverse of
02607 // M[N] --- modulus
02608 
02609 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02610 {
02611         assert(NA<=N && N && N%2==0);
02612 
02613         word *b = T;
02614         word *c = T+N;
02615         word *f = T+2*N;
02616         word *g = T+3*N;
02617         size_t bcLen=2, fgLen=EvenWordCount(M, N);
02618         unsigned int k=0, s=0;
02619 
02620         SetWords(T, 0, 3*N);
02621         b[0]=1;
02622         CopyWords(f, A, NA);
02623         CopyWords(g, M, N);
02624 
02625         while (1)
02626         {
02627                 word t=f[0];
02628                 while (!t)
02629                 {
02630                         if (EvenWordCount(f, fgLen)==0)
02631                         {
02632                                 SetWords(R, 0, N);
02633                                 return 0;
02634                         }
02635 
02636                         ShiftWordsRightByWords(f, fgLen, 1);
02637                         if (c[bcLen-1]) bcLen+=2;
02638                         assert(bcLen <= N);
02639                         ShiftWordsLeftByWords(c, bcLen, 1);
02640                         k+=WORD_BITS;
02641                         t=f[0];
02642                 }
02643 
02644                 unsigned int i=0;
02645                 while (t%2 == 0)
02646                 {
02647                         t>>=1;
02648                         i++;
02649                 }
02650                 k+=i;
02651 
02652                 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02653                 {
02654                         if (s%2==0)
02655                                 CopyWords(R, b, N);
02656                         else
02657                                 Subtract(R, M, b, N);
02658                         return k;
02659                 }
02660 
02661                 ShiftWordsRightByBits(f, fgLen, i);
02662                 t=ShiftWordsLeftByBits(c, bcLen, i);
02663                 if (t)
02664                 {
02665                         c[bcLen] = t;
02666                         bcLen+=2;
02667                         assert(bcLen <= N);
02668                 }
02669 
02670                 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02671                         fgLen-=2;
02672 
02673                 if (Compare(f, g, fgLen)==-1)
02674                 {
02675                         std::swap(f, g);
02676                         std::swap(b, c);
02677                         s++;
02678                 }
02679 
02680                 Subtract(f, f, g, fgLen);
02681 
02682                 if (Add(b, b, c, bcLen))
02683                 {
02684                         b[bcLen] = 1;
02685                         bcLen+=2;
02686                         assert(bcLen <= N);
02687                 }
02688         }
02689 }
02690 
02691 // R[N] - result = A/(2^k) mod M
02692 // A[N] - input
02693 // M[N] - modulus
02694 
02695 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02696 {
02697         CopyWords(R, A, N);
02698 
02699         while (k--)
02700         {
02701                 if (R[0]%2==0)
02702                         ShiftWordsRightByBits(R, N, 1);
02703                 else
02704                 {
02705                         word carry = Add(R, R, M, N);
02706                         ShiftWordsRightByBits(R, N, 1);
02707                         R[N-1] += carry<<(WORD_BITS-1);
02708                 }
02709         }
02710 }
02711 
02712 // R[N] - result = A*(2^k) mod M
02713 // A[N] - input
02714 // M[N] - modulus
02715 
02716 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02717 {
02718         CopyWords(R, A, N);
02719 
02720         while (k--)
02721                 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02722                         Subtract(R, R, M, N);
02723 }
02724 
02725 // ******************************************************************
02726 
02727 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02728 
02729 static inline size_t RoundupSize(size_t n)
02730 {
02731         if (n<=8)
02732                 return RoundupSizeTable[n];
02733         else if (n<=16)
02734                 return 16;
02735         else if (n<=32)
02736                 return 32;
02737         else if (n<=64)
02738                 return 64;
02739         else return 1U << BitPrecision(n-1);
02740 }
02741 
02742 Integer::Integer()
02743         : reg(2), sign(POSITIVE)
02744 {
02745         reg[0] = reg[1] = 0;
02746 }
02747 
02748 Integer::Integer(const Integer& t)
02749         : reg(RoundupSize(t.WordCount())), sign(t.sign)
02750 {
02751         CopyWords(reg, t.reg, reg.size());
02752 }
02753 
02754 Integer::Integer(Sign s, lword value)
02755         : reg(2), sign(s)
02756 {
02757         reg[0] = word(value);
02758         reg[1] = word(SafeRightShift<WORD_BITS>(value));
02759 }
02760 
02761 Integer::Integer(signed long value)
02762         : reg(2)
02763 {
02764         if (value >= 0)
02765                 sign = POSITIVE;
02766         else
02767         {
02768                 sign = NEGATIVE;
02769                 value = -value;
02770         }
02771         reg[0] = word(value);
02772         reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02773 }
02774 
02775 Integer::Integer(Sign s, word high, word low)
02776         : reg(2), sign(s)
02777 {
02778         reg[0] = low;
02779         reg[1] = high;
02780 }
02781 
02782 bool Integer::IsConvertableToLong() const
02783 {
02784         if (ByteCount() > sizeof(long))
02785                 return false;
02786 
02787         unsigned long value = reg[0];
02788         value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02789 
02790         if (sign==POSITIVE)
02791                 return (signed long)value >= 0;
02792         else
02793                 return -(signed long)value < 0;
02794 }
02795 
02796 signed long Integer::ConvertToLong() const
02797 {
02798         assert(IsConvertableToLong());
02799 
02800         unsigned long value = reg[0];
02801         value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02802         return sign==POSITIVE ? value : -(signed long)value;
02803 }
02804 
02805 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02806 {
02807         Decode(encodedInteger, byteCount, s);
02808 }
02809 
02810 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02811 {
02812         Decode(encodedInteger, byteCount, s);
02813 }
02814 
02815 Integer::Integer(BufferedTransformation &bt)
02816 {
02817         BERDecode(bt);
02818 }
02819 
02820 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02821 {
02822         Randomize(rng, bitcount);
02823 }
02824 
02825 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02826 {
02827         if (!Randomize(rng, min, max, rnType, equiv, mod))
02828                 throw Integer::RandomNumberNotFound();
02829 }
02830 
02831 Integer Integer::Power2(size_t e)
02832 {
02833         Integer r((word)0, BitsToWords(e+1));
02834         r.SetBit(e);
02835         return r;
02836 }
02837 
02838 template <long i>
02839 struct NewInteger
02840 {
02841         Integer * operator()() const
02842         {
02843                 return new Integer(i);
02844         }
02845 };
02846 
02847 const Integer &Integer::Zero()
02848 {
02849         return Singleton<Integer>().Ref();
02850 }
02851 
02852 const Integer &Integer::One()
02853 {
02854         return Singleton<Integer, NewInteger<1> >().Ref();
02855 }
02856 
02857 const Integer &Integer::Two()
02858 {
02859         return Singleton<Integer, NewInteger<2> >().Ref();
02860 }
02861 
02862 bool Integer::operator!() const
02863 {
02864         return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02865 }
02866 
02867 Integer& Integer::operator=(const Integer& t)
02868 {
02869         if (this != &t)
02870         {
02871                 reg.New(RoundupSize(t.WordCount()));
02872                 CopyWords(reg, t.reg, reg.size());
02873                 sign = t.sign;
02874         }
02875         return *this;
02876 }
02877 
02878 bool Integer::GetBit(size_t n) const
02879 {
02880         if (n/WORD_BITS >= reg.size())
02881                 return 0;
02882         else
02883                 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02884 }
02885 
02886 void Integer::SetBit(size_t n, bool value)
02887 {
02888         if (value)
02889         {
02890                 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02891                 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02892         }
02893         else
02894         {
02895                 if (n/WORD_BITS < reg.size())
02896                         reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02897         }
02898 }
02899 
02900 byte Integer::GetByte(size_t n) const
02901 {
02902         if (n/WORD_SIZE >= reg.size())
02903                 return 0;
02904         else
02905                 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02906 }
02907 
02908 void Integer::SetByte(size_t n, byte value)
02909 {
02910         reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02911         reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02912         reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02913 }
02914 
02915 lword Integer::GetBits(size_t i, size_t n) const
02916 {
02917         lword v = 0;
02918         assert(n <= sizeof(v)*8);
02919         for (unsigned int j=0; j<n; j++)
02920                 v |= lword(GetBit(i+j)) << j;
02921         return v;
02922 }
02923 
02924 Integer Integer::operator-() const
02925 {
02926         Integer result(*this);
02927         result.Negate();
02928         return result;
02929 }
02930 
02931 Integer Integer::AbsoluteValue() const
02932 {
02933         Integer result(*this);
02934         result.sign = POSITIVE;
02935         return result;
02936 }
02937 
02938 void Integer::swap(Integer &a)
02939 {
02940         reg.swap(a.reg);
02941         std::swap(sign, a.sign);
02942 }
02943 
02944 Integer::Integer(word value, size_t length)
02945         : reg(RoundupSize(length)), sign(POSITIVE)
02946 {
02947         reg[0] = value;
02948         SetWords(reg+1, 0, reg.size()-1);
02949 }
02950 
02951 template <class T>
02952 static Integer StringToInteger(const T *str)
02953 {
02954         word radix;
02955         // GCC workaround
02956         // std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
02957         unsigned int length;
02958         for (length = 0; str[length] != 0; length++) {}
02959 
02960         Integer v;
02961 
02962         if (length == 0)
02963                 return v;
02964 
02965         switch (str[length-1])
02966         {
02967         case 'h':
02968         case 'H':
02969                 radix=16;
02970                 break;
02971         case 'o':
02972         case 'O':
02973                 radix=8;
02974                 break;
02975         case 'b':
02976         case 'B':
02977                 radix=2;
02978                 break;
02979         default:
02980                 radix=10;
02981         }
02982 
02983         if (length > 2 && str[0] == '0' && str[1] == 'x')
02984                 radix = 16;
02985 
02986         for (unsigned i=0; i<length; i++)
02987         {
02988                 word digit;
02989 
02990                 if (str[i] >= '0' && str[i] <= '9')
02991                         digit = str[i] - '0';
02992                 else if (str[i] >= 'A' && str[i] <= 'F')
02993                         digit = str[i] - 'A' + 10;
02994                 else if (str[i] >= 'a' && str[i] <= 'f')
02995                         digit = str[i] - 'a' + 10;
02996                 else
02997                         digit = radix;
02998 
02999                 if (digit < radix)
03000                 {
03001                         v *= radix;
03002                         v += digit;
03003                 }
03004         }
03005 
03006         if (str[0] == '-')
03007                 v.Negate();
03008 
03009         return v;
03010 }
03011 
03012 Integer::Integer(const char *str)
03013         : reg(2), sign(POSITIVE)
03014 {
03015         *this = StringToInteger(str);
03016 }
03017 
03018 Integer::Integer(const wchar_t *str)
03019         : reg(2), sign(POSITIVE)
03020 {
03021         *this = StringToInteger(str);
03022 }
03023 
03024 unsigned int Integer::WordCount() const
03025 {
03026         return (unsigned int)CountWords(reg, reg.size());
03027 }
03028 
03029 unsigned int Integer::ByteCount() const
03030 {
03031         unsigned wordCount = WordCount();
03032         if (wordCount)
03033                 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03034         else
03035                 return 0;
03036 }
03037 
03038 unsigned int Integer::BitCount() const
03039 {
03040         unsigned wordCount = WordCount();
03041         if (wordCount)
03042                 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03043         else
03044                 return 0;
03045 }
03046 
03047 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03048 {
03049         StringStore store(input, inputLen);
03050         Decode(store, inputLen, s);
03051 }
03052 
03053 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03054 {
03055         assert(bt.MaxRetrievable() >= inputLen);
03056 
03057         byte b;
03058         bt.Peek(b);
03059         sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03060 
03061         while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03062         {
03063                 bt.Skip(1);
03064                 inputLen--;
03065                 bt.Peek(b);
03066         }
03067 
03068         reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03069 
03070         for (size_t i=inputLen; i > 0; i--)
03071         {
03072                 bt.Get(b);
03073                 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03074         }
03075 
03076         if (sign == NEGATIVE)
03077         {
03078                 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03079                         reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03080                 TwosComplement(reg, reg.size());
03081         }
03082 }
03083 
03084 size_t Integer::MinEncodedSize(Signedness signedness) const
03085 {
03086         unsigned int outputLen = STDMAX(1U, ByteCount());
03087         if (signedness == UNSIGNED)
03088                 return outputLen;
03089         if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03090                 outputLen++;
03091         if (IsNegative() && *this < -Power2(outputLen*8-1))
03092                 outputLen++;
03093         return outputLen;
03094 }
03095 
03096 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03097 {
03098         ArraySink sink(output, outputLen);
03099         Encode(sink, outputLen, signedness);
03100 }
03101 
03102 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03103 {
03104         if (signedness == UNSIGNED || NotNegative())
03105         {
03106                 for (size_t i=outputLen; i > 0; i--)
03107                         bt.Put(GetByte(i-1));
03108         }
03109         else
03110         {
03111                 // take two's complement of *this
03112                 Integer temp = Integer::Power2(8*UnsignedMin(ByteCount(), outputLen)) + *this;
03113                 temp.Encode(bt, outputLen, UNSIGNED);
03114         }
03115 }
03116 
03117 void Integer::DEREncode(BufferedTransformation &bt) const
03118 {
03119         DERGeneralEncoder enc(bt, INTEGER);
03120         Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03121         enc.MessageEnd();
03122 }
03123 
03124 void Integer::BERDecode(const byte *input, size_t len)
03125 {
03126         StringStore store(input, len);
03127         BERDecode(store);
03128 }
03129 
03130 void Integer::BERDecode(BufferedTransformation &bt)
03131 {
03132         BERGeneralDecoder dec(bt, INTEGER);
03133         if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03134                 BERDecodeError();
03135         Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03136         dec.MessageEnd();
03137 }
03138 
03139 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03140 {
03141         DERGeneralEncoder enc(bt, OCTET_STRING);
03142         Encode(enc, length);
03143         enc.MessageEnd();
03144 }
03145 
03146 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03147 {
03148         BERGeneralDecoder dec(bt, OCTET_STRING);
03149         if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03150                 BERDecodeError();
03151         Decode(dec, length);
03152         dec.MessageEnd();
03153 }
03154 
03155 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03156 {
03157         ArraySink sink(output, len);
03158         return OpenPGPEncode(sink);
03159 }
03160 
03161 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03162 {
03163         word16 bitCount = BitCount();
03164         bt.PutWord16(bitCount);
03165         size_t byteCount = BitsToBytes(bitCount);
03166         Encode(bt, byteCount);
03167         return 2 + byteCount;
03168 }
03169 
03170 void Integer::OpenPGPDecode(const byte *input, size_t len)
03171 {
03172         StringStore store(input, len);
03173         OpenPGPDecode(store);
03174 }
03175 
03176 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03177 {
03178         word16 bitCount;
03179         if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03180                 throw OpenPGPDecodeErr();
03181         Decode(bt, BitsToBytes(bitCount));
03182 }
03183 
03184 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03185 {
03186         const size_t nbytes = nbits/8 + 1;
03187         SecByteBlock buf(nbytes);
03188         rng.GenerateBlock(buf, nbytes);
03189         if (nbytes)
03190                 buf[0] = (byte)Crop(buf[0], nbits % 8);
03191         Decode(buf, nbytes, UNSIGNED);
03192 }
03193 
03194 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03195 {
03196         if (min > max)
03197                 throw InvalidArgument("Integer: Min must be no greater than Max");
03198 
03199         Integer range = max - min;
03200         const unsigned int nbits = range.BitCount();
03201 
03202         do
03203         {
03204                 Randomize(rng, nbits);
03205         }
03206         while (*this > range);
03207 
03208         *this += min;
03209 }
03210 
03211 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03212 {
03213         return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03214 }
03215 
03216 class KDF2_RNG : public RandomNumberGenerator
03217 {
03218 public:
03219         KDF2_RNG(const byte *seed, size_t seedSize)
03220                 : m_counter(0), m_counterAndSeed(seedSize + 4)
03221         {
03222                 memcpy(m_counterAndSeed + 4, seed, seedSize);
03223         }
03224 
03225         byte GenerateByte()
03226         {
03227                 byte b;
03228                 GenerateBlock(&b, 1);
03229                 return b;
03230         }
03231 
03232         void GenerateBlock(byte *output, unsigned int size)
03233         {
03234                 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03235                 ++m_counter;
03236                 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03237         }
03238 
03239 private:
03240         word32 m_counter;
03241         SecByteBlock m_counterAndSeed;
03242 };
03243 
03244 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
03245 {
03246         Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03247         Integer max;
03248         if (!params.GetValue("Max", max))
03249         {
03250                 int bitLength;
03251                 if (params.GetIntValue("BitLength", bitLength))
03252                         max = Integer::Power2(bitLength);
03253                 else
03254                         throw InvalidArgument("Integer: missing Max argument");
03255         }
03256         if (min > max)
03257                 throw InvalidArgument("Integer: Min must be no greater than Max");
03258 
03259         Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03260         Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03261 
03262         if (equiv.IsNegative() || equiv >= mod)
03263                 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03264 
03265         Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03266 
03267         member_ptr<KDF2_RNG> kdf2Rng;
03268         ConstByteArrayParameter seed;
03269         if (params.GetValue("Seed", seed))
03270         {
03271                 ByteQueue bq;
03272                 DERSequenceEncoder seq(bq);
03273                 min.DEREncode(seq);
03274                 max.DEREncode(seq);
03275                 equiv.DEREncode(seq);
03276                 mod.DEREncode(seq);
03277                 DEREncodeUnsigned(seq, rnType);
03278                 DEREncodeOctetString(seq, seed.begin(), seed.size());
03279                 seq.MessageEnd();
03280 
03281                 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03282                 bq.Get(finalSeed, finalSeed.size());
03283                 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03284         }
03285         RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03286 
03287         switch (rnType)
03288         {
03289                 case ANY:
03290                         if (mod == One())
03291                                 Randomize(rng, min, max);
03292                         else
03293                         {
03294                                 Integer min1 = min + (equiv-min)%mod;
03295                                 if (max < min1)
03296                                         return false;
03297                                 Randomize(rng, Zero(), (max - min1) / mod);
03298                                 *this *= mod;
03299                                 *this += min1;
03300                         }
03301                         return true;
03302 
03303                 case PRIME:
03304                 {
03305                         const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03306 
03307                         int i;
03308                         i = 0;
03309                         while (1)
03310                         {
03311                                 if (++i==16)
03312                                 {
03313                                         // check if there are any suitable primes in [min, max]
03314                                         Integer first = min;
03315                                         if (FirstPrime(first, max, equiv, mod, pSelector))
03316                                         {
03317                                                 // if there is only one suitable prime, we're done
03318                                                 *this = first;
03319                                                 if (!FirstPrime(first, max, equiv, mod, pSelector))
03320                                                         return true;
03321                                         }
03322                                         else
03323                                                 return false;
03324                                 }
03325 
03326                                 Randomize(rng, min, max);
03327                                 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03328                                         return true;
03329                         }
03330                 }
03331 
03332                 default:
03333                         throw InvalidArgument("Integer: invalid RandomNumberType argument");
03334         }
03335 }
03336 
03337 std::istream& operator>>(std::istream& in, Integer &a)
03338 {
03339         char c;
03340         unsigned int length = 0;
03341         SecBlock<char> str(length + 16);
03342 
03343         std::ws(in);
03344 
03345         do
03346         {
03347                 in.read(&c, 1);
03348                 str[length++] = c;
03349                 if (length >= str.size())
03350                         str.Grow(length + 16);
03351         }
03352         while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03353 
03354         if (in.gcount())
03355                 in.putback(c);
03356         str[length-1] = '\0';
03357         a = Integer(str);
03358 
03359         return in;
03360 }
03361 
03362 std::ostream& operator<<(std::ostream& out, const Integer &a)
03363 {
03364         // Get relevant conversion specifications from ostream.
03365         long f = out.flags() & std::ios::basefield; // Get base digits.
03366         int base, block;
03367         char suffix;
03368         switch(f)
03369         {
03370         case std::ios::oct :
03371                 base = 8;
03372                 block = 8;
03373                 suffix = 'o';
03374                 break;
03375         case std::ios::hex :
03376                 base = 16;
03377                 block = 4;
03378                 suffix = 'h';
03379                 break;
03380         default :
03381                 base = 10;
03382                 block = 3;
03383                 suffix = '.';
03384         }
03385 
03386         SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03387         Integer temp1=a, temp2;
03388         unsigned i=0;
03389         const char vec[]="0123456789ABCDEF";
03390 
03391         if (a.IsNegative())
03392         {
03393                 out << '-';
03394                 temp1.Negate();
03395         }
03396 
03397         if (!a)
03398                 out << '0';
03399 
03400         while (!!temp1)
03401         {
03402                 word digit;
03403                 Integer::Divide(digit, temp2, temp1, base);
03404                 s[i++]=vec[digit];
03405                 temp1=temp2;
03406         }
03407 
03408         while (i--)
03409         {
03410                 out << s[i];
03411 //              if (i && !(i%block))
03412 //                      out << ",";
03413         }
03414         return out << suffix;
03415 }
03416 
03417 Integer& Integer::operator++()
03418 {
03419         if (NotNegative())
03420         {
03421                 if (Increment(reg, reg.size()))
03422                 {
03423                         reg.CleanGrow(2*reg.size());
03424                         reg[reg.size()/2]=1;
03425                 }
03426         }
03427         else
03428         {
03429                 word borrow = Decrement(reg, reg.size());
03430                 assert(!borrow);
03431                 if (WordCount()==0)
03432                         *this = Zero();
03433         }
03434         return *this;
03435 }
03436 
03437 Integer& Integer::operator--()
03438 {
03439         if (IsNegative())
03440         {
03441                 if (Increment(reg, reg.size()))
03442                 {
03443                         reg.CleanGrow(2*reg.size());
03444                         reg[reg.size()/2]=1;
03445                 }
03446         }
03447         else
03448         {
03449                 if (Decrement(reg, reg.size()))
03450                         *this = -One();
03451         }
03452         return *this;
03453 }
03454 
03455 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03456 {
03457         word carry;
03458         if (a.reg.size() == b.reg.size())
03459                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03460         else if (a.reg.size() > b.reg.size())
03461         {
03462                 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03463                 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03464                 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03465         }
03466         else
03467         {
03468                 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03469                 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03470                 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03471         }
03472 
03473         if (carry)
03474         {
03475                 sum.reg.CleanGrow(2*sum.reg.size());
03476                 sum.reg[sum.reg.size()/2] = 1;
03477         }
03478         sum.sign = Integer::POSITIVE;
03479 }
03480 
03481 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03482 {
03483         unsigned aSize = a.WordCount();
03484         aSize += aSize%2;
03485         unsigned bSize = b.WordCount();
03486         bSize += bSize%2;
03487 
03488         if (aSize == bSize)
03489         {
03490                 if (Compare(a.reg, b.reg, aSize) >= 0)
03491                 {
03492                         Subtract(diff.reg, a.reg, b.reg, aSize);
03493                         diff.sign = Integer::POSITIVE;
03494                 }
03495                 else
03496                 {
03497                         Subtract(diff.reg, b.reg, a.reg, aSize);
03498                         diff.sign = Integer::NEGATIVE;
03499                 }
03500         }
03501         else if (aSize > bSize)
03502         {
03503                 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03504                 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03505                 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03506                 assert(!borrow);
03507                 diff.sign = Integer::POSITIVE;
03508         }
03509         else
03510         {
03511                 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03512                 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03513                 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03514                 assert(!borrow);
03515                 diff.sign = Integer::NEGATIVE;
03516         }
03517 }
03518 
03519 // MSVC .NET 2003 workaround
03520 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03521 {
03522         return a < b ? b : a;
03523 }
03524 
03525 Integer Integer::Plus(const Integer& b) const
03526 {
03527         Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03528         if (NotNegative())
03529         {
03530                 if (b.NotNegative())
03531                         PositiveAdd(sum, *this, b);
03532                 else
03533                         PositiveSubtract(sum, *this, b);
03534         }
03535         else
03536         {
03537                 if (b.NotNegative())
03538                         PositiveSubtract(sum, b, *this);
03539                 else
03540                 {
03541                         PositiveAdd(sum, *this, b);
03542                         sum.sign = Integer::NEGATIVE;
03543                 }
03544         }
03545         return sum;
03546 }
03547 
03548 Integer& Integer::operator+=(const Integer& t)
03549 {
03550         reg.CleanGrow(t.reg.size());
03551         if (NotNegative())
03552         {
03553                 if (t.NotNegative())
03554                         PositiveAdd(*this, *this, t);
03555                 else
03556                         PositiveSubtract(*this, *this, t);
03557         }
03558         else
03559         {
03560                 if (t.NotNegative())
03561                         PositiveSubtract(*this, t, *this);
03562                 else
03563                 {
03564                         PositiveAdd(*this, *this, t);
03565                         sign = Integer::NEGATIVE;
03566                 }
03567         }
03568         return *this;
03569 }
03570 
03571 Integer Integer::Minus(const Integer& b) const
03572 {
03573         Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03574         if (NotNegative())
03575         {
03576                 if (b.NotNegative())
03577                         PositiveSubtract(diff, *this, b);
03578                 else
03579                         PositiveAdd(diff, *this, b);
03580         }
03581         else
03582         {
03583                 if (b.NotNegative())
03584                 {
03585                         PositiveAdd(diff, *this, b);
03586                         diff.sign = Integer::NEGATIVE;
03587                 }
03588                 else
03589                         PositiveSubtract(diff, b, *this);
03590         }
03591         return diff;
03592 }
03593 
03594 Integer& Integer::operator-=(const Integer& t)
03595 {
03596         reg.CleanGrow(t.reg.size());
03597         if (NotNegative())
03598         {
03599                 if (t.NotNegative())
03600                         PositiveSubtract(*this, *this, t);
03601                 else
03602                         PositiveAdd(*this, *this, t);
03603         }
03604         else
03605         {
03606                 if (t.NotNegative())
03607                 {
03608                         PositiveAdd(*this, *this, t);
03609                         sign = Integer::NEGATIVE;
03610                 }
03611                 else
03612                         PositiveSubtract(*this, t, *this);
03613         }
03614         return *this;
03615 }
03616 
03617 Integer& Integer::operator<<=(size_t n)
03618 {
03619         const size_t wordCount = WordCount();
03620         const size_t shiftWords = n / WORD_BITS;
03621         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03622 
03623         reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03624         ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03625         ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03626         return *this;
03627 }
03628 
03629 Integer& Integer::operator>>=(size_t n)
03630 {
03631         const size_t wordCount = WordCount();
03632         const size_t shiftWords = n / WORD_BITS;
03633         const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03634 
03635         ShiftWordsRightByWords(reg, wordCount, shiftWords);
03636         if (wordCount > shiftWords)
03637                 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03638         if (IsNegative() && WordCount()==0)   // avoid -0
03639                 *this = Zero();
03640         return *this;
03641 }
03642 
03643 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03644 {
03645         size_t aSize = RoundupSize(a.WordCount());
03646         size_t bSize = RoundupSize(b.WordCount());
03647 
03648         product.reg.CleanNew(RoundupSize(aSize+bSize));
03649         product.sign = Integer::POSITIVE;
03650 
03651         SecAlignedWordBlock workspace(aSize + bSize);
03652         AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03653 }
03654 
03655 void Multiply(Integer &product, const Integer &a, const Integer &b)
03656 {
03657         PositiveMultiply(product, a, b);
03658 
03659         if (a.NotNegative() != b.NotNegative())
03660                 product.Negate();
03661 }
03662 
03663 Integer Integer::Times(const Integer &b) const
03664 {
03665         Integer product;
03666         Multiply(product, *this, b);
03667         return product;
03668 }
03669 
03670 /*
03671 void PositiveDivide(Integer &remainder, Integer &quotient,
03672                                    const Integer &dividend, const Integer &divisor)
03673 {
03674         remainder.reg.CleanNew(divisor.reg.size());
03675         remainder.sign = Integer::POSITIVE;
03676         quotient.reg.New(0);
03677         quotient.sign = Integer::POSITIVE;
03678         unsigned i=dividend.BitCount();
03679         while (i--)
03680         {
03681                 word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
03682                 remainder.reg[0] |= dividend[i];
03683                 if (overflow || remainder >= divisor)
03684                 {
03685                         Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
03686                         quotient.SetBit(i);
03687                 }
03688         }
03689 }
03690 */
03691 
03692 void PositiveDivide(Integer &remainder, Integer &quotient,
03693                                    const Integer &a, const Integer &b)
03694 {
03695         unsigned aSize = a.WordCount();
03696         unsigned bSize = b.WordCount();
03697 
03698         if (!bSize)
03699                 throw Integer::DivideByZero();
03700 
03701         if (a.PositiveCompare(b) == -1)
03702         {
03703                 remainder = a;
03704                 remainder.sign = Integer::POSITIVE;
03705                 quotient = Integer::Zero();
03706                 return;
03707         }
03708 
03709         aSize += aSize%2;       // round up to next even number
03710         bSize += bSize%2;
03711 
03712         remainder.reg.CleanNew(RoundupSize(bSize));
03713         remainder.sign = Integer::POSITIVE;
03714         quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03715         quotient.sign = Integer::POSITIVE;
03716 
03717         SecAlignedWordBlock T(aSize+2*bSize+4);
03718         Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03719 }
03720 
03721 void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
03722 {
03723         PositiveDivide(remainder, quotient, dividend, divisor);
03724 
03725         if (dividend.IsNegative())
03726         {
03727                 quotient.Negate();
03728                 if (remainder.NotZero())
03729                 {
03730                         --quotient;
03731                         remainder = divisor.AbsoluteValue() - remainder;
03732                 }
03733         }
03734 
03735         if (divisor.IsNegative())
03736                 quotient.Negate();
03737 }
03738 
03739 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03740 {
03741         q = a;
03742         q >>= n;
03743 
03744         const size_t wordCount = BitsToWords(n);
03745         if (wordCount <= a.WordCount())
03746         {
03747                 r.reg.resize(RoundupSize(wordCount));
03748                 CopyWords(r.reg, a.reg, wordCount);
03749                 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03750                 if (n % WORD_BITS != 0)
03751                         r.reg[wordCount-1] %= (1 << (n % WORD_BITS));
03752         }
03753         else
03754         {
03755                 r.reg.resize(RoundupSize(a.WordCount()));
03756                 CopyWords(r.reg, a.reg, r.reg.size());
03757         }
03758         r.sign = POSITIVE;
03759 
03760         if (a.IsNegative() && r.NotZero())
03761         {
03762                 --q;
03763                 r = Power2(n) - r;
03764         }
03765 }
03766 
03767 Integer Integer::DividedBy(const Integer &b) const
03768 {
03769         Integer remainder, quotient;
03770         Integer::Divide(remainder, quotient, *this, b);
03771         return quotient;
03772 }
03773 
03774 Integer Integer::Modulo(const Integer &b) const
03775 {
03776         Integer remainder, quotient;
03777         Integer::Divide(remainder, quotient, *this, b);
03778         return remainder;
03779 }
03780 
03781 void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
03782 {
03783         if (!divisor)
03784                 throw Integer::DivideByZero();
03785 
03786         assert(divisor);
03787 
03788         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03789         {
03790                 quotient = dividend >> (BitPrecision(divisor)-1);
03791                 remainder = dividend.reg[0] & (divisor-1);
03792                 return;
03793         }
03794 
03795         unsigned int i = dividend.WordCount();
03796         quotient.reg.CleanNew(RoundupSize(i));
03797         remainder = 0;
03798         while (i--)
03799         {
03800                 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03801                 remainder = DWord(dividend.reg[i], remainder) % divisor;
03802         }
03803 
03804         if (dividend.NotNegative())
03805                 quotient.sign = POSITIVE;
03806         else
03807         {
03808                 quotient.sign = NEGATIVE;
03809                 if (remainder)
03810                 {
03811                         --quotient;
03812                         remainder = divisor - remainder;
03813                 }
03814         }
03815 }
03816 
03817 Integer Integer::DividedBy(word b) const
03818 {
03819         word remainder;
03820         Integer quotient;
03821         Integer::Divide(remainder, quotient, *this, b);
03822         return quotient;
03823 }
03824 
03825 word Integer::Modulo(word divisor) const
03826 {
03827         if (!divisor)
03828                 throw Integer::DivideByZero();
03829 
03830         assert(divisor);
03831 
03832         word remainder;
03833 
03834         if ((divisor & (divisor-1)) == 0)       // divisor is a power of 2
03835                 remainder = reg[0] & (divisor-1);
03836         else
03837         {
03838                 unsigned int i = WordCount();
03839 
03840                 if (divisor <= 5)
03841                 {
03842                         DWord sum(0, 0);
03843                         while (i--)
03844                                 sum += reg[i];
03845                         remainder = sum % divisor;
03846                 }
03847                 else
03848                 {
03849                         remainder = 0;
03850                         while (i--)
03851                                 remainder = DWord(reg[i], remainder) % divisor;
03852                 }
03853         }
03854 
03855         if (IsNegative() && remainder)
03856                 remainder = divisor - remainder;
03857 
03858         return remainder;
03859 }
03860 
03861 void Integer::Negate()
03862 {
03863         if (!!(*this))  // don't flip sign if *this==0
03864                 sign = Sign(1-sign);
03865 }
03866 
03867 int Integer::PositiveCompare(const Integer& t) const
03868 {
03869         unsigned size = WordCount(), tSize = t.WordCount();
03870 
03871         if (size == tSize)
03872                 return CryptoPP::Compare(reg, t.reg, size);
03873         else
03874                 return size > tSize ? 1 : -1;
03875 }
03876 
03877 int Integer::Compare(const Integer& t) const
03878 {
03879         if (NotNegative())
03880         {
03881                 if (t.NotNegative())
03882                         return PositiveCompare(t);
03883                 else
03884                         return 1;
03885         }
03886         else
03887         {
03888                 if (t.NotNegative())
03889                         return -1;
03890                 else
03891                         return -PositiveCompare(t);
03892         }
03893 }
03894 
03895 Integer Integer::SquareRoot() const
03896 {
03897         if (!IsPositive())
03898                 return Zero();
03899 
03900         // overestimate square root
03901         Integer x, y = Power2((BitCount()+1)/2);
03902         assert(y*y >= *this);
03903 
03904         do
03905         {
03906                 x = y;
03907                 y = (x + *this/x) >> 1;
03908         } while (y<x);
03909 
03910         return x;
03911 }
03912 
03913 bool Integer::IsSquare() const
03914 {
03915         Integer r = SquareRoot();
03916         return *this == r.Squared();
03917 }
03918 
03919 bool Integer::IsUnit() const
03920 {
03921         return (WordCount() == 1) && (reg[0] == 1);
03922 }
03923 
03924 Integer Integer::MultiplicativeInverse() const
03925 {
03926         return IsUnit() ? *this : Zero();
03927 }
03928 
03929 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03930 {
03931         return x*y%m;
03932 }
03933 
03934 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03935 {
03936         ModularArithmetic mr(m);
03937         return mr.Exponentiate(x, e);
03938 }
03939 
03940 Integer Integer::Gcd(const Integer &a, const Integer &b)
03941 {
03942         return EuclideanDomainOf<Integer>().Gcd(a, b);
03943 }
03944 
03945 Integer Integer::InverseMod(const Integer &m) const
03946 {
03947         assert(m.NotNegative());
03948 
03949         if (IsNegative() || *this>=m)
03950                 return (*this%m).InverseMod(m);
03951 
03952         if (m.IsEven())
03953         {
03954                 if (!m || IsEven())
03955                         return Zero();  // no inverse
03956                 if (*this == One())
03957                         return One();
03958 
03959                 Integer u = m.InverseMod(*this);
03960                 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03961         }
03962 
03963         SecBlock<word> T(m.reg.size() * 4);
03964         Integer r((word)0, m.reg.size());
03965         unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03966         DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03967         return r;
03968 }
03969 
03970 word Integer::InverseMod(const word mod) const
03971 {
03972         word g0 = mod, g1 = *this % mod;
03973         word v0 = 0, v1 = 1;
03974         word y;
03975 
03976         while (g1)
03977         {
03978                 if (g1 == 1)
03979                         return v1;
03980                 y = g0 / g1;
03981                 g0 = g0 % g1;
03982                 v0 += y * v1;
03983 
03984                 if (!g0)
03985                         break;
03986                 if (g0 == 1)
03987                         return mod-v0;
03988                 y = g1 / g0;
03989                 g1 = g1 % g0;
03990                 v1 += y * v0;
03991         }
03992         return 0;
03993 }
03994 
03995 // ********************************************************
03996 
03997 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
03998 {
03999         BERSequenceDecoder seq(bt);
04000         OID oid(seq);
04001         if (oid != ASN1::prime_field())
04002                 BERDecodeError();
04003         m_modulus.BERDecode(seq);
04004         seq.MessageEnd();
04005         m_result.reg.resize(m_modulus.reg.size());
04006 }
04007 
04008 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04009 {
04010         DERSequenceEncoder seq(bt);
04011         ASN1::prime_field().DEREncode(seq);
04012         m_modulus.DEREncode(seq);
04013         seq.MessageEnd();
04014 }
04015 
04016 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04017 {
04018         a.DEREncodeAsOctetString(out, MaxElementByteLength());
04019 }
04020 
04021 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04022 {
04023         a.BERDecodeAsOctetString(in, MaxElementByteLength());
04024 }
04025 
04026 const Integer& ModularArithmetic::Half(const Integer &a) const
04027 {
04028         if (a.reg.size()==m_modulus.reg.size())
04029         {
04030                 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04031                 return m_result;
04032         }
04033         else
04034                 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04035 }
04036 
04037 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04038 {
04039         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04040         {
04041                 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04042                         || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04043                 {
04044                         CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04045                 }
04046                 return m_result;
04047         }
04048         else
04049         {
04050                 m_result1 = a+b;
04051                 if (m_result1 >= m_modulus)
04052                         m_result1 -= m_modulus;
04053                 return m_result1;
04054         }
04055 }
04056 
04057 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04058 {
04059         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04060         {
04061                 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04062                         || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04063                 {
04064                         CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04065                 }
04066         }
04067         else
04068         {
04069                 a+=b;
04070                 if (a>=m_modulus)
04071                         a-=m_modulus;
04072         }
04073 
04074         return a;
04075 }
04076 
04077 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04078 {
04079         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04080         {
04081                 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04082                         CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04083                 return m_result;
04084         }
04085         else
04086         {
04087                 m_result1 = a-b;
04088                 if (m_result1.IsNegative())
04089                         m_result1 += m_modulus;
04090                 return m_result1;
04091         }
04092 }
04093 
04094 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04095 {
04096         if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04097         {
04098                 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04099                         CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04100         }
04101         else
04102         {
04103                 a-=b;
04104                 if (a.IsNegative())
04105                         a+=m_modulus;
04106         }
04107 
04108         return a;
04109 }
04110 
04111 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04112 {
04113         if (!a)
04114                 return a;
04115 
04116         CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04117         if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04118                 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04119 
04120         return m_result;
04121 }
04122 
04123 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04124 {
04125         if (m_modulus.IsOdd())
04126         {
04127                 MontgomeryRepresentation dr(m_modulus);
04128                 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04129         }
04130         else
04131                 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04132 }
04133 
04134 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04135 {
04136         if (m_modulus.IsOdd())
04137         {
04138                 MontgomeryRepresentation dr(m_modulus);
04139                 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04140                 for (unsigned int i=0; i<exponentsCount; i++)
04141                         results[i] = dr.ConvertOut(results[i]);
04142         }
04143         else
04144                 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04145 }
04146 
04147 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)    // modulus must be odd
04148         : ModularArithmetic(m),
04149           m_u((word)0, m_modulus.reg.size()),
04150           m_workspace(5*m_modulus.reg.size())
04151 {
04152         if (!m_modulus.IsOdd())
04153                 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04154 
04155         RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04156 }
04157 
04158 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04159 {
04160         word *const T = m_workspace.begin();
04161         word *const R = m_result.reg.begin();
04162         const size_t N = m_modulus.reg.size();
04163         assert(a.reg.size()<=N && b.reg.size()<=N);
04164 
04165         AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04166         SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04167         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04168         return m_result;
04169 }
04170 
04171 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04172 {
04173         word *const T = m_workspace.begin();
04174         word *const R = m_result.reg.begin();
04175         const size_t N = m_modulus.reg.size();
04176         assert(a.reg.size()<=N);
04177 
04178         CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04179         SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04180         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04181         return m_result;
04182 }
04183 
04184 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04185 {
04186         word *const T = m_workspace.begin();
04187         word *const R = m_result.reg.begin();
04188         const size_t N = m_modulus.reg.size();
04189         assert(a.reg.size()<=N);
04190 
04191         CopyWords(T, a.reg, a.reg.size());
04192         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04193         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04194         return m_result;
04195 }
04196 
04197 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04198 {
04199 //        return (EuclideanMultiplicativeInverse(a, modulus)<<(2*WORD_BITS*modulus.reg.size()))%modulus;
04200         word *const T = m_workspace.begin();
04201         word *const R = m_result.reg.begin();
04202         const size_t N = m_modulus.reg.size();
04203         assert(a.reg.size()<=N);
04204 
04205         CopyWords(T, a.reg, a.reg.size());
04206         SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04207         MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04208         unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04209 
04210 //      cout << "k=" << k << " N*32=" << 32*N << endl;
04211 
04212         if (k>N*WORD_BITS)
04213                 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04214         else
04215                 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04216 
04217         return m_result;
04218 }
04219 
04220 NAMESPACE_END
04221 
04222 #endif

Generated on Tue Aug 16 08:38:42 2005 for Crypto++ by  doxygen 1.3.9.1