00001
00002
00003
00004 #include "pch.h"
00005
00006 #ifndef CRYPTOPP_IMPORTS
00007
00008 #include "integer.h"
00009 #include "modarith.h"
00010 #include "nbtheory.h"
00011 #include "asn.h"
00012 #include "oids.h"
00013 #include "words.h"
00014 #include "algparam.h"
00015 #include "pubkey.h"
00016 #include "sha.h"
00017
00018 #include <iostream>
00019
00020 #ifdef _M_X64
00021 #include <Intrin.h>
00022 #endif
00023
00024 #ifdef SSE2_INTRINSICS_AVAILABLE
00025 #ifdef __GNUC__
00026 #include <xmmintrin.h>
00027 #include <signal.h>
00028 #include <setjmp.h>
00029 #ifdef CRYPTOPP_MEMALIGN_AVAILABLE
00030 #include <malloc.h>
00031 #else
00032 #include <stdlib.h>
00033 #endif
00034 #else
00035 #include <emmintrin.h>
00036 #endif
00037 #elif defined(_MSC_VER) && defined(_M_IX86)
00038 #pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 intrinsics will be disabled.")
00039 #elif defined(__GNUC__) && defined(__i386__)
00040 #warning "You do not have GCC 3.3 or later, or did not specify -msse2 compiler option, so use of SSE2 intrinsics will be disabled."
00041 #endif
00042
00043 NAMESPACE_BEGIN(CryptoPP)
00044
00045 bool AssignIntToInteger(const std::type_info &valueType, void *pInteger, const void *pInt)
00046 {
00047 if (valueType != typeid(Integer))
00048 return false;
00049 *reinterpret_cast<Integer *>(pInteger) = *reinterpret_cast<const int *>(pInt);
00050 return true;
00051 }
00052
00053 static const char s_RunAtStartup = (g_pAssignIntToInteger = AssignIntToInteger, 0);
00054
00055 #ifdef SSE2_INTRINSICS_AVAILABLE
00056 template <class T>
00057 CPP_TYPENAME AllocatorBase<T>::pointer AlignedAllocator<T>::allocate(size_type n, const void *)
00058 {
00059 CheckSize(n);
00060 if (n == 0)
00061 return NULL;
00062 if (n >= 4)
00063 {
00064 void *p;
00065 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00066 while (!(p = _mm_malloc(sizeof(T)*n, 16)))
00067 #elif defined(CRYPTOPP_MEMALIGN_AVAILABLE)
00068 while (!(p = memalign(16, sizeof(T)*n)))
00069 #elif defined(CRYPTOPP_MALLOC_ALIGNMENT_IS_16)
00070 while (!(p = malloc(sizeof(T)*n)))
00071 #else
00072 while (!(p = (byte *)malloc(sizeof(T)*n + 8)))
00073 #endif
00074 CallNewHandler();
00075
00076 #ifdef CRYPTOPP_NO_ALIGNED_ALLOC
00077 assert(m_pBlock == NULL);
00078 m_pBlock = p;
00079 if (!IsAlignedOn(p, 16))
00080 {
00081 assert(IsAlignedOn(p, 8));
00082 p = (byte *)p + 8;
00083 }
00084 #endif
00085
00086 assert(IsAlignedOn(p, 16));
00087 return (T*)p;
00088 }
00089 return new T[n];
00090 }
00091
00092 template <class T>
00093 void AlignedAllocator<T>::deallocate(void *p, size_type n)
00094 {
00095 memset(p, 0, n*sizeof(T));
00096 if (n >= 4)
00097 {
00098 #ifdef CRYPTOPP_MM_MALLOC_AVAILABLE
00099 _mm_free(p);
00100 #elif defined(CRYPTOPP_NO_ALIGNED_ALLOC)
00101 assert(m_pBlock == p || (byte *)m_pBlock+8 == p);
00102 free(m_pBlock);
00103 m_pBlock = NULL;
00104 #else
00105 free(p);
00106 #endif
00107 }
00108 else
00109 delete [] (T *)p;
00110 }
00111 #endif
00112
00113 static int Compare(const word *A, const word *B, size_t N)
00114 {
00115 while (N--)
00116 if (A[N] > B[N])
00117 return 1;
00118 else if (A[N] < B[N])
00119 return -1;
00120
00121 return 0;
00122 }
00123
00124 static word Increment(word *A, size_t N, word B=1)
00125 {
00126 assert(N);
00127 word t = A[0];
00128 A[0] = t+B;
00129 if (A[0] >= t)
00130 return 0;
00131 for (unsigned i=1; i<N; i++)
00132 if (++A[i])
00133 return 0;
00134 return 1;
00135 }
00136
00137 static word Decrement(word *A, size_t N, word B=1)
00138 {
00139 assert(N);
00140 word t = A[0];
00141 A[0] = t-B;
00142 if (A[0] <= t)
00143 return 0;
00144 for (unsigned i=1; i<N; i++)
00145 if (A[i]--)
00146 return 0;
00147 return 1;
00148 }
00149
00150 static void TwosComplement(word *A, size_t N)
00151 {
00152 Decrement(A, N);
00153 for (unsigned i=0; i<N; i++)
00154 A[i] = ~A[i];
00155 }
00156
00157 static word AtomicInverseModPower2(word A)
00158 {
00159 assert(A%2==1);
00160
00161 word R=A%8;
00162
00163 for (unsigned i=3; i<WORD_BITS; i*=2)
00164 R = R*(2-R*A);
00165
00166 assert(R*A==1);
00167 return R;
00168 }
00169
00170
00171
00172 class DWord
00173 {
00174 public:
00175 DWord() {}
00176
00177 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00178 explicit DWord(word low)
00179 {
00180 m_whole = low;
00181 }
00182 #else
00183 explicit DWord(word low)
00184 {
00185 m_halfs.low = low;
00186 m_halfs.high = 0;
00187 }
00188 #endif
00189
00190 DWord(word low, word high)
00191 {
00192 m_halfs.low = low;
00193 m_halfs.high = high;
00194 }
00195
00196 static DWord Multiply(word a, word b)
00197 {
00198 DWord r;
00199 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00200 r.m_whole = (dword)a * b;
00201 #elif defined(__alpha__)
00202 r.m_halfs.low = a*b; __asm__("umulh %1,%2,%0" : "=r" (r.m_halfs.high) : "r" (a), "r" (b));
00203 #elif defined(__ia64__)
00204 r.m_halfs.low = a*b; __asm__("xmpy.hu %0=%1,%2" : "=f" (r.m_halfs.high) : "f" (a), "f" (b));
00205 #elif defined(_ARCH_PPC64)
00206 r.m_halfs.low = a*b; __asm__("mulhdu %0,%1,%2" : "=r" (r.m_halfs.high) : "r" (a), "r" (b) : "cc");
00207 #elif defined(__x86_64__)
00208 __asm__("mulq %3" : "=d" (r.m_halfs.high), "=a" (r.m_halfs.low) : "a" (a), "rm" (b) : "cc");
00209 #elif defined(__mips64)
00210 __asm__("dmultu %2,%3" : "=h" (r.m_halfs.high), "=l" (r.m_halfs.low) : "r" (a), "r" (b));
00211 #elif defined(_M_X64)
00212 r.m_halfs.low = _umul128(a, b, &r.m_halfs.high);
00213 #elif defined(_M_IX86)
00214
00215 word64 t = (word64)a * b;
00216 r.m_halfs.high = ((word32 *)(&t))[1];
00217 r.m_halfs.low = (word32)t;
00218 #else
00219 #error can not implement DWord
00220 #endif
00221 return r;
00222 }
00223
00224 static DWord MultiplyAndAdd(word a, word b, word c)
00225 {
00226 DWord r = Multiply(a, b);
00227 return r += c;
00228 }
00229
00230 DWord & operator+=(word a)
00231 {
00232 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00233 m_whole = m_whole + a;
00234 #else
00235 m_halfs.low += a;
00236 m_halfs.high += (m_halfs.low < a);
00237 #endif
00238 return *this;
00239 }
00240
00241 DWord operator+(word a)
00242 {
00243 DWord r;
00244 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00245 r.m_whole = m_whole + a;
00246 #else
00247 r.m_halfs.low = m_halfs.low + a;
00248 r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
00249 #endif
00250 return r;
00251 }
00252
00253 DWord operator-(DWord a)
00254 {
00255 DWord r;
00256 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00257 r.m_whole = m_whole - a.m_whole;
00258 #else
00259 r.m_halfs.low = m_halfs.low - a.m_halfs.low;
00260 r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
00261 #endif
00262 return r;
00263 }
00264
00265 DWord operator-(word a)
00266 {
00267 DWord r;
00268 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00269 r.m_whole = m_whole - a;
00270 #else
00271 r.m_halfs.low = m_halfs.low - a;
00272 r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
00273 #endif
00274 return r;
00275 }
00276
00277
00278 word operator/(word divisor);
00279
00280 word operator%(word a);
00281
00282 bool operator!() const
00283 {
00284 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00285 return !m_whole;
00286 #else
00287 return !m_halfs.high && !m_halfs.low;
00288 #endif
00289 }
00290
00291 word GetLowHalf() const {return m_halfs.low;}
00292 word GetHighHalf() const {return m_halfs.high;}
00293 word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
00294
00295 private:
00296 union
00297 {
00298 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00299 dword m_whole;
00300 #endif
00301 struct
00302 {
00303 #ifdef IS_LITTLE_ENDIAN
00304 word low;
00305 word high;
00306 #else
00307 word high;
00308 word low;
00309 #endif
00310 } m_halfs;
00311 };
00312 };
00313
00314 class Word
00315 {
00316 public:
00317 Word() {}
00318
00319 Word(word value)
00320 {
00321 m_whole = value;
00322 }
00323
00324 Word(hword low, hword high)
00325 {
00326 m_whole = low | (word(high) << (WORD_BITS/2));
00327 }
00328
00329 static Word Multiply(hword a, hword b)
00330 {
00331 Word r;
00332 r.m_whole = (word)a * b;
00333 return r;
00334 }
00335
00336 Word operator-(Word a)
00337 {
00338 Word r;
00339 r.m_whole = m_whole - a.m_whole;
00340 return r;
00341 }
00342
00343 Word operator-(hword a)
00344 {
00345 Word r;
00346 r.m_whole = m_whole - a;
00347 return r;
00348 }
00349
00350
00351 hword operator/(hword divisor)
00352 {
00353 return hword(m_whole / divisor);
00354 }
00355
00356 bool operator!() const
00357 {
00358 return !m_whole;
00359 }
00360
00361 word GetWhole() const {return m_whole;}
00362 hword GetLowHalf() const {return hword(m_whole);}
00363 hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
00364 hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
00365
00366 private:
00367 word m_whole;
00368 };
00369
00370
00371 template <class S, class D>
00372 S DivideThreeWordsByTwo(S *A, S B0, S B1, D *dummy=NULL)
00373 {
00374
00375 assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
00376
00377
00378 S Q;
00379 if (S(B1+1) == 0)
00380 Q = A[2];
00381 else
00382 Q = D(A[1], A[2]) / S(B1+1);
00383
00384
00385 D p = D::Multiply(B0, Q);
00386 D u = (D) A[0] - p.GetLowHalf();
00387 A[0] = u.GetLowHalf();
00388 u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
00389 A[1] = u.GetLowHalf();
00390 A[2] += u.GetHighHalf();
00391
00392
00393 while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
00394 {
00395 u = (D) A[0] - B0;
00396 A[0] = u.GetLowHalf();
00397 u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
00398 A[1] = u.GetLowHalf();
00399 A[2] += u.GetHighHalf();
00400 Q++;
00401 assert(Q);
00402 }
00403
00404 return Q;
00405 }
00406
00407
00408 template <class S, class D>
00409 inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
00410 {
00411 if (!B)
00412 return D(Ah.GetLowHalf(), Ah.GetHighHalf());
00413 else
00414 {
00415 S Q[2];
00416 T[0] = Al.GetLowHalf();
00417 T[1] = Al.GetHighHalf();
00418 T[2] = Ah.GetLowHalf();
00419 T[3] = Ah.GetHighHalf();
00420 Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
00421 Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
00422 return D(Q[0], Q[1]);
00423 }
00424 }
00425
00426
00427 inline word DWord::operator/(word a)
00428 {
00429 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00430 return word(m_whole / a);
00431 #else
00432 hword r[4];
00433 return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
00434 #endif
00435 }
00436
00437 inline word DWord::operator%(word a)
00438 {
00439 #ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
00440 return word(m_whole % a);
00441 #else
00442 if (a < (word(1) << (WORD_BITS/2)))
00443 {
00444 hword h = hword(a);
00445 word r = m_halfs.high % h;
00446 r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
00447 return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
00448 }
00449 else
00450 {
00451 hword r[4];
00452 DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
00453 return Word(r[0], r[1]).GetWhole();
00454 }
00455 #endif
00456 }
00457
00458
00459
00460 class Portable
00461 {
00462 public:
00463 static word Add(word *C, const word *A, const word *B, size_t N);
00464 static word Subtract(word *C, const word *A, const word *B, size_t N);
00465
00466 static inline void Multiply2(word *C, const word *A, const word *B);
00467 static inline word Multiply2Add(word *C, const word *A, const word *B);
00468 static void Multiply4(word *C, const word *A, const word *B);
00469 static void Multiply8(word *C, const word *A, const word *B);
00470 static inline unsigned int MultiplyRecursionLimit() {return 8;}
00471
00472 static inline void Multiply2Bottom(word *C, const word *A, const word *B);
00473 static void Multiply4Bottom(word *C, const word *A, const word *B);
00474 static void Multiply8Bottom(word *C, const word *A, const word *B);
00475 static inline unsigned int MultiplyBottomRecursionLimit() {return 8;}
00476
00477 static void Square2(word *R, const word *A);
00478 static void Square4(word *R, const word *A);
00479 static void Square8(word *R, const word *A) {assert(false);}
00480 static inline unsigned int SquareRecursionLimit() {return 4;}
00481 };
00482
00483 word Portable::Add(word *C, const word *A, const word *B, size_t N)
00484 {
00485 assert (N%2 == 0);
00486
00487 DWord u(0, 0);
00488 for (unsigned int i = 0; i < N; i+=2)
00489 {
00490 u = DWord(A[i]) + B[i] + u.GetHighHalf();
00491 C[i] = u.GetLowHalf();
00492 u = DWord(A[i+1]) + B[i+1] + u.GetHighHalf();
00493 C[i+1] = u.GetLowHalf();
00494 }
00495 return u.GetHighHalf();
00496 }
00497
00498 word Portable::Subtract(word *C, const word *A, const word *B, size_t N)
00499 {
00500 assert (N%2 == 0);
00501
00502 DWord u(0, 0);
00503 for (unsigned int i = 0; i < N; i+=2)
00504 {
00505 u = (DWord) A[i] - B[i] - u.GetHighHalfAsBorrow();
00506 C[i] = u.GetLowHalf();
00507 u = (DWord) A[i+1] - B[i+1] - u.GetHighHalfAsBorrow();
00508 C[i+1] = u.GetLowHalf();
00509 }
00510 return 0-u.GetHighHalf();
00511 }
00512
00513 void Portable::Multiply2(word *C, const word *A, const word *B)
00514 {
00515
00516
00517
00518
00519
00520
00521
00522
00523
00524
00525
00526
00527
00528
00529
00530
00531
00532
00533
00534
00535
00536
00537
00538
00539
00540
00541
00542
00543 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00544 unsigned int ai = A[1] < A[0];
00545 unsigned int bi = B[0] < B[1];
00546 unsigned int di = ai & bi;
00547 DWord d = DWord::Multiply(D[di], D[di+2]);
00548 D[1] = D[3] = 0;
00549 unsigned int si = ai + !bi;
00550 word s = D[si];
00551
00552 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00553 C[0] = A0B0.GetLowHalf();
00554
00555 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00556 DWord t = (DWord) A0B0.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf();
00557 C[1] = t.GetLowHalf();
00558
00559 t = A1B1 + t.GetHighHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s;
00560 C[2] = t.GetLowHalf();
00561 C[3] = t.GetHighHalf();
00562 }
00563
00564 inline void Portable::Multiply2Bottom(word *C, const word *A, const word *B)
00565 {
00566 DWord t = DWord::Multiply(A[0], B[0]);
00567 C[0] = t.GetLowHalf();
00568 C[1] = t.GetHighHalf() + A[0]*B[1] + A[1]*B[0];
00569 }
00570
00571 word Portable::Multiply2Add(word *C, const word *A, const word *B)
00572 {
00573 word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
00574 unsigned int ai = A[1] < A[0];
00575 unsigned int bi = B[0] < B[1];
00576 unsigned int di = ai & bi;
00577 DWord d = DWord::Multiply(D[di], D[di+2]);
00578 D[1] = D[3] = 0;
00579 unsigned int si = ai + !bi;
00580 word s = D[si];
00581
00582 DWord A0B0 = DWord::Multiply(A[0], B[0]);
00583 DWord t = A0B0 + C[0];
00584 C[0] = t.GetLowHalf();
00585
00586 DWord A1B1 = DWord::Multiply(A[1], B[1]);
00587 t = (DWord) t.GetHighHalf() + A0B0.GetLowHalf() + d.GetLowHalf() + A1B1.GetLowHalf() + C[1];
00588 C[1] = t.GetLowHalf();
00589
00590 t = (DWord) t.GetHighHalf() + A1B1.GetLowHalf() + A0B0.GetHighHalf() + d.GetHighHalf() + A1B1.GetHighHalf() - s + C[2];
00591 C[2] = t.GetLowHalf();
00592
00593 t = (DWord) t.GetHighHalf() + A1B1.GetHighHalf() + C[3];
00594 C[3] = t.GetLowHalf();
00595 return t.GetHighHalf();
00596 }
00597
00598 #define MulAcc(x, y) \
00599 p = DWord::MultiplyAndAdd(A[x], B[y], c); \
00600 c = p.GetLowHalf(); \
00601 p = (DWord) d + p.GetHighHalf(); \
00602 d = p.GetLowHalf(); \
00603 e += p.GetHighHalf();
00604
00605 #define SaveMulAcc(s, x, y) \
00606 R[s] = c; \
00607 p = DWord::MultiplyAndAdd(A[x], B[y], d); \
00608 c = p.GetLowHalf(); \
00609 p = (DWord) e + p.GetHighHalf(); \
00610 d = p.GetLowHalf(); \
00611 e = p.GetHighHalf();
00612
00613 #define SquAcc(x, y) \
00614 q = DWord::Multiply(A[x], A[y]); \
00615 p = q + c; \
00616 c = p.GetLowHalf(); \
00617 p = (DWord) d + p.GetHighHalf(); \
00618 d = p.GetLowHalf(); \
00619 e += p.GetHighHalf(); \
00620 p = q + c; \
00621 c = p.GetLowHalf(); \
00622 p = (DWord) d + p.GetHighHalf(); \
00623 d = p.GetLowHalf(); \
00624 e += p.GetHighHalf();
00625
00626 #define SaveSquAcc(s, x, y) \
00627 R[s] = c; \
00628 q = DWord::Multiply(A[x], A[y]); \
00629 p = q + d; \
00630 c = p.GetLowHalf(); \
00631 p = (DWord) e + p.GetHighHalf(); \
00632 d = p.GetLowHalf(); \
00633 e = p.GetHighHalf(); \
00634 p = q + c; \
00635 c = p.GetLowHalf(); \
00636 p = (DWord) d + p.GetHighHalf(); \
00637 d = p.GetLowHalf(); \
00638 e += p.GetHighHalf();
00639
00640 void Portable::Multiply4(word *R, const word *A, const word *B)
00641 {
00642 DWord p;
00643 word c, d, e;
00644
00645 p = DWord::Multiply(A[0], B[0]);
00646 R[0] = p.GetLowHalf();
00647 c = p.GetHighHalf();
00648 d = e = 0;
00649
00650 MulAcc(0, 1);
00651 MulAcc(1, 0);
00652
00653 SaveMulAcc(1, 2, 0);
00654 MulAcc(1, 1);
00655 MulAcc(0, 2);
00656
00657 SaveMulAcc(2, 0, 3);
00658 MulAcc(1, 2);
00659 MulAcc(2, 1);
00660 MulAcc(3, 0);
00661
00662 SaveMulAcc(3, 3, 1);
00663 MulAcc(2, 2);
00664 MulAcc(1, 3);
00665
00666 SaveMulAcc(4, 2, 3);
00667 MulAcc(3, 2);
00668
00669 R[5] = c;
00670 p = DWord::MultiplyAndAdd(A[3], B[3], d);
00671 R[6] = p.GetLowHalf();
00672 R[7] = e + p.GetHighHalf();
00673 }
00674
00675 void Portable::Square2(word *R, const word *A)
00676 {
00677 DWord p, q;
00678 word c, d, e;
00679
00680 p = DWord::Multiply(A[0], A[0]);
00681 R[0] = p.GetLowHalf();
00682 c = p.GetHighHalf();
00683 d = e = 0;
00684
00685 SquAcc(0, 1);
00686
00687 R[1] = c;
00688 p = DWord::MultiplyAndAdd(A[1], A[1], d);
00689 R[2] = p.GetLowHalf();
00690 R[3] = e + p.GetHighHalf();
00691 }
00692
00693 void Portable::Square4(word *R, const word *A)
00694 {
00695 #ifdef _MSC_VER
00696
00697
00698
00699
00700 Multiply4(R, A, A);
00701 #else
00702 const word *B = A;
00703 DWord p, q;
00704 word c, d, e;
00705
00706 p = DWord::Multiply(A[0], A[0]);
00707 R[0] = p.GetLowHalf();
00708 c = p.GetHighHalf();
00709 d = e = 0;
00710
00711 SquAcc(0, 1);
00712
00713 SaveSquAcc(1, 2, 0);
00714 MulAcc(1, 1);
00715
00716 SaveSquAcc(2, 0, 3);
00717 SquAcc(1, 2);
00718
00719 SaveSquAcc(3, 3, 1);
00720 MulAcc(2, 2);
00721
00722 SaveSquAcc(4, 2, 3);
00723
00724 R[5] = c;
00725 p = DWord::MultiplyAndAdd(A[3], A[3], d);
00726 R[6] = p.GetLowHalf();
00727 R[7] = e + p.GetHighHalf();
00728 #endif
00729 }
00730
00731 void Portable::Multiply8(word *R, const word *A, const word *B)
00732 {
00733 DWord p;
00734 word c, d, e;
00735
00736 p = DWord::Multiply(A[0], B[0]);
00737 R[0] = p.GetLowHalf();
00738 c = p.GetHighHalf();
00739 d = e = 0;
00740
00741 MulAcc(0, 1);
00742 MulAcc(1, 0);
00743
00744 SaveMulAcc(1, 2, 0);
00745 MulAcc(1, 1);
00746 MulAcc(0, 2);
00747
00748 SaveMulAcc(2, 0, 3);
00749 MulAcc(1, 2);
00750 MulAcc(2, 1);
00751 MulAcc(3, 0);
00752
00753 SaveMulAcc(3, 0, 4);
00754 MulAcc(1, 3);
00755 MulAcc(2, 2);
00756 MulAcc(3, 1);
00757 MulAcc(4, 0);
00758
00759 SaveMulAcc(4, 0, 5);
00760 MulAcc(1, 4);
00761 MulAcc(2, 3);
00762 MulAcc(3, 2);
00763 MulAcc(4, 1);
00764 MulAcc(5, 0);
00765
00766 SaveMulAcc(5, 0, 6);
00767 MulAcc(1, 5);
00768 MulAcc(2, 4);
00769 MulAcc(3, 3);
00770 MulAcc(4, 2);
00771 MulAcc(5, 1);
00772 MulAcc(6, 0);
00773
00774 SaveMulAcc(6, 0, 7);
00775 MulAcc(1, 6);
00776 MulAcc(2, 5);
00777 MulAcc(3, 4);
00778 MulAcc(4, 3);
00779 MulAcc(5, 2);
00780 MulAcc(6, 1);
00781 MulAcc(7, 0);
00782
00783 SaveMulAcc(7, 1, 7);
00784 MulAcc(2, 6);
00785 MulAcc(3, 5);
00786 MulAcc(4, 4);
00787 MulAcc(5, 3);
00788 MulAcc(6, 2);
00789 MulAcc(7, 1);
00790
00791 SaveMulAcc(8, 2, 7);
00792 MulAcc(3, 6);
00793 MulAcc(4, 5);
00794 MulAcc(5, 4);
00795 MulAcc(6, 3);
00796 MulAcc(7, 2);
00797
00798 SaveMulAcc(9, 3, 7);
00799 MulAcc(4, 6);
00800 MulAcc(5, 5);
00801 MulAcc(6, 4);
00802 MulAcc(7, 3);
00803
00804 SaveMulAcc(10, 4, 7);
00805 MulAcc(5, 6);
00806 MulAcc(6, 5);
00807 MulAcc(7, 4);
00808
00809 SaveMulAcc(11, 5, 7);
00810 MulAcc(6, 6);
00811 MulAcc(7, 5);
00812
00813 SaveMulAcc(12, 6, 7);
00814 MulAcc(7, 6);
00815
00816 R[13] = c;
00817 p = DWord::MultiplyAndAdd(A[7], B[7], d);
00818 R[14] = p.GetLowHalf();
00819 R[15] = e + p.GetHighHalf();
00820 }
00821
00822 void Portable::Multiply4Bottom(word *R, const word *A, const word *B)
00823 {
00824 DWord p;
00825 word c, d, e;
00826
00827 p = DWord::Multiply(A[0], B[0]);
00828 R[0] = p.GetLowHalf();
00829 c = p.GetHighHalf();
00830 d = e = 0;
00831
00832 MulAcc(0, 1);
00833 MulAcc(1, 0);
00834
00835 SaveMulAcc(1, 2, 0);
00836 MulAcc(1, 1);
00837 MulAcc(0, 2);
00838
00839 R[2] = c;
00840 R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
00841 }
00842
00843 void Portable::Multiply8Bottom(word *R, const word *A, const word *B)
00844 {
00845 DWord p;
00846 word c, d, e;
00847
00848 p = DWord::Multiply(A[0], B[0]);
00849 R[0] = p.GetLowHalf();
00850 c = p.GetHighHalf();
00851 d = e = 0;
00852
00853 MulAcc(0, 1);
00854 MulAcc(1, 0);
00855
00856 SaveMulAcc(1, 2, 0);
00857 MulAcc(1, 1);
00858 MulAcc(0, 2);
00859
00860 SaveMulAcc(2, 0, 3);
00861 MulAcc(1, 2);
00862 MulAcc(2, 1);
00863 MulAcc(3, 0);
00864
00865 SaveMulAcc(3, 0, 4);
00866 MulAcc(1, 3);
00867 MulAcc(2, 2);
00868 MulAcc(3, 1);
00869 MulAcc(4, 0);
00870
00871 SaveMulAcc(4, 0, 5);
00872 MulAcc(1, 4);
00873 MulAcc(2, 3);
00874 MulAcc(3, 2);
00875 MulAcc(4, 1);
00876 MulAcc(5, 0);
00877
00878 SaveMulAcc(5, 0, 6);
00879 MulAcc(1, 5);
00880 MulAcc(2, 4);
00881 MulAcc(3, 3);
00882 MulAcc(4, 2);
00883 MulAcc(5, 1);
00884 MulAcc(6, 0);
00885
00886 R[6] = c;
00887 R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
00888 A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
00889 }
00890
00891 #undef MulAcc
00892 #undef SaveMulAcc
00893 #undef SquAcc
00894 #undef SaveSquAcc
00895
00896 #ifdef CRYPTOPP_X86ASM_AVAILABLE
00897
00898
00899
00900 static bool s_sse2Enabled = true;
00901
00902 static void CpuId(word32 input, word32 *output)
00903 {
00904 #ifdef __GNUC__
00905 __asm__
00906 (
00907
00908 "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
00909 : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
00910 : "a" (input)
00911 );
00912 #else
00913 __asm
00914 {
00915 mov eax, input
00916 cpuid
00917 mov edi, output
00918 mov [edi], eax
00919 mov [edi+4], ebx
00920 mov [edi+8], ecx
00921 mov [edi+12], edx
00922 }
00923 #endif
00924 }
00925
00926 #ifdef SSE2_INTRINSICS_AVAILABLE
00927 #ifndef _MSC_VER
00928 static jmp_buf s_env;
00929 static void SigIllHandler(int)
00930 {
00931 longjmp(s_env, 1);
00932 }
00933 #endif
00934
00935 static bool HasSSE2()
00936 {
00937 if (!s_sse2Enabled)
00938 return false;
00939
00940 word32 cpuid[4];
00941 CpuId(1, cpuid);
00942 if ((cpuid[3] & (1 << 26)) == 0)
00943 return false;
00944
00945 #ifdef _MSC_VER
00946 __try
00947 {
00948 __asm xorpd xmm0, xmm0
00949 }
00950 __except (1)
00951 {
00952 return false;
00953 }
00954 return true;
00955 #else
00956 typedef void (*SigHandler)(int);
00957
00958 SigHandler oldHandler = signal(SIGILL, SigIllHandler);
00959 if (oldHandler == SIG_ERR)
00960 return false;
00961
00962 bool result = true;
00963 if (setjmp(s_env))
00964 result = false;
00965 else
00966 __asm __volatile ("xorps %xmm0, %xmm0");
00967
00968 signal(SIGILL, oldHandler);
00969 return result;
00970 #endif
00971 }
00972 #endif
00973
00974 static bool IsP4()
00975 {
00976 word32 cpuid[4];
00977
00978 CpuId(0, cpuid);
00979 std::swap(cpuid[2], cpuid[3]);
00980 if (memcmp(cpuid+1, "GenuineIntel", 12) != 0)
00981 return false;
00982
00983 CpuId(1, cpuid);
00984 return ((cpuid[0] >> 8) & 0xf) == 0xf;
00985 }
00986
00987
00988
00989 class PentiumOptimized : public Portable
00990 {
00991 public:
00992 static word Add(word *C, const word *A, const word *B, size_t N);
00993 static word Subtract(word *C, const word *A, const word *B, size_t N);
00994 static void Multiply4(word *C, const word *A, const word *B);
00995 static void Multiply8(word *C, const word *A, const word *B);
00996 static void Multiply8Bottom(word *C, const word *A, const word *B);
00997 };
00998
00999 class P4Optimized
01000 {
01001 public:
01002 static word Add(word *C, const word *A, const word *B, size_t N);
01003 static word Subtract(word *C, const word *A, const word *B, size_t N);
01004 #ifdef SSE2_INTRINSICS_AVAILABLE
01005 static void Multiply4(word *C, const word *A, const word *B);
01006 static void Multiply8(word *C, const word *A, const word *B);
01007 static void Multiply8Bottom(word *C, const word *A, const word *B);
01008 #endif
01009 };
01010
01011 typedef word (* PAddSub)(word *C, const word *A, const word *B, size_t N);
01012 typedef void (* PMul)(word *C, const word *A, const word *B);
01013
01014 static PAddSub s_pAdd, s_pSub;
01015 #ifdef SSE2_INTRINSICS_AVAILABLE
01016 static PMul s_pMul4, s_pMul8, s_pMul8B;
01017 #endif
01018
01019 static void SetPentiumFunctionPointers()
01020 {
01021 if (IsP4())
01022 {
01023 s_pAdd = &P4Optimized::Add;
01024 s_pSub = &P4Optimized::Subtract;
01025 }
01026 else
01027 {
01028 s_pAdd = &PentiumOptimized::Add;
01029 s_pSub = &PentiumOptimized::Subtract;
01030 }
01031
01032 #ifdef SSE2_INTRINSICS_AVAILABLE
01033 if (HasSSE2())
01034 {
01035 s_pMul4 = &P4Optimized::Multiply4;
01036 s_pMul8 = &P4Optimized::Multiply8;
01037 s_pMul8B = &P4Optimized::Multiply8Bottom;
01038 }
01039 else
01040 {
01041 s_pMul4 = &PentiumOptimized::Multiply4;
01042 s_pMul8 = &PentiumOptimized::Multiply8;
01043 s_pMul8B = &PentiumOptimized::Multiply8Bottom;
01044 }
01045 #endif
01046 }
01047
01048 static const char s_RunAtStartupSetPentiumFunctionPointers = (SetPentiumFunctionPointers(), 0);
01049
01050 void DisableSSE2()
01051 {
01052 s_sse2Enabled = false;
01053 SetPentiumFunctionPointers();
01054 }
01055
01056 class LowLevel : public PentiumOptimized
01057 {
01058 public:
01059 inline static word Add(word *C, const word *A, const word *B, size_t N)
01060 {return s_pAdd(C, A, B, N);}
01061 inline static word Subtract(word *C, const word *A, const word *B, size_t N)
01062 {return s_pSub(C, A, B, N);}
01063 inline static void Square4(word *R, const word *A)
01064 {Multiply4(R, A, A);}
01065 #ifdef SSE2_INTRINSICS_AVAILABLE
01066 inline static void Multiply4(word *C, const word *A, const word *B)
01067 {s_pMul4(C, A, B);}
01068 inline static void Multiply8(word *C, const word *A, const word *B)
01069 {s_pMul8(C, A, B);}
01070 inline static void Multiply8Bottom(word *C, const word *A, const word *B)
01071 {s_pMul8B(C, A, B);}
01072 #endif
01073 };
01074
01075
01076 #ifdef _MSC_VER
01077 #define CRYPTOPP_NAKED __declspec(naked)
01078 #define AS1(x) __asm x
01079 #define AS2(x, y) __asm x, y
01080 #define AddPrologue \
01081 __asm push ebp \
01082 __asm push ebx \
01083 __asm push esi \
01084 __asm push edi \
01085 __asm mov ecx, [esp+20] \
01086 __asm mov edx, [esp+24] \
01087 __asm mov ebx, [esp+28] \
01088 __asm mov esi, [esp+32]
01089 #define AddEpilogue \
01090 __asm pop edi \
01091 __asm pop esi \
01092 __asm pop ebx \
01093 __asm pop ebp \
01094 __asm ret
01095 #define MulPrologue \
01096 __asm push ebp \
01097 __asm push ebx \
01098 __asm push esi \
01099 __asm push edi \
01100 __asm mov ecx, [esp+28] \
01101 __asm mov esi, [esp+24] \
01102 __asm push [esp+20]
01103 #define MulEpilogue \
01104 __asm add esp, 4 \
01105 __asm pop edi \
01106 __asm pop esi \
01107 __asm pop ebx \
01108 __asm pop ebp \
01109 __asm ret
01110 #else
01111 #define CRYPTOPP_NAKED
01112 #define AS1(x) #x ";"
01113 #define AS2(x, y) #x ", " #y ";"
01114 #define AddPrologue \
01115 __asm__ __volatile__ \
01116 ( \
01117 "push %%ebx;" \
01118 "mov %2, %%ebx;" \
01119 ".intel_syntax noprefix;" \
01120 "push ebp;"
01121 #define AddEpilogue \
01122 "pop ebp;" \
01123 ".att_syntax prefix;" \
01124 "pop %%ebx;" \
01125 : \
01126 : "c" (C), "d" (A), "m" (B), "S" (N) \
01127 : "%edi", "memory", "cc" \
01128 );
01129 #define MulPrologue \
01130 __asm__ __volatile__ \
01131 ( \
01132 "push %%ebx;" \
01133 "push %%ebp;" \
01134 "push %0;" \
01135 ".intel_syntax noprefix;"
01136 #define MulEpilogue \
01137 "add esp, 4;" \
01138 "pop ebp;" \
01139 "pop ebx;" \
01140 ".att_syntax prefix;" \
01141 : \
01142 : "rm" (Z), "S" (X), "c" (Y) \
01143 : "%eax", "%edx", "%edi", "memory", "cc" \
01144 );
01145 #endif
01146
01147 CRYPTOPP_NAKED word PentiumOptimized::Add(word *C, const word *A, const word *B, size_t N)
01148 {
01149 AddPrologue
01150
01151
01152 AS2( sub ecx, edx)
01153 AS2( xor eax, eax)
01154
01155 AS2( sub eax, esi)
01156 AS2( lea ebx, [ebx+4*esi])
01157
01158 AS2( sar eax, 1)
01159 AS1( jz loopendAdd)
01160
01161 AS1(loopstartAdd:)
01162 AS2( mov esi,[edx])
01163 AS2( mov ebp,[edx+4])
01164
01165 AS2( mov edi,[ebx+8*eax])
01166 AS2( lea edx,[edx+8])
01167
01168 AS2( adc esi,edi)
01169 AS2( mov edi,[ebx+8*eax+4])
01170
01171 AS2( adc ebp,edi)
01172 AS1( inc eax)
01173
01174 AS2( mov [edx+ecx-8],esi)
01175 AS2( mov [edx+ecx-4],ebp)
01176
01177 AS1( jnz loopstartAdd)
01178
01179 AS1(loopendAdd:)
01180 AS2( adc eax, 0)
01181
01182 AddEpilogue
01183 }
01184
01185 CRYPTOPP_NAKED word PentiumOptimized::Subtract(word *C, const word *A, const word *B, size_t N)
01186 {
01187 AddPrologue
01188
01189
01190 AS2( sub ecx, edx)
01191 AS2( xor eax, eax)
01192
01193 AS2( sub eax, esi)
01194 AS2( lea ebx, [ebx+4*esi])
01195
01196 AS2( sar eax, 1)
01197 AS1( jz loopendSub)
01198
01199 AS1(loopstartSub:)
01200 AS2( mov esi,[edx])
01201 AS2( mov ebp,[edx+4])
01202
01203 AS2( mov edi,[ebx+8*eax])
01204 AS2( lea edx,[edx+8])
01205
01206 AS2( sbb esi,edi)
01207 AS2( mov edi,[ebx+8*eax+4])
01208
01209 AS2( sbb ebp,edi)
01210 AS1( inc eax)
01211
01212 AS2( mov [edx+ecx-8],esi)
01213 AS2( mov [edx+ecx-4],ebp)
01214
01215 AS1( jnz loopstartSub)
01216
01217 AS1(loopendSub:)
01218 AS2( adc eax, 0)
01219
01220 AddEpilogue
01221 }
01222
01223
01224
01225 CRYPTOPP_NAKED word P4Optimized::Add(word *C, const word *A, const word *B, size_t N)
01226 {
01227 AddPrologue
01228
01229
01230 AS2( xor eax, eax)
01231 AS1( neg esi)
01232 AS1( jz loopendAddP4)
01233
01234 AS2( mov edi, [edx])
01235 AS2( mov ebp, [ebx])
01236 AS1( jmp carry1AddP4)
01237
01238 AS1(loopstartAddP4:)
01239 AS2( mov edi, [edx+8])
01240 AS2( add ecx, 8)
01241 AS2( add edx, 8)
01242 AS2( mov ebp, [ebx])
01243 AS2( add edi, eax)
01244 AS1( jc carry1AddP4)
01245 AS2( xor eax, eax)
01246
01247 AS1(carry1AddP4:)
01248 AS2( add edi, ebp)
01249 AS2( mov ebp, 1)
01250 AS2( mov [ecx], edi)
01251 AS2( mov edi, [edx+4])
01252 AS2( cmovc eax, ebp)
01253 AS2( mov ebp, [ebx+4])
01254 AS2( add ebx, 8)
01255 AS2( add edi, eax)
01256 AS1( jc carry2AddP4)
01257 AS2( xor eax, eax)
01258
01259 AS1(carry2AddP4:)
01260 AS2( add edi, ebp)
01261 AS2( mov ebp, 1)
01262 AS2( cmovc eax, ebp)
01263 AS2( mov [ecx+4], edi)
01264 AS2( add esi, 2)
01265 AS1( jnz loopstartAddP4)
01266
01267 AS1(loopendAddP4:)
01268
01269 AddEpilogue
01270 }
01271
01272 CRYPTOPP_NAKED word P4Optimized::Subtract(word *C, const word *A, const word *B, size_t N)
01273 {
01274 AddPrologue
01275
01276
01277 AS2( xor eax, eax)
01278 AS1( neg esi)
01279 AS1( jz loopendSubP4)
01280
01281 AS2( mov edi, [edx])
01282 AS2( mov ebp, [ebx])
01283 AS1( jmp carry1SubP4)
01284
01285 AS1(loopstartSubP4:)
01286 AS2( mov edi, [edx+8])
01287 AS2( add edx, 8)
01288 AS2( add ecx, 8)
01289 AS2( mov ebp, [ebx])
01290 AS2( sub edi, eax)
01291 AS1( jc carry1SubP4)
01292 AS2( xor eax, eax)
01293
01294 AS1(carry1SubP4:)
01295 AS2( sub edi, ebp)
01296 AS2( mov ebp, 1)
01297 AS2( mov [ecx], edi)
01298 AS2( mov edi, [edx+4])
01299 AS2( cmovc eax, ebp)
01300 AS2( mov ebp, [ebx+4])
01301 AS2( add ebx, 8)
01302 AS2( sub edi, eax)
01303 AS1( jc carry2SubP4)
01304 AS2( xor eax, eax)
01305
01306 AS1(carry2SubP4:)
01307 AS2( sub edi, ebp)
01308 AS2( mov ebp, 1)
01309 AS2( cmovc eax, ebp)
01310 AS2( mov [ecx+4], edi)
01311 AS2( add esi, 2)
01312 AS1( jnz loopstartSubP4)
01313
01314 AS1(loopendSubP4:)
01315
01316 AddEpilogue
01317 }
01318
01319
01320
01321 #define MulStartup \
01322 AS2(xor ebp, ebp) \
01323 AS2(xor edi, edi) \
01324 AS2(xor ebx, ebx)
01325
01326 #define MulShiftCarry \
01327 AS2(mov ebp, edx) \
01328 AS2(mov edi, ebx) \
01329 AS2(xor ebx, ebx)
01330
01331 #define MulAccumulateBottom(i,j) \
01332 AS2(mov eax, [ecx+4*j]) \
01333 AS2(imul eax, dword ptr [esi+4*i]) \
01334 AS2(add ebp, eax)
01335
01336 #define MulAccumulate(i,j) \
01337 AS2(mov eax, [ecx+4*j]) \
01338 AS1(mul dword ptr [esi+4*i]) \
01339 AS2(add ebp, eax) \
01340 AS2(adc edi, edx) \
01341 AS2(adc bl, bh)
01342
01343 #define MulStoreDigit(i) \
01344 AS2(mov edx, edi) \
01345 AS2(mov edi, [esp]) \
01346 AS2(mov [edi+4*i], ebp)
01347
01348 #define MulLastDiagonal(digits) \
01349 AS2(mov eax, [ecx+4*(digits-1)]) \
01350 AS1(mul dword ptr [esi+4*(digits-1)]) \
01351 AS2(add ebp, eax) \
01352 AS2(adc edx, edi) \
01353 AS2(mov edi, [esp]) \
01354 AS2(mov [edi+4*(2*digits-2)], ebp) \
01355 AS2(mov [edi+4*(2*digits-1)], edx)
01356
01357 CRYPTOPP_NAKED void PentiumOptimized::Multiply4(word* Z, const word* X, const word* Y)
01358 {
01359 MulPrologue
01360
01361 MulStartup
01362 MulAccumulate(0,0)
01363 MulStoreDigit(0)
01364 MulShiftCarry
01365
01366 MulAccumulate(1,0)
01367 MulAccumulate(0,1)
01368 MulStoreDigit(1)
01369 MulShiftCarry
01370
01371 MulAccumulate(2,0)
01372 MulAccumulate(1,1)
01373 MulAccumulate(0,2)
01374 MulStoreDigit(2)
01375 MulShiftCarry
01376
01377 MulAccumulate(3,0)
01378 MulAccumulate(2,1)
01379 MulAccumulate(1,2)
01380 MulAccumulate(0,3)
01381 MulStoreDigit(3)
01382 MulShiftCarry
01383
01384 MulAccumulate(3,1)
01385 MulAccumulate(2,2)
01386 MulAccumulate(1,3)
01387 MulStoreDigit(4)
01388 MulShiftCarry
01389
01390 MulAccumulate(3,2)
01391 MulAccumulate(2,3)
01392 MulStoreDigit(5)
01393 MulShiftCarry
01394
01395 MulLastDiagonal(4)
01396 MulEpilogue
01397 }
01398
01399 CRYPTOPP_NAKED void PentiumOptimized::Multiply8(word* Z, const word* X, const word* Y)
01400 {
01401 MulPrologue
01402
01403 MulStartup
01404 MulAccumulate(0,0)
01405 MulStoreDigit(0)
01406 MulShiftCarry
01407
01408 MulAccumulate(1,0)
01409 MulAccumulate(0,1)
01410 MulStoreDigit(1)
01411 MulShiftCarry
01412
01413 MulAccumulate(2,0)
01414 MulAccumulate(1,1)
01415 MulAccumulate(0,2)
01416 MulStoreDigit(2)
01417 MulShiftCarry
01418
01419 MulAccumulate(3,0)
01420 MulAccumulate(2,1)
01421 MulAccumulate(1,2)
01422 MulAccumulate(0,3)
01423 MulStoreDigit(3)
01424 MulShiftCarry
01425
01426 MulAccumulate(4,0)
01427 MulAccumulate(3,1)
01428 MulAccumulate(2,2)
01429 MulAccumulate(1,3)
01430 MulAccumulate(0,4)
01431 MulStoreDigit(4)
01432 MulShiftCarry
01433
01434 MulAccumulate(5,0)
01435 MulAccumulate(4,1)
01436 MulAccumulate(3,2)
01437 MulAccumulate(2,3)
01438 MulAccumulate(1,4)
01439 MulAccumulate(0,5)
01440 MulStoreDigit(5)
01441 MulShiftCarry
01442
01443 MulAccumulate(6,0)
01444 MulAccumulate(5,1)
01445 MulAccumulate(4,2)
01446 MulAccumulate(3,3)
01447 MulAccumulate(2,4)
01448 MulAccumulate(1,5)
01449 MulAccumulate(0,6)
01450 MulStoreDigit(6)
01451 MulShiftCarry
01452
01453 MulAccumulate(7,0)
01454 MulAccumulate(6,1)
01455 MulAccumulate(5,2)
01456 MulAccumulate(4,3)
01457 MulAccumulate(3,4)
01458 MulAccumulate(2,5)
01459 MulAccumulate(1,6)
01460 MulAccumulate(0,7)
01461 MulStoreDigit(7)
01462 MulShiftCarry
01463
01464 MulAccumulate(7,1)
01465 MulAccumulate(6,2)
01466 MulAccumulate(5,3)
01467 MulAccumulate(4,4)
01468 MulAccumulate(3,5)
01469 MulAccumulate(2,6)
01470 MulAccumulate(1,7)
01471 MulStoreDigit(8)
01472 MulShiftCarry
01473
01474 MulAccumulate(7,2)
01475 MulAccumulate(6,3)
01476 MulAccumulate(5,4)
01477 MulAccumulate(4,5)
01478 MulAccumulate(3,6)
01479 MulAccumulate(2,7)
01480 MulStoreDigit(9)
01481 MulShiftCarry
01482
01483 MulAccumulate(7,3)
01484 MulAccumulate(6,4)
01485 MulAccumulate(5,5)
01486 MulAccumulate(4,6)
01487 MulAccumulate(3,7)
01488 MulStoreDigit(10)
01489 MulShiftCarry
01490
01491 MulAccumulate(7,4)
01492 MulAccumulate(6,5)
01493 MulAccumulate(5,6)
01494 MulAccumulate(4,7)
01495 MulStoreDigit(11)
01496 MulShiftCarry
01497
01498 MulAccumulate(7,5)
01499 MulAccumulate(6,6)
01500 MulAccumulate(5,7)
01501 MulStoreDigit(12)
01502 MulShiftCarry
01503
01504 MulAccumulate(7,6)
01505 MulAccumulate(6,7)
01506 MulStoreDigit(13)
01507 MulShiftCarry
01508
01509 MulLastDiagonal(8)
01510 MulEpilogue
01511 }
01512
01513 CRYPTOPP_NAKED void PentiumOptimized::Multiply8Bottom(word* Z, const word* X, const word* Y)
01514 {
01515 MulPrologue
01516
01517 MulStartup
01518 MulAccumulate(0,0)
01519 MulStoreDigit(0)
01520 MulShiftCarry
01521
01522 MulAccumulate(1,0)
01523 MulAccumulate(0,1)
01524 MulStoreDigit(1)
01525 MulShiftCarry
01526
01527 MulAccumulate(2,0)
01528 MulAccumulate(1,1)
01529 MulAccumulate(0,2)
01530 MulStoreDigit(2)
01531 MulShiftCarry
01532
01533 MulAccumulate(3,0)
01534 MulAccumulate(2,1)
01535 MulAccumulate(1,2)
01536 MulAccumulate(0,3)
01537 MulStoreDigit(3)
01538 MulShiftCarry
01539
01540 MulAccumulate(4,0)
01541 MulAccumulate(3,1)
01542 MulAccumulate(2,2)
01543 MulAccumulate(1,3)
01544 MulAccumulate(0,4)
01545 MulStoreDigit(4)
01546 MulShiftCarry
01547
01548 MulAccumulate(5,0)
01549 MulAccumulate(4,1)
01550 MulAccumulate(3,2)
01551 MulAccumulate(2,3)
01552 MulAccumulate(1,4)
01553 MulAccumulate(0,5)
01554 MulStoreDigit(5)
01555 MulShiftCarry
01556
01557 MulAccumulate(6,0)
01558 MulAccumulate(5,1)
01559 MulAccumulate(4,2)
01560 MulAccumulate(3,3)
01561 MulAccumulate(2,4)
01562 MulAccumulate(1,5)
01563 MulAccumulate(0,6)
01564 MulStoreDigit(6)
01565 MulShiftCarry
01566
01567 MulAccumulateBottom(7,0)
01568 MulAccumulateBottom(6,1)
01569 MulAccumulateBottom(5,2)
01570 MulAccumulateBottom(4,3)
01571 MulAccumulateBottom(3,4)
01572 MulAccumulateBottom(2,5)
01573 MulAccumulateBottom(1,6)
01574 MulAccumulateBottom(0,7)
01575 MulStoreDigit(7)
01576 MulEpilogue
01577 }
01578
01579 #undef AS1
01580 #undef AS2
01581
01582 #else
01583
01584 typedef Portable LowLevel;
01585
01586 #endif
01587
01588 #ifdef SSE2_INTRINSICS_AVAILABLE
01589
01590 #ifdef __GNUC__
01591 #define CRYPTOPP_FASTCALL
01592 #else
01593 #define CRYPTOPP_FASTCALL __fastcall
01594 #endif
01595
01596 static void CRYPTOPP_FASTCALL P4_Mul(__m128i *C, const __m128i *A, const __m128i *B)
01597 {
01598 __m128i a3210 = _mm_load_si128(A);
01599 __m128i b3210 = _mm_load_si128(B);
01600
01601 __m128i sum;
01602
01603 __m128i z = _mm_setzero_si128();
01604 __m128i a2b2_a0b0 = _mm_mul_epu32(a3210, b3210);
01605 C[0] = a2b2_a0b0;
01606
01607 __m128i a3120 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(3, 1, 2, 0));
01608 __m128i b3021 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 2, 1));
01609 __m128i a1b0_a0b1 = _mm_mul_epu32(a3120, b3021);
01610 __m128i a1b0 = _mm_unpackhi_epi32(a1b0_a0b1, z);
01611 __m128i a0b1 = _mm_unpacklo_epi32(a1b0_a0b1, z);
01612 C[1] = _mm_add_epi64(a1b0, a0b1);
01613
01614 __m128i a31 = _mm_srli_epi64(a3210, 32);
01615 __m128i b31 = _mm_srli_epi64(b3210, 32);
01616 __m128i a3b3_a1b1 = _mm_mul_epu32(a31, b31);
01617 C[6] = a3b3_a1b1;
01618
01619 __m128i a1b1 = _mm_unpacklo_epi32(a3b3_a1b1, z);
01620 __m128i b3012 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(3, 0, 1, 2));
01621 __m128i a2b0_a0b2 = _mm_mul_epu32(a3210, b3012);
01622 __m128i a0b2 = _mm_unpacklo_epi32(a2b0_a0b2, z);
01623 __m128i a2b0 = _mm_unpackhi_epi32(a2b0_a0b2, z);
01624 sum = _mm_add_epi64(a1b1, a0b2);
01625 C[2] = _mm_add_epi64(sum, a2b0);
01626
01627 __m128i a2301 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(2, 3, 0, 1));
01628 __m128i b2103 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(2, 1, 0, 3));
01629 __m128i a3b0_a1b2 = _mm_mul_epu32(a2301, b3012);
01630 __m128i a2b1_a0b3 = _mm_mul_epu32(a3210, b2103);
01631 __m128i a3b0 = _mm_unpackhi_epi32(a3b0_a1b2, z);
01632 __m128i a1b2 = _mm_unpacklo_epi32(a3b0_a1b2, z);
01633 __m128i a2b1 = _mm_unpackhi_epi32(a2b1_a0b3, z);
01634 __m128i a0b3 = _mm_unpacklo_epi32(a2b1_a0b3, z);
01635 __m128i sum1 = _mm_add_epi64(a3b0, a1b2);
01636 sum = _mm_add_epi64(a2b1, a0b3);
01637 C[3] = _mm_add_epi64(sum, sum1);
01638
01639 __m128i a3b1_a1b3 = _mm_mul_epu32(a2301, b2103);
01640 __m128i a2b2 = _mm_unpackhi_epi32(a2b2_a0b0, z);
01641 __m128i a3b1 = _mm_unpackhi_epi32(a3b1_a1b3, z);
01642 __m128i a1b3 = _mm_unpacklo_epi32(a3b1_a1b3, z);
01643 sum = _mm_add_epi64(a2b2, a3b1);
01644 C[4] = _mm_add_epi64(sum, a1b3);
01645
01646 __m128i a1302 = _mm_shuffle_epi32(a3210, _MM_SHUFFLE(1, 3, 0, 2));
01647 __m128i b1203 = _mm_shuffle_epi32(b3210, _MM_SHUFFLE(1, 2, 0, 3));
01648 __m128i a3b2_a2b3 = _mm_mul_epu32(a1302, b1203);
01649 __m128i a3b2 = _mm_unpackhi_epi32(a3b2_a2b3, z);
01650 __m128i a2b3 = _mm_unpacklo_epi32(a3b2_a2b3, z);
01651 C[5] = _mm_add_epi64(a3b2, a2b3);
01652 }
01653
01654 void P4Optimized::Multiply4(word *C, const word *A, const word *B)
01655 {
01656 __m128i temp[7];
01657 const word *w = (word *)temp;
01658 const __m64 *mw = (__m64 *)w;
01659
01660 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01661
01662 C[0] = w[0];
01663
01664 __m64 s1, s2;
01665
01666 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01667 __m64 w4 = mw[2];
01668 __m64 w6 = mw[3];
01669 __m64 w8 = mw[4];
01670 __m64 w10 = mw[5];
01671 __m64 w12 = mw[6];
01672 __m64 w14 = mw[7];
01673 __m64 w16 = mw[8];
01674 __m64 w18 = mw[9];
01675 __m64 w20 = mw[10];
01676 __m64 w22 = mw[11];
01677 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01678
01679 s1 = _mm_add_si64(w1, w4);
01680 C[1] = _mm_cvtsi64_si32(s1);
01681 s1 = _mm_srli_si64(s1, 32);
01682
01683 s2 = _mm_add_si64(w6, w8);
01684 s1 = _mm_add_si64(s1, s2);
01685 C[2] = _mm_cvtsi64_si32(s1);
01686 s1 = _mm_srli_si64(s1, 32);
01687
01688 s2 = _mm_add_si64(w10, w12);
01689 s1 = _mm_add_si64(s1, s2);
01690 C[3] = _mm_cvtsi64_si32(s1);
01691 s1 = _mm_srli_si64(s1, 32);
01692
01693 s2 = _mm_add_si64(w14, w16);
01694 s1 = _mm_add_si64(s1, s2);
01695 C[4] = _mm_cvtsi64_si32(s1);
01696 s1 = _mm_srli_si64(s1, 32);
01697
01698 s2 = _mm_add_si64(w18, w20);
01699 s1 = _mm_add_si64(s1, s2);
01700 C[5] = _mm_cvtsi64_si32(s1);
01701 s1 = _mm_srli_si64(s1, 32);
01702
01703 s2 = _mm_add_si64(w22, w26);
01704 s1 = _mm_add_si64(s1, s2);
01705 C[6] = _mm_cvtsi64_si32(s1);
01706 s1 = _mm_srli_si64(s1, 32);
01707
01708 C[7] = _mm_cvtsi64_si32(s1) + w[27];
01709 _mm_empty();
01710 }
01711
01712 void P4Optimized::Multiply8(word *C, const word *A, const word *B)
01713 {
01714 __m128i temp[28];
01715 const word *w = (word *)temp;
01716 const __m64 *mw = (__m64 *)w;
01717 const word *x = (word *)temp+7*4;
01718 const __m64 *mx = (__m64 *)x;
01719 const word *y = (word *)temp+7*4*2;
01720 const __m64 *my = (__m64 *)y;
01721 const word *z = (word *)temp+7*4*3;
01722 const __m64 *mz = (__m64 *)z;
01723
01724 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01725
01726 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01727
01728 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01729
01730 P4_Mul(temp+21, (__m128i *)A+1, (__m128i *)B+1);
01731
01732 C[0] = w[0];
01733
01734 __m64 s1, s2, s3, s4;
01735
01736 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01737 __m64 w4 = mw[2];
01738 __m64 w6 = mw[3];
01739 __m64 w8 = mw[4];
01740 __m64 w10 = mw[5];
01741 __m64 w12 = mw[6];
01742 __m64 w14 = mw[7];
01743 __m64 w16 = mw[8];
01744 __m64 w18 = mw[9];
01745 __m64 w20 = mw[10];
01746 __m64 w22 = mw[11];
01747 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01748 __m64 w27 = _mm_cvtsi32_si64(w[27]);
01749
01750 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01751 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01752 __m64 x4 = mx[2];
01753 __m64 x6 = mx[3];
01754 __m64 x8 = mx[4];
01755 __m64 x10 = mx[5];
01756 __m64 x12 = mx[6];
01757 __m64 x14 = mx[7];
01758 __m64 x16 = mx[8];
01759 __m64 x18 = mx[9];
01760 __m64 x20 = mx[10];
01761 __m64 x22 = mx[11];
01762 __m64 x26 = _mm_cvtsi32_si64(x[26]);
01763 __m64 x27 = _mm_cvtsi32_si64(x[27]);
01764
01765 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01766 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01767 __m64 y4 = my[2];
01768 __m64 y6 = my[3];
01769 __m64 y8 = my[4];
01770 __m64 y10 = my[5];
01771 __m64 y12 = my[6];
01772 __m64 y14 = my[7];
01773 __m64 y16 = my[8];
01774 __m64 y18 = my[9];
01775 __m64 y20 = my[10];
01776 __m64 y22 = my[11];
01777 __m64 y26 = _mm_cvtsi32_si64(y[26]);
01778 __m64 y27 = _mm_cvtsi32_si64(y[27]);
01779
01780 __m64 z0 = _mm_cvtsi32_si64(z[0]);
01781 __m64 z1 = _mm_cvtsi32_si64(z[1]);
01782 __m64 z4 = mz[2];
01783 __m64 z6 = mz[3];
01784 __m64 z8 = mz[4];
01785 __m64 z10 = mz[5];
01786 __m64 z12 = mz[6];
01787 __m64 z14 = mz[7];
01788 __m64 z16 = mz[8];
01789 __m64 z18 = mz[9];
01790 __m64 z20 = mz[10];
01791 __m64 z22 = mz[11];
01792 __m64 z26 = _mm_cvtsi32_si64(z[26]);
01793
01794 s1 = _mm_add_si64(w1, w4);
01795 C[1] = _mm_cvtsi64_si32(s1);
01796 s1 = _mm_srli_si64(s1, 32);
01797
01798 s2 = _mm_add_si64(w6, w8);
01799 s1 = _mm_add_si64(s1, s2);
01800 C[2] = _mm_cvtsi64_si32(s1);
01801 s1 = _mm_srli_si64(s1, 32);
01802
01803 s2 = _mm_add_si64(w10, w12);
01804 s1 = _mm_add_si64(s1, s2);
01805 C[3] = _mm_cvtsi64_si32(s1);
01806 s1 = _mm_srli_si64(s1, 32);
01807
01808 s3 = _mm_add_si64(x0, y0);
01809 s2 = _mm_add_si64(w14, w16);
01810 s1 = _mm_add_si64(s1, s3);
01811 s1 = _mm_add_si64(s1, s2);
01812 C[4] = _mm_cvtsi64_si32(s1);
01813 s1 = _mm_srli_si64(s1, 32);
01814
01815 s3 = _mm_add_si64(x1, y1);
01816 s4 = _mm_add_si64(x4, y4);
01817 s1 = _mm_add_si64(s1, w18);
01818 s3 = _mm_add_si64(s3, s4);
01819 s1 = _mm_add_si64(s1, w20);
01820 s1 = _mm_add_si64(s1, s3);
01821 C[5] = _mm_cvtsi64_si32(s1);
01822 s1 = _mm_srli_si64(s1, 32);
01823
01824 s3 = _mm_add_si64(x6, y6);
01825 s4 = _mm_add_si64(x8, y8);
01826 s1 = _mm_add_si64(s1, w22);
01827 s3 = _mm_add_si64(s3, s4);
01828 s1 = _mm_add_si64(s1, w26);
01829 s1 = _mm_add_si64(s1, s3);
01830 C[6] = _mm_cvtsi64_si32(s1);
01831 s1 = _mm_srli_si64(s1, 32);
01832
01833 s3 = _mm_add_si64(x10, y10);
01834 s4 = _mm_add_si64(x12, y12);
01835 s1 = _mm_add_si64(s1, w27);
01836 s3 = _mm_add_si64(s3, s4);
01837 s1 = _mm_add_si64(s1, s3);
01838 C[7] = _mm_cvtsi64_si32(s1);
01839 s1 = _mm_srli_si64(s1, 32);
01840
01841 s3 = _mm_add_si64(x14, y14);
01842 s4 = _mm_add_si64(x16, y16);
01843 s1 = _mm_add_si64(s1, z0);
01844 s3 = _mm_add_si64(s3, s4);
01845 s1 = _mm_add_si64(s1, s3);
01846 C[8] = _mm_cvtsi64_si32(s1);
01847 s1 = _mm_srli_si64(s1, 32);
01848
01849 s3 = _mm_add_si64(x18, y18);
01850 s4 = _mm_add_si64(x20, y20);
01851 s1 = _mm_add_si64(s1, z1);
01852 s3 = _mm_add_si64(s3, s4);
01853 s1 = _mm_add_si64(s1, z4);
01854 s1 = _mm_add_si64(s1, s3);
01855 C[9] = _mm_cvtsi64_si32(s1);
01856 s1 = _mm_srli_si64(s1, 32);
01857
01858 s3 = _mm_add_si64(x22, y22);
01859 s4 = _mm_add_si64(x26, y26);
01860 s1 = _mm_add_si64(s1, z6);
01861 s3 = _mm_add_si64(s3, s4);
01862 s1 = _mm_add_si64(s1, z8);
01863 s1 = _mm_add_si64(s1, s3);
01864 C[10] = _mm_cvtsi64_si32(s1);
01865 s1 = _mm_srli_si64(s1, 32);
01866
01867 s3 = _mm_add_si64(x27, y27);
01868 s1 = _mm_add_si64(s1, z10);
01869 s1 = _mm_add_si64(s1, z12);
01870 s1 = _mm_add_si64(s1, s3);
01871 C[11] = _mm_cvtsi64_si32(s1);
01872 s1 = _mm_srli_si64(s1, 32);
01873
01874 s3 = _mm_add_si64(z14, z16);
01875 s1 = _mm_add_si64(s1, s3);
01876 C[12] = _mm_cvtsi64_si32(s1);
01877 s1 = _mm_srli_si64(s1, 32);
01878
01879 s3 = _mm_add_si64(z18, z20);
01880 s1 = _mm_add_si64(s1, s3);
01881 C[13] = _mm_cvtsi64_si32(s1);
01882 s1 = _mm_srli_si64(s1, 32);
01883
01884 s3 = _mm_add_si64(z22, z26);
01885 s1 = _mm_add_si64(s1, s3);
01886 C[14] = _mm_cvtsi64_si32(s1);
01887 s1 = _mm_srli_si64(s1, 32);
01888
01889 C[15] = z[27] + _mm_cvtsi64_si32(s1);
01890 _mm_empty();
01891 }
01892
01893 void P4Optimized::Multiply8Bottom(word *C, const word *A, const word *B)
01894 {
01895 __m128i temp[21];
01896 const word *w = (word *)temp;
01897 const __m64 *mw = (__m64 *)w;
01898 const word *x = (word *)temp+7*4;
01899 const __m64 *mx = (__m64 *)x;
01900 const word *y = (word *)temp+7*4*2;
01901 const __m64 *my = (__m64 *)y;
01902
01903 P4_Mul(temp, (__m128i *)A, (__m128i *)B);
01904
01905 P4_Mul(temp+7, (__m128i *)A+1, (__m128i *)B);
01906
01907 P4_Mul(temp+14, (__m128i *)A, (__m128i *)B+1);
01908
01909 C[0] = w[0];
01910
01911 __m64 s1, s2, s3, s4;
01912
01913 __m64 w1 = _mm_cvtsi32_si64(w[1]);
01914 __m64 w4 = mw[2];
01915 __m64 w6 = mw[3];
01916 __m64 w8 = mw[4];
01917 __m64 w10 = mw[5];
01918 __m64 w12 = mw[6];
01919 __m64 w14 = mw[7];
01920 __m64 w16 = mw[8];
01921 __m64 w18 = mw[9];
01922 __m64 w20 = mw[10];
01923 __m64 w22 = mw[11];
01924 __m64 w26 = _mm_cvtsi32_si64(w[26]);
01925
01926 __m64 x0 = _mm_cvtsi32_si64(x[0]);
01927 __m64 x1 = _mm_cvtsi32_si64(x[1]);
01928 __m64 x4 = mx[2];
01929 __m64 x6 = mx[3];
01930 __m64 x8 = mx[4];
01931
01932 __m64 y0 = _mm_cvtsi32_si64(y[0]);
01933 __m64 y1 = _mm_cvtsi32_si64(y[1]);
01934 __m64 y4 = my[2];
01935 __m64 y6 = my[3];
01936 __m64 y8 = my[4];
01937
01938 s1 = _mm_add_si64(w1, w4);
01939 C[1] = _mm_cvtsi64_si32(s1);
01940 s1 = _mm_srli_si64(s1, 32);
01941
01942 s2 = _mm_add_si64(w6, w8);
01943 s1 = _mm_add_si64(s1, s2);
01944 C[2] = _mm_cvtsi64_si32(s1);
01945 s1 = _mm_srli_si64(s1, 32);
01946
01947 s2 = _mm_add_si64(w10, w12);
01948 s1 = _mm_add_si64(s1, s2);
01949 C[3] = _mm_cvtsi64_si32(s1);
01950 s1 = _mm_srli_si64(s1, 32);
01951
01952 s3 = _mm_add_si64(x0, y0);
01953 s2 = _mm_add_si64(w14, w16);
01954 s1 = _mm_add_si64(s1, s3);
01955 s1 = _mm_add_si64(s1, s2);
01956 C[4] = _mm_cvtsi64_si32(s1);
01957 s1 = _mm_srli_si64(s1, 32);
01958
01959 s3 = _mm_add_si64(x1, y1);
01960 s4 = _mm_add_si64(x4, y4);
01961 s1 = _mm_add_si64(s1, w18);
01962 s3 = _mm_add_si64(s3, s4);
01963 s1 = _mm_add_si64(s1, w20);
01964 s1 = _mm_add_si64(s1, s3);
01965 C[5] = _mm_cvtsi64_si32(s1);
01966 s1 = _mm_srli_si64(s1, 32);
01967
01968 s3 = _mm_add_si64(x6, y6);
01969 s4 = _mm_add_si64(x8, y8);
01970 s1 = _mm_add_si64(s1, w22);
01971 s3 = _mm_add_si64(s3, s4);
01972 s1 = _mm_add_si64(s1, w26);
01973 s1 = _mm_add_si64(s1, s3);
01974 C[6] = _mm_cvtsi64_si32(s1);
01975 s1 = _mm_srli_si64(s1, 32);
01976
01977 C[7] = _mm_cvtsi64_si32(s1) + w[27] + x[10] + y[10] + x[12] + y[12];
01978 _mm_empty();
01979 }
01980
01981 #endif // #ifdef SSE2_INTRINSICS_AVAILABLE
01982
01983
01984
01985 #define A0 A
01986 #define A1 (A+N2)
01987 #define B0 B
01988 #define B1 (B+N2)
01989
01990 #define T0 T
01991 #define T1 (T+N2)
01992 #define T2 (T+N)
01993 #define T3 (T+N+N2)
01994
01995 #define R0 R
01996 #define R1 (R+N2)
01997 #define R2 (R+N)
01998 #define R3 (R+N+N2)
01999
02000
02001
02002
02003
02004
02005 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, size_t N)
02006 {
02007 assert(N>=2 && N%2==0);
02008
02009 if (LowLevel::MultiplyRecursionLimit() >= 8 && N==8)
02010 LowLevel::Multiply8(R, A, B);
02011 else if (LowLevel::MultiplyRecursionLimit() >= 4 && N==4)
02012 LowLevel::Multiply4(R, A, B);
02013 else if (N==2)
02014 LowLevel::Multiply2(R, A, B);
02015 else
02016 {
02017 const size_t N2 = N/2;
02018 int carry;
02019
02020 int aComp = Compare(A0, A1, N2);
02021 int bComp = Compare(B0, B1, N2);
02022
02023 switch (2*aComp + aComp + bComp)
02024 {
02025 case -4:
02026 LowLevel::Subtract(R0, A1, A0, N2);
02027 LowLevel::Subtract(R1, B0, B1, N2);
02028 RecursiveMultiply(T0, T2, R0, R1, N2);
02029 LowLevel::Subtract(T1, T1, R0, N2);
02030 carry = -1;
02031 break;
02032 case -2:
02033 LowLevel::Subtract(R0, A1, A0, N2);
02034 LowLevel::Subtract(R1, B0, B1, N2);
02035 RecursiveMultiply(T0, T2, R0, R1, N2);
02036 carry = 0;
02037 break;
02038 case 2:
02039 LowLevel::Subtract(R0, A0, A1, N2);
02040 LowLevel::Subtract(R1, B1, B0, N2);
02041 RecursiveMultiply(T0, T2, R0, R1, N2);
02042 carry = 0;
02043 break;
02044 case 4:
02045 LowLevel::Subtract(R0, A1, A0, N2);
02046 LowLevel::Subtract(R1, B0, B1, N2);
02047 RecursiveMultiply(T0, T2, R0, R1, N2);
02048 LowLevel::Subtract(T1, T1, R1, N2);
02049 carry = -1;
02050 break;
02051 default:
02052 SetWords(T0, 0, N);
02053 carry = 0;
02054 }
02055
02056 RecursiveMultiply(R0, T2, A0, B0, N2);
02057 RecursiveMultiply(R2, T2, A1, B1, N2);
02058
02059
02060
02061 carry += LowLevel::Add(T0, T0, R0, N);
02062 carry += LowLevel::Add(T0, T0, R2, N);
02063 carry += LowLevel::Add(R1, R1, T0, N);
02064
02065 assert (carry >= 0 && carry <= 2);
02066 Increment(R3, N2, carry);
02067 }
02068 }
02069
02070
02071
02072
02073
02074 void RecursiveSquare(word *R, word *T, const word *A, size_t N)
02075 {
02076 assert(N && N%2==0);
02077 if (LowLevel::SquareRecursionLimit() >= 8 && N==8)
02078 LowLevel::Square8(R, A);
02079 if (LowLevel::SquareRecursionLimit() >= 4 && N==4)
02080 LowLevel::Square4(R, A);
02081 else if (N==2)
02082 LowLevel::Square2(R, A);
02083 else
02084 {
02085 const size_t N2 = N/2;
02086
02087 RecursiveSquare(R0, T2, A0, N2);
02088 RecursiveSquare(R2, T2, A1, N2);
02089 RecursiveMultiply(T0, T2, A0, A1, N2);
02090
02091 word carry = LowLevel::Add(R1, R1, T0, N);
02092 carry += LowLevel::Add(R1, R1, T0, N);
02093 Increment(R3, N2, carry);
02094 }
02095 }
02096
02097
02098
02099
02100
02101
02102 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02103 {
02104 assert(N>=2 && N%2==0);
02105 if (LowLevel::MultiplyBottomRecursionLimit() >= 8 && N==8)
02106 LowLevel::Multiply8Bottom(R, A, B);
02107 else if (LowLevel::MultiplyBottomRecursionLimit() >= 4 && N==4)
02108 LowLevel::Multiply4Bottom(R, A, B);
02109 else if (N==2)
02110 LowLevel::Multiply2Bottom(R, A, B);
02111 else
02112 {
02113 const size_t N2 = N/2;
02114
02115 RecursiveMultiply(R, T, A0, B0, N2);
02116 RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
02117 LowLevel::Add(R1, R1, T0, N2);
02118 RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
02119 LowLevel::Add(R1, R1, T0, N2);
02120 }
02121 }
02122
02123
02124
02125
02126
02127
02128
02129 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02130 {
02131 assert(N>=2 && N%2==0);
02132
02133 if (N==4)
02134 {
02135 LowLevel::Multiply4(T, A, B);
02136 memcpy(R, T+4, 4*WORD_SIZE);
02137 }
02138 else if (N==2)
02139 {
02140 LowLevel::Multiply2(T, A, B);
02141 memcpy(R, T+2, 2*WORD_SIZE);
02142 }
02143 else
02144 {
02145 const size_t N2 = N/2;
02146 int carry;
02147
02148 int aComp = Compare(A0, A1, N2);
02149 int bComp = Compare(B0, B1, N2);
02150
02151 switch (2*aComp + aComp + bComp)
02152 {
02153 case -4:
02154 LowLevel::Subtract(R0, A1, A0, N2);
02155 LowLevel::Subtract(R1, B0, B1, N2);
02156 RecursiveMultiply(T0, T2, R0, R1, N2);
02157 LowLevel::Subtract(T1, T1, R0, N2);
02158 carry = -1;
02159 break;
02160 case -2:
02161 LowLevel::Subtract(R0, A1, A0, N2);
02162 LowLevel::Subtract(R1, B0, B1, N2);
02163 RecursiveMultiply(T0, T2, R0, R1, N2);
02164 carry = 0;
02165 break;
02166 case 2:
02167 LowLevel::Subtract(R0, A0, A1, N2);
02168 LowLevel::Subtract(R1, B1, B0, N2);
02169 RecursiveMultiply(T0, T2, R0, R1, N2);
02170 carry = 0;
02171 break;
02172 case 4:
02173 LowLevel::Subtract(R0, A1, A0, N2);
02174 LowLevel::Subtract(R1, B0, B1, N2);
02175 RecursiveMultiply(T0, T2, R0, R1, N2);
02176 LowLevel::Subtract(T1, T1, R1, N2);
02177 carry = -1;
02178 break;
02179 default:
02180 SetWords(T0, 0, N);
02181 carry = 0;
02182 }
02183
02184 RecursiveMultiply(T2, R0, A1, B1, N2);
02185
02186
02187
02188 word c2 = LowLevel::Subtract(R0, L+N2, L, N2);
02189 c2 += LowLevel::Subtract(R0, R0, T0, N2);
02190 word t = (Compare(R0, T2, N2) == -1);
02191
02192 carry += t;
02193 carry += Increment(R0, N2, c2+t);
02194 carry += LowLevel::Add(R0, R0, T1, N2);
02195 carry += LowLevel::Add(R0, R0, T3, N2);
02196 assert (carry >= 0 && carry <= 2);
02197
02198 CopyWords(R1, T3, N2);
02199 Increment(R1, N2, carry);
02200 }
02201 }
02202
02203 inline word Add(word *C, const word *A, const word *B, size_t N)
02204 {
02205 return LowLevel::Add(C, A, B, N);
02206 }
02207
02208 inline word Subtract(word *C, const word *A, const word *B, size_t N)
02209 {
02210 return LowLevel::Subtract(C, A, B, N);
02211 }
02212
02213 inline void Multiply(word *R, word *T, const word *A, const word *B, size_t N)
02214 {
02215 RecursiveMultiply(R, T, A, B, N);
02216 }
02217
02218 inline void Square(word *R, word *T, const word *A, size_t N)
02219 {
02220 RecursiveSquare(R, T, A, N);
02221 }
02222
02223 inline void MultiplyBottom(word *R, word *T, const word *A, const word *B, size_t N)
02224 {
02225 RecursiveMultiplyBottom(R, T, A, B, N);
02226 }
02227
02228 inline void MultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, size_t N)
02229 {
02230 RecursiveMultiplyTop(R, T, L, A, B, N);
02231 }
02232
02233 static word LinearMultiply(word *C, const word *A, word B, size_t N)
02234 {
02235 word carry=0;
02236 for(unsigned i=0; i<N; i++)
02237 {
02238 DWord p = DWord::MultiplyAndAdd(A[i], B, carry);
02239 C[i] = p.GetLowHalf();
02240 carry = p.GetHighHalf();
02241 }
02242 return carry;
02243 }
02244
02245
02246
02247
02248
02249
02250 void AsymmetricMultiply(word *R, word *T, const word *A, size_t NA, const word *B, size_t NB)
02251 {
02252 if (NA == NB)
02253 {
02254 if (A == B)
02255 Square(R, T, A, NA);
02256 else
02257 Multiply(R, T, A, B, NA);
02258
02259 return;
02260 }
02261
02262 if (NA > NB)
02263 {
02264 std::swap(A, B);
02265 std::swap(NA, NB);
02266 }
02267
02268 assert(NB % NA == 0);
02269 assert((NB/NA)%2 == 0);
02270
02271 if (NA==2 && !A[1])
02272 {
02273 switch (A[0])
02274 {
02275 case 0:
02276 SetWords(R, 0, NB+2);
02277 return;
02278 case 1:
02279 CopyWords(R, B, NB);
02280 R[NB] = R[NB+1] = 0;
02281 return;
02282 default:
02283 R[NB] = LinearMultiply(R, B, A[0], NB);
02284 R[NB+1] = 0;
02285 return;
02286 }
02287 }
02288
02289 Multiply(R, T, A, B, NA);
02290 CopyWords(T+2*NA, R+NA, NA);
02291
02292 size_t i;
02293
02294 for (i=2*NA; i<NB; i+=2*NA)
02295 Multiply(T+NA+i, T, A, B+i, NA);
02296 for (i=NA; i<NB; i+=2*NA)
02297 Multiply(R+i, T, A, B+i, NA);
02298
02299 if (Add(R+NA, R+NA, T+2*NA, NB-NA))
02300 Increment(R+NB, NA);
02301 }
02302
02303
02304
02305
02306
02307 void RecursiveInverseModPower2(word *R, word *T, const word *A, size_t N)
02308 {
02309 if (N==2)
02310 {
02311 T[0] = AtomicInverseModPower2(A[0]);
02312 T[1] = 0;
02313 LowLevel::Multiply2Bottom(T+2, T, A);
02314 TwosComplement(T+2, 2);
02315 Increment(T+2, 2, 2);
02316 LowLevel::Multiply2Bottom(R, T, T+2);
02317 }
02318 else
02319 {
02320 const size_t N2 = N/2;
02321 RecursiveInverseModPower2(R0, T0, A0, N2);
02322 T0[0] = 1;
02323 SetWords(T0+1, 0, N2-1);
02324 MultiplyTop(R1, T1, T0, R0, A0, N2);
02325 MultiplyBottom(T0, T1, R0, A1, N2);
02326 Add(T0, R1, T0, N2);
02327 TwosComplement(T0, N2);
02328 MultiplyBottom(R1, T1, R0, T0, N2);
02329 }
02330 }
02331
02332
02333
02334
02335
02336
02337
02338 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, size_t N)
02339 {
02340 MultiplyBottom(R, T, X, U, N);
02341 MultiplyTop(T, T+N, X, R, M, N);
02342 word borrow = Subtract(T, X+N, T, N);
02343
02344 word carry = Add(T+N, T, M, N);
02345 assert(carry || !borrow);
02346 CopyWords(R, T + (borrow ? N : 0), N);
02347 }
02348
02349
02350
02351
02352
02353
02354
02355
02356 void HalfMontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, const word *V, size_t N)
02357 {
02358 assert(N%2==0 && N>=4);
02359
02360 #define M0 M
02361 #define M1 (M+N2)
02362 #define V0 V
02363 #define V1 (V+N2)
02364
02365 #define X0 X
02366 #define X1 (X+N2)
02367 #define X2 (X+N)
02368 #define X3 (X+N+N2)
02369
02370 const size_t N2 = N/2;
02371 Multiply(T0, T2, V0, X3, N2);
02372 int c2 = Add(T0, T0, X0, N);
02373 MultiplyBottom(T3, T2, T0, U, N2);
02374 MultiplyTop(T2, R, T0, T3, M0, N2);
02375 c2 -= Subtract(T2, T1, T2, N2);
02376 Multiply(T0, R, T3, M1, N2);
02377 c2 -= Subtract(T0, T2, T0, N2);
02378 int c3 = -(int)Subtract(T1, X2, T1, N2);
02379 Multiply(R0, T2, V1, X3, N2);
02380 c3 += Add(R, R, T, N);
02381
02382 if (c2>0)
02383 c3 += Increment(R1, N2);
02384 else if (c2<0)
02385 c3 -= Decrement(R1, N2, -c2);
02386
02387 assert(c3>=-1 && c3<=1);
02388 if (c3>0)
02389 Subtract(R, R, M, N);
02390 else if (c3<0)
02391 Add(R, R, M, N);
02392
02393 #undef M0
02394 #undef M1
02395 #undef V0
02396 #undef V1
02397
02398 #undef X0
02399 #undef X1
02400 #undef X2
02401 #undef X3
02402 }
02403
02404 #undef A0
02405 #undef A1
02406 #undef B0
02407 #undef B1
02408
02409 #undef T0
02410 #undef T1
02411 #undef T2
02412 #undef T3
02413
02414 #undef R0
02415 #undef R1
02416 #undef R2
02417 #undef R3
02418
02419
02420
02421
02422
02423
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440
02441
02442
02443
02444
02445
02446
02447
02448
02449
02450
02451
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469
02470
02471
02472
02473
02474
02475
02476
02477
02478
02479
02480
02481
02482
02483 static inline void AtomicDivide(word *Q, const word *A, const word *B)
02484 {
02485 word T[4];
02486 DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
02487 Q[0] = q.GetLowHalf();
02488 Q[1] = q.GetHighHalf();
02489
02490 #ifndef NDEBUG
02491 if (B[0] || B[1])
02492 {
02493
02494 assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
02495 word P[4];
02496 Portable::Multiply2(P, Q, B);
02497 Add(P, P, T, 4);
02498 assert(memcmp(P, A, 4*WORD_SIZE)==0);
02499 }
02500 #endif
02501 }
02502
02503
02504 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, size_t N)
02505 {
02506 assert(N && N%2==0);
02507
02508 if (Q[1])
02509 {
02510 T[N] = T[N+1] = 0;
02511 unsigned i;
02512 for (i=0; i<N; i+=4)
02513 LowLevel::Multiply2(T+i, Q, B+i);
02514 for (i=2; i<N; i+=4)
02515 if (LowLevel::Multiply2Add(T+i, Q, B+i))
02516 T[i+5] += (++T[i+4]==0);
02517 }
02518 else
02519 {
02520 T[N] = LinearMultiply(T, B, Q[0], N);
02521 T[N+1] = 0;
02522 }
02523
02524 word borrow = Subtract(R, R, T, N+2);
02525 assert(!borrow && !R[N+1]);
02526
02527 while (R[N] || Compare(R, B, N) >= 0)
02528 {
02529 R[N] -= Subtract(R, R, B, N);
02530 Q[1] += (++Q[0]==0);
02531 assert(Q[0] || Q[1]);
02532 }
02533 }
02534
02535
02536
02537
02538
02539
02540
02541 void Divide(word *R, word *Q, word *T, const word *A, size_t NA, const word *B, size_t NB)
02542 {
02543 assert(NA && NB && NA%2==0 && NB%2==0);
02544 assert(B[NB-1] || B[NB-2]);
02545 assert(NB <= NA);
02546
02547
02548 word *const TA=T;
02549 word *const TB=T+NA+2;
02550 word *const TP=T+NA+2+NB;
02551
02552
02553 unsigned shiftWords = (B[NB-1]==0);
02554 TB[0] = TB[NB-1] = 0;
02555 CopyWords(TB+shiftWords, B, NB-shiftWords);
02556 unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
02557 assert(shiftBits < WORD_BITS);
02558 ShiftWordsLeftByBits(TB, NB, shiftBits);
02559
02560
02561 TA[0] = TA[NA] = TA[NA+1] = 0;
02562 CopyWords(TA+shiftWords, A, NA);
02563 ShiftWordsLeftByBits(TA, NA+2, shiftBits);
02564
02565 if (TA[NA+1]==0 && TA[NA] <= 1)
02566 {
02567 Q[NA-NB+1] = Q[NA-NB] = 0;
02568 while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
02569 {
02570 TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
02571 ++Q[NA-NB];
02572 }
02573 }
02574 else
02575 {
02576 NA+=2;
02577 assert(Compare(TA+NA-NB, TB, NB) < 0);
02578 }
02579
02580 word BT[2];
02581 BT[0] = TB[NB-2] + 1;
02582 BT[1] = TB[NB-1] + (BT[0]==0);
02583
02584
02585 for (size_t i=NA-2; i>=NB; i-=2)
02586 {
02587 AtomicDivide(Q+i-NB, TA+i-2, BT);
02588 CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
02589 }
02590
02591
02592 CopyWords(R, TA+shiftWords, NB);
02593 ShiftWordsRightByBits(R, NB, shiftBits);
02594 }
02595
02596 static inline size_t EvenWordCount(const word *X, size_t N)
02597 {
02598 while (N && X[N-2]==0 && X[N-1]==0)
02599 N-=2;
02600 return N;
02601 }
02602
02603
02604
02605
02606
02607
02608
02609 unsigned int AlmostInverse(word *R, word *T, const word *A, size_t NA, const word *M, size_t N)
02610 {
02611 assert(NA<=N && N && N%2==0);
02612
02613 word *b = T;
02614 word *c = T+N;
02615 word *f = T+2*N;
02616 word *g = T+3*N;
02617 size_t bcLen=2, fgLen=EvenWordCount(M, N);
02618 unsigned int k=0, s=0;
02619
02620 SetWords(T, 0, 3*N);
02621 b[0]=1;
02622 CopyWords(f, A, NA);
02623 CopyWords(g, M, N);
02624
02625 while (1)
02626 {
02627 word t=f[0];
02628 while (!t)
02629 {
02630 if (EvenWordCount(f, fgLen)==0)
02631 {
02632 SetWords(R, 0, N);
02633 return 0;
02634 }
02635
02636 ShiftWordsRightByWords(f, fgLen, 1);
02637 if (c[bcLen-1]) bcLen+=2;
02638 assert(bcLen <= N);
02639 ShiftWordsLeftByWords(c, bcLen, 1);
02640 k+=WORD_BITS;
02641 t=f[0];
02642 }
02643
02644 unsigned int i=0;
02645 while (t%2 == 0)
02646 {
02647 t>>=1;
02648 i++;
02649 }
02650 k+=i;
02651
02652 if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
02653 {
02654 if (s%2==0)
02655 CopyWords(R, b, N);
02656 else
02657 Subtract(R, M, b, N);
02658 return k;
02659 }
02660
02661 ShiftWordsRightByBits(f, fgLen, i);
02662 t=ShiftWordsLeftByBits(c, bcLen, i);
02663 if (t)
02664 {
02665 c[bcLen] = t;
02666 bcLen+=2;
02667 assert(bcLen <= N);
02668 }
02669
02670 if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
02671 fgLen-=2;
02672
02673 if (Compare(f, g, fgLen)==-1)
02674 {
02675 std::swap(f, g);
02676 std::swap(b, c);
02677 s++;
02678 }
02679
02680 Subtract(f, f, g, fgLen);
02681
02682 if (Add(b, b, c, bcLen))
02683 {
02684 b[bcLen] = 1;
02685 bcLen+=2;
02686 assert(bcLen <= N);
02687 }
02688 }
02689 }
02690
02691
02692
02693
02694
02695 void DivideByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02696 {
02697 CopyWords(R, A, N);
02698
02699 while (k--)
02700 {
02701 if (R[0]%2==0)
02702 ShiftWordsRightByBits(R, N, 1);
02703 else
02704 {
02705 word carry = Add(R, R, M, N);
02706 ShiftWordsRightByBits(R, N, 1);
02707 R[N-1] += carry<<(WORD_BITS-1);
02708 }
02709 }
02710 }
02711
02712
02713
02714
02715
02716 void MultiplyByPower2Mod(word *R, const word *A, size_t k, const word *M, size_t N)
02717 {
02718 CopyWords(R, A, N);
02719
02720 while (k--)
02721 if (ShiftWordsLeftByBits(R, N, 1) || Compare(R, M, N)>=0)
02722 Subtract(R, R, M, N);
02723 }
02724
02725
02726
02727 static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
02728
02729 static inline size_t RoundupSize(size_t n)
02730 {
02731 if (n<=8)
02732 return RoundupSizeTable[n];
02733 else if (n<=16)
02734 return 16;
02735 else if (n<=32)
02736 return 32;
02737 else if (n<=64)
02738 return 64;
02739 else return 1U << BitPrecision(n-1);
02740 }
02741
02742 Integer::Integer()
02743 : reg(2), sign(POSITIVE)
02744 {
02745 reg[0] = reg[1] = 0;
02746 }
02747
02748 Integer::Integer(const Integer& t)
02749 : reg(RoundupSize(t.WordCount())), sign(t.sign)
02750 {
02751 CopyWords(reg, t.reg, reg.size());
02752 }
02753
02754 Integer::Integer(Sign s, lword value)
02755 : reg(2), sign(s)
02756 {
02757 reg[0] = word(value);
02758 reg[1] = word(SafeRightShift<WORD_BITS>(value));
02759 }
02760
02761 Integer::Integer(signed long value)
02762 : reg(2)
02763 {
02764 if (value >= 0)
02765 sign = POSITIVE;
02766 else
02767 {
02768 sign = NEGATIVE;
02769 value = -value;
02770 }
02771 reg[0] = word(value);
02772 reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
02773 }
02774
02775 Integer::Integer(Sign s, word high, word low)
02776 : reg(2), sign(s)
02777 {
02778 reg[0] = low;
02779 reg[1] = high;
02780 }
02781
02782 bool Integer::IsConvertableToLong() const
02783 {
02784 if (ByteCount() > sizeof(long))
02785 return false;
02786
02787 unsigned long value = reg[0];
02788 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02789
02790 if (sign==POSITIVE)
02791 return (signed long)value >= 0;
02792 else
02793 return -(signed long)value < 0;
02794 }
02795
02796 signed long Integer::ConvertToLong() const
02797 {
02798 assert(IsConvertableToLong());
02799
02800 unsigned long value = reg[0];
02801 value += SafeLeftShift<WORD_BITS, unsigned long>(reg[1]);
02802 return sign==POSITIVE ? value : -(signed long)value;
02803 }
02804
02805 Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s)
02806 {
02807 Decode(encodedInteger, byteCount, s);
02808 }
02809
02810 Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s)
02811 {
02812 Decode(encodedInteger, byteCount, s);
02813 }
02814
02815 Integer::Integer(BufferedTransformation &bt)
02816 {
02817 BERDecode(bt);
02818 }
02819
02820 Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
02821 {
02822 Randomize(rng, bitcount);
02823 }
02824
02825 Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
02826 {
02827 if (!Randomize(rng, min, max, rnType, equiv, mod))
02828 throw Integer::RandomNumberNotFound();
02829 }
02830
02831 Integer Integer::Power2(size_t e)
02832 {
02833 Integer r((word)0, BitsToWords(e+1));
02834 r.SetBit(e);
02835 return r;
02836 }
02837
02838 template <long i>
02839 struct NewInteger
02840 {
02841 Integer * operator()() const
02842 {
02843 return new Integer(i);
02844 }
02845 };
02846
02847 const Integer &Integer::Zero()
02848 {
02849 return Singleton<Integer>().Ref();
02850 }
02851
02852 const Integer &Integer::One()
02853 {
02854 return Singleton<Integer, NewInteger<1> >().Ref();
02855 }
02856
02857 const Integer &Integer::Two()
02858 {
02859 return Singleton<Integer, NewInteger<2> >().Ref();
02860 }
02861
02862 bool Integer::operator!() const
02863 {
02864 return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
02865 }
02866
02867 Integer& Integer::operator=(const Integer& t)
02868 {
02869 if (this != &t)
02870 {
02871 reg.New(RoundupSize(t.WordCount()));
02872 CopyWords(reg, t.reg, reg.size());
02873 sign = t.sign;
02874 }
02875 return *this;
02876 }
02877
02878 bool Integer::GetBit(size_t n) const
02879 {
02880 if (n/WORD_BITS >= reg.size())
02881 return 0;
02882 else
02883 return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
02884 }
02885
02886 void Integer::SetBit(size_t n, bool value)
02887 {
02888 if (value)
02889 {
02890 reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
02891 reg[n/WORD_BITS] |= (word(1) << (n%WORD_BITS));
02892 }
02893 else
02894 {
02895 if (n/WORD_BITS < reg.size())
02896 reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
02897 }
02898 }
02899
02900 byte Integer::GetByte(size_t n) const
02901 {
02902 if (n/WORD_SIZE >= reg.size())
02903 return 0;
02904 else
02905 return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
02906 }
02907
02908 void Integer::SetByte(size_t n, byte value)
02909 {
02910 reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
02911 reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
02912 reg[n/WORD_SIZE] |= (word(value) << 8*(n%WORD_SIZE));
02913 }
02914
02915 lword Integer::GetBits(size_t i, size_t n) const
02916 {
02917 lword v = 0;
02918 assert(n <= sizeof(v)*8);
02919 for (unsigned int j=0; j<n; j++)
02920 v |= lword(GetBit(i+j)) << j;
02921 return v;
02922 }
02923
02924 Integer Integer::operator-() const
02925 {
02926 Integer result(*this);
02927 result.Negate();
02928 return result;
02929 }
02930
02931 Integer Integer::AbsoluteValue() const
02932 {
02933 Integer result(*this);
02934 result.sign = POSITIVE;
02935 return result;
02936 }
02937
02938 void Integer::swap(Integer &a)
02939 {
02940 reg.swap(a.reg);
02941 std::swap(sign, a.sign);
02942 }
02943
02944 Integer::Integer(word value, size_t length)
02945 : reg(RoundupSize(length)), sign(POSITIVE)
02946 {
02947 reg[0] = value;
02948 SetWords(reg+1, 0, reg.size()-1);
02949 }
02950
02951 template <class T>
02952 static Integer StringToInteger(const T *str)
02953 {
02954 word radix;
02955
02956
02957 unsigned int length;
02958 for (length = 0; str[length] != 0; length++) {}
02959
02960 Integer v;
02961
02962 if (length == 0)
02963 return v;
02964
02965 switch (str[length-1])
02966 {
02967 case 'h':
02968 case 'H':
02969 radix=16;
02970 break;
02971 case 'o':
02972 case 'O':
02973 radix=8;
02974 break;
02975 case 'b':
02976 case 'B':
02977 radix=2;
02978 break;
02979 default:
02980 radix=10;
02981 }
02982
02983 if (length > 2 && str[0] == '0' && str[1] == 'x')
02984 radix = 16;
02985
02986 for (unsigned i=0; i<length; i++)
02987 {
02988 word digit;
02989
02990 if (str[i] >= '0' && str[i] <= '9')
02991 digit = str[i] - '0';
02992 else if (str[i] >= 'A' && str[i] <= 'F')
02993 digit = str[i] - 'A' + 10;
02994 else if (str[i] >= 'a' && str[i] <= 'f')
02995 digit = str[i] - 'a' + 10;
02996 else
02997 digit = radix;
02998
02999 if (digit < radix)
03000 {
03001 v *= radix;
03002 v += digit;
03003 }
03004 }
03005
03006 if (str[0] == '-')
03007 v.Negate();
03008
03009 return v;
03010 }
03011
03012 Integer::Integer(const char *str)
03013 : reg(2), sign(POSITIVE)
03014 {
03015 *this = StringToInteger(str);
03016 }
03017
03018 Integer::Integer(const wchar_t *str)
03019 : reg(2), sign(POSITIVE)
03020 {
03021 *this = StringToInteger(str);
03022 }
03023
03024 unsigned int Integer::WordCount() const
03025 {
03026 return (unsigned int)CountWords(reg, reg.size());
03027 }
03028
03029 unsigned int Integer::ByteCount() const
03030 {
03031 unsigned wordCount = WordCount();
03032 if (wordCount)
03033 return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
03034 else
03035 return 0;
03036 }
03037
03038 unsigned int Integer::BitCount() const
03039 {
03040 unsigned wordCount = WordCount();
03041 if (wordCount)
03042 return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
03043 else
03044 return 0;
03045 }
03046
03047 void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
03048 {
03049 StringStore store(input, inputLen);
03050 Decode(store, inputLen, s);
03051 }
03052
03053 void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
03054 {
03055 assert(bt.MaxRetrievable() >= inputLen);
03056
03057 byte b;
03058 bt.Peek(b);
03059 sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
03060
03061 while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
03062 {
03063 bt.Skip(1);
03064 inputLen--;
03065 bt.Peek(b);
03066 }
03067
03068 reg.CleanNew(RoundupSize(BytesToWords(inputLen)));
03069
03070 for (size_t i=inputLen; i > 0; i--)
03071 {
03072 bt.Get(b);
03073 reg[(i-1)/WORD_SIZE] |= word(b) << ((i-1)%WORD_SIZE)*8;
03074 }
03075
03076 if (sign == NEGATIVE)
03077 {
03078 for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
03079 reg[i/WORD_SIZE] |= word(0xff) << (i%WORD_SIZE)*8;
03080 TwosComplement(reg, reg.size());
03081 }
03082 }
03083
03084 size_t Integer::MinEncodedSize(Signedness signedness) const
03085 {
03086 unsigned int outputLen = STDMAX(1U, ByteCount());
03087 if (signedness == UNSIGNED)
03088 return outputLen;
03089 if (NotNegative() && (GetByte(outputLen-1) & 0x80))
03090 outputLen++;
03091 if (IsNegative() && *this < -Power2(outputLen*8-1))
03092 outputLen++;
03093 return outputLen;
03094 }
03095
03096 void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
03097 {
03098 ArraySink sink(output, outputLen);
03099 Encode(sink, outputLen, signedness);
03100 }
03101
03102 void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
03103 {
03104 if (signedness == UNSIGNED || NotNegative())
03105 {
03106 for (size_t i=outputLen; i > 0; i--)
03107 bt.Put(GetByte(i-1));
03108 }
03109 else
03110 {
03111
03112 Integer temp = Integer::Power2(8*UnsignedMin(ByteCount(), outputLen)) + *this;
03113 temp.Encode(bt, outputLen, UNSIGNED);
03114 }
03115 }
03116
03117 void Integer::DEREncode(BufferedTransformation &bt) const
03118 {
03119 DERGeneralEncoder enc(bt, INTEGER);
03120 Encode(enc, MinEncodedSize(SIGNED), SIGNED);
03121 enc.MessageEnd();
03122 }
03123
03124 void Integer::BERDecode(const byte *input, size_t len)
03125 {
03126 StringStore store(input, len);
03127 BERDecode(store);
03128 }
03129
03130 void Integer::BERDecode(BufferedTransformation &bt)
03131 {
03132 BERGeneralDecoder dec(bt, INTEGER);
03133 if (!dec.IsDefiniteLength() || dec.MaxRetrievable() < dec.RemainingLength())
03134 BERDecodeError();
03135 Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
03136 dec.MessageEnd();
03137 }
03138
03139 void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
03140 {
03141 DERGeneralEncoder enc(bt, OCTET_STRING);
03142 Encode(enc, length);
03143 enc.MessageEnd();
03144 }
03145
03146 void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
03147 {
03148 BERGeneralDecoder dec(bt, OCTET_STRING);
03149 if (!dec.IsDefiniteLength() || dec.RemainingLength() != length)
03150 BERDecodeError();
03151 Decode(dec, length);
03152 dec.MessageEnd();
03153 }
03154
03155 size_t Integer::OpenPGPEncode(byte *output, size_t len) const
03156 {
03157 ArraySink sink(output, len);
03158 return OpenPGPEncode(sink);
03159 }
03160
03161 size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
03162 {
03163 word16 bitCount = BitCount();
03164 bt.PutWord16(bitCount);
03165 size_t byteCount = BitsToBytes(bitCount);
03166 Encode(bt, byteCount);
03167 return 2 + byteCount;
03168 }
03169
03170 void Integer::OpenPGPDecode(const byte *input, size_t len)
03171 {
03172 StringStore store(input, len);
03173 OpenPGPDecode(store);
03174 }
03175
03176 void Integer::OpenPGPDecode(BufferedTransformation &bt)
03177 {
03178 word16 bitCount;
03179 if (bt.GetWord16(bitCount) != 2 || bt.MaxRetrievable() < BitsToBytes(bitCount))
03180 throw OpenPGPDecodeErr();
03181 Decode(bt, BitsToBytes(bitCount));
03182 }
03183
03184 void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
03185 {
03186 const size_t nbytes = nbits/8 + 1;
03187 SecByteBlock buf(nbytes);
03188 rng.GenerateBlock(buf, nbytes);
03189 if (nbytes)
03190 buf[0] = (byte)Crop(buf[0], nbits % 8);
03191 Decode(buf, nbytes, UNSIGNED);
03192 }
03193
03194 void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
03195 {
03196 if (min > max)
03197 throw InvalidArgument("Integer: Min must be no greater than Max");
03198
03199 Integer range = max - min;
03200 const unsigned int nbits = range.BitCount();
03201
03202 do
03203 {
03204 Randomize(rng, nbits);
03205 }
03206 while (*this > range);
03207
03208 *this += min;
03209 }
03210
03211 bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
03212 {
03213 return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
03214 }
03215
03216 class KDF2_RNG : public RandomNumberGenerator
03217 {
03218 public:
03219 KDF2_RNG(const byte *seed, size_t seedSize)
03220 : m_counter(0), m_counterAndSeed(seedSize + 4)
03221 {
03222 memcpy(m_counterAndSeed + 4, seed, seedSize);
03223 }
03224
03225 byte GenerateByte()
03226 {
03227 byte b;
03228 GenerateBlock(&b, 1);
03229 return b;
03230 }
03231
03232 void GenerateBlock(byte *output, unsigned int size)
03233 {
03234 UnalignedPutWord(BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
03235 ++m_counter;
03236 P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
03237 }
03238
03239 private:
03240 word32 m_counter;
03241 SecByteBlock m_counterAndSeed;
03242 };
03243
03244 bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs ¶ms)
03245 {
03246 Integer min = params.GetValueWithDefault("Min", Integer::Zero());
03247 Integer max;
03248 if (!params.GetValue("Max", max))
03249 {
03250 int bitLength;
03251 if (params.GetIntValue("BitLength", bitLength))
03252 max = Integer::Power2(bitLength);
03253 else
03254 throw InvalidArgument("Integer: missing Max argument");
03255 }
03256 if (min > max)
03257 throw InvalidArgument("Integer: Min must be no greater than Max");
03258
03259 Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
03260 Integer mod = params.GetValueWithDefault("Mod", Integer::One());
03261
03262 if (equiv.IsNegative() || equiv >= mod)
03263 throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
03264
03265 Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
03266
03267 member_ptr<KDF2_RNG> kdf2Rng;
03268 ConstByteArrayParameter seed;
03269 if (params.GetValue("Seed", seed))
03270 {
03271 ByteQueue bq;
03272 DERSequenceEncoder seq(bq);
03273 min.DEREncode(seq);
03274 max.DEREncode(seq);
03275 equiv.DEREncode(seq);
03276 mod.DEREncode(seq);
03277 DEREncodeUnsigned(seq, rnType);
03278 DEREncodeOctetString(seq, seed.begin(), seed.size());
03279 seq.MessageEnd();
03280
03281 SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
03282 bq.Get(finalSeed, finalSeed.size());
03283 kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
03284 }
03285 RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
03286
03287 switch (rnType)
03288 {
03289 case ANY:
03290 if (mod == One())
03291 Randomize(rng, min, max);
03292 else
03293 {
03294 Integer min1 = min + (equiv-min)%mod;
03295 if (max < min1)
03296 return false;
03297 Randomize(rng, Zero(), (max - min1) / mod);
03298 *this *= mod;
03299 *this += min1;
03300 }
03301 return true;
03302
03303 case PRIME:
03304 {
03305 const PrimeSelector *pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector *)NULL);
03306
03307 int i;
03308 i = 0;
03309 while (1)
03310 {
03311 if (++i==16)
03312 {
03313
03314 Integer first = min;
03315 if (FirstPrime(first, max, equiv, mod, pSelector))
03316 {
03317
03318 *this = first;
03319 if (!FirstPrime(first, max, equiv, mod, pSelector))
03320 return true;
03321 }
03322 else
03323 return false;
03324 }
03325
03326 Randomize(rng, min, max);
03327 if (FirstPrime(*this, STDMIN(*this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
03328 return true;
03329 }
03330 }
03331
03332 default:
03333 throw InvalidArgument("Integer: invalid RandomNumberType argument");
03334 }
03335 }
03336
03337 std::istream& operator>>(std::istream& in, Integer &a)
03338 {
03339 char c;
03340 unsigned int length = 0;
03341 SecBlock<char> str(length + 16);
03342
03343 std::ws(in);
03344
03345 do
03346 {
03347 in.read(&c, 1);
03348 str[length++] = c;
03349 if (length >= str.size())
03350 str.Grow(length + 16);
03351 }
03352 while (in && (c=='-' || c=='x' || (c>='0' && c<='9') || (c>='a' && c<='f') || (c>='A' && c<='F') || c=='h' || c=='H' || c=='o' || c=='O' || c==',' || c=='.'));
03353
03354 if (in.gcount())
03355 in.putback(c);
03356 str[length-1] = '\0';
03357 a = Integer(str);
03358
03359 return in;
03360 }
03361
03362 std::ostream& operator<<(std::ostream& out, const Integer &a)
03363 {
03364
03365 long f = out.flags() & std::ios::basefield;
03366 int base, block;
03367 char suffix;
03368 switch(f)
03369 {
03370 case std::ios::oct :
03371 base = 8;
03372 block = 8;
03373 suffix = 'o';
03374 break;
03375 case std::ios::hex :
03376 base = 16;
03377 block = 4;
03378 suffix = 'h';
03379 break;
03380 default :
03381 base = 10;
03382 block = 3;
03383 suffix = '.';
03384 }
03385
03386 SecBlock<char> s(a.BitCount() / (BitPrecision(base)-1) + 1);
03387 Integer temp1=a, temp2;
03388 unsigned i=0;
03389 const char vec[]="0123456789ABCDEF";
03390
03391 if (a.IsNegative())
03392 {
03393 out << '-';
03394 temp1.Negate();
03395 }
03396
03397 if (!a)
03398 out << '0';
03399
03400 while (!!temp1)
03401 {
03402 word digit;
03403 Integer::Divide(digit, temp2, temp1, base);
03404 s[i++]=vec[digit];
03405 temp1=temp2;
03406 }
03407
03408 while (i--)
03409 {
03410 out << s[i];
03411
03412
03413 }
03414 return out << suffix;
03415 }
03416
03417 Integer& Integer::operator++()
03418 {
03419 if (NotNegative())
03420 {
03421 if (Increment(reg, reg.size()))
03422 {
03423 reg.CleanGrow(2*reg.size());
03424 reg[reg.size()/2]=1;
03425 }
03426 }
03427 else
03428 {
03429 word borrow = Decrement(reg, reg.size());
03430 assert(!borrow);
03431 if (WordCount()==0)
03432 *this = Zero();
03433 }
03434 return *this;
03435 }
03436
03437 Integer& Integer::operator--()
03438 {
03439 if (IsNegative())
03440 {
03441 if (Increment(reg, reg.size()))
03442 {
03443 reg.CleanGrow(2*reg.size());
03444 reg[reg.size()/2]=1;
03445 }
03446 }
03447 else
03448 {
03449 if (Decrement(reg, reg.size()))
03450 *this = -One();
03451 }
03452 return *this;
03453 }
03454
03455 void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
03456 {
03457 word carry;
03458 if (a.reg.size() == b.reg.size())
03459 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03460 else if (a.reg.size() > b.reg.size())
03461 {
03462 carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
03463 CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
03464 carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
03465 }
03466 else
03467 {
03468 carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
03469 CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
03470 carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
03471 }
03472
03473 if (carry)
03474 {
03475 sum.reg.CleanGrow(2*sum.reg.size());
03476 sum.reg[sum.reg.size()/2] = 1;
03477 }
03478 sum.sign = Integer::POSITIVE;
03479 }
03480
03481 void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
03482 {
03483 unsigned aSize = a.WordCount();
03484 aSize += aSize%2;
03485 unsigned bSize = b.WordCount();
03486 bSize += bSize%2;
03487
03488 if (aSize == bSize)
03489 {
03490 if (Compare(a.reg, b.reg, aSize) >= 0)
03491 {
03492 Subtract(diff.reg, a.reg, b.reg, aSize);
03493 diff.sign = Integer::POSITIVE;
03494 }
03495 else
03496 {
03497 Subtract(diff.reg, b.reg, a.reg, aSize);
03498 diff.sign = Integer::NEGATIVE;
03499 }
03500 }
03501 else if (aSize > bSize)
03502 {
03503 word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
03504 CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
03505 borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
03506 assert(!borrow);
03507 diff.sign = Integer::POSITIVE;
03508 }
03509 else
03510 {
03511 word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
03512 CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
03513 borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
03514 assert(!borrow);
03515 diff.sign = Integer::NEGATIVE;
03516 }
03517 }
03518
03519
03520 template <class T> inline const T& STDMAX2(const T& a, const T& b)
03521 {
03522 return a < b ? b : a;
03523 }
03524
03525 Integer Integer::Plus(const Integer& b) const
03526 {
03527 Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
03528 if (NotNegative())
03529 {
03530 if (b.NotNegative())
03531 PositiveAdd(sum, *this, b);
03532 else
03533 PositiveSubtract(sum, *this, b);
03534 }
03535 else
03536 {
03537 if (b.NotNegative())
03538 PositiveSubtract(sum, b, *this);
03539 else
03540 {
03541 PositiveAdd(sum, *this, b);
03542 sum.sign = Integer::NEGATIVE;
03543 }
03544 }
03545 return sum;
03546 }
03547
03548 Integer& Integer::operator+=(const Integer& t)
03549 {
03550 reg.CleanGrow(t.reg.size());
03551 if (NotNegative())
03552 {
03553 if (t.NotNegative())
03554 PositiveAdd(*this, *this, t);
03555 else
03556 PositiveSubtract(*this, *this, t);
03557 }
03558 else
03559 {
03560 if (t.NotNegative())
03561 PositiveSubtract(*this, t, *this);
03562 else
03563 {
03564 PositiveAdd(*this, *this, t);
03565 sign = Integer::NEGATIVE;
03566 }
03567 }
03568 return *this;
03569 }
03570
03571 Integer Integer::Minus(const Integer& b) const
03572 {
03573 Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
03574 if (NotNegative())
03575 {
03576 if (b.NotNegative())
03577 PositiveSubtract(diff, *this, b);
03578 else
03579 PositiveAdd(diff, *this, b);
03580 }
03581 else
03582 {
03583 if (b.NotNegative())
03584 {
03585 PositiveAdd(diff, *this, b);
03586 diff.sign = Integer::NEGATIVE;
03587 }
03588 else
03589 PositiveSubtract(diff, b, *this);
03590 }
03591 return diff;
03592 }
03593
03594 Integer& Integer::operator-=(const Integer& t)
03595 {
03596 reg.CleanGrow(t.reg.size());
03597 if (NotNegative())
03598 {
03599 if (t.NotNegative())
03600 PositiveSubtract(*this, *this, t);
03601 else
03602 PositiveAdd(*this, *this, t);
03603 }
03604 else
03605 {
03606 if (t.NotNegative())
03607 {
03608 PositiveAdd(*this, *this, t);
03609 sign = Integer::NEGATIVE;
03610 }
03611 else
03612 PositiveSubtract(*this, t, *this);
03613 }
03614 return *this;
03615 }
03616
03617 Integer& Integer::operator<<=(size_t n)
03618 {
03619 const size_t wordCount = WordCount();
03620 const size_t shiftWords = n / WORD_BITS;
03621 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03622
03623 reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
03624 ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
03625 ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
03626 return *this;
03627 }
03628
03629 Integer& Integer::operator>>=(size_t n)
03630 {
03631 const size_t wordCount = WordCount();
03632 const size_t shiftWords = n / WORD_BITS;
03633 const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
03634
03635 ShiftWordsRightByWords(reg, wordCount, shiftWords);
03636 if (wordCount > shiftWords)
03637 ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
03638 if (IsNegative() && WordCount()==0)
03639 *this = Zero();
03640 return *this;
03641 }
03642
03643 void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
03644 {
03645 size_t aSize = RoundupSize(a.WordCount());
03646 size_t bSize = RoundupSize(b.WordCount());
03647
03648 product.reg.CleanNew(RoundupSize(aSize+bSize));
03649 product.sign = Integer::POSITIVE;
03650
03651 SecAlignedWordBlock workspace(aSize + bSize);
03652 AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
03653 }
03654
03655 void Multiply(Integer &product, const Integer &a, const Integer &b)
03656 {
03657 PositiveMultiply(product, a, b);
03658
03659 if (a.NotNegative() != b.NotNegative())
03660 product.Negate();
03661 }
03662
03663 Integer Integer::Times(const Integer &b) const
03664 {
03665 Integer product;
03666 Multiply(product, *this, b);
03667 return product;
03668 }
03669
03670
03671
03672
03673
03674
03675
03676
03677
03678
03679
03680
03681
03682
03683
03684
03685
03686
03687
03688
03689
03690
03691
03692 void PositiveDivide(Integer &remainder, Integer "ient,
03693 const Integer &a, const Integer &b)
03694 {
03695 unsigned aSize = a.WordCount();
03696 unsigned bSize = b.WordCount();
03697
03698 if (!bSize)
03699 throw Integer::DivideByZero();
03700
03701 if (a.PositiveCompare(b) == -1)
03702 {
03703 remainder = a;
03704 remainder.sign = Integer::POSITIVE;
03705 quotient = Integer::Zero();
03706 return;
03707 }
03708
03709 aSize += aSize%2;
03710 bSize += bSize%2;
03711
03712 remainder.reg.CleanNew(RoundupSize(bSize));
03713 remainder.sign = Integer::POSITIVE;
03714 quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
03715 quotient.sign = Integer::POSITIVE;
03716
03717 SecAlignedWordBlock T(aSize+2*bSize+4);
03718 Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
03719 }
03720
03721 void Integer::Divide(Integer &remainder, Integer "ient, const Integer ÷nd, const Integer &divisor)
03722 {
03723 PositiveDivide(remainder, quotient, dividend, divisor);
03724
03725 if (dividend.IsNegative())
03726 {
03727 quotient.Negate();
03728 if (remainder.NotZero())
03729 {
03730 --quotient;
03731 remainder = divisor.AbsoluteValue() - remainder;
03732 }
03733 }
03734
03735 if (divisor.IsNegative())
03736 quotient.Negate();
03737 }
03738
03739 void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
03740 {
03741 q = a;
03742 q >>= n;
03743
03744 const size_t wordCount = BitsToWords(n);
03745 if (wordCount <= a.WordCount())
03746 {
03747 r.reg.resize(RoundupSize(wordCount));
03748 CopyWords(r.reg, a.reg, wordCount);
03749 SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
03750 if (n % WORD_BITS != 0)
03751 r.reg[wordCount-1] %= (1 << (n % WORD_BITS));
03752 }
03753 else
03754 {
03755 r.reg.resize(RoundupSize(a.WordCount()));
03756 CopyWords(r.reg, a.reg, r.reg.size());
03757 }
03758 r.sign = POSITIVE;
03759
03760 if (a.IsNegative() && r.NotZero())
03761 {
03762 --q;
03763 r = Power2(n) - r;
03764 }
03765 }
03766
03767 Integer Integer::DividedBy(const Integer &b) const
03768 {
03769 Integer remainder, quotient;
03770 Integer::Divide(remainder, quotient, *this, b);
03771 return quotient;
03772 }
03773
03774 Integer Integer::Modulo(const Integer &b) const
03775 {
03776 Integer remainder, quotient;
03777 Integer::Divide(remainder, quotient, *this, b);
03778 return remainder;
03779 }
03780
03781 void Integer::Divide(word &remainder, Integer "ient, const Integer ÷nd, word divisor)
03782 {
03783 if (!divisor)
03784 throw Integer::DivideByZero();
03785
03786 assert(divisor);
03787
03788 if ((divisor & (divisor-1)) == 0)
03789 {
03790 quotient = dividend >> (BitPrecision(divisor)-1);
03791 remainder = dividend.reg[0] & (divisor-1);
03792 return;
03793 }
03794
03795 unsigned int i = dividend.WordCount();
03796 quotient.reg.CleanNew(RoundupSize(i));
03797 remainder = 0;
03798 while (i--)
03799 {
03800 quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
03801 remainder = DWord(dividend.reg[i], remainder) % divisor;
03802 }
03803
03804 if (dividend.NotNegative())
03805 quotient.sign = POSITIVE;
03806 else
03807 {
03808 quotient.sign = NEGATIVE;
03809 if (remainder)
03810 {
03811 --quotient;
03812 remainder = divisor - remainder;
03813 }
03814 }
03815 }
03816
03817 Integer Integer::DividedBy(word b) const
03818 {
03819 word remainder;
03820 Integer quotient;
03821 Integer::Divide(remainder, quotient, *this, b);
03822 return quotient;
03823 }
03824
03825 word Integer::Modulo(word divisor) const
03826 {
03827 if (!divisor)
03828 throw Integer::DivideByZero();
03829
03830 assert(divisor);
03831
03832 word remainder;
03833
03834 if ((divisor & (divisor-1)) == 0)
03835 remainder = reg[0] & (divisor-1);
03836 else
03837 {
03838 unsigned int i = WordCount();
03839
03840 if (divisor <= 5)
03841 {
03842 DWord sum(0, 0);
03843 while (i--)
03844 sum += reg[i];
03845 remainder = sum % divisor;
03846 }
03847 else
03848 {
03849 remainder = 0;
03850 while (i--)
03851 remainder = DWord(reg[i], remainder) % divisor;
03852 }
03853 }
03854
03855 if (IsNegative() && remainder)
03856 remainder = divisor - remainder;
03857
03858 return remainder;
03859 }
03860
03861 void Integer::Negate()
03862 {
03863 if (!!(*this))
03864 sign = Sign(1-sign);
03865 }
03866
03867 int Integer::PositiveCompare(const Integer& t) const
03868 {
03869 unsigned size = WordCount(), tSize = t.WordCount();
03870
03871 if (size == tSize)
03872 return CryptoPP::Compare(reg, t.reg, size);
03873 else
03874 return size > tSize ? 1 : -1;
03875 }
03876
03877 int Integer::Compare(const Integer& t) const
03878 {
03879 if (NotNegative())
03880 {
03881 if (t.NotNegative())
03882 return PositiveCompare(t);
03883 else
03884 return 1;
03885 }
03886 else
03887 {
03888 if (t.NotNegative())
03889 return -1;
03890 else
03891 return -PositiveCompare(t);
03892 }
03893 }
03894
03895 Integer Integer::SquareRoot() const
03896 {
03897 if (!IsPositive())
03898 return Zero();
03899
03900
03901 Integer x, y = Power2((BitCount()+1)/2);
03902 assert(y*y >= *this);
03903
03904 do
03905 {
03906 x = y;
03907 y = (x + *this/x) >> 1;
03908 } while (y<x);
03909
03910 return x;
03911 }
03912
03913 bool Integer::IsSquare() const
03914 {
03915 Integer r = SquareRoot();
03916 return *this == r.Squared();
03917 }
03918
03919 bool Integer::IsUnit() const
03920 {
03921 return (WordCount() == 1) && (reg[0] == 1);
03922 }
03923
03924 Integer Integer::MultiplicativeInverse() const
03925 {
03926 return IsUnit() ? *this : Zero();
03927 }
03928
03929 Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
03930 {
03931 return x*y%m;
03932 }
03933
03934 Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
03935 {
03936 ModularArithmetic mr(m);
03937 return mr.Exponentiate(x, e);
03938 }
03939
03940 Integer Integer::Gcd(const Integer &a, const Integer &b)
03941 {
03942 return EuclideanDomainOf<Integer>().Gcd(a, b);
03943 }
03944
03945 Integer Integer::InverseMod(const Integer &m) const
03946 {
03947 assert(m.NotNegative());
03948
03949 if (IsNegative() || *this>=m)
03950 return (*this%m).InverseMod(m);
03951
03952 if (m.IsEven())
03953 {
03954 if (!m || IsEven())
03955 return Zero();
03956 if (*this == One())
03957 return One();
03958
03959 Integer u = m.InverseMod(*this);
03960 return !u ? Zero() : (m*(*this-u)+1)/(*this);
03961 }
03962
03963 SecBlock<word> T(m.reg.size() * 4);
03964 Integer r((word)0, m.reg.size());
03965 unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
03966 DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
03967 return r;
03968 }
03969
03970 word Integer::InverseMod(const word mod) const
03971 {
03972 word g0 = mod, g1 = *this % mod;
03973 word v0 = 0, v1 = 1;
03974 word y;
03975
03976 while (g1)
03977 {
03978 if (g1 == 1)
03979 return v1;
03980 y = g0 / g1;
03981 g0 = g0 % g1;
03982 v0 += y * v1;
03983
03984 if (!g0)
03985 break;
03986 if (g0 == 1)
03987 return mod-v0;
03988 y = g1 / g0;
03989 g1 = g1 % g0;
03990 v1 += y * v0;
03991 }
03992 return 0;
03993 }
03994
03995
03996
03997 ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
03998 {
03999 BERSequenceDecoder seq(bt);
04000 OID oid(seq);
04001 if (oid != ASN1::prime_field())
04002 BERDecodeError();
04003 m_modulus.BERDecode(seq);
04004 seq.MessageEnd();
04005 m_result.reg.resize(m_modulus.reg.size());
04006 }
04007
04008 void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
04009 {
04010 DERSequenceEncoder seq(bt);
04011 ASN1::prime_field().DEREncode(seq);
04012 m_modulus.DEREncode(seq);
04013 seq.MessageEnd();
04014 }
04015
04016 void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
04017 {
04018 a.DEREncodeAsOctetString(out, MaxElementByteLength());
04019 }
04020
04021 void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
04022 {
04023 a.BERDecodeAsOctetString(in, MaxElementByteLength());
04024 }
04025
04026 const Integer& ModularArithmetic::Half(const Integer &a) const
04027 {
04028 if (a.reg.size()==m_modulus.reg.size())
04029 {
04030 CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
04031 return m_result;
04032 }
04033 else
04034 return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
04035 }
04036
04037 const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
04038 {
04039 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04040 {
04041 if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
04042 || Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
04043 {
04044 CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04045 }
04046 return m_result;
04047 }
04048 else
04049 {
04050 m_result1 = a+b;
04051 if (m_result1 >= m_modulus)
04052 m_result1 -= m_modulus;
04053 return m_result1;
04054 }
04055 }
04056
04057 Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
04058 {
04059 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04060 {
04061 if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
04062 || Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
04063 {
04064 CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
04065 }
04066 }
04067 else
04068 {
04069 a+=b;
04070 if (a>=m_modulus)
04071 a-=m_modulus;
04072 }
04073
04074 return a;
04075 }
04076
04077 const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
04078 {
04079 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04080 {
04081 if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
04082 CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
04083 return m_result;
04084 }
04085 else
04086 {
04087 m_result1 = a-b;
04088 if (m_result1.IsNegative())
04089 m_result1 += m_modulus;
04090 return m_result1;
04091 }
04092 }
04093
04094 Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
04095 {
04096 if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
04097 {
04098 if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
04099 CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
04100 }
04101 else
04102 {
04103 a-=b;
04104 if (a.IsNegative())
04105 a+=m_modulus;
04106 }
04107
04108 return a;
04109 }
04110
04111 const Integer& ModularArithmetic::Inverse(const Integer &a) const
04112 {
04113 if (!a)
04114 return a;
04115
04116 CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
04117 if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
04118 Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
04119
04120 return m_result;
04121 }
04122
04123 Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
04124 {
04125 if (m_modulus.IsOdd())
04126 {
04127 MontgomeryRepresentation dr(m_modulus);
04128 return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
04129 }
04130 else
04131 return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
04132 }
04133
04134 void ModularArithmetic::SimultaneousExponentiate(Integer *results, const Integer &base, const Integer *exponents, unsigned int exponentsCount) const
04135 {
04136 if (m_modulus.IsOdd())
04137 {
04138 MontgomeryRepresentation dr(m_modulus);
04139 dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
04140 for (unsigned int i=0; i<exponentsCount; i++)
04141 results[i] = dr.ConvertOut(results[i]);
04142 }
04143 else
04144 AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
04145 }
04146
04147 MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m)
04148 : ModularArithmetic(m),
04149 m_u((word)0, m_modulus.reg.size()),
04150 m_workspace(5*m_modulus.reg.size())
04151 {
04152 if (!m_modulus.IsOdd())
04153 throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
04154
04155 RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
04156 }
04157
04158 const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
04159 {
04160 word *const T = m_workspace.begin();
04161 word *const R = m_result.reg.begin();
04162 const size_t N = m_modulus.reg.size();
04163 assert(a.reg.size()<=N && b.reg.size()<=N);
04164
04165 AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
04166 SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
04167 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04168 return m_result;
04169 }
04170
04171 const Integer& MontgomeryRepresentation::Square(const Integer &a) const
04172 {
04173 word *const T = m_workspace.begin();
04174 word *const R = m_result.reg.begin();
04175 const size_t N = m_modulus.reg.size();
04176 assert(a.reg.size()<=N);
04177
04178 CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
04179 SetWords(T+2*a.reg.size(), 0, 2*N-2*a.reg.size());
04180 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04181 return m_result;
04182 }
04183
04184 Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
04185 {
04186 word *const T = m_workspace.begin();
04187 word *const R = m_result.reg.begin();
04188 const size_t N = m_modulus.reg.size();
04189 assert(a.reg.size()<=N);
04190
04191 CopyWords(T, a.reg, a.reg.size());
04192 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04193 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04194 return m_result;
04195 }
04196
04197 const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
04198 {
04199
04200 word *const T = m_workspace.begin();
04201 word *const R = m_result.reg.begin();
04202 const size_t N = m_modulus.reg.size();
04203 assert(a.reg.size()<=N);
04204
04205 CopyWords(T, a.reg, a.reg.size());
04206 SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
04207 MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
04208 unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
04209
04210
04211
04212 if (k>N*WORD_BITS)
04213 DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
04214 else
04215 MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
04216
04217 return m_result;
04218 }
04219
04220 NAMESPACE_END
04221
04222 #endif