Context Navigation

source: trunk/src-cryptopp/integer.cpp

Last change on this file was e230cb0, checked in by David Stainton <dstainton415@…>, at 2016-10-12T13:27:29Z
Add cryptopp from tag CRYPTOPP_5_6_5
Property mode set to `100644`
File size: 121.1 KB

Line
1	// integer.cpp - written and placed in the public domain by Wei Dai
2	// contains public domain code contributed by Alister Lee and Leonard Janke
3
4	#include "pch.h"
5	#include "config.h"
6
7	#if CRYPTOPP_MSC_VERSION
8	# pragma warning(disable: 4100)
9	#endif
10
11	#if CRYPTOPP_GCC_DIAGNOSTIC_AVAILABLE
12	# pragma GCC diagnostic ignored "-Wunused"
13	# pragma GCC diagnostic ignored "-Wunused-but-set-variable"
14	#endif
15
16	#ifndef CRYPTOPP_IMPORTS
17
18	#include "integer.h"
19	#include "secblock.h"
20	#include "modarith.h"
21	#include "nbtheory.h"
22	#include "smartptr.h"
23	#include "algparam.h"
24	#include "filters.h"
25	#include "asn.h"
26	#include "oids.h"
27	#include "words.h"
28	#include "pubkey.h" // for P1363_KDF2
29	#include "sha.h"
30	#include "cpu.h"
31	#include "misc.h"
32
33	#include <iostream>
34
35	#if (_MSC_VER >= 1400) && !defined(_M_ARM)
36	#include <intrin.h>
37	#endif
38
39	#ifdef __DECCXX
40	#include <c_asm.h>
41	#endif
42
43	#ifdef CRYPTOPP_MSVC6_NO_PP
44	#pragma message("You do not seem to have the Visual C++ Processor Pack installed, so use of SSE2 instructions will be disabled.")
45	#endif
46
47	// "Error: The operand ___LKDB cannot be assigned to", http://github.com/weidai11/cryptopp/issues/188
48	#if (__SUNPRO_CC >= 0x5130)
49	# define MAYBE_CONST
50	# define MAYBE_UNCONST_CAST const_cast<word*>
51	#else
52	# define MAYBE_CONST const
53	# define MAYBE_UNCONST_CAST
54	#endif
55
56	// "Inline assembly operands don't work with .intel_syntax",
57	// http://llvm.org/bugs/show_bug.cgi?id=24232
58	#if CRYPTOPP_BOOL_X32 \|\| defined(CRYPTOPP_DISABLE_INTEL_ASM)
59	# undef CRYPTOPP_X86_ASM_AVAILABLE
60	# undef CRYPTOPP_X32_ASM_AVAILABLE
61	# undef CRYPTOPP_X64_ASM_AVAILABLE
62	# undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
63	# undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
64	# define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
65	# define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
66	#else
67	# define CRYPTOPP_INTEGER_SSE2 (CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE && CRYPTOPP_BOOL_X86)
68	#endif
69
70	NAMESPACE_BEGIN(CryptoPP)
71
72	bool AssignIntToInteger(const std::type_info &valueType, void pInteger, const void pInt)
73	{
74	if (valueType != typeid(Integer))
75	return false;
76	reinterpret_cast<Integer >(pInteger) = reinterpret_cast<const int >(pInt);
77	return true;
78	}
79
80	inline static int Compare(const word A, const word B, size_t N)
81	{
82	while (N--)
83	if (A[N] > B[N])
84	return 1;
85	else if (A[N] < B[N])
86	return -1;
87
88	return 0;
89	}
90
91	inline static int Increment(word *A, size_t N, word B=1)
92	{
93	CRYPTOPP_ASSERT(N);
94	word t = A[0];
95	A[0] = t+B;
96	if (A[0] >= t)
97	return 0;
98	for (unsigned i=1; i<N; i++)
99	if (++A[i])
100	return 0;
101	return 1;
102	}
103
104	inline static int Decrement(word *A, size_t N, word B=1)
105	{
106	CRYPTOPP_ASSERT(N);
107	word t = A[0];
108	A[0] = t-B;
109	if (A[0] <= t)
110	return 0;
111	for (unsigned i=1; i<N; i++)
112	if (A[i]--)
113	return 0;
114	return 1;
115	}
116
117	static void TwosComplement(word *A, size_t N)
118	{
119	Decrement(A, N);
120	for (unsigned i=0; i<N; i++)
121	A[i] = ~A[i];
122	}
123
124	static word AtomicInverseModPower2(word A)
125	{
126	CRYPTOPP_ASSERT(A%2==1);
127
128	word R=A%8;
129
130	for (unsigned i=3; i<WORD_BITS; i*=2)
131	R = R(2-RA);
132
133	CRYPTOPP_ASSERT(R*A==1);
134	return R;
135	}
136
137	// ********************************************************
138
139	#if !defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE) \|\| (defined(__x86_64__) && defined(CRYPTOPP_WORD128_AVAILABLE))
140	#define Declare2Words(x) word x##0, x##1;
141	#define AssignWord(a, b) a##0 = b; a##1 = 0;
142	#define Add2WordsBy1(a, b, c) a##0 = b##0 + c; a##1 = b##1 + (a##0 < c);
143	#define LowWord(a) a##0
144	#define HighWord(a) a##1
145	#ifdef _MSC_VER
146	#define MultiplyWordsLoHi(p0, p1, a, b) p0 = _umul128(a, b, &p1);
147	#ifndef __INTEL_COMPILER
148	#define Double3Words(c, d) d##1 = __shiftleft128(d##0, d##1, 1); d##0 = __shiftleft128(c, d##0, 1); c *= 2;
149	#endif
150	#elif defined(__DECCXX)
151	#define MultiplyWordsLoHi(p0, p1, a, b) p0 = a*b; p1 = asm("umulh %a0, %a1, %v0", a, b);
152	#elif defined(__x86_64__)
153	#if defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5100
154	// Sun Studio's gcc-style inline assembly is heavily bugged as of version 5.9 Patch 124864-09 2008/12/16, but this one works
155	#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "r"(b) : "cc");
156	#else
157	#define MultiplyWordsLoHi(p0, p1, a, b) asm ("mulq %3" : "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
158	#define MulAcc(c, d, a, b) asm ("mulq %6; addq %3, %0; adcq %4, %1; adcq $0, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1), "=a"(p0), "=d"(p1) : "a"(a), "g"(b) : "cc");
159	#define Double3Words(c, d) asm ("addq %0, %0; adcq %1, %1; adcq %2, %2;" : "+r"(c), "+r"(d##0), "+r"(d##1) : : "cc");
160	#define Acc2WordsBy1(a, b) asm ("addq %2, %0; adcq $0, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b) : "cc");
161	#define Acc2WordsBy2(a, b) asm ("addq %2, %0; adcq %3, %1;" : "+r"(a##0), "+r"(a##1) : "r"(b##0), "r"(b##1) : "cc");
162	#define Acc3WordsBy2(c, d, e) asm ("addq %5, %0; adcq %6, %1; adcq $0, %2;" : "+r"(c), "=r"(e##0), "=r"(e##1) : "1"(d##0), "2"(d##1), "r"(e##0), "r"(e##1) : "cc");
163	#endif
164	#endif
165	#define MultiplyWords(p, a, b) MultiplyWordsLoHi(p##0, p##1, a, b)
166	#ifndef Double3Words
167	#define Double3Words(c, d) d##1 = 2d##1 + (d##0>>(WORD_BITS-1)); d##0 = 2d##0 + (c>>(WORD_BITS-1)); c *= 2;
168	#endif
169	#ifndef Acc2WordsBy2
170	#define Acc2WordsBy2(a, b) a##0 += b##0; a##1 += a##0 < b##0; a##1 += b##1;
171	#endif
172	#define AddWithCarry(u, a, b) {word t = a+b; u##0 = t + u##1; u##1 = (t<a) + (u##0<t);}
173	#define SubtractWithBorrow(u, a, b) {word t = a-b; u##0 = t - u##1; u##1 = (t>a) + (u##0>t);}
174	#define GetCarry(u) u##1
175	#define GetBorrow(u) u##1
176	#else
177	#define Declare2Words(x) dword x;
178	#if _MSC_VER >= 1400 && !defined(__INTEL_COMPILER) && !defined(_M_ARM)
179	#define MultiplyWords(p, a, b) p = __emulu(a, b);
180	#else
181	#define MultiplyWords(p, a, b) p = (dword)a*b;
182	#endif
183	#define AssignWord(a, b) a = b;
184	#define Add2WordsBy1(a, b, c) a = b + c;
185	#define Acc2WordsBy2(a, b) a += b;
186	#define LowWord(a) word(a)
187	#define HighWord(a) word(a>>WORD_BITS)
188	#define Double3Words(c, d) d = 2d + (c>>(WORD_BITS-1)); c = 2;
189	#define AddWithCarry(u, a, b) u = dword(a) + b + GetCarry(u);
190	#define SubtractWithBorrow(u, a, b) u = dword(a) - b - GetBorrow(u);
191	#define GetCarry(u) HighWord(u)
192	#define GetBorrow(u) word(u>>(WORD_BITS*2-1))
193	#endif
194	#ifndef MulAcc
195	#define MulAcc(c, d, a, b) MultiplyWords(p, a, b); Acc2WordsBy1(p, c); c = LowWord(p); Acc2WordsBy1(d, HighWord(p));
196	#endif
197	#ifndef Acc2WordsBy1
198	#define Acc2WordsBy1(a, b) Add2WordsBy1(a, a, b)
199	#endif
200	#ifndef Acc3WordsBy2
201	#define Acc3WordsBy2(c, d, e) Acc2WordsBy1(e, c); c = LowWord(e); Add2WordsBy1(e, d, HighWord(e));
202	#endif
203
204	class DWord
205	{
206	public:
207	#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
208	DWord() : m_whole() { }
209	#else
210	DWord() : m_halfs() { }
211	#endif
212
213	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
214	explicit DWord(word low) : m_whole(low) { }
215	#else
216	explicit DWord(word low) : m_halfs()
217	{
218	m_halfs.low = low;
219	}
220	#endif
221
222	#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
223	DWord(word low, word high) : m_whole()
224	#else
225	DWord(word low, word high) : m_halfs()
226	#endif
227	{
228	#if defined(CRYPTOPP_NATIVE_DWORD_AVAILABLE)
229	# if defined(IS_LITTLE_ENDIAN)
230	const word t[2] = {low,high};
231	memcpy(&m_whole, &t, sizeof(m_whole));
232	# else
233	const word t[2] = {high,low};
234	memcpy(&m_whole, &t, sizeof(m_whole));
235	# endif
236	#else
237	m_halfs.low = low;
238	m_halfs.high = high;
239	#endif
240	}
241
242	static DWord Multiply(word a, word b)
243	{
244	DWord r;
245	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
246	r.m_whole = (dword)a * b;
247	#elif defined(MultiplyWordsLoHi)
248	MultiplyWordsLoHi(r.m_halfs.low, r.m_halfs.high, a, b);
249	#else
250	CRYPTOPP_ASSERT(0);
251	#endif
252	return r;
253	}
254
255	static DWord MultiplyAndAdd(word a, word b, word c)
256	{
257	DWord r = Multiply(a, b);
258	return r += c;
259	}
260
261	DWord & operator+=(word a)
262	{
263	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
264	m_whole = m_whole + a;
265	#else
266	m_halfs.low += a;
267	m_halfs.high += (m_halfs.low < a);
268	#endif
269	return *this;
270	}
271
272	DWord operator+(word a)
273	{
274	DWord r;
275	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
276	r.m_whole = m_whole + a;
277	#else
278	r.m_halfs.low = m_halfs.low + a;
279	r.m_halfs.high = m_halfs.high + (r.m_halfs.low < a);
280	#endif
281	return r;
282	}
283
284	DWord operator-(DWord a)
285	{
286	DWord r;
287	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
288	r.m_whole = m_whole - a.m_whole;
289	#else
290	r.m_halfs.low = m_halfs.low - a.m_halfs.low;
291	r.m_halfs.high = m_halfs.high - a.m_halfs.high - (r.m_halfs.low > m_halfs.low);
292	#endif
293	return r;
294	}
295
296	DWord operator-(word a)
297	{
298	DWord r;
299	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
300	r.m_whole = m_whole - a;
301	#else
302	r.m_halfs.low = m_halfs.low - a;
303	r.m_halfs.high = m_halfs.high - (r.m_halfs.low > m_halfs.low);
304	#endif
305	return r;
306	}
307
308	// returns quotient, which must fit in a word
309	word operator/(word divisor);
310
311	word operator%(word a);
312
313	bool operator!() const
314	{
315	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
316	return !m_whole;
317	#else
318	return !m_halfs.high && !m_halfs.low;
319	#endif
320	}
321
322	// TODO: When NATIVE_DWORD is in effect, we access high and low, which are inactive
323	// union members, and that's UB. Also see http://stackoverflow.com/q/11373203.
324	word GetLowHalf() const {return m_halfs.low;}
325	word GetHighHalf() const {return m_halfs.high;}
326	word GetHighHalfAsBorrow() const {return 0-m_halfs.high;}
327
328	private:
329	// Issue 274, "Types cannot be declared in anonymous union",
330	// http://github.com/weidai11/cryptopp/issues/274
331	// Thanks to Martin Bonner at http://stackoverflow.com/a/39507183
332	struct half_words
333	{
334	#ifdef IS_LITTLE_ENDIAN
335	word low;
336	word high;
337	#else
338	word high;
339	word low;
340	#endif
341	};
342	union
343	{
344	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
345	dword m_whole;
346	#endif
347	half_words m_halfs;
348	};
349	};
350
351	class Word
352	{
353	public:
354	// Converity finding on default ctor. We've isntrumented the code,
355	// and cannot uncover a case where it affects a result.
356	#if defined(__COVERITY__)
357	Word() : m_whole(0) {}
358	#elif CRYPTOPP_DEBUG
359	// Repeating pattern of 1010 for debug builds to break things...
360	Word() : m_whole(0) {memset(&m_whole, 0xaa, sizeof(m_whole));}
361	#else
362	Word() {}
363	#endif
364
365	Word(word value) : m_whole(value) {}
366	Word(hword low, hword high) : m_whole(low \| (word(high) << (WORD_BITS/2))) {}
367
368	static Word Multiply(hword a, hword b)
369	{
370	Word r;
371	r.m_whole = (word)a * b;
372	return r;
373	}
374
375	Word operator-(Word a)
376	{
377	Word r;
378	r.m_whole = m_whole - a.m_whole;
379	return r;
380	}
381
382	Word operator-(hword a)
383	{
384	Word r;
385	r.m_whole = m_whole - a;
386	return r;
387	}
388
389	// returns quotient, which must fit in a word
390	hword operator/(hword divisor)
391	{
392	return hword(m_whole / divisor);
393	}
394
395	bool operator!() const
396	{
397	return !m_whole;
398	}
399
400	word GetWhole() const {return m_whole;}
401	hword GetLowHalf() const {return hword(m_whole);}
402	hword GetHighHalf() const {return hword(m_whole>>(WORD_BITS/2));}
403	hword GetHighHalfAsBorrow() const {return 0-hword(m_whole>>(WORD_BITS/2));}
404
405	private:
406	word m_whole;
407	};
408
409	// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
410	template <class S, class D>
411	S DivideThreeWordsByTwo(S A, S B0, S B1, D dummy=NULL)
412	{
413	CRYPTOPP_UNUSED(dummy);
414
415	// CRYPTOPP_ASSERT {A[2],A[1]} < {B1,B0}, so quotient can fit in a S
416	CRYPTOPP_ASSERT(A[2] < B1 \|\| (A[2]==B1 && A[1] < B0));
417
418	// estimate the quotient: do a 2 S by 1 S divide.
419	// Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
420	// The code change occurred at Commit dc99266599a0e72d.
421
422	S Q; bool pre = (S(B1+1) == 0);
423	if (B1 > 0 && !pre)
424	Q = D(A[1], A[2]) / S(B1+1);
425	else if (pre)
426	Q = A[2];
427	else
428	Q = D(A[0], A[1]) / B0;
429
430	// now subtract Q*B from A
431	D p = D::Multiply(B0, Q);
432	D u = (D) A[0] - p.GetLowHalf();
433	A[0] = u.GetLowHalf();
434	u = (D) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - D::Multiply(B1, Q);
435	A[1] = u.GetLowHalf();
436	A[2] += u.GetHighHalf();
437
438	// Q <= actual quotient, so fix it
439	while (A[2] \|\| A[1] > B1 \|\| (A[1]==B1 && A[0]>=B0))
440	{
441	u = (D) A[0] - B0;
442	A[0] = u.GetLowHalf();
443	u = (D) A[1] - B1 - u.GetHighHalfAsBorrow();
444	A[1] = u.GetLowHalf();
445	A[2] += u.GetHighHalf();
446	Q++;
447	CRYPTOPP_ASSERT(Q); // shouldn't overflow
448	}
449
450	return Q;
451	}
452
453	// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
454	template <class S, class D>
455	inline D DivideFourWordsByTwo(S *T, const D &Al, const D &Ah, const D &B)
456	{
457	// Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
458	// The code change occurred at Commit dc99266599a0e72d.
459
460	if (!!B)
461	{
462	S Q[2];
463	T[0] = Al.GetLowHalf();
464	T[1] = Al.GetHighHalf();
465	T[2] = Ah.GetLowHalf();
466	T[3] = Ah.GetHighHalf();
467	Q[1] = DivideThreeWordsByTwo<S, D>(T+1, B.GetLowHalf(), B.GetHighHalf());
468	Q[0] = DivideThreeWordsByTwo<S, D>(T, B.GetLowHalf(), B.GetHighHalf());
469	return D(Q[0], Q[1]);
470	}
471	else // if divisor is 0, we assume divisor==2*(2WORD_BITS)
472	{
473	return D(Ah.GetLowHalf(), Ah.GetHighHalf());
474	}
475	}
476
477	// returns quotient, which must fit in a word
478	inline word DWord::operator/(word a)
479	{
480	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
481	return word(m_whole / a);
482	#else
483	hword r[4];
484	return DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a).GetWhole();
485	#endif
486	}
487
488	inline word DWord::operator%(word a)
489	{
490	#ifdef CRYPTOPP_NATIVE_DWORD_AVAILABLE
491	return word(m_whole % a);
492	#else
493	if (a < (word(1) << (WORD_BITS/2)))
494	{
495	hword h = hword(a);
496	word r = m_halfs.high % h;
497	r = ((m_halfs.low >> (WORD_BITS/2)) + (r << (WORD_BITS/2))) % h;
498	return hword((hword(m_halfs.low) + (r << (WORD_BITS/2))) % h);
499	}
500	else
501	{
502	hword r[4];
503	DivideFourWordsByTwo<hword, Word>(r, m_halfs.low, m_halfs.high, a);
504	return Word(r[0], r[1]).GetWhole();
505	}
506	#endif
507	}
508
509	// ********************************************************
510
511	// Use some tricks to share assembly code between MSVC, GCC, Clang and Sun CC.
512	#if defined(__GNUC__)
513	#define AddPrologue \
514	int result; \
515	__asm__ __volatile__ \
516	( \
517	INTEL_NOPREFIX
518	#define AddEpilogue \
519	ATT_PREFIX \
520	: "=a" (result)\
521	: "d" (C), "a" (A), "D" (B), "c" (N) \
522	: "%esi", "memory", "cc" \
523	);\
524	return result;
525	#define MulPrologue \
526	__asm__ __volatile__ \
527	( \
528	INTEL_NOPREFIX \
529	AS1( push ebx) \
530	AS2( mov ebx, edx)
531	#define MulEpilogue \
532	AS1( pop ebx) \
533	ATT_PREFIX \
534	: \
535	: "d" (s_maskLow16), "c" (C), "a" (A), "D" (B) \
536	: "%esi", "memory", "cc" \
537	);
538	#define SquPrologue MulPrologue
539	#define SquEpilogue \
540	AS1( pop ebx) \
541	ATT_PREFIX \
542	: \
543	: "d" (s_maskLow16), "c" (C), "a" (A) \
544	: "%esi", "%edi", "memory", "cc" \
545	);
546	#define TopPrologue MulPrologue
547	#define TopEpilogue \
548	AS1( pop ebx) \
549	ATT_PREFIX \
550	: \
551	: "d" (s_maskLow16), "c" (C), "a" (A), "D" (B), "S" (L) \
552	: "memory", "cc" \
553	);
554	#else
555	#define AddPrologue \
556	__asm push edi \
557	__asm push esi \
558	__asm mov eax, [esp+12] \
559	__asm mov edi, [esp+16]
560	#define AddEpilogue \
561	__asm pop esi \
562	__asm pop edi \
563	__asm ret 8
564	#if _MSC_VER < 1300
565	#define SaveEBX __asm push ebx
566	#define RestoreEBX __asm pop ebx
567	#else
568	#define SaveEBX
569	#define RestoreEBX
570	#endif
571	#define SquPrologue \
572	AS2( mov eax, A) \
573	AS2( mov ecx, C) \
574	SaveEBX \
575	AS2( lea ebx, s_maskLow16)
576	#define MulPrologue \
577	AS2( mov eax, A) \
578	AS2( mov edi, B) \
579	AS2( mov ecx, C) \
580	SaveEBX \
581	AS2( lea ebx, s_maskLow16)
582	#define TopPrologue \
583	AS2( mov eax, A) \
584	AS2( mov edi, B) \
585	AS2( mov ecx, C) \
586	AS2( mov esi, L) \
587	SaveEBX \
588	AS2( lea ebx, s_maskLow16)
589	#define SquEpilogue RestoreEBX
590	#define MulEpilogue RestoreEBX
591	#define TopEpilogue RestoreEBX
592	#endif
593
594	#ifdef CRYPTOPP_X64_MASM_AVAILABLE
595	extern "C" {
596	int Baseline_Add(size_t N, word C, const word A, const word *B);
597	int Baseline_Sub(size_t N, word C, const word A, const word *B);
598	}
599	#elif defined(CRYPTOPP_X64_ASM_AVAILABLE) && defined(__GNUC__) && defined(CRYPTOPP_WORD128_AVAILABLE)
600	int Baseline_Add(size_t N, word C, const word A, const word *B)
601	{
602	word result;
603	__asm__ __volatile__
604	(
605	INTEL_NOPREFIX
606	AS1( neg %1)
607	ASJ( jz, 1, f)
608	AS2( mov %0,[%3+8*%1])
609	AS2( add %0,[%4+8*%1])
610	AS2( mov [%2+8*%1],%0)
611	ASL(0)
612	AS2( mov %0,[%3+8*%1+8])
613	AS2( adc %0,[%4+8*%1+8])
614	AS2( mov [%2+8*%1+8],%0)
615	AS2( lea %1,[%1+2])
616	ASJ( jrcxz, 1, f)
617	AS2( mov %0,[%3+8*%1])
618	AS2( adc %0,[%4+8*%1])
619	AS2( mov [%2+8*%1],%0)
620	ASJ( jmp, 0, b)
621	ASL(1)
622	AS2( mov %0, 0)
623	AS2( adc %0, %0)
624	ATT_NOPREFIX
625	: "=&r" (result), "+c" (N)
626	: "r" (C+N), "r" (A+N), "r" (B+N)
627	: "memory", "cc"
628	);
629	return (int)result;
630	}
631
632	int Baseline_Sub(size_t N, word C, const word A, const word *B)
633	{
634	word result;
635	__asm__ __volatile__
636	(
637	INTEL_NOPREFIX
638	AS1( neg %1)
639	ASJ( jz, 1, f)
640	AS2( mov %0,[%3+8*%1])
641	AS2( sub %0,[%4+8*%1])
642	AS2( mov [%2+8*%1],%0)
643	ASL(0)
644	AS2( mov %0,[%3+8*%1+8])
645	AS2( sbb %0,[%4+8*%1+8])
646	AS2( mov [%2+8*%1+8],%0)
647	AS2( lea %1,[%1+2])
648	ASJ( jrcxz, 1, f)
649	AS2( mov %0,[%3+8*%1])
650	AS2( sbb %0,[%4+8*%1])
651	AS2( mov [%2+8*%1],%0)
652	ASJ( jmp, 0, b)
653	ASL(1)
654	AS2( mov %0, 0)
655	AS2( adc %0, %0)
656	ATT_NOPREFIX
657	: "=&r" (result), "+c" (N)
658	: "r" (C+N), "r" (A+N), "r" (B+N)
659	: "memory", "cc"
660	);
661	return (int)result;
662	}
663	#elif defined(CRYPTOPP_X86_ASM_AVAILABLE) && CRYPTOPP_BOOL_X86
664	CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word C, const word A, const word *B)
665	{
666	AddPrologue
667
668	// now: eax = A, edi = B, edx = C, ecx = N
669	AS2( lea eax, [eax+4*ecx])
670	AS2( lea edi, [edi+4*ecx])
671	AS2( lea edx, [edx+4*ecx])
672
673	AS1( neg ecx) // ecx is negative index
674	AS2( test ecx, 2) // this clears carry flag
675	ASJ( jz, 0, f)
676	AS2( sub ecx, 2)
677	ASJ( jmp, 1, f)
678
679	ASL(0)
680	ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
681	AS2( mov esi,[eax+4*ecx])
682	AS2( adc esi,[edi+4*ecx])
683	AS2( mov [edx+4*ecx],esi)
684	AS2( mov esi,[eax+4*ecx+4])
685	AS2( adc esi,[edi+4*ecx+4])
686	AS2( mov [edx+4*ecx+4],esi)
687	ASL(1)
688	AS2( mov esi,[eax+4*ecx+8])
689	AS2( adc esi,[edi+4*ecx+8])
690	AS2( mov [edx+4*ecx+8],esi)
691	AS2( mov esi,[eax+4*ecx+12])
692	AS2( adc esi,[edi+4*ecx+12])
693	AS2( mov [edx+4*ecx+12],esi)
694
695	AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
696	ASJ( jmp, 0, b)
697
698	ASL(2)
699	AS2( mov eax, 0)
700	AS1( setc al) // store carry into eax (return result register)
701
702	AddEpilogue
703	}
704
705	CRYPTOPP_NAKED int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word C, const word A, const word *B)
706	{
707	AddPrologue
708
709	// now: eax = A, edi = B, edx = C, ecx = N
710	AS2( lea eax, [eax+4*ecx])
711	AS2( lea edi, [edi+4*ecx])
712	AS2( lea edx, [edx+4*ecx])
713
714	AS1( neg ecx) // ecx is negative index
715	AS2( test ecx, 2) // this clears carry flag
716	ASJ( jz, 0, f)
717	AS2( sub ecx, 2)
718	ASJ( jmp, 1, f)
719
720	ASL(0)
721	ASJ( jecxz, 2, f) // loop until ecx overflows and becomes zero
722	AS2( mov esi,[eax+4*ecx])
723	AS2( sbb esi,[edi+4*ecx])
724	AS2( mov [edx+4*ecx],esi)
725	AS2( mov esi,[eax+4*ecx+4])
726	AS2( sbb esi,[edi+4*ecx+4])
727	AS2( mov [edx+4*ecx+4],esi)
728	ASL(1)
729	AS2( mov esi,[eax+4*ecx+8])
730	AS2( sbb esi,[edi+4*ecx+8])
731	AS2( mov [edx+4*ecx+8],esi)
732	AS2( mov esi,[eax+4*ecx+12])
733	AS2( sbb esi,[edi+4*ecx+12])
734	AS2( mov [edx+4*ecx+12],esi)
735
736	AS2( lea ecx,[ecx+4]) // advance index, avoid inc which causes slowdown on Intel Core 2
737	ASJ( jmp, 0, b)
738
739	ASL(2)
740	AS2( mov eax, 0)
741	AS1( setc al) // store carry into eax (return result register)
742
743	AddEpilogue
744	}
745
746	#if CRYPTOPP_INTEGER_SSE2
747	CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Add(size_t N, word C, const word A, const word *B)
748	{
749	AddPrologue
750
751	// now: eax = A, edi = B, edx = C, ecx = N
752	AS2( lea eax, [eax+4*ecx])
753	AS2( lea edi, [edi+4*ecx])
754	AS2( lea edx, [edx+4*ecx])
755
756	AS1( neg ecx) // ecx is negative index
757	AS2( pxor mm2, mm2)
758	ASJ( jz, 2, f)
759	AS2( test ecx, 2) // this clears carry flag
760	ASJ( jz, 0, f)
761	AS2( sub ecx, 2)
762	ASJ( jmp, 1, f)
763
764	ASL(0)
765	AS2( movd mm0, DWORD PTR [eax+4*ecx])
766	AS2( movd mm1, DWORD PTR [edi+4*ecx])
767	AS2( paddq mm0, mm1)
768	AS2( paddq mm2, mm0)
769	AS2( movd DWORD PTR [edx+4*ecx], mm2)
770	AS2( psrlq mm2, 32)
771
772	AS2( movd mm0, DWORD PTR [eax+4*ecx+4])
773	AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
774	AS2( paddq mm0, mm1)
775	AS2( paddq mm2, mm0)
776	AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
777	AS2( psrlq mm2, 32)
778
779	ASL(1)
780	AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
781	AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
782	AS2( paddq mm0, mm1)
783	AS2( paddq mm2, mm0)
784	AS2( movd DWORD PTR [edx+4*ecx+8], mm2)
785	AS2( psrlq mm2, 32)
786
787	AS2( movd mm0, DWORD PTR [eax+4*ecx+12])
788	AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
789	AS2( paddq mm0, mm1)
790	AS2( paddq mm2, mm0)
791	AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
792	AS2( psrlq mm2, 32)
793
794	AS2( add ecx, 4)
795	ASJ( jnz, 0, b)
796
797	ASL(2)
798	AS2( movd eax, mm2)
799	AS1( emms)
800
801	AddEpilogue
802	}
803	CRYPTOPP_NAKED int CRYPTOPP_FASTCALL SSE2_Sub(size_t N, word C, const word A, const word *B)
804	{
805	AddPrologue
806
807	// now: eax = A, edi = B, edx = C, ecx = N
808	AS2( lea eax, [eax+4*ecx])
809	AS2( lea edi, [edi+4*ecx])
810	AS2( lea edx, [edx+4*ecx])
811
812	AS1( neg ecx) // ecx is negative index
813	AS2( pxor mm2, mm2)
814	ASJ( jz, 2, f)
815	AS2( test ecx, 2) // this clears carry flag
816	ASJ( jz, 0, f)
817	AS2( sub ecx, 2)
818	ASJ( jmp, 1, f)
819
820	ASL(0)
821	AS2( movd mm0, DWORD PTR [eax+4*ecx])
822	AS2( movd mm1, DWORD PTR [edi+4*ecx])
823	AS2( psubq mm0, mm1)
824	AS2( psubq mm0, mm2)
825	AS2( movd DWORD PTR [edx+4*ecx], mm0)
826	AS2( psrlq mm0, 63)
827
828	AS2( movd mm2, DWORD PTR [eax+4*ecx+4])
829	AS2( movd mm1, DWORD PTR [edi+4*ecx+4])
830	AS2( psubq mm2, mm1)
831	AS2( psubq mm2, mm0)
832	AS2( movd DWORD PTR [edx+4*ecx+4], mm2)
833	AS2( psrlq mm2, 63)
834
835	ASL(1)
836	AS2( movd mm0, DWORD PTR [eax+4*ecx+8])
837	AS2( movd mm1, DWORD PTR [edi+4*ecx+8])
838	AS2( psubq mm0, mm1)
839	AS2( psubq mm0, mm2)
840	AS2( movd DWORD PTR [edx+4*ecx+8], mm0)
841	AS2( psrlq mm0, 63)
842
843	AS2( movd mm2, DWORD PTR [eax+4*ecx+12])
844	AS2( movd mm1, DWORD PTR [edi+4*ecx+12])
845	AS2( psubq mm2, mm1)
846	AS2( psubq mm2, mm0)
847	AS2( movd DWORD PTR [edx+4*ecx+12], mm2)
848	AS2( psrlq mm2, 63)
849
850	AS2( add ecx, 4)
851	ASJ( jnz, 0, b)
852
853	ASL(2)
854	AS2( movd eax, mm2)
855	AS1( emms)
856
857	AddEpilogue
858	}
859	#endif // #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
860	#else
861	int CRYPTOPP_FASTCALL Baseline_Add(size_t N, word C, const word A, const word *B)
862	{
863	CRYPTOPP_ASSERT (N%2 == 0);
864
865	Declare2Words(u);
866	AssignWord(u, 0);
867	for (size_t i=0; i<N; i+=2)
868	{
869	AddWithCarry(u, A[i], B[i]);
870	C[i] = LowWord(u);
871	AddWithCarry(u, A[i+1], B[i+1]);
872	C[i+1] = LowWord(u);
873	}
874	return int(GetCarry(u));
875	}
876
877	int CRYPTOPP_FASTCALL Baseline_Sub(size_t N, word C, const word A, const word *B)
878	{
879	CRYPTOPP_ASSERT (N%2 == 0);
880
881	Declare2Words(u);
882	AssignWord(u, 0);
883	for (size_t i=0; i<N; i+=2)
884	{
885	SubtractWithBorrow(u, A[i], B[i]);
886	C[i] = LowWord(u);
887	SubtractWithBorrow(u, A[i+1], B[i+1]);
888	C[i+1] = LowWord(u);
889	}
890	return int(GetBorrow(u));
891	}
892	#endif
893
894	static word LinearMultiply(word C, const word AA, word B, size_t N)
895	{
896	// http://github.com/weidai11/cryptopp/issues/188
897	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
898
899	word carry=0;
900	for(unsigned i=0; i<N; i++)
901	{
902	Declare2Words(p);
903	MultiplyWords(p, A[i], B);
904	Acc2WordsBy1(p, carry);
905	C[i] = LowWord(p);
906	carry = HighWord(p);
907	}
908	return carry;
909	}
910
911	#ifndef CRYPTOPP_DOXYGEN_PROCESSING
912
913	#define Mul_2 \
914	Mul_Begin(2) \
915	Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
916	Mul_End(1, 1)
917
918	#define Mul_4 \
919	Mul_Begin(4) \
920	Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
921	Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
922	Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
923	Mul_SaveAcc(3, 1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
924	Mul_SaveAcc(4, 2, 3) Mul_Acc(3, 2) \
925	Mul_End(5, 3)
926
927	#define Mul_8 \
928	Mul_Begin(8) \
929	Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
930	Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
931	Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
932	Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
933	Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
934	Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
935	Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
936	Mul_SaveAcc(7, 1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
937	Mul_SaveAcc(8, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
938	Mul_SaveAcc(9, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
939	Mul_SaveAcc(10, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
940	Mul_SaveAcc(11, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
941	Mul_SaveAcc(12, 6, 7) Mul_Acc(7, 6) \
942	Mul_End(13, 7)
943
944	#define Mul_16 \
945	Mul_Begin(16) \
946	Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
947	Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
948	Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
949	Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
950	Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
951	Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
952	Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
953	Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
954	Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
955	Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
956	Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
957	Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
958	Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
959	Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
960	Mul_SaveAcc(14, 0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
961	Mul_SaveAcc(15, 1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
962	Mul_SaveAcc(16, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
963	Mul_SaveAcc(17, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
964	Mul_SaveAcc(18, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
965	Mul_SaveAcc(19, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
966	Mul_SaveAcc(20, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
967	Mul_SaveAcc(21, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
968	Mul_SaveAcc(22, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
969	Mul_SaveAcc(23, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
970	Mul_SaveAcc(24, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
971	Mul_SaveAcc(25, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
972	Mul_SaveAcc(26, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
973	Mul_SaveAcc(27, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
974	Mul_SaveAcc(28, 14, 15) Mul_Acc(15, 14) \
975	Mul_End(29, 15)
976
977	#define Squ_2 \
978	Squ_Begin(2) \
979	Squ_End(2)
980
981	#define Squ_4 \
982	Squ_Begin(4) \
983	Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
984	Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
985	Squ_SaveAcc(3, 1, 3) Squ_Diag(2) \
986	Squ_SaveAcc(4, 2, 3) Squ_NonDiag \
987	Squ_End(4)
988
989	#define Squ_8 \
990	Squ_Begin(8) \
991	Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
992	Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
993	Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
994	Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
995	Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
996	Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
997	Squ_SaveAcc(7, 1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
998	Squ_SaveAcc(8, 2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
999	Squ_SaveAcc(9, 3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1000	Squ_SaveAcc(10, 4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1001	Squ_SaveAcc(11, 5, 7) Squ_Diag(6) \
1002	Squ_SaveAcc(12, 6, 7) Squ_NonDiag \
1003	Squ_End(8)
1004
1005	#define Squ_16 \
1006	Squ_Begin(16) \
1007	Squ_SaveAcc(1, 0, 2) Squ_Diag(1) \
1008	Squ_SaveAcc(2, 0, 3) Squ_Acc(1, 2) Squ_NonDiag \
1009	Squ_SaveAcc(3, 0, 4) Squ_Acc(1, 3) Squ_Diag(2) \
1010	Squ_SaveAcc(4, 0, 5) Squ_Acc(1, 4) Squ_Acc(2, 3) Squ_NonDiag \
1011	Squ_SaveAcc(5, 0, 6) Squ_Acc(1, 5) Squ_Acc(2, 4) Squ_Diag(3) \
1012	Squ_SaveAcc(6, 0, 7) Squ_Acc(1, 6) Squ_Acc(2, 5) Squ_Acc(3, 4) Squ_NonDiag \
1013	Squ_SaveAcc(7, 0, 8) Squ_Acc(1, 7) Squ_Acc(2, 6) Squ_Acc(3, 5) Squ_Diag(4) \
1014	Squ_SaveAcc(8, 0, 9) Squ_Acc(1, 8) Squ_Acc(2, 7) Squ_Acc(3, 6) Squ_Acc(4, 5) Squ_NonDiag \
1015	Squ_SaveAcc(9, 0, 10) Squ_Acc(1, 9) Squ_Acc(2, 8) Squ_Acc(3, 7) Squ_Acc(4, 6) Squ_Diag(5) \
1016	Squ_SaveAcc(10, 0, 11) Squ_Acc(1, 10) Squ_Acc(2, 9) Squ_Acc(3, 8) Squ_Acc(4, 7) Squ_Acc(5, 6) Squ_NonDiag \
1017	Squ_SaveAcc(11, 0, 12) Squ_Acc(1, 11) Squ_Acc(2, 10) Squ_Acc(3, 9) Squ_Acc(4, 8) Squ_Acc(5, 7) Squ_Diag(6) \
1018	Squ_SaveAcc(12, 0, 13) Squ_Acc(1, 12) Squ_Acc(2, 11) Squ_Acc(3, 10) Squ_Acc(4, 9) Squ_Acc(5, 8) Squ_Acc(6, 7) Squ_NonDiag \
1019	Squ_SaveAcc(13, 0, 14) Squ_Acc(1, 13) Squ_Acc(2, 12) Squ_Acc(3, 11) Squ_Acc(4, 10) Squ_Acc(5, 9) Squ_Acc(6, 8) Squ_Diag(7) \
1020	Squ_SaveAcc(14, 0, 15) Squ_Acc(1, 14) Squ_Acc(2, 13) Squ_Acc(3, 12) Squ_Acc(4, 11) Squ_Acc(5, 10) Squ_Acc(6, 9) Squ_Acc(7, 8) Squ_NonDiag \
1021	Squ_SaveAcc(15, 1, 15) Squ_Acc(2, 14) Squ_Acc(3, 13) Squ_Acc(4, 12) Squ_Acc(5, 11) Squ_Acc(6, 10) Squ_Acc(7, 9) Squ_Diag(8) \
1022	Squ_SaveAcc(16, 2, 15) Squ_Acc(3, 14) Squ_Acc(4, 13) Squ_Acc(5, 12) Squ_Acc(6, 11) Squ_Acc(7, 10) Squ_Acc(8, 9) Squ_NonDiag \
1023	Squ_SaveAcc(17, 3, 15) Squ_Acc(4, 14) Squ_Acc(5, 13) Squ_Acc(6, 12) Squ_Acc(7, 11) Squ_Acc(8, 10) Squ_Diag(9) \
1024	Squ_SaveAcc(18, 4, 15) Squ_Acc(5, 14) Squ_Acc(6, 13) Squ_Acc(7, 12) Squ_Acc(8, 11) Squ_Acc(9, 10) Squ_NonDiag \
1025	Squ_SaveAcc(19, 5, 15) Squ_Acc(6, 14) Squ_Acc(7, 13) Squ_Acc(8, 12) Squ_Acc(9, 11) Squ_Diag(10) \
1026	Squ_SaveAcc(20, 6, 15) Squ_Acc(7, 14) Squ_Acc(8, 13) Squ_Acc(9, 12) Squ_Acc(10, 11) Squ_NonDiag \
1027	Squ_SaveAcc(21, 7, 15) Squ_Acc(8, 14) Squ_Acc(9, 13) Squ_Acc(10, 12) Squ_Diag(11) \
1028	Squ_SaveAcc(22, 8, 15) Squ_Acc(9, 14) Squ_Acc(10, 13) Squ_Acc(11, 12) Squ_NonDiag \
1029	Squ_SaveAcc(23, 9, 15) Squ_Acc(10, 14) Squ_Acc(11, 13) Squ_Diag(12) \
1030	Squ_SaveAcc(24, 10, 15) Squ_Acc(11, 14) Squ_Acc(12, 13) Squ_NonDiag \
1031	Squ_SaveAcc(25, 11, 15) Squ_Acc(12, 14) Squ_Diag(13) \
1032	Squ_SaveAcc(26, 12, 15) Squ_Acc(13, 14) Squ_NonDiag \
1033	Squ_SaveAcc(27, 13, 15) Squ_Diag(14) \
1034	Squ_SaveAcc(28, 14, 15) Squ_NonDiag \
1035	Squ_End(16)
1036
1037	#define Bot_2 \
1038	Mul_Begin(2) \
1039	Bot_SaveAcc(0, 0, 1) Bot_Acc(1, 0) \
1040	Bot_End(2)
1041
1042	#define Bot_4 \
1043	Mul_Begin(4) \
1044	Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1045	Mul_SaveAcc(1, 2, 0) Mul_Acc(1, 1) Mul_Acc(0, 2) \
1046	Bot_SaveAcc(2, 0, 3) Bot_Acc(1, 2) Bot_Acc(2, 1) Bot_Acc(3, 0) \
1047	Bot_End(4)
1048
1049	#define Bot_8 \
1050	Mul_Begin(8) \
1051	Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1052	Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1053	Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1054	Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1055	Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1056	Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1057	Bot_SaveAcc(6, 0, 7) Bot_Acc(1, 6) Bot_Acc(2, 5) Bot_Acc(3, 4) Bot_Acc(4, 3) Bot_Acc(5, 2) Bot_Acc(6, 1) Bot_Acc(7, 0) \
1058	Bot_End(8)
1059
1060	#define Bot_16 \
1061	Mul_Begin(16) \
1062	Mul_SaveAcc(0, 0, 1) Mul_Acc(1, 0) \
1063	Mul_SaveAcc(1, 0, 2) Mul_Acc(1, 1) Mul_Acc(2, 0) \
1064	Mul_SaveAcc(2, 0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1065	Mul_SaveAcc(3, 0, 4) Mul_Acc(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) Mul_Acc(4, 0) \
1066	Mul_SaveAcc(4, 0, 5) Mul_Acc(1, 4) Mul_Acc(2, 3) Mul_Acc(3, 2) Mul_Acc(4, 1) Mul_Acc(5, 0) \
1067	Mul_SaveAcc(5, 0, 6) Mul_Acc(1, 5) Mul_Acc(2, 4) Mul_Acc(3, 3) Mul_Acc(4, 2) Mul_Acc(5, 1) Mul_Acc(6, 0) \
1068	Mul_SaveAcc(6, 0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1069	Mul_SaveAcc(7, 0, 8) Mul_Acc(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) Mul_Acc(8, 0) \
1070	Mul_SaveAcc(8, 0, 9) Mul_Acc(1, 8) Mul_Acc(2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) Mul_Acc(8, 1) Mul_Acc(9, 0) \
1071	Mul_SaveAcc(9, 0, 10) Mul_Acc(1, 9) Mul_Acc(2, 8) Mul_Acc(3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) Mul_Acc(8, 2) Mul_Acc(9, 1) Mul_Acc(10, 0) \
1072	Mul_SaveAcc(10, 0, 11) Mul_Acc(1, 10) Mul_Acc(2, 9) Mul_Acc(3, 8) Mul_Acc(4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) Mul_Acc(8, 3) Mul_Acc(9, 2) Mul_Acc(10, 1) Mul_Acc(11, 0) \
1073	Mul_SaveAcc(11, 0, 12) Mul_Acc(1, 11) Mul_Acc(2, 10) Mul_Acc(3, 9) Mul_Acc(4, 8) Mul_Acc(5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) Mul_Acc(8, 4) Mul_Acc(9, 3) Mul_Acc(10, 2) Mul_Acc(11, 1) Mul_Acc(12, 0) \
1074	Mul_SaveAcc(12, 0, 13) Mul_Acc(1, 12) Mul_Acc(2, 11) Mul_Acc(3, 10) Mul_Acc(4, 9) Mul_Acc(5, 8) Mul_Acc(6, 7) Mul_Acc(7, 6) Mul_Acc(8, 5) Mul_Acc(9, 4) Mul_Acc(10, 3) Mul_Acc(11, 2) Mul_Acc(12, 1) Mul_Acc(13, 0) \
1075	Mul_SaveAcc(13, 0, 14) Mul_Acc(1, 13) Mul_Acc(2, 12) Mul_Acc(3, 11) Mul_Acc(4, 10) Mul_Acc(5, 9) Mul_Acc(6, 8) Mul_Acc(7, 7) Mul_Acc(8, 6) Mul_Acc(9, 5) Mul_Acc(10, 4) Mul_Acc(11, 3) Mul_Acc(12, 2) Mul_Acc(13, 1) Mul_Acc(14, 0) \
1076	Bot_SaveAcc(14, 0, 15) Bot_Acc(1, 14) Bot_Acc(2, 13) Bot_Acc(3, 12) Bot_Acc(4, 11) Bot_Acc(5, 10) Bot_Acc(6, 9) Bot_Acc(7, 8) Bot_Acc(8, 7) Bot_Acc(9, 6) Bot_Acc(10, 5) Bot_Acc(11, 4) Bot_Acc(12, 3) Bot_Acc(13, 2) Bot_Acc(14, 1) Bot_Acc(15, 0) \
1077	Bot_End(16)
1078
1079	#endif
1080
1081	#if 0
1082	#define Mul_Begin(n) \
1083	Declare2Words(p) \
1084	Declare2Words(c) \
1085	Declare2Words(d) \
1086	MultiplyWords(p, A[0], B[0]) \
1087	AssignWord(c, LowWord(p)) \
1088	AssignWord(d, HighWord(p))
1089
1090	#define Mul_Acc(i, j) \
1091	MultiplyWords(p, A[i], B[j]) \
1092	Acc2WordsBy1(c, LowWord(p)) \
1093	Acc2WordsBy1(d, HighWord(p))
1094
1095	#define Mul_SaveAcc(k, i, j) \
1096	R[k] = LowWord(c); \
1097	Add2WordsBy1(c, d, HighWord(c)) \
1098	MultiplyWords(p, A[i], B[j]) \
1099	AssignWord(d, HighWord(p)) \
1100	Acc2WordsBy1(c, LowWord(p))
1101
1102	#define Mul_End(n) \
1103	R[2*n-3] = LowWord(c); \
1104	Acc2WordsBy1(d, HighWord(c)) \
1105	MultiplyWords(p, A[n-1], B[n-1])\
1106	Acc2WordsBy2(d, p) \
1107	R[2*n-2] = LowWord(d); \
1108	R[2*n-1] = HighWord(d);
1109
1110	#define Bot_SaveAcc(k, i, j) \
1111	R[k] = LowWord(c); \
1112	word e = LowWord(d) + HighWord(c); \
1113	e += A[i] * B[j];
1114
1115	#define Bot_Acc(i, j) \
1116	e += A[i] * B[j];
1117
1118	#define Bot_End(n) \
1119	R[n-1] = e;
1120	#else
1121	#define Mul_Begin(n) \
1122	Declare2Words(p) \
1123	word c; \
1124	Declare2Words(d) \
1125	MultiplyWords(p, A[0], B[0]) \
1126	c = LowWord(p); \
1127	AssignWord(d, HighWord(p))
1128
1129	#define Mul_Acc(i, j) \
1130	MulAcc(c, d, A[i], B[j])
1131
1132	#define Mul_SaveAcc(k, i, j) \
1133	R[k] = c; \
1134	c = LowWord(d); \
1135	AssignWord(d, HighWord(d)) \
1136	MulAcc(c, d, A[i], B[j])
1137
1138	#define Mul_End(k, i) \
1139	R[k] = c; \
1140	MultiplyWords(p, A[i], B[i]) \
1141	Acc2WordsBy2(p, d) \
1142	R[k+1] = LowWord(p); \
1143	R[k+2] = HighWord(p);
1144
1145	#define Bot_SaveAcc(k, i, j) \
1146	R[k] = c; \
1147	c = LowWord(d); \
1148	c += A[i] * B[j];
1149
1150	#define Bot_Acc(i, j) \
1151	c += A[i] * B[j];
1152
1153	#define Bot_End(n) \
1154	R[n-1] = c;
1155	#endif
1156
1157	#define Squ_Begin(n) \
1158	Declare2Words(p) \
1159	word c; \
1160	Declare2Words(d) \
1161	Declare2Words(e) \
1162	MultiplyWords(p, A[0], A[0]) \
1163	R[0] = LowWord(p); \
1164	AssignWord(e, HighWord(p)) \
1165	MultiplyWords(p, A[0], A[1]) \
1166	c = LowWord(p); \
1167	AssignWord(d, HighWord(p)) \
1168	Squ_NonDiag \
1169
1170	#define Squ_NonDiag \
1171	Double3Words(c, d)
1172
1173	#define Squ_SaveAcc(k, i, j) \
1174	Acc3WordsBy2(c, d, e) \
1175	R[k] = c; \
1176	MultiplyWords(p, A[i], A[j]) \
1177	c = LowWord(p); \
1178	AssignWord(d, HighWord(p)) \
1179
1180	#define Squ_Acc(i, j) \
1181	MulAcc(c, d, A[i], A[j])
1182
1183	#define Squ_Diag(i) \
1184	Squ_NonDiag \
1185	MulAcc(c, d, A[i], A[i])
1186
1187	#define Squ_End(n) \
1188	Acc3WordsBy2(c, d, e) \
1189	R[2*n-3] = c; \
1190	MultiplyWords(p, A[n-1], A[n-1])\
1191	Acc2WordsBy2(p, e) \
1192	R[2*n-2] = LowWord(p); \
1193	R[2*n-1] = HighWord(p);
1194
1195
1196	void Baseline_Multiply2(word R, const word AA, const word *BB)
1197	{
1198	// http://github.com/weidai11/cryptopp/issues/188
1199	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1200	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1201
1202	Mul_2
1203	}
1204
1205	void Baseline_Multiply4(word R, const word AA, const word *BB)
1206	{
1207	// http://github.com/weidai11/cryptopp/issues/188
1208	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1209	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1210
1211	Mul_4
1212	}
1213
1214	void Baseline_Multiply8(word R, const word AA, const word *BB)
1215	{
1216	// http://github.com/weidai11/cryptopp/issues/188
1217	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1218	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1219
1220	Mul_8
1221	}
1222
1223	void Baseline_Square2(word R, const word AA)
1224	{
1225	// http://github.com/weidai11/cryptopp/issues/188
1226	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1227
1228	Squ_2
1229	}
1230
1231	void Baseline_Square4(word R, const word AA)
1232	{
1233	// http://github.com/weidai11/cryptopp/issues/188
1234	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1235
1236	Squ_4
1237	}
1238
1239	void Baseline_Square8(word R, const word AA)
1240	{
1241	// http://github.com/weidai11/cryptopp/issues/188
1242	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1243
1244	Squ_8
1245	}
1246
1247	void Baseline_MultiplyBottom2(word R, const word AA, const word *BB)
1248	{
1249	// http://github.com/weidai11/cryptopp/issues/188
1250	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1251	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1252
1253	Bot_2
1254	}
1255
1256	void Baseline_MultiplyBottom4(word R, const word AA, const word *BB)
1257	{
1258	// http://github.com/weidai11/cryptopp/issues/188
1259	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1260	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1261
1262	Bot_4
1263	}
1264
1265	void Baseline_MultiplyBottom8(word R, const word AA, const word *BB)
1266	{
1267	// http://github.com/weidai11/cryptopp/issues/188
1268	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1269	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1270
1271	Bot_8
1272	}
1273
1274	#define Top_Begin(n) \
1275	Declare2Words(p) \
1276	word c; \
1277	Declare2Words(d) \
1278	MultiplyWords(p, A[0], B[n-2]);\
1279	AssignWord(d, HighWord(p));
1280
1281	#define Top_Acc(i, j) \
1282	MultiplyWords(p, A[i], B[j]);\
1283	Acc2WordsBy1(d, HighWord(p));
1284
1285	#define Top_SaveAcc0(i, j) \
1286	c = LowWord(d); \
1287	AssignWord(d, HighWord(d)) \
1288	MulAcc(c, d, A[i], B[j])
1289
1290	#define Top_SaveAcc1(i, j) \
1291	c = L<c; \
1292	Acc2WordsBy1(d, c); \
1293	c = LowWord(d); \
1294	AssignWord(d, HighWord(d)) \
1295	MulAcc(c, d, A[i], B[j])
1296
1297	void Baseline_MultiplyTop2(word R, const word A, const word *B, word L)
1298	{
1299	CRYPTOPP_UNUSED(L);
1300	word T[4];
1301	Baseline_Multiply2(T, A, B);
1302	R[0] = T[2];
1303	R[1] = T[3];
1304	}
1305
1306	void Baseline_MultiplyTop4(word R, const word AA, const word *BB, word L)
1307	{
1308	// http://github.com/weidai11/cryptopp/issues/188
1309	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1310	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1311
1312	Top_Begin(4)
1313	Top_Acc(1, 1) Top_Acc(2, 0) \
1314	Top_SaveAcc0(0, 3) Mul_Acc(1, 2) Mul_Acc(2, 1) Mul_Acc(3, 0) \
1315	Top_SaveAcc1(1, 3) Mul_Acc(2, 2) Mul_Acc(3, 1) \
1316	Mul_SaveAcc(0, 2, 3) Mul_Acc(3, 2) \
1317	Mul_End(1, 3)
1318	}
1319
1320	void Baseline_MultiplyTop8(word R, const word AA, const word *BB, word L)
1321	{
1322	// http://github.com/weidai11/cryptopp/issues/188
1323	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1324	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1325
1326	Top_Begin(8)
1327	Top_Acc(1, 5) Top_Acc(2, 4) Top_Acc(3, 3) Top_Acc(4, 2) Top_Acc(5, 1) Top_Acc(6, 0) \
1328	Top_SaveAcc0(0, 7) Mul_Acc(1, 6) Mul_Acc(2, 5) Mul_Acc(3, 4) Mul_Acc(4, 3) Mul_Acc(5, 2) Mul_Acc(6, 1) Mul_Acc(7, 0) \
1329	Top_SaveAcc1(1, 7) Mul_Acc(2, 6) Mul_Acc(3, 5) Mul_Acc(4, 4) Mul_Acc(5, 3) Mul_Acc(6, 2) Mul_Acc(7, 1) \
1330	Mul_SaveAcc(0, 2, 7) Mul_Acc(3, 6) Mul_Acc(4, 5) Mul_Acc(5, 4) Mul_Acc(6, 3) Mul_Acc(7, 2) \
1331	Mul_SaveAcc(1, 3, 7) Mul_Acc(4, 6) Mul_Acc(5, 5) Mul_Acc(6, 4) Mul_Acc(7, 3) \
1332	Mul_SaveAcc(2, 4, 7) Mul_Acc(5, 6) Mul_Acc(6, 5) Mul_Acc(7, 4) \
1333	Mul_SaveAcc(3, 5, 7) Mul_Acc(6, 6) Mul_Acc(7, 5) \
1334	Mul_SaveAcc(4, 6, 7) Mul_Acc(7, 6) \
1335	Mul_End(5, 7)
1336	}
1337
1338	#if !CRYPTOPP_INTEGER_SSE2 // save memory by not compiling these functions when SSE2 is available
1339	void Baseline_Multiply16(word R, const word AA, const word *BB)
1340	{
1341	// http://github.com/weidai11/cryptopp/issues/188
1342	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1343	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1344
1345	Mul_16
1346	}
1347
1348	void Baseline_Square16(word R, const word AA)
1349	{
1350	// http://github.com/weidai11/cryptopp/issues/188
1351	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1352
1353	Squ_16
1354	}
1355
1356	void Baseline_MultiplyBottom16(word R, const word AA, const word *BB)
1357	{
1358	// http://github.com/weidai11/cryptopp/issues/188
1359	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1360	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1361
1362	Bot_16
1363	}
1364
1365	void Baseline_MultiplyTop16(word R, const word AA, const word *BB, word L)
1366	{
1367	// http://github.com/weidai11/cryptopp/issues/188
1368	MAYBE_CONST word* A = MAYBE_UNCONST_CAST(AA);
1369	MAYBE_CONST word* B = MAYBE_UNCONST_CAST(BB);
1370
1371	Top_Begin(16)
1372	Top_Acc(1, 13) Top_Acc(2, 12) Top_Acc(3, 11) Top_Acc(4, 10) Top_Acc(5, 9) Top_Acc(6, 8) Top_Acc(7, 7) Top_Acc(8, 6) Top_Acc(9, 5) Top_Acc(10, 4) Top_Acc(11, 3) Top_Acc(12, 2) Top_Acc(13, 1) Top_Acc(14, 0) \
1373	Top_SaveAcc0(0, 15) Mul_Acc(1, 14) Mul_Acc(2, 13) Mul_Acc(3, 12) Mul_Acc(4, 11) Mul_Acc(5, 10) Mul_Acc(6, 9) Mul_Acc(7, 8) Mul_Acc(8, 7) Mul_Acc(9, 6) Mul_Acc(10, 5) Mul_Acc(11, 4) Mul_Acc(12, 3) Mul_Acc(13, 2) Mul_Acc(14, 1) Mul_Acc(15, 0) \
1374	Top_SaveAcc1(1, 15) Mul_Acc(2, 14) Mul_Acc(3, 13) Mul_Acc(4, 12) Mul_Acc(5, 11) Mul_Acc(6, 10) Mul_Acc(7, 9) Mul_Acc(8, 8) Mul_Acc(9, 7) Mul_Acc(10, 6) Mul_Acc(11, 5) Mul_Acc(12, 4) Mul_Acc(13, 3) Mul_Acc(14, 2) Mul_Acc(15, 1) \
1375	Mul_SaveAcc(0, 2, 15) Mul_Acc(3, 14) Mul_Acc(4, 13) Mul_Acc(5, 12) Mul_Acc(6, 11) Mul_Acc(7, 10) Mul_Acc(8, 9) Mul_Acc(9, 8) Mul_Acc(10, 7) Mul_Acc(11, 6) Mul_Acc(12, 5) Mul_Acc(13, 4) Mul_Acc(14, 3) Mul_Acc(15, 2) \
1376	Mul_SaveAcc(1, 3, 15) Mul_Acc(4, 14) Mul_Acc(5, 13) Mul_Acc(6, 12) Mul_Acc(7, 11) Mul_Acc(8, 10) Mul_Acc(9, 9) Mul_Acc(10, 8) Mul_Acc(11, 7) Mul_Acc(12, 6) Mul_Acc(13, 5) Mul_Acc(14, 4) Mul_Acc(15, 3) \
1377	Mul_SaveAcc(2, 4, 15) Mul_Acc(5, 14) Mul_Acc(6, 13) Mul_Acc(7, 12) Mul_Acc(8, 11) Mul_Acc(9, 10) Mul_Acc(10, 9) Mul_Acc(11, 8) Mul_Acc(12, 7) Mul_Acc(13, 6) Mul_Acc(14, 5) Mul_Acc(15, 4) \
1378	Mul_SaveAcc(3, 5, 15) Mul_Acc(6, 14) Mul_Acc(7, 13) Mul_Acc(8, 12) Mul_Acc(9, 11) Mul_Acc(10, 10) Mul_Acc(11, 9) Mul_Acc(12, 8) Mul_Acc(13, 7) Mul_Acc(14, 6) Mul_Acc(15, 5) \
1379	Mul_SaveAcc(4, 6, 15) Mul_Acc(7, 14) Mul_Acc(8, 13) Mul_Acc(9, 12) Mul_Acc(10, 11) Mul_Acc(11, 10) Mul_Acc(12, 9) Mul_Acc(13, 8) Mul_Acc(14, 7) Mul_Acc(15, 6) \
1380	Mul_SaveAcc(5, 7, 15) Mul_Acc(8, 14) Mul_Acc(9, 13) Mul_Acc(10, 12) Mul_Acc(11, 11) Mul_Acc(12, 10) Mul_Acc(13, 9) Mul_Acc(14, 8) Mul_Acc(15, 7) \
1381	Mul_SaveAcc(6, 8, 15) Mul_Acc(9, 14) Mul_Acc(10, 13) Mul_Acc(11, 12) Mul_Acc(12, 11) Mul_Acc(13, 10) Mul_Acc(14, 9) Mul_Acc(15, 8) \
1382	Mul_SaveAcc(7, 9, 15) Mul_Acc(10, 14) Mul_Acc(11, 13) Mul_Acc(12, 12) Mul_Acc(13, 11) Mul_Acc(14, 10) Mul_Acc(15, 9) \
1383	Mul_SaveAcc(8, 10, 15) Mul_Acc(11, 14) Mul_Acc(12, 13) Mul_Acc(13, 12) Mul_Acc(14, 11) Mul_Acc(15, 10) \
1384	Mul_SaveAcc(9, 11, 15) Mul_Acc(12, 14) Mul_Acc(13, 13) Mul_Acc(14, 12) Mul_Acc(15, 11) \
1385	Mul_SaveAcc(10, 12, 15) Mul_Acc(13, 14) Mul_Acc(14, 13) Mul_Acc(15, 12) \
1386	Mul_SaveAcc(11, 13, 15) Mul_Acc(14, 14) Mul_Acc(15, 13) \
1387	Mul_SaveAcc(12, 14, 15) Mul_Acc(15, 14) \
1388	Mul_End(13, 15)
1389	}
1390	#endif
1391
1392	// ********************************************************
1393
1394	#if CRYPTOPP_INTEGER_SSE2
1395
1396	CRYPTOPP_ALIGN_DATA(16) static const word32 s_maskLow16[4] CRYPTOPP_SECTION_ALIGN16 = {0xffff,0xffff,0xffff,0xffff};
1397
1398	#undef Mul_Begin
1399	#undef Mul_Acc
1400	#undef Top_Begin
1401	#undef Top_Acc
1402	#undef Squ_Acc
1403	#undef Squ_NonDiag
1404	#undef Squ_Diag
1405	#undef Squ_SaveAcc
1406	#undef Squ_Begin
1407	#undef Mul_SaveAcc
1408	#undef Bot_Acc
1409	#undef Bot_SaveAcc
1410	#undef Bot_End
1411	#undef Squ_End
1412	#undef Mul_End
1413
1414	#define SSE2_FinalSave(k) \
1415	AS2( psllq xmm5, 16) \
1416	AS2( paddq xmm4, xmm5) \
1417	AS2( movq QWORD PTR [ecx+8*(k)], xmm4)
1418
1419	#define SSE2_SaveShift(k) \
1420	AS2( movq xmm0, xmm6) \
1421	AS2( punpckhqdq xmm6, xmm0) \
1422	AS2( movq xmm1, xmm7) \
1423	AS2( punpckhqdq xmm7, xmm1) \
1424	AS2( paddd xmm6, xmm0) \
1425	AS2( pslldq xmm6, 4) \
1426	AS2( paddd xmm7, xmm1) \
1427	AS2( paddd xmm4, xmm6) \
1428	AS2( pslldq xmm7, 4) \
1429	AS2( movq xmm6, xmm4) \
1430	AS2( paddd xmm5, xmm7) \
1431	AS2( movq xmm7, xmm5) \
1432	AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1433	AS2( psrlq xmm6, 16) \
1434	AS2( paddq xmm6, xmm7) \
1435	AS2( punpckhqdq xmm4, xmm0) \
1436	AS2( punpckhqdq xmm5, xmm0) \
1437	AS2( movq QWORD PTR [ecx+8*(k)+2], xmm6) \
1438	AS2( psrlq xmm6, 3*16) \
1439	AS2( paddd xmm4, xmm6) \
1440
1441	#define Squ_SSE2_SaveShift(k) \
1442	AS2( movq xmm0, xmm6) \
1443	AS2( punpckhqdq xmm6, xmm0) \
1444	AS2( movq xmm1, xmm7) \
1445	AS2( punpckhqdq xmm7, xmm1) \
1446	AS2( paddd xmm6, xmm0) \
1447	AS2( pslldq xmm6, 4) \
1448	AS2( paddd xmm7, xmm1) \
1449	AS2( paddd xmm4, xmm6) \
1450	AS2( pslldq xmm7, 4) \
1451	AS2( movhlps xmm6, xmm4) \
1452	AS2( movd DWORD PTR [ecx+8*(k)], xmm4) \
1453	AS2( paddd xmm5, xmm7) \
1454	AS2( movhps QWORD PTR [esp+12], xmm5)\
1455	AS2( psrlq xmm4, 16) \
1456	AS2( paddq xmm4, xmm5) \
1457	AS2( movq QWORD PTR [ecx+8*(k)+2], xmm4) \
1458	AS2( psrlq xmm4, 3*16) \
1459	AS2( paddd xmm4, xmm6) \
1460	AS2( movq QWORD PTR [esp+4], xmm4)\
1461
1462	#define SSE2_FirstMultiply(i) \
1463	AS2( movdqa xmm7, [esi+(i)*16])\
1464	AS2( movdqa xmm5, [edi-(i)*16])\
1465	AS2( pmuludq xmm5, xmm7) \
1466	AS2( movdqa xmm4, [ebx])\
1467	AS2( movdqa xmm6, xmm4) \
1468	AS2( pand xmm4, xmm5) \
1469	AS2( psrld xmm5, 16) \
1470	AS2( pmuludq xmm7, [edx-(i)*16])\
1471	AS2( pand xmm6, xmm7) \
1472	AS2( psrld xmm7, 16)
1473
1474	#define Squ_Begin(n) \
1475	SquPrologue \
1476	AS2( mov esi, esp)\
1477	AS2( and esp, 0xfffffff0)\
1478	AS2( lea edi, [esp-32*n])\
1479	AS2( sub esp, 32*n+16)\
1480	AS1( push esi)\
1481	AS2( mov esi, edi) \
1482	AS2( xor edx, edx) \
1483	ASL(1) \
1484	ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1485	ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1486	AS2( movdqa [edi+2*edx], xmm0) \
1487	AS2( psrlq xmm0, 32) \
1488	AS2( movdqa [edi+2*edx+16], xmm0) \
1489	AS2( movdqa [edi+16n+2edx], xmm1) \
1490	AS2( psrlq xmm1, 32) \
1491	AS2( movdqa [edi+16n+2edx+16], xmm1) \
1492	AS2( add edx, 16) \
1493	AS2( cmp edx, 8*(n)) \
1494	ASJ( jne, 1, b) \
1495	AS2( lea edx, [edi+16*n])\
1496	SSE2_FirstMultiply(0) \
1497
1498	#define Squ_Acc(i) \
1499	ASL(LSqu##i) \
1500	AS2( movdqa xmm1, [esi+(i)*16]) \
1501	AS2( movdqa xmm0, [edi-(i)*16]) \
1502	AS2( movdqa xmm2, [ebx]) \
1503	AS2( pmuludq xmm0, xmm1) \
1504	AS2( pmuludq xmm1, [edx-(i)*16]) \
1505	AS2( movdqa xmm3, xmm2) \
1506	AS2( pand xmm2, xmm0) \
1507	AS2( psrld xmm0, 16) \
1508	AS2( paddd xmm4, xmm2) \
1509	AS2( paddd xmm5, xmm0) \
1510	AS2( pand xmm3, xmm1) \
1511	AS2( psrld xmm1, 16) \
1512	AS2( paddd xmm6, xmm3) \
1513	AS2( paddd xmm7, xmm1) \
1514
1515	#define Squ_Acc1(i)
1516	#define Squ_Acc2(i) ASC(call, LSqu##i)
1517	#define Squ_Acc3(i) Squ_Acc2(i)
1518	#define Squ_Acc4(i) Squ_Acc2(i)
1519	#define Squ_Acc5(i) Squ_Acc2(i)
1520	#define Squ_Acc6(i) Squ_Acc2(i)
1521	#define Squ_Acc7(i) Squ_Acc2(i)
1522	#define Squ_Acc8(i) Squ_Acc2(i)
1523
1524	#define SSE2_End(E, n) \
1525	SSE2_SaveShift(2*(n)-3) \
1526	AS2( movdqa xmm7, [esi+16]) \
1527	AS2( movdqa xmm0, [edi]) \
1528	AS2( pmuludq xmm0, xmm7) \
1529	AS2( movdqa xmm2, [ebx]) \
1530	AS2( pmuludq xmm7, [edx]) \
1531	AS2( movdqa xmm6, xmm2) \
1532	AS2( pand xmm2, xmm0) \
1533	AS2( psrld xmm0, 16) \
1534	AS2( paddd xmm4, xmm2) \
1535	AS2( paddd xmm5, xmm0) \
1536	AS2( pand xmm6, xmm7) \
1537	AS2( psrld xmm7, 16) \
1538	SSE2_SaveShift(2*(n)-2) \
1539	SSE2_FinalSave(2*(n)-1) \
1540	AS1( pop esp)\
1541	E
1542
1543	#define Squ_End(n) SSE2_End(SquEpilogue, n)
1544	#define Mul_End(n) SSE2_End(MulEpilogue, n)
1545	#define Top_End(n) SSE2_End(TopEpilogue, n)
1546
1547	#define Squ_Column1(k, i) \
1548	Squ_SSE2_SaveShift(k) \
1549	AS2( add esi, 16) \
1550	SSE2_FirstMultiply(1)\
1551	Squ_Acc##i(i) \
1552	AS2( paddd xmm4, xmm4) \
1553	AS2( paddd xmm5, xmm5) \
1554	AS2( movdqa xmm3, [esi]) \
1555	AS2( movq xmm1, QWORD PTR [esi+8]) \
1556	AS2( pmuludq xmm1, xmm3) \
1557	AS2( pmuludq xmm3, xmm3) \
1558	AS2( movdqa xmm0, [ebx])\
1559	AS2( movdqa xmm2, xmm0) \
1560	AS2( pand xmm0, xmm1) \
1561	AS2( psrld xmm1, 16) \
1562	AS2( paddd xmm6, xmm0) \
1563	AS2( paddd xmm7, xmm1) \
1564	AS2( pand xmm2, xmm3) \
1565	AS2( psrld xmm3, 16) \
1566	AS2( paddd xmm6, xmm6) \
1567	AS2( paddd xmm7, xmm7) \
1568	AS2( paddd xmm4, xmm2) \
1569	AS2( paddd xmm5, xmm3) \
1570	AS2( movq xmm0, QWORD PTR [esp+4])\
1571	AS2( movq xmm1, QWORD PTR [esp+12])\
1572	AS2( paddd xmm4, xmm0)\
1573	AS2( paddd xmm5, xmm1)\
1574
1575	#define Squ_Column0(k, i) \
1576	Squ_SSE2_SaveShift(k) \
1577	AS2( add edi, 16) \
1578	AS2( add edx, 16) \
1579	SSE2_FirstMultiply(1)\
1580	Squ_Acc##i(i) \
1581	AS2( paddd xmm6, xmm6) \
1582	AS2( paddd xmm7, xmm7) \
1583	AS2( paddd xmm4, xmm4) \
1584	AS2( paddd xmm5, xmm5) \
1585	AS2( movq xmm0, QWORD PTR [esp+4])\
1586	AS2( movq xmm1, QWORD PTR [esp+12])\
1587	AS2( paddd xmm4, xmm0)\
1588	AS2( paddd xmm5, xmm1)\
1589
1590	#define SSE2_MulAdd45 \
1591	AS2( movdqa xmm7, [esi]) \
1592	AS2( movdqa xmm0, [edi]) \
1593	AS2( pmuludq xmm0, xmm7) \
1594	AS2( movdqa xmm2, [ebx]) \
1595	AS2( pmuludq xmm7, [edx]) \
1596	AS2( movdqa xmm6, xmm2) \
1597	AS2( pand xmm2, xmm0) \
1598	AS2( psrld xmm0, 16) \
1599	AS2( paddd xmm4, xmm2) \
1600	AS2( paddd xmm5, xmm0) \
1601	AS2( pand xmm6, xmm7) \
1602	AS2( psrld xmm7, 16)
1603
1604	#define Mul_Begin(n) \
1605	MulPrologue \
1606	AS2( mov esi, esp)\
1607	AS2( and esp, 0xfffffff0)\
1608	AS2( sub esp, 48*n+16)\
1609	AS1( push esi)\
1610	AS2( xor edx, edx) \
1611	ASL(1) \
1612	ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1613	ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1614	ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1615	AS2( movdqa [esp+20+2*edx], xmm0) \
1616	AS2( psrlq xmm0, 32) \
1617	AS2( movdqa [esp+20+2*edx+16], xmm0) \
1618	AS2( movdqa [esp+20+16n+2edx], xmm1) \
1619	AS2( psrlq xmm1, 32) \
1620	AS2( movdqa [esp+20+16n+2edx+16], xmm1) \
1621	AS2( movdqa [esp+20+32n+2edx], xmm2) \
1622	AS2( psrlq xmm2, 32) \
1623	AS2( movdqa [esp+20+32n+2edx+16], xmm2) \
1624	AS2( add edx, 16) \
1625	AS2( cmp edx, 8*(n)) \
1626	ASJ( jne, 1, b) \
1627	AS2( lea edi, [esp+20])\
1628	AS2( lea edx, [esp+20+16*n])\
1629	AS2( lea esi, [esp+20+32*n])\
1630	SSE2_FirstMultiply(0) \
1631
1632	#define Mul_Acc(i) \
1633	ASL(LMul##i) \
1634	AS2( movdqa xmm1, [esi+i/2(1-(i-2(i/2))2)16]) \
1635	AS2( movdqa xmm0, [edi-i/2(1-(i-2(i/2))2)16]) \
1636	AS2( movdqa xmm2, [ebx]) \
1637	AS2( pmuludq xmm0, xmm1) \
1638	AS2( pmuludq xmm1, [edx-i/2(1-(i-2(i/2))2)16]) \
1639	AS2( movdqa xmm3, xmm2) \
1640	AS2( pand xmm2, xmm0) \
1641	AS2( psrld xmm0, 16) \
1642	AS2( paddd xmm4, xmm2) \
1643	AS2( paddd xmm5, xmm0) \
1644	AS2( pand xmm3, xmm1) \
1645	AS2( psrld xmm1, 16) \
1646	AS2( paddd xmm6, xmm3) \
1647	AS2( paddd xmm7, xmm1) \
1648
1649	#define Mul_Acc1(i)
1650	#define Mul_Acc2(i) ASC(call, LMul##i)
1651	#define Mul_Acc3(i) Mul_Acc2(i)
1652	#define Mul_Acc4(i) Mul_Acc2(i)
1653	#define Mul_Acc5(i) Mul_Acc2(i)
1654	#define Mul_Acc6(i) Mul_Acc2(i)
1655	#define Mul_Acc7(i) Mul_Acc2(i)
1656	#define Mul_Acc8(i) Mul_Acc2(i)
1657	#define Mul_Acc9(i) Mul_Acc2(i)
1658	#define Mul_Acc10(i) Mul_Acc2(i)
1659	#define Mul_Acc11(i) Mul_Acc2(i)
1660	#define Mul_Acc12(i) Mul_Acc2(i)
1661	#define Mul_Acc13(i) Mul_Acc2(i)
1662	#define Mul_Acc14(i) Mul_Acc2(i)
1663	#define Mul_Acc15(i) Mul_Acc2(i)
1664	#define Mul_Acc16(i) Mul_Acc2(i)
1665
1666	#define Mul_Column1(k, i) \
1667	SSE2_SaveShift(k) \
1668	AS2( add esi, 16) \
1669	SSE2_MulAdd45\
1670	Mul_Acc##i(i) \
1671
1672	#define Mul_Column0(k, i) \
1673	SSE2_SaveShift(k) \
1674	AS2( add edi, 16) \
1675	AS2( add edx, 16) \
1676	SSE2_MulAdd45\
1677	Mul_Acc##i(i) \
1678
1679	#define Bot_Acc(i) \
1680	AS2( movdqa xmm1, [esi+i/2(1-(i-2(i/2))2)16]) \
1681	AS2( movdqa xmm0, [edi-i/2(1-(i-2(i/2))2)16]) \
1682	AS2( pmuludq xmm0, xmm1) \
1683	AS2( pmuludq xmm1, [edx-i/2(1-(i-2(i/2))2)16]) \
1684	AS2( paddq xmm4, xmm0) \
1685	AS2( paddd xmm6, xmm1)
1686
1687	#define Bot_SaveAcc(k) \
1688	SSE2_SaveShift(k) \
1689	AS2( add edi, 16) \
1690	AS2( add edx, 16) \
1691	AS2( movdqa xmm6, [esi]) \
1692	AS2( movdqa xmm0, [edi]) \
1693	AS2( pmuludq xmm0, xmm6) \
1694	AS2( paddq xmm4, xmm0) \
1695	AS2( psllq xmm5, 16) \
1696	AS2( paddq xmm4, xmm5) \
1697	AS2( pmuludq xmm6, [edx])
1698
1699	#define Bot_End(n) \
1700	AS2( movhlps xmm7, xmm6) \
1701	AS2( paddd xmm6, xmm7) \
1702	AS2( psllq xmm6, 32) \
1703	AS2( paddd xmm4, xmm6) \
1704	AS2( movq QWORD PTR [ecx+8*((n)-1)], xmm4) \
1705	AS1( pop esp)\
1706	MulEpilogue
1707
1708	#define Top_Begin(n) \
1709	TopPrologue \
1710	AS2( mov edx, esp)\
1711	AS2( and esp, 0xfffffff0)\
1712	AS2( sub esp, 48*n+16)\
1713	AS1( push edx)\
1714	AS2( xor edx, edx) \
1715	ASL(1) \
1716	ASS( pshufd xmm0, [eax+edx], 3,1,2,0) \
1717	ASS( pshufd xmm1, [eax+edx], 2,0,3,1) \
1718	ASS( pshufd xmm2, [edi+edx], 3,1,2,0) \
1719	AS2( movdqa [esp+20+2*edx], xmm0) \
1720	AS2( psrlq xmm0, 32) \
1721	AS2( movdqa [esp+20+2*edx+16], xmm0) \
1722	AS2( movdqa [esp+20+16n+2edx], xmm1) \
1723	AS2( psrlq xmm1, 32) \
1724	AS2( movdqa [esp+20+16n+2edx+16], xmm1) \
1725	AS2( movdqa [esp+20+32n+2edx], xmm2) \
1726	AS2( psrlq xmm2, 32) \
1727	AS2( movdqa [esp+20+32n+2edx+16], xmm2) \
1728	AS2( add edx, 16) \
1729	AS2( cmp edx, 8*(n)) \
1730	ASJ( jne, 1, b) \
1731	AS2( mov eax, esi) \
1732	AS2( lea edi, [esp+20+00n+16(n/2-1)])\
1733	AS2( lea edx, [esp+20+16n+16(n/2-1)])\
1734	AS2( lea esi, [esp+20+32n+16(n/2-1)])\
1735	AS2( pxor xmm4, xmm4)\
1736	AS2( pxor xmm5, xmm5)
1737
1738	#define Top_Acc(i) \
1739	AS2( movq xmm0, QWORD PTR [esi+i/2(1-(i-2(i/2))2)16+8]) \
1740	AS2( pmuludq xmm0, [edx-i/2(1-(i-2(i/2))2)16]) \
1741	AS2( psrlq xmm0, 48) \
1742	AS2( paddd xmm5, xmm0)\
1743
1744	#define Top_Column0(i) \
1745	AS2( psllq xmm5, 32) \
1746	AS2( add edi, 16) \
1747	AS2( add edx, 16) \
1748	SSE2_MulAdd45\
1749	Mul_Acc##i(i) \
1750
1751	#define Top_Column1(i) \
1752	SSE2_SaveShift(0) \
1753	AS2( add esi, 16) \
1754	SSE2_MulAdd45\
1755	Mul_Acc##i(i) \
1756	AS2( shr eax, 16) \
1757	AS2( movd xmm0, eax)\
1758	AS2( movd xmm1, [ecx+4])\
1759	AS2( psrld xmm1, 16)\
1760	AS2( pcmpgtd xmm1, xmm0)\
1761	AS2( psrld xmm1, 31)\
1762	AS2( paddd xmm4, xmm1)\
1763
1764	void SSE2_Square4(word C, const word A)
1765	{
1766	Squ_Begin(2)
1767	Squ_Column0(0, 1)
1768	Squ_End(2)
1769	}
1770
1771	void SSE2_Square8(word C, const word A)
1772	{
1773	Squ_Begin(4)
1774	#ifndef __GNUC__
1775	ASJ( jmp, 0, f)
1776	Squ_Acc(2)
1777	AS1( ret) ASL(0)
1778	#endif
1779	Squ_Column0(0, 1)
1780	Squ_Column1(1, 1)
1781	Squ_Column0(2, 2)
1782	Squ_Column1(3, 1)
1783	Squ_Column0(4, 1)
1784	Squ_End(4)
1785	}
1786
1787	void SSE2_Square16(word C, const word A)
1788	{
1789	Squ_Begin(8)
1790	#ifndef __GNUC__
1791	ASJ( jmp, 0, f)
1792	Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1793	AS1( ret) ASL(0)
1794	#endif
1795	Squ_Column0(0, 1)
1796	Squ_Column1(1, 1)
1797	Squ_Column0(2, 2)
1798	Squ_Column1(3, 2)
1799	Squ_Column0(4, 3)
1800	Squ_Column1(5, 3)
1801	Squ_Column0(6, 4)
1802	Squ_Column1(7, 3)
1803	Squ_Column0(8, 3)
1804	Squ_Column1(9, 2)
1805	Squ_Column0(10, 2)
1806	Squ_Column1(11, 1)
1807	Squ_Column0(12, 1)
1808	Squ_End(8)
1809	}
1810
1811	void SSE2_Square32(word C, const word A)
1812	{
1813	Squ_Begin(16)
1814	ASJ( jmp, 0, f)
1815	Squ_Acc(8) Squ_Acc(7) Squ_Acc(6) Squ_Acc(5) Squ_Acc(4) Squ_Acc(3) Squ_Acc(2)
1816	AS1( ret) ASL(0)
1817	Squ_Column0(0, 1)
1818	Squ_Column1(1, 1)
1819	Squ_Column0(2, 2)
1820	Squ_Column1(3, 2)
1821	Squ_Column0(4, 3)
1822	Squ_Column1(5, 3)
1823	Squ_Column0(6, 4)
1824	Squ_Column1(7, 4)
1825	Squ_Column0(8, 5)
1826	Squ_Column1(9, 5)
1827	Squ_Column0(10, 6)
1828	Squ_Column1(11, 6)
1829	Squ_Column0(12, 7)
1830	Squ_Column1(13, 7)
1831	Squ_Column0(14, 8)
1832	Squ_Column1(15, 7)
1833	Squ_Column0(16, 7)
1834	Squ_Column1(17, 6)
1835	Squ_Column0(18, 6)
1836	Squ_Column1(19, 5)
1837	Squ_Column0(20, 5)
1838	Squ_Column1(21, 4)
1839	Squ_Column0(22, 4)
1840	Squ_Column1(23, 3)
1841	Squ_Column0(24, 3)
1842	Squ_Column1(25, 2)
1843	Squ_Column0(26, 2)
1844	Squ_Column1(27, 1)
1845	Squ_Column0(28, 1)
1846	Squ_End(16)
1847	}
1848
1849	void SSE2_Multiply4(word C, const word A, const word *B)
1850	{
1851	Mul_Begin(2)
1852	#ifndef __GNUC__
1853	ASJ( jmp, 0, f)
1854	Mul_Acc(2)
1855	AS1( ret) ASL(0)
1856	#endif
1857	Mul_Column0(0, 2)
1858	Mul_End(2)
1859	}
1860
1861	void SSE2_Multiply8(word C, const word A, const word *B)
1862	{
1863	Mul_Begin(4)
1864	#ifndef __GNUC__
1865	ASJ( jmp, 0, f)
1866	Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1867	AS1( ret) ASL(0)
1868	#endif
1869	Mul_Column0(0, 2)
1870	Mul_Column1(1, 3)
1871	Mul_Column0(2, 4)
1872	Mul_Column1(3, 3)
1873	Mul_Column0(4, 2)
1874	Mul_End(4)
1875	}
1876
1877	void SSE2_Multiply16(word C, const word A, const word *B)
1878	{
1879	Mul_Begin(8)
1880	#ifndef __GNUC__
1881	ASJ( jmp, 0, f)
1882	Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1883	AS1( ret) ASL(0)
1884	#endif
1885	Mul_Column0(0, 2)
1886	Mul_Column1(1, 3)
1887	Mul_Column0(2, 4)
1888	Mul_Column1(3, 5)
1889	Mul_Column0(4, 6)
1890	Mul_Column1(5, 7)
1891	Mul_Column0(6, 8)
1892	Mul_Column1(7, 7)
1893	Mul_Column0(8, 6)
1894	Mul_Column1(9, 5)
1895	Mul_Column0(10, 4)
1896	Mul_Column1(11, 3)
1897	Mul_Column0(12, 2)
1898	Mul_End(8)
1899	}
1900
1901	void SSE2_Multiply32(word C, const word A, const word *B)
1902	{
1903	Mul_Begin(16)
1904	ASJ( jmp, 0, f)
1905	Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1906	AS1( ret) ASL(0)
1907	Mul_Column0(0, 2)
1908	Mul_Column1(1, 3)
1909	Mul_Column0(2, 4)
1910	Mul_Column1(3, 5)
1911	Mul_Column0(4, 6)
1912	Mul_Column1(5, 7)
1913	Mul_Column0(6, 8)
1914	Mul_Column1(7, 9)
1915	Mul_Column0(8, 10)
1916	Mul_Column1(9, 11)
1917	Mul_Column0(10, 12)
1918	Mul_Column1(11, 13)
1919	Mul_Column0(12, 14)
1920	Mul_Column1(13, 15)
1921	Mul_Column0(14, 16)
1922	Mul_Column1(15, 15)
1923	Mul_Column0(16, 14)
1924	Mul_Column1(17, 13)
1925	Mul_Column0(18, 12)
1926	Mul_Column1(19, 11)
1927	Mul_Column0(20, 10)
1928	Mul_Column1(21, 9)
1929	Mul_Column0(22, 8)
1930	Mul_Column1(23, 7)
1931	Mul_Column0(24, 6)
1932	Mul_Column1(25, 5)
1933	Mul_Column0(26, 4)
1934	Mul_Column1(27, 3)
1935	Mul_Column0(28, 2)
1936	Mul_End(16)
1937	}
1938
1939	void SSE2_MultiplyBottom4(word C, const word A, const word *B)
1940	{
1941	Mul_Begin(2)
1942	Bot_SaveAcc(0) Bot_Acc(2)
1943	Bot_End(2)
1944	}
1945
1946	void SSE2_MultiplyBottom8(word C, const word A, const word *B)
1947	{
1948	Mul_Begin(4)
1949	#ifndef __GNUC__
1950	ASJ( jmp, 0, f)
1951	Mul_Acc(3) Mul_Acc(2)
1952	AS1( ret) ASL(0)
1953	#endif
1954	Mul_Column0(0, 2)
1955	Mul_Column1(1, 3)
1956	Bot_SaveAcc(2) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1957	Bot_End(4)
1958	}
1959
1960	void SSE2_MultiplyBottom16(word C, const word A, const word *B)
1961	{
1962	Mul_Begin(8)
1963	#ifndef __GNUC__
1964	ASJ( jmp, 0, f)
1965	Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1966	AS1( ret) ASL(0)
1967	#endif
1968	Mul_Column0(0, 2)
1969	Mul_Column1(1, 3)
1970	Mul_Column0(2, 4)
1971	Mul_Column1(3, 5)
1972	Mul_Column0(4, 6)
1973	Mul_Column1(5, 7)
1974	Bot_SaveAcc(6) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
1975	Bot_End(8)
1976	}
1977
1978	void SSE2_MultiplyBottom32(word C, const word A, const word *B)
1979	{
1980	Mul_Begin(16)
1981	#ifndef __GNUC__
1982	ASJ( jmp, 0, f)
1983	Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
1984	AS1( ret) ASL(0)
1985	#endif
1986	Mul_Column0(0, 2)
1987	Mul_Column1(1, 3)
1988	Mul_Column0(2, 4)
1989	Mul_Column1(3, 5)
1990	Mul_Column0(4, 6)
1991	Mul_Column1(5, 7)
1992	Mul_Column0(6, 8)
1993	Mul_Column1(7, 9)
1994	Mul_Column0(8, 10)
1995	Mul_Column1(9, 11)
1996	Mul_Column0(10, 12)
1997	Mul_Column1(11, 13)
1998	Mul_Column0(12, 14)
1999	Mul_Column1(13, 15)
2000	Bot_SaveAcc(14) Bot_Acc(16) Bot_Acc(15) Bot_Acc(14) Bot_Acc(13) Bot_Acc(12) Bot_Acc(11) Bot_Acc(10) Bot_Acc(9) Bot_Acc(8) Bot_Acc(7) Bot_Acc(6) Bot_Acc(5) Bot_Acc(4) Bot_Acc(3) Bot_Acc(2)
2001	Bot_End(16)
2002	}
2003
2004	void SSE2_MultiplyTop8(word C, const word A, const word *B, word L)
2005	{
2006	Top_Begin(4)
2007	Top_Acc(3) Top_Acc(2) Top_Acc(1)
2008	#ifndef __GNUC__
2009	ASJ( jmp, 0, f)
2010	Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2011	AS1( ret) ASL(0)
2012	#endif
2013	Top_Column0(4)
2014	Top_Column1(3)
2015	Mul_Column0(0, 2)
2016	Top_End(2)
2017	}
2018
2019	void SSE2_MultiplyTop16(word C, const word A, const word *B, word L)
2020	{
2021	Top_Begin(8)
2022	Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2023	#ifndef __GNUC__
2024	ASJ( jmp, 0, f)
2025	Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2026	AS1( ret) ASL(0)
2027	#endif
2028	Top_Column0(8)
2029	Top_Column1(7)
2030	Mul_Column0(0, 6)
2031	Mul_Column1(1, 5)
2032	Mul_Column0(2, 4)
2033	Mul_Column1(3, 3)
2034	Mul_Column0(4, 2)
2035	Top_End(4)
2036	}
2037
2038	void SSE2_MultiplyTop32(word C, const word A, const word *B, word L)
2039	{
2040	Top_Begin(16)
2041	Top_Acc(15) Top_Acc(14) Top_Acc(13) Top_Acc(12) Top_Acc(11) Top_Acc(10) Top_Acc(9) Top_Acc(8) Top_Acc(7) Top_Acc(6) Top_Acc(5) Top_Acc(4) Top_Acc(3) Top_Acc(2) Top_Acc(1)
2042	#ifndef __GNUC__
2043	ASJ( jmp, 0, f)
2044	Mul_Acc(16) Mul_Acc(15) Mul_Acc(14) Mul_Acc(13) Mul_Acc(12) Mul_Acc(11) Mul_Acc(10) Mul_Acc(9) Mul_Acc(8) Mul_Acc(7) Mul_Acc(6) Mul_Acc(5) Mul_Acc(4) Mul_Acc(3) Mul_Acc(2)
2045	AS1( ret) ASL(0)
2046	#endif
2047	Top_Column0(16)
2048	Top_Column1(15)
2049	Mul_Column0(0, 14)
2050	Mul_Column1(1, 13)
2051	Mul_Column0(2, 12)
2052	Mul_Column1(3, 11)
2053	Mul_Column0(4, 10)
2054	Mul_Column1(5, 9)
2055	Mul_Column0(6, 8)
2056	Mul_Column1(7, 7)
2057	Mul_Column0(8, 6)
2058	Mul_Column1(9, 5)
2059	Mul_Column0(10, 4)
2060	Mul_Column1(11, 3)
2061	Mul_Column0(12, 2)
2062	Top_End(8)
2063	}
2064
2065	#endif // #if CRYPTOPP_INTEGER_SSE2
2066
2067	// ********************************************************
2068
2069	typedef int (CRYPTOPP_FASTCALL * PAdd)(size_t N, word C, const word A, const word *B);
2070	typedef void (* PMul)(word C, const word A, const word *B);
2071	typedef void (* PSqu)(word C, const word A);
2072	typedef void (* PMulTop)(word C, const word A, const word *B, word L);
2073
2074	#if CRYPTOPP_INTEGER_SSE2
2075	static PAdd s_pAdd = &Baseline_Add, s_pSub = &Baseline_Sub;
2076	static size_t s_recursionLimit = 8;
2077	#else
2078	static const size_t s_recursionLimit = 16;
2079	#endif
2080
2081	static PMul s_pMul[9], s_pBot[9];
2082	static PSqu s_pSqu[9];
2083	static PMulTop s_pTop[9];
2084
2085	static void SetFunctionPointers()
2086	{
2087	s_pMul[0] = &Baseline_Multiply2;
2088	s_pBot[0] = &Baseline_MultiplyBottom2;
2089	s_pSqu[0] = &Baseline_Square2;
2090	s_pTop[0] = &Baseline_MultiplyTop2;
2091	s_pTop[1] = &Baseline_MultiplyTop4;
2092
2093	#if CRYPTOPP_INTEGER_SSE2
2094	if (HasSSE2())
2095	{
2096	#if _MSC_VER != 1200 \|\| !(CRYPTOPP_DEBUG)
2097	if (IsP4())
2098	{
2099	s_pAdd = &SSE2_Add;
2100	s_pSub = &SSE2_Sub;
2101	}
2102	#endif
2103
2104	s_recursionLimit = 32;
2105
2106	s_pMul[1] = &SSE2_Multiply4;
2107	s_pMul[2] = &SSE2_Multiply8;
2108	s_pMul[4] = &SSE2_Multiply16;
2109	s_pMul[8] = &SSE2_Multiply32;
2110
2111	s_pBot[1] = &SSE2_MultiplyBottom4;
2112	s_pBot[2] = &SSE2_MultiplyBottom8;
2113	s_pBot[4] = &SSE2_MultiplyBottom16;
2114	s_pBot[8] = &SSE2_MultiplyBottom32;
2115
2116	s_pSqu[1] = &SSE2_Square4;
2117	s_pSqu[2] = &SSE2_Square8;
2118	s_pSqu[4] = &SSE2_Square16;
2119	s_pSqu[8] = &SSE2_Square32;
2120
2121	s_pTop[2] = &SSE2_MultiplyTop8;
2122	s_pTop[4] = &SSE2_MultiplyTop16;
2123	s_pTop[8] = &SSE2_MultiplyTop32;
2124	}
2125	else
2126	#endif
2127	{
2128	s_pMul[1] = &Baseline_Multiply4;
2129	s_pMul[2] = &Baseline_Multiply8;
2130
2131	s_pBot[1] = &Baseline_MultiplyBottom4;
2132	s_pBot[2] = &Baseline_MultiplyBottom8;
2133
2134	s_pSqu[1] = &Baseline_Square4;
2135	s_pSqu[2] = &Baseline_Square8;
2136
2137	s_pTop[2] = &Baseline_MultiplyTop8;
2138
2139	#if !CRYPTOPP_INTEGER_SSE2
2140	s_pMul[4] = &Baseline_Multiply16;
2141	s_pBot[4] = &Baseline_MultiplyBottom16;
2142	s_pSqu[4] = &Baseline_Square16;
2143	s_pTop[4] = &Baseline_MultiplyTop16;
2144	#endif
2145	}
2146	}
2147
2148	inline int Add(word C, const word A, const word *B, size_t N)
2149	{
2150	#if CRYPTOPP_INTEGER_SSE2
2151	return s_pAdd(N, C, A, B);
2152	#else
2153	return Baseline_Add(N, C, A, B);
2154	#endif
2155	}
2156
2157	inline int Subtract(word C, const word A, const word *B, size_t N)
2158	{
2159	#if CRYPTOPP_INTEGER_SSE2
2160	return s_pSub(N, C, A, B);
2161	#else
2162	return Baseline_Sub(N, C, A, B);
2163	#endif
2164	}
2165
2166	// ********************************************************
2167
2168
2169	#define A0 A
2170	#define A1 (A+N2)
2171	#define B0 B
2172	#define B1 (B+N2)
2173
2174	#define T0 T
2175	#define T1 (T+N2)
2176	#define T2 (T+N)
2177	#define T3 (T+N+N2)
2178
2179	#define R0 R
2180	#define R1 (R+N2)
2181	#define R2 (R+N)
2182	#define R3 (R+N+N2)
2183
2184	// R[2N] - result = AB
2185	// T[2*N] - temporary work space
2186	// A[N] --- multiplier
2187	// B[N] --- multiplicant
2188
2189	void RecursiveMultiply(word R, word T, const word A, const word B, size_t N)
2190	{
2191	CRYPTOPP_ASSERT(N>=2 && N%2==0);
2192
2193	if (N <= s_recursionLimit)
2194	s_pMul[N/4](R, A, B);
2195	else
2196	{
2197	const size_t N2 = N/2;
2198
2199	size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2200	Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2201
2202	size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2203	Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2204
2205	RecursiveMultiply(R2, T2, A1, B1, N2);
2206	RecursiveMultiply(T0, T2, R0, R1, N2);
2207	RecursiveMultiply(R0, T2, A0, B0, N2);
2208
2209	// now T[01] holds (A1-A0)(B0-B1), R[01] holds A0B0, R[23] holds A1*B1
2210
2211	int c2 = Add(R2, R2, R1, N2);
2212	int c3 = c2;
2213	c2 += Add(R1, R2, R0, N2);
2214	c3 += Add(R2, R2, R3, N2);
2215
2216	if (AN2 == BN2)
2217	c3 -= Subtract(R1, R1, T0, N);
2218	else
2219	c3 += Add(R1, R1, T0, N);
2220
2221	c3 += Increment(R2, N2, c2);
2222	CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2223	Increment(R3, N2, c3);
2224	}
2225	}
2226
2227	// R[2N] - result = AA
2228	// T[2*N] - temporary work space
2229	// A[N] --- number to be squared
2230
2231	void RecursiveSquare(word R, word T, const word *A, size_t N)
2232	{
2233	CRYPTOPP_ASSERT(N && N%2==0);
2234
2235	if (N <= s_recursionLimit)
2236	s_pSqu[N/4](R, A);
2237	else
2238	{
2239	const size_t N2 = N/2;
2240
2241	RecursiveSquare(R0, T2, A0, N2);
2242	RecursiveSquare(R2, T2, A1, N2);
2243	RecursiveMultiply(T0, T2, A0, A1, N2);
2244
2245	int carry = Add(R1, R1, T0, N);
2246	carry += Add(R1, R1, T0, N);
2247	Increment(R3, N2, carry);
2248	}
2249	}
2250
2251	// R[N] - bottom half of A*B
2252	// T[3*N/2] - temporary work space
2253	// A[N] - multiplier
2254	// B[N] - multiplicant
2255
2256	void RecursiveMultiplyBottom(word R, word T, const word A, const word B, size_t N)
2257	{
2258	CRYPTOPP_ASSERT(N>=2 && N%2==0);
2259
2260	if (N <= s_recursionLimit)
2261	s_pBot[N/4](R, A, B);
2262	else
2263	{
2264	const size_t N2 = N/2;
2265
2266	RecursiveMultiply(R, T, A0, B0, N2);
2267	RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
2268	Add(R1, R1, T0, N2);
2269	RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
2270	Add(R1, R1, T0, N2);
2271	}
2272	}
2273
2274	// R[N] --- upper half of A*B
2275	// T[2*N] - temporary work space
2276	// L[N] --- lower half of A*B
2277	// A[N] --- multiplier
2278	// B[N] --- multiplicant
2279
2280	void MultiplyTop(word R, word T, const word L, const word A, const word *B, size_t N)
2281	{
2282	CRYPTOPP_ASSERT(N>=2 && N%2==0);
2283
2284	if (N <= s_recursionLimit)
2285	s_pTop[N/4](R, A, B, L[N-1]);
2286	else
2287	{
2288	const size_t N2 = N/2;
2289
2290	size_t AN2 = Compare(A0, A1, N2) > 0 ? 0 : N2;
2291	Subtract(R0, A + AN2, A + (N2 ^ AN2), N2);
2292
2293	size_t BN2 = Compare(B0, B1, N2) > 0 ? 0 : N2;
2294	Subtract(R1, B + BN2, B + (N2 ^ BN2), N2);
2295
2296	RecursiveMultiply(T0, T2, R0, R1, N2);
2297	RecursiveMultiply(R0, T2, A1, B1, N2);
2298
2299	// now T[01] holds (A1-A0)(B0-B1) = A1B0+A0B1-A1B1-A0B0, R[01] holds A1B1
2300
2301	int t, c3;
2302	int c2 = Subtract(T2, L+N2, L, N2);
2303
2304	if (AN2 == BN2)
2305	{
2306	c2 -= Add(T2, T2, T0, N2);
2307	t = (Compare(T2, R0, N2) == -1);
2308	c3 = t - Subtract(T2, T2, T1, N2);
2309	}
2310	else
2311	{
2312	c2 += Subtract(T2, T2, T0, N2);
2313	t = (Compare(T2, R0, N2) == -1);
2314	c3 = t + Add(T2, T2, T1, N2);
2315	}
2316
2317	c2 += t;
2318	if (c2 >= 0)
2319	c3 += Increment(T2, N2, c2);
2320	else
2321	c3 -= Decrement(T2, N2, -c2);
2322	c3 += Add(R0, T2, R1, N2);
2323
2324	CRYPTOPP_ASSERT (c3 >= 0 && c3 <= 2);
2325	Increment(R1, N2, c3);
2326	}
2327	}
2328
2329	inline void Multiply(word R, word T, const word A, const word B, size_t N)
2330	{
2331	RecursiveMultiply(R, T, A, B, N);
2332	}
2333
2334	inline void Square(word R, word T, const word *A, size_t N)
2335	{
2336	RecursiveSquare(R, T, A, N);
2337	}
2338
2339	inline void MultiplyBottom(word R, word T, const word A, const word B, size_t N)
2340	{
2341	RecursiveMultiplyBottom(R, T, A, B, N);
2342	}
2343
2344	// R[NA+NB] - result = A*B
2345	// T[NA+NB] - temporary work space
2346	// A[NA] ---- multiplier
2347	// B[NB] ---- multiplicant
2348
2349	void AsymmetricMultiply(word R, word T, const word A, size_t NA, const word B, size_t NB)
2350	{
2351	if (NA == NB)
2352	{
2353	// Profiling tells us the original second case was dominant, so it was promoted to the first If statement.
2354	// The code change occurred at Commit dc99266599a0e72d.
2355	if (A != B)
2356	Multiply(R, T, A, B, NA);
2357	else
2358	Square(R, T, A, NA);
2359
2360	return;
2361	}
2362
2363	if (NA > NB)
2364	{
2365	std::swap(A, B);
2366	std::swap(NA, NB);
2367	}
2368
2369	CRYPTOPP_ASSERT(NB % NA == 0);
2370
2371	if (NA==2 && !A[1])
2372	{
2373	// Profiling tells us the original Default case was dominant, so it was promoted to the first Case statement.
2374	// The code change occurred at Commit dc99266599a0e72d.
2375	switch (A[0])
2376	{
2377	default:
2378	R[NB] = LinearMultiply(R, B, A[0], NB);
2379	R[NB+1] = 0;
2380	return;
2381	case 0:
2382	SetWords(R, 0, NB+2);
2383	return;
2384	case 1:
2385	CopyWords(R, B, NB);
2386	R[NB] = R[NB+1] = 0;
2387	return;
2388	}
2389	}
2390
2391	size_t i;
2392	if ((NB/NA)%2 == 0)
2393	{
2394	Multiply(R, T, A, B, NA);
2395	CopyWords(T+2*NA, R+NA, NA);
2396
2397	for (i=2NA; i<NB; i+=2NA)
2398	Multiply(T+NA+i, T, A, B+i, NA);
2399	for (i=NA; i<NB; i+=2*NA)
2400	Multiply(R+i, T, A, B+i, NA);
2401	}
2402	else
2403	{
2404	for (i=0; i<NB; i+=2*NA)
2405	Multiply(R+i, T, A, B+i, NA);
2406	for (i=NA; i<NB; i+=2*NA)
2407	Multiply(T+NA+i, T, A, B+i, NA);
2408	}
2409
2410	if (Add(R+NA, R+NA, T+2*NA, NB-NA))
2411	Increment(R+NB, NA);
2412	}
2413
2414	// R[N] ----- result = A inverse mod 2*(WORD_BITSN)
2415	// T[3*N/2] - temporary work space
2416	// A[N] ----- an odd number as input
2417
2418	void RecursiveInverseModPower2(word R, word T, const word *A, size_t N)
2419	{
2420	// Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
2421	// The code change occurred at Commit dc99266599a0e72d.
2422	if (N!=2)
2423	{
2424	const size_t N2 = N/2;
2425	RecursiveInverseModPower2(R0, T0, A0, N2);
2426	T0[0] = 1;
2427	SetWords(T0+1, 0, N2-1);
2428	MultiplyTop(R1, T1, T0, R0, A0, N2);
2429	MultiplyBottom(T0, T1, R0, A1, N2);
2430	Add(T0, R1, T0, N2);
2431	TwosComplement(T0, N2);
2432	MultiplyBottom(R1, T1, R0, T0, N2);
2433	}
2434	else
2435	{
2436	T[0] = AtomicInverseModPower2(A[0]);
2437	T[1] = 0;
2438	s_pBot[0](T+2, T, A);
2439	TwosComplement(T+2, 2);
2440	Increment(T+2, 2, 2);
2441	s_pBot[0](R, T, T+2);
2442	}
2443	}
2444
2445	// R[N] --- result = X/(2*(WORD_BITSN)) mod M
2446	// T[3*N] - temporary work space
2447	// X[2*N] - number to be reduced
2448	// M[N] --- modulus
2449	// U[N] --- multiplicative inverse of M mod 2*(WORD_BITSN)
2450
2451	void MontgomeryReduce(word R, word T, word X, const word M, const word *U, size_t N)
2452	{
2453	#if 1
2454	MultiplyBottom(R, T, X, U, N);
2455	MultiplyTop(T, T+N, X, R, M, N);
2456	word borrow = Subtract(T, X+N, T, N);
2457	// defend against timing attack by doing this Add even when not needed
2458	word carry = Add(T+N, T, M, N);
2459	CRYPTOPP_ASSERT(carry \| !borrow);
2460	CRYPTOPP_UNUSED(carry), CRYPTOPP_UNUSED(borrow);
2461	CopyWords(R, T + ((0-borrow) & N), N);
2462	#elif 0
2463	const word u = 0-U[0];
2464	Declare2Words(p)
2465	for (size_t i=0; i<N; i++)
2466	{
2467	const word t = u * X[i];
2468	word c = 0;
2469	for (size_t j=0; j<N; j+=2)
2470	{
2471	MultiplyWords(p, t, M[j]);
2472	Acc2WordsBy1(p, X[i+j]);
2473	Acc2WordsBy1(p, c);
2474	X[i+j] = LowWord(p);
2475	c = HighWord(p);
2476	MultiplyWords(p, t, M[j+1]);
2477	Acc2WordsBy1(p, X[i+j+1]);
2478	Acc2WordsBy1(p, c);
2479	X[i+j+1] = LowWord(p);
2480	c = HighWord(p);
2481	}
2482
2483	if (Increment(X+N+i, N-i, c))
2484	while (!Subtract(X+N, X+N, M, N)) {}
2485	}
2486
2487	memcpy(R, X+N, N*WORD_SIZE);
2488	#else
2489	__m64 u = _mm_cvtsi32_si64(0-U[0]), p;
2490	for (size_t i=0; i<N; i++)
2491	{
2492	__m64 t = _mm_cvtsi32_si64(X[i]);
2493	t = _mm_mul_su32(t, u);
2494	__m64 c = _mm_setzero_si64();
2495	for (size_t j=0; j<N; j+=2)
2496	{
2497	p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j]));
2498	p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j]));
2499	c = _mm_add_si64(c, p);
2500	X[i+j] = _mm_cvtsi64_si32(c);
2501	c = _mm_srli_si64(c, 32);
2502	p = _mm_mul_su32(t, _mm_cvtsi32_si64(M[j+1]));
2503	p = _mm_add_si64(p, _mm_cvtsi32_si64(X[i+j+1]));
2504	c = _mm_add_si64(c, p);
2505	X[i+j+1] = _mm_cvtsi64_si32(c);
2506	c = _mm_srli_si64(c, 32);
2507	}
2508
2509	if (Increment(X+N+i, N-i, _mm_cvtsi64_si32(c)))
2510	while (!Subtract(X+N, X+N, M, N)) {}
2511	}
2512
2513	memcpy(R, X+N, N*WORD_SIZE);
2514	_mm_empty();
2515	#endif
2516	}
2517
2518	// R[N] --- result = X/(2*(WORD_BITSN/2)) mod M
2519	// T[2*N] - temporary work space
2520	// X[2*N] - number to be reduced
2521	// M[N] --- modulus
2522	// U[N/2] - multiplicative inverse of M mod 2*(WORD_BITSN/2)
2523	// V[N] --- 2*(WORD_BITS3*N/2) mod M
2524
2525	void HalfMontgomeryReduce(word R, word T, const word X, const word M, const word U, const word V, size_t N)
2526	{
2527	CRYPTOPP_ASSERT(N%2==0 && N>=4);
2528
2529	#define M0 M
2530	#define M1 (M+N2)
2531	#define V0 V
2532	#define V1 (V+N2)
2533
2534	#define X0 X
2535	#define X1 (X+N2)
2536	#define X2 (X+N)
2537	#define X3 (X+N+N2)
2538
2539	const size_t N2 = N/2;
2540	Multiply(T0, T2, V0, X3, N2);
2541	int c2 = Add(T0, T0, X0, N);
2542	MultiplyBottom(T3, T2, T0, U, N2);
2543	MultiplyTop(T2, R, T0, T3, M0, N2);
2544	c2 -= Subtract(T2, T1, T2, N2);
2545	Multiply(T0, R, T3, M1, N2);
2546	c2 -= Subtract(T0, T2, T0, N2);
2547	int c3 = -(int)Subtract(T1, X2, T1, N2);
2548	Multiply(R0, T2, V1, X3, N2);
2549	c3 += Add(R, R, T, N);
2550
2551	if (c2>0)
2552	c3 += Increment(R1, N2);
2553	else if (c2<0)
2554	c3 -= Decrement(R1, N2, -c2);
2555
2556	CRYPTOPP_ASSERT(c3>=-1 && c3<=1);
2557	if (c3>0)
2558	Subtract(R, R, M, N);
2559	else if (c3<0)
2560	Add(R, R, M, N);
2561
2562	#undef M0
2563	#undef M1
2564	#undef V0
2565	#undef V1
2566
2567	#undef X0
2568	#undef X1
2569	#undef X2
2570	#undef X3
2571	}
2572
2573	#undef A0
2574	#undef A1
2575	#undef B0
2576	#undef B1
2577
2578	#undef T0
2579	#undef T1
2580	#undef T2
2581	#undef T3
2582
2583	#undef R0
2584	#undef R1
2585	#undef R2
2586	#undef R3
2587
2588	/*
2589	// do a 3 word by 2 word divide, returns quotient and leaves remainder in A
2590	static word SubatomicDivide(word *A, word B0, word B1)
2591	{
2592	// CRYPTOPP_ASSERT {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
2593	CRYPTOPP_ASSERT(A[2] < B1 \|\| (A[2]==B1 && A[1] < B0));
2594
2595	// estimate the quotient: do a 2 word by 1 word divide
2596	word Q;
2597	if (B1+1 == 0)
2598	Q = A[2];
2599	else
2600	Q = DWord(A[1], A[2]).DividedBy(B1+1);
2601
2602	// now subtract Q*B from A
2603	DWord p = DWord::Multiply(B0, Q);
2604	DWord u = (DWord) A[0] - p.GetLowHalf();
2605	A[0] = u.GetLowHalf();
2606	u = (DWord) A[1] - p.GetHighHalf() - u.GetHighHalfAsBorrow() - DWord::Multiply(B1, Q);
2607	A[1] = u.GetLowHalf();
2608	A[2] += u.GetHighHalf();
2609
2610	// Q <= actual quotient, so fix it
2611	while (A[2] \|\| A[1] > B1 \|\| (A[1]==B1 && A[0]>=B0))
2612	{
2613	u = (DWord) A[0] - B0;
2614	A[0] = u.GetLowHalf();
2615	u = (DWord) A[1] - B1 - u.GetHighHalfAsBorrow();
2616	A[1] = u.GetLowHalf();
2617	A[2] += u.GetHighHalf();
2618	Q++;
2619	CRYPTOPP_ASSERT(Q); // shouldn't overflow
2620	}
2621
2622	return Q;
2623	}
2624
2625	// do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
2626	static inline void AtomicDivide(word Q, const word A, const word *B)
2627	{
2628	if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2*(2WORD_BITS)
2629	{
2630	Q[0] = A[2];
2631	Q[1] = A[3];
2632	}
2633	else
2634	{
2635	word T[4];
2636	T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
2637	Q[1] = SubatomicDivide(T+1, B[0], B[1]);
2638	Q[0] = SubatomicDivide(T, B[0], B[1]);
2639
2640	#if CRYPTOPP_DEBUG
2641	// multiply quotient and divisor and add remainder, make sure it equals dividend
2642	CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] \|\| (T[1]==B[1] && T[0]<B[0])));
2643	word P[4];
2644	LowLevel::Multiply2(P, Q, B);
2645	Add(P, P, T, 4);
2646	CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2647	#endif
2648	}
2649	}
2650	*/
2651
2652	static inline void AtomicDivide(word Q, const word A, const word *B)
2653	{
2654	word T[4];
2655	DWord q = DivideFourWordsByTwo<word, DWord>(T, DWord(A[0], A[1]), DWord(A[2], A[3]), DWord(B[0], B[1]));
2656	Q[0] = q.GetLowHalf();
2657	Q[1] = q.GetHighHalf();
2658
2659	#if CRYPTOPP_DEBUG
2660	if (B[0] \|\| B[1])
2661	{
2662	// multiply quotient and divisor and add remainder, make sure it equals dividend
2663	CRYPTOPP_ASSERT(!T[2] && !T[3] && (T[1] < B[1] \|\| (T[1]==B[1] && T[0]<B[0])));
2664	word P[4];
2665	s_pMul[0](P, Q, B);
2666	Add(P, P, T, 4);
2667	CRYPTOPP_ASSERT(memcmp(P, A, 4*WORD_SIZE)==0);
2668	}
2669	#endif
2670	}
2671
2672	// for use by Divide(), corrects the underestimated quotient {Q1,Q0}
2673	static void CorrectQuotientEstimate(word R, word T, word Q, const word B, size_t N)
2674	{
2675	CRYPTOPP_ASSERT(N && N%2==0);
2676
2677	AsymmetricMultiply(T, T+N+2, Q, 2, B, N);
2678
2679	word borrow = Subtract(R, R, T, N+2);
2680	CRYPTOPP_ASSERT(!borrow && !R[N+1]);
2681	CRYPTOPP_UNUSED(borrow);
2682
2683	while (R[N] \|\| Compare(R, B, N) >= 0)
2684	{
2685	R[N] -= Subtract(R, R, B, N);
2686	Q[1] += (++Q[0]==0);
2687	CRYPTOPP_ASSERT(Q[0] \|\| Q[1]); // no overflow
2688	}
2689	}
2690
2691	// R[NB] -------- remainder = A%B
2692	// Q[NA-NB+2] --- quotient = A/B
2693	// T[NA+3*(NB+2)] - temp work space
2694	// A[NA] -------- dividend
2695	// B[NB] -------- divisor
2696
2697	void Divide(word R, word Q, word T, const word A, size_t NA, const word *B, size_t NB)
2698	{
2699	CRYPTOPP_ASSERT(NA && NB && NA%2==0 && NB%2==0);
2700	CRYPTOPP_ASSERT(B[NB-1] \|\| B[NB-2]);
2701	CRYPTOPP_ASSERT(NB <= NA);
2702
2703	// set up temporary work space
2704	word *const TA=T;
2705	word *const TB=T+NA+2;
2706	word *const TP=T+NA+2+NB;
2707
2708	// copy B into TB and normalize it so that TB has highest bit set to 1
2709	unsigned shiftWords = (B[NB-1]==0);
2710	TB[0] = TB[NB-1] = 0;
2711	CopyWords(TB+shiftWords, B, NB-shiftWords);
2712	unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
2713	CRYPTOPP_ASSERT(shiftBits < WORD_BITS);
2714	ShiftWordsLeftByBits(TB, NB, shiftBits);
2715
2716	// copy A into TA and normalize it
2717	TA[0] = TA[NA] = TA[NA+1] = 0;
2718	CopyWords(TA+shiftWords, A, NA);
2719	ShiftWordsLeftByBits(TA, NA+2, shiftBits);
2720
2721	if (TA[NA+1]==0 && TA[NA] <= 1)
2722	{
2723	Q[NA-NB+1] = Q[NA-NB] = 0;
2724	while (TA[NA] \|\| Compare(TA+NA-NB, TB, NB) >= 0)
2725	{
2726	TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
2727	++Q[NA-NB];
2728	}
2729	}
2730	else
2731	{
2732	NA+=2;
2733	CRYPTOPP_ASSERT(Compare(TA+NA-NB, TB, NB) < 0);
2734	}
2735
2736	word BT[2];
2737	BT[0] = TB[NB-2] + 1;
2738	BT[1] = TB[NB-1] + (BT[0]==0);
2739
2740	// start reducing TA mod TB, 2 words at a time
2741	for (size_t i=NA-2; i>=NB; i-=2)
2742	{
2743	AtomicDivide(Q+i-NB, TA+i-2, BT);
2744	CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
2745	}
2746
2747	// copy TA into R, and denormalize it
2748	CopyWords(R, TA+shiftWords, NB);
2749	ShiftWordsRightByBits(R, NB, shiftBits);
2750	}
2751
2752	static inline size_t EvenWordCount(const word *X, size_t N)
2753	{
2754	while (N && X[N-2]==0 && X[N-1]==0)
2755	N-=2;
2756	return N;
2757	}
2758
2759	// return k
2760	// R[N] --- result = A^(-1) * 2^k mod M
2761	// T[4*N] - temporary work space
2762	// A[NA] -- number to take inverse of
2763	// M[N] --- modulus
2764
2765	unsigned int AlmostInverse(word R, word T, const word A, size_t NA, const word M, size_t N)
2766	{
2767	CRYPTOPP_ASSERT(NA<=N && N && N%2==0);
2768
2769	word *b = T;
2770	word *c = T+N;
2771	word f = T+2N;
2772	word g = T+3N;
2773	size_t bcLen=2, fgLen=EvenWordCount(M, N);
2774	unsigned int k=0;
2775	bool s=false;
2776
2777	SetWords(T, 0, 3*N);
2778	b[0]=1;
2779	CopyWords(f, A, NA);
2780	CopyWords(g, M, N);
2781
2782	while (1)
2783	{
2784	word t=f[0];
2785	while (!t)
2786	{
2787	if (EvenWordCount(f, fgLen)==0)
2788	{
2789	SetWords(R, 0, N);
2790	return 0;
2791	}
2792
2793	ShiftWordsRightByWords(f, fgLen, 1);
2794	bcLen += 2 * (c[bcLen-1] != 0);
2795	CRYPTOPP_ASSERT(bcLen <= N);
2796	ShiftWordsLeftByWords(c, bcLen, 1);
2797	k+=WORD_BITS;
2798	t=f[0];
2799	}
2800
2801	// t must be non-0; otherwise undefined.
2802	unsigned int i = TrailingZeros(t);
2803	t >>= i;
2804	k += i;
2805
2806	if (t==1 && f[1]==0 && EvenWordCount(f+2, fgLen-2)==0)
2807	{
2808	if (s)
2809	Subtract(R, M, b, N);
2810	else
2811	CopyWords(R, b, N);
2812	return k;
2813	}
2814
2815	ShiftWordsRightByBits(f, fgLen, i);
2816	t = ShiftWordsLeftByBits(c, bcLen, i);
2817	c[bcLen] += t;
2818	bcLen += 2 * (t!=0);
2819	CRYPTOPP_ASSERT(bcLen <= N);
2820
2821	bool swap = Compare(f, g, fgLen)==-1;
2822	ConditionalSwapPointers(swap, f, g);
2823	ConditionalSwapPointers(swap, b, c);
2824	s ^= swap;
2825
2826	fgLen -= 2 * !(f[fgLen-2] \| f[fgLen-1]);
2827
2828	Subtract(f, f, g, fgLen);
2829	t = Add(b, b, c, bcLen);
2830	b[bcLen] += t;
2831	bcLen += 2*t;
2832	CRYPTOPP_ASSERT(bcLen <= N);
2833	}
2834	}
2835
2836	// R[N] - result = A/(2^k) mod M
2837	// A[N] - input
2838	// M[N] - modulus
2839
2840	void DivideByPower2Mod(word R, const word A, size_t k, const word *M, size_t N)
2841	{
2842	CopyWords(R, A, N);
2843
2844	while (k--)
2845	{
2846	if (R[0]%2==0)
2847	ShiftWordsRightByBits(R, N, 1);
2848	else
2849	{
2850	word carry = Add(R, R, M, N);
2851	ShiftWordsRightByBits(R, N, 1);
2852	R[N-1] += carry<<(WORD_BITS-1);
2853	}
2854	}
2855	}
2856
2857	// R[N] - result = A*(2^k) mod M
2858	// A[N] - input
2859	// M[N] - modulus
2860
2861	void MultiplyByPower2Mod(word R, const word A, size_t k, const word *M, size_t N)
2862	{
2863	CopyWords(R, A, N);
2864
2865	while (k--)
2866	if (ShiftWordsLeftByBits(R, N, 1) \|\| Compare(R, M, N)>=0)
2867	Subtract(R, R, M, N);
2868	}
2869
2870	// ******************************************************************
2871
2872	InitializeInteger::InitializeInteger()
2873	{
2874	if (!g_pAssignIntToInteger)
2875	{
2876	SetFunctionPointers();
2877	g_pAssignIntToInteger = (CryptoPP::PAssignIntToInteger)AssignIntToInteger;
2878	}
2879	}
2880
2881	static const unsigned int RoundupSizeTable[] = {2, 2, 2, 4, 4, 8, 8, 8, 8};
2882
2883	static inline size_t RoundupSize(size_t n)
2884	{
2885	if (n<=8)
2886	return RoundupSizeTable[n];
2887	else if (n<=16)
2888	return 16;
2889	else if (n<=32)
2890	return 32;
2891	else if (n<=64)
2892	return 64;
2893	else
2894	return size_t(1) << BitPrecision(n-1);
2895	}
2896
2897	Integer::Integer()
2898	: reg(2), sign(POSITIVE)
2899	{
2900	reg[0] = reg[1] = 0;
2901	}
2902
2903	Integer::Integer(const Integer& t)
2904	: reg(RoundupSize(t.WordCount())), sign(t.sign)
2905	{
2906	CopyWords(reg, t.reg, reg.size());
2907	}
2908
2909	Integer::Integer(Sign s, lword value)
2910	: reg(2), sign(s)
2911	{
2912	reg[0] = word(value);
2913	reg[1] = word(SafeRightShift<WORD_BITS>(value));
2914	}
2915
2916	Integer::Integer(signed long value)
2917	: reg(2)
2918	{
2919	if (value >= 0)
2920	sign = POSITIVE;
2921	else
2922	{
2923	sign = NEGATIVE;
2924	value = -value;
2925	}
2926	reg[0] = word(value);
2927	reg[1] = word(SafeRightShift<WORD_BITS>((unsigned long)value));
2928	}
2929
2930	Integer::Integer(Sign s, word high, word low)
2931	: reg(2), sign(s)
2932	{
2933	reg[0] = low;
2934	reg[1] = high;
2935	}
2936
2937	bool Integer::IsConvertableToLong() const
2938	{
2939	if (ByteCount() > sizeof(long))
2940	return false;
2941
2942	unsigned long value = (unsigned long)reg[0];
2943	value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2944
2945	if (sign==POSITIVE)
2946	return (signed long)value >= 0;
2947	else
2948	return -(signed long)value < 0;
2949	}
2950
2951	signed long Integer::ConvertToLong() const
2952	{
2953	CRYPTOPP_ASSERT(IsConvertableToLong());
2954
2955	unsigned long value = (unsigned long)reg[0];
2956	value += SafeLeftShift<WORD_BITS, unsigned long>((unsigned long)reg[1]);
2957	return sign==POSITIVE ? value : -(signed long)value;
2958	}
2959
2960	Integer::Integer(BufferedTransformation &encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2961	{
2962	CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER \|\| o == LITTLE_ENDIAN_ORDER);
2963
2964	if (o == LITTLE_ENDIAN_ORDER)
2965	{
2966	SecByteBlock block(byteCount);
2967	encodedInteger.Get(block, block.size());
2968	std::reverse(block.begin(), block.begin()+block.size());
2969
2970	Decode(block.begin(), block.size(), s);
2971	return;
2972	}
2973
2974	Decode(encodedInteger, byteCount, s);
2975	}
2976
2977	Integer::Integer(const byte *encodedInteger, size_t byteCount, Signedness s, ByteOrder o)
2978	{
2979	CRYPTOPP_ASSERT(o == BIG_ENDIAN_ORDER \|\| o == LITTLE_ENDIAN_ORDER);
2980
2981	if (o == LITTLE_ENDIAN_ORDER)
2982	{
2983	SecByteBlock block(byteCount);
2984	#if (CRYPTOPP_MSC_VERSION >= 1400)
2985	std::reverse_copy(encodedInteger, encodedInteger+byteCount,
2986	stdext::make_checked_array_iterator(block.begin(), block.size()));
2987	#else
2988	std::reverse_copy(encodedInteger, encodedInteger+byteCount, block.begin());
2989	#endif
2990	Decode(block.begin(), block.size(), s);
2991	return;
2992	}
2993
2994	Decode(encodedInteger, byteCount, s);
2995	}
2996
2997	Integer::Integer(BufferedTransformation &bt)
2998	{
2999	BERDecode(bt);
3000	}
3001
3002	Integer::Integer(RandomNumberGenerator &rng, size_t bitcount)
3003	{
3004	Randomize(rng, bitcount);
3005	}
3006
3007	Integer::Integer(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3008	{
3009	if (!Randomize(rng, min, max, rnType, equiv, mod))
3010	throw Integer::RandomNumberNotFound();
3011	}
3012
3013	Integer Integer::Power2(size_t e)
3014	{
3015	Integer r((word)0, BitsToWords(e+1));
3016	r.SetBit(e);
3017	return r;
3018	}
3019
3020	template <long i>
3021	struct NewInteger
3022	{
3023	Integer * operator()() const
3024	{
3025	return new Integer(i);
3026	}
3027	};
3028
3029	// File scope static due to subtle initialization problems in a threaded
3030	// Windows environment. See the comments for Singleton. Thanks DB.
3031	namespace { const Integer& s_zero = Singleton<Integer>().Ref(); }
3032	const Integer &Integer::Zero()
3033	{
3034	return s_zero;
3035	}
3036
3037	// File scope static due to subtle initialization problems in a threaded
3038	// Windows environment. See the comments for Singleton. Thanks DB.
3039	namespace { const Integer& s_one = Singleton<Integer, NewInteger<1> >().Ref(); }
3040	const Integer &Integer::One()
3041	{
3042	return s_one;
3043	}
3044
3045	// File scope static due to subtle initialization problems in a threaded
3046	// Windows environment. See the comments for Singleton. Thanks DB.
3047	namespace { const Integer& s_two = Singleton<Integer, NewInteger<2> >().Ref(); }
3048	const Integer &Integer::Two()
3049	{
3050	return s_two;
3051	}
3052
3053	bool Integer::operator!() const
3054	{
3055	return IsNegative() ? false : (reg[0]==0 && WordCount()==0);
3056	}
3057
3058	Integer& Integer::operator=(const Integer& t)
3059	{
3060	if (this != &t)
3061	{
3062	if (reg.size() != t.reg.size() \|\| t.reg[t.reg.size()/2] == 0)
3063	reg.New(RoundupSize(t.WordCount()));
3064	CopyWords(reg, t.reg, reg.size());
3065	sign = t.sign;
3066	}
3067	return *this;
3068	}
3069
3070	bool Integer::GetBit(size_t n) const
3071	{
3072	// Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3073	// The code change occurred at Commit dc99266599a0e72d.
3074	if (n/WORD_BITS < reg.size())
3075	return bool((reg[n/WORD_BITS] >> (n % WORD_BITS)) & 1);
3076	else
3077	return 0;
3078	}
3079
3080	void Integer::SetBit(size_t n, bool value)
3081	{
3082	if (value)
3083	{
3084	reg.CleanGrow(RoundupSize(BitsToWords(n+1)));
3085	reg[n/WORD_BITS] \|= (word(1) << (n%WORD_BITS));
3086	}
3087	else
3088	{
3089	if (n/WORD_BITS < reg.size())
3090	reg[n/WORD_BITS] &= ~(word(1) << (n%WORD_BITS));
3091	}
3092	}
3093
3094	byte Integer::GetByte(size_t n) const
3095	{
3096	// Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
3097	// The code change occurred at Commit dc99266599a0e72d.
3098	if (n/WORD_SIZE < reg.size())
3099	return byte(reg[n/WORD_SIZE] >> ((n%WORD_SIZE)*8));
3100	else
3101	return 0;
3102	}
3103
3104	void Integer::SetByte(size_t n, byte value)
3105	{
3106	reg.CleanGrow(RoundupSize(BytesToWords(n+1)));
3107	reg[n/WORD_SIZE] &= ~(word(0xff) << 8*(n%WORD_SIZE));
3108	reg[n/WORD_SIZE] \|= (word(value) << 8*(n%WORD_SIZE));
3109	}
3110
3111	lword Integer::GetBits(size_t i, size_t n) const
3112	{
3113	lword v = 0;
3114	CRYPTOPP_ASSERT(n <= sizeof(v)*8);
3115	for (unsigned int j=0; j<n; j++)
3116	v \|= lword(GetBit(i+j)) << j;
3117	return v;
3118	}
3119
3120	Integer Integer::operator-() const
3121	{
3122	Integer result(*this);
3123	result.Negate();
3124	return result;
3125	}
3126
3127	Integer Integer::AbsoluteValue() const
3128	{
3129	Integer result(*this);
3130	result.sign = POSITIVE;
3131	return result;
3132	}
3133
3134	void Integer::swap(Integer &a)
3135	{
3136	reg.swap(a.reg);
3137	std::swap(sign, a.sign);
3138	}
3139
3140	Integer::Integer(word value, size_t length)
3141	: reg(RoundupSize(length)), sign(POSITIVE)
3142	{
3143	reg[0] = value;
3144	SetWords(reg+1, 0, reg.size()-1);
3145	}
3146
3147	template <class T>
3148	static Integer StringToInteger(const T *str, ByteOrder order)
3149	{
3150	CRYPTOPP_ASSERT( order == BIG_ENDIAN_ORDER \|\| order == LITTLE_ENDIAN_ORDER );
3151
3152	int radix, sign = 1;
3153	// GCC workaround
3154	// std::char_traits<wchar_t>::length() not defined in GCC 3.2 and STLport 4.5.3
3155	unsigned int length;
3156	for (length = 0; str[length] != 0; length++) {}
3157
3158	Integer v;
3159
3160	if (length == 0)
3161	return Integer::Zero();
3162
3163	switch (str[length-1])
3164	{
3165	case 'h':
3166	case 'H':
3167	radix=16;
3168	break;
3169	case 'o':
3170	case 'O':
3171	radix=8;
3172	break;
3173	case 'b':
3174	case 'B':
3175	radix=2;
3176	break;
3177	default:
3178	radix=10;
3179	}
3180
3181	// 'str' is of length 1 or more
3182	if (str[0] == '-')
3183	{
3184	sign = -1;
3185	str += 1, length -= 1;
3186	}
3187
3188	if (length > 2 && str[0] == '0' && (str[1] == 'x' \|\| str[1] == 'X'))
3189	{
3190	radix = 16;
3191	str += 2, length -= 2;
3192	}
3193
3194	if (order == BIG_ENDIAN_ORDER)
3195	{
3196	for (unsigned int i=0; i<length; i++)
3197	{
3198	int digit, ch = static_cast<int>(str[i]);
3199
3200	// Profiling showd the second and third Else needed to be swapped
3201	// The code change occurred at Commit dc99266599a0e72d.
3202	if (ch >= '0' && ch <= '9')
3203	digit = ch - '0';
3204	else if (ch >= 'a' && ch <= 'f')
3205	digit = ch - 'a' + 10;
3206	else if (ch >= 'A' && ch <= 'F')
3207	digit = ch - 'A' + 10;
3208	else
3209	digit = radix;
3210
3211	if (digit < radix)
3212	{
3213	v *= radix;
3214	v += digit;
3215	}
3216	}
3217	}
3218	else if (radix == 16 && order == LITTLE_ENDIAN_ORDER)
3219	{
3220	// Nibble high, low and count
3221	unsigned int nh = 0, nl = 0, nc = 0;
3222	Integer position(Integer::One());
3223
3224	for (unsigned int i=0; i<length; i++)
3225	{
3226	int digit, ch = static_cast<int>(str[i]);
3227
3228	if (ch >= '0' && ch <= '9')
3229	digit = ch - '0';
3230	else if (ch >= 'a' && ch <= 'f')
3231	digit = ch - 'a' + 10;
3232	else if (ch >= 'A' && ch <= 'F')
3233	digit = ch - 'A' + 10;
3234	else
3235	digit = radix;
3236
3237	if (digit < radix)
3238	{
3239	if (nc++ == 0)
3240	nh = digit;
3241	else
3242	nl = digit;
3243
3244	if (nc == 2)
3245	{
3246	v += position * (nh << 4 \| nl);
3247	nc = 0, position <<= 8;
3248	}
3249	}
3250	}
3251
3252	if (nc == 1)
3253	v += nh * position;
3254	}
3255	else // LITTLE_ENDIAN_ORDER && radix != 16
3256	{
3257	for (int i=static_cast<int>(length)-1; i>=0; i--)
3258	{
3259	int digit, ch = static_cast<int>(str[i]);
3260
3261	if (ch >= '0' && ch <= '9')
3262	digit = ch - '0';
3263	else if (ch >= 'a' && ch <= 'f')
3264	digit = ch - 'a' + 10;
3265	else if (ch >= 'A' && ch <= 'F')
3266	digit = ch - 'A' + 10;
3267	else
3268	digit = radix;
3269
3270	if (digit < radix)
3271	{
3272	v *= radix;
3273	v += digit;
3274	}
3275	}
3276	}
3277
3278	if (sign == -1)
3279	v.Negate();
3280
3281	return v;
3282	}
3283
3284	Integer::Integer(const char *str, ByteOrder order)
3285	: reg(2), sign(POSITIVE)
3286	{
3287	*this = StringToInteger(str,order);
3288	}
3289
3290	Integer::Integer(const wchar_t *str, ByteOrder order)
3291	: reg(2), sign(POSITIVE)
3292	{
3293	*this = StringToInteger(str,order);
3294	}
3295
3296	unsigned int Integer::WordCount() const
3297	{
3298	return (unsigned int)CountWords(reg, reg.size());
3299	}
3300
3301	unsigned int Integer::ByteCount() const
3302	{
3303	unsigned wordCount = WordCount();
3304	if (wordCount)
3305	return (wordCount-1)*WORD_SIZE + BytePrecision(reg[wordCount-1]);
3306	else
3307	return 0;
3308	}
3309
3310	unsigned int Integer::BitCount() const
3311	{
3312	unsigned wordCount = WordCount();
3313	if (wordCount)
3314	return (wordCount-1)*WORD_BITS + BitPrecision(reg[wordCount-1]);
3315	else
3316	return 0;
3317	}
3318
3319	void Integer::Decode(const byte *input, size_t inputLen, Signedness s)
3320	{
3321	StringStore store(input, inputLen);
3322	Decode(store, inputLen, s);
3323	}
3324
3325	void Integer::Decode(BufferedTransformation &bt, size_t inputLen, Signedness s)
3326	{
3327	CRYPTOPP_ASSERT(bt.MaxRetrievable() >= inputLen);
3328
3329	byte b;
3330	bt.Peek(b);
3331	sign = ((s==SIGNED) && (b & 0x80)) ? NEGATIVE : POSITIVE;
3332
3333	while (inputLen>0 && (sign==POSITIVE ? b==0 : b==0xff))
3334	{
3335	bt.Skip(1);
3336	inputLen--;
3337	bt.Peek(b);
3338	}
3339
3340	// The call to CleanNew is optimized away above -O0/-Og.
3341	const size_t size = RoundupSize(BytesToWords(inputLen));
3342	reg.CleanNew(size);
3343
3344	CRYPTOPP_ASSERT(reg.SizeInBytes() >= inputLen);
3345	for (size_t i=inputLen; i > 0; i--)
3346	{
3347	bt.Get(b);
3348	reg[(i-1)/WORD_SIZE] \|= word(b) << ((i-1)%WORD_SIZE)*8;
3349	}
3350
3351	if (sign == NEGATIVE)
3352	{
3353	for (size_t i=inputLen; i<reg.size()*WORD_SIZE; i++)
3354	reg[i/WORD_SIZE] \|= word(0xff) << (i%WORD_SIZE)*8;
3355	TwosComplement(reg, reg.size());
3356	}
3357	}
3358
3359	size_t Integer::MinEncodedSize(Signedness signedness) const
3360	{
3361	// Profiling tells us the original second If was dominant, so it was promoted to the first If statement.
3362	// The code change occurred at Commit dc99266599a0e72d.
3363	unsigned int outputLen = STDMAX(1U, ByteCount());
3364	const bool pre = (signedness == UNSIGNED);
3365	if (!pre && NotNegative() && (GetByte(outputLen-1) & 0x80))
3366	outputLen++;
3367	if (pre)
3368	return outputLen;
3369	if (IsNegative() && this < -Power2(outputLen8-1))
3370	outputLen++;
3371	return outputLen;
3372	}
3373
3374	void Integer::Encode(byte *output, size_t outputLen, Signedness signedness) const
3375	{
3376	CRYPTOPP_ASSERT(output && outputLen);
3377	ArraySink sink(output, outputLen);
3378	Encode(sink, outputLen, signedness);
3379	}
3380
3381	void Integer::Encode(BufferedTransformation &bt, size_t outputLen, Signedness signedness) const
3382	{
3383	if (signedness == UNSIGNED \|\| NotNegative())
3384	{
3385	for (size_t i=outputLen; i > 0; i--)
3386	bt.Put(GetByte(i-1));
3387	}
3388	else
3389	{
3390	// take two's complement of *this
3391	Integer temp = Integer::Power2(8STDMAX((size_t)ByteCount(), outputLen)) + this;
3392	temp.Encode(bt, outputLen, UNSIGNED);
3393	}
3394	}
3395
3396	void Integer::DEREncode(BufferedTransformation &bt) const
3397	{
3398	DERGeneralEncoder enc(bt, INTEGER);
3399	Encode(enc, MinEncodedSize(SIGNED), SIGNED);
3400	enc.MessageEnd();
3401	}
3402
3403	void Integer::BERDecode(const byte *input, size_t len)
3404	{
3405	StringStore store(input, len);
3406	BERDecode(store);
3407	}
3408
3409	void Integer::BERDecode(BufferedTransformation &bt)
3410	{
3411	BERGeneralDecoder dec(bt, INTEGER);
3412	if (!dec.IsDefiniteLength() \|\| dec.MaxRetrievable() < dec.RemainingLength())
3413	BERDecodeError();
3414	Decode(dec, (size_t)dec.RemainingLength(), SIGNED);
3415	dec.MessageEnd();
3416	}
3417
3418	void Integer::DEREncodeAsOctetString(BufferedTransformation &bt, size_t length) const
3419	{
3420	DERGeneralEncoder enc(bt, OCTET_STRING);
3421	Encode(enc, length);
3422	enc.MessageEnd();
3423	}
3424
3425	void Integer::BERDecodeAsOctetString(BufferedTransformation &bt, size_t length)
3426	{
3427	BERGeneralDecoder dec(bt, OCTET_STRING);
3428	if (!dec.IsDefiniteLength() \|\| dec.RemainingLength() != length)
3429	BERDecodeError();
3430	Decode(dec, length);
3431	dec.MessageEnd();
3432	}
3433
3434	size_t Integer::OpenPGPEncode(byte *output, size_t len) const
3435	{
3436	ArraySink sink(output, len);
3437	return OpenPGPEncode(sink);
3438	}
3439
3440	size_t Integer::OpenPGPEncode(BufferedTransformation &bt) const
3441	{
3442	word16 bitCount = word16(BitCount());
3443	bt.PutWord16(bitCount);
3444	size_t byteCount = BitsToBytes(bitCount);
3445	Encode(bt, byteCount);
3446	return 2 + byteCount;
3447	}
3448
3449	void Integer::OpenPGPDecode(const byte *input, size_t len)
3450	{
3451	StringStore store(input, len);
3452	OpenPGPDecode(store);
3453	}
3454
3455	void Integer::OpenPGPDecode(BufferedTransformation &bt)
3456	{
3457	word16 bitCount;
3458	if (bt.GetWord16(bitCount) != 2 \|\| bt.MaxRetrievable() < BitsToBytes(bitCount))
3459	throw OpenPGPDecodeErr();
3460	Decode(bt, BitsToBytes(bitCount));
3461	}
3462
3463	void Integer::Randomize(RandomNumberGenerator &rng, size_t nbits)
3464	{
3465	const size_t nbytes = nbits/8 + 1;
3466	SecByteBlock buf(nbytes);
3467	rng.GenerateBlock(buf, nbytes);
3468	if (nbytes)
3469	buf[0] = (byte)Crop(buf[0], nbits % 8);
3470	Decode(buf, nbytes, UNSIGNED);
3471	}
3472
3473	void Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max)
3474	{
3475	if (min > max)
3476	throw InvalidArgument("Integer: Min must be no greater than Max");
3477
3478	Integer range = max - min;
3479	const unsigned int nbits = range.BitCount();
3480
3481	do
3482	{
3483	Randomize(rng, nbits);
3484	}
3485	while (*this > range);
3486
3487	*this += min;
3488	}
3489
3490	bool Integer::Randomize(RandomNumberGenerator &rng, const Integer &min, const Integer &max, RandomNumberType rnType, const Integer &equiv, const Integer &mod)
3491	{
3492	return GenerateRandomNoThrow(rng, MakeParameters("Min", min)("Max", max)("RandomNumberType", rnType)("EquivalentTo", equiv)("Mod", mod));
3493	}
3494
3495	class KDF2_RNG : public RandomNumberGenerator
3496	{
3497	public:
3498	KDF2_RNG(const byte *seed, size_t seedSize)
3499	: m_counter(0), m_counterAndSeed(seedSize + 4)
3500	{
3501	memcpy(m_counterAndSeed + 4, seed, seedSize);
3502	}
3503
3504	void GenerateBlock(byte *output, size_t size)
3505	{
3506	PutWord(false, BIG_ENDIAN_ORDER, m_counterAndSeed, m_counter);
3507	++m_counter;
3508	P1363_KDF2<SHA1>::DeriveKey(output, size, m_counterAndSeed, m_counterAndSeed.size(), NULL, 0);
3509	}
3510
3511	private:
3512	word32 m_counter;
3513	SecByteBlock m_counterAndSeed;
3514	};
3515
3516	bool Integer::GenerateRandomNoThrow(RandomNumberGenerator &i_rng, const NameValuePairs &params)
3517	{
3518	Integer min = params.GetValueWithDefault("Min", Integer::Zero());
3519	Integer max;
3520	if (!params.GetValue("Max", max))
3521	{
3522	int bitLength;
3523	if (params.GetIntValue("BitLength", bitLength))
3524	max = Integer::Power2(bitLength);
3525	else
3526	throw InvalidArgument("Integer: missing Max argument");
3527	}
3528	if (min > max)
3529	throw InvalidArgument("Integer: Min must be no greater than Max");
3530
3531	Integer equiv = params.GetValueWithDefault("EquivalentTo", Integer::Zero());
3532	Integer mod = params.GetValueWithDefault("Mod", Integer::One());
3533
3534	if (equiv.IsNegative() \|\| equiv >= mod)
3535	throw InvalidArgument("Integer: invalid EquivalentTo and/or Mod argument");
3536
3537	Integer::RandomNumberType rnType = params.GetValueWithDefault("RandomNumberType", Integer::ANY);
3538
3539	member_ptr<KDF2_RNG> kdf2Rng;
3540	ConstByteArrayParameter seed;
3541	if (params.GetValue(Name::Seed(), seed))
3542	{
3543	ByteQueue bq;
3544	DERSequenceEncoder seq(bq);
3545	min.DEREncode(seq);
3546	max.DEREncode(seq);
3547	equiv.DEREncode(seq);
3548	mod.DEREncode(seq);
3549	DEREncodeUnsigned(seq, rnType);
3550	DEREncodeOctetString(seq, seed.begin(), seed.size());
3551	seq.MessageEnd();
3552
3553	SecByteBlock finalSeed((size_t)bq.MaxRetrievable());
3554	bq.Get(finalSeed, finalSeed.size());
3555	kdf2Rng.reset(new KDF2_RNG(finalSeed.begin(), finalSeed.size()));
3556	}
3557	RandomNumberGenerator &rng = kdf2Rng.get() ? (RandomNumberGenerator &)*kdf2Rng : i_rng;
3558
3559	switch (rnType)
3560	{
3561	case ANY:
3562	if (mod == One())
3563	Randomize(rng, min, max);
3564	else
3565	{
3566	Integer min1 = min + (equiv-min)%mod;
3567	if (max < min1)
3568	return false;
3569	Randomize(rng, Zero(), (max - min1) / mod);
3570	this = mod;
3571	*this += min1;
3572	}
3573	return true;
3574
3575	case PRIME:
3576	{
3577	const PrimeSelector pSelector = params.GetValueWithDefault(Name::PointerToPrimeSelector(), (const PrimeSelector )NULL);
3578
3579	int i;
3580	i = 0;
3581	while (1)
3582	{
3583	if (++i==16)
3584	{
3585	// check if there are any suitable primes in [min, max]
3586	Integer first = min;
3587	if (FirstPrime(first, max, equiv, mod, pSelector))
3588	{
3589	// if there is only one suitable prime, we're done
3590	*this = first;
3591	if (!FirstPrime(first, max, equiv, mod, pSelector))
3592	return true;
3593	}
3594	else
3595	return false;
3596	}
3597
3598	Randomize(rng, min, max);
3599	if (FirstPrime(this, STDMIN(this+mod*PrimeSearchInterval(max), max), equiv, mod, pSelector))
3600	return true;
3601	}
3602	}
3603
3604	default:
3605	throw InvalidArgument("Integer: invalid RandomNumberType argument");
3606	}
3607	}
3608
3609	std::istream& operator>>(std::istream& in, Integer &a)
3610	{
3611	char c;
3612	unsigned int length = 0;
3613	SecBlock<char> str(length + 16);
3614
3615	std::ws(in);
3616
3617	do
3618	{
3619	in.read(&c, 1);
3620	str[length++] = c;
3621	if (length >= str.size())
3622	str.Grow(length + 16);
3623	}
3624	while (in && (c=='-' \|\| c=='x' \|\| (c>='0' && c<='9') \|\| (c>='a' && c<='f') \|\| (c>='A' && c<='F') \|\| c=='h' \|\| c=='H' \|\| c=='o' \|\| c=='O' \|\| c==',' \|\| c=='.'));
3625
3626	if (in.gcount())
3627	in.putback(c);
3628	str[length-1] = '\0';
3629	a = Integer(str);
3630
3631	return in;
3632	}
3633
3634	std::ostream& operator<<(std::ostream& out, const Integer &a)
3635	{
3636	// Get relevant conversion specifications from ostream.
3637	const long f = out.flags() & std::ios::basefield; // Get base digits.
3638	int base, block;
3639	char suffix;
3640	switch(f)
3641	{
3642	case std::ios::oct :
3643	base = 8;
3644	block = 8;
3645	suffix = 'o';
3646	break;
3647	case std::ios::hex :
3648	base = 16;
3649	block = 4;
3650	suffix = 'h';
3651	break;
3652	default :
3653	base = 10;
3654	block = 3;
3655	suffix = '.';
3656	}
3657
3658	Integer temp1=a, temp2;
3659
3660	if (a.IsNegative())
3661	{
3662	out << '-';
3663	temp1.Negate();
3664	}
3665
3666	if (!a)
3667	out << '0';
3668
3669	static const char upper[]="0123456789ABCDEF";
3670	static const char lower[]="0123456789abcdef";
3671
3672	const char* vec = (out.flags() & std::ios::uppercase) ? upper : lower;
3673	unsigned int i=0;
3674	SecBlock<char> s(a.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
3675
3676	while (!!temp1)
3677	{
3678	word digit;
3679	Integer::Divide(digit, temp2, temp1, base);
3680	s[i++]=vec[digit];
3681	temp1.swap(temp2);
3682	}
3683
3684	while (i--)
3685	{
3686	out << s[i];
3687	// if (i && !(i%block))
3688	// out << ",";
3689	}
3690
3691	#ifdef CRYPTOPP_USE_STD_SHOWBASE
3692	if (out.flags() & std::ios_base::showbase)
3693	out << suffix;
3694
3695	return out;
3696	#else
3697	return out << suffix;
3698	#endif
3699	}
3700
3701	Integer& Integer::operator++()
3702	{
3703	if (NotNegative())
3704	{
3705	if (Increment(reg, reg.size()))
3706	{
3707	reg.CleanGrow(2*reg.size());
3708	reg[reg.size()/2]=1;
3709	}
3710	}
3711	else
3712	{
3713	word borrow = Decrement(reg, reg.size());
3714	CRYPTOPP_ASSERT(!borrow);
3715	CRYPTOPP_UNUSED(borrow);
3716
3717	if (WordCount()==0)
3718	*this = Zero();
3719	}
3720	return *this;
3721	}
3722
3723	Integer& Integer::operator--()
3724	{
3725	if (IsNegative())
3726	{
3727	if (Increment(reg, reg.size()))
3728	{
3729	reg.CleanGrow(2*reg.size());
3730	reg[reg.size()/2]=1;
3731	}
3732	}
3733	else
3734	{
3735	if (Decrement(reg, reg.size()))
3736	*this = -One();
3737	}
3738	return *this;
3739	}
3740
3741	void PositiveAdd(Integer &sum, const Integer &a, const Integer& b)
3742	{
3743	// Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3744	// The code change occurred at Commit dc99266599a0e72d.
3745	int carry; const bool pre = (a.reg.size() == b.reg.size());
3746	if (!pre && a.reg.size() > b.reg.size())
3747	{
3748	carry = Add(sum.reg, a.reg, b.reg, b.reg.size());
3749	CopyWords(sum.reg+b.reg.size(), a.reg+b.reg.size(), a.reg.size()-b.reg.size());
3750	carry = Increment(sum.reg+b.reg.size(), a.reg.size()-b.reg.size(), carry);
3751	}
3752	else if (pre)
3753	{
3754	carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3755	}
3756	else
3757	{
3758	carry = Add(sum.reg, a.reg, b.reg, a.reg.size());
3759	CopyWords(sum.reg+a.reg.size(), b.reg+a.reg.size(), b.reg.size()-a.reg.size());
3760	carry = Increment(sum.reg+a.reg.size(), b.reg.size()-a.reg.size(), carry);
3761	}
3762
3763	if (carry)
3764	{
3765	sum.reg.CleanGrow(2*sum.reg.size());
3766	sum.reg[sum.reg.size()/2] = 1;
3767	}
3768	sum.sign = Integer::POSITIVE;
3769	}
3770
3771	void PositiveSubtract(Integer &diff, const Integer &a, const Integer& b)
3772	{
3773	unsigned aSize = a.WordCount();
3774	aSize += aSize%2;
3775	unsigned bSize = b.WordCount();
3776	bSize += bSize%2;
3777
3778	// Profiling tells us the original second Else If was dominant, so it was promoted to the first If statement.
3779	// The code change occurred at Commit dc99266599a0e72d.
3780	if (aSize > bSize)
3781	{
3782	word borrow = Subtract(diff.reg, a.reg, b.reg, bSize);
3783	CopyWords(diff.reg+bSize, a.reg+bSize, aSize-bSize);
3784	borrow = Decrement(diff.reg+bSize, aSize-bSize, borrow);
3785	CRYPTOPP_ASSERT(!borrow);
3786	diff.sign = Integer::POSITIVE;
3787	}
3788	else if (aSize == bSize)
3789	{
3790	if (Compare(a.reg, b.reg, aSize) >= 0)
3791	{
3792	Subtract(diff.reg, a.reg, b.reg, aSize);
3793	diff.sign = Integer::POSITIVE;
3794	}
3795	else
3796	{
3797	Subtract(diff.reg, b.reg, a.reg, aSize);
3798	diff.sign = Integer::NEGATIVE;
3799	}
3800	}
3801	else
3802	{
3803	word borrow = Subtract(diff.reg, b.reg, a.reg, aSize);
3804	CopyWords(diff.reg+aSize, b.reg+aSize, bSize-aSize);
3805	borrow = Decrement(diff.reg+aSize, bSize-aSize, borrow);
3806	CRYPTOPP_ASSERT(!borrow);
3807	diff.sign = Integer::NEGATIVE;
3808	}
3809	}
3810
3811	// MSVC .NET 2003 workaround
3812	template <class T> inline const T& STDMAX2(const T& a, const T& b)
3813	{
3814	return a < b ? b : a;
3815	}
3816
3817	Integer Integer::Plus(const Integer& b) const
3818	{
3819	Integer sum((word)0, STDMAX2(reg.size(), b.reg.size()));
3820	if (NotNegative())
3821	{
3822	if (b.NotNegative())
3823	PositiveAdd(sum, *this, b);
3824	else
3825	PositiveSubtract(sum, *this, b);
3826	}
3827	else
3828	{
3829	if (b.NotNegative())
3830	PositiveSubtract(sum, b, *this);
3831	else
3832	{
3833	PositiveAdd(sum, *this, b);
3834	sum.sign = Integer::NEGATIVE;
3835	}
3836	}
3837	return sum;
3838	}
3839
3840	Integer& Integer::operator+=(const Integer& t)
3841	{
3842	reg.CleanGrow(t.reg.size());
3843	if (NotNegative())
3844	{
3845	if (t.NotNegative())
3846	PositiveAdd(this, this, t);
3847	else
3848	PositiveSubtract(this, this, t);
3849	}
3850	else
3851	{
3852	if (t.NotNegative())
3853	PositiveSubtract(this, t, this);
3854	else
3855	{
3856	PositiveAdd(this, this, t);
3857	sign = Integer::NEGATIVE;
3858	}
3859	}
3860	return *this;
3861	}
3862
3863	Integer Integer::Minus(const Integer& b) const
3864	{
3865	Integer diff((word)0, STDMAX2(reg.size(), b.reg.size()));
3866	if (NotNegative())
3867	{
3868	if (b.NotNegative())
3869	PositiveSubtract(diff, *this, b);
3870	else
3871	PositiveAdd(diff, *this, b);
3872	}
3873	else
3874	{
3875	if (b.NotNegative())
3876	{
3877	PositiveAdd(diff, *this, b);
3878	diff.sign = Integer::NEGATIVE;
3879	}
3880	else
3881	PositiveSubtract(diff, b, *this);
3882	}
3883	return diff;
3884	}
3885
3886	Integer& Integer::operator-=(const Integer& t)
3887	{
3888	reg.CleanGrow(t.reg.size());
3889	if (NotNegative())
3890	{
3891	if (t.NotNegative())
3892	PositiveSubtract(this, this, t);
3893	else
3894	PositiveAdd(this, this, t);
3895	}
3896	else
3897	{
3898	if (t.NotNegative())
3899	{
3900	PositiveAdd(this, this, t);
3901	sign = Integer::NEGATIVE;
3902	}
3903	else
3904	PositiveSubtract(this, t, this);
3905	}
3906	return *this;
3907	}
3908
3909	Integer& Integer::operator<<=(size_t n)
3910	{
3911	const size_t wordCount = WordCount();
3912	const size_t shiftWords = n / WORD_BITS;
3913	const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3914
3915	reg.CleanGrow(RoundupSize(wordCount+BitsToWords(n)));
3916	ShiftWordsLeftByWords(reg, wordCount + shiftWords, shiftWords);
3917	ShiftWordsLeftByBits(reg+shiftWords, wordCount+BitsToWords(shiftBits), shiftBits);
3918	return *this;
3919	}
3920
3921	Integer& Integer::operator>>=(size_t n)
3922	{
3923	const size_t wordCount = WordCount();
3924	const size_t shiftWords = n / WORD_BITS;
3925	const unsigned int shiftBits = (unsigned int)(n % WORD_BITS);
3926
3927	ShiftWordsRightByWords(reg, wordCount, shiftWords);
3928	if (wordCount > shiftWords)
3929	ShiftWordsRightByBits(reg, wordCount-shiftWords, shiftBits);
3930	if (IsNegative() && WordCount()==0) // avoid -0
3931	*this = Zero();
3932	return *this;
3933	}
3934
3935	void PositiveMultiply(Integer &product, const Integer &a, const Integer &b)
3936	{
3937	size_t aSize = RoundupSize(a.WordCount());
3938	size_t bSize = RoundupSize(b.WordCount());
3939
3940	product.reg.CleanNew(RoundupSize(aSize+bSize));
3941	product.sign = Integer::POSITIVE;
3942
3943	IntegerSecBlock workspace(aSize + bSize);
3944	AsymmetricMultiply(product.reg, workspace, a.reg, aSize, b.reg, bSize);
3945	}
3946
3947	void Multiply(Integer &product, const Integer &a, const Integer &b)
3948	{
3949	PositiveMultiply(product, a, b);
3950
3951	if (a.NotNegative() != b.NotNegative())
3952	product.Negate();
3953	}
3954
3955	Integer Integer::Times(const Integer &b) const
3956	{
3957	Integer product;
3958	Multiply(product, *this, b);
3959	return product;
3960	}
3961
3962	/*
3963	void PositiveDivide(Integer &remainder, Integer &quotient,
3964	const Integer &dividend, const Integer &divisor)
3965	{
3966	remainder.reg.CleanNew(divisor.reg.size());
3967	remainder.sign = Integer::POSITIVE;
3968	quotient.reg.New(0);
3969	quotient.sign = Integer::POSITIVE;
3970	unsigned i=dividend.BitCount();
3971	while (i--)
3972	{
3973	word overflow = ShiftWordsLeftByBits(remainder.reg, remainder.reg.size(), 1);
3974	remainder.reg[0] \|= dividend[i];
3975	if (overflow \|\| remainder >= divisor)
3976	{
3977	Subtract(remainder.reg, remainder.reg, divisor.reg, remainder.reg.size());
3978	quotient.SetBit(i);
3979	}
3980	}
3981	}
3982	*/
3983
3984	void PositiveDivide(Integer &remainder, Integer &quotient,
3985	const Integer &a, const Integer &b)
3986	{
3987	unsigned aSize = a.WordCount();
3988	unsigned bSize = b.WordCount();
3989
3990	if (!bSize)
3991	throw Integer::DivideByZero();
3992
3993	if (aSize < bSize)
3994	{
3995	remainder = a;
3996	remainder.sign = Integer::POSITIVE;
3997	quotient = Integer::Zero();
3998	return;
3999	}
4000
4001	aSize += aSize%2; // round up to next even number
4002	bSize += bSize%2;
4003
4004	remainder.reg.CleanNew(RoundupSize(bSize));
4005	remainder.sign = Integer::POSITIVE;
4006	quotient.reg.CleanNew(RoundupSize(aSize-bSize+2));
4007	quotient.sign = Integer::POSITIVE;
4008
4009	IntegerSecBlock T(aSize+3*(bSize+2));
4010	Divide(remainder.reg, quotient.reg, T, a.reg, aSize, b.reg, bSize);
4011	}
4012
4013	void Integer::Divide(Integer &remainder, Integer &quotient, const Integer &dividend, const Integer &divisor)
4014	{
4015	PositiveDivide(remainder, quotient, dividend, divisor);
4016
4017	if (dividend.IsNegative())
4018	{
4019	quotient.Negate();
4020	if (remainder.NotZero())
4021	{
4022	--quotient;
4023	remainder = divisor.AbsoluteValue() - remainder;
4024	}
4025	}
4026
4027	if (divisor.IsNegative())
4028	quotient.Negate();
4029	}
4030
4031	void Integer::DivideByPowerOf2(Integer &r, Integer &q, const Integer &a, unsigned int n)
4032	{
4033	q = a;
4034	q >>= n;
4035
4036	const size_t wordCount = BitsToWords(n);
4037	if (wordCount <= a.WordCount())
4038	{
4039	r.reg.resize(RoundupSize(wordCount));
4040	CopyWords(r.reg, a.reg, wordCount);
4041	SetWords(r.reg+wordCount, 0, r.reg.size()-wordCount);
4042	if (n % WORD_BITS != 0)
4043	r.reg[wordCount-1] %= (word(1) << (n % WORD_BITS));
4044	}
4045	else
4046	{
4047	r.reg.resize(RoundupSize(a.WordCount()));
4048	CopyWords(r.reg, a.reg, r.reg.size());
4049	}
4050	r.sign = POSITIVE;
4051
4052	if (a.IsNegative() && r.NotZero())
4053	{
4054	--q;
4055	r = Power2(n) - r;
4056	}
4057	}
4058
4059	Integer Integer::DividedBy(const Integer &b) const
4060	{
4061	Integer remainder, quotient;
4062	Integer::Divide(remainder, quotient, *this, b);
4063	return quotient;
4064	}
4065
4066	Integer Integer::Modulo(const Integer &b) const
4067	{
4068	Integer remainder, quotient;
4069	Integer::Divide(remainder, quotient, *this, b);
4070	return remainder;
4071	}
4072
4073	void Integer::Divide(word &remainder, Integer &quotient, const Integer &dividend, word divisor)
4074	{
4075	if (!divisor)
4076	throw Integer::DivideByZero();
4077
4078	if ((divisor & (divisor-1)) == 0) // divisor is a power of 2
4079	{
4080	quotient = dividend >> (BitPrecision(divisor)-1);
4081	remainder = dividend.reg[0] & (divisor-1);
4082	return;
4083	}
4084
4085	unsigned int i = dividend.WordCount();
4086	quotient.reg.CleanNew(RoundupSize(i));
4087	remainder = 0;
4088	while (i--)
4089	{
4090	quotient.reg[i] = DWord(dividend.reg[i], remainder) / divisor;
4091	remainder = DWord(dividend.reg[i], remainder) % divisor;
4092	}
4093
4094	if (dividend.NotNegative())
4095	quotient.sign = POSITIVE;
4096	else
4097	{
4098	quotient.sign = NEGATIVE;
4099	if (remainder)
4100	{
4101	--quotient;
4102	remainder = divisor - remainder;
4103	}
4104	}
4105	}
4106
4107	Integer Integer::DividedBy(word b) const
4108	{
4109	word remainder;
4110	Integer quotient;
4111	Integer::Divide(remainder, quotient, *this, b);
4112	return quotient;
4113	}
4114
4115	word Integer::Modulo(word divisor) const
4116	{
4117	if (!divisor)
4118	throw Integer::DivideByZero();
4119
4120	word remainder;
4121
4122	// Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4123	// The code change occurred at Commit dc99266599a0e72d.
4124	if ((divisor & (divisor-1)) != 0) // divisor is not a power of 2
4125	{
4126	// Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4127	// The code change occurred at Commit dc99266599a0e72d.
4128	unsigned int i = WordCount();
4129	if (divisor > 5)
4130	{
4131	remainder = 0;
4132	while (i--)
4133	remainder = DWord(reg[i], remainder) % divisor;
4134	}
4135	else
4136	{
4137	DWord sum(0, 0);
4138	while (i--)
4139	sum += reg[i];
4140	remainder = sum % divisor;
4141	}
4142	}
4143	else // divisor is a power of 2
4144	{
4145	remainder = reg[0] & (divisor-1);
4146	}
4147
4148	if (IsNegative() && remainder)
4149	remainder = divisor - remainder;
4150
4151	return remainder;
4152	}
4153
4154	void Integer::Negate()
4155	{
4156	if (!!(this)) // don't flip sign if this==0
4157	sign = Sign(1-sign);
4158	}
4159
4160	int Integer::PositiveCompare(const Integer& t) const
4161	{
4162	// Profiling tells us the original Else was dominant, so it was promoted to the first If statement.
4163	// The code change occurred at Commit dc99266599a0e72d.
4164	const unsigned size = WordCount(), tSize = t.WordCount();
4165	if (size != tSize)
4166	return size > tSize ? 1 : -1;
4167	else
4168	return CryptoPP::Compare(reg, t.reg, size);
4169	}
4170
4171	int Integer::Compare(const Integer& t) const
4172	{
4173	if (NotNegative())
4174	{
4175	if (t.NotNegative())
4176	return PositiveCompare(t);
4177	else
4178	return 1;
4179	}
4180	else
4181	{
4182	if (t.NotNegative())
4183	return -1;
4184	else
4185	return -PositiveCompare(t);
4186	}
4187	}
4188
4189	Integer Integer::SquareRoot() const
4190	{
4191	if (!IsPositive())
4192	return Zero();
4193
4194	// overestimate square root
4195	Integer x, y = Power2((BitCount()+1)/2);
4196	CRYPTOPP_ASSERT(yy >= this);
4197
4198	do
4199	{
4200	x = y;
4201	y = (x + *this/x) >> 1;
4202	} while (y<x);
4203
4204	return x;
4205	}
4206
4207	bool Integer::IsSquare() const
4208	{
4209	Integer r = SquareRoot();
4210	return *this == r.Squared();
4211	}
4212
4213	bool Integer::IsUnit() const
4214	{
4215	return (WordCount() == 1) && (reg[0] == 1);
4216	}
4217
4218	Integer Integer::MultiplicativeInverse() const
4219	{
4220	return IsUnit() ? *this : Zero();
4221	}
4222
4223	Integer a_times_b_mod_c(const Integer &x, const Integer& y, const Integer& m)
4224	{
4225	return x*y%m;
4226	}
4227
4228	Integer a_exp_b_mod_c(const Integer &x, const Integer& e, const Integer& m)
4229	{
4230	ModularArithmetic mr(m);
4231	return mr.Exponentiate(x, e);
4232	}
4233
4234	Integer Integer::Gcd(const Integer &a, const Integer &b)
4235	{
4236	return EuclideanDomainOf<Integer>().Gcd(a, b);
4237	}
4238
4239	Integer Integer::InverseMod(const Integer &m) const
4240	{
4241	CRYPTOPP_ASSERT(m.NotNegative());
4242
4243	if (IsNegative())
4244	return Modulo(m).InverseMod(m);
4245
4246	if (m.IsEven())
4247	{
4248	if (!m \|\| IsEven())
4249	return Zero(); // no inverse
4250	if (*this == One())
4251	return One();
4252
4253	Integer u = m.Modulo(this).InverseMod(this);
4254	return !u ? Zero() : (m(this-u)+1)/(*this);
4255	}
4256
4257	SecBlock<word> T(m.reg.size() * 4);
4258	Integer r((word)0, m.reg.size());
4259	unsigned k = AlmostInverse(r.reg, T, reg, reg.size(), m.reg, m.reg.size());
4260	DivideByPower2Mod(r.reg, r.reg, k, m.reg, m.reg.size());
4261	return r;
4262	}
4263
4264	word Integer::InverseMod(word mod) const
4265	{
4266	word g0 = mod, g1 = *this % mod;
4267	word v0 = 0, v1 = 1;
4268	word y;
4269
4270	while (g1)
4271	{
4272	if (g1 == 1)
4273	return v1;
4274	y = g0 / g1;
4275	g0 = g0 % g1;
4276	v0 += y * v1;
4277
4278	if (!g0)
4279	break;
4280	if (g0 == 1)
4281	return mod-v0;
4282	y = g1 / g0;
4283	g1 = g1 % g0;
4284	v1 += y * v0;
4285	}
4286	return 0;
4287	}
4288
4289	// ********************************************************
4290
4291	ModularArithmetic::ModularArithmetic(BufferedTransformation &bt)
4292	{
4293	BERSequenceDecoder seq(bt);
4294	OID oid(seq);
4295	if (oid != ASN1::prime_field())
4296	BERDecodeError();
4297	m_modulus.BERDecode(seq);
4298	seq.MessageEnd();
4299	m_result.reg.resize(m_modulus.reg.size());
4300	}
4301
4302	void ModularArithmetic::DEREncode(BufferedTransformation &bt) const
4303	{
4304	DERSequenceEncoder seq(bt);
4305	ASN1::prime_field().DEREncode(seq);
4306	m_modulus.DEREncode(seq);
4307	seq.MessageEnd();
4308	}
4309
4310	void ModularArithmetic::DEREncodeElement(BufferedTransformation &out, const Element &a) const
4311	{
4312	a.DEREncodeAsOctetString(out, MaxElementByteLength());
4313	}
4314
4315	void ModularArithmetic::BERDecodeElement(BufferedTransformation &in, Element &a) const
4316	{
4317	a.BERDecodeAsOctetString(in, MaxElementByteLength());
4318	}
4319
4320	const Integer& ModularArithmetic::Half(const Integer &a) const
4321	{
4322	if (a.reg.size()==m_modulus.reg.size())
4323	{
4324	CryptoPP::DivideByPower2Mod(m_result.reg.begin(), a.reg, 1, m_modulus.reg, a.reg.size());
4325	return m_result;
4326	}
4327	else
4328	return m_result1 = (a.IsEven() ? (a >> 1) : ((a+m_modulus) >> 1));
4329	}
4330
4331	const Integer& ModularArithmetic::Add(const Integer &a, const Integer &b) const
4332	{
4333	if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4334	{
4335	if (CryptoPP::Add(m_result.reg.begin(), a.reg, b.reg, a.reg.size())
4336	\|\| Compare(m_result.reg, m_modulus.reg, a.reg.size()) >= 0)
4337	{
4338	CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4339	}
4340	return m_result;
4341	}
4342	else
4343	{
4344	m_result1 = a+b;
4345	if (m_result1 >= m_modulus)
4346	m_result1 -= m_modulus;
4347	return m_result1;
4348	}
4349	}
4350
4351	Integer& ModularArithmetic::Accumulate(Integer &a, const Integer &b) const
4352	{
4353	if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4354	{
4355	if (CryptoPP::Add(a.reg, a.reg, b.reg, a.reg.size())
4356	\|\| Compare(a.reg, m_modulus.reg, a.reg.size()) >= 0)
4357	{
4358	CryptoPP::Subtract(a.reg, a.reg, m_modulus.reg, a.reg.size());
4359	}
4360	}
4361	else
4362	{
4363	a+=b;
4364	if (a>=m_modulus)
4365	a-=m_modulus;
4366	}
4367
4368	return a;
4369	}
4370
4371	const Integer& ModularArithmetic::Subtract(const Integer &a, const Integer &b) const
4372	{
4373	if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4374	{
4375	if (CryptoPP::Subtract(m_result.reg.begin(), a.reg, b.reg, a.reg.size()))
4376	CryptoPP::Add(m_result.reg.begin(), m_result.reg, m_modulus.reg, a.reg.size());
4377	return m_result;
4378	}
4379	else
4380	{
4381	m_result1 = a-b;
4382	if (m_result1.IsNegative())
4383	m_result1 += m_modulus;
4384	return m_result1;
4385	}
4386	}
4387
4388	Integer& ModularArithmetic::Reduce(Integer &a, const Integer &b) const
4389	{
4390	if (a.reg.size()==m_modulus.reg.size() && b.reg.size()==m_modulus.reg.size())
4391	{
4392	if (CryptoPP::Subtract(a.reg, a.reg, b.reg, a.reg.size()))
4393	CryptoPP::Add(a.reg, a.reg, m_modulus.reg, a.reg.size());
4394	}
4395	else
4396	{
4397	a-=b;
4398	if (a.IsNegative())
4399	a+=m_modulus;
4400	}
4401
4402	return a;
4403	}
4404
4405	const Integer& ModularArithmetic::Inverse(const Integer &a) const
4406	{
4407	if (!a)
4408	return a;
4409
4410	CopyWords(m_result.reg.begin(), m_modulus.reg, m_modulus.reg.size());
4411	if (CryptoPP::Subtract(m_result.reg.begin(), m_result.reg, a.reg, a.reg.size()))
4412	Decrement(m_result.reg.begin()+a.reg.size(), m_modulus.reg.size()-a.reg.size());
4413
4414	return m_result;
4415	}
4416
4417	Integer ModularArithmetic::CascadeExponentiate(const Integer &x, const Integer &e1, const Integer &y, const Integer &e2) const
4418	{
4419	if (m_modulus.IsOdd())
4420	{
4421	MontgomeryRepresentation dr(m_modulus);
4422	return dr.ConvertOut(dr.CascadeExponentiate(dr.ConvertIn(x), e1, dr.ConvertIn(y), e2));
4423	}
4424	else
4425	return AbstractRing<Integer>::CascadeExponentiate(x, e1, y, e2);
4426	}
4427
4428	void ModularArithmetic::SimultaneousExponentiate(Integer results, const Integer &base, const Integer exponents, unsigned int exponentsCount) const
4429	{
4430	if (m_modulus.IsOdd())
4431	{
4432	MontgomeryRepresentation dr(m_modulus);
4433	dr.SimultaneousExponentiate(results, dr.ConvertIn(base), exponents, exponentsCount);
4434	for (unsigned int i=0; i<exponentsCount; i++)
4435	results[i] = dr.ConvertOut(results[i]);
4436	}
4437	else
4438	AbstractRing<Integer>::SimultaneousExponentiate(results, base, exponents, exponentsCount);
4439	}
4440
4441	MontgomeryRepresentation::MontgomeryRepresentation(const Integer &m) // modulus must be odd
4442	: ModularArithmetic(m),
4443	m_u((word)0, m_modulus.reg.size()),
4444	m_workspace(5*m_modulus.reg.size())
4445	{
4446	if (!m_modulus.IsOdd())
4447	throw InvalidArgument("MontgomeryRepresentation: Montgomery representation requires an odd modulus");
4448
4449	RecursiveInverseModPower2(m_u.reg, m_workspace, m_modulus.reg, m_modulus.reg.size());
4450	}
4451
4452	const Integer& MontgomeryRepresentation::Multiply(const Integer &a, const Integer &b) const
4453	{
4454	word *const T = m_workspace.begin();
4455	word *const R = m_result.reg.begin();
4456	const size_t N = m_modulus.reg.size();
4457	CRYPTOPP_ASSERT(a.reg.size()<=N && b.reg.size()<=N);
4458
4459	AsymmetricMultiply(T, T+2*N, a.reg, a.reg.size(), b.reg, b.reg.size());
4460	SetWords(T+a.reg.size()+b.reg.size(), 0, 2*N-a.reg.size()-b.reg.size());
4461	MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4462	return m_result;
4463	}
4464
4465	const Integer& MontgomeryRepresentation::Square(const Integer &a) const
4466	{
4467	word *const T = m_workspace.begin();
4468	word *const R = m_result.reg.begin();
4469	const size_t N = m_modulus.reg.size();
4470	CRYPTOPP_ASSERT(a.reg.size()<=N);
4471
4472	CryptoPP::Square(T, T+2*N, a.reg, a.reg.size());
4473	SetWords(T+2a.reg.size(), 0, 2N-2*a.reg.size());
4474	MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4475	return m_result;
4476	}
4477
4478	Integer MontgomeryRepresentation::ConvertOut(const Integer &a) const
4479	{
4480	word *const T = m_workspace.begin();
4481	word *const R = m_result.reg.begin();
4482	const size_t N = m_modulus.reg.size();
4483	CRYPTOPP_ASSERT(a.reg.size()<=N);
4484
4485	CopyWords(T, a.reg, a.reg.size());
4486	SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4487	MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4488	return m_result;
4489	}
4490
4491	const Integer& MontgomeryRepresentation::MultiplicativeInverse(const Integer &a) const
4492	{
4493	// return (EuclideanMultiplicativeInverse(a, modulus)<<(2WORD_BITSmodulus.reg.size()))%modulus;
4494	word *const T = m_workspace.begin();
4495	word *const R = m_result.reg.begin();
4496	const size_t N = m_modulus.reg.size();
4497	CRYPTOPP_ASSERT(a.reg.size()<=N);
4498
4499	CopyWords(T, a.reg, a.reg.size());
4500	SetWords(T+a.reg.size(), 0, 2*N-a.reg.size());
4501	MontgomeryReduce(R, T+2*N, T, m_modulus.reg, m_u.reg, N);
4502	unsigned k = AlmostInverse(R, T, R, N, m_modulus.reg, N);
4503
4504	// cout << "k=" << k << " N32=" << 32N << endl;
4505
4506	if (k>N*WORD_BITS)
4507	DivideByPower2Mod(R, R, k-N*WORD_BITS, m_modulus.reg, N);
4508	else
4509	MultiplyByPower2Mod(R, R, N*WORD_BITS-k, m_modulus.reg, N);
4510
4511	return m_result;
4512	}
4513
4514	// Specialization declared in misc.h to allow us to print integers
4515	// with additional control options, like arbirary bases and uppercase.
4516	template <> CRYPTOPP_DLL
4517	std::string IntToString<Integer>(Integer value, unsigned int base)
4518	{
4519	// Hack... set the high bit for uppercase. Set the next bit fo a suffix.
4520	static const unsigned int BIT_32 = (1U << 31);
4521	const bool UPPER = !!(base & BIT_32);
4522	static const unsigned int BIT_31 = (1U << 30);
4523	const bool BASE = !!(base & BIT_31);
4524
4525	const char CH = UPPER ? 'A' : 'a';
4526	base &= ~(BIT_32\|BIT_31);
4527	CRYPTOPP_ASSERT(base >= 2 && base <= 32);
4528
4529	if (value == 0)
4530	return "0";
4531
4532	bool negative = false, zero = false;
4533	if (value.IsNegative())
4534	{
4535	negative = true;
4536	value.Negate();
4537	}
4538
4539	if (!value)
4540	zero = true;
4541
4542	SecBlock<char> s(value.BitCount() / (SaturatingSubtract1(BitPrecision(base),1U)) + 1);
4543	Integer temp;
4544
4545	unsigned int i=0;
4546	while (!!value)
4547	{
4548	word digit;
4549	Integer::Divide(digit, temp, value, word(base));
4550	s[i++]=char((digit < 10 ? '0' : (CH - 10)) + digit);
4551	value.swap(temp);
4552	}
4553
4554	std::string result;
4555	result.reserve(i+2);
4556
4557	if (negative)
4558	result += '-';
4559
4560	if (zero)
4561	result += '0';
4562
4563	while (i--)
4564	result += s[i];
4565
4566	if (BASE)
4567	{
4568	if (base == 10)
4569	result += '.';
4570	else if (base == 16)
4571	result += 'h';
4572	else if (base == 8)
4573	result += 'o';
4574	else if (base == 2)
4575	result += 'b';
4576	}
4577
4578	return result;
4579	}
4580
4581	// Specialization declared in misc.h to avoid Coverity findings.
4582	template <> CRYPTOPP_DLL
4583	std::string IntToString<word64>(word64 value, unsigned int base)
4584	{
4585	// Hack... set the high bit for uppercase.
4586	static const unsigned int HIGH_BIT = (1U << 31);
4587	const char CH = !!(base & HIGH_BIT) ? 'A' : 'a';
4588	base &= ~HIGH_BIT;
4589
4590	CRYPTOPP_ASSERT(base >= 2);
4591	if (value == 0)
4592	return "0";
4593
4594	std::string result;
4595	while (value > 0)
4596	{
4597	word64 digit = value % base;
4598	result = char((digit < 10 ? '0' : (CH - 10)) + digit) + result;
4599	value /= base;
4600	}
4601	return result;
4602	}
4603
4604	NAMESPACE_END
4605
4606	#endif

Note: See TracBrowser for help on using the repository browser.

Download in other formats: