1 | // salsa.cpp - written and placed in the public domain by Wei Dai |
---|
2 | |
---|
3 | // use "cl /EP /P /DCRYPTOPP_GENERATE_X64_MASM salsa.cpp" to generate MASM code |
---|
4 | |
---|
5 | #include "pch.h" |
---|
6 | #include "config.h" |
---|
7 | |
---|
8 | #ifndef CRYPTOPP_GENERATE_X64_MASM |
---|
9 | |
---|
10 | #include "salsa.h" |
---|
11 | #include "argnames.h" |
---|
12 | #include "misc.h" |
---|
13 | #include "cpu.h" |
---|
14 | |
---|
15 | #if CRYPTOPP_MSC_VERSION |
---|
16 | # pragma warning(disable: 4702 4740) |
---|
17 | #endif |
---|
18 | |
---|
// TODO: work around a GCC 4.8+ issue with the SSE2 ASM until the exact details are
//   known and a fix is released. Reproduce with "valgrind ./cryptest.exe tv salsa".
//   Clang is affected as well, due to "Inline assembly operands don't work with
//   .intel_syntax": https://llvm.org/bugs/show_bug.cgi?id=24232
---|
23 | #if defined(CRYPTOPP_DISABLE_SALSA_ASM) |
---|
24 | # undef CRYPTOPP_X86_ASM_AVAILABLE |
---|
25 | # undef CRYPTOPP_X32_ASM_AVAILABLE |
---|
26 | # undef CRYPTOPP_X64_ASM_AVAILABLE |
---|
27 | # undef CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
---|
28 | # undef CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE |
---|
29 | # define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0 |
---|
30 | # define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0 |
---|
31 | #endif |
---|
32 | |
---|
33 | NAMESPACE_BEGIN(CryptoPP) |
---|
34 | |
---|
35 | #if CRYPTOPP_DEBUG && !defined(CRYPTOPP_DOXYGEN_PROCESSING) |
---|
36 | void Salsa20_TestInstantiations() |
---|
37 | { |
---|
38 | Salsa20::Encryption x1; |
---|
39 | XSalsa20::Encryption x2; |
---|
40 | } |
---|
41 | #endif |
---|
42 | |
---|
43 | void Salsa20_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length) |
---|
44 | { |
---|
45 | m_rounds = params.GetIntValueWithDefault(Name::Rounds(), 20); |
---|
46 | |
---|
47 | if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20)) |
---|
48 | throw InvalidRounds(Salsa20::StaticAlgorithmName(), m_rounds); |
---|
49 | |
---|
50 | // m_state is reordered for SSE2 |
---|
51 | GetBlock<word32, LittleEndian> get1(key); |
---|
52 | get1(m_state[13])(m_state[10])(m_state[7])(m_state[4]); |
---|
53 | GetBlock<word32, LittleEndian> get2(key + length - 16); |
---|
54 | get2(m_state[15])(m_state[12])(m_state[9])(m_state[6]); |
---|
55 | |
---|
56 | // "expand 16-byte k" or "expand 32-byte k" |
---|
57 | m_state[0] = 0x61707865; |
---|
58 | m_state[1] = (length == 16) ? 0x3120646e : 0x3320646e; |
---|
59 | m_state[2] = (length == 16) ? 0x79622d36 : 0x79622d32; |
---|
60 | m_state[3] = 0x6b206574; |
---|
61 | } |
---|
62 | |
---|
63 | void Salsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length) |
---|
64 | { |
---|
65 | CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length); |
---|
66 | CRYPTOPP_ASSERT(length==8); |
---|
67 | |
---|
68 | GetBlock<word32, LittleEndian> get(IV); |
---|
69 | get(m_state[14])(m_state[11]); |
---|
70 | m_state[8] = m_state[5] = 0; |
---|
71 | } |
---|
72 | |
---|
73 | void Salsa20_Policy::SeekToIteration(lword iterationCount) |
---|
74 | { |
---|
75 | m_state[8] = (word32)iterationCount; |
---|
76 | m_state[5] = (word32)SafeRightShift<32>(iterationCount); |
---|
77 | } |
---|
78 | |
---|
79 | #if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64) && !defined(CRYPTOPP_DISABLE_SALSA_ASM) |
---|
80 | unsigned int Salsa20_Policy::GetAlignment() const |
---|
81 | { |
---|
82 | #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
---|
83 | if (HasSSE2()) |
---|
84 | return 16; |
---|
85 | else |
---|
86 | #endif |
---|
87 | return GetAlignmentOf<word32>(); |
---|
88 | } |
---|
89 | |
---|
90 | unsigned int Salsa20_Policy::GetOptimalBlockSize() const |
---|
91 | { |
---|
92 | #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
---|
93 | if (HasSSE2()) |
---|
94 | return 4*BYTES_PER_ITERATION; |
---|
95 | else |
---|
96 | #endif |
---|
97 | return BYTES_PER_ITERATION; |
---|
98 | } |
---|
99 | #endif |
---|
100 | |
---|
101 | #ifdef CRYPTOPP_X64_MASM_AVAILABLE |
---|
extern "C" {
// Stand-alone keystream routine with C linkage. Salsa20_Policy::OperateKeystream
// forwards to it when CRYPTOPP_X64_MASM_AVAILABLE is defined. A MASM procedure of
// the same name is emitted by this file when it is preprocessed with
// CRYPTOPP_GENERATE_X64_MASM defined.
//   output, input  - passed through from OperateKeystream
//   iterationCount - number of blocks to produce
//   rounds         - caller passes m_rounds
//   state          - caller passes m_state.data()
void Salsa20_OperateKeystream(byte *output, const byte *input, size_t iterationCount, int rounds, void *state);
}
---|
105 | #endif |
---|
106 | |
---|
107 | #if CRYPTOPP_MSC_VERSION |
---|
108 | # pragma warning(disable: 4731) // frame pointer register 'ebp' modified by inline assembly code |
---|
109 | #endif |
---|
110 | |
---|
111 | void Salsa20_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount) |
---|
112 | { |
---|
113 | #endif // #ifdef CRYPTOPP_GENERATE_X64_MASM |
---|
114 | |
---|
115 | #ifdef CRYPTOPP_X64_MASM_AVAILABLE |
---|
116 | Salsa20_OperateKeystream(output, input, iterationCount, m_rounds, m_state.data()); |
---|
117 | return; |
---|
118 | #endif |
---|
119 | |
---|
120 | #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
---|
121 | #ifdef CRYPTOPP_GENERATE_X64_MASM |
---|
122 | ALIGN 8 |
---|
123 | Salsa20_OperateKeystream PROC FRAME |
---|
124 | mov r10, [rsp + 5*8] ; state |
---|
125 | alloc_stack(10*16 + 32*16 + 8) |
---|
126 | save_xmm128 xmm6, 0200h |
---|
127 | save_xmm128 xmm7, 0210h |
---|
128 | save_xmm128 xmm8, 0220h |
---|
129 | save_xmm128 xmm9, 0230h |
---|
130 | save_xmm128 xmm10, 0240h |
---|
131 | save_xmm128 xmm11, 0250h |
---|
132 | save_xmm128 xmm12, 0260h |
---|
133 | save_xmm128 xmm13, 0270h |
---|
134 | save_xmm128 xmm14, 0280h |
---|
135 | save_xmm128 xmm15, 0290h |
---|
136 | .endprolog |
---|
137 | |
---|
138 | #define REG_output rcx |
---|
139 | #define REG_input rdx |
---|
140 | #define REG_iterationCount r8 |
---|
141 | #define REG_state r10 |
---|
142 | #define REG_rounds e9d |
---|
143 | #define REG_roundsLeft eax |
---|
144 | #define REG_temp32 r11d |
---|
145 | #define REG_temp r11 |
---|
146 | #define SSE2_WORKSPACE rsp |
---|
147 | #else |
---|
148 | if (HasSSE2()) |
---|
149 | { |
---|
150 | #if CRYPTOPP_BOOL_X64 |
---|
151 | #define REG_output %1 |
---|
152 | #define REG_input %0 |
---|
153 | #define REG_iterationCount %2 |
---|
154 | #define REG_state %4 /* constant */ |
---|
155 | #define REG_rounds %3 /* constant */ |
---|
156 | #define REG_roundsLeft eax |
---|
157 | #define REG_temp32 edx |
---|
158 | #define REG_temp rdx |
---|
159 | #define SSE2_WORKSPACE %5 /* constant */ |
---|
160 | |
---|
161 | CRYPTOPP_ALIGN_DATA(16) byte workspace[16*32]; |
---|
162 | #else |
---|
163 | #define REG_output edi |
---|
164 | #define REG_input eax |
---|
165 | #define REG_iterationCount ecx |
---|
166 | #define REG_state esi |
---|
167 | #define REG_rounds edx |
---|
168 | #define REG_roundsLeft ebx |
---|
169 | #define REG_temp32 ebp |
---|
170 | #define REG_temp ebp |
---|
171 | #define SSE2_WORKSPACE esp + WORD_SZ |
---|
172 | #endif |
---|
173 | |
---|
174 | #ifdef __GNUC__ |
---|
175 | __asm__ __volatile__ |
---|
176 | ( |
---|
177 | INTEL_NOPREFIX |
---|
178 | AS_PUSH_IF86( bx) |
---|
179 | #else |
---|
180 | void *s = m_state.data(); |
---|
181 | word32 r = m_rounds; |
---|
182 | |
---|
183 | AS2( mov REG_iterationCount, iterationCount) |
---|
184 | AS2( mov REG_input, input) |
---|
185 | AS2( mov REG_output, output) |
---|
186 | AS2( mov REG_state, s) |
---|
187 | AS2( mov REG_rounds, r) |
---|
188 | #endif |
---|
189 | #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM |
---|
190 | |
---|
191 | AS_PUSH_IF86( bp) |
---|
192 | AS2( cmp REG_iterationCount, 4) |
---|
193 | ASJ( jl, 5, f) |
---|
194 | |
---|
195 | #if CRYPTOPP_BOOL_X86 |
---|
196 | AS2( mov ebx, esp) |
---|
197 | AS2( and esp, -16) |
---|
198 | AS2( sub esp, 32*16) |
---|
199 | AS1( push ebx) |
---|
200 | #endif |
---|
201 | |
---|
202 | #define SSE2_EXPAND_S(i, j) \ |
---|
203 | ASS( pshufd xmm4, xmm##i, j, j, j, j) \ |
---|
204 | AS2( movdqa [SSE2_WORKSPACE + (i*4+j)*16 + 256], xmm4) |
---|
205 | |
---|
206 | AS2( movdqa xmm0, [REG_state + 0*16]) |
---|
207 | AS2( movdqa xmm1, [REG_state + 1*16]) |
---|
208 | AS2( movdqa xmm2, [REG_state + 2*16]) |
---|
209 | AS2( movdqa xmm3, [REG_state + 3*16]) |
---|
210 | SSE2_EXPAND_S(0, 0) |
---|
211 | SSE2_EXPAND_S(0, 1) |
---|
212 | SSE2_EXPAND_S(0, 2) |
---|
213 | SSE2_EXPAND_S(0, 3) |
---|
214 | SSE2_EXPAND_S(1, 0) |
---|
215 | SSE2_EXPAND_S(1, 2) |
---|
216 | SSE2_EXPAND_S(1, 3) |
---|
217 | SSE2_EXPAND_S(2, 1) |
---|
218 | SSE2_EXPAND_S(2, 2) |
---|
219 | SSE2_EXPAND_S(2, 3) |
---|
220 | SSE2_EXPAND_S(3, 0) |
---|
221 | SSE2_EXPAND_S(3, 1) |
---|
222 | SSE2_EXPAND_S(3, 2) |
---|
223 | SSE2_EXPAND_S(3, 3) |
---|
224 | |
---|
225 | #define SSE2_EXPAND_S85(i) \ |
---|
226 | AS2( mov dword ptr [SSE2_WORKSPACE + 8*16 + i*4 + 256], REG_roundsLeft) \ |
---|
227 | AS2( mov dword ptr [SSE2_WORKSPACE + 5*16 + i*4 + 256], REG_temp32) \ |
---|
228 | AS2( add REG_roundsLeft, 1) \ |
---|
229 | AS2( adc REG_temp32, 0) |
---|
230 | |
---|
231 | ASL(1) |
---|
232 | AS2( mov REG_roundsLeft, dword ptr [REG_state + 8*4]) |
---|
233 | AS2( mov REG_temp32, dword ptr [REG_state + 5*4]) |
---|
234 | SSE2_EXPAND_S85(0) |
---|
235 | SSE2_EXPAND_S85(1) |
---|
236 | SSE2_EXPAND_S85(2) |
---|
237 | SSE2_EXPAND_S85(3) |
---|
238 | AS2( mov dword ptr [REG_state + 8*4], REG_roundsLeft) |
---|
239 | AS2( mov dword ptr [REG_state + 5*4], REG_temp32) |
---|
240 | |
---|
241 | #define SSE2_QUARTER_ROUND(a, b, d, i) \ |
---|
242 | AS2( movdqa xmm4, xmm##d) \ |
---|
243 | AS2( paddd xmm4, xmm##a) \ |
---|
244 | AS2( movdqa xmm5, xmm4) \ |
---|
245 | AS2( pslld xmm4, i) \ |
---|
246 | AS2( psrld xmm5, 32-i) \ |
---|
247 | AS2( pxor xmm##b, xmm4) \ |
---|
248 | AS2( pxor xmm##b, xmm5) |
---|
249 | |
---|
250 | #define L01(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) /* y3 */ |
---|
251 | #define L02(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##C, [SSE2_WORKSPACE + a*16 + i*256]) /* y0 */ |
---|
252 | #define L03(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) /* y0+y3 */ |
---|
253 | #define L04(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) |
---|
254 | #define L05(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 7) |
---|
255 | #define L06(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-7) |
---|
256 | #define L07(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + b*16 + i*256]) |
---|
257 | #define L08(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) /* z1 */ |
---|
258 | #define L09(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + b*16], xmm##A) |
---|
259 | #define L10(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) |
---|
260 | #define L11(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##C) /* z1+y0 */ |
---|
261 | #define L12(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) |
---|
262 | #define L13(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 9) |
---|
263 | #define L14(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-9) |
---|
264 | #define L15(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + c*16 + i*256]) |
---|
265 | #define L16(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) /* z2 */ |
---|
266 | #define L17(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + c*16], xmm##A) |
---|
267 | #define L18(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) |
---|
268 | #define L19(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##B) /* z2+z1 */ |
---|
269 | #define L20(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##B, xmm##A) |
---|
270 | #define L21(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 13) |
---|
271 | #define L22(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##B, 32-13) |
---|
272 | #define L23(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, [SSE2_WORKSPACE + d*16 + i*256]) |
---|
273 | #define L24(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##B) /* z3 */ |
---|
274 | #define L25(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + d*16], xmm##A) |
---|
275 | #define L26(A,B,C,D,a,b,c,d,i) AS2( paddd xmm##A, xmm##D) /* z3+z2 */ |
---|
276 | #define L27(A,B,C,D,a,b,c,d,i) AS2( movdqa xmm##D, xmm##A) |
---|
277 | #define L28(A,B,C,D,a,b,c,d,i) AS2( pslld xmm##A, 18) |
---|
278 | #define L29(A,B,C,D,a,b,c,d,i) AS2( psrld xmm##D, 32-18) |
---|
279 | #define L30(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##C) /* xor y0 */ |
---|
280 | #define L31(A,B,C,D,a,b,c,d,i) AS2( pxor xmm##A, xmm##D) /* z0 */ |
---|
281 | #define L32(A,B,C,D,a,b,c,d,i) AS2( movdqa [SSE2_WORKSPACE + a*16], xmm##A) |
---|
282 | |
---|
283 | #define SSE2_QUARTER_ROUND_X8(i, a, b, c, d, e, f, g, h) \ |
---|
284 | L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) \ |
---|
285 | L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) \ |
---|
286 | L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) \ |
---|
287 | L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) \ |
---|
288 | L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) \ |
---|
289 | L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) \ |
---|
290 | L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) \ |
---|
291 | L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) \ |
---|
292 | L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) \ |
---|
293 | L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) \ |
---|
294 | L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) \ |
---|
295 | L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) \ |
---|
296 | L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) \ |
---|
297 | L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) \ |
---|
298 | L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) \ |
---|
299 | L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) \ |
---|
300 | L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) \ |
---|
301 | L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) \ |
---|
302 | L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) \ |
---|
303 | L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) \ |
---|
304 | L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) \ |
---|
305 | L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) \ |
---|
306 | L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) \ |
---|
307 | L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) \ |
---|
308 | L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) \ |
---|
309 | L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) \ |
---|
310 | L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) \ |
---|
311 | L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) \ |
---|
312 | L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) \ |
---|
313 | L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) \ |
---|
314 | L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) \ |
---|
315 | L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) |
---|
316 | |
---|
317 | #define SSE2_QUARTER_ROUND_X16(i, a, b, c, d, e, f, g, h, A, B, C, D, E, F, G, H) \ |
---|
318 | L01(0,1,2,3, a,b,c,d, i) L01(4,5,6,7, e,f,g,h, i) L01(8,9,10,11, A,B,C,D, i) L01(12,13,14,15, E,F,G,H, i) \ |
---|
319 | L02(0,1,2,3, a,b,c,d, i) L02(4,5,6,7, e,f,g,h, i) L02(8,9,10,11, A,B,C,D, i) L02(12,13,14,15, E,F,G,H, i) \ |
---|
320 | L03(0,1,2,3, a,b,c,d, i) L03(4,5,6,7, e,f,g,h, i) L03(8,9,10,11, A,B,C,D, i) L03(12,13,14,15, E,F,G,H, i) \ |
---|
321 | L04(0,1,2,3, a,b,c,d, i) L04(4,5,6,7, e,f,g,h, i) L04(8,9,10,11, A,B,C,D, i) L04(12,13,14,15, E,F,G,H, i) \ |
---|
322 | L05(0,1,2,3, a,b,c,d, i) L05(4,5,6,7, e,f,g,h, i) L05(8,9,10,11, A,B,C,D, i) L05(12,13,14,15, E,F,G,H, i) \ |
---|
323 | L06(0,1,2,3, a,b,c,d, i) L06(4,5,6,7, e,f,g,h, i) L06(8,9,10,11, A,B,C,D, i) L06(12,13,14,15, E,F,G,H, i) \ |
---|
324 | L07(0,1,2,3, a,b,c,d, i) L07(4,5,6,7, e,f,g,h, i) L07(8,9,10,11, A,B,C,D, i) L07(12,13,14,15, E,F,G,H, i) \ |
---|
325 | L08(0,1,2,3, a,b,c,d, i) L08(4,5,6,7, e,f,g,h, i) L08(8,9,10,11, A,B,C,D, i) L08(12,13,14,15, E,F,G,H, i) \ |
---|
326 | L09(0,1,2,3, a,b,c,d, i) L09(4,5,6,7, e,f,g,h, i) L09(8,9,10,11, A,B,C,D, i) L09(12,13,14,15, E,F,G,H, i) \ |
---|
327 | L10(0,1,2,3, a,b,c,d, i) L10(4,5,6,7, e,f,g,h, i) L10(8,9,10,11, A,B,C,D, i) L10(12,13,14,15, E,F,G,H, i) \ |
---|
328 | L11(0,1,2,3, a,b,c,d, i) L11(4,5,6,7, e,f,g,h, i) L11(8,9,10,11, A,B,C,D, i) L11(12,13,14,15, E,F,G,H, i) \ |
---|
329 | L12(0,1,2,3, a,b,c,d, i) L12(4,5,6,7, e,f,g,h, i) L12(8,9,10,11, A,B,C,D, i) L12(12,13,14,15, E,F,G,H, i) \ |
---|
330 | L13(0,1,2,3, a,b,c,d, i) L13(4,5,6,7, e,f,g,h, i) L13(8,9,10,11, A,B,C,D, i) L13(12,13,14,15, E,F,G,H, i) \ |
---|
331 | L14(0,1,2,3, a,b,c,d, i) L14(4,5,6,7, e,f,g,h, i) L14(8,9,10,11, A,B,C,D, i) L14(12,13,14,15, E,F,G,H, i) \ |
---|
332 | L15(0,1,2,3, a,b,c,d, i) L15(4,5,6,7, e,f,g,h, i) L15(8,9,10,11, A,B,C,D, i) L15(12,13,14,15, E,F,G,H, i) \ |
---|
333 | L16(0,1,2,3, a,b,c,d, i) L16(4,5,6,7, e,f,g,h, i) L16(8,9,10,11, A,B,C,D, i) L16(12,13,14,15, E,F,G,H, i) \ |
---|
334 | L17(0,1,2,3, a,b,c,d, i) L17(4,5,6,7, e,f,g,h, i) L17(8,9,10,11, A,B,C,D, i) L17(12,13,14,15, E,F,G,H, i) \ |
---|
335 | L18(0,1,2,3, a,b,c,d, i) L18(4,5,6,7, e,f,g,h, i) L18(8,9,10,11, A,B,C,D, i) L18(12,13,14,15, E,F,G,H, i) \ |
---|
336 | L19(0,1,2,3, a,b,c,d, i) L19(4,5,6,7, e,f,g,h, i) L19(8,9,10,11, A,B,C,D, i) L19(12,13,14,15, E,F,G,H, i) \ |
---|
337 | L20(0,1,2,3, a,b,c,d, i) L20(4,5,6,7, e,f,g,h, i) L20(8,9,10,11, A,B,C,D, i) L20(12,13,14,15, E,F,G,H, i) \ |
---|
338 | L21(0,1,2,3, a,b,c,d, i) L21(4,5,6,7, e,f,g,h, i) L21(8,9,10,11, A,B,C,D, i) L21(12,13,14,15, E,F,G,H, i) \ |
---|
339 | L22(0,1,2,3, a,b,c,d, i) L22(4,5,6,7, e,f,g,h, i) L22(8,9,10,11, A,B,C,D, i) L22(12,13,14,15, E,F,G,H, i) \ |
---|
340 | L23(0,1,2,3, a,b,c,d, i) L23(4,5,6,7, e,f,g,h, i) L23(8,9,10,11, A,B,C,D, i) L23(12,13,14,15, E,F,G,H, i) \ |
---|
341 | L24(0,1,2,3, a,b,c,d, i) L24(4,5,6,7, e,f,g,h, i) L24(8,9,10,11, A,B,C,D, i) L24(12,13,14,15, E,F,G,H, i) \ |
---|
342 | L25(0,1,2,3, a,b,c,d, i) L25(4,5,6,7, e,f,g,h, i) L25(8,9,10,11, A,B,C,D, i) L25(12,13,14,15, E,F,G,H, i) \ |
---|
343 | L26(0,1,2,3, a,b,c,d, i) L26(4,5,6,7, e,f,g,h, i) L26(8,9,10,11, A,B,C,D, i) L26(12,13,14,15, E,F,G,H, i) \ |
---|
344 | L27(0,1,2,3, a,b,c,d, i) L27(4,5,6,7, e,f,g,h, i) L27(8,9,10,11, A,B,C,D, i) L27(12,13,14,15, E,F,G,H, i) \ |
---|
345 | L28(0,1,2,3, a,b,c,d, i) L28(4,5,6,7, e,f,g,h, i) L28(8,9,10,11, A,B,C,D, i) L28(12,13,14,15, E,F,G,H, i) \ |
---|
346 | L29(0,1,2,3, a,b,c,d, i) L29(4,5,6,7, e,f,g,h, i) L29(8,9,10,11, A,B,C,D, i) L29(12,13,14,15, E,F,G,H, i) \ |
---|
347 | L30(0,1,2,3, a,b,c,d, i) L30(4,5,6,7, e,f,g,h, i) L30(8,9,10,11, A,B,C,D, i) L30(12,13,14,15, E,F,G,H, i) \ |
---|
348 | L31(0,1,2,3, a,b,c,d, i) L31(4,5,6,7, e,f,g,h, i) L31(8,9,10,11, A,B,C,D, i) L31(12,13,14,15, E,F,G,H, i) \ |
---|
349 | L32(0,1,2,3, a,b,c,d, i) L32(4,5,6,7, e,f,g,h, i) L32(8,9,10,11, A,B,C,D, i) L32(12,13,14,15, E,F,G,H, i) |
---|
350 | |
---|
351 | #if CRYPTOPP_BOOL_X64 |
---|
352 | SSE2_QUARTER_ROUND_X16(1, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) |
---|
353 | #else |
---|
354 | SSE2_QUARTER_ROUND_X8(1, 2, 6, 10, 14, 3, 7, 11, 15) |
---|
355 | SSE2_QUARTER_ROUND_X8(1, 0, 4, 8, 12, 1, 5, 9, 13) |
---|
356 | #endif |
---|
357 | AS2( mov REG_roundsLeft, REG_rounds) |
---|
358 | ASJ( jmp, 2, f) |
---|
359 | |
---|
360 | ASL(SSE2_Salsa_Output) |
---|
361 | AS2( movdqa xmm0, xmm4) |
---|
362 | AS2( punpckldq xmm4, xmm5) |
---|
363 | AS2( movdqa xmm1, xmm6) |
---|
364 | AS2( punpckldq xmm6, xmm7) |
---|
365 | AS2( movdqa xmm2, xmm4) |
---|
366 | AS2( punpcklqdq xmm4, xmm6) // e |
---|
367 | AS2( punpckhqdq xmm2, xmm6) // f |
---|
368 | AS2( punpckhdq xmm0, xmm5) |
---|
369 | AS2( punpckhdq xmm1, xmm7) |
---|
370 | AS2( movdqa xmm6, xmm0) |
---|
371 | AS2( punpcklqdq xmm0, xmm1) // g |
---|
372 | AS2( punpckhqdq xmm6, xmm1) // h |
---|
373 | AS_XMM_OUTPUT4(SSE2_Salsa_Output_A, REG_input, REG_output, 4, 2, 0, 6, 1, 0, 4, 8, 12, 1) |
---|
374 | AS1( ret) |
---|
375 | |
---|
376 | ASL(6) |
---|
377 | #if CRYPTOPP_BOOL_X64 |
---|
378 | SSE2_QUARTER_ROUND_X16(0, 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15) |
---|
379 | ASL(2) |
---|
380 | SSE2_QUARTER_ROUND_X16(0, 0, 13, 10, 7, 1, 14, 11, 4, 2, 15, 8, 5, 3, 12, 9, 6) |
---|
381 | #else |
---|
382 | SSE2_QUARTER_ROUND_X8(0, 2, 6, 10, 14, 3, 7, 11, 15) |
---|
383 | SSE2_QUARTER_ROUND_X8(0, 0, 4, 8, 12, 1, 5, 9, 13) |
---|
384 | ASL(2) |
---|
385 | SSE2_QUARTER_ROUND_X8(0, 2, 15, 8, 5, 3, 12, 9, 6) |
---|
386 | SSE2_QUARTER_ROUND_X8(0, 0, 13, 10, 7, 1, 14, 11, 4) |
---|
387 | #endif |
---|
388 | AS2( sub REG_roundsLeft, 2) |
---|
389 | ASJ( jnz, 6, b) |
---|
390 | |
---|
391 | #define SSE2_OUTPUT_4(a, b, c, d) \ |
---|
392 | AS2( movdqa xmm4, [SSE2_WORKSPACE + a*16 + 256])\ |
---|
393 | AS2( paddd xmm4, [SSE2_WORKSPACE + a*16])\ |
---|
394 | AS2( movdqa xmm5, [SSE2_WORKSPACE + b*16 + 256])\ |
---|
395 | AS2( paddd xmm5, [SSE2_WORKSPACE + b*16])\ |
---|
396 | AS2( movdqa xmm6, [SSE2_WORKSPACE + c*16 + 256])\ |
---|
397 | AS2( paddd xmm6, [SSE2_WORKSPACE + c*16])\ |
---|
398 | AS2( movdqa xmm7, [SSE2_WORKSPACE + d*16 + 256])\ |
---|
399 | AS2( paddd xmm7, [SSE2_WORKSPACE + d*16])\ |
---|
400 | ASC( call, SSE2_Salsa_Output) |
---|
401 | |
---|
402 | SSE2_OUTPUT_4(0, 13, 10, 7) |
---|
403 | SSE2_OUTPUT_4(4, 1, 14, 11) |
---|
404 | SSE2_OUTPUT_4(8, 5, 2, 15) |
---|
405 | SSE2_OUTPUT_4(12, 9, 6, 3) |
---|
406 | AS2( test REG_input, REG_input) |
---|
407 | ASJ( jz, 9, f) |
---|
408 | AS2( add REG_input, 12*16) |
---|
409 | ASL(9) |
---|
410 | AS2( add REG_output, 12*16) |
---|
411 | AS2( sub REG_iterationCount, 4) |
---|
412 | AS2( cmp REG_iterationCount, 4) |
---|
413 | ASJ( jge, 1, b) |
---|
414 | AS_POP_IF86( sp) |
---|
415 | |
---|
416 | ASL(5) |
---|
417 | AS2( sub REG_iterationCount, 1) |
---|
418 | ASJ( jl, 4, f) |
---|
419 | AS2( movdqa xmm0, [REG_state + 0*16]) |
---|
420 | AS2( movdqa xmm1, [REG_state + 1*16]) |
---|
421 | AS2( movdqa xmm2, [REG_state + 2*16]) |
---|
422 | AS2( movdqa xmm3, [REG_state + 3*16]) |
---|
423 | AS2( mov REG_roundsLeft, REG_rounds) |
---|
424 | |
---|
425 | ASL(0) |
---|
426 | SSE2_QUARTER_ROUND(0, 1, 3, 7) |
---|
427 | SSE2_QUARTER_ROUND(1, 2, 0, 9) |
---|
428 | SSE2_QUARTER_ROUND(2, 3, 1, 13) |
---|
429 | SSE2_QUARTER_ROUND(3, 0, 2, 18) |
---|
430 | ASS( pshufd xmm1, xmm1, 2, 1, 0, 3) |
---|
431 | ASS( pshufd xmm2, xmm2, 1, 0, 3, 2) |
---|
432 | ASS( pshufd xmm3, xmm3, 0, 3, 2, 1) |
---|
433 | SSE2_QUARTER_ROUND(0, 3, 1, 7) |
---|
434 | SSE2_QUARTER_ROUND(3, 2, 0, 9) |
---|
435 | SSE2_QUARTER_ROUND(2, 1, 3, 13) |
---|
436 | SSE2_QUARTER_ROUND(1, 0, 2, 18) |
---|
437 | ASS( pshufd xmm1, xmm1, 0, 3, 2, 1) |
---|
438 | ASS( pshufd xmm2, xmm2, 1, 0, 3, 2) |
---|
439 | ASS( pshufd xmm3, xmm3, 2, 1, 0, 3) |
---|
440 | AS2( sub REG_roundsLeft, 2) |
---|
441 | ASJ( jnz, 0, b) |
---|
442 | |
---|
443 | AS2( paddd xmm0, [REG_state + 0*16]) |
---|
444 | AS2( paddd xmm1, [REG_state + 1*16]) |
---|
445 | AS2( paddd xmm2, [REG_state + 2*16]) |
---|
446 | AS2( paddd xmm3, [REG_state + 3*16]) |
---|
447 | |
---|
448 | AS2( add dword ptr [REG_state + 8*4], 1) |
---|
449 | AS2( adc dword ptr [REG_state + 5*4], 0) |
---|
450 | |
---|
451 | AS2( pcmpeqb xmm6, xmm6) // all ones |
---|
452 | AS2( psrlq xmm6, 32) // lo32 mask |
---|
453 | ASS( pshufd xmm7, xmm6, 0, 1, 2, 3) // hi32 mask |
---|
454 | AS2( movdqa xmm4, xmm0) |
---|
455 | AS2( movdqa xmm5, xmm3) |
---|
456 | AS2( pand xmm0, xmm7) |
---|
457 | AS2( pand xmm4, xmm6) |
---|
458 | AS2( pand xmm3, xmm6) |
---|
459 | AS2( pand xmm5, xmm7) |
---|
460 | AS2( por xmm4, xmm5) // 0,13,2,15 |
---|
461 | AS2( movdqa xmm5, xmm1) |
---|
462 | AS2( pand xmm1, xmm7) |
---|
463 | AS2( pand xmm5, xmm6) |
---|
464 | AS2( por xmm0, xmm5) // 4,1,6,3 |
---|
465 | AS2( pand xmm6, xmm2) |
---|
466 | AS2( pand xmm2, xmm7) |
---|
467 | AS2( por xmm1, xmm6) // 8,5,10,7 |
---|
468 | AS2( por xmm2, xmm3) // 12,9,14,11 |
---|
469 | |
---|
470 | AS2( movdqa xmm5, xmm4) |
---|
471 | AS2( movdqa xmm6, xmm0) |
---|
472 | AS3( shufpd xmm4, xmm1, 2) // 0,13,10,7 |
---|
473 | AS3( shufpd xmm0, xmm2, 2) // 4,1,14,11 |
---|
474 | AS3( shufpd xmm1, xmm5, 2) // 8,5,2,15 |
---|
475 | AS3( shufpd xmm2, xmm6, 2) // 12,9,6,3 |
---|
476 | |
---|
477 | // output keystream |
---|
478 | AS_XMM_OUTPUT4(SSE2_Salsa_Output_B, REG_input, REG_output, 4, 0, 1, 2, 3, 0, 1, 2, 3, 4) |
---|
479 | ASJ( jmp, 5, b) |
---|
480 | ASL(4) |
---|
481 | |
---|
482 | AS_POP_IF86( bp) |
---|
483 | #ifdef __GNUC__ |
---|
484 | AS_POP_IF86( bx) |
---|
485 | ATT_PREFIX |
---|
486 | #if CRYPTOPP_BOOL_X64 |
---|
487 | : "+r" (input), "+r" (output), "+r" (iterationCount) |
---|
488 | : "r" (m_rounds), "r" (m_state.m_ptr), "r" (workspace) |
---|
489 | : "%eax", "%rdx", "memory", "cc", "%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", "%xmm10", "%xmm11", "%xmm12", "%xmm13", "%xmm14", "%xmm15" |
---|
490 | #else |
---|
491 | : "+a" (input), "+D" (output), "+c" (iterationCount) |
---|
492 | : "d" (m_rounds), "S" (m_state.m_ptr) |
---|
493 | : "memory", "cc" |
---|
494 | #endif |
---|
495 | ); |
---|
496 | #endif |
---|
497 | #ifdef CRYPTOPP_GENERATE_X64_MASM |
---|
498 | movdqa xmm6, [rsp + 0200h] |
---|
499 | movdqa xmm7, [rsp + 0210h] |
---|
500 | movdqa xmm8, [rsp + 0220h] |
---|
501 | movdqa xmm9, [rsp + 0230h] |
---|
502 | movdqa xmm10, [rsp + 0240h] |
---|
503 | movdqa xmm11, [rsp + 0250h] |
---|
504 | movdqa xmm12, [rsp + 0260h] |
---|
505 | movdqa xmm13, [rsp + 0270h] |
---|
506 | movdqa xmm14, [rsp + 0280h] |
---|
507 | movdqa xmm15, [rsp + 0290h] |
---|
508 | add rsp, 10*16 + 32*16 + 8 |
---|
509 | ret |
---|
510 | Salsa20_OperateKeystream ENDP |
---|
511 | #else |
---|
512 | } |
---|
513 | else |
---|
514 | #endif |
---|
515 | #endif |
---|
516 | #ifndef CRYPTOPP_GENERATE_X64_MASM |
---|
517 | { |
---|
518 | word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
---|
519 | |
---|
520 | while (iterationCount--) |
---|
521 | { |
---|
522 | x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; |
---|
523 | x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7]; |
---|
524 | x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11]; |
---|
525 | x12 = m_state[12]; x13 = m_state[13]; x14 = m_state[14]; x15 = m_state[15]; |
---|
526 | |
---|
527 | for (int i=m_rounds; i>0; i-=2) |
---|
528 | { |
---|
529 | #define QUARTER_ROUND(a, b, c, d) \ |
---|
530 | b = b ^ rotlFixed(a + d, 7); \ |
---|
531 | c = c ^ rotlFixed(b + a, 9); \ |
---|
532 | d = d ^ rotlFixed(c + b, 13); \ |
---|
533 | a = a ^ rotlFixed(d + c, 18); |
---|
534 | |
---|
535 | QUARTER_ROUND(x0, x4, x8, x12) |
---|
536 | QUARTER_ROUND(x1, x5, x9, x13) |
---|
537 | QUARTER_ROUND(x2, x6, x10, x14) |
---|
538 | QUARTER_ROUND(x3, x7, x11, x15) |
---|
539 | |
---|
540 | QUARTER_ROUND(x0, x13, x10, x7) |
---|
541 | QUARTER_ROUND(x1, x14, x11, x4) |
---|
542 | QUARTER_ROUND(x2, x15, x8, x5) |
---|
543 | QUARTER_ROUND(x3, x12, x9, x6) |
---|
544 | } |
---|
545 | |
---|
546 | #define SALSA_OUTPUT(x) {\ |
---|
547 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\ |
---|
548 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\ |
---|
549 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 2, x10 + m_state[10]);\ |
---|
550 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 3, x7 + m_state[7]);\ |
---|
551 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 4, x4 + m_state[4]);\ |
---|
552 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 5, x1 + m_state[1]);\ |
---|
553 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 6, x14 + m_state[14]);\ |
---|
554 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 7, x11 + m_state[11]);\ |
---|
555 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 8, x8 + m_state[8]);\ |
---|
556 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 9, x5 + m_state[5]);\ |
---|
557 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 10, x2 + m_state[2]);\ |
---|
558 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 11, x15 + m_state[15]);\ |
---|
559 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 12, x12 + m_state[12]);\ |
---|
560 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 13, x9 + m_state[9]);\ |
---|
561 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\ |
---|
562 | CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);} |
---|
563 | |
---|
564 | #ifndef CRYPTOPP_DOXYGEN_PROCESSING |
---|
565 | CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION); |
---|
566 | #endif |
---|
567 | |
---|
568 | if (++m_state[8] == 0) |
---|
569 | ++m_state[5]; |
---|
570 | } |
---|
571 | } |
---|
572 | } // see comment above if an internal compiler error occurs here |
---|
573 | |
---|
574 | void XSalsa20_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length) |
---|
575 | { |
---|
576 | m_rounds = params.GetIntValueWithDefault(Name::Rounds(), 20); |
---|
577 | |
---|
578 | if (!(m_rounds == 8 || m_rounds == 12 || m_rounds == 20)) |
---|
579 | throw InvalidRounds(XSalsa20::StaticAlgorithmName(), m_rounds); |
---|
580 | |
---|
581 | GetUserKey(LITTLE_ENDIAN_ORDER, m_key.begin(), m_key.size(), key, length); |
---|
582 | if (length == 16) |
---|
583 | memcpy(m_key.begin()+4, m_key.begin(), 16); |
---|
584 | |
---|
585 | // "expand 32-byte k" |
---|
586 | m_state[0] = 0x61707865; |
---|
587 | m_state[1] = 0x3320646e; |
---|
588 | m_state[2] = 0x79622d32; |
---|
589 | m_state[3] = 0x6b206574; |
---|
590 | } |
---|
591 | |
---|
592 | void XSalsa20_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length) |
---|
593 | { |
---|
594 | CRYPTOPP_UNUSED(keystreamBuffer), CRYPTOPP_UNUSED(length); |
---|
595 | CRYPTOPP_ASSERT(length==24); |
---|
596 | |
---|
597 | word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; |
---|
598 | |
---|
599 | GetBlock<word32, LittleEndian> get(IV); |
---|
600 | get(x14)(x11)(x8)(x5)(m_state[14])(m_state[11]); |
---|
601 | |
---|
602 | x13 = m_key[0]; x10 = m_key[1]; x7 = m_key[2]; x4 = m_key[3]; |
---|
603 | x15 = m_key[4]; x12 = m_key[5]; x9 = m_key[6]; x6 = m_key[7]; |
---|
604 | x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3]; |
---|
605 | |
---|
606 | for (int i=m_rounds; i>0; i-=2) |
---|
607 | { |
---|
608 | QUARTER_ROUND(x0, x4, x8, x12) |
---|
609 | QUARTER_ROUND(x1, x5, x9, x13) |
---|
610 | QUARTER_ROUND(x2, x6, x10, x14) |
---|
611 | QUARTER_ROUND(x3, x7, x11, x15) |
---|
612 | |
---|
613 | QUARTER_ROUND(x0, x13, x10, x7) |
---|
614 | QUARTER_ROUND(x1, x14, x11, x4) |
---|
615 | QUARTER_ROUND(x2, x15, x8, x5) |
---|
616 | QUARTER_ROUND(x3, x12, x9, x6) |
---|
617 | } |
---|
618 | |
---|
619 | m_state[13] = x0; m_state[10] = x1; m_state[7] = x2; m_state[4] = x3; |
---|
620 | m_state[15] = x14; m_state[12] = x11; m_state[9] = x8; m_state[6] = x5; |
---|
621 | m_state[8] = m_state[5] = 0; |
---|
622 | } |
---|
623 | |
---|
624 | NAMESPACE_END |
---|
625 | |
---|
626 | #endif // #ifndef CRYPTOPP_GENERATE_X64_MASM |
---|