; x64 MASM source containing the Salsa20_OperateKeystream and
; Sosemanuk_OperateKeystream procedures.
; NOTE(review): ksamd64.inc presumably supplies the prologue macros used by the
; procedures below (alloc_stack, save_xmm128, push_reg, rex_push_reg) - confirm
; against the SDK header.
1 | include ksamd64.inc |
---|
; Table defined in another module; Sosemanuk_OperateKeystream takes its address
; with `lea rsi, s_sosemanukMulTables`.
2 | EXTERNDEF s_sosemanukMulTables:FAR |
---|
; Everything that follows is emitted into the code segment.
3 | .CODE |
---|
4 | |
---|
5 | ALIGN 8 |
---|
;-----------------------------------------------------------------------
; Salsa20_OperateKeystream
;
; Generates keystream in 64-byte blocks with SSE2 and writes it to the
; output buffer, optionally XORing it with an input buffer first.
;
; Register usage matches the Microsoft x64 convention:
;   rcx        = output pointer (aligned or unaligned; checked at run time)
;   rdx        = input pointer, or 0 to write the raw keystream
;   r8         = number of 64-byte blocks to produce
;   r9         = round count; consumed two rounds at a time
;   [rsp+5*8]  = fifth argument on entry: pointer to the 16-dword state
;                (read with movdqa, so it must be 16-byte aligned)
;
; State dwords 8 (low) and 5 (high) are used as a 64-bit block counter
; and are advanced in place by the number of blocks produced.
;
; Stack frame after alloc_stack (680 bytes):
;   [rsp +   0 .. 255]  working state, 16 vectors (one per state word)
;   [rsp + 256 .. 511]  input state, 16 vectors (one per state word)
;   [rsp + 0200h .. ]   saved xmm6-xmm15
;   the trailing 8 bytes make rsp 16-byte aligned for the movdqa accesses
;   (assuming the usual rsp % 16 == 8 at entry)
;
; Two code paths:
;   label1 : while at least 4 blocks remain, compute 4 blocks in parallel;
;            each xmm lane belongs to a different block.
;   label5 : remaining blocks, one at a time, whole state rows in xmm0-xmm3.
;
; NOTE(review): the block count is compared with signed jumps (jl/jge), so a
; count with the top bit set would be treated as negative - confirm callers
; never pass such values.
; NOTE(review): the round loops use `sub eax, 2 / jnz`; an odd or zero round
; count never reaches zero on the expected iteration - presumably callers
; always pass an even, non-zero value.
;-----------------------------------------------------------------------
6 | Salsa20_OperateKeystream PROC FRAME |
---|
; r10 = state pointer (fifth argument), fetched before rsp is adjusted.
7 | mov r10, [rsp + 5*8] |
---|
; 10*16 for the xmm saves + 32*16 for the two 16-vector arrays + 8 padding.
8 | alloc_stack(10*16 + 32*16 + 8) |
---|
; xmm6-xmm15 are used below and restored at label4.
9 | save_xmm128 xmm6, 0200h |
---|
10 | save_xmm128 xmm7, 0210h |
---|
11 | save_xmm128 xmm8, 0220h |
---|
12 | save_xmm128 xmm9, 0230h |
---|
13 | save_xmm128 xmm10, 0240h |
---|
14 | save_xmm128 xmm11, 0250h |
---|
15 | save_xmm128 xmm12, 0260h |
---|
16 | save_xmm128 xmm13, 0270h |
---|
17 | save_xmm128 xmm14, 0280h |
---|
18 | save_xmm128 xmm15, 0290h |
---|
19 | .endprolog |
---|
; Fewer than 4 blocks requested: go straight to the single-block path.
20 | cmp r8, 4 |
---|
21 | jl label5 |
---|
; Load the four 16-byte rows of the state.
22 | movdqa xmm0, [r10 + 0*16] |
---|
23 | movdqa xmm1, [r10 + 1*16] |
---|
24 | movdqa xmm2, [r10 + 2*16] |
---|
25 | movdqa xmm3, [r10 + 3*16] |
---|
; Broadcast each state dword to all four lanes and store it in the input
; array at [rsp + word*16 + 256]. Words 5 and 8 (the counter) are skipped
; here; they are filled per lane at label1.
26 | pshufd xmm4, xmm0, 0*64+0*16+0*4+0 |
---|
27 | movdqa [rsp + (0*4+0)*16 + 256], xmm4 |
---|
28 | pshufd xmm4, xmm0, 1*64+1*16+1*4+1 |
---|
29 | movdqa [rsp + (0*4+1)*16 + 256], xmm4 |
---|
30 | pshufd xmm4, xmm0, 2*64+2*16+2*4+2 |
---|
31 | movdqa [rsp + (0*4+2)*16 + 256], xmm4 |
---|
32 | pshufd xmm4, xmm0, 3*64+3*16+3*4+3 |
---|
33 | movdqa [rsp + (0*4+3)*16 + 256], xmm4 |
---|
34 | pshufd xmm4, xmm1, 0*64+0*16+0*4+0 |
---|
35 | movdqa [rsp + (1*4+0)*16 + 256], xmm4 |
---|
36 | pshufd xmm4, xmm1, 2*64+2*16+2*4+2 |
---|
37 | movdqa [rsp + (1*4+2)*16 + 256], xmm4 |
---|
38 | pshufd xmm4, xmm1, 3*64+3*16+3*4+3 |
---|
39 | movdqa [rsp + (1*4+3)*16 + 256], xmm4 |
---|
40 | pshufd xmm4, xmm2, 1*64+1*16+1*4+1 |
---|
41 | movdqa [rsp + (2*4+1)*16 + 256], xmm4 |
---|
42 | pshufd xmm4, xmm2, 2*64+2*16+2*4+2 |
---|
43 | movdqa [rsp + (2*4+2)*16 + 256], xmm4 |
---|
44 | pshufd xmm4, xmm2, 3*64+3*16+3*4+3 |
---|
45 | movdqa [rsp + (2*4+3)*16 + 256], xmm4 |
---|
46 | pshufd xmm4, xmm3, 0*64+0*16+0*4+0 |
---|
47 | movdqa [rsp + (3*4+0)*16 + 256], xmm4 |
---|
48 | pshufd xmm4, xmm3, 1*64+1*16+1*4+1 |
---|
49 | movdqa [rsp + (3*4+1)*16 + 256], xmm4 |
---|
50 | pshufd xmm4, xmm3, 2*64+2*16+2*4+2 |
---|
51 | movdqa [rsp + (3*4+2)*16 + 256], xmm4 |
---|
52 | pshufd xmm4, xmm3, 3*64+3*16+3*4+3 |
---|
53 | movdqa [rsp + (3*4+3)*16 + 256], xmm4 |
---|
; ---- Four-block loop ----
; Fill the four lanes of input words 8 and 5 with counter, counter+1,
; counter+2, counter+3 (eax = low dword, r11d = high dword with carry).
54 | label1: |
---|
55 | mov eax, dword ptr [r10 + 8*4] |
---|
56 | mov r11d, dword ptr [r10 + 5*4] |
---|
57 | mov dword ptr [rsp + 8*16 + 0*4 + 256], eax |
---|
58 | mov dword ptr [rsp + 5*16 + 0*4 + 256], r11d |
---|
59 | add eax, 1 |
---|
60 | adc r11d, 0 |
---|
61 | mov dword ptr [rsp + 8*16 + 1*4 + 256], eax |
---|
62 | mov dword ptr [rsp + 5*16 + 1*4 + 256], r11d |
---|
63 | add eax, 1 |
---|
64 | adc r11d, 0 |
---|
65 | mov dword ptr [rsp + 8*16 + 2*4 + 256], eax |
---|
66 | mov dword ptr [rsp + 5*16 + 2*4 + 256], r11d |
---|
67 | add eax, 1 |
---|
68 | adc r11d, 0 |
---|
69 | mov dword ptr [rsp + 8*16 + 3*4 + 256], eax |
---|
70 | mov dword ptr [rsp + 5*16 + 3*4 + 256], r11d |
---|
71 | add eax, 1 |
---|
72 | adc r11d, 0 |
---|
; Store counter+4 back into the state for the next group of blocks.
73 | mov dword ptr [r10 + 8*4], eax |
---|
74 | mov dword ptr [r10 + 5*4], r11d |
---|
; First half of the first round pair. Same instruction sequence as label6,
; but operands are read from the input array (+ 1*256) while results are
; written to the working array, so no separate copy of the state is needed.
; Four independent word groups are processed side by side, one per register
; set (xmm0-3, xmm4-7, xmm8-11, xmm12-15): {12,0,4,8}, {13,1,5,9},
; {14,2,6,10}, {15,3,7,11}.
75 | movdqa xmm0, [rsp + 12*16 + 1*256] |
---|
76 | movdqa xmm4, [rsp + 13*16 + 1*256] |
---|
77 | movdqa xmm8, [rsp + 14*16 + 1*256] |
---|
78 | movdqa xmm12, [rsp + 15*16 + 1*256] |
---|
79 | movdqa xmm2, [rsp + 0*16 + 1*256] |
---|
80 | movdqa xmm6, [rsp + 1*16 + 1*256] |
---|
81 | movdqa xmm10, [rsp + 2*16 + 1*256] |
---|
82 | movdqa xmm14, [rsp + 3*16 + 1*256] |
---|
; words 4..7 ^= rotl(words 12..15 + words 0..3, 7); the rotate is built from
; a left shift, a right shift by 32-7 and two XORs.
83 | paddd xmm0, xmm2 |
---|
84 | paddd xmm4, xmm6 |
---|
85 | paddd xmm8, xmm10 |
---|
86 | paddd xmm12, xmm14 |
---|
87 | movdqa xmm1, xmm0 |
---|
88 | movdqa xmm5, xmm4 |
---|
89 | movdqa xmm9, xmm8 |
---|
90 | movdqa xmm13, xmm12 |
---|
91 | pslld xmm0, 7 |
---|
92 | pslld xmm4, 7 |
---|
93 | pslld xmm8, 7 |
---|
94 | pslld xmm12, 7 |
---|
95 | psrld xmm1, 32-7 |
---|
96 | psrld xmm5, 32-7 |
---|
97 | psrld xmm9, 32-7 |
---|
98 | psrld xmm13, 32-7 |
---|
99 | pxor xmm0, [rsp + 4*16 + 1*256] |
---|
100 | pxor xmm4, [rsp + 5*16 + 1*256] |
---|
101 | pxor xmm8, [rsp + 6*16 + 1*256] |
---|
102 | pxor xmm12, [rsp + 7*16 + 1*256] |
---|
103 | pxor xmm0, xmm1 |
---|
104 | pxor xmm4, xmm5 |
---|
105 | pxor xmm8, xmm9 |
---|
106 | pxor xmm12, xmm13 |
---|
107 | movdqa [rsp + 4*16], xmm0 |
---|
108 | movdqa [rsp + 5*16], xmm4 |
---|
109 | movdqa [rsp + 6*16], xmm8 |
---|
110 | movdqa [rsp + 7*16], xmm12 |
---|
; words 8..11 ^= rotl(new words 4..7 + words 0..3, 9)
111 | movdqa xmm1, xmm0 |
---|
112 | movdqa xmm5, xmm4 |
---|
113 | movdqa xmm9, xmm8 |
---|
114 | movdqa xmm13, xmm12 |
---|
115 | paddd xmm0, xmm2 |
---|
116 | paddd xmm4, xmm6 |
---|
117 | paddd xmm8, xmm10 |
---|
118 | paddd xmm12, xmm14 |
---|
119 | movdqa xmm3, xmm0 |
---|
120 | movdqa xmm7, xmm4 |
---|
121 | movdqa xmm11, xmm8 |
---|
122 | movdqa xmm15, xmm12 |
---|
123 | pslld xmm0, 9 |
---|
124 | pslld xmm4, 9 |
---|
125 | pslld xmm8, 9 |
---|
126 | pslld xmm12, 9 |
---|
127 | psrld xmm3, 32-9 |
---|
128 | psrld xmm7, 32-9 |
---|
129 | psrld xmm11, 32-9 |
---|
130 | psrld xmm15, 32-9 |
---|
131 | pxor xmm0, [rsp + 8*16 + 1*256] |
---|
132 | pxor xmm4, [rsp + 9*16 + 1*256] |
---|
133 | pxor xmm8, [rsp + 10*16 + 1*256] |
---|
134 | pxor xmm12, [rsp + 11*16 + 1*256] |
---|
135 | pxor xmm0, xmm3 |
---|
136 | pxor xmm4, xmm7 |
---|
137 | pxor xmm8, xmm11 |
---|
138 | pxor xmm12, xmm15 |
---|
139 | movdqa [rsp + 8*16], xmm0 |
---|
140 | movdqa [rsp + 9*16], xmm4 |
---|
141 | movdqa [rsp + 10*16], xmm8 |
---|
142 | movdqa [rsp + 11*16], xmm12 |
---|
; words 12..15 ^= rotl(new words 8..11 + new words 4..7, 13)
143 | movdqa xmm3, xmm0 |
---|
144 | movdqa xmm7, xmm4 |
---|
145 | movdqa xmm11, xmm8 |
---|
146 | movdqa xmm15, xmm12 |
---|
147 | paddd xmm0, xmm1 |
---|
148 | paddd xmm4, xmm5 |
---|
149 | paddd xmm8, xmm9 |
---|
150 | paddd xmm12, xmm13 |
---|
151 | movdqa xmm1, xmm0 |
---|
152 | movdqa xmm5, xmm4 |
---|
153 | movdqa xmm9, xmm8 |
---|
154 | movdqa xmm13, xmm12 |
---|
155 | pslld xmm0, 13 |
---|
156 | pslld xmm4, 13 |
---|
157 | pslld xmm8, 13 |
---|
158 | pslld xmm12, 13 |
---|
159 | psrld xmm1, 32-13 |
---|
160 | psrld xmm5, 32-13 |
---|
161 | psrld xmm9, 32-13 |
---|
162 | psrld xmm13, 32-13 |
---|
163 | pxor xmm0, [rsp + 12*16 + 1*256] |
---|
164 | pxor xmm4, [rsp + 13*16 + 1*256] |
---|
165 | pxor xmm8, [rsp + 14*16 + 1*256] |
---|
166 | pxor xmm12, [rsp + 15*16 + 1*256] |
---|
167 | pxor xmm0, xmm1 |
---|
168 | pxor xmm4, xmm5 |
---|
169 | pxor xmm8, xmm9 |
---|
170 | pxor xmm12, xmm13 |
---|
171 | movdqa [rsp + 12*16], xmm0 |
---|
172 | movdqa [rsp + 13*16], xmm4 |
---|
173 | movdqa [rsp + 14*16], xmm8 |
---|
174 | movdqa [rsp + 15*16], xmm12 |
---|
; words 0..3 ^= rotl(new words 12..15 + new words 8..11, 18); the old words
; 0..3 are still held in xmm2/xmm6/xmm10/xmm14.
175 | paddd xmm0, xmm3 |
---|
176 | paddd xmm4, xmm7 |
---|
177 | paddd xmm8, xmm11 |
---|
178 | paddd xmm12, xmm15 |
---|
179 | movdqa xmm3, xmm0 |
---|
180 | movdqa xmm7, xmm4 |
---|
181 | movdqa xmm11, xmm8 |
---|
182 | movdqa xmm15, xmm12 |
---|
183 | pslld xmm0, 18 |
---|
184 | pslld xmm4, 18 |
---|
185 | pslld xmm8, 18 |
---|
186 | pslld xmm12, 18 |
---|
187 | psrld xmm3, 32-18 |
---|
188 | psrld xmm7, 32-18 |
---|
189 | psrld xmm11, 32-18 |
---|
190 | psrld xmm15, 32-18 |
---|
191 | pxor xmm0, xmm2 |
---|
192 | pxor xmm4, xmm6 |
---|
193 | pxor xmm8, xmm10 |
---|
194 | pxor xmm12, xmm14 |
---|
195 | pxor xmm0, xmm3 |
---|
196 | pxor xmm4, xmm7 |
---|
197 | pxor xmm8, xmm11 |
---|
198 | pxor xmm12, xmm15 |
---|
199 | movdqa [rsp + 0*16], xmm0 |
---|
200 | movdqa [rsp + 1*16], xmm4 |
---|
201 | movdqa [rsp + 2*16], xmm8 |
---|
202 | movdqa [rsp + 3*16], xmm12 |
---|
; rax = round counter; continue with the second half at label2, jumping over
; the output subroutine and the in-loop copy of the first half.
203 | mov rax, r9 |
---|
204 | jmp label2 |
---|
; ---- Local subroutine, reached only via `call` from the four-block path ----
; In:  xmm4..xmm7 = four state words, each holding the values for 4 blocks
; Out: 16 bytes written to each of the 4 blocks at rcx + 0/64/128/192;
;      rcx += 16, and rdx += 16 when rdx is non-zero
; Clobbers xmm0, xmm1, xmm2, xmm4, xmm6 and flags.
; The punpck sequence transposes the 4x4 dword matrix so that xmm4, xmm2,
; xmm0 and xmm6 hold the four words for block 0, 1, 2 and 3 respectively.
205 | labelSSE2_Salsa_Output: |
---|
206 | movdqa xmm0, xmm4 |
---|
207 | punpckldq xmm4, xmm5 |
---|
208 | movdqa xmm1, xmm6 |
---|
209 | punpckldq xmm6, xmm7 |
---|
210 | movdqa xmm2, xmm4 |
---|
211 | punpcklqdq xmm4, xmm6 |
---|
212 | punpckhqdq xmm2, xmm6 |
---|
213 | punpckhdq xmm0, xmm5 |
---|
214 | punpckhdq xmm1, xmm7 |
---|
215 | movdqa xmm6, xmm0 |
---|
216 | punpcklqdq xmm0, xmm1 |
---|
217 | punpckhqdq xmm6, xmm1 |
---|
; No input buffer: skip the XOR and leave rdx untouched.
218 | test rdx, rdx |
---|
219 | jz labelSSE2_Salsa_Output_A3 |
---|
; Aligned input can be used directly as a pxor memory operand.
220 | test rdx, 15 |
---|
221 | jnz labelSSE2_Salsa_Output_A7 |
---|
222 | pxor xmm4, [rdx+0*16] |
---|
223 | pxor xmm2, [rdx+4*16] |
---|
224 | pxor xmm0, [rdx+8*16] |
---|
225 | pxor xmm6, [rdx+12*16] |
---|
226 | add rdx, 1*16 |
---|
227 | jmp labelSSE2_Salsa_Output_A3 |
---|
; Unaligned input: load with movdqu into xmm1 first.
228 | labelSSE2_Salsa_Output_A7: |
---|
229 | movdqu xmm1, [rdx+0*16] |
---|
230 | pxor xmm4, xmm1 |
---|
231 | movdqu xmm1, [rdx+4*16] |
---|
232 | pxor xmm2, xmm1 |
---|
233 | movdqu xmm1, [rdx+8*16] |
---|
234 | pxor xmm0, xmm1 |
---|
235 | movdqu xmm1, [rdx+12*16] |
---|
236 | pxor xmm6, xmm1 |
---|
237 | add rdx, 1*16 |
---|
; Store to the output, choosing aligned or unaligned stores.
238 | labelSSE2_Salsa_Output_A3: |
---|
239 | test rcx, 15 |
---|
240 | jnz labelSSE2_Salsa_Output_A8 |
---|
241 | movdqa [rcx+0*16], xmm4 |
---|
242 | movdqa [rcx+4*16], xmm2 |
---|
243 | movdqa [rcx+8*16], xmm0 |
---|
244 | movdqa [rcx+12*16], xmm6 |
---|
245 | jmp labelSSE2_Salsa_Output_A9 |
---|
246 | labelSSE2_Salsa_Output_A8: |
---|
247 | movdqu [rcx+0*16], xmm4 |
---|
248 | movdqu [rcx+4*16], xmm2 |
---|
249 | movdqu [rcx+8*16], xmm0 |
---|
250 | movdqu [rcx+12*16], xmm6 |
---|
251 | labelSSE2_Salsa_Output_A9: |
---|
252 | add rcx, 1*16 |
---|
; Returns to the internal `call` site, not to the procedure's caller.
253 | ret |
---|
; ---- Round loop, first half ----
; Same steps as the sequence after label1 (rotations 7, 9, 13, 18), but
; reading the working array (+ 0*256) that earlier rounds have updated.
254 | label6: |
---|
255 | movdqa xmm0, [rsp + 12*16 + 0*256] |
---|
256 | movdqa xmm4, [rsp + 13*16 + 0*256] |
---|
257 | movdqa xmm8, [rsp + 14*16 + 0*256] |
---|
258 | movdqa xmm12, [rsp + 15*16 + 0*256] |
---|
259 | movdqa xmm2, [rsp + 0*16 + 0*256] |
---|
260 | movdqa xmm6, [rsp + 1*16 + 0*256] |
---|
261 | movdqa xmm10, [rsp + 2*16 + 0*256] |
---|
262 | movdqa xmm14, [rsp + 3*16 + 0*256] |
---|
263 | paddd xmm0, xmm2 |
---|
264 | paddd xmm4, xmm6 |
---|
265 | paddd xmm8, xmm10 |
---|
266 | paddd xmm12, xmm14 |
---|
267 | movdqa xmm1, xmm0 |
---|
268 | movdqa xmm5, xmm4 |
---|
269 | movdqa xmm9, xmm8 |
---|
270 | movdqa xmm13, xmm12 |
---|
271 | pslld xmm0, 7 |
---|
272 | pslld xmm4, 7 |
---|
273 | pslld xmm8, 7 |
---|
274 | pslld xmm12, 7 |
---|
275 | psrld xmm1, 32-7 |
---|
276 | psrld xmm5, 32-7 |
---|
277 | psrld xmm9, 32-7 |
---|
278 | psrld xmm13, 32-7 |
---|
279 | pxor xmm0, [rsp + 4*16 + 0*256] |
---|
280 | pxor xmm4, [rsp + 5*16 + 0*256] |
---|
281 | pxor xmm8, [rsp + 6*16 + 0*256] |
---|
282 | pxor xmm12, [rsp + 7*16 + 0*256] |
---|
283 | pxor xmm0, xmm1 |
---|
284 | pxor xmm4, xmm5 |
---|
285 | pxor xmm8, xmm9 |
---|
286 | pxor xmm12, xmm13 |
---|
287 | movdqa [rsp + 4*16], xmm0 |
---|
288 | movdqa [rsp + 5*16], xmm4 |
---|
289 | movdqa [rsp + 6*16], xmm8 |
---|
290 | movdqa [rsp + 7*16], xmm12 |
---|
291 | movdqa xmm1, xmm0 |
---|
292 | movdqa xmm5, xmm4 |
---|
293 | movdqa xmm9, xmm8 |
---|
294 | movdqa xmm13, xmm12 |
---|
295 | paddd xmm0, xmm2 |
---|
296 | paddd xmm4, xmm6 |
---|
297 | paddd xmm8, xmm10 |
---|
298 | paddd xmm12, xmm14 |
---|
299 | movdqa xmm3, xmm0 |
---|
300 | movdqa xmm7, xmm4 |
---|
301 | movdqa xmm11, xmm8 |
---|
302 | movdqa xmm15, xmm12 |
---|
303 | pslld xmm0, 9 |
---|
304 | pslld xmm4, 9 |
---|
305 | pslld xmm8, 9 |
---|
306 | pslld xmm12, 9 |
---|
307 | psrld xmm3, 32-9 |
---|
308 | psrld xmm7, 32-9 |
---|
309 | psrld xmm11, 32-9 |
---|
310 | psrld xmm15, 32-9 |
---|
311 | pxor xmm0, [rsp + 8*16 + 0*256] |
---|
312 | pxor xmm4, [rsp + 9*16 + 0*256] |
---|
313 | pxor xmm8, [rsp + 10*16 + 0*256] |
---|
314 | pxor xmm12, [rsp + 11*16 + 0*256] |
---|
315 | pxor xmm0, xmm3 |
---|
316 | pxor xmm4, xmm7 |
---|
317 | pxor xmm8, xmm11 |
---|
318 | pxor xmm12, xmm15 |
---|
319 | movdqa [rsp + 8*16], xmm0 |
---|
320 | movdqa [rsp + 9*16], xmm4 |
---|
321 | movdqa [rsp + 10*16], xmm8 |
---|
322 | movdqa [rsp + 11*16], xmm12 |
---|
323 | movdqa xmm3, xmm0 |
---|
324 | movdqa xmm7, xmm4 |
---|
325 | movdqa xmm11, xmm8 |
---|
326 | movdqa xmm15, xmm12 |
---|
327 | paddd xmm0, xmm1 |
---|
328 | paddd xmm4, xmm5 |
---|
329 | paddd xmm8, xmm9 |
---|
330 | paddd xmm12, xmm13 |
---|
331 | movdqa xmm1, xmm0 |
---|
332 | movdqa xmm5, xmm4 |
---|
333 | movdqa xmm9, xmm8 |
---|
334 | movdqa xmm13, xmm12 |
---|
335 | pslld xmm0, 13 |
---|
336 | pslld xmm4, 13 |
---|
337 | pslld xmm8, 13 |
---|
338 | pslld xmm12, 13 |
---|
339 | psrld xmm1, 32-13 |
---|
340 | psrld xmm5, 32-13 |
---|
341 | psrld xmm9, 32-13 |
---|
342 | psrld xmm13, 32-13 |
---|
343 | pxor xmm0, [rsp + 12*16 + 0*256] |
---|
344 | pxor xmm4, [rsp + 13*16 + 0*256] |
---|
345 | pxor xmm8, [rsp + 14*16 + 0*256] |
---|
346 | pxor xmm12, [rsp + 15*16 + 0*256] |
---|
347 | pxor xmm0, xmm1 |
---|
348 | pxor xmm4, xmm5 |
---|
349 | pxor xmm8, xmm9 |
---|
350 | pxor xmm12, xmm13 |
---|
351 | movdqa [rsp + 12*16], xmm0 |
---|
352 | movdqa [rsp + 13*16], xmm4 |
---|
353 | movdqa [rsp + 14*16], xmm8 |
---|
354 | movdqa [rsp + 15*16], xmm12 |
---|
355 | paddd xmm0, xmm3 |
---|
356 | paddd xmm4, xmm7 |
---|
357 | paddd xmm8, xmm11 |
---|
358 | paddd xmm12, xmm15 |
---|
359 | movdqa xmm3, xmm0 |
---|
360 | movdqa xmm7, xmm4 |
---|
361 | movdqa xmm11, xmm8 |
---|
362 | movdqa xmm15, xmm12 |
---|
363 | pslld xmm0, 18 |
---|
364 | pslld xmm4, 18 |
---|
365 | pslld xmm8, 18 |
---|
366 | pslld xmm12, 18 |
---|
367 | psrld xmm3, 32-18 |
---|
368 | psrld xmm7, 32-18 |
---|
369 | psrld xmm11, 32-18 |
---|
370 | psrld xmm15, 32-18 |
---|
371 | pxor xmm0, xmm2 |
---|
372 | pxor xmm4, xmm6 |
---|
373 | pxor xmm8, xmm10 |
---|
374 | pxor xmm12, xmm14 |
---|
375 | pxor xmm0, xmm3 |
---|
376 | pxor xmm4, xmm7 |
---|
377 | pxor xmm8, xmm11 |
---|
378 | pxor xmm12, xmm15 |
---|
379 | movdqa [rsp + 0*16], xmm0 |
---|
380 | movdqa [rsp + 1*16], xmm4 |
---|
381 | movdqa [rsp + 2*16], xmm8 |
---|
382 | movdqa [rsp + 3*16], xmm12 |
---|
; ---- Round loop, second half ----
; Same rotate-add-xor chain (7, 9, 13, 18) over a different grouping of the
; working words: {7,0,13,10}, {4,1,14,11}, {5,2,15,8}, {6,3,12,9}.
383 | label2: |
---|
384 | movdqa xmm0, [rsp + 7*16 + 0*256] |
---|
385 | movdqa xmm4, [rsp + 4*16 + 0*256] |
---|
386 | movdqa xmm8, [rsp + 5*16 + 0*256] |
---|
387 | movdqa xmm12, [rsp + 6*16 + 0*256] |
---|
388 | movdqa xmm2, [rsp + 0*16 + 0*256] |
---|
389 | movdqa xmm6, [rsp + 1*16 + 0*256] |
---|
390 | movdqa xmm10, [rsp + 2*16 + 0*256] |
---|
391 | movdqa xmm14, [rsp + 3*16 + 0*256] |
---|
392 | paddd xmm0, xmm2 |
---|
393 | paddd xmm4, xmm6 |
---|
394 | paddd xmm8, xmm10 |
---|
395 | paddd xmm12, xmm14 |
---|
396 | movdqa xmm1, xmm0 |
---|
397 | movdqa xmm5, xmm4 |
---|
398 | movdqa xmm9, xmm8 |
---|
399 | movdqa xmm13, xmm12 |
---|
400 | pslld xmm0, 7 |
---|
401 | pslld xmm4, 7 |
---|
402 | pslld xmm8, 7 |
---|
403 | pslld xmm12, 7 |
---|
404 | psrld xmm1, 32-7 |
---|
405 | psrld xmm5, 32-7 |
---|
406 | psrld xmm9, 32-7 |
---|
407 | psrld xmm13, 32-7 |
---|
408 | pxor xmm0, [rsp + 13*16 + 0*256] |
---|
409 | pxor xmm4, [rsp + 14*16 + 0*256] |
---|
410 | pxor xmm8, [rsp + 15*16 + 0*256] |
---|
411 | pxor xmm12, [rsp + 12*16 + 0*256] |
---|
412 | pxor xmm0, xmm1 |
---|
413 | pxor xmm4, xmm5 |
---|
414 | pxor xmm8, xmm9 |
---|
415 | pxor xmm12, xmm13 |
---|
416 | movdqa [rsp + 13*16], xmm0 |
---|
417 | movdqa [rsp + 14*16], xmm4 |
---|
418 | movdqa [rsp + 15*16], xmm8 |
---|
419 | movdqa [rsp + 12*16], xmm12 |
---|
420 | movdqa xmm1, xmm0 |
---|
421 | movdqa xmm5, xmm4 |
---|
422 | movdqa xmm9, xmm8 |
---|
423 | movdqa xmm13, xmm12 |
---|
424 | paddd xmm0, xmm2 |
---|
425 | paddd xmm4, xmm6 |
---|
426 | paddd xmm8, xmm10 |
---|
427 | paddd xmm12, xmm14 |
---|
428 | movdqa xmm3, xmm0 |
---|
429 | movdqa xmm7, xmm4 |
---|
430 | movdqa xmm11, xmm8 |
---|
431 | movdqa xmm15, xmm12 |
---|
432 | pslld xmm0, 9 |
---|
433 | pslld xmm4, 9 |
---|
434 | pslld xmm8, 9 |
---|
435 | pslld xmm12, 9 |
---|
436 | psrld xmm3, 32-9 |
---|
437 | psrld xmm7, 32-9 |
---|
438 | psrld xmm11, 32-9 |
---|
439 | psrld xmm15, 32-9 |
---|
440 | pxor xmm0, [rsp + 10*16 + 0*256] |
---|
441 | pxor xmm4, [rsp + 11*16 + 0*256] |
---|
442 | pxor xmm8, [rsp + 8*16 + 0*256] |
---|
443 | pxor xmm12, [rsp + 9*16 + 0*256] |
---|
444 | pxor xmm0, xmm3 |
---|
445 | pxor xmm4, xmm7 |
---|
446 | pxor xmm8, xmm11 |
---|
447 | pxor xmm12, xmm15 |
---|
448 | movdqa [rsp + 10*16], xmm0 |
---|
449 | movdqa [rsp + 11*16], xmm4 |
---|
450 | movdqa [rsp + 8*16], xmm8 |
---|
451 | movdqa [rsp + 9*16], xmm12 |
---|
452 | movdqa xmm3, xmm0 |
---|
453 | movdqa xmm7, xmm4 |
---|
454 | movdqa xmm11, xmm8 |
---|
455 | movdqa xmm15, xmm12 |
---|
456 | paddd xmm0, xmm1 |
---|
457 | paddd xmm4, xmm5 |
---|
458 | paddd xmm8, xmm9 |
---|
459 | paddd xmm12, xmm13 |
---|
460 | movdqa xmm1, xmm0 |
---|
461 | movdqa xmm5, xmm4 |
---|
462 | movdqa xmm9, xmm8 |
---|
463 | movdqa xmm13, xmm12 |
---|
464 | pslld xmm0, 13 |
---|
465 | pslld xmm4, 13 |
---|
466 | pslld xmm8, 13 |
---|
467 | pslld xmm12, 13 |
---|
468 | psrld xmm1, 32-13 |
---|
469 | psrld xmm5, 32-13 |
---|
470 | psrld xmm9, 32-13 |
---|
471 | psrld xmm13, 32-13 |
---|
472 | pxor xmm0, [rsp + 7*16 + 0*256] |
---|
473 | pxor xmm4, [rsp + 4*16 + 0*256] |
---|
474 | pxor xmm8, [rsp + 5*16 + 0*256] |
---|
475 | pxor xmm12, [rsp + 6*16 + 0*256] |
---|
476 | pxor xmm0, xmm1 |
---|
477 | pxor xmm4, xmm5 |
---|
478 | pxor xmm8, xmm9 |
---|
479 | pxor xmm12, xmm13 |
---|
480 | movdqa [rsp + 7*16], xmm0 |
---|
481 | movdqa [rsp + 4*16], xmm4 |
---|
482 | movdqa [rsp + 5*16], xmm8 |
---|
483 | movdqa [rsp + 6*16], xmm12 |
---|
484 | paddd xmm0, xmm3 |
---|
485 | paddd xmm4, xmm7 |
---|
486 | paddd xmm8, xmm11 |
---|
487 | paddd xmm12, xmm15 |
---|
488 | movdqa xmm3, xmm0 |
---|
489 | movdqa xmm7, xmm4 |
---|
490 | movdqa xmm11, xmm8 |
---|
491 | movdqa xmm15, xmm12 |
---|
492 | pslld xmm0, 18 |
---|
493 | pslld xmm4, 18 |
---|
494 | pslld xmm8, 18 |
---|
495 | pslld xmm12, 18 |
---|
496 | psrld xmm3, 32-18 |
---|
497 | psrld xmm7, 32-18 |
---|
498 | psrld xmm11, 32-18 |
---|
499 | psrld xmm15, 32-18 |
---|
500 | pxor xmm0, xmm2 |
---|
501 | pxor xmm4, xmm6 |
---|
502 | pxor xmm8, xmm10 |
---|
503 | pxor xmm12, xmm14 |
---|
504 | pxor xmm0, xmm3 |
---|
505 | pxor xmm4, xmm7 |
---|
506 | pxor xmm8, xmm11 |
---|
507 | pxor xmm12, xmm15 |
---|
508 | movdqa [rsp + 0*16], xmm0 |
---|
509 | movdqa [rsp + 1*16], xmm4 |
---|
510 | movdqa [rsp + 2*16], xmm8 |
---|
511 | movdqa [rsp + 3*16], xmm12 |
---|
; Two rounds done; repeat until the round counter reaches zero.
512 | sub eax, 2 |
---|
513 | jnz label6 |
---|
; Feed-forward and output: add the input words (+ 256) to the working words,
; four words at a time, and let the subroutine transpose, XOR and store them.
; Word groups per call: (0,13,10,7), (4,1,14,11), (8,5,2,15), (12,9,6,3).
514 | movdqa xmm4, [rsp + 0*16 + 256] |
---|
515 | paddd xmm4, [rsp + 0*16] |
---|
516 | movdqa xmm5, [rsp + 13*16 + 256] |
---|
517 | paddd xmm5, [rsp + 13*16] |
---|
518 | movdqa xmm6, [rsp + 10*16 + 256] |
---|
519 | paddd xmm6, [rsp + 10*16] |
---|
520 | movdqa xmm7, [rsp + 7*16 + 256] |
---|
521 | paddd xmm7, [rsp + 7*16] |
---|
522 | call labelSSE2_Salsa_Output |
---|
523 | movdqa xmm4, [rsp + 4*16 + 256] |
---|
524 | paddd xmm4, [rsp + 4*16] |
---|
525 | movdqa xmm5, [rsp + 1*16 + 256] |
---|
526 | paddd xmm5, [rsp + 1*16] |
---|
527 | movdqa xmm6, [rsp + 14*16 + 256] |
---|
528 | paddd xmm6, [rsp + 14*16] |
---|
529 | movdqa xmm7, [rsp + 11*16 + 256] |
---|
530 | paddd xmm7, [rsp + 11*16] |
---|
531 | call labelSSE2_Salsa_Output |
---|
532 | movdqa xmm4, [rsp + 8*16 + 256] |
---|
533 | paddd xmm4, [rsp + 8*16] |
---|
534 | movdqa xmm5, [rsp + 5*16 + 256] |
---|
535 | paddd xmm5, [rsp + 5*16] |
---|
536 | movdqa xmm6, [rsp + 2*16 + 256] |
---|
537 | paddd xmm6, [rsp + 2*16] |
---|
538 | movdqa xmm7, [rsp + 15*16 + 256] |
---|
539 | paddd xmm7, [rsp + 15*16] |
---|
540 | call labelSSE2_Salsa_Output |
---|
541 | movdqa xmm4, [rsp + 12*16 + 256] |
---|
542 | paddd xmm4, [rsp + 12*16] |
---|
543 | movdqa xmm5, [rsp + 9*16 + 256] |
---|
544 | paddd xmm5, [rsp + 9*16] |
---|
545 | movdqa xmm6, [rsp + 6*16 + 256] |
---|
546 | paddd xmm6, [rsp + 6*16] |
---|
547 | movdqa xmm7, [rsp + 3*16 + 256] |
---|
548 | paddd xmm7, [rsp + 3*16] |
---|
549 | call labelSSE2_Salsa_Output |
---|
; The four calls advanced the pointers by 4*16 bytes; add 12*16 more so they
; move past all four 64-byte blocks (rdx only when an input buffer is in use).
550 | test rdx, rdx |
---|
551 | jz label9 |
---|
552 | add rdx, 12*16 |
---|
553 | label9: |
---|
554 | add rcx, 12*16 |
---|
; Four blocks consumed; stay in the four-block loop while at least 4 remain.
555 | sub r8, 4 |
---|
556 | cmp r8, 4 |
---|
557 | jge label1 |
---|
; ---- Single-block loop ----
; Exit to the epilogue once the block count goes below zero.
558 | label5: |
---|
559 | sub r8, 1 |
---|
560 | jl label4 |
---|
; xmm0..xmm3 = the four state rows; rax = round counter.
561 | movdqa xmm0, [r10 + 0*16] |
---|
562 | movdqa xmm1, [r10 + 1*16] |
---|
563 | movdqa xmm2, [r10 + 2*16] |
---|
564 | movdqa xmm3, [r10 + 3*16] |
---|
565 | mov rax, r9 |
---|
; One iteration = two rounds. Each step is row ^= rotl(rowA + rowB, k) with
; k = 7, 9, 13, 18, using xmm4/xmm5 as scratch for the two shift halves.
566 | label0: |
---|
567 | movdqa xmm4, xmm3 |
---|
568 | paddd xmm4, xmm0 |
---|
569 | movdqa xmm5, xmm4 |
---|
570 | pslld xmm4, 7 |
---|
571 | psrld xmm5, 32-7 |
---|
572 | pxor xmm1, xmm4 |
---|
573 | pxor xmm1, xmm5 |
---|
574 | movdqa xmm4, xmm0 |
---|
575 | paddd xmm4, xmm1 |
---|
576 | movdqa xmm5, xmm4 |
---|
577 | pslld xmm4, 9 |
---|
578 | psrld xmm5, 32-9 |
---|
579 | pxor xmm2, xmm4 |
---|
580 | pxor xmm2, xmm5 |
---|
581 | movdqa xmm4, xmm1 |
---|
582 | paddd xmm4, xmm2 |
---|
583 | movdqa xmm5, xmm4 |
---|
584 | pslld xmm4, 13 |
---|
585 | psrld xmm5, 32-13 |
---|
586 | pxor xmm3, xmm4 |
---|
587 | pxor xmm3, xmm5 |
---|
588 | movdqa xmm4, xmm2 |
---|
589 | paddd xmm4, xmm3 |
---|
590 | movdqa xmm5, xmm4 |
---|
591 | pslld xmm4, 18 |
---|
592 | psrld xmm5, 32-18 |
---|
593 | pxor xmm0, xmm4 |
---|
594 | pxor xmm0, xmm5 |
---|
; Rotate the dword lanes of rows 1..3 by one, two and three positions so the
; second round combines a different set of words in each lane.
595 | pshufd xmm1, xmm1, 2*64+1*16+0*4+3 |
---|
596 | pshufd xmm2, xmm2, 1*64+0*16+3*4+2 |
---|
597 | pshufd xmm3, xmm3, 0*64+3*16+2*4+1 |
---|
; Second round: same chain with the roles of rows 1 and 3 exchanged.
598 | movdqa xmm4, xmm1 |
---|
599 | paddd xmm4, xmm0 |
---|
600 | movdqa xmm5, xmm4 |
---|
601 | pslld xmm4, 7 |
---|
602 | psrld xmm5, 32-7 |
---|
603 | pxor xmm3, xmm4 |
---|
604 | pxor xmm3, xmm5 |
---|
605 | movdqa xmm4, xmm0 |
---|
606 | paddd xmm4, xmm3 |
---|
607 | movdqa xmm5, xmm4 |
---|
608 | pslld xmm4, 9 |
---|
609 | psrld xmm5, 32-9 |
---|
610 | pxor xmm2, xmm4 |
---|
611 | pxor xmm2, xmm5 |
---|
612 | movdqa xmm4, xmm3 |
---|
613 | paddd xmm4, xmm2 |
---|
614 | movdqa xmm5, xmm4 |
---|
615 | pslld xmm4, 13 |
---|
616 | psrld xmm5, 32-13 |
---|
617 | pxor xmm1, xmm4 |
---|
618 | pxor xmm1, xmm5 |
---|
619 | movdqa xmm4, xmm2 |
---|
620 | paddd xmm4, xmm1 |
---|
621 | movdqa xmm5, xmm4 |
---|
622 | pslld xmm4, 18 |
---|
623 | psrld xmm5, 32-18 |
---|
624 | pxor xmm0, xmm4 |
---|
625 | pxor xmm0, xmm5 |
---|
; Inverse lane rotations: restore the original lane order of rows 1..3.
626 | pshufd xmm1, xmm1, 0*64+3*16+2*4+1 |
---|
627 | pshufd xmm2, xmm2, 1*64+0*16+3*4+2 |
---|
628 | pshufd xmm3, xmm3, 2*64+1*16+0*4+3 |
---|
629 | sub eax, 2 |
---|
630 | jnz label0 |
---|
; Feed-forward: add the original state rows to the result.
631 | paddd xmm0, [r10 + 0*16] |
---|
632 | paddd xmm1, [r10 + 1*16] |
---|
633 | paddd xmm2, [r10 + 2*16] |
---|
634 | paddd xmm3, [r10 + 3*16] |
---|
; Advance the 64-bit counter (word 8 low, word 5 high) by one block.
635 | add dword ptr [r10 + 8*4], 1 |
---|
636 | adc dword ptr [r10 + 5*4], 0 |
---|
; Build two dword masks: xmm6 selects dwords 0 and 2, xmm7 (xmm6 with its
; dwords reversed) selects dwords 1 and 3.
637 | pcmpeqb xmm6, xmm6 |
---|
638 | psrlq xmm6, 32 |
---|
639 | pshufd xmm7, xmm6, 0*64+1*16+2*4+3 |
---|
; Recombine dwords from the four rows with the masks and shufpd; the result
; is emitted in the order xmm4, xmm0, xmm1, xmm2.
; NOTE(review): presumably this converts the in-memory state word order into
; the keystream byte order - confirm against the code that sets up the state.
640 | movdqa xmm4, xmm0 |
---|
641 | movdqa xmm5, xmm3 |
---|
642 | pand xmm0, xmm7 |
---|
643 | pand xmm4, xmm6 |
---|
644 | pand xmm3, xmm6 |
---|
645 | pand xmm5, xmm7 |
---|
646 | por xmm4, xmm5 |
---|
647 | movdqa xmm5, xmm1 |
---|
648 | pand xmm1, xmm7 |
---|
649 | pand xmm5, xmm6 |
---|
650 | por xmm0, xmm5 |
---|
651 | pand xmm6, xmm2 |
---|
652 | pand xmm2, xmm7 |
---|
653 | por xmm1, xmm6 |
---|
654 | por xmm2, xmm3 |
---|
655 | movdqa xmm5, xmm4 |
---|
656 | movdqa xmm6, xmm0 |
---|
657 | shufpd xmm4, xmm1, 2 |
---|
658 | shufpd xmm0, xmm2, 2 |
---|
659 | shufpd xmm1, xmm5, 2 |
---|
660 | shufpd xmm2, xmm6, 2 |
---|
; XOR with 64 bytes of input when rdx is non-zero (aligned or unaligned path),
; advancing rdx by 64.
661 | test rdx, rdx |
---|
662 | jz labelSSE2_Salsa_Output_B3 |
---|
663 | test rdx, 15 |
---|
664 | jnz labelSSE2_Salsa_Output_B7 |
---|
665 | pxor xmm4, [rdx+0*16] |
---|
666 | pxor xmm0, [rdx+1*16] |
---|
667 | pxor xmm1, [rdx+2*16] |
---|
668 | pxor xmm2, [rdx+3*16] |
---|
669 | add rdx, 4*16 |
---|
670 | jmp labelSSE2_Salsa_Output_B3 |
---|
671 | labelSSE2_Salsa_Output_B7: |
---|
672 | movdqu xmm3, [rdx+0*16] |
---|
673 | pxor xmm4, xmm3 |
---|
674 | movdqu xmm3, [rdx+1*16] |
---|
675 | pxor xmm0, xmm3 |
---|
676 | movdqu xmm3, [rdx+2*16] |
---|
677 | pxor xmm1, xmm3 |
---|
678 | movdqu xmm3, [rdx+3*16] |
---|
679 | pxor xmm2, xmm3 |
---|
680 | add rdx, 4*16 |
---|
; Store the 64-byte block (aligned or unaligned stores) and advance rcx.
681 | labelSSE2_Salsa_Output_B3: |
---|
682 | test rcx, 15 |
---|
683 | jnz labelSSE2_Salsa_Output_B8 |
---|
684 | movdqa [rcx+0*16], xmm4 |
---|
685 | movdqa [rcx+1*16], xmm0 |
---|
686 | movdqa [rcx+2*16], xmm1 |
---|
687 | movdqa [rcx+3*16], xmm2 |
---|
688 | jmp labelSSE2_Salsa_Output_B9 |
---|
689 | labelSSE2_Salsa_Output_B8: |
---|
690 | movdqu [rcx+0*16], xmm4 |
---|
691 | movdqu [rcx+1*16], xmm0 |
---|
692 | movdqu [rcx+2*16], xmm1 |
---|
693 | movdqu [rcx+3*16], xmm2 |
---|
694 | labelSSE2_Salsa_Output_B9: |
---|
695 | add rcx, 4*16 |
---|
696 | jmp label5 |
---|
; ---- Epilogue: restore xmm6-xmm15, release the frame, return ----
697 | label4: |
---|
698 | movdqa xmm6, [rsp + 0200h] |
---|
699 | movdqa xmm7, [rsp + 0210h] |
---|
700 | movdqa xmm8, [rsp + 0220h] |
---|
701 | movdqa xmm9, [rsp + 0230h] |
---|
702 | movdqa xmm10, [rsp + 0240h] |
---|
703 | movdqa xmm11, [rsp + 0250h] |
---|
704 | movdqa xmm12, [rsp + 0260h] |
---|
705 | movdqa xmm13, [rsp + 0270h] |
---|
706 | movdqa xmm14, [rsp + 0280h] |
---|
707 | movdqa xmm15, [rsp + 0290h] |
---|
708 | add rsp, 10*16 + 32*16 + 8 |
---|
709 | ret |
---|
710 | Salsa20_OperateKeystream ENDP |
---|
711 | |
---|
712 | ALIGN 8 |
---|
713 | Sosemanuk_OperateKeystream PROC FRAME |
---|
714 | rex_push_reg rsi |
---|
715 | push_reg rdi |
---|
716 | alloc_stack(80*4*2+12*4+8*8 + 2*16+8) |
---|
717 | save_xmm128 xmm6, 02f0h |
---|
718 | save_xmm128 xmm7, 0300h |
---|
719 | .endprolog |
---|
720 | mov rdi, r8 |
---|
721 | mov rax, r9 |
---|
722 | mov QWORD PTR [rsp+1*8], rdi |
---|
723 | mov QWORD PTR [rsp+2*8], rdx |
---|
724 | mov QWORD PTR [rsp+6*8], rax |
---|
725 | lea rcx, [4*rcx+rcx] |
---|
726 | lea rsi, [4*rcx] |
---|
727 | mov QWORD PTR [rsp+3*8], rsi |
---|
728 | movdqa xmm0, [rax+0*16] |
---|
729 | movdqa [rsp + 8*8+0*16], xmm0 |
---|
730 | movdqa xmm0, [rax+1*16] |
---|
731 | movdqa [rsp + 8*8+1*16], xmm0 |
---|
732 | movq xmm0, QWORD PTR [rax+2*16] |
---|
733 | movq QWORD PTR [rsp + 8*8+2*16], xmm0 |
---|
734 | psrlq xmm0, 32 |
---|
735 | movd r10d, xmm0 |
---|
736 | mov ecx, [rax+10*4] |
---|
737 | mov edx, [rax+11*4] |
---|
738 | pcmpeqb xmm7, xmm7 |
---|
739 | label2: |
---|
740 | lea rdi, [rsp + 8*8 + 12*4] |
---|
741 | mov rax, 80 |
---|
742 | cmp rsi, 80 |
---|
743 | cmovg rsi, rax |
---|
744 | mov QWORD PTR [rsp+7*8], rsi |
---|
745 | lea rsi, [rdi+rsi] |
---|
746 | mov QWORD PTR [rsp+4*8], rsi |
---|
747 | lea rsi, s_sosemanukMulTables |
---|
748 | label0: |
---|
749 | mov eax, [rsp + 8*8 + ((0+0)-((0+0)/(10))*(10))*4] |
---|
750 | mov [rdi + (((0)-((0)/(4))*(4))*20 + (0/4)) * 4 + 80*4], eax |
---|
751 | rol eax, 8 |
---|
752 | lea r11d, [r10d + edx] |
---|
753 | xor r11d, ecx |
---|
754 | mov [rdi + (((0)-((0)/(4))*(4))*20 + (0/4)) * 4], r11d |
---|
755 | mov r11d, 1 |
---|
756 | and r11d, edx |
---|
757 | neg r11d |
---|
758 | and r11d, r10d |
---|
759 | xor r10d, eax |
---|
760 | movzx eax, al |
---|
761 | xor r10d, [rsi+rax*4] |
---|
762 | mov eax, [rsp + 8*8 + ((0+3)-((0+3)/(10))*(10))*4] |
---|
763 | xor r11d, [rsp + 8*8 + ((0+2)-((0+2)/(10))*(10))*4] |
---|
764 | add ecx, r11d |
---|
765 | movzx r11d, al |
---|
766 | shr eax, 8 |
---|
767 | xor r10d, [rsi+1024+r11*4] |
---|
768 | xor r10d, eax |
---|
769 | imul edx, 54655307h |
---|
770 | rol edx, 7 |
---|
771 | mov [rsp + 8*8 + ((0+0)-((0+0)/(10))*(10))*4], r10d |
---|
772 | mov eax, [rsp + 8*8 + ((1+0)-((1+0)/(10))*(10))*4] |
---|
773 | mov [rdi + (((1)-((1)/(4))*(4))*20 + (1/4)) * 4 + 80*4], eax |
---|
774 | rol eax, 8 |
---|
775 | lea r11d, [r10d + ecx] |
---|
776 | xor r11d, edx |
---|
777 | mov [rdi + (((1)-((1)/(4))*(4))*20 + (1/4)) * 4], r11d |
---|
778 | mov r11d, 1 |
---|
779 | and r11d, ecx |
---|
780 | neg r11d |
---|
781 | and r11d, r10d |
---|
782 | xor r10d, eax |
---|
783 | movzx eax, al |
---|
784 | xor r10d, [rsi+rax*4] |
---|
785 | mov eax, [rsp + 8*8 + ((1+3)-((1+3)/(10))*(10))*4] |
---|
786 | xor r11d, [rsp + 8*8 + ((1+2)-((1+2)/(10))*(10))*4] |
---|
787 | add edx, r11d |
---|
788 | movzx r11d, al |
---|
789 | shr eax, 8 |
---|
790 | xor r10d, [rsi+1024+r11*4] |
---|
791 | xor r10d, eax |
---|
792 | imul ecx, 54655307h |
---|
793 | rol ecx, 7 |
---|
794 | mov [rsp + 8*8 + ((1+0)-((1+0)/(10))*(10))*4], r10d |
---|
795 | mov eax, [rsp + 8*8 + ((2+0)-((2+0)/(10))*(10))*4] |
---|
796 | mov [rdi + (((2)-((2)/(4))*(4))*20 + (2/4)) * 4 + 80*4], eax |
---|
797 | rol eax, 8 |
---|
798 | lea r11d, [r10d + edx] |
---|
799 | xor r11d, ecx |
---|
800 | mov [rdi + (((2)-((2)/(4))*(4))*20 + (2/4)) * 4], r11d |
---|
801 | mov r11d, 1 |
---|
802 | and r11d, edx |
---|
803 | neg r11d |
---|
804 | and r11d, r10d |
---|
805 | xor r10d, eax |
---|
806 | movzx eax, al |
---|
807 | xor r10d, [rsi+rax*4] |
---|
808 | mov eax, [rsp + 8*8 + ((2+3)-((2+3)/(10))*(10))*4] |
---|
809 | xor r11d, [rsp + 8*8 + ((2+2)-((2+2)/(10))*(10))*4] |
---|
810 | add ecx, r11d |
---|
811 | movzx r11d, al |
---|
812 | shr eax, 8 |
---|
813 | xor r10d, [rsi+1024+r11*4] |
---|
814 | xor r10d, eax |
---|
815 | imul edx, 54655307h |
---|
816 | rol edx, 7 |
---|
817 | mov [rsp + 8*8 + ((2+0)-((2+0)/(10))*(10))*4], r10d |
---|
818 | mov eax, [rsp + 8*8 + ((3+0)-((3+0)/(10))*(10))*4] |
---|
819 | mov [rdi + (((3)-((3)/(4))*(4))*20 + (3/4)) * 4 + 80*4], eax |
---|
820 | rol eax, 8 |
---|
821 | lea r11d, [r10d + ecx] |
---|
822 | xor r11d, edx |
---|
823 | mov [rdi + (((3)-((3)/(4))*(4))*20 + (3/4)) * 4], r11d |
---|
824 | mov r11d, 1 |
---|
825 | and r11d, ecx |
---|
826 | neg r11d |
---|
827 | and r11d, r10d |
---|
828 | xor r10d, eax |
---|
829 | movzx eax, al |
---|
830 | xor r10d, [rsi+rax*4] |
---|
831 | mov eax, [rsp + 8*8 + ((3+3)-((3+3)/(10))*(10))*4] |
---|
832 | xor r11d, [rsp + 8*8 + ((3+2)-((3+2)/(10))*(10))*4] |
---|
833 | add edx, r11d |
---|
834 | movzx r11d, al |
---|
835 | shr eax, 8 |
---|
836 | xor r10d, [rsi+1024+r11*4] |
---|
837 | xor r10d, eax |
---|
838 | imul ecx, 54655307h |
---|
839 | rol ecx, 7 |
---|
840 | mov [rsp + 8*8 + ((3+0)-((3+0)/(10))*(10))*4], r10d |
---|
841 | mov eax, [rsp + 8*8 + ((4+0)-((4+0)/(10))*(10))*4] |
---|
842 | mov [rdi + (((4)-((4)/(4))*(4))*20 + (4/4)) * 4 + 80*4], eax |
---|
843 | rol eax, 8 |
---|
844 | lea r11d, [r10d + edx] |
---|
845 | xor r11d, ecx |
---|
846 | mov [rdi + (((4)-((4)/(4))*(4))*20 + (4/4)) * 4], r11d |
---|
847 | mov r11d, 1 |
---|
848 | and r11d, edx |
---|
849 | neg r11d |
---|
850 | and r11d, r10d |
---|
851 | xor r10d, eax |
---|
852 | movzx eax, al |
---|
853 | xor r10d, [rsi+rax*4] |
---|
854 | mov eax, [rsp + 8*8 + ((4+3)-((4+3)/(10))*(10))*4] |
---|
855 | xor r11d, [rsp + 8*8 + ((4+2)-((4+2)/(10))*(10))*4] |
---|
856 | add ecx, r11d |
---|
857 | movzx r11d, al |
---|
858 | shr eax, 8 |
---|
859 | xor r10d, [rsi+1024+r11*4] |
---|
860 | xor r10d, eax |
---|
861 | imul edx, 54655307h |
---|
862 | rol edx, 7 |
---|
863 | mov [rsp + 8*8 + ((4+0)-((4+0)/(10))*(10))*4], r10d |
---|
864 | mov eax, [rsp + 8*8 + ((5+0)-((5+0)/(10))*(10))*4] |
---|
865 | mov [rdi + (((5)-((5)/(4))*(4))*20 + (5/4)) * 4 + 80*4], eax |
---|
866 | rol eax, 8 |
---|
867 | lea r11d, [r10d + ecx] |
---|
868 | xor r11d, edx |
---|
869 | mov [rdi + (((5)-((5)/(4))*(4))*20 + (5/4)) * 4], r11d |
---|
870 | mov r11d, 1 |
---|
871 | and r11d, ecx |
---|
872 | neg r11d |
---|
873 | and r11d, r10d |
---|
874 | xor r10d, eax |
---|
875 | movzx eax, al |
---|
876 | xor r10d, [rsi+rax*4] |
---|
877 | mov eax, [rsp + 8*8 + ((5+3)-((5+3)/(10))*(10))*4] |
---|
878 | xor r11d, [rsp + 8*8 + ((5+2)-((5+2)/(10))*(10))*4] |
---|
879 | add edx, r11d |
---|
880 | movzx r11d, al |
---|
881 | shr eax, 8 |
---|
882 | xor r10d, [rsi+1024+r11*4] |
---|
883 | xor r10d, eax |
---|
884 | imul ecx, 54655307h |
---|
885 | rol ecx, 7 |
---|
886 | mov [rsp + 8*8 + ((5+0)-((5+0)/(10))*(10))*4], r10d |
---|
887 | mov eax, [rsp + 8*8 + ((6+0)-((6+0)/(10))*(10))*4] |
---|
888 | mov [rdi + (((6)-((6)/(4))*(4))*20 + (6/4)) * 4 + 80*4], eax |
---|
889 | rol eax, 8 |
---|
890 | lea r11d, [r10d + edx] |
---|
891 | xor r11d, ecx |
---|
892 | mov [rdi + (((6)-((6)/(4))*(4))*20 + (6/4)) * 4], r11d |
---|
893 | mov r11d, 1 |
---|
894 | and r11d, edx |
---|
895 | neg r11d |
---|
896 | and r11d, r10d |
---|
897 | xor r10d, eax |
---|
898 | movzx eax, al |
---|
899 | xor r10d, [rsi+rax*4] |
---|
900 | mov eax, [rsp + 8*8 + ((6+3)-((6+3)/(10))*(10))*4] |
---|
901 | xor r11d, [rsp + 8*8 + ((6+2)-((6+2)/(10))*(10))*4] |
---|
902 | add ecx, r11d |
---|
903 | movzx r11d, al |
---|
904 | shr eax, 8 |
---|
905 | xor r10d, [rsi+1024+r11*4] |
---|
906 | xor r10d, eax |
---|
907 | imul edx, 54655307h |
---|
908 | rol edx, 7 |
---|
909 | mov [rsp + 8*8 + ((6+0)-((6+0)/(10))*(10))*4], r10d |
---|
910 | mov eax, [rsp + 8*8 + ((7+0)-((7+0)/(10))*(10))*4] |
---|
911 | mov [rdi + (((7)-((7)/(4))*(4))*20 + (7/4)) * 4 + 80*4], eax |
---|
912 | rol eax, 8 |
---|
913 | lea r11d, [r10d + ecx] |
---|
914 | xor r11d, edx |
---|
915 | mov [rdi + (((7)-((7)/(4))*(4))*20 + (7/4)) * 4], r11d |
---|
916 | mov r11d, 1 |
---|
917 | and r11d, ecx |
---|
918 | neg r11d |
---|
919 | and r11d, r10d |
---|
920 | xor r10d, eax |
---|
921 | movzx eax, al |
---|
922 | xor r10d, [rsi+rax*4] |
---|
923 | mov eax, [rsp + 8*8 + ((7+3)-((7+3)/(10))*(10))*4] |
---|
924 | xor r11d, [rsp + 8*8 + ((7+2)-((7+2)/(10))*(10))*4] |
---|
925 | add edx, r11d |
---|
926 | movzx r11d, al |
---|
927 | shr eax, 8 |
---|
928 | xor r10d, [rsi+1024+r11*4] |
---|
929 | xor r10d, eax |
---|
930 | imul ecx, 54655307h |
---|
931 | rol ecx, 7 |
---|
932 | mov [rsp + 8*8 + ((7+0)-((7+0)/(10))*(10))*4], r10d |
---|
933 | mov eax, [rsp + 8*8 + ((8+0)-((8+0)/(10))*(10))*4] |
---|
934 | mov [rdi + (((8)-((8)/(4))*(4))*20 + (8/4)) * 4 + 80*4], eax |
---|
935 | rol eax, 8 |
---|
936 | lea r11d, [r10d + edx] |
---|
937 | xor r11d, ecx |
---|
938 | mov [rdi + (((8)-((8)/(4))*(4))*20 + (8/4)) * 4], r11d |
---|
939 | mov r11d, 1 |
---|
940 | and r11d, edx |
---|
941 | neg r11d |
---|
942 | and r11d, r10d |
---|
943 | xor r10d, eax |
---|
944 | movzx eax, al |
---|
945 | xor r10d, [rsi+rax*4] |
---|
946 | mov eax, [rsp + 8*8 + ((8+3)-((8+3)/(10))*(10))*4] |
---|
947 | xor r11d, [rsp + 8*8 + ((8+2)-((8+2)/(10))*(10))*4] |
---|
948 | add ecx, r11d |
---|
949 | movzx r11d, al |
---|
950 | shr eax, 8 |
---|
951 | xor r10d, [rsi+1024+r11*4] |
---|
952 | xor r10d, eax |
---|
953 | imul edx, 54655307h |
---|
954 | rol edx, 7 |
---|
955 | mov [rsp + 8*8 + ((8+0)-((8+0)/(10))*(10))*4], r10d |
---|
956 | mov eax, [rsp + 8*8 + ((9+0)-((9+0)/(10))*(10))*4] |
---|
957 | mov [rdi + (((9)-((9)/(4))*(4))*20 + (9/4)) * 4 + 80*4], eax |
---|
958 | rol eax, 8 |
---|
959 | lea r11d, [r10d + ecx] |
---|
960 | xor r11d, edx |
---|
961 | mov [rdi + (((9)-((9)/(4))*(4))*20 + (9/4)) * 4], r11d |
---|
962 | mov r11d, 1 |
---|
963 | and r11d, ecx |
---|
964 | neg r11d |
---|
965 | and r11d, r10d |
---|
966 | xor r10d, eax |
---|
967 | movzx eax, al |
---|
968 | xor r10d, [rsi+rax*4] |
---|
969 | mov eax, [rsp + 8*8 + ((9+3)-((9+3)/(10))*(10))*4] |
---|
970 | xor r11d, [rsp + 8*8 + ((9+2)-((9+2)/(10))*(10))*4] |
---|
971 | add edx, r11d |
---|
972 | movzx r11d, al |
---|
973 | shr eax, 8 |
---|
974 | xor r10d, [rsi+1024+r11*4] |
---|
975 | xor r10d, eax |
---|
976 | imul ecx, 54655307h |
---|
977 | rol ecx, 7 |
---|
978 | mov [rsp + 8*8 + ((9+0)-((9+0)/(10))*(10))*4], r10d |
---|
979 | mov eax, [rsp + 8*8 + ((10+0)-((10+0)/(10))*(10))*4] |
---|
980 | mov [rdi + (((10)-((10)/(4))*(4))*20 + (10/4)) * 4 + 80*4], eax |
---|
981 | rol eax, 8 |
---|
982 | lea r11d, [r10d + edx] |
---|
983 | xor r11d, ecx |
---|
984 | mov [rdi + (((10)-((10)/(4))*(4))*20 + (10/4)) * 4], r11d |
---|
985 | mov r11d, 1 |
---|
986 | and r11d, edx |
---|
987 | neg r11d |
---|
988 | and r11d, r10d |
---|
989 | xor r10d, eax |
---|
990 | movzx eax, al |
---|
991 | xor r10d, [rsi+rax*4] |
---|
992 | mov eax, [rsp + 8*8 + ((10+3)-((10+3)/(10))*(10))*4] |
---|
993 | xor r11d, [rsp + 8*8 + ((10+2)-((10+2)/(10))*(10))*4] |
---|
994 | add ecx, r11d |
---|
995 | movzx r11d, al |
---|
996 | shr eax, 8 |
---|
997 | xor r10d, [rsi+1024+r11*4] |
---|
998 | xor r10d, eax |
---|
999 | imul edx, 54655307h |
---|
1000 | rol edx, 7 |
---|
1001 | mov [rsp + 8*8 + ((10+0)-((10+0)/(10))*(10))*4], r10d |
---|
1002 | mov eax, [rsp + 8*8 + ((11+0)-((11+0)/(10))*(10))*4] |
---|
1003 | mov [rdi + (((11)-((11)/(4))*(4))*20 + (11/4)) * 4 + 80*4], eax |
---|
1004 | rol eax, 8 |
---|
1005 | lea r11d, [r10d + ecx] |
---|
1006 | xor r11d, edx |
---|
1007 | mov [rdi + (((11)-((11)/(4))*(4))*20 + (11/4)) * 4], r11d |
---|
1008 | mov r11d, 1 |
---|
1009 | and r11d, ecx |
---|
1010 | neg r11d |
---|
1011 | and r11d, r10d |
---|
1012 | xor r10d, eax |
---|
1013 | movzx eax, al |
---|
1014 | xor r10d, [rsi+rax*4] |
---|
1015 | mov eax, [rsp + 8*8 + ((11+3)-((11+3)/(10))*(10))*4] |
---|
1016 | xor r11d, [rsp + 8*8 + ((11+2)-((11+2)/(10))*(10))*4] |
---|
1017 | add edx, r11d |
---|
1018 | movzx r11d, al |
---|
1019 | shr eax, 8 |
---|
1020 | xor r10d, [rsi+1024+r11*4] |
---|
1021 | xor r10d, eax |
---|
1022 | imul ecx, 54655307h |
---|
1023 | rol ecx, 7 |
---|
1024 | mov [rsp + 8*8 + ((11+0)-((11+0)/(10))*(10))*4], r10d |
---|
1025 | mov eax, [rsp + 8*8 + ((12+0)-((12+0)/(10))*(10))*4] |
---|
1026 | mov [rdi + (((12)-((12)/(4))*(4))*20 + (12/4)) * 4 + 80*4], eax |
---|
1027 | rol eax, 8 |
---|
1028 | lea r11d, [r10d + edx] |
---|
1029 | xor r11d, ecx |
---|
1030 | mov [rdi + (((12)-((12)/(4))*(4))*20 + (12/4)) * 4], r11d |
---|
1031 | mov r11d, 1 |
---|
1032 | and r11d, edx |
---|
1033 | neg r11d |
---|
1034 | and r11d, r10d |
---|
1035 | xor r10d, eax |
---|
1036 | movzx eax, al |
---|
1037 | xor r10d, [rsi+rax*4] |
---|
1038 | mov eax, [rsp + 8*8 + ((12+3)-((12+3)/(10))*(10))*4] |
---|
1039 | xor r11d, [rsp + 8*8 + ((12+2)-((12+2)/(10))*(10))*4] |
---|
1040 | add ecx, r11d |
---|
1041 | movzx r11d, al |
---|
1042 | shr eax, 8 |
---|
1043 | xor r10d, [rsi+1024+r11*4] |
---|
1044 | xor r10d, eax |
---|
1045 | imul edx, 54655307h |
---|
1046 | rol edx, 7 |
---|
1047 | mov [rsp + 8*8 + ((12+0)-((12+0)/(10))*(10))*4], r10d |
---|
1048 | mov eax, [rsp + 8*8 + ((13+0)-((13+0)/(10))*(10))*4] |
---|
1049 | mov [rdi + (((13)-((13)/(4))*(4))*20 + (13/4)) * 4 + 80*4], eax |
---|
1050 | rol eax, 8 |
---|
1051 | lea r11d, [r10d + ecx] |
---|
1052 | xor r11d, edx |
---|
1053 | mov [rdi + (((13)-((13)/(4))*(4))*20 + (13/4)) * 4], r11d |
---|
1054 | mov r11d, 1 |
---|
1055 | and r11d, ecx |
---|
1056 | neg r11d |
---|
1057 | and r11d, r10d |
---|
1058 | xor r10d, eax |
---|
1059 | movzx eax, al |
---|
1060 | xor r10d, [rsi+rax*4] |
---|
1061 | mov eax, [rsp + 8*8 + ((13+3)-((13+3)/(10))*(10))*4] |
---|
1062 | xor r11d, [rsp + 8*8 + ((13+2)-((13+2)/(10))*(10))*4] |
---|
1063 | add edx, r11d |
---|
1064 | movzx r11d, al |
---|
1065 | shr eax, 8 |
---|
1066 | xor r10d, [rsi+1024+r11*4] |
---|
1067 | xor r10d, eax |
---|
1068 | imul ecx, 54655307h |
---|
1069 | rol ecx, 7 |
---|
1070 | mov [rsp + 8*8 + ((13+0)-((13+0)/(10))*(10))*4], r10d |
---|
1071 | mov eax, [rsp + 8*8 + ((14+0)-((14+0)/(10))*(10))*4] |
---|
1072 | mov [rdi + (((14)-((14)/(4))*(4))*20 + (14/4)) * 4 + 80*4], eax |
---|
1073 | rol eax, 8 |
---|
1074 | lea r11d, [r10d + edx] |
---|
1075 | xor r11d, ecx |
---|
1076 | mov [rdi + (((14)-((14)/(4))*(4))*20 + (14/4)) * 4], r11d |
---|
1077 | mov r11d, 1 |
---|
1078 | and r11d, edx |
---|
1079 | neg r11d |
---|
1080 | and r11d, r10d |
---|
1081 | xor r10d, eax |
---|
1082 | movzx eax, al |
---|
1083 | xor r10d, [rsi+rax*4] |
---|
1084 | mov eax, [rsp + 8*8 + ((14+3)-((14+3)/(10))*(10))*4] |
---|
1085 | xor r11d, [rsp + 8*8 + ((14+2)-((14+2)/(10))*(10))*4] |
---|
1086 | add ecx, r11d |
---|
1087 | movzx r11d, al |
---|
1088 | shr eax, 8 |
---|
1089 | xor r10d, [rsi+1024+r11*4] |
---|
1090 | xor r10d, eax |
---|
1091 | imul edx, 54655307h |
---|
1092 | rol edx, 7 |
---|
1093 | mov [rsp + 8*8 + ((14+0)-((14+0)/(10))*(10))*4], r10d |
---|
1094 | mov eax, [rsp + 8*8 + ((15+0)-((15+0)/(10))*(10))*4] |
---|
1095 | mov [rdi + (((15)-((15)/(4))*(4))*20 + (15/4)) * 4 + 80*4], eax |
---|
1096 | rol eax, 8 |
---|
1097 | lea r11d, [r10d + ecx] |
---|
1098 | xor r11d, edx |
---|
1099 | mov [rdi + (((15)-((15)/(4))*(4))*20 + (15/4)) * 4], r11d |
---|
1100 | mov r11d, 1 |
---|
1101 | and r11d, ecx |
---|
1102 | neg r11d |
---|
1103 | and r11d, r10d |
---|
1104 | xor r10d, eax |
---|
1105 | movzx eax, al |
---|
1106 | xor r10d, [rsi+rax*4] |
---|
1107 | mov eax, [rsp + 8*8 + ((15+3)-((15+3)/(10))*(10))*4] |
---|
1108 | xor r11d, [rsp + 8*8 + ((15+2)-((15+2)/(10))*(10))*4] |
---|
1109 | add edx, r11d |
---|
1110 | movzx r11d, al |
---|
1111 | shr eax, 8 |
---|
1112 | xor r10d, [rsi+1024+r11*4] |
---|
1113 | xor r10d, eax |
---|
1114 | imul ecx, 54655307h |
---|
1115 | rol ecx, 7 |
---|
1116 | mov [rsp + 8*8 + ((15+0)-((15+0)/(10))*(10))*4], r10d |
---|
1117 | mov eax, [rsp + 8*8 + ((16+0)-((16+0)/(10))*(10))*4] |
---|
1118 | mov [rdi + (((16)-((16)/(4))*(4))*20 + (16/4)) * 4 + 80*4], eax |
---|
1119 | rol eax, 8 |
---|
1120 | lea r11d, [r10d + edx] |
---|
1121 | xor r11d, ecx |
---|
1122 | mov [rdi + (((16)-((16)/(4))*(4))*20 + (16/4)) * 4], r11d |
---|
1123 | mov r11d, 1 |
---|
1124 | and r11d, edx |
---|
1125 | neg r11d |
---|
1126 | and r11d, r10d |
---|
1127 | xor r10d, eax |
---|
1128 | movzx eax, al |
---|
1129 | xor r10d, [rsi+rax*4] |
---|
1130 | mov eax, [rsp + 8*8 + ((16+3)-((16+3)/(10))*(10))*4] |
---|
1131 | xor r11d, [rsp + 8*8 + ((16+2)-((16+2)/(10))*(10))*4] |
---|
1132 | add ecx, r11d |
---|
1133 | movzx r11d, al |
---|
1134 | shr eax, 8 |
---|
1135 | xor r10d, [rsi+1024+r11*4] |
---|
1136 | xor r10d, eax |
---|
1137 | imul edx, 54655307h |
---|
1138 | rol edx, 7 |
---|
1139 | mov [rsp + 8*8 + ((16+0)-((16+0)/(10))*(10))*4], r10d |
---|
1140 | mov eax, [rsp + 8*8 + ((17+0)-((17+0)/(10))*(10))*4] |
---|
1141 | mov [rdi + (((17)-((17)/(4))*(4))*20 + (17/4)) * 4 + 80*4], eax |
---|
1142 | rol eax, 8 |
---|
1143 | lea r11d, [r10d + ecx] |
---|
1144 | xor r11d, edx |
---|
1145 | mov [rdi + (((17)-((17)/(4))*(4))*20 + (17/4)) * 4], r11d |
---|
1146 | mov r11d, 1 |
---|
1147 | and r11d, ecx |
---|
1148 | neg r11d |
---|
1149 | and r11d, r10d |
---|
1150 | xor r10d, eax |
---|
1151 | movzx eax, al |
---|
1152 | xor r10d, [rsi+rax*4] |
---|
1153 | mov eax, [rsp + 8*8 + ((17+3)-((17+3)/(10))*(10))*4] |
---|
1154 | xor r11d, [rsp + 8*8 + ((17+2)-((17+2)/(10))*(10))*4] |
---|
1155 | add edx, r11d |
---|
1156 | movzx r11d, al |
---|
1157 | shr eax, 8 |
---|
1158 | xor r10d, [rsi+1024+r11*4] |
---|
1159 | xor r10d, eax |
---|
1160 | imul ecx, 54655307h |
---|
1161 | rol ecx, 7 |
---|
1162 | mov [rsp + 8*8 + ((17+0)-((17+0)/(10))*(10))*4], r10d |
---|
1163 | mov eax, [rsp + 8*8 + ((18+0)-((18+0)/(10))*(10))*4] |
---|
1164 | mov [rdi + (((18)-((18)/(4))*(4))*20 + (18/4)) * 4 + 80*4], eax |
---|
1165 | rol eax, 8 |
---|
1166 | lea r11d, [r10d + edx] |
---|
1167 | xor r11d, ecx |
---|
1168 | mov [rdi + (((18)-((18)/(4))*(4))*20 + (18/4)) * 4], r11d |
---|
1169 | mov r11d, 1 |
---|
1170 | and r11d, edx |
---|
1171 | neg r11d |
---|
1172 | and r11d, r10d |
---|
1173 | xor r10d, eax |
---|
1174 | movzx eax, al |
---|
1175 | xor r10d, [rsi+rax*4] |
---|
1176 | mov eax, [rsp + 8*8 + ((18+3)-((18+3)/(10))*(10))*4] |
---|
1177 | xor r11d, [rsp + 8*8 + ((18+2)-((18+2)/(10))*(10))*4] |
---|
1178 | add ecx, r11d |
---|
1179 | movzx r11d, al |
---|
1180 | shr eax, 8 |
---|
1181 | xor r10d, [rsi+1024+r11*4] |
---|
1182 | xor r10d, eax |
---|
1183 | imul edx, 54655307h |
---|
1184 | rol edx, 7 |
---|
1185 | mov [rsp + 8*8 + ((18+0)-((18+0)/(10))*(10))*4], r10d |
---|
1186 | mov eax, [rsp + 8*8 + ((19+0)-((19+0)/(10))*(10))*4] |
---|
1187 | mov [rdi + (((19)-((19)/(4))*(4))*20 + (19/4)) * 4 + 80*4], eax |
---|
1188 | rol eax, 8 |
---|
1189 | lea r11d, [r10d + ecx] |
---|
1190 | xor r11d, edx |
---|
1191 | mov [rdi + (((19)-((19)/(4))*(4))*20 + (19/4)) * 4], r11d |
---|
1192 | mov r11d, 1 |
---|
1193 | and r11d, ecx |
---|
1194 | neg r11d |
---|
1195 | and r11d, r10d |
---|
1196 | xor r10d, eax |
---|
1197 | movzx eax, al |
---|
1198 | xor r10d, [rsi+rax*4] |
---|
1199 | mov eax, [rsp + 8*8 + ((19+3)-((19+3)/(10))*(10))*4] |
---|
1200 | xor r11d, [rsp + 8*8 + ((19+2)-((19+2)/(10))*(10))*4] |
---|
1201 | add edx, r11d |
---|
1202 | movzx r11d, al |
---|
1203 | shr eax, 8 |
---|
1204 | xor r10d, [rsi+1024+r11*4] |
---|
1205 | xor r10d, eax |
---|
1206 | imul ecx, 54655307h |
---|
1207 | rol ecx, 7 |
---|
1208 | mov [rsp + 8*8 + ((19+0)-((19+0)/(10))*(10))*4], r10d |
---|
1209 | add rdi, 5*4 |
---|
1210 | cmp rdi, QWORD PTR [rsp+4*8] |
---|
1211 | jne label0 |
---|
1212 | mov rax, QWORD PTR [rsp+2*8] |
---|
1213 | mov r11, QWORD PTR [rsp+1*8] |
---|
1214 | lea rdi, [rsp + 8*8 + 12*4] |
---|
1215 | mov rsi, QWORD PTR [rsp+7*8] |
---|
1216 | label1: |
---|
1217 | movdqa xmm0, [rdi+0*20*4] |
---|
1218 | movdqa xmm2, [rdi+2*20*4] |
---|
1219 | movdqa xmm3, [rdi+3*20*4] |
---|
1220 | movdqa xmm1, [rdi+1*20*4] |
---|
1221 | movdqa xmm4, xmm0 |
---|
1222 | pand xmm0, xmm2 |
---|
1223 | pxor xmm0, xmm3 |
---|
1224 | pxor xmm2, xmm1 |
---|
1225 | pxor xmm2, xmm0 |
---|
1226 | por xmm3, xmm4 |
---|
1227 | pxor xmm3, xmm1 |
---|
1228 | pxor xmm4, xmm2 |
---|
1229 | movdqa xmm1, xmm3 |
---|
1230 | por xmm3, xmm4 |
---|
1231 | pxor xmm3, xmm0 |
---|
1232 | pand xmm0, xmm1 |
---|
1233 | pxor xmm4, xmm0 |
---|
1234 | pxor xmm1, xmm3 |
---|
1235 | pxor xmm1, xmm4 |
---|
1236 | pxor xmm4, xmm7 |
---|
1237 | pxor xmm2, [rdi+80*4] |
---|
1238 | pxor xmm3, [rdi+80*5] |
---|
1239 | pxor xmm1, [rdi+80*6] |
---|
1240 | pxor xmm4, [rdi+80*7] |
---|
1241 | cmp rsi, 16 |
---|
1242 | jl label4 |
---|
1243 | movdqa xmm6, xmm2 |
---|
1244 | punpckldq xmm2, xmm3 |
---|
1245 | movdqa xmm5, xmm1 |
---|
1246 | punpckldq xmm1, xmm4 |
---|
1247 | movdqa xmm0, xmm2 |
---|
1248 | punpcklqdq xmm2, xmm1 |
---|
1249 | punpckhqdq xmm0, xmm1 |
---|
1250 | punpckhdq xmm6, xmm3 |
---|
1251 | punpckhdq xmm5, xmm4 |
---|
1252 | movdqa xmm3, xmm6 |
---|
1253 | punpcklqdq xmm6, xmm5 |
---|
1254 | punpckhqdq xmm3, xmm5 |
---|
1255 | test rax, rax |
---|
1256 | jz labelSSE2_Sosemanuk_Output3 |
---|
1257 | test rax, 15 |
---|
1258 | jnz labelSSE2_Sosemanuk_Output7 |
---|
1259 | pxor xmm2, [rax+0*16] |
---|
1260 | pxor xmm0, [rax+1*16] |
---|
1261 | pxor xmm6, [rax+2*16] |
---|
1262 | pxor xmm3, [rax+3*16] |
---|
1263 | add rax, 4*16 |
---|
1264 | jmp labelSSE2_Sosemanuk_Output3 |
---|
1265 | labelSSE2_Sosemanuk_Output7: |
---|
1266 | movdqu xmm1, [rax+0*16] |
---|
1267 | pxor xmm2, xmm1 |
---|
1268 | movdqu xmm1, [rax+1*16] |
---|
1269 | pxor xmm0, xmm1 |
---|
1270 | movdqu xmm1, [rax+2*16] |
---|
1271 | pxor xmm6, xmm1 |
---|
1272 | movdqu xmm1, [rax+3*16] |
---|
1273 | pxor xmm3, xmm1 |
---|
1274 | add rax, 4*16 |
---|
1275 | labelSSE2_Sosemanuk_Output3: |
---|
1276 | test r11, 15 |
---|
1277 | jnz labelSSE2_Sosemanuk_Output8 |
---|
1278 | movdqa [r11+0*16], xmm2 |
---|
1279 | movdqa [r11+1*16], xmm0 |
---|
1280 | movdqa [r11+2*16], xmm6 |
---|
1281 | movdqa [r11+3*16], xmm3 |
---|
1282 | jmp labelSSE2_Sosemanuk_Output9 |
---|
1283 | labelSSE2_Sosemanuk_Output8: |
---|
1284 | movdqu [r11+0*16], xmm2 |
---|
1285 | movdqu [r11+1*16], xmm0 |
---|
1286 | movdqu [r11+2*16], xmm6 |
---|
1287 | movdqu [r11+3*16], xmm3 |
---|
1288 | labelSSE2_Sosemanuk_Output9: |
---|
1289 | add r11, 4*16 |
---|
1290 | add rdi, 4*4 |
---|
1291 | sub rsi, 16 |
---|
1292 | jnz label1 |
---|
1293 | mov rsi, QWORD PTR [rsp+3*8] |
---|
1294 | sub rsi, 80 |
---|
1295 | jz label6 |
---|
1296 | mov QWORD PTR [rsp+3*8], rsi |
---|
1297 | mov QWORD PTR [rsp+2*8], rax |
---|
1298 | mov QWORD PTR [rsp+1*8], r11 |
---|
1299 | jmp label2 |
---|
1300 | label4: |
---|
1301 | test rax, rax |
---|
1302 | jz label5 |
---|
1303 | movd xmm0, dword ptr [rax+0*4] |
---|
1304 | pxor xmm2, xmm0 |
---|
1305 | movd xmm0, dword ptr [rax+1*4] |
---|
1306 | pxor xmm3, xmm0 |
---|
1307 | movd xmm0, dword ptr [rax+2*4] |
---|
1308 | pxor xmm1, xmm0 |
---|
1309 | movd xmm0, dword ptr [rax+3*4] |
---|
1310 | pxor xmm4, xmm0 |
---|
1311 | add rax, 16 |
---|
1312 | label5: |
---|
1313 | movd dword ptr [r11+0*4], xmm2 |
---|
1314 | movd dword ptr [r11+1*4], xmm3 |
---|
1315 | movd dword ptr [r11+2*4], xmm1 |
---|
1316 | movd dword ptr [r11+3*4], xmm4 |
---|
1317 | sub rsi, 4 |
---|
1318 | jz label6 |
---|
1319 | add r11, 16 |
---|
1320 | psrldq xmm2, 4 |
---|
1321 | psrldq xmm3, 4 |
---|
1322 | psrldq xmm1, 4 |
---|
1323 | psrldq xmm4, 4 |
---|
1324 | jmp label4 |
---|
1325 | label6: |
---|
1326 | mov r10, QWORD PTR [rsp+6*8] |
---|
1327 | movdqa xmm0, [rsp + 8*8+0*16] |
---|
1328 | movdqa [r10+0*16], xmm0 |
---|
1329 | movdqa xmm0, [rsp + 8*8+1*16] |
---|
1330 | movdqa [r10+1*16], xmm0 |
---|
1331 | movq xmm0, QWORD PTR [rsp + 8*8+2*16] |
---|
1332 | movq QWORD PTR [r10+2*16], xmm0 |
---|
1333 | mov [r10+10*4], ecx |
---|
1334 | mov [r10+11*4], edx |
---|
1335 | movdqa xmm6, [rsp + 02f0h] |
---|
1336 | movdqa xmm7, [rsp + 0300h] |
---|
1337 | add rsp, 80*4*2+12*4+8*8 + 2*16+8 |
---|
1338 | pop rdi |
---|
1339 | pop rsi |
---|
1340 | ret |
---|
1341 | Sosemanuk_OperateKeystream ENDP |
---|
1342 | |
---|
;-----------------------------------------------------------------------
; Panama_SSE2_Pull
;
; Runs a fixed number of iterations of an SSE2 state-update loop over the
; structure at rdx. Each iteration can also emit 32 bytes to r8, optionally
; XORed with 32 bytes read from r9.
;
; Registers read as inputs (all are read before being written):
;   rcx - iteration count; the routine does nothing when rcx*32 is zero
;   rdx - base of the state structure; accessed with movdqa, so it has to
;         be 16-byte aligned. Layout as used by this code:
;           rdx + 0*16 .. 3*16  sixteen dwords held in xmm0..xmm3
;           rdx + 4*16          seventeenth dword, carried in eax
;           rdx + 4*17          dword byte-offset cursor, carried in r10
;           rdx + 20*4          32 entries of 32 bytes, indexed modulo 32
;   r8  - destination pointer, may be null (then nothing is emitted and r9
;         is ignored); advanced by 32 per iteration when non-null
;   r9  - source pointer XORed into the emitted data, may be null; advanced
;         by 32 per iteration when it is used
; NOTE(review): presumably the Microsoft x64 argument order
;   (count, state, output, input) - confirm against the C prototype.
;
; Preserved: rdi, xmm6, xmm7 (saved in the prologue, restored at label5).
; Modified:  rax, rcx, r8, r9, r10, r11, xmm0-xmm5, flags.
; The advanced cursor in r10 is not written back to [rdx+4*17] here.
; NOTE(review): presumably the caller updates it - confirm.
;-----------------------------------------------------------------------
1343 | Panama_SSE2_Pull PROC FRAME |
---|
; Prologue: one push plus 32 bytes of locals used as the xmm6/xmm7 save area.
1344 | rex_push_reg rdi |
---|
1345 | alloc_stack(2*16) |
---|
1346 | save_xmm128 xmm6, 0h |
---|
1347 | save_xmm128 xmm7, 10h |
---|
1348 | .endprolog |
---|
; rcx = count * 32 (bytes of cursor advance); zero means nothing to do.
1349 | shl rcx, 5 |
---|
1350 | jz label5 |
---|
; r10 = current cursor (32-bit load zero-extends), rdi = cursor value at which the loop stops.
1351 | mov r10d, [rdx+4*17] |
---|
1352 | add rcx, r10 |
---|
1353 | mov rdi, rcx |
---|
; Load the seventeen state dwords: sixteen into xmm0..xmm3, the last into eax.
1354 | movdqa xmm0, xmmword ptr [rdx+0*16] |
---|
1355 | movdqa xmm1, xmmword ptr [rdx+1*16] |
---|
1356 | movdqa xmm2, xmmword ptr [rdx+2*16] |
---|
1357 | movdqa xmm3, xmmword ptr [rdx+3*16] |
---|
1358 | mov eax, dword ptr [rdx+4*16] |
---|
; ---- loop top: one iteration per 32-byte cursor step ----
1359 | label4: |
---|
; xmm5 = { xmm2[1], xmm2[2], xmm2[3], xmm3[0] }: movss replaces the low dword,
; then pshufd rotates the four dwords down by one position.
1360 | movdqa xmm6, xmm2 |
---|
1361 | movss xmm6, xmm3 |
---|
1362 | pshufd xmm5, xmm6, 0*64+3*16+2*4+1 |
---|
; xmm6 = { xmm3[1], xmm3[2], xmm3[3], eax } built the same way.
1363 | movd xmm6, eax |
---|
1364 | movdqa xmm7, xmm3 |
---|
1365 | movss xmm7, xmm6 |
---|
1366 | pshufd xmm6, xmm7, 0*64+3*16+2*4+1 |
---|
; Scalar part: eax ^= (~xmm2[0]) OR xmm3[0].
1367 | movd ecx, xmm2 |
---|
1368 | not ecx |
---|
1369 | movd r11d, xmm3 |
---|
1370 | or ecx, r11d |
---|
1371 | xor eax, ecx |
---|
; Group 1: xmm7 = (~xmm1 OR xmm2) XOR xmm3 (pcmpeqb of a register with itself gives all ones).
; Each of its four dwords is then moved to ecx, rotated left by a constant and
; stored to a permuted dword slot at rdx. pshuflw swaps the two low dwords and
; punpckhqdq brings the high qword down, so dwords are visited in order 0,1,2,3.
; For multiplier i, with j = 5*i MOD 17: rotate count = (j*(j+1)/2) MOD 32,
; destination dword index = (j*13+16) MOD 17.
; The sixteen multipliers 1..16 used across the four groups hit indices 0..15 once each.
; NOTE(review): presumably the nonlinear, rotate and permute steps of the Panama
; state update - confirm against the reference description.
1372 | pcmpeqb xmm7, xmm7 |
---|
1373 | pxor xmm7, xmm1 |
---|
1374 | por xmm7, xmm2 |
---|
1375 | pxor xmm7, xmm3 |
---|
1376 | movd ecx, xmm7 |
---|
1377 | rol ecx, (((((5*1) MOD (17))*(((5*1) MOD (17))+1)/2)) MOD (32)) |
---|
1378 | mov [rdx+((((((5*(1)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1379 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1380 | movd ecx, xmm7 |
---|
1381 | rol ecx, (((((5*5) MOD (17))*(((5*5) MOD (17))+1)/2)) MOD (32)) |
---|
1382 | mov [rdx+((((((5*(5)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1383 | punpckhqdq xmm7, xmm7 |
---|
1384 | movd ecx, xmm7 |
---|
1385 | rol ecx, (((((5*9) MOD (17))*(((5*9) MOD (17))+1)/2)) MOD (32)) |
---|
1386 | mov [rdx+((((((5*(9)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1387 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1388 | movd ecx, xmm7 |
---|
1389 | rol ecx, (((((5*13) MOD (17))*(((5*13) MOD (17))+1)/2)) MOD (32)) |
---|
1390 | mov [rdx+((((((5*(13)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
; Group 2: xmm7 = (~xmm0 OR xmm1) XOR xmm2, multipliers 2, 6, 10, 14.
1391 | pcmpeqb xmm7, xmm7 |
---|
1392 | pxor xmm7, xmm0 |
---|
1393 | por xmm7, xmm1 |
---|
1394 | pxor xmm7, xmm2 |
---|
1395 | movd ecx, xmm7 |
---|
1396 | rol ecx, (((((5*2) MOD (17))*(((5*2) MOD (17))+1)/2)) MOD (32)) |
---|
1397 | mov [rdx+((((((5*(2)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1398 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1399 | movd ecx, xmm7 |
---|
1400 | rol ecx, (((((5*6) MOD (17))*(((5*6) MOD (17))+1)/2)) MOD (32)) |
---|
1401 | mov [rdx+((((((5*(6)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1402 | punpckhqdq xmm7, xmm7 |
---|
1403 | movd ecx, xmm7 |
---|
1404 | rol ecx, (((((5*10) MOD (17))*(((5*10) MOD (17))+1)/2)) MOD (32)) |
---|
1405 | mov [rdx+((((((5*(10)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1406 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1407 | movd ecx, xmm7 |
---|
1408 | rol ecx, (((((5*14) MOD (17))*(((5*14) MOD (17))+1)/2)) MOD (32)) |
---|
1409 | mov [rdx+((((((5*(14)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
; Group 3: xmm7 = (~xmm6 OR xmm0) XOR xmm1, multipliers 3, 7, 11, 15.
1410 | pcmpeqb xmm7, xmm7 |
---|
1411 | pxor xmm7, xmm6 |
---|
1412 | por xmm7, xmm0 |
---|
1413 | pxor xmm7, xmm1 |
---|
1414 | movd ecx, xmm7 |
---|
1415 | rol ecx, (((((5*3) MOD (17))*(((5*3) MOD (17))+1)/2)) MOD (32)) |
---|
1416 | mov [rdx+((((((5*(3)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1417 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1418 | movd ecx, xmm7 |
---|
1419 | rol ecx, (((((5*7) MOD (17))*(((5*7) MOD (17))+1)/2)) MOD (32)) |
---|
1420 | mov [rdx+((((((5*(7)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1421 | punpckhqdq xmm7, xmm7 |
---|
1422 | movd ecx, xmm7 |
---|
1423 | rol ecx, (((((5*11) MOD (17))*(((5*11) MOD (17))+1)/2)) MOD (32)) |
---|
1424 | mov [rdx+((((((5*(11)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1425 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1426 | movd ecx, xmm7 |
---|
1427 | rol ecx, (((((5*15) MOD (17))*(((5*15) MOD (17))+1)/2)) MOD (32)) |
---|
1428 | mov [rdx+((((((5*(15)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
; Group 4: xmm7 = (~xmm5 OR xmm6) XOR xmm0, multipliers 4, 8, 12, 16.
1429 | pcmpeqb xmm7, xmm7 |
---|
1430 | pxor xmm7, xmm5 |
---|
1431 | por xmm7, xmm6 |
---|
1432 | pxor xmm7, xmm0 |
---|
1433 | movd ecx, xmm7 |
---|
1434 | rol ecx, (((((5*4) MOD (17))*(((5*4) MOD (17))+1)/2)) MOD (32)) |
---|
1435 | mov [rdx+((((((5*(4)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1436 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1437 | movd ecx, xmm7 |
---|
1438 | rol ecx, (((((5*8) MOD (17))*(((5*8) MOD (17))+1)/2)) MOD (32)) |
---|
1439 | mov [rdx+((((((5*(8)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1440 | punpckhqdq xmm7, xmm7 |
---|
1441 | movd ecx, xmm7 |
---|
1442 | rol ecx, (((((5*12) MOD (17))*(((5*12) MOD (17))+1)/2)) MOD (32)) |
---|
1443 | mov [rdx+((((((5*(12)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
1444 | pshuflw xmm7, xmm7, 1*64+0*16+3*4+2 |
---|
1445 | movd ecx, xmm7 |
---|
1446 | rol ecx, (((((5*16) MOD (17))*(((5*16) MOD (17))+1)/2)) MOD (32)) |
---|
1447 | mov [rdx+((((((5*(16)) MOD (17)))*13+16)) MOD (17))*4], ecx |
---|
; Interleave the old (pre-update) register state:
;   xmm3 = { xmm3.lo64, xmm2.lo64 }, xmm4 = { xmm3[2], xmm2[2], xmm3[3], xmm2[3] }
;   xmm1 = { xmm1.lo64, xmm0.lo64 }, xmm2 = { xmm1[2], xmm0[2], xmm1[3], xmm0[3] }
; Order matters: xmm2 is overwritten only after both reads of its old value.
1448 | movdqa xmm4, xmm3 |
---|
1449 | punpcklqdq xmm3, xmm2 |
---|
1450 | punpckhdq xmm4, xmm2 |
---|
1451 | movdqa xmm2, xmm1 |
---|
1452 | punpcklqdq xmm1, xmm0 |
---|
1453 | punpckhdq xmm2, xmm0 |
---|
; Null r8: skip emitting data for this iteration (r9 is not touched either).
1454 | test r8, r8 |
---|
1455 | jz label0 |
---|
; The 32 bytes to emit: xmm4 = { xmm4.lo64, xmm2.lo64 }, xmm6 = { xmm4.hi64, xmm2.hi64 }.
1456 | movdqa xmm6, xmm4 |
---|
1457 | punpcklqdq xmm4, xmm2 |
---|
1458 | punpckhqdq xmm6, xmm2 |
---|
; r9 handling: misaligned -> label2 (movdqu loads); null -> no XOR; otherwise
; XOR directly from aligned memory. The alignment test runs first, so a null
; r9 (low bits clear) falls through to the null check.
1459 | test r9, 15 |
---|
1460 | jnz label2 |
---|
1461 | test r9, r9 |
---|
1462 | jz label1 |
---|
1463 | pxor xmm4, [r9] |
---|
1464 | pxor xmm6, [r9+16] |
---|
1465 | add r9, 32 |
---|
1466 | jmp label1 |
---|
; Misaligned r9: load through xmm0/xmm2, which are free here (reloaded from memory further down).
1467 | label2: |
---|
1468 | movdqu xmm0, [r9] |
---|
1469 | movdqu xmm2, [r9+16] |
---|
1470 | pxor xmm4, xmm0 |
---|
1471 | pxor xmm6, xmm2 |
---|
1472 | add r9, 32 |
---|
; Store the 32 bytes to r8, using aligned stores when r8 is 16-byte aligned.
1473 | label1: |
---|
1474 | test r8, 15 |
---|
1475 | jnz label3 |
---|
1476 | movdqa xmmword ptr [r8], xmm4 |
---|
1477 | movdqa xmmword ptr [r8+16], xmm6 |
---|
1478 | add r8, 32 |
---|
1479 | jmp label0 |
---|
1480 | label3: |
---|
1481 | movdqu xmmword ptr [r8], xmm4 |
---|
1482 | movdqu xmmword ptr [r8+16], xmm6 |
---|
1483 | add r8, 32 |
---|
; Update two entries of the 32 x 32-byte table at rdx+20*4. Offsets are byte
; offsets wrapped with the 31*32 mask:
;   rcx = entry one step ahead of the cursor, r11 = entry 8 (32-24) steps ahead.
1484 | label0: |
---|
1485 | lea rcx, [r10 + 32] |
---|
1486 | and rcx, 31*32 |
---|
1487 | lea r11, [r10 + (32-24)*32] |
---|
1488 | and r11, 31*32 |
---|
; First half of entry rcx: its old value (with dwords swapped inside each qword
; by pshufd) is XORed into the second half of entry r11; entry rcx itself
; receives old-value XOR xmm3.
1489 | movdqa xmm0, xmmword ptr [rdx+20*4+rcx+0*8] |
---|
1490 | pxor xmm3, xmm0 |
---|
1491 | pshufd xmm0, xmm0, 2*64+3*16+0*4+1 |
---|
1492 | movdqa xmmword ptr [rdx+20*4+rcx+0*8], xmm3 |
---|
1493 | pxor xmm0, xmmword ptr [rdx+20*4+r11+2*8] |
---|
1494 | movdqa xmmword ptr [rdx+20*4+r11+2*8], xmm0 |
---|
; Second half of entry rcx: old value XORed (unshuffled) into the first half of
; entry r11; entry rcx receives old-value XOR xmm1.
1495 | movdqa xmm4, xmmword ptr [rdx+20*4+rcx+2*8] |
---|
1496 | pxor xmm1, xmm4 |
---|
1497 | movdqa xmmword ptr [rdx+20*4+rcx+2*8], xmm1 |
---|
1498 | pxor xmm4, xmmword ptr [rdx+20*4+r11+0*8] |
---|
1499 | movdqa xmmword ptr [rdx+20*4+r11+0*8], xmm4 |
---|
; Reload the sixteen dwords that the rotate-and-store groups wrote to rdx.
1500 | movdqa xmm3, xmmword ptr [rdx+3*16] |
---|
1501 | movdqa xmm2, xmmword ptr [rdx+2*16] |
---|
1502 | movdqa xmm1, xmmword ptr [rdx+1*16] |
---|
1503 | movdqa xmm0, xmmword ptr [rdx+0*16] |
---|
; Build copies shifted down by one dword across the register chain (movss puts
; the next register's low dword in lane 0, the pshufd below rotates it to lane 3):
;   xmm7 = { xmm3[1..3], eax }, xmm6 = { xmm2[1..3], xmm3[0] },
;   xmm5 = { xmm1[1..3], xmm2[0] }, xmm4 = { xmm0[1..3], xmm1[0] }.
1504 | movd xmm6, eax |
---|
1505 | movdqa xmm7, xmm3 |
---|
1506 | movss xmm7, xmm6 |
---|
1507 | movdqa xmm6, xmm2 |
---|
1508 | movss xmm6, xmm3 |
---|
1509 | movdqa xmm5, xmm1 |
---|
1510 | movss xmm5, xmm2 |
---|
1511 | movdqa xmm4, xmm0 |
---|
1512 | movss xmm4, xmm1 |
---|
1513 | pshufd xmm7, xmm7, 0*64+3*16+2*4+1 |
---|
1514 | pshufd xmm6, xmm6, 0*64+3*16+2*4+1 |
---|
1515 | pshufd xmm5, xmm5, 0*64+3*16+2*4+1 |
---|
1516 | pshufd xmm4, xmm4, 0*64+3*16+2*4+1 |
---|
; Scalar word: eax = eax XOR 1 XOR xmm0[0] XOR xmm3[0] (values as just reloaded).
1517 | xor eax, 1 |
---|
1518 | movd ecx, xmm0 |
---|
1519 | xor eax, ecx |
---|
1520 | movd ecx, xmm3 |
---|
1521 | xor eax, ecx |
---|
; Vector words: each register is XORed with its neighbour and with shifted copies.
; The first three pxor lines read the not-yet-modified lower register, so their order matters.
1522 | pxor xmm3, xmm2 |
---|
1523 | pxor xmm2, xmm1 |
---|
1524 | pxor xmm1, xmm0 |
---|
1525 | pxor xmm0, xmm7 |
---|
1526 | pxor xmm3, xmm7 |
---|
1527 | pxor xmm2, xmm6 |
---|
1528 | pxor xmm1, xmm5 |
---|
1529 | pxor xmm0, xmm4 |
---|
; Fold in two table entries: rcx = entry 28 (32-4) steps ahead of the cursor,
; r11 = entry 16 steps ahead, both wrapped with the 31*32 mask.
1530 | lea rcx, [r10 + (32-4)*32] |
---|
1531 | and rcx, 31*32 |
---|
1532 | lea r11, [r10 + 16*32] |
---|
1533 | and r11, 31*32 |
---|
; First 16 bytes of each entry, interleaved by qword:
;   xmm3 ^= { rcx.lo64, r11.lo64 }, xmm2 ^= { rcx.hi64, r11.hi64 }.
1534 | movdqa xmm4, xmmword ptr [rdx+20*4+rcx+0*16] |
---|
1535 | movdqa xmm5, xmmword ptr [rdx+20*4+r11+0*16] |
---|
1536 | movdqa xmm6, xmm4 |
---|
1537 | punpcklqdq xmm4, xmm5 |
---|
1538 | punpckhqdq xmm6, xmm5 |
---|
1539 | pxor xmm3, xmm4 |
---|
1540 | pxor xmm2, xmm6 |
---|
; Second 16 bytes of each entry, same interleave, folded into xmm1 and xmm0.
1541 | movdqa xmm4, xmmword ptr [rdx+20*4+rcx+1*16] |
---|
1542 | movdqa xmm5, xmmword ptr [rdx+20*4+r11+1*16] |
---|
1543 | movdqa xmm6, xmm4 |
---|
1544 | punpcklqdq xmm4, xmm5 |
---|
1545 | punpckhqdq xmm6, xmm5 |
---|
1546 | pxor xmm1, xmm4 |
---|
1547 | pxor xmm0, xmm6 |
---|
; Advance the cursor by one 32-byte step; loop until it reaches the value computed on entry.
1548 | add r10, 32 |
---|
1549 | cmp r10, rdi |
---|
1550 | jne label4 |
---|
; Write the final seventeen state dwords back to rdx.
1551 | mov [rdx+4*16], eax |
---|
1552 | movdqa xmmword ptr [rdx+3*16], xmm3 |
---|
1553 | movdqa xmmword ptr [rdx+2*16], xmm2 |
---|
1554 | movdqa xmmword ptr [rdx+1*16], xmm1 |
---|
1555 | movdqa xmmword ptr [rdx+0*16], xmm0 |
---|
; Epilogue (also the early-exit target): restore xmm6/xmm7, free the locals, restore rdi.
1556 | label5: |
---|
1557 | movdqa xmm6, [rsp + 0h] |
---|
1558 | movdqa xmm7, [rsp + 10h] |
---|
1559 | add rsp, 2*16 |
---|
1560 | pop rdi |
---|
1561 | ret |
---|
1562 | Panama_SSE2_Pull ENDP |
---|
1563 | |
---|
1564 | _TEXT ENDS |
---|
1565 | END |
---|