1 | // cpu.cpp - written and placed in the public domain by Wei Dai |
---|
2 | |
---|
3 | #include "pch.h" |
---|
4 | #include "config.h" |
---|
5 | |
---|
6 | #ifndef EXCEPTION_EXECUTE_HANDLER |
---|
7 | # define EXCEPTION_EXECUTE_HANDLER 1 |
---|
8 | #endif |
---|
9 | |
---|
10 | #ifndef CRYPTOPP_IMPORTS |
---|
11 | |
---|
12 | #include "cpu.h" |
---|
13 | #include "misc.h" |
---|
14 | #include <algorithm> |
---|
15 | |
---|
16 | #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY |
---|
17 | #include <signal.h> |
---|
18 | #include <setjmp.h> |
---|
19 | #endif |
---|
20 | |
---|
21 | #if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE |
---|
22 | #include <emmintrin.h> |
---|
23 | #endif |
---|
24 | |
---|
25 | NAMESPACE_BEGIN(CryptoPP) |
---|
26 | |
---|
27 | #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY |
---|
// Function-pointer type for signal handlers. It is declared with C linkage
// so that it matches the handler type used by signal().
extern "C"
{
    typedef void (*SigHandler)(int);
}
---|
31 | #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY |
---|
32 | |
---|
33 | #ifdef CRYPTOPP_CPUID_AVAILABLE |
---|
34 | |
---|
35 | #if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64 |
---|
36 | |
---|
37 | bool CpuId(word32 input, word32 output[4]) |
---|
38 | { |
---|
39 | __cpuid((int *)output, input); |
---|
40 | return true; |
---|
41 | } |
---|
42 | |
---|
43 | #else |
---|
44 | |
---|
45 | #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY |
---|
extern "C"
{
    // Jump buffers armed by setjmp() in CpuId() and TrySSE2() respectively.
    static jmp_buf s_jmpNoCPUID;
    static jmp_buf s_jmpNoSSE2;

    // SIGILL handler used while probing CPUID: jumps back to the setjmp()
    // site, which then sees a non-zero return value.
    static void SigIllHandlerCPUID(int)
    {
        longjmp(s_jmpNoCPUID, 1);
    }

    // SIGILL handler used while probing SSE2: jumps back to the setjmp()
    // site, which then sees a non-zero return value.
    static void SigIllHandlerSSE2(int)
    {
        longjmp(s_jmpNoSSE2, 1);
    }
}
---|
60 | #endif |
---|
61 | |
---|
62 | bool CpuId(word32 input, word32 output[4]) |
---|
63 | { |
---|
64 | #if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) |
---|
65 | __try |
---|
66 | { |
---|
67 | __asm |
---|
68 | { |
---|
69 | mov eax, input |
---|
70 | mov ecx, 0 |
---|
71 | cpuid |
---|
72 | mov edi, output |
---|
73 | mov [edi], eax |
---|
74 | mov [edi+4], ebx |
---|
75 | mov [edi+8], ecx |
---|
76 | mov [edi+12], edx |
---|
77 | } |
---|
78 | } |
---|
79 | // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION |
---|
80 | __except (EXCEPTION_EXECUTE_HANDLER) |
---|
81 | { |
---|
82 | return false; |
---|
83 | } |
---|
84 | |
---|
85 | // function 0 returns the highest basic function understood in EAX |
---|
86 | if(input == 0) |
---|
87 | return !!output[0]; |
---|
88 | |
---|
89 | return true; |
---|
90 | #else |
---|
91 | // longjmp and clobber warnings. Volatile is required. |
---|
92 | // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 |
---|
93 | volatile bool result = true; |
---|
94 | |
---|
95 | volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID); |
---|
96 | if (oldHandler == SIG_ERR) |
---|
97 | return false; |
---|
98 | |
---|
99 | # ifndef __MINGW32__ |
---|
100 | volatile sigset_t oldMask; |
---|
101 | if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) |
---|
102 | return false; |
---|
103 | # endif |
---|
104 | |
---|
105 | if (setjmp(s_jmpNoCPUID)) |
---|
106 | result = false; |
---|
107 | else |
---|
108 | { |
---|
109 | asm volatile |
---|
110 | ( |
---|
111 | // save ebx in case -fPIC is being used |
---|
112 | // TODO: this might need an early clobber on EDI. |
---|
113 | # if CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 |
---|
114 | "pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx" |
---|
115 | # else |
---|
116 | "push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx" |
---|
117 | # endif |
---|
118 | : "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3]) |
---|
119 | : "a" (input), "c" (0) |
---|
120 | ); |
---|
121 | } |
---|
122 | |
---|
123 | # ifndef __MINGW32__ |
---|
124 | sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); |
---|
125 | # endif |
---|
126 | |
---|
127 | signal(SIGILL, oldHandler); |
---|
128 | return result; |
---|
129 | #endif |
---|
130 | } |
---|
131 | |
---|
132 | #endif |
---|
133 | |
---|
134 | static bool TrySSE2() |
---|
135 | { |
---|
136 | #if CRYPTOPP_BOOL_X64 |
---|
137 | return true; |
---|
138 | #elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY) |
---|
139 | __try |
---|
140 | { |
---|
141 | #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
---|
142 | AS2(por xmm0, xmm0) // executing SSE2 instruction |
---|
143 | #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE |
---|
144 | __m128i x = _mm_setzero_si128(); |
---|
145 | return _mm_cvtsi128_si32(x) == 0; |
---|
146 | #endif |
---|
147 | } |
---|
148 | // GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION |
---|
149 | __except (EXCEPTION_EXECUTE_HANDLER) |
---|
150 | { |
---|
151 | return false; |
---|
152 | } |
---|
153 | return true; |
---|
154 | #else |
---|
155 | // longjmp and clobber warnings. Volatile is required. |
---|
156 | // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854 |
---|
157 | volatile bool result = true; |
---|
158 | |
---|
159 | volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2); |
---|
160 | if (oldHandler == SIG_ERR) |
---|
161 | return false; |
---|
162 | |
---|
163 | # ifndef __MINGW32__ |
---|
164 | volatile sigset_t oldMask; |
---|
165 | if (sigprocmask(0, NULL, (sigset_t*)&oldMask)) |
---|
166 | return false; |
---|
167 | # endif |
---|
168 | |
---|
169 | if (setjmp(s_jmpNoSSE2)) |
---|
170 | result = false; |
---|
171 | else |
---|
172 | { |
---|
173 | #if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE |
---|
174 | __asm __volatile ("por %xmm0, %xmm0"); |
---|
175 | #elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE |
---|
176 | __m128i x = _mm_setzero_si128(); |
---|
177 | result = _mm_cvtsi128_si32(x) == 0; |
---|
178 | #endif |
---|
179 | } |
---|
180 | |
---|
181 | # ifndef __MINGW32__ |
---|
182 | sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL); |
---|
183 | # endif |
---|
184 | |
---|
185 | signal(SIGILL, oldHandler); |
---|
186 | return result; |
---|
187 | #endif |
---|
188 | } |
---|
189 | |
---|
190 | bool CRYPTOPP_SECTION_INIT g_x86DetectionDone = false; |
---|
191 | bool CRYPTOPP_SECTION_INIT g_hasMMX = false, CRYPTOPP_SECTION_INIT g_hasISSE = false, CRYPTOPP_SECTION_INIT g_hasSSE2 = false, CRYPTOPP_SECTION_INIT g_hasSSSE3 = false; |
---|
192 | bool CRYPTOPP_SECTION_INIT g_hasSSE4 = false, CRYPTOPP_SECTION_INIT g_hasAESNI = false, CRYPTOPP_SECTION_INIT g_hasCLMUL = false, CRYPTOPP_SECTION_INIT g_isP4 = false; |
---|
193 | bool CRYPTOPP_SECTION_INIT g_hasRDRAND = false, CRYPTOPP_SECTION_INIT g_hasRDSEED = false; |
---|
194 | bool CRYPTOPP_SECTION_INIT g_hasPadlockRNG = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE = false, CRYPTOPP_SECTION_INIT g_hasPadlockACE2 = false; |
---|
195 | bool CRYPTOPP_SECTION_INIT g_hasPadlockPHE = false, CRYPTOPP_SECTION_INIT g_hasPadlockPMM = false; |
---|
196 | word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; |
---|
197 | |
---|
198 | static inline bool IsIntel(const word32 output[4]) |
---|
199 | { |
---|
200 | // This is the "GenuineIntel" string |
---|
201 | return (output[1] /*EBX*/ == 0x756e6547) && |
---|
202 | (output[2] /*ECX*/ == 0x6c65746e) && |
---|
203 | (output[3] /*EDX*/ == 0x49656e69); |
---|
204 | } |
---|
205 | |
---|
206 | static inline bool IsAMD(const word32 output[4]) |
---|
207 | { |
---|
208 | // This is the "AuthenticAMD" string. Some early K5's can return "AMDisbetter!" |
---|
209 | return (output[1] /*EBX*/ == 0x68747541) && |
---|
210 | (output[2] /*ECX*/ == 0x444D4163) && |
---|
211 | (output[3] /*EDX*/ == 0x69746E65); |
---|
212 | } |
---|
213 | |
---|
214 | static inline bool IsVIA(const word32 output[4]) |
---|
215 | { |
---|
216 | // This is the "CentaurHauls" string. Some non-PadLock's can return "VIA VIA VIA " |
---|
217 | return (output[1] /*EBX*/ == 0x746e6543) && |
---|
218 | (output[2] /*ECX*/ == 0x736c7561) && |
---|
219 | (output[3] /*EDX*/ == 0x48727561); |
---|
220 | } |
---|
221 | |
---|
222 | #if HAVE_GCC_CONSTRUCTOR1 |
---|
223 | void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectX86Features() |
---|
224 | #elif HAVE_GCC_CONSTRUCTOR0 |
---|
225 | void __attribute__ ((constructor)) DetectX86Features() |
---|
226 | #else |
---|
227 | void DetectX86Features() |
---|
228 | #endif |
---|
229 | { |
---|
230 | word32 cpuid[4], cpuid1[4]; |
---|
231 | if (!CpuId(0, cpuid)) |
---|
232 | return; |
---|
233 | if (!CpuId(1, cpuid1)) |
---|
234 | return; |
---|
235 | |
---|
236 | g_hasMMX = (cpuid1[3] & (1 << 23)) != 0; |
---|
237 | if ((cpuid1[3] & (1 << 26)) != 0) |
---|
238 | g_hasSSE2 = TrySSE2(); |
---|
239 | g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1<<9)); |
---|
240 | g_hasSSE4 = g_hasSSE2 && ((cpuid1[2] & (1<<19)) && (cpuid1[2] & (1<<20))); |
---|
241 | g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1<<25)); |
---|
242 | g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1<<1)); |
---|
243 | |
---|
244 | if ((cpuid1[3] & (1 << 25)) != 0) |
---|
245 | g_hasISSE = true; |
---|
246 | else |
---|
247 | { |
---|
248 | word32 cpuid2[4]; |
---|
249 | CpuId(0x080000000, cpuid2); |
---|
250 | if (cpuid2[0] >= 0x080000001) |
---|
251 | { |
---|
252 | CpuId(0x080000001, cpuid2); |
---|
253 | g_hasISSE = (cpuid2[3] & (1 << 22)) != 0; |
---|
254 | } |
---|
255 | } |
---|
256 | |
---|
257 | if (IsIntel(cpuid)) |
---|
258 | { |
---|
259 | static const unsigned int RDRAND_FLAG = (1 << 30); |
---|
260 | static const unsigned int RDSEED_FLAG = (1 << 18); |
---|
261 | |
---|
262 | g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf; |
---|
263 | g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1); |
---|
264 | g_hasRDRAND = !!(cpuid1[2] /*ECX*/ & RDRAND_FLAG); |
---|
265 | |
---|
266 | if (cpuid[0] /*EAX*/ >= 7) |
---|
267 | { |
---|
268 | word32 cpuid3[4]; |
---|
269 | if (CpuId(7, cpuid3)) |
---|
270 | g_hasRDSEED = !!(cpuid3[1] /*EBX*/ & RDSEED_FLAG); |
---|
271 | } |
---|
272 | } |
---|
273 | else if (IsAMD(cpuid)) |
---|
274 | { |
---|
275 | static const unsigned int RDRAND_FLAG = (1 << 30); |
---|
276 | |
---|
277 | CpuId(0x01, cpuid); |
---|
278 | g_hasRDRAND = !!(cpuid[2] /*ECX*/ & RDRAND_FLAG); |
---|
279 | |
---|
280 | CpuId(0x80000005, cpuid); |
---|
281 | g_cacheLineSize = GETBYTE(cpuid[2], 0); |
---|
282 | } |
---|
283 | else if (IsVIA(cpuid)) |
---|
284 | { |
---|
285 | static const unsigned int RNG_FLAGS = (0x3 << 2); |
---|
286 | static const unsigned int ACE_FLAGS = (0x3 << 6); |
---|
287 | static const unsigned int ACE2_FLAGS = (0x3 << 8); |
---|
288 | static const unsigned int PHE_FLAGS = (0x3 << 10); |
---|
289 | static const unsigned int PMM_FLAGS = (0x3 << 12); |
---|
290 | |
---|
291 | CpuId(0xC0000000, cpuid); |
---|
292 | if (cpuid[0] >= 0xC0000001) |
---|
293 | { |
---|
294 | // Extended features available |
---|
295 | CpuId(0xC0000001, cpuid); |
---|
296 | g_hasPadlockRNG = !!(cpuid[3] /*EDX*/ & RNG_FLAGS); |
---|
297 | g_hasPadlockACE = !!(cpuid[3] /*EDX*/ & ACE_FLAGS); |
---|
298 | g_hasPadlockACE2 = !!(cpuid[3] /*EDX*/ & ACE2_FLAGS); |
---|
299 | g_hasPadlockPHE = !!(cpuid[3] /*EDX*/ & PHE_FLAGS); |
---|
300 | g_hasPadlockPMM = !!(cpuid[3] /*EDX*/ & PMM_FLAGS); |
---|
301 | } |
---|
302 | } |
---|
303 | |
---|
304 | if (!g_cacheLineSize) |
---|
305 | g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; |
---|
306 | |
---|
307 | *((volatile bool*)&g_x86DetectionDone) = true; |
---|
308 | } |
---|
309 | |
---|
310 | #elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64) |
---|
311 | |
---|
312 | // The ARM equivalent of CPUID probing is reading a MSR. The code requires Exception Level 1 (EL1) and above, but user space runs at EL0. |
---|
313 | // Attempting to run the code results in a SIGILL and termination. |
---|
314 | // |
---|
315 | // #if defined(__arm64__) || defined(__aarch64__) |
---|
316 | // word64 caps = 0; // Read ID_AA64ISAR0_EL1 |
---|
317 | // __asm __volatile("mrs %0, " "id_aa64isar0_el1" : "=r" (caps)); |
---|
318 | // #elif defined(__arm__) || defined(__aarch32__) |
---|
319 | // word32 caps = 0; // Read ID_ISAR5_EL1 |
---|
320 | // __asm __volatile("mrs %0, " "id_isar5_el1" : "=r" (caps)); |
---|
321 | // #endif |
---|
322 | // |
---|
323 | // The following does not work well either. It appears to be missing constants, and it does not detect Aarch32 execution environments on Aarch64 |
---|
324 | // http://community.arm.com/groups/android-community/blog/2014/10/10/runtime-detection-of-cpu-features-on-an-armv8-a-cpu |
---|
325 | // |
---|
326 | bool CRYPTOPP_SECTION_INIT g_ArmDetectionDone = false; |
---|
327 | bool CRYPTOPP_SECTION_INIT g_hasNEON = false, CRYPTOPP_SECTION_INIT g_hasPMULL = false, CRYPTOPP_SECTION_INIT g_hasCRC32 = false; |
---|
328 | bool CRYPTOPP_SECTION_INIT g_hasAES = false, CRYPTOPP_SECTION_INIT g_hasSHA1 = false, CRYPTOPP_SECTION_INIT g_hasSHA2 = false; |
---|
329 | word32 CRYPTOPP_SECTION_INIT g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE; |
---|
330 | |
---|
331 | #ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY |
---|
extern "C"
{
    // One jump buffer per probe; each is armed by setjmp() in the matching
    // Try*() function.
    static jmp_buf s_jmpNoNEON;
    static jmp_buf s_jmpNoPMULL;
    static jmp_buf s_jmpNoCRC32;
    static jmp_buf s_jmpNoAES;
    static jmp_buf s_jmpNoSHA1;
    static jmp_buf s_jmpNoSHA2;

    // Each SIGILL handler below jumps back to its probe's setjmp() site,
    // which then sees a non-zero return value.

    static void SigIllHandlerNEON(int)
    {
        longjmp(s_jmpNoNEON, 1);
    }

    static void SigIllHandlerPMULL(int)
    {
        longjmp(s_jmpNoPMULL, 1);
    }

    static void SigIllHandlerCRC32(int)
    {
        longjmp(s_jmpNoCRC32, 1);
    }

    static void SigIllHandlerAES(int)
    {
        longjmp(s_jmpNoAES, 1);
    }

    static void SigIllHandlerSHA1(int)
    {
        longjmp(s_jmpNoSHA1, 1);
    }

    static void SigIllHandlerSHA2(int)
    {
        longjmp(s_jmpNoSHA2, 1);
    }
}
---|
370 | #endif // Not CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY |
---|
371 | |
---|
// Probes for NEON by executing a few NEON load and lane operations.
//
// Returns false when the operations raised an exception / SIGILL, when the
// SIGILL probe could not be set up, or when NEON intrinsics were not enabled
// at compile time. Otherwise returns the (non-zero) result of the lane test.
static bool TryNEON()
{
#if (CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = vdupq_n_u32(2);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = vdupq_n_u64(2);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    // Install a temporary SIGILL handler so a faulting instruction can be survived.
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerNEON);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the signal mask; it is restored after the probe because the
    // handler leaves via longjmp().
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoNEON))
        result = false;
    else
    {
        uint32_t v1[4] = {1,1,1,1};
        uint32x4_t x1 = vld1q_u32(v1);
        uint64_t v2[2] = {1,1};
        uint64x2_t x2 = vld1q_u64(v2);

        uint32x4_t x3 = {0,0,0,0};
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,0),x3,0);
        x3 = vsetq_lane_u32(vgetq_lane_u32(x1,3),x3,3);
        uint64x2_t x4 = {0,0};
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,0),x4,0);
        x4 = vsetq_lane_u64(vgetq_lane_u64(x2,1),x4,1);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(x3,0) | vgetq_lane_u64(x4,1));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE
}
---|
439 | |
---|
// Probes for the polynomial-multiply (PMULL) instructions by running
// vmull_p64 / vmull_high_p64 on fixed inputs and comparing the products
// against the constants checked below.
//
// Returns false when the instructions raised an exception / SIGILL, when the
// SIGILL probe could not be set up, when a product did not match, or when the
// ARM crypto intrinsics were not enabled at compile time.
static bool TryPMULL()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = vreinterpretq_u64_p128(r1);  // {6,0}
        const uint64x2_t& t2 = vreinterpretq_u64_p128(r2);  // {0x1b,0}
        const uint64x2_t& t3 = vreinterpretq_u64_p128(r3);  // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    // Install a temporary SIGILL handler so a faulting instruction can be survived.
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerPMULL);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the signal mask; it is restored after the probe because the
    // handler leaves via longjmp().
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoPMULL))
        result = false;
    else
    {
        const poly64_t   a1={2}, b1={3};
        const poly64x2_t a2={4,5}, b2={6,7};
        const poly64x2_t a3={0x8080808080808080,0xa0a0a0a0a0a0a0a0}, b3={0xc0c0c0c0c0c0c0c0, 0xe0e0e0e0e0e0e0e0};

        const poly128_t r1 = vmull_p64(a1, b1);
        const poly128_t r2 = vmull_high_p64(a2, b2);
        const poly128_t r3 = vmull_high_p64(a3, b3);

        // Linaro is missing vreinterpretq_u64_p128. Also see https://github.com/weidai11/cryptopp/issues/233.
        const uint64x2_t& t1 = (uint64x2_t)(r1);  // {6,0}
        const uint64x2_t& t2 = (uint64x2_t)(r2);  // {0x1b,0}
        const uint64x2_t& t3 = (uint64x2_t)(r3);  // {bignum,bignum}

        result = !!(vgetq_lane_u64(t1,0) == 0x06 && vgetq_lane_u64(t1,1) == 0x00 && vgetq_lane_u64(t2,0) == 0x1b &&
            vgetq_lane_u64(t2,1) == 0x00 && vgetq_lane_u64(t3,0) == 0x6c006c006c006c00 && vgetq_lane_u64(t3,1) == 0x6c006c006c006c00);
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
---|
510 | |
---|
// Probes for the CRC32 instructions by chaining __crc32cw, __crc32ch and
// __crc32cb over fixed inputs.
//
// Returns false when the instructions raised an exception / SIGILL, when the
// SIGILL probe could not be set up, when the final checksum is zero, or when
// the ARM CRC32 intrinsics were not enabled at compile time.
static bool TryCRC32()
{
#if (CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        result = !!w;
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    // Install a temporary SIGILL handler so a faulting instruction can be survived.
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerCRC32);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the signal mask; it is restored after the probe because the
    // handler leaves via longjmp().
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoCRC32))
        result = false;
    else
    {
        word32 w=0, x=1; word16 y=2; byte z=3;
        w = __crc32cw(w,x);
        w = __crc32ch(w,y);
        w = __crc32cb(w,z);

        // Hack... GCC optimizes away the code and returns true
        result = !!w;
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE
}
---|
564 | |
---|
// Probes for the AES instructions by running one vaeseq_u8 and one
// vaesdq_u8 on all-zero data and key.
//
// Returns false when the instructions raised an exception / SIGILL, when the
// SIGILL probe could not be set up, when both inspected result lanes are
// zero, or when the ARM crypto intrinsics were not enabled at compile time.
static bool TryAES()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    // Install a temporary SIGILL handler so a faulting instruction can be survived.
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerAES);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the signal mask; it is restored after the probe because the
    // handler leaves via longjmp().
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoAES))
        result = false;
    else
    {
        // AES encrypt and decrypt
        uint8x16_t data = vdupq_n_u8(0), key = vdupq_n_u8(0);
        uint8x16_t r1 = vaeseq_u8(data, key);
        uint8x16_t r2 = vaesdq_u8(data, key);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u8(r1,0) | vgetq_lane_u8(r2,7));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
---|
617 | |
---|
// Probes for the SHA-1 instructions by running vsha1cq_u32, vsha1mq_u32,
// vsha1pq_u32, vsha1su0q_u32 and vsha1su1q_u32 on fixed inputs.
//
// Returns false when the instructions raised an exception / SIGILL, when the
// SIGILL probe could not be set up, when all inspected result lanes are
// zero, or when the ARM crypto intrinsics were not enabled at compile time.
static bool TrySHA1()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    // Install a temporary SIGILL handler so a faulting instruction can be survived.
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA1);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the signal mask; it is restored after the probe because the
    // handler leaves via longjmp().
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA1))
        result = false;
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha1cq_u32 (data1, 0, data2);
        uint32x4_t r2 = vsha1mq_u32 (data1, 0, data2);
        uint32x4_t r3 = vsha1pq_u32 (data1, 0, data2);
        uint32x4_t r4 = vsha1su0q_u32 (data1, data2, data3);
        uint32x4_t r5 = vsha1su1q_u32 (data1, data2);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3) | vgetq_lane_u32(r5,0));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
---|
677 | |
---|
// Probes for the SHA-256 instructions by running vsha256hq_u32,
// vsha256h2q_u32, vsha256su0q_u32 and vsha256su1q_u32 on fixed inputs.
//
// Returns false when the instructions raised an exception / SIGILL, when the
// SIGILL probe could not be set up, when all inspected result lanes are
// zero, or when the ARM crypto intrinsics were not enabled at compile time.
static bool TrySHA2()
{
#if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE)
# if defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
    volatile bool result = true;
    __try
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }
    __except (EXCEPTION_EXECUTE_HANDLER)
    {
        return false;
    }
    return result;
# else
    // longjmp and clobber warnings. Volatile is required.
    // http://github.com/weidai11/cryptopp/issues/24 and http://stackoverflow.com/q/7721854
    volatile bool result = true;

    // Install a temporary SIGILL handler so a faulting instruction can be survived.
    volatile SigHandler oldHandler = signal(SIGILL, SigIllHandlerSHA2);
    if (oldHandler == SIG_ERR)
        return false;

    // Remember the signal mask; it is restored after the probe because the
    // handler leaves via longjmp().
    volatile sigset_t oldMask;
    if (sigprocmask(0, NULL, (sigset_t*)&oldMask))
    {
        // Do not leave the probe handler installed when bailing out.
        signal(SIGILL, oldHandler);
        return false;
    }

    if (setjmp(s_jmpNoSHA2))
        result = false;
    else
    {
        uint32x4_t data1 = {1,2,3,4}, data2 = {5,6,7,8}, data3 = {9,10,11,12};

        uint32x4_t r1 = vsha256hq_u32 (data1, data2, data3);
        uint32x4_t r2 = vsha256h2q_u32 (data1, data2, data3);
        uint32x4_t r3 = vsha256su0q_u32 (data1, data2);
        uint32x4_t r4 = vsha256su1q_u32 (data1, data2, data3);

        // Hack... GCC optimizes away the code and returns true
        result = !!(vgetq_lane_u32(r1,0) | vgetq_lane_u32(r2,1) | vgetq_lane_u32(r3,2) | vgetq_lane_u32(r4,3));
    }

    sigprocmask(SIG_SETMASK, (sigset_t*)&oldMask, NULL);
    signal(SIGILL, oldHandler);
    return result;
# endif
#else
    return false;
#endif  // CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE
}
---|
735 | |
---|
736 | #if HAVE_GCC_CONSTRUCTOR1 |
---|
737 | void __attribute__ ((constructor (CRYPTOPP_INIT_PRIORITY + 50))) DetectArmFeatures() |
---|
738 | #elif HAVE_GCC_CONSTRUCTOR0 |
---|
739 | void __attribute__ ((constructor)) DetectArmFeatures() |
---|
740 | #else |
---|
741 | void DetectArmFeatures() |
---|
742 | #endif |
---|
743 | { |
---|
744 | g_hasNEON = TryNEON(); |
---|
745 | g_hasPMULL = TryPMULL(); |
---|
746 | g_hasCRC32 = TryCRC32(); |
---|
747 | g_hasAES = TryAES(); |
---|
748 | g_hasSHA1 = TrySHA1(); |
---|
749 | g_hasSHA2 = TrySHA2(); |
---|
750 | |
---|
751 | *((volatile bool*)&g_ArmDetectionDone) = true; |
---|
752 | } |
---|
753 | |
---|
754 | #endif |
---|
755 | |
---|
756 | NAMESPACE_END |
---|
757 | |
---|
758 | #endif |
---|