source: trunk/src-cryptopp/cpu.h

Last change on this file was e230cb0, checked in by David Stainton <dstainton415@…>, at 2016-10-12T13:27:29Z

Add cryptopp from tag CRYPTOPP_5_6_5

  • Property mode set to 100644
File size: 19.6 KB
Line 
1// cpu.h - written and placed in the public domain by Wei Dai
2
3//! \file cpu.h
4//! \brief Functions for CPU features and intrinsics
5//! \details The functions are used in X86/X32/X64 and NEON code paths
6
7#ifndef CRYPTOPP_CPU_H
8#define CRYPTOPP_CPU_H
9
10#include "config.h"
11
12// ARM32/ARM64 Headers
13#if (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
14# if defined(__GNUC__)
15#  include <stdint.h>
16# endif
17# if CRYPTOPP_BOOL_NEON_INTRINSICS_AVAILABLE || defined(__ARM_NEON)
18#  include <arm_neon.h>
19# endif
20# if (CRYPTOPP_BOOL_ARM_CRYPTO_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_ARM_CRC32_INTRINSICS_AVAILABLE) || defined(__ARM_ACLE)
21#  include <arm_acle.h>
22# endif
23#endif  // ARM32 and ARM64 Headers
24
25// X86/X64/X32 Headers
26#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
27
28// GCC X86 super-include
29#if (CRYPTOPP_GCC_VERSION >= 40800)
30#  include <x86intrin.h>
31#endif
32#if (CRYPTOPP_MSC_VERSION >= 1400)
33#  include <intrin.h>
34#endif
35
36// Baseline include
37#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
38#  include <emmintrin.h>    // __m64, __m128i, _mm_set_epi64x
39#endif
40#if CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE
41#  include <tmmintrin.h>    // _mm_shuffle_pi8, _mm_shuffle_epi8
42#endif // tmmintrin.h
43#if CRYPTOPP_BOOL_SSE4_INTRINSICS_AVAILABLE
44#  include <smmintrin.h>    // _mm_blend_epi16
45#  include <nmmintrin.h>    // _mm_crc32_u{8|16|32}
46#endif // smmintrin.h
47#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
48#  include <wmmintrin.h>    // aesenc, aesdec, etc
49#endif // wmmintrin.h
50#if CRYPTOPP_BOOL_AVX_INTRINSICS_AVAILABLE
51#  include <immintrin.h>    // RDRAND, RDSEED and AVX
52#endif
53#if CRYPTOPP_BOOL_AVX2_INTRINSICS_AVAILABLE
54#  include <zmmintrin.h>    // AVX 512-bit extensions
55#endif
56#endif  // X86/X64/X32 Headers
57
58// Applies to both X86/X32/X64 and ARM32/ARM64. And we've got MIPS devices on the way.
// Selects the inline-assembly dialect marker: Microsoft and Borland compilers
// get CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY, every other compiler gets
// CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY. The same markers are defined again
// further down in this header alongside the AS1/AS2/... macros.
59#if defined(_MSC_VER) || defined(__BORLANDC__)
60# define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
61#else
62# define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
63#endif
64
65// Applies to both X86/X32/X64 and ARM32/ARM64
// String fragments used when assembling GNU-style inline assembly text:
//   NEW_LINE        - appended after each instruction by the GNU_AS* macros
//   INTEL_* / ATT_* - assembler directives that switch syntax mode
// Three configurations are distinguished below.
66#if defined(CRYPTOPP_LLVM_CLANG_VERSION) || defined(CRYPTOPP_APPLE_CLANG_VERSION) || defined(CRYPTOPP_CLANG_INTEGRATED_ASSEMBLER)
        // Clang, or Clang's integrated assembler: an explicit "\n" terminates each
        // instruction, and the syntax directives carry no prefix/noprefix operand
        // (the PREFIX and NOPREFIX variants are therefore identical here).
67        #define NEW_LINE "\n"
68        #define INTEL_PREFIX ".intel_syntax;"
69        #define INTEL_NOPREFIX ".intel_syntax;"
70        #define ATT_PREFIX ".att_syntax;"
71        #define ATT_NOPREFIX ".att_syntax;"
72#elif defined(__GNUC__)
        // Other GNU-compatible compilers: NEW_LINE is empty and the directives
        // spell out the prefix/noprefix operand.
73        #define NEW_LINE
74        #define INTEL_PREFIX ".intel_syntax prefix;"
75        #define INTEL_NOPREFIX ".intel_syntax noprefix;"
76        #define ATT_PREFIX ".att_syntax prefix;"
77        #define ATT_NOPREFIX ".att_syntax noprefix;"
78#else
        // Any other compiler: every fragment expands to nothing.
79        #define NEW_LINE
80        #define INTEL_PREFIX
81        #define INTEL_NOPREFIX
82        #define ATT_PREFIX
83        #define ATT_NOPREFIX
84#endif
85
86#ifdef CRYPTOPP_GENERATE_X64_MASM
87
// Settings forced while CRYPTOPP_GENERATE_X64_MASM is defined: the x86 assembly
// and SSE2 assembly paths are switched on and the target is treated as X64.
// NAMESPACE_END is defined empty here; this branch never opens the CryptoPP
// namespace, so the NAMESPACE_END at the bottom of the header expands to nothing.
// NOTE(review): presumably this mode is used to preprocess the sources into a
// MASM listing rather than to compile C++ - confirm against the build scripts.
88#define CRYPTOPP_X86_ASM_AVAILABLE
89#define CRYPTOPP_BOOL_X64 1
90#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
91#define NAMESPACE_END
92
93#else
94
95NAMESPACE_BEGIN(CryptoPP)
96
97#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64 || CRYPTOPP_DOXYGEN_PROCESSING
98
99#define CRYPTOPP_CPUID_AVAILABLE
100
101// Hide from Doxygen
102#ifndef CRYPTOPP_DOXYGEN_PROCESSING
103// These should not be used directly. Use the inline accessors that follow
// (HasMMX(), HasSSE2(), ..., GetCacheLineSize()); each accessor checks
// g_x86DetectionDone and calls DetectX86Features() before reading its flag.
// g_x86DetectionDone: false until detection has been performed.
104extern CRYPTOPP_DLL bool g_x86DetectionDone;
// One cached flag per feature reported by the accessors.
105extern CRYPTOPP_DLL bool g_hasMMX;
106extern CRYPTOPP_DLL bool g_hasISSE;
107extern CRYPTOPP_DLL bool g_hasSSE2;
108extern CRYPTOPP_DLL bool g_hasSSSE3;
109extern CRYPTOPP_DLL bool g_hasSSE4;
110extern CRYPTOPP_DLL bool g_hasAESNI;
111extern CRYPTOPP_DLL bool g_hasCLMUL;
112extern CRYPTOPP_DLL bool g_isP4;
113extern CRYPTOPP_DLL bool g_hasRDRAND;
114extern CRYPTOPP_DLL bool g_hasRDSEED;
115extern CRYPTOPP_DLL bool g_hasPadlockRNG;
116extern CRYPTOPP_DLL bool g_hasPadlockACE;
117extern CRYPTOPP_DLL bool g_hasPadlockACE2;
118extern CRYPTOPP_DLL bool g_hasPadlockPHE;
119extern CRYPTOPP_DLL bool g_hasPadlockPMM;
// Cached cache line size returned by GetCacheLineSize().
120extern CRYPTOPP_DLL word32 g_cacheLineSize;
121
// Detection routine invoked by the accessors when g_x86DetectionDone is false.
122CRYPTOPP_DLL void CRYPTOPP_API DetectX86Features();
// NOTE(review): presumably executes CPUID for leaf 'input' and stores the four
// result registers in 'output'; the bool result presumably signals success -
// confirm against the definition in cpu.cpp.
123CRYPTOPP_DLL bool CRYPTOPP_API CpuId(word32 input, word32 output[4]);
124#endif // CRYPTOPP_DOXYGEN_PROCESSING
125
126//! \brief Determines MMX availability
127//! \returns true if MMX is determined to be available, false otherwise
128//! \details MMX, SSE and SSE2 are core processor features for x86_64, and
129//!   the function always returns true for the platform.
130inline bool HasMMX()
131{
132#if CRYPTOPP_BOOL_X64
133        return true;
134#else
135        if (!g_x86DetectionDone)
136                DetectX86Features();
137        return g_hasMMX;
138#endif
139}
140
141//! \brief Determines SSE availability
142//! \returns true if SSE is determined to be available, false otherwise
143//! \details MMX, SSE and SSE2 are core processor features for x86_64, and
144//!   the function always returns true for the platform.
145inline bool HasISSE()
146{
147#if CRYPTOPP_BOOL_X64
148        return true;
149#else
150        if (!g_x86DetectionDone)
151                DetectX86Features();
152        return g_hasISSE;
153#endif
154}
155
156//! \brief Determines SSE2 availability
157//! \returns true if SSE2 is determined to be available, false otherwise
158//! \details MMX, SSE and SSE2 are core processor features for x86_64, and
159//!   the function always returns true for the platform.
160inline bool HasSSE2()
161{
162#if CRYPTOPP_BOOL_X64
163        return true;
164#else
165        if (!g_x86DetectionDone)
166                DetectX86Features();
167        return g_hasSSE2;
168#endif
169}
170
171//! \brief Determines SSSE3 availability
172//! \returns true if SSSE3 is determined to be available, false otherwise
173//! \details HasSSSE3() is a runtime check performed using CPUID
174//! \note Some Clang compilers incorrectly omit SSSE3 even though its native to the processor.
175inline bool HasSSSE3()
176{
177        if (!g_x86DetectionDone)
178                DetectX86Features();
179        return g_hasSSSE3;
180}
181
182//! \brief Determines SSE4 availability
183//! \returns true if SSE4.1 and SSE4.2 are determined to be available, false otherwise
184//! \details HasSSE4() is a runtime check performed using CPUID which requires both SSE4.1 and SSE4.2
185inline bool HasSSE4()
186{
187        if (!g_x86DetectionDone)
188                DetectX86Features();
189        return g_hasSSE4;
190}
191
192//! \brief Determines AES-NI availability
193//! \returns true if AES-NI is determined to be available, false otherwise
194//! \details HasAESNI() is a runtime check performed using CPUID
195inline bool HasAESNI()
196{
197        if (!g_x86DetectionDone)
198                DetectX86Features();
199        return g_hasAESNI;
200}
201
202//! \brief Determines Carryless Multiply availability
203//! \returns true if pclmulqdq is determined to be available, false otherwise
204//! \details HasCLMUL() is a runtime check performed using CPUID
205inline bool HasCLMUL()
206{
207        if (!g_x86DetectionDone)
208                DetectX86Features();
209        return g_hasCLMUL;
210}
211
212//! \brief Determines if the CPU is an Intel P4
213//! \returns true if the CPU is a P4, false otherwise
214//! \details IsP4() is a runtime check performed using CPUID
215inline bool IsP4()
216{
217        if (!g_x86DetectionDone)
218                DetectX86Features();
219        return g_isP4;
220}
221
222//! \brief Determines RDRAND availability
223//! \returns true if RDRAND is determined to be available, false otherwise
224//! \details HasRDRAND() is a runtime check performed using CPUID
225inline bool HasRDRAND()
226{
227        if (!g_x86DetectionDone)
228                DetectX86Features();
229        return g_hasRDRAND;
230}
231
232//! \brief Determines RDSEED availability
233//! \returns true if RDSEED is determined to be available, false otherwise
234//! \details HasRDSEED() is a runtime check performed using CPUID
235inline bool HasRDSEED()
236{
237        if (!g_x86DetectionDone)
238                DetectX86Features();
239        return g_hasRDSEED;
240}
241
242//! \brief Determines Padlock RNG availability
243//! \returns true if VIA Padlock RNG is determined to be available, false otherwise
244//! \details HasPadlockRNG() is a runtime check performed using CPUID
245inline bool HasPadlockRNG()
246{
247        if (!g_x86DetectionDone)
248                DetectX86Features();
249        return g_hasPadlockRNG;
250}
251
252//! \brief Determines Padlock ACE availability
253//! \returns true if VIA Padlock ACE is determined to be available, false otherwise
254//! \details HasPadlockACE() is a runtime check performed using CPUID
255inline bool HasPadlockACE()
256{
257        if (!g_x86DetectionDone)
258                DetectX86Features();
259        return g_hasPadlockACE;
260}
261
262//! \brief Determines Padlock ACE2 availability
263//! \returns true if VIA Padlock ACE2 is determined to be available, false otherwise
264//! \details HasPadlockACE2() is a runtime check performed using CPUID
265inline bool HasPadlockACE2()
266{
267        if (!g_x86DetectionDone)
268                DetectX86Features();
269        return g_hasPadlockACE2;
270}
271
272//! \brief Determines Padlock PHE availability
273//! \returns true if VIA Padlock PHE is determined to be available, false otherwise
274//! \details HasPadlockPHE() is a runtime check performed using CPUID
275inline bool HasPadlockPHE()
276{
277        if (!g_x86DetectionDone)
278                DetectX86Features();
279        return g_hasPadlockPHE;
280}
281
282//! \brief Determines Padlock PMM availability
283//! \returns true if VIA Padlock PMM is determined to be available, false otherwise
284//! \details HasPadlockPMM() is a runtime check performed using CPUID
285inline bool HasPadlockPMM()
286{
287        if (!g_x86DetectionDone)
288                DetectX86Features();
289        return g_hasPadlockPMM;
290}
291
292//! \brief Provides the cache line size
293//! \returns lower bound on the size of a cache line in bytes, if available
294//! \details GetCacheLineSize() returns the lower bound on the size of a cache line, if it
295//!   is available. If the value is not available at runtime, then 32 is returned for a 32-bit
296//!   processor and 64 is returned for a 64-bit processor.
297//! \details x86/x32/x64 uses CPUID to determine the value and its usually accurate. The ARM
298//!   processor equivalent is a privileged instruction, so a compile time value is returned.
299inline int GetCacheLineSize()
300{
301        if (!g_x86DetectionDone)
302                DetectX86Features();
303        return g_cacheLineSize;
304}
305
306#elif (CRYPTOPP_BOOL_ARM32 || CRYPTOPP_BOOL_ARM64)
307
// Cached ARM feature state. The inline accessors that follow (HasNEON(),
// HasPMULL(), HasCRC32(), HasAES(), HasSHA1(), HasSHA2()) check
// g_ArmDetectionDone and call DetectArmFeatures() before reading a flag.
// g_ArmDetectionDone: false until detection has been performed.
308extern bool g_ArmDetectionDone;
// One cached flag per feature reported by the accessors.
309extern bool g_hasNEON, g_hasPMULL, g_hasCRC32, g_hasAES, g_hasSHA1, g_hasSHA2;
// Detection routine invoked by the accessors when g_ArmDetectionDone is false.
310void CRYPTOPP_API DetectArmFeatures();
311
312//! \brief Determine if an ARM processor has Advanced SIMD available
313//! \returns true if the hardware is capable of Advanced SIMD at runtime, false otherwise.
314//! \details Advanced SIMD instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32).
315//! \details Runtime support requires compile time support. When compiling with GCC, you may
316//!   need to compile with <tt>-mfpu=neon</tt> (32-bit) or <tt>-march=armv8-a</tt>
317//!   (64-bit). Also see ARM's <tt>__ARM_NEON</tt> preprocessor macro.
318inline bool HasNEON()
319{
320        if (!g_ArmDetectionDone)
321                DetectArmFeatures();
322        return g_hasNEON;
323}
324
325//! \brief Determine if an ARM processor provides Polynomial Multiplication (long)
326//! \returns true if the hardware is capable of polynomial multiplications at runtime, false otherwise.
327//! \details The multiplication instructions are available under Aarch64 (ARM-64) and Aarch32 (ARM-32).
328//! \details Runtime support requires compile time support. When compiling with GCC, you may
329//!   need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
330//!   <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
331inline bool HasPMULL()
332{
333        if (!g_ArmDetectionDone)
334                DetectArmFeatures();
335        return g_hasPMULL;
336}
337
338//! \brief Determine if an ARM processor has CRC32 available
339//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
340//! \details CRC32 instructions provide access to the processor's CRC32 and CRC32-C intructions.
341//!   They are provided by ARM C Language Extensions 2.0 (ACLE 2.0) and available under Aarch64
342//!   (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an AArch32 execution environment).
343//! \details Runtime support requires compile time support. When compiling with GCC, you may
344//!   need to compile with <tt>-march=armv8-a+crc</tt>; while Apple requires
345//!   <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRC32</tt> preprocessor macro.
346inline bool HasCRC32()
347{
348        if (!g_ArmDetectionDone)
349                DetectArmFeatures();
350        return g_hasCRC32;
351}
352
353//! \brief Determine if an ARM processor has AES available
354//! \returns true if the hardware is capable of AES at runtime, false otherwise.
355//! \details AES is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
356//!   and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
357//!   AArch32 execution environment).
358//! \details Runtime support requires compile time support. When compiling with GCC, you may
359//!   need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
360//!   <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
361inline bool HasAES()
362{
363        if (!g_ArmDetectionDone)
364                DetectArmFeatures();
365        return g_hasAES;
366}
367
368//! \brief Determine if an ARM processor has SHA1 available
369//! \returns true if the hardware is capable of SHA1 at runtime, false otherwise.
370//! \details SHA1 is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
371//!   and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
372//!   AArch32 execution environment).
373//! \details Runtime support requires compile time support. When compiling with GCC, you may
374//!   need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
375//!   <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
376inline bool HasSHA1()
377{
378        if (!g_ArmDetectionDone)
379                DetectArmFeatures();
380        return g_hasSHA1;
381}
382
383//! \brief Determine if an ARM processor has SHA2 available
384//! \returns true if the hardware is capable of SHA2 at runtime, false otherwise.
385//! \details SHA2 is part of the Crypto extensions from ARM C Language Extensions 2.0 (ACLE 2.0)
386//!   and available under Aarch64 (ARM-64) and Aarch32 (ARM-32) running on Aarch64 (i.e., an
387//!   AArch32 execution environment).
388//! \details Runtime support requires compile time support. When compiling with GCC, you may
389//!   need to compile with <tt>-march=armv8-a+crypto</tt>; while Apple requires
390//!   <tt>-arch arm64</tt>. Also see ARM's <tt>__ARM_FEATURE_CRYPTO</tt> preprocessor macro.
391inline bool HasSHA2()
392{
393        if (!g_ArmDetectionDone)
394                DetectArmFeatures();
395        return g_hasSHA2;
396}
397
398//! \brief Provides the cache line size at runtime
399//! \returns true if the hardware is capable of CRC32 at runtime, false otherwise.
400//! \details GetCacheLineSize() provides is an estimate using CRYPTOPP_L1_CACHE_LINE_SIZE.
401//!   The runtime instructions to query the processor are privileged.
402inline int GetCacheLineSize()
403{
404        return CRYPTOPP_L1_CACHE_LINE_SIZE;
405}
406
407#else
408
409inline int GetCacheLineSize()
410{
411        return CRYPTOPP_L1_CACHE_LINE_SIZE;
412}
413
414#endif  // X86/X32/X64 and ARM
415
416#endif
417
418#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64
419
// Assembler statement macros. The same instruction stream is written once with
// AS1/AS2/AS3 (instruction with 1/2/3 operands), ASS (instruction with an
// immediate built from four 2-bit fields), ASL (label definition), ASJ/ASC
// (instruction that references a label) and AS_HEX (hex literal), and each
// toolchain branch below renders it differently.
420#ifdef CRYPTOPP_GENERATE_X64_MASM
        // MASM generation: operands are emitted as plain tokens followed by the
        // literal text *newline*.
        // NOTE(review): presumably *newline* is replaced by a real line break when
        // the MASM source is produced - confirm against the generation script.
        // Labels are formed by pasting the argument onto "label"; ASJ ignores z.
        // AS_HEX yields the 0<digits>h form.
421        #define AS1(x) x*newline*
422        #define AS2(x, y) x, y*newline*
423        #define AS3(x, y, z) x, y, z*newline*
424        #define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
425        #define ASL(x) label##x:*newline*
426        #define ASJ(x, y, z) x label##y*newline*
427        #define ASC(x, y) x label##y*newline*
428        #define AS_HEX(y) 0##y##h
429#elif defined(_MSC_VER) || defined(__BORLANDC__)
        // Microsoft/Borland inline assembly: every statement is wrapped in its own
        // __asm { } block. Labels use the same label##N pasting; ASJ ignores z.
        // CRYPTOPP_NAKED maps to __declspec(naked). AS_HEX yields 0x<digits>.
430        #define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
431        #define AS1(x) __asm {x}
432        #define AS2(x, y) __asm {x, y}
433        #define AS3(x, y, z) __asm {x, y, z}
434        #define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
435        #define ASL(x) __asm {label##x:}
436        #define ASJ(x, y, z) __asm {x label##y}
437        #define ASC(x, y) __asm {x label##y}
438        #define CRYPTOPP_NAKED __declspec(naked)
439        #define AS_HEX(y) 0x##y
440#else
        // GNU-style inline assembly: every statement is turned into a string
        // literal terminated by ";" so adjacent statements concatenate into one
        // asm string. The GNU_* helpers also append NEW_LINE; ASS and ASC do not.
441        #define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
442
443        // define these in two steps to allow arguments to be expanded
        // (the outer AS* macro expands its arguments before the inner GNU_AS*
        // macro applies the # stringizing operator to them)
444        #define GNU_AS1(x) #x ";" NEW_LINE
445        #define GNU_AS2(x, y) #x ", " #y ";" NEW_LINE
446        #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";" NEW_LINE
        // Labels are emitted as "\n<x>:"; jumps append z directly after the label.
        // NOTE(review): z looks like the f/b direction suffix of a numeric local
        // label - confirm against the call sites.
447        #define GNU_ASL(x) "\n" #x ":" NEW_LINE
448        #define GNU_ASJ(x, y, z) #x " " #y #z ";" NEW_LINE
449        #define AS1(x) GNU_AS1(x)
450        #define AS2(x, y) GNU_AS2(x, y)
451        #define AS3(x, y, z) GNU_AS3(x, y, z)
452        #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
453        #define ASL(x) GNU_ASL(x)
454        #define ASJ(x, y, z) GNU_ASJ(x, y, z)
455        #define ASC(x, y) #x " " #y ";"
        // No naked-function attribute is applied in this branch.
456        #define CRYPTOPP_NAKED
457        #define AS_HEX(y) 0x##y
458#endif
459
// Conditional-emission helpers: IF0 discards its argument, IF1 passes it through.
// NOTE(review): presumably selected by pasting a 0/1 flag onto "IF" at the call
// site - confirm against the users of these macros.
460#define IF0(y)
461#define IF1(y) y
462
// ASM_MOD(x, y): remainder of x divided by y inside an assembler expression.
// XMMWORD_PTR: operand-size qualifier for 128-bit memory operands.
463#ifdef CRYPTOPP_GENERATE_X64_MASM
// MASM has a MOD operator and uses the XMMWORD PTR qualifier.
464#define ASM_MOD(x, y) ((x) MOD (y))
465#define XMMWORD_PTR XMMWORD PTR
466#else
467// GNU assembler doesn't seem to have mod operator
// so the remainder is computed as x - (x / y) * y
468#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
469// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
470#define XMMWORD_PTR
471#endif
472
// Register and word-size abstraction for the assembly macros.
//   AS_REG_1..7   - seven general purpose registers at the target's word size
//   AS_REG_1d..7d - the 32-bit forms of the same registers
//   WORD_SZ       - word size in bytes (4 or 8)
//   WORD_REG(x)   - pastes the word-size register prefix (e or r) onto x
//   WORD_PTR      - memory operand size qualifier for one word
//   AS_PUSH_IF86 / AS_POP_IF86 - push/pop a register; empty on X64
//   AS_JCXZ       - jump-if-count-register-zero mnemonic for the target
473#if CRYPTOPP_BOOL_X86
        // 32-bit x86: word-size and 32-bit register names coincide.
474        #define AS_REG_1 ecx
475        #define AS_REG_2 edx
476        #define AS_REG_3 esi
477        #define AS_REG_4 edi
478        #define AS_REG_5 eax
479        #define AS_REG_6 ebx
480        #define AS_REG_7 ebp
481        #define AS_REG_1d ecx
482        #define AS_REG_2d edx
483        #define AS_REG_3d esi
484        #define AS_REG_4d edi
485        #define AS_REG_5d eax
486        #define AS_REG_6d ebx
487        #define AS_REG_7d ebp
488        #define WORD_SZ 4
489        #define WORD_REG(x)     e##x
490        #define WORD_PTR DWORD PTR
491        #define AS_PUSH_IF86(x) AS1(push e##x)
492        #define AS_POP_IF86(x) AS1(pop e##x)
493        #define AS_JCXZ jecxz
494#elif CRYPTOPP_BOOL_X32
        // X32: 4-byte words in 32-bit registers (including r8d..r11d), but
        // push/pop use the r-prefixed 64-bit register names.
495        #define AS_REG_1 ecx
496        #define AS_REG_2 edx
497        #define AS_REG_3 r8d
498        #define AS_REG_4 r9d
499        #define AS_REG_5 eax
500        #define AS_REG_6 r10d
501        #define AS_REG_7 r11d
502        #define AS_REG_1d ecx
503        #define AS_REG_2d edx
504        #define AS_REG_3d r8d
505        #define AS_REG_4d r9d
506        #define AS_REG_5d eax
507        #define AS_REG_6d r10d
508        #define AS_REG_7d r11d
509        #define WORD_SZ 4
510        #define WORD_REG(x)     e##x
511        #define WORD_PTR DWORD PTR
512        #define AS_PUSH_IF86(x) AS1(push r##x)
513        #define AS_POP_IF86(x) AS1(pop r##x)
514        #define AS_JCXZ jecxz
515#elif CRYPTOPP_BOOL_X64
516        #ifdef CRYPTOPP_GENERATE_X64_MASM
                // MASM generation: rcx, rdx, r8, r9, rax, r10, r11.
                // NOTE(review): the first four presumably follow the Microsoft
                // x64 argument-register order - confirm.
517                #define AS_REG_1 rcx
518                #define AS_REG_2 rdx
519                #define AS_REG_3 r8
520                #define AS_REG_4 r9
521                #define AS_REG_5 rax
522                #define AS_REG_6 r10
523                #define AS_REG_7 r11
524                #define AS_REG_1d ecx
525                #define AS_REG_2d edx
526                #define AS_REG_3d r8d
527                #define AS_REG_4d r9d
528                #define AS_REG_5d eax
529                #define AS_REG_6d r10d
530                #define AS_REG_7d r11d
531        #else
                // Other x64 builds: rdi, rsi, rdx, rcx, r8, r9, r10.
                // NOTE(review): the first six presumably follow the System V
                // AMD64 argument-register order - confirm.
532                #define AS_REG_1 rdi
533                #define AS_REG_2 rsi
534                #define AS_REG_3 rdx
535                #define AS_REG_4 rcx
536                #define AS_REG_5 r8
537                #define AS_REG_6 r9
538                #define AS_REG_7 r10
539                #define AS_REG_1d edi
540                #define AS_REG_2d esi
541                #define AS_REG_3d edx
542                #define AS_REG_4d ecx
543                #define AS_REG_5d r8d
544                #define AS_REG_6d r9d
545                #define AS_REG_7d r10d
546        #endif
        // Common to both x64 variants: 8-byte words, and the IF86 push/pop
        // helpers expand to nothing.
547        #define WORD_SZ 8
548        #define WORD_REG(x)     r##x
549        #define WORD_PTR QWORD PTR
550        #define AS_PUSH_IF86(x)
551        #define AS_POP_IF86(x)
552        #define AS_JCXZ jrcxz
553#endif
554
555// helper macro for stream cipher output
// Emits assembly that combines four XMM registers (xmm<x0>..xmm<x3>) with
// optional input and stores them to the output buffer.
//   labelPrefix     - pasted onto 3, 7, 8 and 9 to form the labels used below
//   inputPtr        - register holding the input pointer; may be zero
//   outputPtr       - register holding the output pointer
//   x0..x3          - numbers of the XMM registers holding the four blocks
//   t               - number of a scratch XMM register for unaligned loads
//   p0..p3          - 16-byte block positions used for both input and output
//   increment       - number of 16-byte blocks to advance the pointers by
// Flow:
//   1. If inputPtr is zero, skip the XOR step entirely (jump to label 3).
//   2. If inputPtr is 16-byte aligned (low four bits clear), pxor each register
//      directly with memory; otherwise (label 7) load each block with movdqu
//      into xmm<t> first. Either way inputPtr advances by increment*16.
//   3. Label 3: store with movdqa when outputPtr is 16-byte aligned, or with
//      movdqu (label 8) when it is not.
//   4. Label 9: advance outputPtr by increment*16.
556#define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
557        AS2(    test    inputPtr, inputPtr)\
558        ASC(    jz,             labelPrefix##3)\
559        AS2(    test    inputPtr, 15)\
560        ASC(    jnz,    labelPrefix##7)\
561        AS2(    pxor    xmm##x0, [inputPtr+p0*16])\
562        AS2(    pxor    xmm##x1, [inputPtr+p1*16])\
563        AS2(    pxor    xmm##x2, [inputPtr+p2*16])\
564        AS2(    pxor    xmm##x3, [inputPtr+p3*16])\
565        AS2(    add             inputPtr, increment*16)\
566        ASC(    jmp,    labelPrefix##3)\
567        ASL(labelPrefix##7)\
568        AS2(    movdqu  xmm##t, [inputPtr+p0*16])\
569        AS2(    pxor    xmm##x0, xmm##t)\
570        AS2(    movdqu  xmm##t, [inputPtr+p1*16])\
571        AS2(    pxor    xmm##x1, xmm##t)\
572        AS2(    movdqu  xmm##t, [inputPtr+p2*16])\
573        AS2(    pxor    xmm##x2, xmm##t)\
574        AS2(    movdqu  xmm##t, [inputPtr+p3*16])\
575        AS2(    pxor    xmm##x3, xmm##t)\
576        AS2(    add             inputPtr, increment*16)\
577        ASL(labelPrefix##3)\
578        AS2(    test    outputPtr, 15)\
579        ASC(    jnz,    labelPrefix##8)\
580        AS2(    movdqa  [outputPtr+p0*16], xmm##x0)\
581        AS2(    movdqa  [outputPtr+p1*16], xmm##x1)\
582        AS2(    movdqa  [outputPtr+p2*16], xmm##x2)\
583        AS2(    movdqa  [outputPtr+p3*16], xmm##x3)\
584        ASC(    jmp,    labelPrefix##9)\
585        ASL(labelPrefix##8)\
586        AS2(    movdqu  [outputPtr+p0*16], xmm##x0)\
587        AS2(    movdqu  [outputPtr+p1*16], xmm##x1)\
588        AS2(    movdqu  [outputPtr+p2*16], xmm##x2)\
589        AS2(    movdqu  [outputPtr+p3*16], xmm##x3)\
590        ASL(labelPrefix##9)\
591        AS2(    add             outputPtr, increment*16)
592
593#endif  //  X86/X32/X64
594
595NAMESPACE_END
596
597#endif  // CRYPTOPP_CPU_H
Note: See TracBrowser for help on using the repository browser.