39 #ifndef headerfilettmathuint_x86_64 40 #define headerfilettmathuint_x86_64 44 #ifdef TTMATH_PLATFORM64 84 uint __fastcall ttmath_addvector_x64(
const uint * ss1,
const uint * ss2,
uint ss1_size,
uint ss2_size,
uint * result);
87 uint __fastcall ttmath_subvector_x64(
const uint * ss1,
const uint * ss2,
uint ss1_size,
uint ss2_size,
uint * result);
107 template<u
int value_size>
111 static const char info[] =
"asm_vc_64";
115 static const char info[] =
"asm_gcc_64";
125 template<u
int value_size>
157 template<u
int value_size>
168 c = ttmath_adc_x64(p1,p2,b,c);
177 __asm__ __volatile__(
179 "xorq %%rdx, %%rdx \n" 183 "movq (%%rsi,%%rdx,8), %%rax \n" 184 "adcq %%rax, (%%rbx,%%rdx,8) \n" 190 "adcq %%rcx, %%rcx \n" 192 :
"=c" (c),
"=a" (dummy),
"=d" (dummy2)
193 :
"0" (b),
"1" (c),
"b" (p1),
"S" (p2)
198 TTMATH_LOGC(
"UInt::Add", c)
230 template<u
int value_size>
237 TTMATH_ASSERT( index < value_size )
240 c = ttmath_addindexed_x64(p1,b,index,value);
247 __asm__ __volatile__(
249 "subq %%rdx, %%rcx \n" 252 "addq %%rax, (%%rbx,%%rdx,8) \n" 262 "movzx %%al, %%rdx \n" 264 :
"=d" (c),
"=a" (dummy),
"=c" (dummy2)
265 :
"0" (index),
"1" (value),
"2" (b),
"b" (p1)
270 TTMATH_LOGC(
"UInt::AddInt", c)
315 template<u
int value_size>
322 TTMATH_ASSERT( index < value_size - 1 )
325 c = ttmath_addindexed2_x64(p1,b,index,x1,x2);
332 __asm__ __volatile__(
334 "subq %%rdx, %%rcx \n" 336 "addq %%rsi, (%%rbx,%%rdx,8) \n" 341 "adcq %%rax, (%%rbx,%%rdx,8) \n" 351 "movzx %%al, %%rax \n" 353 :
"=a" (c),
"=c" (dummy),
"=d" (dummy2)
354 :
"0" (x2),
"1" (b),
"2" (index),
"b" (p1),
"S" (x1)
359 TTMATH_LOGC(
"UInt::AddTwoInts", c)
386 template<u
int value_size>
389 TTMATH_ASSERT( ss1_size >= ss2_size )
394 c = ttmath_addvector_x64(ss1, ss2, ss1_size, ss2_size, result);
399 uint dummy1, dummy2, dummy3;
400 uint rest = ss1_size - ss2_size;
404 __asm__ __volatile__(
406 "xor %%rdx, %%rdx \n" 408 "mov (%%rsi,%%rdx,8), %%rax \n" 409 "adc (%%rbx,%%rdx,8), %%rax \n" 410 "mov %%rax, (%%rdi,%%rdx,8) \n" 416 "adc %%rcx, %%rcx \n" 421 "xor %%rbx, %%rbx \n" 425 "mov (%%rsi, %%rdx, 8), %%rax \n" 426 "adc %%rbx, %%rax \n" 427 "mov %%rax, (%%rdi, %%rdx, 8) \n" 433 "adc %%rcx, %%rcx \n" 436 :
"=a" (dummy1),
"=b" (dummy2),
"=c" (c),
"=d" (dummy3)
437 :
"1" (ss2),
"2" (ss2_size),
"3" (rest),
"S" (ss1),
"D" (result)
438 :
"%r8",
"cc",
"memory" );
442 TTMATH_VECTOR_LOGC(
"UInt::AddVector", c, result, ss1_size)
459 template<u
int value_size>
470 c = ttmath_sbb_x64(p1,p2,b,c);
477 __asm__ __volatile__(
479 "xorq %%rdx, %%rdx \n" 483 "movq (%%rsi,%%rdx,8), %%rax \n" 484 "sbbq %%rax, (%%rbx,%%rdx,8) \n" 490 "adcq %%rcx, %%rcx \n" 492 :
"=c" (c),
"=a" (dummy),
"=d" (dummy2)
493 :
"0" (b),
"1" (c),
"b" (p1),
"S" (p2)
498 TTMATH_LOGC(
"UInt::Sub", c)
529 template<u
int value_size>
536 TTMATH_ASSERT( index < value_size )
539 c = ttmath_subindexed_x64(p1,b,index,value);
546 __asm__ __volatile__(
548 "subq %%rdx, %%rcx \n" 551 "subq %%rax, (%%rbx,%%rdx,8) \n" 561 "movzx %%al, %%rdx \n" 563 :
"=d" (c),
"=a" (dummy),
"=c" (dummy2)
564 :
"0" (index),
"1" (value),
"2" (b),
"b" (p1)
569 TTMATH_LOGC(
"UInt::SubInt", c)
596 template<u
int value_size>
599 TTMATH_ASSERT( ss1_size >= ss2_size )
604 c = ttmath_subvector_x64(ss1, ss2, ss1_size, ss2_size, result);
613 uint dummy1, dummy2, dummy3;
614 uint rest = ss1_size - ss2_size;
616 __asm__ __volatile__(
618 "xor %%rdx, %%rdx \n" 620 "mov (%%rsi,%%rdx,8), %%rax \n" 621 "sbb (%%rbx,%%rdx,8), %%rax \n" 622 "mov %%rax, (%%rdi,%%rdx,8) \n" 628 "adc %%rcx, %%rcx \n" 633 "xor %%rbx, %%rbx \n" 637 "mov (%%rsi, %%rdx, 8), %%rax \n" 638 "sbb %%rbx, %%rax \n" 639 "mov %%rax, (%%rdi, %%rdx, 8) \n" 645 "adc %%rcx, %%rcx \n" 648 :
"=a" (dummy1),
"=b" (dummy2),
"=c" (c),
"=d" (dummy3)
649 :
"1" (ss2),
"2" (ss2_size),
"3" (rest),
"S" (ss1),
"D" (result)
650 :
"%r8",
"cc",
"memory" );
654 TTMATH_VECTOR_LOGC(
"UInt::SubVector", c, result, ss1_size)
674 template<u
int value_size>
682 c = ttmath_rcl_x64(p1,b,c);
689 __asm__ __volatile__(
691 "xorq %%rdx, %%rdx \n" 695 "rclq $1, (%%rbx, %%rdx, 8) \n" 701 "adcq %%rcx, %%rcx \n" 703 :
"=c" (c),
"=a" (dummy),
"=d" (dummy2)
704 :
"0" (b),
"1" (c),
"b" (p1)
709 TTMATH_LOGC(
"UInt::Rcl2_one", c)
729 template<u
int value_size>
737 c = ttmath_rcr_x64(p1,b,c);
744 __asm__ __volatile__(
749 "rcrq $1, -8(%%rbx, %%rcx, 8) \n" 754 "adcq %%rcx, %%rcx \n" 756 :
"=c" (c),
"=a" (dummy)
757 :
"0" (b),
"1" (c),
"b" (p1)
762 TTMATH_LOGC(
"UInt::Rcr2_one", c)
783 template<u
int value_size>
793 c = ttmath_rcl2_x64(p1,b,bits,c);
798 uint dummy, dummy2, dummy3;
800 __asm__ __volatile__(
802 "movq %%rcx, %%rsi \n" 804 "subq %%rsi, %%rcx \n" 806 "shrq %%cl, %%rdx \n" 807 "movq %%rdx, %%r8 \n" 808 "movq %%rsi, %%rcx \n" 810 "xorq %%rdx, %%rdx \n" 811 "movq %%rdx, %%rsi \n" 812 "orq %%rax, %%rax \n" 813 "cmovnz %%r8, %%rsi \n" 816 "rolq %%cl, (%%rbx,%%rdx,8) \n" 818 "movq (%%rbx,%%rdx,8), %%rax \n" 819 "andq %%r8, %%rax \n" 820 "xorq %%rax, (%%rbx,%%rdx,8) \n" 821 "orq %%rsi, (%%rbx,%%rdx,8) \n" 822 "movq %%rax, %%rsi \n" 830 :
"=a" (c),
"=D" (dummy),
"=S" (dummy2),
"=d" (dummy3)
831 :
"0" (c),
"1" (b),
"b" (p1),
"c" (bits)
832 :
"%r8",
"cc",
"memory" );
836 TTMATH_LOGC(
"UInt::Rcl2", c)
856 template<u
int value_size>
866 c = ttmath_rcr2_x64(p1,b,bits,c);
871 uint dummy, dummy2, dummy3;
873 __asm__ __volatile__(
875 "movq %%rcx, %%rsi \n" 877 "subq %%rsi, %%rcx \n" 879 "shlq %%cl, %%rdx \n" 880 "movq %%rdx, %%R8 \n" 881 "movq %%rsi, %%rcx \n" 883 "xorq %%rdx, %%rdx \n" 884 "movq %%rdx, %%rsi \n" 885 "addq %%rdi, %%rdx \n" 887 "orq %%rax, %%rax \n" 888 "cmovnz %%R8, %%rsi \n" 891 "rorq %%cl, (%%rbx,%%rdx,8) \n" 893 "movq (%%rbx,%%rdx,8), %%rax \n" 894 "andq %%R8, %%rax \n" 895 "xorq %%rax, (%%rbx,%%rdx,8) \n" 896 "orq %%rsi, (%%rbx,%%rdx,8) \n" 897 "movq %%rax, %%rsi \n" 906 :
"=a" (c),
"=D" (dummy),
"=S" (dummy2),
"=d" (dummy3)
907 :
"0" (c),
"1" (b),
"b" (p1),
"c" (bits)
908 :
"%r8",
"cc",
"memory" );
912 TTMATH_LOGC(
"UInt::Rcr2", c)
924 template<u
int value_size>
932 unsigned long nIndex = 0;
934 if( _BitScanReverse64(&nIndex,x) == 0 )
951 :
"=r" (result),
"=&r" (dummy)
968 template<u
int value_size>
976 unsigned long nIndex = 0;
978 if( _BitScanForward64(&nIndex,x) == 0 )
995 :
"=r" (result),
"=&r" (dummy)
1019 template<u
int value_size>
1029 old_bit = _bittestandset64((__int64*)&value,bit) != 0;
1037 "btsq %%rbx, %%rax \n" 1039 "movzx %%bl, %%rbx \n" 1041 :
"=a" (v),
"=b" (old_bit)
1042 :
"0" (v),
"1" (bit)
1071 template<u
int value_size>
1086 result1_ = _umul128(a,b,&result2_);
1096 :
"=a" (result1_),
"=d" (result2_)
1103 *result_low = result1_;
1104 *result_high = result2_;
1131 template<u
int value_size>
1141 TTMATH_ASSERT( c != 0 )
1146 ttmath_div_x64(&a,&b,c);
1159 :
"=a" (r_),
"=d" (rest_)
1160 :
"d" (a),
"a" (b),
"c" (c)
1173 #endif //ifdef TTMATH_PLATFORM64 1174 #endif //ifndef TTMATH_NOASM
uint SubInt(uint value, uint index=0)
uint AddInt(uint value, uint index=0)
static const char * LibTypeStr()
uint Sub(const UInt< value_size > &ss2, uint c=0)
uint Add(const UInt< value_size > &ss2, uint c=0)
static LibTypeCode LibType()
static void MulTwoWords(uint a, uint b, uint *result_high, uint *result_low)
a namespace for the TTMath library
uint AddTwoInts(uint x2, uint x1, uint index)
#define TTMATH_BITS_PER_UINT
UInt implements a big integer value without a sign.
static uint SetBitInWord(uint &value, uint bit)
static uint AddVector(const uint *ss1, const uint *ss2, uint ss1_size, uint ss2_size, uint *result)
static void DivTwoWords(uint a, uint b, uint c, uint *r, uint *rest)
static uint SubVector(const uint *ss1, const uint *ss2, uint ss1_size, uint ss2_size, uint *result)