TTMath  0.9.4
 C++ bignum library
ttmathuint_x86_64.h
Go to the documentation of this file.
1 /*
2  * This file is a part of TTMath Bignum Library
3  * and is distributed under the 3-Clause BSD Licence.
4  * Author: Tomasz Sowa <t.sowa@ttmath.org>
5  */
6 
7 /*
8  * Copyright (c) 2006-2010, Tomasz Sowa
9  * All rights reserved.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions are met:
13  *
14  * * Redistributions of source code must retain the above copyright notice,
15  * this list of conditions and the following disclaimer.
16  *
17  * * Redistributions in binary form must reproduce the above copyright
18  * notice, this list of conditions and the following disclaimer in the
19  * documentation and/or other materials provided with the distribution.
20  *
21  * * Neither the name Tomasz Sowa nor the names of contributors to this
22  * project may be used to endorse or promote products derived
23  * from this software without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
35  * THE POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 
39 #ifndef headerfilettmathuint_x86_64
40 #define headerfilettmathuint_x86_64
41 
42 
43 #ifndef TTMATH_NOASM
44 #ifdef TTMATH_PLATFORM64
45 
46 
47 /*!
48  \file ttmathuint_x86_64.h
49  \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
50 
51  this file is included at the end of ttmathuint.h
52 */
53 
54 
55 /*!
56  \file ttmathuint_x86_64_msvc.asm
57  \brief some asm routines for x86_64 when using Microsoft compiler
58 
59  this file should be first compiled:
60  - compile with debug info: ml64.exe /c /Zd /Zi ttmathuint_x86_64_msvc.asm
61  - compile without debug info: ml64.exe /c ttmathuint_x86_64_msvc.asm
62 
63  this creates ttmathuint_x86_64_msvc.obj file which can be linked with your program
64 
65  (you can use win64_assemble.bat file from ttmath subdirectory)
66 */
67 
68 
69 #ifndef __GNUC__
70 #include <intrin.h>
71 #endif
72 
73 
74 namespace ttmath
75 {
76 
77  #ifndef __GNUC__
78 
79  extern "C" // prototypes of the external routines called by the non-GCC branches in this file; presumably implemented in ttmathuint_x86_64_msvc.asm (see the file note above) - confirm
80  {
81  uint __fastcall ttmath_adc_x64(uint* p1, const uint* p2, uint nSize, uint c); // called by Add
82  uint __fastcall ttmath_addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue); // called by AddInt
83  uint __fastcall ttmath_addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2); // called by AddTwoInts
84  uint __fastcall ttmath_addvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result); // called by AddVector
85  uint __fastcall ttmath_sbb_x64(uint* p1, const uint* p2, uint nSize, uint c); // called by Sub
86  uint __fastcall ttmath_subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue); // called by SubInt
87  uint __fastcall ttmath_subvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result); // called by SubVector
88  uint __fastcall ttmath_rcl_x64(uint* p1, uint nSize, uint nLowestBit); // called by Rcl2_one
89  uint __fastcall ttmath_rcr_x64(uint* p1, uint nSize, uint nLowestBit); // called by Rcr2_one
90  uint __fastcall ttmath_div_x64(uint* pnValHi, uint* pnValLo, uint nDiv); // called by the two-word division routine at the end of this file
91  uint __fastcall ttmath_rcl2_x64(uint* p1, uint nSize, uint nBits, uint c); // called by Rcl2
92  uint __fastcall ttmath_rcr2_x64(uint* p1, uint nSize, uint nBits, uint c); // called by Rcr2
93  };
94  #endif
95 
96 
97  /*!
98  returns a string that represents the current type of the library
99  we have following types:
100  asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
101  asm_gcc_32 - with asm code designed for GCC (32 bits)
102  asm_vc_64 - with asm for VC (64 bit)
103  asm_gcc_64 - with asm for GCC (64 bit)
104  no_asm_32 - pure C++ version (32 bit) - without any asm code
105  no_asm_64 - pure C++ version (64 bit) - without any asm code
106  */
107  template<uint value_size> // NOTE(review): listing line 108 (the function signature) is missing from this copy
109  {
110  #ifndef __GNUC__
111  static const char info[] = "asm_vc_64"; // non-GCC build: the external asm routines are used
112  #endif
113 
114  #ifdef __GNUC__
115  static const char info[] = "asm_gcc_64"; // GCC build: the inline asm in this file is used
116  #endif
117 
118  return info; // the array is static, so the returned pointer stays valid after the call
119  }
120 
121 
122  /*!
123  returns the current type of the library
124  */
125  template<uint value_size> // NOTE(review): listing line 126 (the function signature) is missing from this copy
127  {
128  #ifndef __GNUC__
129  LibTypeCode info = asm_vc_64; // non-GCC build
130  #endif
131 
132  #ifdef __GNUC__
133  LibTypeCode info = asm_gcc_64; // GCC build
134  #endif
135 
136  return info; // same information as the string variant, but as a LibTypeCode value
137  }
138 
139 
140  /*!
141  *
142  * basic mathematical functions
143  *
144  */
145 
146 
147 
148  /*!
149  this method adds ss2 to 'this' and also adds the carry if it is set
150  (this = this + ss2 + c)
151 
152  ***this method is created only on a 64bit platform***
153 
154  c should be zero or one (a value bigger than 1 is treated as a carry of one)
155  the function returns the carry (1) if there was one
156  */
157  template<uint value_size> // NOTE(review): listing line 158 (the function signature) is missing from this copy
159  {
160  uint b = value_size; // number of words to process
161  uint * p1 = table; // destination words (this)
162  const uint * p2 = ss2.table; // source words (ss2)
163 
164  // we don't have to use TTMATH_REFERENCE_ASSERT here
165  // this algorithm doesn't require it
166 
167  #ifndef __GNUC__
168  c = ttmath_adc_x64(p1,p2,b,c); // external asm routine; its result becomes the returned carry
169  #endif
170 
171  #ifdef __GNUC__
172  uint dummy, dummy2;
173 
174  /*
175  this part should be compiled with gcc
176  */
177  __asm__ __volatile__(
178 
179  "xorq %%rdx, %%rdx \n" // rdx = 0 (word index)
180  "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
181 
182  "1: \n"
183  "movq (%%rsi,%%rdx,8), %%rax \n" // rax = p2[rdx]
184  "adcq %%rax, (%%rbx,%%rdx,8) \n" // p1[rdx] += rax + CF
185 
186  "incq %%rdx \n" // next word (inc/dec leave CF unchanged, so the carry survives the loop control)
187  "decq %%rcx \n" // one word fewer to do
188  "jnz 1b \n" // repeat until rcx == 0
189 
190  "adcq %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF (the final carry)
191 
192  : "=c" (c), "=a" (dummy), "=d" (dummy2) // rcx -> c; rax and rdx are scratch
193  : "0" (b), "1" (c), "b" (p1), "S" (p2) // rcx = b, rax = c, rbx = p1, rsi = p2
194  : "cc", "memory" );
195 
196  #endif
197 
198  TTMATH_LOGC("UInt::Add", c)
199 
200  return c;
201  }
202 
203 
204 
205  /*!
206  this method adds one word (at a specific position)
207  and returns a carry (if it was)
208 
209  ***this method is created only on a 64bit platform***
210 
211 
212  if we've got (value_size=3):
213 
214  table[0] = 10;
215  table[1] = 30;
216  table[2] = 5;
217 
218  and we call:
219 
220  AddInt(2,1)
221 
222  then it'll be:
223 
224  table[0] = 10;
225  table[1] = 30 + 2;
226  table[2] = 5;
227 
228  of course if there was a carry from table[2] it would be returned
229  */
230  template<uint value_size> // NOTE(review): listing line 231 (the function signature) is missing from this copy
232  {
233  uint b = value_size; // total number of words
234  uint * p1 = table; // words of this
235  uint c; // carry to return
236 
237  TTMATH_ASSERT( index < value_size )
238 
239  #ifndef __GNUC__
240  c = ttmath_addindexed_x64(p1,b,index,value); // external asm routine; its result is the returned carry
241  #endif
242 
243 
244  #ifdef __GNUC__
245  uint dummy, dummy2;
246 
247  __asm__ __volatile__(
248 
249  "subq %%rdx, %%rcx \n" // rcx = b - index (words from 'index' to the end)
250 
251  "1: \n"
252  "addq %%rax, (%%rbx,%%rdx,8) \n" // p1[rdx] += rax (value on the first pass, 1 afterwards)
253  "jnc 2f \n" // no carry: nothing more to propagate
254 
255  "movq $1, %%rax \n" // the carry to add to the next word
256  "incq %%rdx \n" // next word (mov/inc/dec leave CF unchanged)
257  "decq %%rcx \n"
258  "jnz 1b \n" // stop when the last word has been processed
259 
260  "2: \n"
261  "setc %%al \n" // al = CF: 1 only if the carry ran past the last word
262  "movzx %%al, %%rdx \n" // rdx = zero-extended al (becomes c)
263 
264  : "=d" (c), "=a" (dummy), "=c" (dummy2) // rdx -> c; rax and rcx are scratch
265  : "0" (index), "1" (value), "2" (b), "b" (p1) // rdx = index, rax = value, rcx = b, rbx = p1
266  : "cc", "memory" );
267 
268  #endif
269 
270  TTMATH_LOGC("UInt::AddInt", c)
271 
272  return c;
273  }
274 
275 
276 
277  /*!
278  this method adds only two unsigned words to the existing value
279  and these words begin on the 'index' position
280  (it's used in the multiplication algorithm 2)
281 
282  ***this method is created only on a 64bit platform***
283 
284  index should be equal or smaller than value_size-2 (index <= value_size-2)
285  x1 - lower word, x2 - higher word
286 
287  for example if we've got value_size equal 4 and:
288 
289  table[0] = 3
290  table[1] = 4
291  table[2] = 5
292  table[3] = 6
293 
294  then let
295 
296  x1 = 10
297  x2 = 20
298 
299  and
300 
301  index = 1
302 
303  the result of this method will be:
304 
305  table[0] = 3
306  table[1] = 4 + x1 = 14
307  table[2] = 5 + x2 = 25
308  table[3] = 6
309 
310  and no carry at the end of table[3]
311 
312  (of course if there was a carry in table[2](5+20) then
313  this carry would be passed to the table[3] etc.)
314  */
315  template<uint value_size> // NOTE(review): listing line 316 (the function signature) is missing from this copy
317  {
318  uint b = value_size; // total number of words
319  uint * p1 = table; // words of this
320  uint c; // carry to return
321 
322  TTMATH_ASSERT( index < value_size - 1 )
323 
324  #ifndef __GNUC__
325  c = ttmath_addindexed2_x64(p1,b,index,x1,x2); // external asm routine; its result is the returned carry
326  #endif
327 
328 
329  #ifdef __GNUC__
330  uint dummy, dummy2;
331 
332  __asm__ __volatile__(
333 
334  "subq %%rdx, %%rcx \n" // rcx = b - index (words from 'index' to the end)
335 
336  "addq %%rsi, (%%rbx,%%rdx,8) \n" // p1[index] += x1
337  "incq %%rdx \n" // move to p1[index+1] (inc/dec leave CF unchanged)
338  "decq %%rcx \n"
339 
340  "1: \n"
341  "adcq %%rax, (%%rbx,%%rdx,8) \n" // p1[rdx] += rax + CF (rax is x2 on the first pass, 0 afterwards)
342  "jnc 2f \n" // no carry: nothing more to propagate
343 
344  "mov $0, %%rax \n" // later passes only add the carry (mov does not change CF)
345  "incq %%rdx \n"
346  "decq %%rcx \n"
347  "jnz 1b \n" // stop when the last word has been processed
348 
349  "2: \n"
350  "setc %%al \n" // al = CF: 1 only if the carry ran past the last word
351  "movzx %%al, %%rax \n" // rax = zero-extended al (becomes c)
352 
353  : "=a" (c), "=c" (dummy), "=d" (dummy2) // rax -> c; rcx and rdx are scratch
354  : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1) // rax = x2, rcx = b, rdx = index, rbx = p1, rsi = x1
355  : "cc", "memory" );
356 
357  #endif
358 
359  TTMATH_LOGC("UInt::AddTwoInts", c)
360 
361  return c;
362  }
363 
364 
365 
366  /*!
367  this static method adds one vector to the other
368  'ss1' is larger in size or equal to 'ss2'
369 
370  - ss1 points to the first (larger) vector
371  - ss2 points to the second vector
372  - ss1_size - size of the ss1 (and size of the result too)
373  - ss2_size - size of the ss2
374  - result - is the result vector (which has size the same as ss1: ss1_size)
375 
376  Example: ss1_size is 5, ss2_size is 3
377  ss1: ss2: result (output):
378  5 1 5+1
379  4 3 4+3
380  2 7 2+7
381  6 6
382  9 9
383  of course the carry is propagated and will be returned from the last item
384  (this method is used by the Karatsuba multiplication algorithm)
385  */
386  template<uint value_size>
387  uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
388  {
389  TTMATH_ASSERT( ss1_size >= ss2_size )
390 
391  uint c; // carry to return
392 
393  #ifndef __GNUC__
394  c = ttmath_addvector_x64(ss1, ss2, ss1_size, ss2_size, result); // external asm routine; its result is the returned carry
395  #endif
396 
397 
398  #ifdef __GNUC__
399  uint dummy1, dummy2, dummy3;
400  uint rest = ss1_size - ss2_size; // words of ss1 that have no counterpart in ss2
401 
402  // this part should be compiled with gcc
403  // NOTE(review): the first loop below is a do-while, so ss2_size == 0 would wrap rcx around - presumably callers never pass 0; confirm
404  __asm__ __volatile__(
405  "mov %%rdx, %%r8 \n" // r8 = rest (rdx is reused as the word index)
406  "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
407  "1: \n"
408  "mov (%%rsi,%%rdx,8), %%rax \n" // rax = ss1[rdx]
409  "adc (%%rbx,%%rdx,8), %%rax \n" // rax += ss2[rdx] + CF
410  "mov %%rax, (%%rdi,%%rdx,8) \n" // result[rdx] = rax
411 
412  "inc %%rdx \n" // next word (mov/inc/dec leave CF unchanged)
413  "dec %%rcx \n"
414  "jnz 1b \n" // repeat for all ss2_size words
415 
416  "adc %%rcx, %%rcx \n" // rcx has the cf state
417 
418  "or %%r8, %%r8 \n" // is rest == 0 ?
419  "jz 3f \n" // yes: finished, rcx already holds the carry
420 
421  "xor %%rbx, %%rbx \n" // rbx = 0
422  "neg %%rcx \n" // setting cf from rcx
423  "mov %%r8, %%rcx \n" // rcx=rest and is != 0
424  "2: \n"
425  "mov (%%rsi, %%rdx, 8), %%rax \n" // rax = ss1[rdx]
426  "adc %%rbx, %%rax \n" // rax += 0 + CF (only the carry is propagated)
427  "mov %%rax, (%%rdi, %%rdx, 8) \n" // result[rdx] = rax
428 
429  "inc %%rdx \n"
430  "dec %%rcx \n"
431  "jnz 2b \n" // repeat for the remaining 'rest' words
432 
433  "adc %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF (the final carry)
434  "3: \n"
435 
436  : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) // rcx -> c; rax, rbx and rdx are scratch
437  : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) // rbx = ss2, rcx = ss2_size, rdx = rest, rsi = ss1, rdi = result
438  : "%r8", "cc", "memory" );
439 
440  #endif
441 
442  TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
443 
444  return c;
445  }
446 
447 
448 
449  /*!
450  this method subtracts ss2 from 'this' and also subtracts
451  the carry (borrow) if it is set
452  (this = this - ss2 - c)
453 
454  ***this method is created only on a 64bit platform***
455 
456  c should be zero or one (a value bigger than 1 is treated as a carry of one)
457  the function returns the carry (1) if there was one
458  */
459  template<uint value_size> // NOTE(review): listing line 460 (the function signature) is missing from this copy
461  {
462  uint b = value_size; // number of words to process
463  uint * p1 = table; // destination words (this)
464  const uint * p2 = ss2.table; // words to subtract (ss2)
465 
466  // we don't have to use TTMATH_REFERENCE_ASSERT here
467  // this algorithm doesn't require it
468 
469  #ifndef __GNUC__
470  c = ttmath_sbb_x64(p1,p2,b,c); // external asm routine; its result becomes the returned carry
471  #endif
472 
473 
474  #ifdef __GNUC__
475  uint dummy, dummy2;
476 
477  __asm__ __volatile__(
478 
479  "xorq %%rdx, %%rdx \n" // rdx = 0 (word index)
480  "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
481 
482  "1: \n"
483  "movq (%%rsi,%%rdx,8), %%rax \n" // rax = p2[rdx]
484  "sbbq %%rax, (%%rbx,%%rdx,8) \n" // p1[rdx] -= rax + CF
485 
486  "incq %%rdx \n" // next word (inc/dec leave CF unchanged, so the borrow survives the loop control)
487  "decq %%rcx \n" // one word fewer to do
488  "jnz 1b \n" // repeat until rcx == 0
489 
490  "adcq %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF (the final borrow)
491 
492  : "=c" (c), "=a" (dummy), "=d" (dummy2) // rcx -> c; rax and rdx are scratch
493  : "0" (b), "1" (c), "b" (p1), "S" (p2) // rcx = b, rax = c, rbx = p1, rsi = p2
494  : "cc", "memory" );
495 
496  #endif
497 
498  TTMATH_LOGC("UInt::Sub", c)
499 
500  return c;
501  }
502 
503 
504 
505  /*!
506  this method subtracts one word (at a specific position)
507  and returns a carry (if it was)
508 
509  ***this method is created only on a 64bit platform***
510 
511  if we've got (value_size=3):
512 
513  table[0] = 10;
514  table[1] = 30;
515  table[2] = 5;
516 
517  and we call:
518 
519  SubInt(2,1)
520 
521  then it'll be:
522 
523  table[0] = 10;
524  table[1] = 30 - 2;
525  table[2] = 5;
526 
527  of course if there was a carry from table[2] it would be returned
528  */
529  template<uint value_size> // NOTE(review): listing line 530 (the function signature) is missing from this copy
531  {
532  uint b = value_size; // total number of words
533  uint * p1 = table; // words of this
534  uint c; // carry (borrow) to return
535 
536  TTMATH_ASSERT( index < value_size )
537 
538  #ifndef __GNUC__
539  c = ttmath_subindexed_x64(p1,b,index,value); // external asm routine; its result is the returned carry
540  #endif
541 
542 
543  #ifdef __GNUC__
544  uint dummy, dummy2;
545 
546  __asm__ __volatile__(
547 
548  "subq %%rdx, %%rcx \n" // rcx = b - index (words from 'index' to the end)
549 
550  "1: \n"
551  "subq %%rax, (%%rbx,%%rdx,8) \n" // p1[rdx] -= rax (value on the first pass, 1 afterwards)
552  "jnc 2f \n" // no borrow: nothing more to propagate
553 
554  "movq $1, %%rax \n" // the borrow to take from the next word
555  "incq %%rdx \n" // next word (mov/inc/dec leave CF unchanged)
556  "decq %%rcx \n"
557  "jnz 1b \n" // stop when the last word has been processed
558 
559  "2: \n"
560  "setc %%al \n" // al = CF: 1 only if the borrow ran past the last word
561  "movzx %%al, %%rdx \n" // rdx = zero-extended al (becomes c)
562 
563  : "=d" (c), "=a" (dummy), "=c" (dummy2) // rdx -> c; rax and rcx are scratch
564  : "0" (index), "1" (value), "2" (b), "b" (p1) // rdx = index, rax = value, rcx = b, rbx = p1
565  : "cc", "memory" );
566 
567  #endif
568 
569  TTMATH_LOGC("UInt::SubInt", c)
570 
571  return c;
572  }
573 
574 
575  /*!
576  this static method subtracts one vector from the other
577  'ss1' is larger in size or equal to 'ss2'
578 
579  - ss1 points to the first (larger) vector
580  - ss2 points to the second vector
581  - ss1_size - size of the ss1 (and size of the result too)
582  - ss2_size - size of the ss2
583  - result - is the result vector (which has size the same as ss1: ss1_size)
584 
585  Example: ss1_size is 5, ss2_size is 3
586  ss1: ss2: result (output):
587  5 1 5-1
588  4 3 4-3
589  2 7 2-7
590  6 6-1 (the borrow from previous item)
591  9 9
592  return (carry): 0
593  of course the carry (borrow) is propagated and will be returned from the last item
594  (this method is used by the Karatsuba multiplication algorithm)
595  */
596  template<uint value_size>
597  uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
598  {
599  TTMATH_ASSERT( ss1_size >= ss2_size )
600 
601  uint c; // carry (borrow) to return
602 
603  #ifndef __GNUC__
604  c = ttmath_subvector_x64(ss1, ss2, ss1_size, ss2_size, result); // external asm routine; its result is the returned carry
605  #endif
606 
607 
608  #ifdef __GNUC__
609 
610  // the asm code is nearly the same as in AddVector
611  // only two instructions 'adc' are changed to 'sbb'
612  // NOTE(review): the first loop is a do-while, so ss2_size == 0 would wrap rcx around - presumably callers never pass 0; confirm
613  uint dummy1, dummy2, dummy3;
614  uint rest = ss1_size - ss2_size; // words of ss1 that have no counterpart in ss2
615 
616  __asm__ __volatile__(
617  "mov %%rdx, %%r8 \n" // r8 = rest (rdx is reused as the word index)
618  "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
619  "1: \n"
620  "mov (%%rsi,%%rdx,8), %%rax \n" // rax = ss1[rdx]
621  "sbb (%%rbx,%%rdx,8), %%rax \n" // rax -= ss2[rdx] + CF
622  "mov %%rax, (%%rdi,%%rdx,8) \n" // result[rdx] = rax
623 
624  "inc %%rdx \n" // next word (mov/inc/dec leave CF unchanged)
625  "dec %%rcx \n"
626  "jnz 1b \n" // repeat for all ss2_size words
627 
628  "adc %%rcx, %%rcx \n" // rcx has the cf state
629 
630  "or %%r8, %%r8 \n" // is rest == 0 ?
631  "jz 3f \n" // yes: finished, rcx already holds the borrow
632 
633  "xor %%rbx, %%rbx \n" // rbx = 0
634  "neg %%rcx \n" // setting cf from rcx
635  "mov %%r8, %%rcx \n" // rcx=rest and is != 0
636  "2: \n"
637  "mov (%%rsi, %%rdx, 8), %%rax \n" // rax = ss1[rdx]
638  "sbb %%rbx, %%rax \n" // rax -= 0 + CF (only the borrow is propagated)
639  "mov %%rax, (%%rdi, %%rdx, 8) \n" // result[rdx] = rax
640 
641  "inc %%rdx \n"
642  "dec %%rcx \n"
643  "jnz 2b \n" // repeat for the remaining 'rest' words
644 
645  "adc %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF (the final borrow)
646  "3: \n"
647 
648  : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3) // rcx -> c; rax, rbx and rdx are scratch
649  : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) // rbx = ss2, rcx = ss2_size, rdx = rest, rsi = ss1, rdi = result
650  : "%r8", "cc", "memory" );
651 
652  #endif
653 
654  TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
655 
656  return c;
657  }
658 
659 
660  /*!
661  this method moves all bits into the left hand side
662  return value <- this <- c
663 
664  the lowest *bit* will be set to the value of 'c' and
665  the state of one additional bit (on the left hand side)
666  will be returned
667 
668  for example:
669  let this is 001010000
670  after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
671 
672  ***this method is created only on a 64bit platform***
673  */
674  template<uint value_size> // NOTE(review): listing line 675 (the function signature) is missing from this copy
676  {
677  sint b = value_size; // number of words to rotate
678  uint * p1 = table; // words of this
679 
680 
681  #ifndef __GNUC__
682  c = ttmath_rcl_x64(p1,b,c); // external asm routine; its result is the returned bit
683  #endif
684 
685 
686  #ifdef __GNUC__
687  uint dummy, dummy2;
688 
689  __asm__ __volatile__(
690 
691  "xorq %%rdx, %%rdx \n" // rdx=0
692  "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
693 
694  "1: \n"
695  "rclq $1, (%%rbx, %%rdx, 8) \n" // rotate p1[rdx] left by one through CF: old CF enters bit 0, the top bit leaves into CF
696 
697  "incq %%rdx \n" // next (higher) word; inc/dec leave CF unchanged
698  "decq %%rcx \n"
699  "jnz 1b \n" // repeat for all b words
700 
701  "adcq %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF (the bit moved out of the highest word)
702 
703  : "=c" (c), "=a" (dummy), "=d" (dummy2) // rcx -> c; rax and rdx are scratch
704  : "0" (b), "1" (c), "b" (p1) // rcx = b, rax = c, rbx = p1
705  : "cc", "memory" );
706 
707  #endif
708 
709  TTMATH_LOGC("UInt::Rcl2_one", c)
710 
711  return c;
712  }
713 
714 
715  /*!
716  this method moves all bits into the right hand side
717  c -> this -> return value
718 
719  the highest *bit* will be set to the value of 'c' and
720  the state of one additional bit (on the right hand side)
721  will be returned
722 
723  for example:
724  let this is 000000010
725  after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
726 
727  ***this method is created only on a 64bit platform***
728  */
729  template<uint value_size> // NOTE(review): listing line 730 (the function signature) is missing from this copy
731  {
732  sint b = value_size; // number of words to rotate
733  uint * p1 = table; // words of this
734 
735 
736  #ifndef __GNUC__
737  c = ttmath_rcr_x64(p1,b,c); // external asm routine; its result is the returned bit
738  #endif
739 
740 
741  #ifdef __GNUC__
742  uint dummy;
743 
744  __asm__ __volatile__(
745 
746  "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
747 
748  "1: \n"
749  "rcrq $1, -8(%%rbx, %%rcx, 8) \n" // rotate p1[rcx-1] right by one through CF, starting from the highest word
750 
751  "decq %%rcx \n" // next (lower) word; dec leaves CF unchanged
752  "jnz 1b \n" // repeat until the lowest word has been rotated
753 
754  "adcq %%rcx, %%rcx \n" // rcx is 0 here, so rcx = CF (the bit moved out of the lowest word)
755 
756  : "=c" (c), "=a" (dummy) // rcx -> c; rax is scratch
757  : "0" (b), "1" (c), "b" (p1) // rcx = b, rax = c, rbx = p1
758  : "cc", "memory" );
759 
760  #endif
761 
762  TTMATH_LOGC("UInt::Rcr2_one", c)
763 
764  return c;
765  }
766 
767 
768 
769  /*!
770  this method moves all bits into the left hand side
771  return value <- this <- c
772 
773  the lowest *bits* will be filled with the value of 'c' and
774  the state of one additional bit (on the left hand side)
775  will be returned
776 
777  for example:
778  let this is 001010000
779  after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
780 
781  ***this method is created only on a 64bit platform***
782  */
783  template<uint value_size> // NOTE(review): listing line 784 (the function signature) is missing from this copy
785  {
786  TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
787 
788  uint b = value_size; // number of words to shift
789  uint * p1 = table; // words of this
790 
791 
792  #ifndef __GNUC__
793  c = ttmath_rcl2_x64(p1,b,bits,c); // external asm routine; its result is the returned bit
794  #endif
795 
796 
797  #ifdef __GNUC__
798  uint dummy, dummy2, dummy3;
799 
800  __asm__ __volatile__(
801 
802  "movq %%rcx, %%rsi \n" // rsi = bits (saved)
803  "movq $64, %%rcx \n"
804  "subq %%rsi, %%rcx \n" // rcx = 64 - bits
805  "movq $-1, %%rdx \n"
806  "shrq %%cl, %%rdx \n" // rdx = mask with the lowest 'bits' bits set
807  "movq %%rdx, %%r8 \n" // r8 = mask
808  "movq %%rsi, %%rcx \n" // cl = bits again
809 
810  "xorq %%rdx, %%rdx \n" // rdx = 0 (word index)
811  "movq %%rdx, %%rsi \n" // rsi = 0: bits to insert into the lowest word when c == 0
812  "orq %%rax, %%rax \n" // test c
813  "cmovnz %%r8, %%rsi \n" // c != 0: insert ones into the lowest 'bits' bits instead
814 
815  "1: \n"
816  "rolq %%cl, (%%rbx,%%rdx,8) \n" // rotate p1[rdx] left; the bits leaving the top wrap into the lowest 'bits' bits
817 
818  "movq (%%rbx,%%rdx,8), %%rax \n"
819  "andq %%r8, %%rax \n" // rax = the wrapped-around bits (they belong to the next word)
820  "xorq %%rax, (%%rbx,%%rdx,8) \n" // clear them in this word
821  "orq %%rsi, (%%rbx,%%rdx,8) \n" // insert the bits carried in from the previous word (or the c fill)
822  "movq %%rax, %%rsi \n" // carry the wrapped-around bits to the next word
823 
824  "incq %%rdx \n" // next (higher) word
825  "decq %%rdi \n"
826  "jnz 1b \n" // repeat for all b words
827 
828  "and $1, %%rax \n" // rax = lowest of the bits moved out of the highest word, i.e. the last bit shifted out
829 
830  : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) // rax -> c; rdi, rsi and rdx are scratch
831  : "0" (c), "1" (b), "b" (p1), "c" (bits) // rax = c, rdi = b, rbx = p1, rcx = bits
832  : "%r8", "cc", "memory" );
833 
834  #endif
835 
836  TTMATH_LOGC("UInt::Rcl2", c)
837 
838  return c;
839  }
840 
841 
842  /*!
843  this method moves all bits into the right hand side
844  C -> this -> return value
845 
846  the highest *bits* will be filled with the value of 'c' and
847  the state of one additional bit (on the right hand side)
848  will be returned
849 
850  for example:
851  let this is 000000010
852  after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
853 
854  ***this method is created only on a 64bit platform***
855  */
856  template<uint value_size> // NOTE(review): listing line 857 (the function signature) is missing from this copy
858  {
859  TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
860 
861  sint b = value_size; // number of words to shift
862  uint * p1 = table; // words of this
863 
864 
865  #ifndef __GNUC__
866  c = ttmath_rcr2_x64(p1,b,bits,c); // external asm routine; its result is the returned bit
867  #endif
868 
869 
870  #ifdef __GNUC__
871  uint dummy, dummy2, dummy3;
872 
873  __asm__ __volatile__(
874 
875  "movq %%rcx, %%rsi \n" // rsi = bits (saved)
876  "movq $64, %%rcx \n"
877  "subq %%rsi, %%rcx \n" // rcx = 64 - bits
878  "movq $-1, %%rdx \n"
879  "shlq %%cl, %%rdx \n" // rdx = mask with the highest 'bits' bits set
880  "movq %%rdx, %%R8 \n" // r8 = mask
881  "movq %%rsi, %%rcx \n" // cl = bits again
882 
883  "xorq %%rdx, %%rdx \n"
884  "movq %%rdx, %%rsi \n" // rsi = 0: bits to insert into the highest word when c == 0
885  "addq %%rdi, %%rdx \n"
886  "decq %%rdx \n" // rdx = b - 1 (index of the highest word)
887  "orq %%rax, %%rax \n" // test c
888  "cmovnz %%R8, %%rsi \n" // c != 0: insert ones into the highest 'bits' bits instead
889 
890  "1: \n"
891  "rorq %%cl, (%%rbx,%%rdx,8) \n" // rotate p1[rdx] right; the bits leaving the bottom wrap into the highest 'bits' bits
892 
893  "movq (%%rbx,%%rdx,8), %%rax \n"
894  "andq %%R8, %%rax \n" // rax = the wrapped-around bits (they belong to the next lower word)
895  "xorq %%rax, (%%rbx,%%rdx,8) \n" // clear them in this word
896  "orq %%rsi, (%%rbx,%%rdx,8) \n" // insert the bits carried in from the previous (higher) word (or the c fill)
897  "movq %%rax, %%rsi \n" // carry the wrapped-around bits to the next lower word
898 
899  "decq %%rdx \n" // next (lower) word
900  "decq %%rdi \n"
901  "jnz 1b \n" // repeat for all b words
902 
903  "rolq $1, %%rax \n" // bring bit 63 of the bits moved out of the lowest word down to bit 0
904  "andq $1, %%rax \n" // rax = that single bit, i.e. the last bit shifted out
905 
906  : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3) // rax -> c; rdi, rsi and rdx are scratch
907  : "0" (c), "1" (b), "b" (p1), "c" (bits) // rax = c, rdi = b, rbx = p1, rcx = bits
908  : "%r8", "cc", "memory" );
909 
910  #endif
911 
912  TTMATH_LOGC("UInt::Rcr2", c)
913 
914  return c;
915  }
916 
917 
918  /*
919  this method returns the number of the highest set bit in one 64-bit word
920  if the 'x' is zero this method returns '-1'
921 
922  ***this method is created only on a 64bit platform***
923  */
924  template<uint value_size> // NOTE(review): listing line 925 (the function signature) is missing from this copy
926  {
927  sint result; // index of the highest set bit of x, or -1
928 
929 
930  #ifndef __GNUC__
931 
932  unsigned long nIndex = 0;
933 
934  if( _BitScanReverse64(&nIndex,x) == 0 ) // a zero return means no set bit was found (x == 0)
935  result = -1;
936  else
937  result = nIndex;
938 
939  #endif
940 
941 
942  #ifdef __GNUC__
943  uint dummy;
944 
945  __asm__ (
946 
947  "movq $-1, %1 \n" // dummy = -1
948  "bsrq %2, %0 \n" // result = index of the highest set bit of x; ZF=1 when x == 0
949  "cmovz %1, %0 \n" // x == 0: result = -1
950 
951  : "=r" (result), "=&r" (dummy) // dummy is early-clobbered because it is written before x is read
952  : "r" (x)
953  : "cc" );
954 
955  #endif
956 
957 
958  return result;
959  }
960 
961 
962  /*
963  this method returns the number of the lowest set bit in one 64-bit word
964  if the 'x' is zero this method returns '-1'
965 
966  ***this method is created only on a 64bit platform***
967  */
968  template<uint value_size> // NOTE(review): listing line 969 (the function signature) is missing from this copy
970  {
971  sint result; // index of the lowest set bit of x, or -1
972 
973 
974  #ifndef __GNUC__
975 
976  unsigned long nIndex = 0;
977 
978  if( _BitScanForward64(&nIndex,x) == 0 ) // a zero return means no set bit was found (x == 0)
979  result = -1;
980  else
981  result = nIndex;
982 
983  #endif
984 
985 
986  #ifdef __GNUC__
987  uint dummy;
988 
989  __asm__ (
990 
991  "movq $-1, %1 \n" // dummy = -1
992  "bsfq %2, %0 \n" // result = index of the lowest set bit of x; ZF=1 when x == 0
993  "cmovz %1, %0 \n" // x == 0: result = -1
994 
995  : "=r" (result), "=&r" (dummy) // dummy is early-clobbered because it is written before x is read
996  : "r" (x)
997  : "cc" );
998 
999  #endif
1000 
1001 
1002  return result;
1003  }
1004 
1005 
1006  /*!
1007  this method sets a special bit in the 'value'
1008  and returns the last state of the bit (zero or one)
1009 
1010  ***this method is created only on a 64bit platform***
1011 
1012  bit is from <0,63>
1013 
1014  e.g.
1015  uint x = 100;
1016  uint bit = SetBitInWord(x, 3);
1017  now: x = 108 and bit = 0
1018  */
1019  template<uint value_size> // NOTE(review): listing line 1020 (the function signature) is missing from this copy
1021  {
1022  TTMATH_ASSERT( bit < TTMATH_BITS_PER_UINT )
1023 
1024  uint old_bit; // previous state of the bit (0 or 1), returned to the caller
1025  uint v = value; // both branches work on this local copy; it is written back to 'value' at the end
1026 
1027 
1028  #ifndef __GNUC__
1029  old_bit = _bittestandset64((__int64*)&v,bit) != 0; // set the bit in the local copy 'v'; setting it in 'value' here would be undone by 'value = v' below
1030  #endif
1031 
1032 
1033  #ifdef __GNUC__
1034 
1035  __asm__ (
1036 
1037  "btsq %%rbx, %%rax \n" // CF = old state of bit rbx in rax, then that bit is set
1038  "setc %%bl \n" // bl = CF
1039  "movzx %%bl, %%rbx \n" // rbx = zero-extended bl (becomes old_bit)
1040 
1041  : "=a" (v), "=b" (old_bit) // rax -> v, rbx -> old_bit
1042  : "0" (v), "1" (bit) // rax = v, rbx = bit
1043  : "cc" );
1044 
1045  #endif
1046 
1047  value = v; // store the word with the bit set
1048 
1049  return old_bit;
1050  }
1051 
1052 
1053  /*!
1054  *
1055  * Multiplication
1056  *
1057  *
1058  */
1059 
1060 
1061  /*!
1062  multiplication: result_high:result_low = a * b
1063  - result_high - higher word of the result
1064  - result_low - lower word of the result
1065 
1066  this method never returns a carry
1067  this method is used in the second version of the multiplication algorithms
1068 
1069  ***this method is created only on a 64bit platform***
1070  */
1071  template<uint value_size>
1072  void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
1073  {
1074  /*
1075  we must use these temporary variables in order to inform the compiler
1076  that the values pointed to by result_high and result_low have changed
1077 
1078  this has no effect in visual studio but it's useful when
1079  using gcc and options like -O
1080  */
1081  uint result1_; // lower word of the product
1082  uint result2_; // higher word of the product
1083 
1084 
1085  #ifndef __GNUC__
1086  result1_ = _umul128(a,b,&result2_); // the return value is stored as the low word, the word written through the pointer as the high word
1087  #endif
1088 
1089 
1090  #ifdef __GNUC__
1091 
1092  __asm__ (
1093 
1094  "mulq %%rdx \n" // rdx:rax = rax * rdx
1095 
1096  : "=a" (result1_), "=d" (result2_) // rax -> low word, rdx -> high word
1097  : "0" (a), "1" (b) // rax = a, rdx = b
1098  : "cc" );
1099 
1100  #endif
1101 
1102 
1103  *result_low = result1_;
1104  *result_high = result2_;
1105  }
1106 
1107 
1108 
1109 
1110  /*!
1111  *
1112  * Division
1113  *
1114  *
1115  */
1116 
1117 
1118  /*!
1119  this method divides the 128-bit value a:b by the 64-bit word c (a - higher word, b - lower word)
1120  r = a:b / c and rest - remainder
1121 
1122  ***this method is created only on a 64bit platform***
1123 
1124  *
1125  * WARNING:
1126  * if r (one word) is too small for the result or c is equal zero
1127  * there'll be a hardware interruption (0)
1128  * and probably the end of your program
1129  *
1130  */
1131  template<uint value_size> // NOTE(review): listing line 1132 (the function signature) is missing from this copy
1133  {
1134  uint r_; // quotient
1135  uint rest_; // remainder
1136  /*
1137  these variables have similar meaning like those in
1138  the multiplication algorithm MulTwoWords
1139  */
1140 
1141  TTMATH_ASSERT( c != 0 )
1142 
1143 
1144  #ifndef __GNUC__
1145 
1146  ttmath_div_x64(&a,&b,c); // external asm routine; presumably leaves the quotient in a and the remainder in b (they are read back below) - confirm
1147  r_ = a;
1148  rest_ = b;
1149 
1150  #endif
1151 
1152 
1153  #ifdef __GNUC__
1154 
1155  __asm__ (
1156 
1157  "divq %%rcx \n" // rdx:rax / rcx -> quotient in rax, remainder in rdx
1158 
1159  : "=a" (r_), "=d" (rest_) // rax -> r_, rdx -> rest_
1160  : "d" (a), "a" (b), "c" (c) // rdx = a (high word), rax = b (low word), rcx = c
1161  : "cc" );
1162 
1163  #endif
1164 
1165 
1166  *r = r_;
1167  *rest = rest_;
1168  }
1169 
1170 } //namespace
1171 
1172 
1173 #endif //ifdef TTMATH_PLATFORM64
1174 #endif //ifndef TTMATH_NOASM
1175 #endif
1176 
1177 
signed long sint
Definition: ttmathtypes.h:243
uint SubInt(uint value, uint index=0)
uint AddInt(uint value, uint index=0)
uint table[value_size]
Definition: ttmathuint.h:81
static const char * LibTypeStr()
uint Sub(const UInt< value_size > &ss2, uint c=0)
uint Add(const UInt< value_size > &ss2, uint c=0)
static LibTypeCode LibType()
static void MulTwoWords(uint a, uint b, uint *result_high, uint *result_low)
a namespace for the TTMath library
Definition: ttmath.h:62
uint AddTwoInts(uint x2, uint x1, uint index)
#define TTMATH_BITS_PER_UINT
Definition: ttmathtypes.h:253
UInt implements a big integer value without a sign.
Definition: ttmathuint.h:73
static uint SetBitInWord(uint &value, uint bit)
static uint AddVector(const uint *ss1, const uint *ss2, uint ss1_size, uint ss2_size, uint *result)
unsigned long uint
Definition: ttmathtypes.h:238
static void DivTwoWords(uint a, uint b, uint c, uint *r, uint *rest)
static uint SubVector(const uint *ss1, const uint *ss2, uint ss1_size, uint ss2_size, uint *result)