2 ; This file is a part of TTMath Bignum Library
3 ; and is distributed under the 3-Clause BSD Licence.
4 ; Author: Christian Kaiser <chk@online.de>, Tomasz Sowa <t.sowa@ttmath.org>
8 ; Copyright (c) 2009-2017, Christian Kaiser, Tomasz Sowa
11 ; Redistribution and use in source and binary forms, with or without
12 ; modification, are permitted provided that the following conditions are met:
14 ; * Redistributions of source code must retain the above copyright notice,
15 ; this list of conditions and the following disclaimer.
17 ; * Redistributions in binary form must reproduce the above copyright
18 ; notice, this list of conditions and the following disclaimer in the
19 ; documentation and/or other materials provided with the distribution.
21 ; * Neither the name Christian Kaiser nor the names of contributors to this
22 ; project may be used to endorse or promote products derived
23 ; from this software without specific prior written permission.
25 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26 ; AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 ; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 ; ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29 ; LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 ; CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 ; SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 ; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 ; CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 ; ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
35 ; THE POSSIBILITY OF SUCH DAMAGE.
39 ; compile with debug info: ml64.exe /c /Zd /Zi ttmathuint_x86_64_msvc.asm
40 ; compile without debug info: ml64.exe /c ttmathuint_x86_64_msvc.asm
41 ; this creates ttmathuint_x86_64_msvc.obj file which can be linked with your program
44 ; The doxygen documentation for these routines is in the ttmathuint_x86_64.h file
48 PUBLIC ttmath_addindexed_x64
49 PUBLIC ttmath_addindexed2_x64
50 PUBLIC ttmath_addvector_x64
53 PUBLIC ttmath_subindexed_x64
54 PUBLIC ttmath_subvector_x64
59 PUBLIC ttmath_rcl2_x64
60 PUBLIC ttmath_rcr2_x64
65 ; Microsoft x86_64 convention: http://msdn.microsoft.com/en-us/library/9b372w95.aspx
67 ; "rax, rcx, rdx, r8-r11 are volatile."
68 ; "rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
77 ;----------------------------------------
87 sub rax, r9 ; sets CARRY if r9 != 0
91 mov rax,qword ptr [rdx + r11 * 8]
92 adc qword ptr [rcx + r11 * 8], rax
104 ;----------------------------------------
108 ;----------------------------------------
; ttmath_addindexed_x64
; Adds the 64-bit value in r9 to the word at [rcx + r8*8]. rax is the
; return value: it starts at 0 and can be raised to 1.
; Register use visible in this routine (Microsoft x64 convention, where
; rcx, rdx, r8, r9 carry the first four integer arguments):
;   rcx = base address of the table of 64-bit words being modified
;   rdx = a count; r8 is subtracted from it to give the remaining words
;   r8  = index of the word being added to
;   r9  = value added to the word
;   rax = return value
; NOTE(review): presumably rdx is the table size in words and a return
; value of 1 reports a carry out of the last word - confirm against the
; declaration in ttmathuint_x86_64.h.
110 ttmath_addindexed_x64 PROC
117 xor rax, rax ; rax = result, starts at 0 (xor also clears CF)
118 sub rdx, r8 ; rdx = remaining count of uints
; add r9 into the word at index r8; CF is set when the word wraps around
120 add qword ptr [rcx + r8 * 8], r9
; same add form again; CF again reports whether this word wrapped around
133 add qword ptr [rcx + r8 * 8], r9
139 lea rax, [rax+1] ; rax = 1 (rax was zeroed above; lea leaves the flags untouched)
143 ttmath_addindexed_x64 ENDP
145 ;----------------------------------------
149 ;----------------------------------------
; ttmath_addindexed2_x64
; Adds two 64-bit values to a table of 64-bit words: r9 is added with
; 'add', then r10 (nValue2) is added with 'adc' so that the carry of the
; first addition is included. A further 'add ..., 1' carries a 1 into a
; table word. rax is the return value and starts at 0.
; Under the Microsoft x64 convention the fifth argument is read from
; [rsp+28h] on entry: 8 bytes of return address plus 32 bytes of shadow
; space lie below it.
; NOTE(review): the table is addressed through r11, so r11 must hold the
; table pointer before the first add - confirm where it is loaded.
151 ttmath_addindexed2_x64 PROC
154 ; rdx = b (value size)
157 ; [rsp+0x28] = nValue2
159 xor rax, rax ; return value, starts at 0
161 sub rdx, r8 ; rdx = remaining count of uints
162 mov r10, [rsp+028h] ; r10 = nValue2 (fifth argument, taken from the stack)
; add the first value into the word at index r8; CF = carry out of that word
164 add qword ptr [r11 + r8 * 8], r9
; add the second value plus the incoming CF
167 adc qword ptr [r11 + r8 * 8], r10
; add 1 to the word at index r8; CF is set if that word wraps around
174 add qword ptr [r11 + r8 * 8], 1
179 dec rdx ; dec leaves CF unchanged; it only updates the other arithmetic flags (ZF among them)
184 ttmath_addindexed2_x64 ENDP
188 ;----------------------------------------
192 ;----------------------------------------
; ttmath_addvector_x64
; Adds 64-bit words read from [rcx + r11*8] and [rdx + r11*8] with carry
; and stores each sum to [r10 + r11*8]. Further on, words are loaded from
; [rcx + r11*8] into rax and rax is stored to [r10 + r11*8].
; The carry flag is parked in r9 and later re-created from it, so that
; intervening instructions may change CF.
; NOTE(review): presumably r10 holds the 'result' pointer passed at
; [rsp+28h] (the fifth argument in the Microsoft x64 convention) -
; confirm where it is loaded.
195 ttmath_addvector_x64 PROC
200 ; [rsp+0x28] = result
204 xor r11, r11 ; r11=0, cf=0 (r11 is the word index; xor clears the carry flag)
208 mov rax, qword ptr [rcx + r11 * 8] ; rax = word r11 of the first operand
209 adc rax, qword ptr [rdx + r11 * 8] ; rax += word r11 of the second operand + CF
210 mov qword ptr [r10 + r11 * 8], rax ; store the sum; mov does not change CF
; adc r9, r9 computes r9 = r9 + r9 + CF, which equals CF only when r9 is 0
; here. NOTE(review): assumes r9 has reached zero at this point - confirm.
215 adc r9, r9 ; r9 has the cf state
220 neg r9 ; setting cf from r9 (neg sets CF exactly when its operand is non-zero)
221 mov r9, 0 ; don't use xor here (cf is used): xor would clear CF, mov keeps it
223 mov rax, qword ptr [rcx + r11 * 8] ; rax = word r11 of the first operand
225 mov qword ptr [r10 + r11 * 8], rax ; store rax to word r11 of the output
239 ttmath_addvector_x64 ENDP
242 ;----------------------------------------
246 ;----------------------------------------
257 sub rax, r9 ; sets CARRY if r9 != 0
261 mov rax,qword ptr [rdx + r11 * 8]
262 sbb qword ptr [rcx + r11 * 8], rax
274 ;----------------------------------------
278 ;----------------------------------------
; ttmath_subindexed_x64
; Subtracts the 64-bit value in r9 from the word at [rcx + r8*8].
; Register use visible in this routine (Microsoft x64 convention, where
; rcx, rdx, r8, r9 carry the first four integer arguments):
;   rcx = base address of the table of 64-bit words being modified
;   rdx = a count; r8 is subtracted from it to give the remaining words
;   r8  = index of the word being subtracted from
;   r9  = value subtracted from the word
; NOTE(review): presumably rdx is the table size in words - confirm against
; the declaration in ttmathuint_x86_64.h.
280 ttmath_subindexed_x64 PROC
286 sub rdx, r8 ; rdx = remaining count of uints
; subtract r9 from the word at index r8; CF is set when a borrow is needed
290 sub qword ptr [rcx + r8 * 8], r9
305 ttmath_subindexed_x64 ENDP
309 ;----------------------------------------
313 ;----------------------------------------
315 ; The same asm code as in addvector_x64, except that two 'adc' instructions are changed to 'sbb'.
; ttmath_subvector_x64
; Subtracts the 64-bit words at [rdx + r11*8] from the words at
; [rcx + r11*8] with borrow and stores each difference to [r10 + r11*8].
; Further on, words are loaded from [rcx + r11*8] into rax and rax is
; stored to [r10 + r11*8].
; The carry (borrow) flag is parked in r9 and later re-created from it, so
; that intervening instructions may change CF.
; NOTE(review): presumably r10 holds the 'result' pointer passed at
; [rsp+28h] (the fifth argument in the Microsoft x64 convention) -
; confirm where it is loaded.
317 ttmath_subvector_x64 PROC
322 ; [rsp+0x28] = result
326 xor r11, r11 ; r11=0, cf=0 (r11 is the word index; xor clears the carry flag)
330 mov rax, qword ptr [rcx + r11 * 8] ; rax = word r11 of the minuend
331 sbb rax, qword ptr [rdx + r11 * 8] ; rax -= word r11 of the subtrahend + CF (borrow)
332 mov qword ptr [r10 + r11 * 8], rax ; store the difference; mov does not change CF
; adc r9, r9 computes r9 = r9 + r9 + CF, which equals CF only when r9 is 0
; here. NOTE(review): assumes r9 has reached zero at this point - confirm.
337 adc r9, r9 ; r9 has the cf state
342 neg r9 ; setting cf from r9 (neg sets CF exactly when its operand is non-zero)
343 mov r9, 0 ; don't use xor here (cf is used): xor would clear CF, mov keeps it
345 mov rax, qword ptr [rcx + r11 * 8] ; rax = word r11 of the minuend
347 mov qword ptr [r10 + r11 * 8], rax ; store rax to word r11 of the output
361 ttmath_subvector_x64 ENDP
366 ;----------------------------------------
370 ;----------------------------------------
379 neg r8 ; CY set if r8 <> 0
383 rcl qword ptr [r11 + r10 * 8], 1
395 ;----------------------------------------
399 ;----------------------------------------
407 neg r8 ; CY set if r8 <> 0
411 rcr qword ptr -8[rcx + rdx * 8], 1
422 ;----------------------------------------
426 ;----------------------------------------
437 mov rdx, qword ptr [r11]
438 mov rax, qword ptr [r10]
440 mov qword ptr [r10], rdx ; remainder
441 mov qword ptr [r11], rax ; value
447 ;----------------------------------------
451 ;----------------------------------------
461 mov r10, rcx ; r10 = p1
468 shr r11, cl ; r11 = mask
470 mov rcx, r8 ; rcx = count of bits
472 mov rbx, rax ; rbx = old value = 0
474 cmovnz rbx, r11 ; if (c) then old value = mask
476 mov r9, rax ; r9 = index (0..nSize-1)
480 rol qword ptr [r10+r9*8], cl
481 mov rax, qword ptr [r10+r9*8]
483 xor qword ptr [r10+r9*8], rax
484 or qword ptr [r10+r9*8], rbx
498 ;----------------------------------------
502 ;----------------------------------------
511 mov r10, rcx ; r10 = p1
518 shl r11, cl ; r11 = mask
520 mov rcx, r8 ; rcx = count of bits
522 mov rbx, rax ; rbx = old value = 0
524 cmovnz rbx, r11 ; if (c) then old value = mask
526 mov r9, rdx ; r9 = index (0..nSize-1)
531 ror qword ptr [r10+r9*8], cl
532 mov rax, qword ptr [r10+r9*8]
534 xor qword ptr [r10+r9*8], rax
535 or qword ptr [r10+r9*8], rbx