diff --git a/libpolyml/x86assembly_gas32.S b/libpolyml/x86assembly_gas32.S
index 4707d083..38beb93e 100644
--- a/libpolyml/x86assembly_gas32.S
+++ b/libpolyml/x86assembly_gas32.S
@@ -1,207 +1,205 @@
/*
   Title:  Assembly code routines for the poly system.
   Author:    David Matthews
   Copyright (c) David C. J. Matthews 2000-2019

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License version 2.1 as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

/*
   This is the 32-bit Unix version of the assembly code file.
   There are separate versions of 32/64 and Windows (Intel syntax)
   and Unix (gas syntax).
*/

/*
 Registers used :-

  %eax: First argument to function.  Result of function call.
  %ebx: Second argument to function.
  %ecx: General register
  %edx: Closure pointer in call.
  %ebp: Points to memory used for extra registers
  %esi: General register.
  %edi: General register.
  %esp: Stack pointer.
*/

#include "config.h"
#ifdef SYMBOLS_REQUIRE_UNDERSCORE
#define EXTNAME(x)  _##x
#else
#define EXTNAME(x)  x
#endif

#
# Macro to begin the hand-coded functions
#

#ifdef MACOSX
#define GLOBAL .globl
#else
#define GLOBAL .global
#endif

/* INLINE_ROUTINE(id) exports the (possibly underscore-prefixed) symbol and
   places its label at the current position. */
#define INLINE_ROUTINE(id) \
GLOBAL EXTNAME(id); \
EXTNAME(id):

/* Bytes X86AsmSwitchToPoly reserves on the C stack.  12 of them are the
   three callee-save registers it pushes; the rest is taken with subl. */
#define Fr_Size                     16

/* This is the argument vector passed in to X86AsmSwitchToPoly
   It is used to initialise the frame.  A few values are updated
   when ML returns.  The values below are byte offsets from %ebp.
*/
#define Arg_LocalMpointer       0x0
#define Arg_HandlerRegister     0x4
#define Arg_LocalMbottom        0x8
#define Arg_StackLimit          0xc
#define Arg_ExceptionPacket     0x10  /* Address of packet to raise */
#define Arg_RequestCode         0x14  /* Byte: Io function to call. */
#define Arg_ReturnReason        0x16  /* Byte: Reason for returning from ML. */
#define Arg_FullRestore         0x17  /* Byte: Full/partial restore */
#define Arg_SaveCStack          0x18  /* Save C Stack pointer */
#define Arg_ThreadId            0x1c  /* My thread id */
#define Arg_StackPtr            0x20  /* Stack Pointer */
#define Arg_SaveRAX             0x34
#define Arg_SaveRBX             0x38
#define Arg_SaveRCX             0x3c
#define Arg_SaveRDX             0x40
#define Arg_SaveRSI             0x44
#define Arg_SaveRDI             0x48
#define Arg_SaveFP              0x4c

/* Codes stored in Arg_ReturnReason by CALL_EXTRA before control leaves ML. */
#define RETURN_HEAP_OVERFLOW        1
#define RETURN_STACK_OVERFLOW       2
#define RETURN_STACK_OVERFLOWEX     3
#define RETURN_CALLBACK_RETURN      6
#define RETURN_CALLBACK_EXCEPTION   7
#define RETURN_KILL_SELF            9

# Mark the stack as non-executable when supported
#ifdef HAVE_GNU_STACK
.section .note.GNU-stack, "", @progbits
#endif

#
# CODE STARTS HERE
#
    .text

/* CALL_EXTRA(index): record the return reason in the argument vector and
   leave ML through SaveFullState.
   The two lines marked "-" are the ones this patch deletes: they saved and
   restored %ecx around a store of an immediate, which never uses %ecx. */
#define CALL_EXTRA(index) \
-        pushl   %ecx; \
        movb    $index,Arg_ReturnReason(%ebp); \
-        popl    %ecx; \
        jmp     SaveFullState;

/* Load the registers from the ML stack and jump to the code.
  This is used to start ML code.
  The argument is the address of the MemRegisters struct and goes into %ebp.
  This is the general code for switching control to ML.  There are a number of cases to consider:
  1.  Initial entry to root function or a new thread.  Needs to load EDX at least.
  2.  Normal return from an RTS call.  Could just do a simple return.
  3.  Exception raised in RTS call.
  4.  Callback from C to an ML function.  In effect this is a coroutine. Similar to 1.
  5.  Return from "trap" i.e. Heap/Stack overflow.  Stack-overflow can result in an exception
      either because the stack can't be grown or because Interrupt has been raised.

  The C stack pointer is stored in Arg_SaveCStack and %esp is then loaded from
  Arg_StackPtr, so the final ret takes its target from the ML stack.
*/
INLINE_ROUTINE(X86AsmSwitchToPoly)
    pushl   %ebp                            # Standard entry sequence
    movl    8(%esp),%ebp                    # Address of argument vector
    pushl   %ebx
    pushl   %edi
    pushl   %esi                            # Push callee-save registers
    subl    $(Fr_Size-12),%esp              # Allocate frame
    movl    %esp,Arg_SaveCStack(%ebp)
    movl    Arg_StackPtr(%ebp),%esp
    movl    Arg_ExceptionPacket(%ebp),%eax
    cmpl    $1,%eax                         # Did we raise an exception?
    jnz     raisexlocal                     /* Any value other than 1 goes to the handler */
    FRSTOR  Arg_SaveFP(%ebp)
    movl    Arg_SaveRAX(%ebp),%eax          # Load the registers
    movl    Arg_SaveRBX(%ebp),%ebx          # Load the registers
    movl    Arg_SaveRCX(%ebp),%ecx
    movl    Arg_SaveRDX(%ebp),%edx
    movl    Arg_SaveRSI(%ebp),%esi
    movl    Arg_SaveRDI(%ebp),%edi
    cld                                     # Clear this just in case
    ret

/* Code to save the state and switch to C
   This saves the full register state.
   It undoes the entry sequence of X86AsmSwitchToPoly (same frame size, same
   registers popped in reverse order), so the final ret is taken on the C stack. */
SaveFullState:
    movl    %eax,Arg_SaveRAX(%ebp)
    movl    %ebx,Arg_SaveRBX(%ebp)
    movl    %ecx,Arg_SaveRCX(%ebp)
    movl    %edx,Arg_SaveRDX(%ebp)
    movl    %esi,Arg_SaveRSI(%ebp)
    movl    %edi,Arg_SaveRDI(%ebp)
    fnsave  Arg_SaveFP(%ebp)                # Save FP state.  Also resets the state so...
    fldcw   Arg_SaveFP(%ebp)                # ...load because we need the same rounding mode in the RTS
    movl    %esp,Arg_StackPtr(%ebp)         # Save ML stack pointer
    movl    Arg_SaveCStack(%ebp),%esp       # Restore C stack pointer
    addl    $(Fr_Size-12),%esp
    popl    %esi
    popl    %edi
    popl    %ebx
    popl    %ebp
    ret

/* Entry points that store a return reason and leave ML via SaveFullState. */
INLINE_ROUTINE(X86AsmCallExtraRETURN_HEAP_OVERFLOW)
    CALL_EXTRA(RETURN_HEAP_OVERFLOW)

INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOW)
    CALL_EXTRA(RETURN_STACK_OVERFLOW)

INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOWEX)
    CALL_EXTRA(RETURN_STACK_OVERFLOWEX)

/* Used when entering new code.  The argument and closure are on the stack
   in case there is a GC before we enter the code.
   Pops the closure into %edx, then the argument into %eax, and jumps through
   the first word of the closure. */
INLINE_ROUTINE(X86AsmPopArgAndClosure)
    popl    %edx
    popl    %eax
    jmp     *(%edx)

/* Jump through the first word of the handler addressed by Arg_HandlerRegister.
   raisexlocal is the local name used by X86AsmSwitchToPoly. */
INLINE_ROUTINE(X86AsmRaiseException)
raisexlocal:
    movl    Arg_HandlerRegister(%ebp),%ecx    # Get next handler into %ecx
    jmp     *(%ecx)

# Additional assembly code routines

# RTS call to kill the current thread.
INLINE_ROUTINE(X86AsmKillSelf)
    CALL_EXTRA(RETURN_KILL_SELF)

INLINE_ROUTINE(X86AsmCallbackReturn)
    CALL_EXTRA(RETURN_CALLBACK_RETURN)

INLINE_ROUTINE(X86AsmCallbackException)
    CALL_EXTRA(RETURN_CALLBACK_EXCEPTION)

# This implements atomic addition in the same way as atomic_increment
/* Atomically adds 2 to the word addressed by the argument and returns the
   new value in %eax: xaddl leaves the old value in %ecx, so 2 is added again. */
INLINE_ROUTINE(X86AsmAtomicIncrement)
#ifndef HOSTARCHITECTURE_X86_64
    movl    4(%esp),%eax
#else
    movl    %edi,%eax   # On X86_64 the argument is passed in %edi
#endif
# Use %ecx and %eax because they are volatile (unlike %ebx on X86/64/Unix)
    movl    $2,%ecx
    lock; xaddl %ecx,(%eax)
    addl    $2,%ecx
    movl    %ecx,%eax
    ret

diff --git a/libpolyml/x86assembly_gas64.S b/libpolyml/x86assembly_gas64.S
index d90c93b3..28976da0 100644
--- a/libpolyml/x86assembly_gas64.S
+++ b/libpolyml/x86assembly_gas64.S
@@ -1,288 +1,286 @@
/*
   Title:  Assembly code routines for the poly system.
   Author:    David Matthews
   Copyright (c) David C. J. Matthews 2000-2019

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License version 2.1 as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*/

/*
   This is the 64-bit gas version of the assembly code file.
   There are separate versions of 32/64 and MASM (Intel syntax)
   and GCC (gas syntax).
*/

/*
 Registers used :-

  %rax:  First argument to function.  Result of function call.
  %rbx:  Second argument to function.
  %rcx:  General register
  %rdx:  Closure pointer in call.
  %rbp:  Points to memory used for extra registers
  %rsi:  General register.
  %rdi:  General register.
  %rsp:  Stack pointer.
  %r8:   Third argument to function
  %r9:   Fourth argument to function
  %r10:  Fifth argument to function
  %r11:  General register
  %r12:  General register
  %r13:  General register
  %r14:  General register
  %r15:  Memory allocation pointer
*/

#include "config.h"
#ifdef SYMBOLS_REQUIRE_UNDERSCORE
#define EXTNAME(x)  _##x
#else
#define EXTNAME(x)  x
#endif

#ifdef __CYGWIN__
#define _WIN32 1
#endif

/* Macro to begin the hand-coded functions */
#ifdef MACOSX
#define GLOBAL .globl
#else
#define GLOBAL .global
#endif

#define INLINE_ROUTINE(id) \
GLOBAL EXTNAME(id); \
EXTNAME(id):

/* Extra entries on the C stack */
#define Fr_Size                 64         /* Must be multiple of 16 to get alignment correct */

/* This is the argument vector passed in to X86AsmSwitchToPoly
   It is used to initialise the frame.  A few values are updated
   when ML returns.  The values below are byte offsets from %rbp. */
#define Arg_LocalMpointer       0x0
#define Arg_HandlerRegister     0x8
#define Arg_LocalMbottom        0x10
#define Arg_StackLimit          0x18
#define Arg_ExceptionPacket     0x20  /* Address of packet to raise */
#define Arg_RequestCode         0x28  /* Byte: Io function to call. */
#define Arg_ReturnReason        0x2a  /* Byte: Reason for returning from ML. */
#define Arg_UnusedRestore       0x2b  /* Byte: Full/partial restore */
#define Arg_SaveCStack          0x30  /* Current stack base */
#define Arg_ThreadId            0x38  /* My thread id */
#define Arg_StackPtr            0x40  /* Stack Pointer */
#define Arg_SaveRAX             0x68
#define Arg_SaveRBX             0x70
#define Arg_SaveRCX             0x78
#define Arg_SaveRDX             0x80
#define Arg_SaveRSI             0x88
#define Arg_SaveRDI             0x90
#define Arg_SaveR8              0x98
#define Arg_SaveR9              0xa0
#define Arg_SaveR10             0xa8
#define Arg_SaveR11             0xb0
#define Arg_SaveR12             0xb8
#define Arg_SaveR13             0xc0
#define Arg_SaveR14             0xc8
#define Arg_SaveXMM0            0xd0
#define Arg_SaveXMM1            0xd8
#define Arg_SaveXMM2            0xe0
#define Arg_SaveXMM3            0xe8
#define Arg_SaveXMM4            0xf0
#define Arg_SaveXMM5            0xf8
#define Arg_SaveXMM6            0x100

/* IO function numbers.  These are functions that are called
   to handle special cases in this code */
#include "sys.h"

/* Codes stored in Arg_ReturnReason by CALL_EXTRA before control leaves ML. */
#define RETURN_HEAP_OVERFLOW        1
#define RETURN_STACK_OVERFLOW       2
#define RETURN_STACK_OVERFLOWEX     3
#define RETURN_CALLBACK_RETURN      6
#define RETURN_CALLBACK_EXCEPTION   7
#define RETURN_RAISE_OVERFLOW       8
#define RETURN_KILL_SELF            9

# Mark the stack as non-executable when supported
#ifdef HAVE_GNU_STACK
.section .note.GNU-stack, "", @progbits
#endif

#
# CODE STARTS HERE
#
    .text

/* CALL_EXTRA(index): record the return reason in the argument vector and
   leave ML through SaveFullState.
   The two lines marked "-" are the ones this patch deletes: they saved and
   restored %rcx around a store of an immediate, which never uses %rcx. */
#define CALL_EXTRA(index) \
-        pushq   %rcx; \
        movb    $index,Arg_ReturnReason(%rbp); \
-        popq    %rcx; \
        jmp     SaveFullState;

/* Load the registers from the ML stack and jump to the code.
   The C stack pointer is stored in Arg_SaveCStack and %rsp is then loaded
   from Arg_StackPtr, so the final ret takes its target from the ML stack. */
INLINE_ROUTINE(X86AsmSwitchToPoly)
    pushq   %rbp                            # Standard entry sequence
/* If we're compiling with Mingw we're using Windows calling conventions. */
#ifdef _WIN32
    movq    %rcx,%rbp                       # Argument is in %rcx
#else
    movq    %rdi,%rbp                       # Argument is in %rdi
#endif
    pushq   %rbx
    pushq   %r12
    pushq   %r13
    pushq   %r14
    pushq   %r15
#ifdef _WIN32
    pushq   %rdi                            # Callee save in Windows
    pushq   %rsi
    subq    $(Fr_Size-56),%rsp              # Argument is already in %rcx
#else
    subq    $(Fr_Size-40),%rsp
#endif
    movq    %rsp,Arg_SaveCStack(%rbp)
    movq    Arg_LocalMpointer(%rbp),%r15
    movq    Arg_StackPtr(%rbp),%rsp         # Set the new stack ptr
    movsd   Arg_SaveXMM0(%rbp),%xmm0        # Load the registers
    movsd   Arg_SaveXMM1(%rbp),%xmm1
    movsd   Arg_SaveXMM2(%rbp),%xmm2
    movsd   Arg_SaveXMM3(%rbp),%xmm3
    movsd   Arg_SaveXMM4(%rbp),%xmm4
    movsd   Arg_SaveXMM5(%rbp),%xmm5
    movsd   Arg_SaveXMM6(%rbp),%xmm6
    movq    Arg_SaveRBX(%rbp),%rbx
    movq    Arg_SaveRCX(%rbp),%rcx
    movq    Arg_SaveRDX(%rbp),%rdx
    movq    Arg_SaveRSI(%rbp),%rsi
    movq    Arg_SaveRDI(%rbp),%rdi
    movq    Arg_SaveR8(%rbp),%r8
    movq    Arg_SaveR9(%rbp),%r9
    movq    Arg_SaveR10(%rbp),%r10
    movq    Arg_SaveR11(%rbp),%r11
    movq    Arg_SaveR12(%rbp),%r12
    movq    Arg_SaveR13(%rbp),%r13
    movq    Arg_SaveR14(%rbp),%r14
    movq    Arg_ExceptionPacket(%rbp),%rax
    cmpq    $1,%rax                         # Did we raise an exception?
    jnz     raisexLocal                     /* Any value other than 1 goes to the handler */
    movq    Arg_SaveRAX(%rbp),%rax
    cld                                     # Clear this just in case
    ret

/* This is exactly the same as raisex but seems to be needed to work round a PIC problem. */
raisexLocal:
    movq    Arg_HandlerRegister(%rbp),%rcx  # Get next handler into %rcx
    jmp     *(%rcx)

/* Code to save the state and switch to C
   This saves the full register state.
   It undoes the entry sequence of X86AsmSwitchToPoly (same frame size, same
   registers popped in reverse order), so the final ret is taken on the C stack. */
SaveFullState:
    movq    %rax,Arg_SaveRAX(%rbp)
    movq    %rbx,Arg_SaveRBX(%rbp)
    movq    %rcx,Arg_SaveRCX(%rbp)
    movq    %rdx,Arg_SaveRDX(%rbp)
    movq    %rsi,Arg_SaveRSI(%rbp)
    movq    %rdi,Arg_SaveRDI(%rbp)
    movsd   %xmm0,Arg_SaveXMM0(%rbp)
    movsd   %xmm1,Arg_SaveXMM1(%rbp)
    movsd   %xmm2,Arg_SaveXMM2(%rbp)
    movsd   %xmm3,Arg_SaveXMM3(%rbp)
    movsd   %xmm4,Arg_SaveXMM4(%rbp)
    movsd   %xmm5,Arg_SaveXMM5(%rbp)
    movsd   %xmm6,Arg_SaveXMM6(%rbp)
    movq    %r8,Arg_SaveR8(%rbp)
    movq    %r9,Arg_SaveR9(%rbp)
    movq    %r10,Arg_SaveR10(%rbp)
    movq    %r11,Arg_SaveR11(%rbp)
    movq    %r12,Arg_SaveR12(%rbp)
    movq    %r13,Arg_SaveR13(%rbp)
    movq    %r14,Arg_SaveR14(%rbp)
    movq    %rsp,Arg_StackPtr(%rbp)
    movq    %r15,Arg_LocalMpointer(%rbp)    # Save back heap pointer
    movq    Arg_SaveCStack(%rbp),%rsp       # Restore C stack pointer
#ifdef _WIN32
    addq    $(Fr_Size-56),%rsp
    popq    %rsi
    popq    %rdi
#else
    addq    $(Fr_Size-40),%rsp
#endif
    popq    %r15                            # Restore callee-save registers
    popq    %r14
    popq    %r13
    popq    %r12
    popq    %rbx
    popq    %rbp
    ret

/* Used when entering new code.  The argument and closure are on the stack
   in case there is a GC before we enter the code.
   Pops the closure into %rdx, then the argument into %rax, and jumps through
   the first word of the closure (indexed from %rbx when POLYML32IN64 is set). */
INLINE_ROUTINE(X86AsmPopArgAndClosure)
    popq    %rdx
    popq    %rax
#ifdef POLYML32IN64
    jmp     *(%rbx,%rdx,4)
#else
    jmp     *(%rdx)
#endif

# This is used if the RTS sets up an exception.  It's probably no longer relevant.
INLINE_ROUTINE(X86AsmRaiseException)
    movq    Arg_HandlerRegister(%rbp),%rcx  # Get next handler into %rcx
    jmp     *(%rcx)

# Additional assembly code routines

# RTS call to kill the current thread.
INLINE_ROUTINE(X86AsmKillSelf)
    CALL_EXTRA(RETURN_KILL_SELF)

INLINE_ROUTINE(X86AsmCallbackReturn)
    CALL_EXTRA(RETURN_CALLBACK_RETURN)

INLINE_ROUTINE(X86AsmCallbackException)
    CALL_EXTRA(RETURN_CALLBACK_EXCEPTION)

INLINE_ROUTINE(X86AsmCallExtraRETURN_HEAP_OVERFLOW)
    CALL_EXTRA(RETURN_HEAP_OVERFLOW)

INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOW)
    CALL_EXTRA(RETURN_STACK_OVERFLOW)

INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOWEX)
    CALL_EXTRA(RETURN_STACK_OVERFLOWEX)

# This implements atomic addition in the same way as atomic_increment
/* Atomically adds 2 to the word addressed by the argument and returns the
   new value in %rax: xadd leaves the old value in %rcx, so 2 is added again. */
INLINE_ROUTINE(X86AsmAtomicIncrement)
#ifdef _WIN32
    movq    %rcx,%rax   # On Windows the argument is passed in %rcx
#else
    movq    %rdi,%rax   # On X86_64 the argument is passed in %rdi
#endif
# Use %rcx and %rax because they are volatile (unlike %rbx on X86/64/Unix)
    movq    $2,%rcx
#ifdef POLYML32IN64
    lock xaddl %ecx,(%rax)      # Rax is an absolute address but this is only a word
#else
    lock xaddq %rcx,(%rax)
#endif
    addq    $2,%rcx
    movq    %rcx,%rax
    ret

diff --git a/libpolyml/x86assembly_masm32.S b/libpolyml/x86assembly_masm32.S
index 2f312db7..cace143e 100644
--- a/libpolyml/x86assembly_masm32.S
+++ b/libpolyml/x86assembly_masm32.S
@@ -1,194 +1,192 @@
;
;  Title:  Assembly code routines for the poly system.
;  Author:    David Matthews
;  Copyright (c) David C. J. Matthews 2000-2019
;
;  This library is free software; you can redistribute it and/or
;  modify it under the terms of the GNU Lesser General Public
;  License version 2.1 as published by the Free Software Foundation.
;
;  This library is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;  Lesser General Public License for more details.
;
;  You should have received a copy of the GNU Lesser General Public
;  License along with this library; if not, write to the Free Software
;  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
;

;
; Registers used :-
;
;  eax: First argument to function.  Result of function call.
;  ebx: Second argument to function.
;  ecx: General register
;  edx: Closure pointer in call.
;  ebp: Points to memory used for extra registers
;  esi: General register.
;  edi: General register.
;  esp: Stack pointer.

.486
    .model  flat,c

; Bytes reserved on the C stack by X86AsmSwitchToPoly.  12 of them are the
; three callee-save registers it pushes; the rest is taken with sub.
Fr_Size             EQU     16      ; Make this a multiple of 16

; This is the argument vector passed in to X86AsmSwitchToPoly
; It is used to initialise the frame.  A few values are updated
; when ML returns.
ArgVector STRUCT
LocalMPointer       DWORD   ?
HandlerRegister     DWORD   ?
LocalMbottom        DWORD   ?
StackLimit          DWORD   ?
ExceptionPacket     DWORD   ?       ; Address of packet to raise
UnusedRequestCode   DB      ?       ; Byte: Io function to call.
UnusedFlag          DB      ?
ReturnReason        DB      ?       ; Byte: Reason for returning from ML.
UnusedRestore       DB      ?       ;
SaveCStack          DWORD   ?       ; Saved C stack pointer
ThreadId            DWORD   ?       ; My thread id
StackPtr            DWORD   ?       ; Stack pointer
UnusedProgramCtr    DWORD   ?
HeapOverFlowCall    DWORD   ?
StackOverFlowCall   DWORD   ?
StackOverFlowCallEx DWORD   ?
SaveRAX             DWORD   ?
SaveRBX             DWORD   ?
SaveRCX             DWORD   ?
SaveRDX             DWORD   ?
SaveRSI             DWORD   ?
SaveRDI             DWORD   ?
SaveFP              WORD    ?       ; Actually larger
ArgVector ENDS

; Codes to indicate the reason for return.
RETURN_HEAP_OVERFLOW        EQU 1
RETURN_STACK_OVERFLOW       EQU 2
RETURN_STACK_OVERFLOWEX     EQU 3
RETURN_CALLBACK_RETURN      EQU 6
RETURN_CALLBACK_EXCEPTION   EQU 7
RETURN_KILL_SELF            EQU 9

;
; CODE STARTS HERE
;
    .CODE

; Load the registers from the ML stack and jump to the code.
; This is used to start ML code.
; The argument is the address of the MemRegisters struct and goes into ebp.
; This is the general code for switching control to ML.  There are a number of cases to consider:
; 1.  Initial entry to root function or a new thread.  Needs to load EDX at least.
; 2.  Normal return from an RTS call.  Could just do a simple return.
; 3.  Exception raised in RTS call.
; 4.  Callback from C to an ML function.  In effect this is a coroutine. Similar to 1.
; 5.  Return from "trap" i.e. Heap/Stack overflow.  Stack-overflow can result in an exception
;     either because the stack can't be grown or because Interrupt has been raised.
;
; The C stack pointer is stored in SaveCStack and esp is then loaded from
; StackPtr, so the final ret takes its target from the ML stack.
;
; Fr_Size is written here exactly as it is declared.  MASM folds identifier
; case by default, so the former spelling Fr_size assembled too, but it would
; be undefined under /Cp or OPTION CASEMAP:NONE.
PUBLIC  X86AsmSwitchToPoly
X86AsmSwitchToPoly:
    push    ebp                             ; Standard entry sequence
    mov     ebp,[8+esp]                     ; Address of argument vector
    push    ebx                             ; Push callee-save registers
    push    edi
    push    esi
    sub     esp,(Fr_Size-12)                ; Allocate frame
    mov     [ArgVector.SaveCStack+ebp],esp
    mov     esp,[ArgVector.StackPtr+ebp]
    mov     eax,[ArgVector.ExceptionPacket+ebp]
    cmp     eax,1                           ; Did we raise an exception?
    jnz     raisex                          ; Any value other than 1 goes to the handler
    frstor  [ArgVector.SaveFP+ebp]
    mov     eax,[ArgVector.SaveRAX+ebp]
    mov     ebx,[ArgVector.SaveRBX+ebp]
    mov     ecx,[ArgVector.SaveRCX+ebp]
    mov     edx,[ArgVector.SaveRDX+ebp]
    mov     esi,[ArgVector.SaveRSI+ebp]
    mov     edi,[ArgVector.SaveRDI+ebp]
    cld                                     ; Clear this just in case
    ret

; Code to save the state and switch to C
; This saves the full register state.
; It undoes the entry sequence of X86AsmSwitchToPoly (same frame size, same
; registers popped in reverse order), so the final ret is taken on the C stack.
SaveFullState:
    mov     [ArgVector.SaveRAX+ebp],eax
    mov     [ArgVector.SaveRBX+ebp],ebx
    mov     [ArgVector.SaveRCX+ebp],ecx
    mov     [ArgVector.SaveRDX+ebp],edx
    mov     [ArgVector.SaveRSI+ebp],esi
    mov     [ArgVector.SaveRDI+ebp],edi
    FNSAVE  [ArgVector.SaveFP+ebp]          ; Save FP state.  Also resets the state so...
    FLDCW   [ArgVector.SaveFP+ebp]          ; ...load because we need the same rounding mode in the RTS
    mov     [ArgVector.StackPtr+ebp],esp    ; Save ML stack pointer
    mov     esp,[ArgVector.SaveCStack+ebp]  ; Restore C stack pointer
    add     esp,(Fr_Size-12)
    pop     esi                             ; Restore saved registers
    pop     edi
    pop     ebx
    pop     ebp
    ret

; Used when entering new code.  The argument and closure are on the stack
; in case there is a GC before we enter the code.
; Pops the closure into edx, then the argument into eax, and jumps through
; the first word of the closure.
PUBLIC  X86AsmPopArgAndClosure
X86AsmPopArgAndClosure:
    pop     edx
    pop     eax
    jmp     dword ptr [edx]

; This is used if the RTS sets up an exception.  It's probably no longer relevant.
PUBLIC  X86AsmRaiseException
X86AsmRaiseException:
raisex:
    mov     ecx,[ArgVector.HandlerRegister+ebp]     ; Get next handler into ecx
    jmp     dword ptr [ecx]                         ; Enter it through its first word

; Define standard call macro.
; Defined as an Masm macro because there are multiple instructions.
; It records the return reason in the argument vector and leaves ML through
; SaveFullState.  The two lines marked "-" are the ones this patch deletes:
; they saved and restored ecx around a store of an immediate, which never
; uses ecx.
CALL_EXTRA  MACRO   index
-    push    ecx
    mov     byte ptr [ArgVector.ReturnReason+ebp],index
-    pop     ecx
    jmp     SaveFullState
ENDM

; Terminate the current thread
PUBLIC  X86AsmKillSelf
X86AsmKillSelf:
    CALL_EXTRA RETURN_KILL_SELF

PUBLIC  X86AsmCallbackReturn
X86AsmCallbackReturn:
    CALL_EXTRA RETURN_CALLBACK_RETURN

PUBLIC  X86AsmCallbackException
X86AsmCallbackException:
    CALL_EXTRA RETURN_CALLBACK_EXCEPTION

; This implements atomic addition in the same way as atomic_increment
; N.B. It is called from the RTS so uses C linkage conventions.
; Atomically adds 2 to the word addressed by the argument and returns the new
; value in eax: xadd leaves the old value in ecx, so 2 is added again.
PUBLIC  X86AsmAtomicIncrement
X86AsmAtomicIncrement:
    mov     eax,4[esp]
; Use ecx and eax because they are volatile (unlike ebx on X86/64/Unix)
    mov     ecx,2
    lock xadd [eax],ecx
    add     ecx,2
    mov     eax,ecx
    ret

; Declares the public entry point X86AsmCallExtra<index> and expands
; CALL_EXTRA for that index.
CREATE_EXTRA_CALL MACRO index
PUBLIC  X86AsmCallExtra&index&
X86AsmCallExtra&index&:
    CALL_EXTRA index
    ENDM

CREATE_EXTRA_CALL RETURN_HEAP_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOWEX

END
diff --git a/libpolyml/x86assembly_masm64.S b/libpolyml/x86assembly_masm64.S
index c10d3ee1..7874b0f0 100644
--- a/libpolyml/x86assembly_masm64.S
+++ b/libpolyml/x86assembly_masm64.S
@@ -1,263 +1,261 @@
;
;  Title:  Assembly code routines for the poly system.
;  Author:    David Matthews
;  Copyright (c) David C. J. Matthews 2000-2019
;
;  This library is free software; you can redistribute it and/or
;  modify it under the terms of the GNU Lesser General Public
;  License version 2.1 as published by the Free Software Foundation.
;
;  This library is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;  Lesser General Public License for more details.
;
;  You should have received a copy of the GNU Lesser General Public
;  License along with this library; if not, write to the Free Software
;  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
;

;
; Registers used :-
;
;  rax:  First argument to function.  Result of function call.
;  rbx:  Second argument to function.
;  rcx:  General register
;  rdx:  Closure pointer in call.
;  rbp:  Points to memory used for extra registers
;  rsi:  General register.
;  rdi:  General register.
;  rsp:  Stack pointer.
;  r8:   Third argument to function
;  r9:   Fourth argument to function
;  r10:  Fifth argument to function
;  r11:  General register
;  r12:  General register
;  r13:  General register
;  r14:  General register
;  r15:  Memory allocation pointer

; Extra entries on the C stack
Fr_Size             EQU     64      ; Must be multiple of 16 to get alignment correct

; This is the argument vector passed in to X86AsmSwitchToPoly
; It is used to initialise the frame.  A few values are updated
; when ML returns.
ArgVector STRUCT
LocalMPointer       QWORD   ?
HandlerRegister     QWORD   ?
LocalMbottom        QWORD   ?
StackLimit          QWORD   ?
ExceptionPacket     QWORD   ?       ; Address of packet to raise
UnusedRequestCode   DB      ?       ; Byte: Io function to call.
UnusedFlag          DB      ?
ReturnReason        DB      ?       ; Byte: Reason for returning from ML.
UnusedRestore       DB      ?       ; Byte:
UnusedAlign         DWORD   ?
SaveCStack          QWORD   ?       ; Saved C stack frame
ThreadId            QWORD   ?       ; My thread id
StackPtr            QWORD   ?       ; Stack pointer
UnusedProgramCtr    QWORD   ?
HeapOverFlowCall    QWORD   ?
StackOverFlowCall   QWORD   ?
StackOverFlowCallEx QWORD   ?
SaveRAX             QWORD   ?
SaveRBX             QWORD   ?
SaveRCX             QWORD   ?
SaveRDX             QWORD   ?
SaveRSI             QWORD   ?
SaveRDI             QWORD   ?
SaveR8              QWORD   ?
SaveR9              QWORD   ?
SaveR10             QWORD   ?
SaveR11             QWORD   ?
SaveR12             QWORD   ?
SaveR13             QWORD   ?
SaveR14             QWORD   ?
SaveXMM0            QWORD   ?
SaveXMM1            QWORD   ?
SaveXMM2            QWORD   ?
SaveXMM3            QWORD   ?
SaveXMM4            QWORD   ?
SaveXMM5            QWORD   ?
SaveXMM6            QWORD   ?
ArgVector ENDS

; Codes stored in ReturnReason by CALL_EXTRA before control leaves ML.
RETURN_HEAP_OVERFLOW        EQU 1
RETURN_STACK_OVERFLOW       EQU 2
RETURN_STACK_OVERFLOWEX     EQU 3
RETURN_CALLBACK_RETURN      EQU 6
RETURN_CALLBACK_EXCEPTION   EQU 7
RETURN_KILL_SELF            EQU 9

;
; CODE STARTS HERE
;
    .CODE

; Define standard call macro.
; It records the return reason in the argument vector and leaves ML through
; SaveFullState.  The two lines marked "-" are the ones this patch deletes:
; they saved and restored rcx around a store of an immediate, which never
; uses rcx.
CALL_EXTRA  MACRO   index
-    push    rcx
    mov     byte ptr [ArgVector.ReturnReason+rbp],index
-    pop     rcx
    jmp     SaveFullState
ENDM

; Load the registers from the ML stack and jump to the code.
; This is used to start ML code.
; The argument is the address of the MemRegisters struct and goes into rbp.
; This is the general code for switching control to ML.  There are a number of cases to consider:
; 1.  Initial entry to root function or a new thread.  Needs to load EDX at least.
; 2.  Normal return from an RTS call.  Could just do a simple return.
; 3.  Exception raised in RTS call.
; 4.  Callback from C to an ML function.  In effect this is a coroutine. Similar to 1.
; 5.  Return from "trap" i.e. Heap/Stack overflow.  Stack-overflow can result in an exception
;     either because the stack can't be grown or because Interrupt has been raised.

; Switch from C code to ML.  This code uses the X86/64 Windows calling conventions.  It
; saves the callee-save registers.
; This does not set up a correct frame because we don't want to reserve a register for
; that.  RBP needs to be the original argument because we need to be able to modify
; the stack limit "register" from another thread in order to be able to interrupt
; this one.
X86AsmSwitchToPoly  PROC FRAME
; Entered from C with the argument vector address in rcx.  Eight registers
; are pushed (64 bytes) and Fr_Size-56 more bytes are reserved before the C
; stack pointer is stored in SaveCStack.  rsp is then loaded from StackPtr,
; so the final ret takes its target from the ML stack.
; Fr_Size and LocalMPointer are written exactly as they are declared.  MASM
; folds identifier case by default, so the former spellings (Fr_size,
; LocalMpointer) assembled too, but they would be undefined under /Cp or
; OPTION CASEMAP:NONE.
    push    rbp                             ; Standard entry sequence
    push    rbx                             ; Save callee-save registers
    push    r12
    push    r13
    push    r14
    push    r15
    push    rdi                             ; Callee save in Windows
    push    rsi                             ; Strictly, we should also save xmm6
    .endprolog
    mov     rbp,rcx                         ; Move argument into rbp - this is definitely non-standard
    sub     rsp,(Fr_Size-56)
    mov     [ArgVector.SaveCStack+rcx],rsp  ; Save the C stack pointer (rcx still equals rbp here)
    mov     r15,[ArgVector.LocalMPointer+rbp]
    mov     rsp,[ArgVector.StackPtr+rbp]
    movsd   xmm0,[ArgVector.SaveXMM0+rbp]
    movsd   xmm1,[ArgVector.SaveXMM1+rbp]
    movsd   xmm2,[ArgVector.SaveXMM2+rbp]
    movsd   xmm3,[ArgVector.SaveXMM3+rbp]
    movsd   xmm4,[ArgVector.SaveXMM4+rbp]
    movsd   xmm5,[ArgVector.SaveXMM5+rbp]
    movsd   xmm6,[ArgVector.SaveXMM6+rbp]
    mov     rbx,[ArgVector.SaveRBX+rbp]
    mov     rcx,[ArgVector.SaveRCX+rbp]
    mov     rdx,[ArgVector.SaveRDX+rbp]
    mov     rsi,[ArgVector.SaveRSI+rbp]
    mov     rdi,[ArgVector.SaveRDI+rbp]
    mov     r8,[ArgVector.SaveR8+rbp]
    mov     r9,[ArgVector.SaveR9+rbp]
    mov     r10,[ArgVector.SaveR10+rbp]
    mov     r11,[ArgVector.SaveR11+rbp]
    mov     r12,[ArgVector.SaveR12+rbp]
    mov     r13,[ArgVector.SaveR13+rbp]
    mov     r14,[ArgVector.SaveR14+rbp]
    mov     rax,[ArgVector.ExceptionPacket+rbp]
    cmp     rax,1                           ; Did we raise an exception?
    jnz     raisex                          ; Any value other than 1 goes to the handler
    mov     rax,[ArgVector.SaveRAX+rbp]
    cld                                     ; Clear this just in case
    ret

; Everything up to here is considered as part of the X86AsmSwitchToPoly proc
X86AsmSwitchToPoly ENDP

; Code to save the state and switch to C
; This saves the full register state.
SaveFullState:
; Stores every ML register in the argument vector addressed by rbp, then
; undoes the entry sequence of X86AsmSwitchToPoly (same frame size, same
; registers popped in reverse order), so the final ret is taken on the C stack.
; Fr_Size and LocalMPointer are written exactly as they are declared.  MASM
; folds identifier case by default, so the former spellings (Fr_size,
; LocalMpointer) assembled too, but they would be undefined under /Cp or
; OPTION CASEMAP:NONE.
    mov     [ArgVector.SaveRAX+rbp],rax
    mov     [ArgVector.SaveRBX+rbp],rbx
    mov     [ArgVector.SaveRCX+rbp],rcx
    mov     [ArgVector.SaveRDX+rbp],rdx
    mov     [ArgVector.SaveRSI+rbp],rsi
    mov     [ArgVector.SaveRDI+rbp],rdi
    movsd   [ArgVector.SaveXMM0+rbp],xmm0
    movsd   [ArgVector.SaveXMM1+rbp],xmm1
    movsd   [ArgVector.SaveXMM2+rbp],xmm2
    movsd   [ArgVector.SaveXMM3+rbp],xmm3
    movsd   [ArgVector.SaveXMM4+rbp],xmm4
    movsd   [ArgVector.SaveXMM5+rbp],xmm5
    movsd   [ArgVector.SaveXMM6+rbp],xmm6
    mov     [ArgVector.SaveR8+rbp],r8
    mov     [ArgVector.SaveR9+rbp],r9
    mov     [ArgVector.SaveR10+rbp],r10
    mov     [ArgVector.SaveR11+rbp],r11
    mov     [ArgVector.SaveR12+rbp],r12
    mov     [ArgVector.SaveR13+rbp],r13
    mov     [ArgVector.SaveR14+rbp],r14
    mov     [ArgVector.StackPtr+rbp],rsp
    mov     [ArgVector.LocalMPointer+rbp],r15   ; Save back heap pointer
    mov     rsp,[ArgVector.SaveCStack+rbp]      ; Restore C stack pointer
    add     rsp,(Fr_Size-56)
    pop     rsi
    pop     rdi
    pop     r15                                 ; Restore callee-save registers
    pop     r14
    pop     r13
    pop     r12
    pop     rbx
    pop     rbp
    ret

;# Used when entering new code.  The argument and closure are on the stack
;# in case there is a GC before we enter the code.
;# Pops the closure into rdx, then the argument into rax, and jumps through
;# the first word of the closure (indexed from rbx when POLYML32IN64 is set).
PUBLIC  X86AsmPopArgAndClosure
X86AsmPopArgAndClosure:
    pop     rdx
    pop     rax
#ifdef POLYML32IN64
    jmp     qword ptr [rbx+rdx*4]
#else
    jmp     qword ptr [rdx]
#endif

;# This is used if the RTS sets up an exception.  It's probably no longer relevant.
PUBLIC  X86AsmRaiseException
X86AsmRaiseException:
raisex:
    mov     rcx,[ArgVector.HandlerRegister+rbp]     ; Get next handler into rcx
    jmp     qword ptr [rcx]                         ; Enter it through its first word

; RTS call to kill the current thread.
PUBLIC  X86AsmKillSelf
X86AsmKillSelf:
    CALL_EXTRA RETURN_KILL_SELF

; The next two entry points leave ML with the callback return reasons.
PUBLIC  X86AsmCallbackReturn
X86AsmCallbackReturn:
    CALL_EXTRA RETURN_CALLBACK_RETURN

PUBLIC  X86AsmCallbackException
X86AsmCallbackException:
    CALL_EXTRA RETURN_CALLBACK_EXCEPTION

; This implements atomic addition in the same way as atomic_increment
; The argument (the address to update) arrives in rcx.  The routine atomically
; adds 2 to the word at that address and returns the new value in rax: xadd
; leaves the old value in rcx, so 2 is added again.
PUBLIC  X86AsmAtomicIncrement
X86AsmAtomicIncrement:
    mov     rax,rcx
; Use rcx and rax because they are volatile (unlike rbx on X86/64/Unix)
    mov     rcx,2
#ifdef POLYML32IN64
    lock xadd [rax],ecx     ;# Rax is an absolute address but this is only a word
#else
    lock xadd [rax],rcx
#endif
    add     rcx,2
    mov     rax,rcx
    ret

; Declares the public entry point X86AsmCallExtra<index> and expands
; CALL_EXTRA for that index.
CREATE_EXTRA_CALL MACRO index
PUBLIC  X86AsmCallExtra&index&
X86AsmCallExtra&index&:
    CALL_EXTRA index
    ENDM

CREATE_EXTRA_CALL RETURN_HEAP_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOW
CREATE_EXTRA_CALL RETURN_STACK_OVERFLOWEX

END