diff --git a/libpolyml/x86assembly_gas64.S b/libpolyml/x86assembly_gas64.S index d90c93b3..c4126476 100644 --- a/libpolyml/x86assembly_gas64.S +++ b/libpolyml/x86assembly_gas64.S @@ -1,288 +1,300 @@ /* Title: Assembly code routines for the poly system. Author: David Matthews Copyright (c) David C. J. Matthews 2000-2019 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* This is the 64-bit gas version of the assembly code file. There are separate versions of 32/64 and MAMS (Intel syntax) and and GCC (gas syntax). */ /* Registers used :- %rax: First argument to function. Result of function call. %rbx: Second argument to function. %rcx: General register %rdx: Closure pointer in call. %rbp: Points to memory used for extra registers %rsi: General register. %rdi: General register. %rsp: Stack pointer. %r8: Third argument to function %r9: Fourth argument to function %r10: Fifth argument to function %r11: General register %r12: General register %r13: General register %r14: General register %r15: Memory allocation pointer */ #include "config.h" #ifdef SYMBOLS_REQUIRE_UNDERSCORE #define EXTNAME(x) _##x #else #define EXTNAME(x) x #endif #ifdef __CYGWIN__ #define _WIN32 1 #endif /* Macro to begin the hand-coded functions */ #ifdef MACOSX #define GLOBAL .globl #else #define GLOBAL .global #endif #define INLINE_ROUTINE(id) \ GLOBAL EXTNAME(id); \ EXTNAME(id): /* Extra entries on the C stack */ #define Fr_Size 64 /* Must be multiple of 16 to get alignment correct */ /* This is the argument vector passed in to X86AsmSwitchToPoly It is used to initialise the frame. A few values are updated when ML returns. */ #define Arg_LocalMpointer 0x0 #define Arg_HandlerRegister 0x8 #define Arg_LocalMbottom 0x10 #define Arg_StackLimit 0x18 #define Arg_ExceptionPacket 0x20 /* Address of packet to raise */ #define Arg_RequestCode 0x28 /* Byte: Io function to call. */ #define Arg_ReturnReason 0x2a /* Byte: Reason for returning from ML. */ #define Arg_UnusedRestore 0x2b /* Byte: Full/partial restore */ #define Arg_SaveCStack 0x30 /* Current stack base */ #define Arg_ThreadId 0x38 /* My thread id */ #define Arg_StackPtr 0x40 /* Stack Pointer */ #define Arg_SaveRAX 0x68 #define Arg_SaveRBX 0x70 #define Arg_SaveRCX 0x78 #define Arg_SaveRDX 0x80 #define Arg_SaveRSI 0x88 #define Arg_SaveRDI 0x90 #define Arg_SaveR8 0x98 #define Arg_SaveR9 0xa0 #define Arg_SaveR10 0xa8 #define Arg_SaveR11 0xb0 #define Arg_SaveR12 0xb8 #define Arg_SaveR13 0xc0 #define Arg_SaveR14 0xc8 #define Arg_SaveXMM0 0xd0 #define Arg_SaveXMM1 0xd8 #define Arg_SaveXMM2 0xe0 #define Arg_SaveXMM3 0xe8 #define Arg_SaveXMM4 0xf0 #define Arg_SaveXMM5 0xf8 #define Arg_SaveXMM6 0x100 /* IO function numbers. These are functions that are called to handle special cases in this code */ #include "sys.h" #define RETURN_HEAP_OVERFLOW 1 #define RETURN_STACK_OVERFLOW 2 #define RETURN_STACK_OVERFLOWEX 3 -#define RETURN_CALLBACK_RETURN 6 -#define RETURN_CALLBACK_EXCEPTION 7 #define RETURN_RAISE_OVERFLOW 8 #define RETURN_KILL_SELF 9 # Mark the stack as non-executable when supported #ifdef HAVE_GNU_STACK .section .note.GNU-stack, "", @progbits #endif # # CODE STARTS HERE # .text #define CALL_EXTRA(index) \ pushq %rcx; \ movb $index,Arg_ReturnReason(%rbp); \ popq %rcx; \ - jmp SaveFullState; + jmp CallTrapHandler; -/* Load the registers from the ML stack and jump to the code. */ +/* Enter ML code. This is now only ever used to start a new thread. + It is probably unnecessary to save the callee-save regs or load the ML regs. */ INLINE_ROUTINE(X86AsmSwitchToPoly) pushq %rbp # Standard entry sequence /* If we're compiling with Mingw we're using Windows calling conventions. */ #ifdef _WIN32 movq %rcx,%rbp # Argument is in %rcx #else movq %rdi,%rbp # Argument is in %rdi #endif pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 #ifdef _WIN32 pushq %rdi # Callee save in Windows pushq %rsi subq $(Fr_Size-56),%rsp # Argument is already in %rcx #else subq $(Fr_Size-40),%rsp #endif movq %rsp,Arg_SaveCStack(%rbp) movq Arg_LocalMpointer(%rbp),%r15 movq Arg_StackPtr(%rbp),%rsp # Set the new stack ptr movsd Arg_SaveXMM0(%rbp),%xmm0 # Load the registers movsd Arg_SaveXMM1(%rbp),%xmm1 movsd Arg_SaveXMM2(%rbp),%xmm2 movsd Arg_SaveXMM3(%rbp),%xmm3 movsd Arg_SaveXMM4(%rbp),%xmm4 movsd Arg_SaveXMM5(%rbp),%xmm5 movsd Arg_SaveXMM6(%rbp),%xmm6 movq Arg_SaveRBX(%rbp),%rbx movq Arg_SaveRCX(%rbp),%rcx movq Arg_SaveRDX(%rbp),%rdx movq Arg_SaveRSI(%rbp),%rsi movq Arg_SaveRDI(%rbp),%rdi movq Arg_SaveR8(%rbp),%r8 movq Arg_SaveR9(%rbp),%r9 movq Arg_SaveR10(%rbp),%r10 movq Arg_SaveR11(%rbp),%r11 movq Arg_SaveR12(%rbp),%r12 movq Arg_SaveR13(%rbp),%r13 movq Arg_SaveR14(%rbp),%r14 movq Arg_ExceptionPacket(%rbp),%rax cmpq $1,%rax # Did we raise an exception? jnz raisexLocal movq Arg_SaveRAX(%rbp),%rax cld # Clear this just in case ret /* This is exactly the same as raisex but seems to be needed to work round a PIC problem. */ raisexLocal: movq Arg_HandlerRegister(%rbp),%rcx # Get next handler into %rcx jmp *(%rcx) -/* Code to save the state and switch to C - This saves the full register state. */ -SaveFullState: +/* Save all the registers and enter the trap handler. + It is probably unnecessary to save the FP state now. */ +CallTrapHandler: movq %rax,Arg_SaveRAX(%rbp) movq %rbx,Arg_SaveRBX(%rbp) movq %rcx,Arg_SaveRCX(%rbp) movq %rdx,Arg_SaveRDX(%rbp) movq %rsi,Arg_SaveRSI(%rbp) movq %rdi,Arg_SaveRDI(%rbp) movsd %xmm0,Arg_SaveXMM0(%rbp) movsd %xmm1,Arg_SaveXMM1(%rbp) movsd %xmm2,Arg_SaveXMM2(%rbp) movsd %xmm3,Arg_SaveXMM3(%rbp) movsd %xmm4,Arg_SaveXMM4(%rbp) movsd %xmm5,Arg_SaveXMM5(%rbp) movsd %xmm6,Arg_SaveXMM6(%rbp) movq %r8,Arg_SaveR8(%rbp) movq %r9,Arg_SaveR9(%rbp) movq %r10,Arg_SaveR10(%rbp) movq %r11,Arg_SaveR11(%rbp) movq %r12,Arg_SaveR12(%rbp) movq %r13,Arg_SaveR13(%rbp) movq %r14,Arg_SaveR14(%rbp) movq %rsp,Arg_StackPtr(%rbp) movq %r15,Arg_LocalMpointer(%rbp) # Save back heap pointer movq Arg_SaveCStack(%rbp),%rsp # Restore C stack pointer #ifdef _WIN32 - addq $(Fr_Size-56),%rsp - popq %rsi - popq %rdi + subq $32,%rsp # Windows save area + movq Arg_ThreadId(%rbp),%rcx #else - addq $(Fr_Size-40),%rsp + movq Arg_ThreadId(%rbp),%rdi #endif - popq %r15 # Restore callee-save registers - popq %r14 - popq %r13 - popq %r12 - popq %rbx - popq %rbp + call X86TrapHandler@plt +#ifdef _WIN32 + addq $32,%rsp +#endif + movq Arg_LocalMpointer(%rbp),%r15 + movq Arg_StackPtr(%rbp),%rsp # Set the new stack ptr + movsd Arg_SaveXMM0(%rbp),%xmm0 # Load the registers + movsd Arg_SaveXMM1(%rbp),%xmm1 + movsd Arg_SaveXMM2(%rbp),%xmm2 + movsd Arg_SaveXMM3(%rbp),%xmm3 + movsd Arg_SaveXMM4(%rbp),%xmm4 + movsd Arg_SaveXMM5(%rbp),%xmm5 + movsd Arg_SaveXMM6(%rbp),%xmm6 + movq Arg_SaveRBX(%rbp),%rbx + movq Arg_SaveRCX(%rbp),%rcx + movq Arg_SaveRDX(%rbp),%rdx + movq Arg_SaveRSI(%rbp),%rsi + movq Arg_SaveRDI(%rbp),%rdi + movq Arg_SaveR8(%rbp),%r8 + movq Arg_SaveR9(%rbp),%r9 + movq Arg_SaveR10(%rbp),%r10 + movq Arg_SaveR11(%rbp),%r11 + movq Arg_SaveR12(%rbp),%r12 + movq Arg_SaveR13(%rbp),%r13 + movq Arg_SaveR14(%rbp),%r14 + movq Arg_ExceptionPacket(%rbp),%rax + cmpq $1,%rax # Did we raise an exception? + jnz raisexLocal + movq Arg_SaveRAX(%rbp),%rax + cld # Clear this just in case ret /* Used when entering new code. The argument and closure are on the stack in case there is a GC before we enter the code. */ INLINE_ROUTINE(X86AsmPopArgAndClosure) popq %rdx popq %rax #ifdef POLYML32IN64 jmp *(%rbx,%rdx,4) #else jmp *(%rdx) #endif -# This is used if the RTS sets up an exception. It's probably no longer relevant. -INLINE_ROUTINE(X86AsmRaiseException) - movq Arg_HandlerRegister(%rbp),%rcx # Get next handler into %rcx - jmp *(%rcx) # Additional assembly code routines # RTS call to kill the current thread. INLINE_ROUTINE(X86AsmKillSelf) CALL_EXTRA(RETURN_KILL_SELF) -INLINE_ROUTINE(X86AsmCallbackReturn) - CALL_EXTRA(RETURN_CALLBACK_RETURN) - -INLINE_ROUTINE(X86AsmCallbackException) - CALL_EXTRA(RETURN_CALLBACK_EXCEPTION) - INLINE_ROUTINE(X86AsmCallExtraRETURN_HEAP_OVERFLOW) CALL_EXTRA(RETURN_HEAP_OVERFLOW) INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOW) CALL_EXTRA(RETURN_STACK_OVERFLOW) INLINE_ROUTINE(X86AsmCallExtraRETURN_STACK_OVERFLOWEX) CALL_EXTRA(RETURN_STACK_OVERFLOWEX) # This implements atomic addition in the same way as atomic_increment INLINE_ROUTINE(X86AsmAtomicIncrement) #ifdef _WIN32 movq %rcx,%rax # On Windows the argument is passed in %rcx #else movq %rdi,%rax # On X86_64 the argument is passed in %rdi #endif # Use %rcx and %rax because they are volatile (unlike %rbx on X86/64/Unix) movq $2,%rcx #ifdef POLYML32IN64 lock xaddl %ecx,(%rax) # Rax is an absolute address but this is only a word #else lock xaddq %rcx,(%rax) #endif addq $2,%rcx movq %rcx,%rax ret