diff --git a/libpolyml/arm64assembly.S b/libpolyml/arm64assembly.S index b36e05ab..36947468 100644 --- a/libpolyml/arm64assembly.S +++ b/libpolyml/arm64assembly.S @@ -1,235 +1,237 @@ // // Assembly code for the ARM64 for Poly/ML // Author: David Matthews // Copyright (c) David C. J. Matthews 2021 // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License version 2.1 as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // The syntax of directives in the GNU assembler and in the MS ARMASM // are somewhat different. ARMASMSYNTAX is defined in the VS project files. 
// Overview of the code in this file (ARM64 glue between the run-time system and compiled ML code):
//  - Arm64AsmEnterCompiledCode keeps the interface-block address passed in x0 in x26, loads the
//    argument (x0), closure (x8), heap limit (x25), allocation and stack pointers (x27, x28) and
//    link register (x30) from that block, and branches to the closure's code address held in x16.
//  - Each Arm64AsmCallExtraRETURN_* entry puts its reason code in x16 (1 heap overflow, 2 stack
//    overflow, 3 stack overflow "EX", 4 enter interpreter) and continues at trapHandle.
//  - trapHandle stores the reason byte at ReturnReason, saves x0-x15, x19-x24, d0-d7, x27, x28 and
//    x30 into the block, calls the function at TrapHandlerEntry with the ThreadId value in x0,
//    reloads the saved state and resumes at EntryPoint.  If ExceptionPacket is not 1 it instead
//    loads x28 from HandlerRegister and branches to the address stored at [x28].
//  - Arm64AsmAtomicExchange is an ldaxr/stlxr retry loop that writes xzr to [x0] and returns the
//    previous contents in x0.
//    NOTE(review): the prototype comment lists a second argument but x1 is never read, so the
//    value stored is always zero - confirm that this is what callers expect.
//  - The "+#else +.align 4" lines added by this patch cover the MACOSX build, which skips the
//    .section/.balign directives.
#ifdef ARMASMSYNTAX #define LABEL(x) x #else #define LABEL(x) x: #endif #ifndef MACOSX #ifdef ARMASMSYNTAX AREA |.text|, CODE, READONLY #else .section .text,"x" .balign 4 #endif +#else +.align 4 #endif #ifdef HAVE_CONFIG_H // Specifically for POLYML32IN64 #include "config.h" #endif #ifdef SYMBOLS_REQUIRE_UNDERSCORE #define EXTNAME(x) _##x #else #define EXTNAME(x) x #endif // Offsets into the assembly code interface #define TrapHandlerEntry 32 #define HandlerRegister 40 #define ExceptionPacket 56 #define ThreadId 64 #define RegisterArray 72 #define FPRegisterArray 272 #define LocalMBottom 336 #define LocalMPointer 344 #define MLStackPointer 352 #define LinkRegister 360 #define EntryPoint 368 #define ReturnReason 376 #ifdef ARMASMSYNTAX EXPORT Arm64AsmEnterCompiledCode Arm64AsmEnterCompiledCode PROC #else .global EXTNAME(Arm64AsmEnterCompiledCode) EXTNAME(Arm64AsmEnterCompiledCode): #endif // This is called once the thread has been initialised to run the ML code. // It never returns. The RTS may be entered either by a compiled RTS call // or by a call to a "trap" function. // We only need to load a subset of the registers. 
mov x26,x0 // Copy the address of the assembly-code section into X26 ldr x0,[x26, RegisterArray] // Argument ldr x8,[x26, RegisterArray+8*8] // Closure address #ifdef POLYML32IN64 ldr x24,[x26, RegisterArray+24 * 8] add x16,x24,x8,LSL #2 ldr x16,[x16] #else ldr x16,[x8] // Code address - first word of closure #endif ldr x25,[x26, LocalMBottom] // Limit of heap ldp x27,x28,[x26, LocalMPointer] // Allocation pointer and stack pointer ldr x30,[x26, LinkRegister] // Link register - always zero because we don't return br x16 // Jump to code #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_HEAP_OVERFLOW Arm64AsmCallExtraRETURN_HEAP_OVERFLOW PROC #else .global EXTNAME(Arm64AsmCallExtraRETURN_HEAP_OVERFLOW) EXTNAME(Arm64AsmCallExtraRETURN_HEAP_OVERFLOW) : #endif mov x16, 1 // Common code to call into the RTS LABEL(trapHandle) strb w16,[x26, ReturnReason] stp x0,x1,[x26, RegisterArray] stp x2,x3, [x26, RegisterArray+2*8] stp x4,x5, [x26, RegisterArray + 4*8] stp x6, x7, [x26, RegisterArray + 6*8] stp x8, x9, [x26, RegisterArray + 8*8] stp x10, x11, [x26, RegisterArray + 10*8] stp x12, x13, [x26, RegisterArray + 12*8] stp x14, x15, [x26, RegisterArray + 14*8] stp x19, x20, [x26, RegisterArray + 19 * 8] stp x21,x22,[x26, RegisterArray + 21 * 8] stp x23,x24, [x26, RegisterArray + 23 * 8] stp d0,d1,[x26, FPRegisterArray] stp d2,d3,[x26, FPRegisterArray+2*8] stp d4,d5,[x26, FPRegisterArray+4*8] stp d6,d7,[x26, FPRegisterArray+6*8] str x27,[x26,LocalMPointer] str x28,[x26,MLStackPointer] str x30,[x26,LinkRegister] ldr x0,[x26,ThreadId] // Pass the thread id as an argument so that we can get the task data ldr x16,[x26,TrapHandlerEntry] blr x16 // Load the registers. Even though some are callee-save the RTS may have updated them. // x26, though, should have been preserved. 
// x0 is deliberately reloaded last: it is first used as a scratch register for the ExceptionPacket test below.
ldr x1,[x26, RegisterArray+1*8] ldp x2,x3, [x26, RegisterArray+2*8] ldp x4,x5, [x26, RegisterArray + 4*8] ldp x6, x7, [x26, RegisterArray + 6*8] ldp x8, x9, [x26, RegisterArray + 8*8] ldp x10, x11, [x26, RegisterArray + 10*8] ldp x12, x13, [x26, RegisterArray + 12*8] ldp x14, x15, [x26, RegisterArray + 14*8] ldp x19, x20, [x26, RegisterArray + 19 * 8] ldp x21,x22,[x26, RegisterArray + 21 * 8] ldp x23,x24, [x26, RegisterArray + 23 * 8] ldp d0,d1,[x26, FPRegisterArray] ldp d2,d3,[x26, FPRegisterArray+2*8] ldp d4,d5,[x26, FPRegisterArray+4*8] ldp d6,d7,[x26, FPRegisterArray+6*8] ldr x25,[x26, LocalMBottom] ldp x27,x28,[x26,LocalMPointer] ldr x30,[x26,LinkRegister] // Check whether we've raised an exception e.g. Interrupt ldr x0,[x26,ExceptionPacket] cmp x0,#1 bne raiseexcept ldr x0,[x26, RegisterArray] ldr x16,[x26,EntryPoint] // Normally this will be x30 but not always br x16 LABEL(raiseexcept) ldr x28,[x26,HandlerRegister] // Set the stack ptr to this ldr x16,[x28] br x16 #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_STACK_OVERFLOW Arm64AsmCallExtraRETURN_STACK_OVERFLOW PROC #else .global EXTNAME(Arm64AsmCallExtraRETURN_STACK_OVERFLOW) EXTNAME(Arm64AsmCallExtraRETURN_STACK_OVERFLOW) : #endif mov x16, 2 b trapHandle #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX PROC #else .global EXTNAME(Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX) EXTNAME(Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX) : #endif mov x16, 3 b trapHandle #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_ENTER_INTERPRETER Arm64AsmCallExtraRETURN_ENTER_INTERPRETER PROC #else .global EXTNAME(Arm64AsmCallExtraRETURN_ENTER_INTERPRETER) EXTNAME(Arm64AsmCallExtraRETURN_ENTER_INTERPRETER) : #endif mov x16,4 b trapHandle #ifdef ARMASMSYNTAX ENDP #endif // POLYUNSIGNED Arm64AsmAtomicExchange(PolyObject*, POLYSIGNED); // This is not actually used with the VS 
build. #ifdef ARMASMSYNTAX EXPORT Arm64AsmAtomicExchange Arm64AsmAtomicExchange PROC #else .global EXTNAME(Arm64AsmAtomicExchange) EXTNAME(Arm64AsmAtomicExchange): #endif // The easiest way to do this is with swpal but that is only available // in ARM 8.1 and above. For the moment we use the old version. // swpal x0,xzr,[x0] LABEL(aaea1) ldaxr x3,[x0] stlxr w4,xzr,[x0] cbnz w4,aaea1 dmb ish mov x0,x3 ret #ifdef ARMASMSYNTAX ENDP END #endif diff --git a/libpolyml/osmemunix.cpp b/libpolyml/osmemunix.cpp index 895e7be2..36cbecdc 100644 --- a/libpolyml/osmemunix.cpp +++ b/libpolyml/osmemunix.cpp @@ -1,503 +1,500 @@ /* Title: osmemunix.cpp - Interface to OS memory management - Unix version Copyright (c) 2006, 2017-18, 2020-21 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #else #error "No configuration file" #endif #if defined __linux__ && !defined _GNU_SOURCE // _GNU_SOURCE must be defined before #include to get O_TMPFILE etc. 
#define _GNU_SOURCE 1 #endif #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_MMAN_H #include #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_SYS_PARAM_H #include #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_FCNTL_H #include #endif // Linux prefers MAP_ANONYMOUS to MAP_ANON #ifndef MAP_ANON #ifdef MAP_ANONYMOUS #define MAP_ANON MAP_ANONYMOUS #endif #endif // Assume that mmap is supported. If it isn't we can't run. #include "osmem.h" #include "bitmap.h" #include "locking.h" #include "polystring.h" // For TempCString // How do we get the page size? #ifndef HAVE_GETPAGESIZE #ifdef _SC_PAGESIZE #define getpagesize() sysconf(_SC_PAGESIZE) #else // If this fails we're stuck #define getpagesize() PAGESIZE #endif #endif #ifdef SOLARIS #define FIXTYPE (caddr_t) #else #define FIXTYPE #endif // MAP_JIT is needed on Mac OS with hardened kernel #ifndef MAP_JIT #define MAP_JIT 0 #endif // Open a temporary file, unlink it and return the file descriptor. 
// openTmpFile: where O_TMPFILE is defined, open(dirName, ... | O_TMPFILE | O_RDWR | O_EXCL) is tried first;
// otherwise, or if that open fails, mkstemp() creates "<dirName>/mlMapXXXXXX", which is unlinked straight away.
// Returns the descriptor, or -1 if the name buffer cannot be allocated or mkstemp fails.
// createTemporaryFile tries $TMPDIR (if set), P_tmpdir (if defined), "/tmp" and "/var/tmp" in that order.
static int openTmpFile(const char* dirName) { #ifdef O_TMPFILE int flags = 0; #ifdef O_CLOEXEC flags |= O_CLOEXEC; #endif int tfd = open(dirName, flags | O_TMPFILE | O_RDWR | O_EXCL, 0700); if (tfd != -1) return tfd; #endif const char* template_subdir = "/mlMapXXXXXX"; TempString buff((char*)malloc(strlen(dirName) + strlen(template_subdir) + 1)); if (buff == 0) return -1; // Unable to allocate strcpy(buff, dirName); strcat(buff, template_subdir); int fd = mkstemp(buff); if (fd == -1) return -1; unlink(buff); return fd; } static int createTemporaryFile() { char *tmpDir = getenv("TMPDIR"); int fd; if (tmpDir != NULL) { fd = openTmpFile(tmpDir); if (fd != -1) return fd; } #ifdef P_tmpdir fd = openTmpFile(P_tmpdir); if (fd != -1) return fd; #endif fd = openTmpFile("/tmp"); if (fd != -1) return fd; fd = openTmpFile("/var/tmp"); if (fd != -1) return fd; return -1; } #ifdef POLYML32IN64 OSMem::OSMem() { memBase = 0; shadowFd = -1; } OSMem::~OSMem() { } bool OSMem::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void** pBase /* = 0 */) { memUsage = usage; pageSize = getpagesize(); bool simpleMmap; if (usage != UsageExecutableCode) simpleMmap = true; else { // Can we allocate memory with write+execute? void *test = mmap(0, pageSize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANON|MAP_JIT, -1, 0); if (test != MAP_FAILED) { munmap(FIXTYPE test, pageSize); simpleMmap = true; } else simpleMmap = false; } if (simpleMmap) { // Don't require shadow area. Can use mmap - memBase = (char*)mmap(0, space, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + int flags = MAP_PRIVATE | MAP_ANON; + if (usage == UsageExecutableCode) flags |= MAP_JIT; + memBase = (char*)mmap(0, space, PROT_NONE, flags, -1, 0); if (memBase == MAP_FAILED) return false; // We need the heap to be such that the top 32-bits are non-zero. if ((uintptr_t)memBase < ((uintptr_t)1 << 32)) { // Allocate again. 
// NOTE(review): this retry passes MAP_PRIVATE | MAP_ANON rather than the `flags` value built above (so MAP_JIT is not requested), and its result is stored in memBase without being compared with MAP_FAILED - confirm both are intended.
// NOTE(review): the shadow-file branch further down returns on ftruncate/mmap failure without closing shadowFd, and the POLYML32IN64 destructor is empty - check whether the descriptor can leak.
void* newSpace = mmap(0, space, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); munmap(FIXTYPE memBase, space); // Free the old area that isn't suitable. // Return what we got, or zero if it failed. memBase = (char*)newSpace; } shadowBase = memBase; } else { // More difficult - require file mapping shadowFd = createTemporaryFile(); if (shadowFd == -1) return false; if (ftruncate(shadowFd, space) == -1) return false; void *readWrite = mmap(0, space, PROT_NONE, MAP_SHARED, shadowFd, 0); if (readWrite == MAP_FAILED) return 0; memBase = (char*)mmap(0, space, PROT_NONE, MAP_SHARED, shadowFd, 0); if (memBase == MAP_FAILED) { munmap(FIXTYPE readWrite, space); return false; } // This should be above 32-bits. ASSERT((uintptr_t)memBase >= ((uintptr_t)1 << 32)); shadowBase = (char*)readWrite; } if (pBase != 0) *pBase = memBase; // Create a bitmap with a bit for each page. if (!pageMap.Create(space / pageSize)) return false; lastAllocated = space / pageSize; // Beyond the last page in the area // Set the last bit in the area so that we don't use it. // This is effectively a work-around for a problem with the heap. // If we have a zero-sized cell at the end of the memory its address is // going to be zero. This causes problems with forwarding pointers. // There may be better ways of doing this. pageMap.SetBit(space / pageSize - 1); return true; } void* OSMem::AllocateDataArea(size_t& space) { char* baseAddr; { PLocker l(&bitmapLock); uintptr_t pages = (space + pageSize - 1) / pageSize; // Round up to an integral number of pages. space = pages * pageSize; // Find some space while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area. lastAllocated--; uintptr_t free = pageMap.FindFree(0, lastAllocated, pages); if (free == lastAllocated) return 0; // Can't find the space. pageMap.SetBits(free, pages); // TODO: Do we need to zero this? It may have previously been set. 
// NOTE(review): if the mmap call below fails, the function returns 0 while the pages set in pageMap above stay marked as allocated.
baseAddr = memBase + free * pageSize; } int prot = PROT_READ | PROT_WRITE; int flags = MAP_FIXED | MAP_PRIVATE | MAP_ANON; #if defined(MAP_STACK) && defined(__OpenBSD__) // On OpenBSD the stack must be mapped with MAP_STACK otherwise it // segfaults. On FreeBSD, though, this isn't necessary and causes problems. if (memUsage == UsageStack) flags |= MAP_STACK; #endif if (mmap(baseAddr, space, prot, flags, -1, 0) == MAP_FAILED) return 0; msync(baseAddr, space, MS_SYNC | MS_INVALIDATE); return baseAddr; } bool OSMem::FreeDataArea(void* p, size_t space) { char* addr = (char*)p; uintptr_t offset = (addr - memBase) / pageSize; // Remap the pages as new entries. This should remove the old versions. if (mmap(p, space, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == MAP_FAILED) return false; msync(p, space, MS_SYNC | MS_INVALIDATE); uintptr_t pages = space / pageSize; { PLocker l(&bitmapLock); pageMap.ClearBits(offset, pages); if (offset + pages > lastAllocated) // We allocate from the top down. lastAllocated = offset + pages; } return true; } void* OSMem::AllocateCodeArea(size_t& space, void*& shadowArea) { uintptr_t offset; { PLocker l(&bitmapLock); uintptr_t pages = (space + pageSize - 1) / pageSize; // Round up to an integral number of pages. space = pages * pageSize; // Find some space while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area. lastAllocated--; uintptr_t free = pageMap.FindFree(0, lastAllocated, pages); if (free == lastAllocated) return 0; // Can't find the space. 
// The patched non-shadow path below changes the protection of the already reserved range with mprotect instead of remapping it with MAP_FIXED (the removed lines also passed MAP_JIT for executable code).
// NOTE(review): FreeCodeArea, later in this section, still remaps the non-shadow range with MAP_FIXED | MAP_PRIVATE | MAP_ANON; in its shadow branch it passes `offset` (a page index) as the file offset and omits MAP_FIXED, whereas the allocation calls use a byte offset with MAP_FIXED - confirm.
pageMap.SetBits(free, pages); offset = free * pageSize; } if (shadowFd == -1) { char *baseAddr = memBase + offset; int prot = PROT_READ | PROT_WRITE; - int flags = MAP_FIXED | MAP_PRIVATE | MAP_ANON; - if (memUsage == UsageExecutableCode) - { - prot |= PROT_EXEC; - flags |= MAP_JIT; - } - if (mmap(baseAddr, space, prot, flags, -1, 0) == MAP_FAILED) + if (memUsage == UsageExecutableCode) prot |= PROT_EXEC; + if (mprotect(baseAddr, space, prot) != 0) return 0; msync(baseAddr, space, MS_SYNC | MS_INVALIDATE); shadowArea = baseAddr; return baseAddr; } else { char *baseAddr = memBase + offset; char *readWriteArea = shadowBase + offset; if (mmap(baseAddr, space, PROT_READ|PROT_EXEC, MAP_FIXED | MAP_SHARED, shadowFd, offset) == MAP_FAILED) return 0; msync(baseAddr, space, MS_SYNC | MS_INVALIDATE); if (mmap(readWriteArea, space, PROT_READ|PROT_WRITE, MAP_FIXED | MAP_SHARED, shadowFd, offset) == MAP_FAILED) return 0; msync(readWriteArea, space, MS_SYNC | MS_INVALIDATE); shadowArea = readWriteArea; return baseAddr; } } bool OSMem::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space) { // Free areas by mapping them with PROT_NONE. uintptr_t offset = ((char*)codeAddr - memBase) / pageSize; if (shadowFd == -1) { mmap(codeAddr, space, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0); msync(codeAddr, space, MS_SYNC | MS_INVALIDATE); } else { mmap(codeAddr, space, PROT_NONE, MAP_SHARED, shadowFd, offset); msync(codeAddr, space, MS_SYNC | MS_INVALIDATE); mmap(dataAddr, space, PROT_NONE, MAP_SHARED, shadowFd, offset); msync(dataAddr, space, MS_SYNC | MS_INVALIDATE); } uintptr_t pages = space / pageSize; { PLocker l(&bitmapLock); pageMap.ClearBits(offset, pages); if (offset + pages > lastAllocated) // We allocate from the top down. lastAllocated = offset + pages; } return true; } bool OSMem::EnableWrite(bool enable, void* p, size_t space) { int res = mprotect(FIXTYPE p, space, enable ? 
PROT_READ|PROT_WRITE: PROT_READ); return res != -1; } bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space) { int prot = PROT_READ; if (memUsage == UsageExecutableCode) prot |= PROT_EXEC; int res = mprotect(FIXTYPE codeAddr, space, prot); return res != -1; } #else // Native address versions OSMem::OSMem() { allocPtr = 0; shadowFd = -1; } OSMem::~OSMem() { if (shadowFd != -1) close(shadowFd); } bool OSMem::Initialise(enum _MemUsage usage, size_t space /* = 0 */, void **pBase /* = 0 */) { memUsage = usage; pageSize = getpagesize(); if (usage != UsageExecutableCode) return true; // Can we allocate memory with write+execute? void *test = mmap(0, pageSize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_JIT|MAP_PRIVATE|MAP_ANON, -1, 0); if (test != MAP_FAILED) { // Don't require shadow area munmap(FIXTYPE test, pageSize); return true; } if (errno != ENOTSUP && errno != EACCES) // Fails with ENOTSUP on OpenBSD and EACCES in SELinux. return false; // Check that read-write works. test = mmap(0, pageSize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); if (test == MAP_FAILED) return false; // There's a problem. munmap(FIXTYPE test, pageSize); // Need to create a file descriptor for mapping. shadowFd = createTemporaryFile(); if (shadowFd != -1) return true; return false; } // Allocate space and return a pointer to it. The size is the minimum // size requested and it is updated with the actual space allocated. // Returns NULL if it cannot allocate the space. void *OSMem::AllocateDataArea(size_t &space) { // Round up to an integral number of pages. space = (space + pageSize-1) & ~(pageSize-1); int fd = -1; // This value is required by FreeBSD. Linux doesn't care int flags = MAP_PRIVATE | MAP_ANON; #if defined(MAP_STACK) && defined(__OpenBSD__) // On OpenBSD the stack must be mapped with MAP_STACK otherwise it // segfaults. On FreeBSD, though, this isn't necessary and causes problems. 
if (memUsage == UsageStack) flags |= MAP_STACK; #endif void *result = mmap(0, space, PROT_READ|PROT_WRITE, flags, fd, 0); // Convert MAP_FAILED (-1) into NULL if (result == MAP_FAILED) return 0; return result; } // Release the space previously allocated. This must free the whole of // the segment. The space must be the size actually allocated. bool OSMem::FreeDataArea(void *p, size_t space) { return munmap(FIXTYPE p, space) == 0; } bool OSMem::EnableWrite(bool enable, void* p, size_t space) { int res = mprotect(FIXTYPE p, space, enable ? PROT_READ|PROT_WRITE: PROT_READ); return res != -1; } void *OSMem::AllocateCodeArea(size_t &space, void*& shadowArea) { // Round up to an integral number of pages. space = (space + pageSize-1) & ~(pageSize-1); if (shadowFd == -1) { int fd = -1; // This value is required by FreeBSD. Linux doesn't care int prot = PROT_READ | PROT_WRITE; int flags = MAP_PRIVATE|MAP_ANON; if (memUsage == UsageExecutableCode) { prot |= PROT_EXEC; flags |= MAP_JIT; } void *result = mmap(0, space, prot, flags, fd, 0); // Convert MAP_FAILED (-1) into NULL if (result == MAP_FAILED) return 0; shadowArea = result; return result; } // Have to use dual areas. 
// Each request takes the next `space` bytes of the shadow file: allocPtr is advanced under allocLock, the file is extended with ftruncate, and the same file range is then mapped twice (read+execute, returned; read+write, stored in shadowArea).
// NOTE(review): allocPtr is not moved back if ftruncate or either mmap fails.
size_t allocAt; { PLocker lock(&allocLock); allocAt = allocPtr; allocPtr += space; } if (ftruncate(shadowFd, allocAt + space) == -1) return 0; void *readExec = mmap(0, space, PROT_READ|PROT_EXEC, MAP_SHARED, shadowFd, allocAt); if (readExec == MAP_FAILED) return 0; void *readWrite = mmap(0, space, PROT_READ|PROT_WRITE, MAP_SHARED, shadowFd, allocAt); if (readWrite == MAP_FAILED) { munmap(FIXTYPE readExec, space); return 0; } shadowArea = readWrite; return readExec; } bool OSMem::FreeCodeArea(void *codeArea, void *dataArea, size_t space) { bool freeCode = munmap(FIXTYPE codeArea, space) == 0; if (codeArea == dataArea) return freeCode; return (munmap(FIXTYPE dataArea, space) == 0) & freeCode; } bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space) { int prot = PROT_READ; if (memUsage == UsageExecutableCode) prot |= PROT_EXEC; int res = mprotect(FIXTYPE codeAddr, space, prot); return res != -1; } #endif