diff --git a/RootArm64.ML b/RootArm64.ML index 2453846c..b9b03122 100644 --- a/RootArm64.ML +++ b/RootArm64.ML @@ -1,133 +1,135 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Compiler root file for Arm64. This gives the "use" instructions necessary to build the compiler and suitable for use with an IDE project file. It was constructed from the Poly/ML make files. *) PolyML.print_depth 1; PolyML.Compiler.reportUnreferencedIds := true; use "mlsource/MLCompiler/Address.ML"; use "mlsource/MLCompiler/Misc.ML"; use "mlsource/MLCompiler/HashTable.ML"; use "mlsource/MLCompiler/UniversalTable.ML"; use "mlsource/MLCompiler/StronglyConnected.sml"; use "mlsource/MLCompiler/StretchArray.ML"; use "mlsource/MLCompiler/STRUCTVALSIG.sml"; use "mlsource/MLCompiler/PRETTYSIG.sml"; use "mlsource/MLCompiler/LEXSIG.sml"; use "mlsource/MLCompiler/SymbolsSig.sml"; use "mlsource/MLCompiler/COMPILERBODYSIG.sml"; use "mlsource/MLCompiler/DEBUG.sig"; use "mlsource/MLCompiler/MAKESIG.sml"; use "mlsource/MLCompiler/MAKE_.ML"; use "mlsource/MLCompiler/FOREIGNCALLSIG.sml"; use "mlsource/MLCompiler/BUILTINS.sml"; use "mlsource/MLCompiler/CODETREESIG.ML"; use "mlsource/MLCompiler/STRUCT_VALS.ML"; use "mlsource/MLCompiler/CodeTree/BackendIntermediateCodeSig.sml"; use "mlsource/MLCompiler/CodeTree/BaseCodeTreeSig.sml"; use "mlsource/MLCompiler/CodeTree/CodetreeFunctionsSig.sml"; use "mlsource/MLCompiler/CodeTree/CODEARRAYSIG.ML"; use "mlsource/MLCompiler/CodeTree/CodegenTreeSig.sml"; use "mlsource/MLCompiler/CodeTree/GENCODESIG.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE_FUNCTIONS.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE_STATIC_LINK_AND_CASES.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE_CODEGEN_CONSTANT_FUNCTIONS.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE_LAMBDA_LIFT.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE_REMOVE_REDUNDANT.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE_SIMPLIFIER.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE_OPTIMISER.sml"; use "mlsource/MLCompiler/CodeTree/CODETREE.ML"; use "mlsource/MLCompiler/Pretty.sml"; use "mlsource/MLCompiler/CodeTree/CODE_ARRAY.ML"; use "mlsource/MLCompiler/CodeTree/ByteCode/INTCODECONSSIG.sml"; use "mlsource/MLCompiler/CodeTree/ByteCode/INTCODECONS.ML"; use "mlsource/MLCompiler/CodeTree/ByteCode/INTGCODE.ML"; use "mlsource/MLCompiler/Debug.ML"; use "mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml"; use "mlsource/MLCompiler/CodeTree/BaseCodeTree.sml"; use "mlsource/MLCompiler/CodeTree/ByteCode/ml_bind.ML"; +use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig"; +use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML"; use "mlsource/MLCompiler/CodeTree/GCode.arm64.ML"; use "mlsource/MLCompiler/CodeTree/ml_bind.ML"; use "mlsource/MLCompiler/StructVals.ML"; use "mlsource/MLCompiler/LEX_.ML"; use "mlsource/MLCompiler/Symbols.ML"; use "mlsource/MLCompiler/Lex.ML"; use "mlsource/MLCompiler/SymsetSig.sml"; use "mlsource/MLCompiler/DATATYPEREPSIG.sml"; use "mlsource/MLCompiler/VALUEOPSSIG.sml"; use "mlsource/MLCompiler/EXPORTTREESIG.sml"; use "mlsource/MLCompiler/STRUCTURESSIG.sml"; use "mlsource/MLCompiler/COMPILER_BODY.ML"; use "mlsource/MLCompiler/SymSet.ML"; use "mlsource/MLCompiler/TYPETREESIG.sml"; use "mlsource/MLCompiler/COPIERSIG.sml"; use "mlsource/MLCompiler/TYPEIDCODESIG.sml"; use "mlsource/MLCompiler/DATATYPE_REP.ML"; use "mlsource/MLCompiler/PRINTTABLESIG.sml"; use "mlsource/MLCompiler/VALUE_OPS.ML"; use "mlsource/MLCompiler/TYPE_TREE.ML"; use "mlsource/MLCompiler/UTILITIES_.ML"; use "mlsource/MLCompiler/Utilities.ML"; use "mlsource/MLCompiler/PRINT_TABLE.ML"; use "mlsource/MLCompiler/PrintTable.ML"; use "mlsource/MLCompiler/ExportTree.sml"; use "mlsource/MLCompiler/ExportTreeStruct.sml"; use "mlsource/MLCompiler/TypeTree.ML"; use "mlsource/MLCompiler/COPIER.sml"; use "mlsource/MLCompiler/CopierStruct.sml"; use "mlsource/MLCompiler/TYPEIDCODE.sml"; use "mlsource/MLCompiler/TypeIDCodeStruct.sml"; use "mlsource/MLCompiler/DatatypeRep.ML"; use "mlsource/MLCompiler/ValueOps.ML"; use "mlsource/MLCompiler/PARSETREESIG.sml"; use "mlsource/MLCompiler/SIGNATURESSIG.sml"; use "mlsource/MLCompiler/DEBUGGER.sig"; use "mlsource/MLCompiler/STRUCTURES_.ML"; use "mlsource/MLCompiler/DEBUGGER_.sml"; use "mlsource/MLCompiler/Debugger.sml"; use "mlsource/MLCompiler/ParseTree/BaseParseTreeSig.sml"; use "mlsource/MLCompiler/ParseTree/BASE_PARSE_TREE.sml"; use "mlsource/MLCompiler/ParseTree/PrintParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/PRINT_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/ExportParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/EXPORT_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/TypeCheckParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/TYPECHECK_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/MatchCompilerSig.sml"; use "mlsource/MLCompiler/ParseTree/MATCH_COMPILER.sml"; use "mlsource/MLCompiler/ParseTree/CodegenParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/CODEGEN_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/PARSE_TREE.ML"; use "mlsource/MLCompiler/ParseTree/ml_bind.ML"; use "mlsource/MLCompiler/SIGNATURES.sml"; use "mlsource/MLCompiler/SignaturesStruct.sml"; use "mlsource/MLCompiler/Structures.ML"; use "mlsource/MLCompiler/PARSE_DEC.ML"; use "mlsource/MLCompiler/SKIPS_.ML"; use "mlsource/MLCompiler/Skips.ML"; use "mlsource/MLCompiler/PARSE_TYPE.ML"; use "mlsource/MLCompiler/ParseType.ML"; use "mlsource/MLCompiler/ParseDec.ML"; use "mlsource/MLCompiler/CompilerBody.ML"; use "mlsource/MLCompiler/CompilerVersion.sml"; use "mlsource/MLCompiler/Make.ML"; use "mlsource/MLCompiler/INITIALISE_.ML"; use "mlsource/MLCompiler/Initialise.ML"; use "mlsource/MLCompiler/ml_bind.ML"; diff --git a/libpolyml/arm64.cpp b/libpolyml/arm64.cpp index 32d75be5..73373566 100644 --- a/libpolyml/arm64.cpp +++ b/libpolyml/arm64.cpp @@ -1,773 +1,791 @@ /* Machine-dependent code for ARM64 Copyright David C.J. Matthews 2020-21. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ // Currently this is just copied from the interpreted version. #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) 0 #endif #ifdef HAVE_STRING_H #include #endif #include "globals.h" #include "machine_dep.h" #include "sys.h" #include "profiling.h" #include "arb.h" #include "processes.h" #include "run_time.h" #include "gc.h" #include "diagnostics.h" #include "polystring.h" #include "save_vec.h" #include "memmgr.h" #include "scanaddrs.h" #include "rtsentry.h" #include "bytecode.h" #include "int_opcodes.h" /* * ARM64 register use: * X0 First argument and return value * X1-X7 Second-eighth argument * X8 Indirect result (C), ML closure pointer on entry * X9-X15 Volatile scratch registers * X16-17 Intra-procedure-call (C). Only used for special cases in ML. * X18 Platform register. Not used in ML. * X19-X24 Non-volatile (C). Scratch registers (ML). * X25 ML Heap limit pointer * X26 ML assembly interface pointer. Non-volatile (C). * X27 ML Heap allocation pointer. Non-volatile (C). * X28 ML Stack pointer. Non-volatile (C). * X29 Frame pointer (C). Not used in ML * X30 Link register. Not used in ML. * X31 Stack pointer (C). Not used in ML. Also zero register. * * Floating point registers: * V0 First argument and return value * V1-V7 Second-eighth argument * V8-V15 Non volatile. Not currently used in ML. * V16-V31 Volatile. Not currently used in ML. * * The ML calling conventions generally follow the C ABI except that * all registers are volatile and X28 is used for the stack. */ /* the amount of ML stack space to reserve for registers, C exception handling etc. The compiler requires us to reserve 2 stack-frames worth (2 * 20 words). We actually reserve slightly more than this. */ // Arm64 instructions are all 32-bit values. typedef uint32_t* arm64CodePointer; #define OVERFLOW_STACK_SIZE 50 // X26 always points at this area when executing ML code. // The offsets are built into the assembly code and some are built into // the code generator so this must not be changed without checking these. typedef struct _AssemblyArgs { public: byte* enterInterpreter; // These are filled in with the functions. byte* heapOverFlowCall; byte* stackOverFlowCall; byte* stackOverFlowCallEx; byte* trapHandlerEntry; stackItem* handlerRegister; // Current exception handler stackItem* stackLimit; // Lower limit of stack stackItem exceptionPacket; // Set if there is an exception PolyWord threadId; // My thread id. Saves having to call into RTS for it. stackItem registers[25]; // Save/load area for registers X0-X25 inclusive double fpRegisters[8]; // Save/load area for floating point regs D0-D7 PolyWord* localMbottom; // Base of memory + 1 word PolyWord* localMpointer; // X27 Allocation ptr + 1 word stackItem* stackPtr; // X28 Current stack pointer arm64CodePointer linkRegister; // X30 - Link register (return address) arm64CodePointer entryPoint; // PC address to return to byte returnReason; // Reason for returning from ML - Set by assembly code. } AssemblyArgs; class Arm64TaskData: public TaskData, ByteCodeInterpreter { public: Arm64TaskData(); ~Arm64TaskData() {} unsigned allocReg; // The register to take the allocated space. POLYUNSIGNED allocWords; // The words to allocate. AssemblyArgs assemblyInterface; uint32_t saveRegisterMask; // Registers that need to be updated by a GC. virtual void GarbageCollect(ScanAddress *process); void ScanStackAddress(ScanAddress *process, stackItem& val, StackSpace *stack); virtual void EnterPolyCode(); // Start running ML virtual void SetException(poly_exn *exc) { assemblyInterface.exceptionPacket = (PolyWord)exc; } virtual void InterruptCode(); // AddTimeProfileCount is used in time profiling. virtual bool AddTimeProfileCount(SIGNALCONTEXT *context); virtual void InitStackFrame(TaskData *newTask, Handle proc); // Atomic exchange-and-add. Used in the process module to release a mutex and in the // interpreter. It needs to use the same instruction that compiled code uses. virtual POLYSIGNED AtomicExchAdd(PolyObject* mutexp, POLYSIGNED incr); // Set a mutex to zero. virtual void AtomicReset(PolyObject* mutexp); // Return the minimum space occupied by the stack. Used when setting a limit. // N.B. This is PolyWords not native words. virtual uintptr_t currentStackSpace(void) const { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE; } virtual void addProfileCount(POLYUNSIGNED words) { addSynchronousCount(interpreterPc, words); } virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length); //void HeapOverflowTrap(); void SetMemRegisters(); void SaveMemRegisters(); void HandleTrap(); // ByteCode overrides. The interpreter and native code states need to be in sync. // The interpreter is only used during the initial bootstrap. virtual void ClearExceptionPacket() { assemblyInterface.exceptionPacket = TAGGED(0); } virtual PolyWord GetExceptionPacket() { return assemblyInterface.exceptionPacket; } virtual stackItem* GetHandlerRegister() { return assemblyInterface.handlerRegister; } virtual void SetHandlerRegister(stackItem* hr) { assemblyInterface.handlerRegister = hr; } void Interpret(); void EndBootStrap() { mixedCode = true; } PLock interruptLock; virtual void HandleStackOverflow(uintptr_t space); }; class Arm64Dependent : public MachineDependent { public: Arm64Dependent() : mustInterpret(false) {} // Create a task data object. virtual TaskData* CreateTaskData(void) { return new Arm64TaskData(); } virtual Architectures MachineArchitecture(void) { return MA_Arm64; } virtual void SetBootArchitecture(char arch, unsigned wordLength); + // The ARM has separate instruction and data caches. + virtual void FlushInstructionCache(void* p, POLYUNSIGNED bytes); + // During the first bootstrap phase this is interpreted. bool mustInterpret; }; static Arm64Dependent arm64Dependent; MachineDependent* machineDependent = &arm64Dependent; // Values for the returnReason byte enum RETURN_REASON { RETURN_HEAP_OVERFLOW = 1, RETURN_STACK_OVERFLOW = 2, RETURN_STACK_OVERFLOWEX = 3, RETURN_ENTER_INTERPRETER = 4 }; extern "C" { // These are declared in the assembly code segment. void Arm64AsmEnterCompiledCode(void*); int Arm64AsmCallExtraRETURN_ENTER_INTERPRETER(void); int Arm64AsmCallExtraRETURN_HEAP_OVERFLOW(void); int Arm64AsmCallExtraRETURN_STACK_OVERFLOW(void); int Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX(void); // This is declared here and called from the assembly code. // It avoids having a call to an external in the assembly code // which sometimes gives problems with position-indepent code. void Arm64TrapHandler(PolyWord threadId); }; Arm64TaskData::Arm64TaskData() : ByteCodeInterpreter(&assemblyInterface.stackPtr, &assemblyInterface.stackLimit), allocReg(0), allocWords(0), saveRegisterMask(0) { assemblyInterface.enterInterpreter = (byte*)Arm64AsmCallExtraRETURN_ENTER_INTERPRETER; assemblyInterface.heapOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_HEAP_OVERFLOW; assemblyInterface.stackOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOW; assemblyInterface.stackOverFlowCallEx = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX; assemblyInterface.trapHandlerEntry = (byte*)Arm64TrapHandler; interpreterPc = 0; mixedCode = !arm64Dependent.mustInterpret; } void Arm64Dependent::SetBootArchitecture(char arch, unsigned wordLength) { if (arch == 'I') mustInterpret = true; else if (arch != 'A') Crash("Boot file has unexpected architecture code: %c", arch); } +// The ARM has separate instruction and data caches so we must flush +// the cache when creating or modifying code. +void Arm64Dependent::FlushInstructionCache(void* p, POLYUNSIGNED bytes) +{ +#ifdef _WIN32 + ::FlushInstructionCache(GetCurrentProcess(), p, bytes); +#elif defined (__GNUC__) + __builtin___clear_cache(p, (char*)p + bytes); +#elif (defined (__clang__) && defined (__APPLE__)) + sys_icache_invalidate(p, bytes); +#else +#error "No code to flush the instruction cache." +#endif +} + void Arm64TaskData::GarbageCollect(ScanAddress *process) { TaskData::GarbageCollect(process); ByteCodeInterpreter::GarbageCollect(process); if (assemblyInterface.exceptionPacket.w().IsDataPtr()) { PolyObject* obj = assemblyInterface.exceptionPacket.w().AsObjPtr(); obj = process->ScanObjectAddress(obj); assemblyInterface.exceptionPacket = (PolyWord)obj; } if (stack != 0) { stackItem*stackPtr = assemblyInterface.stackPtr; // Now the values on the stack. for (stackItem* q = stackPtr; q < (stackItem*)stack->top; q++) ScanStackAddress(process, *q, stack); } } // Process a value within the stack. void Arm64TaskData::ScanStackAddress(ScanAddress *process, stackItem& stackItem, StackSpace *stack) { // We may have return addresses on the stack which could look like // tagged values. Check whether the value is in the code area before // checking whether it is untagged. #ifdef POLYML32IN64 // In 32-in-64 return addresses always have the top 32 bits non-zero. if (stackItem.argValue < ((uintptr_t)1 << 32)) { // It's either a tagged integer or an object pointer. if (stackItem.w().IsDataPtr()) { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } } else { // Could be a code address or a stack address. MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space == 0 || space->spaceType != ST_CODE) return; PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); ASSERT(obj != 0); // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } #else // The -1 here is because we may have a zero-sized cell in the last // word of a space. MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space == 0) return; // In particular we may have one of the assembly code addresses. if (space->spaceType == ST_CODE) { PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); // If it is actually an integer it might be outside a valid code object. if (obj == 0) { ASSERT(stackItem.w().IsTagged()); // It must be an integer } else // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } else if (space->spaceType == ST_LOCAL && stackItem.w().IsDataPtr()) // Local values must be word addresses. { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } #endif } // Copy a stack void Arm64TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length) { #ifdef POLYML32IN64 old_length = old_length / 2; new_length = new_length / 2; #endif /* Moves a stack, updating all references within the stack */ stackItem*old_base = (stackItem*)old_stack; stackItem*new_base = (stackItem*)new_stack; stackItem*old_top = old_base + old_length; /* Calculate the offset of the new stack from the old. If the frame is being extended objects in the new frame will be further up the stack than in the old one. */ uintptr_t offset = new_base - old_base + new_length - old_length; stackItem *oldSp = assemblyInterface.stackPtr; assemblyInterface.stackPtr = oldSp + offset; assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset; /* Skip the unused part of the stack. */ uintptr_t i = oldSp - old_base; ASSERT(i <= old_length); i = old_length - i; stackItem *old = oldSp; stackItem *newp = assemblyInterface.stackPtr; while (i--) { stackItem old_word = *old++; if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top) old_word.stackAddr = old_word.stackAddr + offset; else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr)) { stackItem* addr = (stackItem*)old_word.w().AsStackAddr(); if (addr >= old_base && addr <= old_top) { addr += offset; old_word = PolyWord::FromStackAddr((PolyWord*)addr); } } *newp++ = old_word; } ASSERT(old == ((stackItem*)old_stack) + old_length); ASSERT(newp == ((stackItem*)new_stack) + new_length); } void Arm64TaskData::EnterPolyCode() /* Called from "main" to enter the code. */ { assemblyInterface.stackLimit = (stackItem*)((PolyWord*)this->stack->stack() + OVERFLOW_STACK_SIZE); if (arm64Dependent.mustInterpret) { PolyWord closure = assemblyInterface.registers[8]; *(--assemblyInterface.stackPtr) = closure; /* Closure address */ interpreterPc = *(POLYCODEPTR*)closure.AsObjPtr(); Interpret(); ASSERT(0); // Should never return } SetMemRegisters(); // Jump into the ML code. This code sets up the registers and puts the // address of the assemblyInterface into X26 Arm64AsmEnterCompiledCode(&assemblyInterface); // This should never return ASSERT(0); } void Arm64TaskData::Interpret() { while (true) { switch (RunInterpreter(this)) { case ReturnCall: // After the call there will be an enter-int instruction so that when this // returns we will re-enter the interpreter. The number of arguments for // this call is after that. while ((uintptr_t)interpreterPc & 3) { ASSERT(interpreterPc[0] == INSTR_no_op); interpreterPc++; } ASSERT(interpreterPc[0] == 0xe9); numTailArguments = interpreterPc[12]; case ReturnTailCall: { ClearExceptionPacket(); // Pop the closure. PolyWord closureWord = *assemblyInterface.stackPtr++; PolyObject* closure = closureWord.AsObjPtr(); arm64CodePointer cp = *(arm64CodePointer*)closure; if (cp[0] == 0xAA1E03E9 && cp[1] == 0xF9400350 && cp[2] == 0xD63F0200) { // If the code we're going to is interpreted push back the closure and // continue. interpreterPc = (POLYCODEPTR)cp; assemblyInterface.stackPtr--; HandleStackOverflow(128); // Make sure we have space since we're bypassing the check. continue; } assemblyInterface.registers[8] = closureWord; // Put closure in the closure reg. // Pop the return address. We may need to align this to a word boundary. POLYCODEPTR originalReturn = (POLYCODEPTR)((assemblyInterface.stackPtr++)->codeAddr); while ((uintptr_t)originalReturn & 3) { ASSERT(originalReturn[0] == INSTR_no_op); originalReturn++; } // Because of the way the build process works we only ever call functions with a single argument. ASSERT(numTailArguments == 1); assemblyInterface.registers[0] = *(assemblyInterface.stackPtr++); assemblyInterface.linkRegister = (arm64CodePointer)originalReturn; // Set the return address to caller assemblyInterface.entryPoint = *(arm64CodePointer*)closure; // Entry point to callee interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } case ReturnReturn: { ClearExceptionPacket(); // Returning from an interpreted function. Normally we'll be returning to // interpreted code. if ((uintptr_t)interpreterPc & 3) // ARM64 addresses will always be 4-byte aligned. continue; arm64CodePointer cp = (arm64CodePointer)interpreterPc; if (cp[0] == 0xAA1E03E9 && cp[1] == 0xF9400350 && cp[2] == 0xD63F0200) continue; // Pop the value we're returning. Set the entry point to the code we're returning to. assemblyInterface.registers[0] = *assemblyInterface.stackPtr++; assemblyInterface.entryPoint = cp; interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } } } } // Called from the assembly code as a result of a trap i.e. a request for // a GC or to extend the stack. void Arm64TrapHandler(PolyWord threadId) { Arm64TaskData* taskData = (Arm64TaskData*)TaskData::FindTaskForId(threadId); taskData->HandleTrap(); } void Arm64TaskData::HandleTrap() { SaveMemRegisters(); // Update globals from the memory registers. switch (this->assemblyInterface.returnReason) { case RETURN_HEAP_OVERFLOW: // The heap has overflowed. // The register mask is the word after the return. saveRegisterMask = *assemblyInterface.entryPoint++; ASSERT(0); // TODO //HeapOverflowTrap(); // Computes a value for allocWords only break; case RETURN_STACK_OVERFLOW: case RETURN_STACK_OVERFLOWEX: { // The register mask is the word after the return. saveRegisterMask = *assemblyInterface.entryPoint++; ASSERT(0); // TODO uintptr_t min_size = 0; // Size in PolyWords #if (0) if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW) { min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } else { // Stack limit overflow. If the required stack space is larger than // the fixed overflow size the code will calculate the limit in %EDI. stackItem* stackP = regDI().stackAddr; min_size = (this->stack->top - (PolyWord*)stackP) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } #endif HandleStackOverflow(min_size); break; } case RETURN_ENTER_INTERPRETER: { interpreterPc = (POLYCODEPTR)assemblyInterface.linkRegister; byte reasonCode = *interpreterPc++; // Sort out arguments. assemblyInterface.exceptionPacket = TAGGED(0); if (reasonCode == 0xff) { // Exception handler. ASSERT(0); // Not used assemblyInterface.exceptionPacket = assemblyInterface.registers[0]; // Get the exception packet // We're already in the exception handler but we still have to // adjust the stack pointer and pop the current exception handler. assemblyInterface.stackPtr = assemblyInterface.handlerRegister; assemblyInterface.stackPtr++; assemblyInterface.handlerRegister = (assemblyInterface.stackPtr++)[0].stackAddr; } else if (reasonCode >= 128) { // Start of function. unsigned numArgs = reasonCode - 128; // We need the stack to contain: // The closure, the return address, the arguments. // Push the register args. ASSERT(numArgs == 1); // We only ever call functions with one argument. *(--assemblyInterface.stackPtr) = assemblyInterface.registers[0]; *(--assemblyInterface.stackPtr) = assemblyInterface.registers[9]; // Return address - value of X30 before enter-int *(--assemblyInterface.stackPtr) = assemblyInterface.registers[8]; // Closure } else { // Return from call. Push X0 *(--assemblyInterface.stackPtr) = assemblyInterface.registers[0]; } Interpret(); break; } default: Crash("Unknown return reason code %u", this->assemblyInterface.returnReason); } SetMemRegisters(); } void Arm64TaskData::HandleStackOverflow(uintptr_t space) { uintptr_t min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE + space; try { // The stack check has failed. This may either be because we really have // overflowed the stack or because the stack limit value has been adjusted // to result in a call here. CheckAndGrowStack(this, min_size); } catch (IOException&) { // We may get an exception while handling this if we run out of store } { PLocker l(&interruptLock); // Set the stack limit. This clears any interrupt and also sets the // correct value if we've grown the stack. assemblyInterface.stackLimit = (stackItem*)stack->bottom + OVERFLOW_STACK_SIZE; } try { processes->ProcessAsynchRequests(this); // Release and re-acquire use of the ML memory to allow another thread // to GC. processes->ThreadReleaseMLMemory(this); processes->ThreadUseMLMemory(this); } catch (IOException&) { } } void Arm64TaskData::InitStackFrame(TaskData* parentTask, Handle proc) /* Initialise stack frame. */ { StackSpace* space = this->stack; StackObject* stack = (StackObject*)space->stack(); PolyObject* closure = DEREFWORDHANDLE(proc); uintptr_t stack_size = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem); assemblyInterface.stackPtr = (stackItem*)stack + stack_size; assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE; assemblyInterface.handlerRegister = assemblyInterface.stackPtr; // Store the argument and the closure. assemblyInterface.registers[8] = proc->Word(); // Closure assemblyInterface.registers[0] = TAGGED(0); // Argument assemblyInterface.linkRegister = 0; // We never return // Have to set the register mask in case we get a GC before the thread starts. saveRegisterMask = (1 << 8) | 1; // X8 and X0 } // This is called from a different thread so we have to be careful. void Arm64TaskData::InterruptCode() { PLocker l(&interruptLock); // Set the stack limit pointer to the top of the stack to cause // a trap when we next check for stack overflow. // We use a lock here to ensure that we always use the current value of the // stack. The thread we're interrupting could be growing the stack at this point. if (stack != 0) assemblyInterface.stackLimit = (stackItem*)(stack->top - 1); } // Called before entering ML code from the run-time system void Arm64TaskData::SetMemRegisters() { // Copy the current store limits into variables before we go into the assembly code. // If we haven't yet set the allocation area or we don't have enough we need // to create one (or a new one). if (allocPointer <= allocLimit + allocWords) { if (allocPointer < allocLimit) Crash("Bad length in heap overflow trap"); // Find some space to allocate in. Updates taskData->allocPointer and // returns a pointer to the newly allocated space (if allocWords != 0) PolyWord* space = processes->FindAllocationSpace(this, allocWords, true); if (space == 0) { // We will now raise an exception instead of returning. // Set allocWords to zero so we don't set the allocation register // since that could be holding the exception packet. allocWords = 0; } // Undo the allocation just now. allocPointer += allocWords; } if (this->allocWords != 0) { // If we have had a heap trap we actually do the allocation here. // We will have already garbage collected and recovered sufficient space. // This also happens if we have just trapped because of store profiling. allocPointer -= allocWords; // Now allocate // Set the allocation register to this area. N.B. This is an absolute address. #if (0) if (this->allocReg < 15) get_reg(this->allocReg)[0].codeAddr = (POLYCODEPTR)(this->allocPointer + 1); /* remember: it's off-by-one */ #endif allocWords = 0; } // If we have run out of store, either just above or while allocating in the RTS, // allocPointer and allocLimit will have been set to zero as part of the GC. We will // now be raising an exception which may free some store but we need to come back here // before we allocate anything. The compiled code uses unsigned arithmetic to check for // heap overflow but only after subtracting the space required. We need to make sure // that the values are still non-negative after substracting any object size. if (allocPointer == 0) allocPointer += MAX_OBJECT_SIZE; if (allocLimit == 0) allocLimit += MAX_OBJECT_SIZE; assemblyInterface.localMbottom = allocLimit + 1; assemblyInterface.localMpointer = allocPointer + 1; // If we are profiling store allocation we set mem_hl so that a trap // will be generated. if (profileMode == kProfileStoreAllocation) assemblyInterface.localMbottom = assemblyInterface.localMpointer; assemblyInterface.threadId = threadObject; } // This is called whenever we have returned from ML to C. void Arm64TaskData::SaveMemRegisters() { // The normal return is to the link register address. assemblyInterface.entryPoint = assemblyInterface.linkRegister; if (interpreterPc == 0) // Not if we're already in the interpreter allocPointer = assemblyInterface.localMpointer - 1; allocWords = 0; assemblyInterface.exceptionPacket = TAGGED(0); saveRegisterMask = 0; } // As far as possible we want locking and unlocking an ML mutex to be fast so // we try to implement the code in the assembly code using appropriate // interlocked instructions. That does mean that if we need to lock and // unlock an ML mutex in this code we have to use the same, machine-dependent, // code to do it. These are defaults that are used where there is no // machine-specific code. #if defined(_MSC_VER) // This saves having to define it in the MASM assembly code. static POLYSIGNED Arm64AsmAtomicExchangeAndAdd(PolyObject* mutexp, POLYSIGNED addend) { # if (SIZEOF_POLYWORD == 8) return InterlockedExchangeAdd64((LONG64*)mutexp, addend); # else return InterlockedExchangeAdd((LONG*)mutexp, addend); # endif } #else extern "C" { // This is only defined in the GAS assembly code POLYSIGNED Arm64AsmAtomicExchangeAndAdd(PolyObject*, POLYSIGNED); } #endif // Do the exchange-and-add POLYSIGNED Arm64TaskData::AtomicExchAdd(PolyObject* mutexp, POLYSIGNED incr) { return Arm64AsmAtomicExchangeAndAdd(mutexp, incr - TAGGED(0).AsSigned()/* Remove the tag */); } // Release a mutex. Because the atomic increment and decrement // use the hardware atomic load-and-add we can simply set this to zero. void Arm64TaskData::AtomicReset(PolyObject* mutexp) { mutexp->Set(0, TAGGED(0)); // Set this to released. } bool Arm64TaskData::AddTimeProfileCount(SIGNALCONTEXT *context) { if (interpreterPc != 0) { // See if the PC we've got is an ML code address. MemSpace *space = gMem.SpaceForAddress(interpreterPc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { incrementCountAsynch(interpreterPc); return true; } } return false; } extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolyInterpretedEnterIntMode(); POLYEXTERNALSYMBOL POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function); } // Do we require EnterInt instructions and if so for which architecture? // 0 = > None; 1 => X86_32, 2 => X86_64. 3 => X86_32_in_64. 4 => ARM_64. POLYUNSIGNED PolyInterpretedEnterIntMode() { return TAGGED(4).AsUnsigned(); } // End the first stage of bootstrap mode and run a new function. // The first stage is always interpreted. Once that is complete every function will have // at least an executable "enter-interpreter" stub so it can be called as machine code. POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function) { TaskData* taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle pushedFunction = taskData->saveVec.push(function); arm64Dependent.mustInterpret = false; taskData->InitStackFrame(taskData, pushedFunction); taskData->EnterPolyCode(); // Should never return. ASSERT(0); return TAGGED(0).AsUnsigned(); } // No machine-specific calls in the interpreter. struct _entrypts machineSpecificEPT[] = { { "PolyInterpretedEnterIntMode", (polyRTSFunction)&PolyInterpretedEnterIntMode }, { "PolyEndBootstrapMode", (polyRTSFunction)&PolyEndBootstrapMode }, { NULL, NULL} // End of list. }; diff --git a/libpolyml/poly_specific.cpp b/libpolyml/poly_specific.cpp index 9a797525..44eccb54 100644 --- a/libpolyml/poly_specific.cpp +++ b/libpolyml/poly_specific.cpp @@ -1,466 +1,468 @@ /* Title: poly_specific.cpp - Poly/ML specific RTS calls. Copyright (c) 2006, 2015-17, 2019, 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* This module is used for various run-time calls that are either in the PolyML structure or otherwise specific to Poly/ML. */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) 0 #endif #ifdef HAVE_STRING_H #include #endif #include "globals.h" #include "poly_specific.h" #include "arb.h" #include "mpoly.h" #include "sys.h" #include "machine_dep.h" #include "polystring.h" #include "run_time.h" #include "version.h" #include "save_vec.h" #include "version.h" #include "memmgr.h" #include "processes.h" #include "gc.h" #include "rtsentry.h" extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolySpecificGeneral(FirstArgument threadId, PolyWord code, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetABI(); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableCode(FirstArgument threadId, PolyWord byteSeg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableClosure(FirstArgument threadId, PolyWord closure); POLYEXTERNALSYMBOL POLYUNSIGNED PolyCopyByteVecToCode(FirstArgument threadId, PolyWord byteVec); POLYEXTERNALSYMBOL POLYUNSIGNED PolyCopyByteVecToClosure(FirstArgument threadId, PolyWord byteVec, PolyWord closure); POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeConstant(PolyWord closure, PolyWord offset, PolyWord c, PolyWord flags); POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeByte(PolyWord closure, PolyWord offset, PolyWord c); POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCodeByte(PolyWord closure, PolyWord offset); POLYEXTERNALSYMBOL POLYUNSIGNED PolySortArrayOfAddresses(PolyWord array); POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest4(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4); POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest5(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4, PolyWord arg5); } #define SAVE(x) taskData->saveVec.push(x) #ifndef GIT_VERSION #define GIT_VERSION "" #endif Handle poly_dispatch_c(TaskData *taskData, Handle args, Handle code) { unsigned c = get_C_unsigned(taskData, DEREFWORD(code)); switch (c) { case 9: // Return the GIT version if appropriate { return SAVE(C_string_to_Poly(taskData, GIT_VERSION)); } case 10: // Return the RTS version string. { const char *version; switch (machineDependent->MachineArchitecture()) { case MA_Interpreted: version = "Portable-" TextVersion; break; case MA_I386: version = "I386-" TextVersion; break; case MA_X86_64: version = "X86_64-" TextVersion; break; case MA_X86_64_32: version = "X86_64_32-" TextVersion; break; case MA_Arm64: version = "Arm64-" TextVersion; break; default: version = "Unknown-" TextVersion; break; } return SAVE(C_string_to_Poly(taskData, version)); } case 12: // Return the architecture // Used in InitialPolyML.ML for PolyML.architecture { const char *arch; switch (machineDependent->MachineArchitecture()) { case MA_Interpreted: arch = "Interpreted"; break; case MA_I386: arch = "I386"; break; case MA_X86_64: arch = "X86_64"; break; case MA_X86_64_32: arch = "X86_64_32"; break; case MA_Arm64: arch = "Arm64"; break; default: arch = "Unknown"; break; } return SAVE(C_string_to_Poly(taskData, arch)); } case 19: // Return the RTS argument help string. return SAVE(C_string_to_Poly(taskData, RTSArgHelp())); default: { char msg[100]; sprintf(msg, "Unknown poly-specific function: %d", c); raise_exception_string(taskData, EXC_Fail, msg); return 0; } } } // General interface to poly-specific. Ideally the various cases will be made into // separate functions. POLYUNSIGNED PolySpecificGeneral(FirstArgument threadId, PolyWord code, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedCode = taskData->saveVec.push(code); Handle pushedArg = taskData->saveVec.push(arg); Handle result = 0; try { result = poly_dispatch_c(taskData, pushedArg, pushedCode); } catch (...) { } // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } // Return the ABI - i.e. the calling conventions used when calling external functions. POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetABI() { // Return the ABI. For 64-bit we need to know if this is Windows. #if (SIZEOF_VOIDP == 8) #if (defined(_WIN32) || defined(__CYGWIN__)) return TAGGED(2).AsUnsigned(); // 64-bit Windows #else return TAGGED(1).AsUnsigned(); // 64-bit Unix #endif #else return TAGGED(0).AsUnsigned(); // 32-bit Unix and Windows #endif } // Code generation - Code is initially allocated in a byte segment. When all the // values have been set apart from any addresses the byte segment is copied into // a mutable code segment. // PolyCopyByteVecToCode is now replaced by PolyCopyByteVecToClosure POLYEXTERNALSYMBOL POLYUNSIGNED PolyCopyByteVecToCode(FirstArgument threadId, PolyWord byteVec) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(byteVec); PolyObject *result = 0; try { if (!pushedArg->WordP()->IsByteObject()) raise_fail(taskData, "Not byte data area"); do { PolyObject *initCell = pushedArg->WordP(); POLYUNSIGNED requiredSize = initCell->Length(); result = gMem.AllocCodeSpace(requiredSize); if (result == 0) { // Could not allocate - must GC. if (!QuickGC(taskData, pushedArg->WordP()->Length())) raise_fail(taskData, "Insufficient memory"); } else memcpy(result, initCell, requiredSize * sizeof(PolyWord)); } while (result == 0); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return ((PolyWord)result).AsUnsigned(); } // Copy the byte vector into code space. POLYUNSIGNED PolyCopyByteVecToClosure(FirstArgument threadId, PolyWord byteVec, PolyWord closure) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedByteVec = taskData->saveVec.push(byteVec); Handle pushedClosure = taskData->saveVec.push(closure); PolyObject *result = 0; try { if (!pushedByteVec->WordP()->IsByteObject()) raise_fail(taskData, "Not byte data area"); if (pushedClosure->WordP()->Length() != sizeof(PolyObject*)/sizeof(PolyWord)) raise_fail(taskData, "Invalid closure size"); if (!pushedClosure->WordP()->IsMutable()) raise_fail(taskData, "Closure is not mutable"); do { PolyObject *initCell = pushedByteVec->WordP(); POLYUNSIGNED requiredSize = initCell->Length(); result = gMem.AllocCodeSpace(requiredSize); if (result == 0) { // Could not allocate - must GC. if (!QuickGC(taskData, pushedByteVec->WordP()->Length())) raise_fail(taskData, "Insufficient memory"); } else memcpy(gMem.SpaceForObjectAddress(result)->writeAble((byte*)result), initCell, requiredSize * sizeof(PolyWord)); } while (result == 0); } catch (...) {} // If an ML exception is raised // Store the code address in the closure. *((PolyObject**)pushedClosure->WordP()) = result; // Lock the closure. pushedClosure->WordP()->SetLengthWord(pushedClosure->WordP()->LengthWord() & ~_OBJ_MUTABLE_BIT); taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Code generation - Lock a mutable code segment and return the original address. // Currently this does not allocate so other than the exception it could // be a fast call. POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableCode(FirstArgument threadId, PolyWord byteSeg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(byteSeg); Handle result = 0; try { PolyObject *codeObj = pushedArg->WordP(); if (!codeObj->IsCodeObject() || !codeObj->IsMutable()) raise_fail(taskData, "Not mutable code area"); POLYUNSIGNED segLength = codeObj->Length(); codeObj->SetLengthWord(segLength, F_CODE_OBJ); // This is really a legacy of the PPC code-generator. machineDependent->FlushInstructionCache(codeObj, segLength * sizeof(PolyWord)); // In the future it may be necessary to return a different address here. // N.B. The code area should only have execute permission in the native // code version, not the interpreted version. result = pushedArg; // Return the original address. } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } // Replacement for above POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableClosure(FirstArgument threadId, PolyWord closure) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); PolyObject *codeObj = *(PolyObject**)(closure.AsObjPtr()); try { if (!codeObj->IsCodeObject() || !codeObj->IsMutable()) raise_fail(taskData, "Not mutable code area"); POLYUNSIGNED segLength = codeObj->Length(); gMem.SpaceForObjectAddress(codeObj)->writeAble(codeObj)->SetLengthWord(segLength, F_CODE_OBJ); + // Flush cache on ARM at least. + machineDependent->FlushInstructionCache(codeObj, segLength * sizeof(PolyWord)); // In the future it may be necessary to return a different address here. // N.B. The code area should only have execute permission in the native // code version, not the interpreted version. } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Set code constant. This can be a fast call. // This is in the RTS both because we pass a closure in here and cannot have // code addresses in 32-in-64 and also because we need to ensure there is no // possibility of a GC while the code is an inconsistent state. POLYUNSIGNED PolySetCodeConstant(PolyWord closure, PolyWord offset, PolyWord cWord, PolyWord flags) { byte *pointer; // Previously we passed the code address in here and we need to // retain that for legacy code. This is now the closure. if (closure.AsObjPtr()->IsCodeObject()) pointer = closure.AsCodePtr(); else pointer = *(POLYCODEPTR*)(closure.AsObjPtr()); // pointer is the start of the code segment. // c will usually be an address. // offset is a byte offset pointer += offset.UnTaggedUnsigned(); byte* writeable = gMem.SpaceForAddress(pointer)->writeAble(pointer); switch (UNTAGGED(flags)) { case 0: // Absolute constant - size PolyWord { POLYUNSIGNED c = cWord.AsUnsigned(); #ifdef WORDS_BIGENDIAN // This is used to store constants in the constant area // on the interpreted version. for (unsigned i = sizeof(PolyWord); i > 0; i--) { writeable[i-1] = (byte)(c & 255); c >>= 8; } #else for (unsigned i = 0; i < sizeof(PolyWord); i++) { writeable[i] = (byte)(c & 255); c >>= 8; } #endif break; } case 1: // Relative constant - X86 - size 4 bytes { // The offset is relative to the END of the constant. byte *target; // In 32-in-64 we pass in the closure address here // rather than the code address. if (cWord.AsObjPtr()->IsCodeObject()) target = cWord.AsCodePtr(); else target = *(POLYCODEPTR*)(cWord.AsObjPtr()); size_t c = target - pointer - 4; for (unsigned i = 0; i < sizeof(PolyWord); i++) { writeable[i] = (byte)(c & 255); c >>= 8; } break; } case 2: // Absolute constant - size uintptr_t // This is the same as case 0 except in 32-in-64 when // it is an absolute address rather than an object pointer. { uintptr_t c = (uintptr_t)(cWord.AsObjPtr()); for (unsigned i = 0; i < sizeof(uintptr_t); i++) { pointer[i] = (byte)(c & 255); c >>= 8; } break; } } return TAGGED(0).AsUnsigned(); } // Set a code byte. This needs to be in the RTS because it uses the closure POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeByte(PolyWord closure, PolyWord offset, PolyWord cWord) { byte *pointer = *(POLYCODEPTR*)(closure.AsObjPtr()); byte* writable = gMem.SpaceForAddress(pointer)->writeAble(pointer); writable[UNTAGGED_UNSIGNED(offset)] = (byte)UNTAGGED_UNSIGNED(cWord); return TAGGED(0).AsUnsigned(); } POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCodeByte(PolyWord closure, PolyWord offset) { byte *pointer = *(POLYCODEPTR*)(closure.AsObjPtr()); return TAGGED(pointer[UNTAGGED_UNSIGNED(offset)]).AsUnsigned(); } static int compare(const void *a, const void *b) { PolyWord *av = (PolyWord*)a; PolyWord *bv = (PolyWord*)b; if ((*av).IsTagged() || (*bv).IsTagged()) return 0; // Shouldn't happen PolyObject *ao = (*av).AsObjPtr(), *bo = (*bv).AsObjPtr(); if (ao->Length() < 1 || bo->Length() < 1) return 0; // Shouldn't happen if (ao->Get(0).AsUnsigned() < bo->Get(0).AsUnsigned()) return -1; if (ao->Get(0).AsUnsigned() > bo->Get(0).AsUnsigned()) return 1; return 0; } // Sort an array of addresses. This is used in the code-generator to search for // duplicates in the address area. The argument is an array of pairs. The first // item of each pair is an address, the second is an identifier of some kind. POLYEXTERNALSYMBOL POLYUNSIGNED PolySortArrayOfAddresses(PolyWord array) { if (!array.IsDataPtr()) return(TAGGED(0)).AsUnsigned(); PolyObject *arrayP = array.AsObjPtr(); POLYUNSIGNED numberOfItems = arrayP->Length(); if (!arrayP->IsMutable()) return(TAGGED(0)).AsUnsigned(); qsort(arrayP, numberOfItems, sizeof(PolyWord), compare); return (TAGGED(1)).AsUnsigned(); } POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest4(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4) { switch (arg1.UnTaggedUnsigned()) { case 1: return arg1.AsUnsigned(); case 2: return arg2.AsUnsigned(); case 3: return arg3.AsUnsigned(); case 4: return arg4.AsUnsigned(); default: return TAGGED(0).AsUnsigned(); } } POLYEXTERNALSYMBOL POLYUNSIGNED PolyTest5(FirstArgument threadId, PolyWord arg1, PolyWord arg2, PolyWord arg3, PolyWord arg4, PolyWord arg5) { switch (arg1.UnTaggedUnsigned()) { case 1: return arg1.AsUnsigned(); case 2: return arg2.AsUnsigned(); case 3: return arg3.AsUnsigned(); case 4: return arg4.AsUnsigned(); case 5: return arg5.AsUnsigned(); default: return TAGGED(0).AsUnsigned(); } } struct _entrypts polySpecificEPT[] = { { "PolySpecificGeneral", (polyRTSFunction)&PolySpecificGeneral}, { "PolyGetABI", (polyRTSFunction)&PolyGetABI }, { "PolyCopyByteVecToCode", (polyRTSFunction)&PolyCopyByteVecToCode }, { "PolyCopyByteVecToClosure", (polyRTSFunction)&PolyCopyByteVecToClosure }, { "PolyLockMutableCode", (polyRTSFunction)&PolyLockMutableCode }, { "PolyLockMutableClosure", (polyRTSFunction)&PolyLockMutableClosure }, { "PolySetCodeConstant", (polyRTSFunction)&PolySetCodeConstant }, { "PolySetCodeByte", (polyRTSFunction)&PolySetCodeByte }, { "PolyGetCodeByte", (polyRTSFunction)&PolyGetCodeByte }, { "PolySortArrayOfAddresses", (polyRTSFunction)&PolySortArrayOfAddresses }, { "PolyTest4", (polyRTSFunction)&PolyTest4 }, { "PolyTest5", (polyRTSFunction)&PolyTest5 }, { NULL, NULL} // End of list. }; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml new file mode 100644 index 00000000..25813ac1 --- /dev/null +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml @@ -0,0 +1,204 @@ +(* + Copyright (c) 2021 David C. J. Matthews + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + Licence version 2.1 as published by the Free Software Foundation. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public Licence for more details. + + You should have received a copy of the GNU Lesser General Public + Licence along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*) + +functor ARM64ASSEMBLY ( + structure Debug: DEBUG + and Pretty: PRETTYSIG + and CodeArray: CODEARRAYSIG +) : Arm64Assembly = + +struct + open CodeArray Address + + exception InternalError = Misc.InternalError + + infix 5 << <<+ <<- >> >>+ >>- ~>> ~>>+ ~>>- (* Shift operators *) + infix 3 andb orb xorb andbL orbL xorbL andb8 orb8 xorb8 + + val op << = Word.<< and op >> = Word.>> and op ~>> = Word.~>> + + val wordToWord8 = Word8.fromLargeWord o Word.toLargeWord + and word8ToWord = Word.fromLargeWord o Word8.toLargeWord + + datatype code = + Code of + { + constVec: machineWord list ref, (* Constant area constant values. *) + functionName: string, (* Name of the function. *) + printAssemblyCode:bool, (* Whether to print the code when we finish. *) + printStream: string->unit (* The stream to use *) + } + + fun codeCreate (name, parameters) = + let + val printStream = Pretty.getSimplePrinter(parameters, []) + in + Code + { + constVec = ref [], + functionName = name, + printAssemblyCode = Debug.getParameter Debug.assemblyCodeTag parameters, + printStream = printStream + } + end + + val retCode = 0wxD65F03C0 + and nopCode = 0wxD503201F + and mov1ToX0 = 0wxD2800020 + + fun codeSize _ = 1 (* Number of 32-bit words *) + + fun foldCode startIc foldFn ops = + let + fun doFold(oper :: operList, ic) = + doFold(operList, + (* Get the size BEFORE any possible change. *) + ic + Word.fromInt(codeSize oper) * 0w4 before foldFn(oper, ic)) + | doFold(_, ic) = ic + in + doFold(ops, startIc) + end + + + fun genCode(ops, Code {constVec, ...}) = + let + + (* First pass - set the labels. *) + fun setLabelsAndSizes ops = Word.fromInt(List.length ops) + val codeSize = setLabelsAndSizes ops (* Number of 32-bit instructions *) + val wordsOfCode = (codeSize + 0w1) div 0w2 (* Round up to 64-bits *) + val paddingWord = if Word.andb(codeSize, 0w1) = 0w1 then [nopCode] else [] + + val segSize = wordsOfCode + Word.fromInt(List.length(! constVec)) + 0w4 (* 4 extra words *) + val codeVec = byteVecMake segSize + + + fun genCodeWords(code, byteNo) = + ( + (* Little-endian order *) + byteVecSet(codeVec, byteNo, wordToWord8 code); + byteVecSet(codeVec, byteNo+0w1, wordToWord8(code >> 0w8)); + byteVecSet(codeVec, byteNo+0w2, wordToWord8(code >> 0w16)); + byteVecSet(codeVec, byteNo+0w3, wordToWord8(code >> 0w24)) + ) + + in + foldCode 0w0 genCodeWords (ops @ paddingWord); + (codeVec (* Return the completed code. *), wordsOfCode (* And the size in 64-bit words. *)) + end + + (* Store a 64-bit value in the code *) + fun set64(value, wordNo, seg) = + let + val addrs = wordNo * 0w8 + fun putBytes(value, a, seg, i) = + if i = 0w8 then () + else + ( + byteVecSet(seg, a+i, Word8.fromInt(value mod 256)); + putBytes(value div 256, a, seg, i+0w1) + ) + in + putBytes(value, addrs, seg, 0w0) + end + + + + fun printCode (codeVec, functionName, wordsOfCode, printStream) = + (printStream "Code for "; printStream functionName) + + (* Adds the constants onto the code, and copies the code into a new segment *) + fun copyCode {code as + Code{ printAssemblyCode, printStream, + functionName, constVec, ...}, maxStack, numberOfArguments, resultClosure} = + let + local + val codeList = [mov1ToX0, retCode] + (* Add a stack check. This is only needed if the + function needs more than 128 words since the call and tail functions + check for this much. *) + in + val codeList = + if maxStack < 128 + then codeList + else raise InternalError "TODO" (* SimpleCode[opcode_stackSize16, Word8.fromInt maxStack, Word8.fromInt(maxStack div 256)] :: codeList *) + end + + val (byteVec, wordsOfCode) = genCode(codeList, code) + + (* +3 for profile count, function name and constants count *) + val numOfConst = List.length(! constVec) + val segSize = wordsOfCode + Word.fromInt numOfConst + 0w4 + val firstConstant = wordsOfCode + 0w3 (* Add 3 for no of consts, fn name and profile count. *) + + (* Put in the number of constants. This must go in before + we actually put in any constants. *) + local + val lastWord = segSize - 0w1 + in + val () = set64(numOfConst + 2, wordsOfCode, byteVec) + (* Set the last word of the code to the (negative) byte offset of the start of the code area + from the end of this word. *) + val () = set64((numOfConst + 3) * ~8, lastWord, byteVec) + end + + (* Now we've filled in all the size info we need to convert the segment + into a proper code segment before it's safe to put in any ML values. *) + val codeVec = byteVecToCodeVec(byteVec, resultClosure) + + local + val name : string = functionName + val nameWord : machineWord = toMachineWord name + in + val () = codeVecPutWord (codeVec, wordsOfCode+0w1, nameWord) + end + (* Profile ref. A byte ref used by the profiler in the RTS. *) + local + val v = RunCall.allocateByteMemory(0w1, Word.fromLargeWord(Word8.toLargeWord(Word8.orb(F_mutable, F_bytes)))) + fun clear 0w0 = () + | clear i = (assignByte(v, i-0w1, 0w0); clear (i-0w1)) + val () = clear(wordSize) + in + val () = codeVecPutWord (codeVec, wordsOfCode+0w2, toMachineWord v) + end + + (* and then copy the constants from the constant list. *) + local + fun setConstant(value, num) = + ( + codeVecPutWord (codeVec, firstConstant + num, value); + num+0w1 + ) + in + val _ = List.foldl setConstant 0w0 (!constVec) + end + in + if printAssemblyCode orelse true + then (* print out the code *) + (printCode (codeVec, functionName, wordsOfCode, printStream); printStream"\n") + else (); + codeVecLock(codeVec, resultClosure) + end (* copyCode *) + + + structure Sharing = + struct + type code = code + type closureRef = closureRef + end +end; + diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig similarity index 59% copy from mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML copy to mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig index e8f24f6a..cc791058 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig @@ -1,27 +1,36 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) -local - structure FallBackCG = ByteCode -in - structure Arm64Code = - Arm64GenCode( - structure FallBackCG = FallBackCG - structure BackendTree = BackendIntermediateCode - structure CodeArray = CODE_ARRAY - ) +signature Arm64Assembly = +sig + type code + type closureRef + + (* Create a code value for the function. *) + val codeCreate: string * Universal.universal list -> code + + + (* copyCode - create the vector of code and update the closure reference to + point to it. *) + val copyCode: {code: code, maxStack: int, numberOfArguments: int, resultClosure: closureRef} -> unit + + structure Sharing: + sig + type code = code + type closureRef = closureRef + end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml index 54de1389..69adbb58 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml @@ -1,52 +1,75 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64GenCode ( structure FallBackCG: GENCODESIG and BackendTree: BackendIntermediateCodeSig and CodeArray: CODEARRAYSIG + and Arm64Assembly: Arm64Assembly - sharing FallBackCG.Sharing = BackendTree.Sharing = CodeArray.Sharing + sharing FallBackCG.Sharing = BackendTree.Sharing = CodeArray.Sharing = Arm64Assembly.Sharing ) : GENCODESIG = struct - open BackendTree CodeArray + open BackendTree CodeArray Arm64Assembly Address exception Fallback - fun gencodeLambda(lambda, args, closureRef) = - ( - raise Fallback - ) - (* If we can't do it fall back to the interpreter. *) - handle Fallback => FallBackCG.gencodeLambda(lambda, args, closureRef) - + fun codegen (pt, cvec, resultClosure, numOfArgs, localCount, parameters) = + let + val maxStack = ref 0 + + fun genCode (BICConstnt(w, _)) = + if isShort w andalso toShort w = 0w0 then () else raise Fallback + | genCode _ = raise Fallback + + val () = genCode pt + + fun breakHere () = () + val () = breakHere() + in + copyCode{code = cvec, maxStack = !maxStack, resultClosure=resultClosure, numberOfArguments=numOfArgs} + end + fun gencodeLambda(lambda as { name, body, argTypes, localCount, ...}:bicLambdaForm, parameters, closure) = + (let + (* make the code buffer for the new function. *) + val newCode : code = codeCreate (name, parameters) + (* This function must have no non-local references. *) + + val _ = + case (argTypes, localCount) of + ([GeneralType], 0) => () + | _ => raise Fallback + in + codegen (body, newCode, closure, List.length argTypes, localCount, parameters) + end) handle Fallback => FallBackCG.gencodeLambda(lambda, parameters, closure) + structure Foreign: FOREIGNCALLSIG = struct open FallBackCG.Foreign end structure Sharing = struct type backendIC = backendIC and argumentType = argumentType and bicLoadForm = bicLoadForm and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML index e8f24f6a..0ce53888 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML @@ -1,27 +1,35 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) local structure FallBackCG = ByteCode + + structure Arm64Assembly = + ARM64ASSEMBLY ( + structure Debug = Debug + and Pretty = Pretty + and CodeArray = CODE_ARRAY + ) in structure Arm64Code = Arm64GenCode( structure FallBackCG = FallBackCG - structure BackendTree = BackendIntermediateCode - structure CodeArray = CODE_ARRAY + and BackendTree = BackendIntermediateCode + and CodeArray = CODE_ARRAY + and Arm64Assembly = Arm64Assembly ) end; diff --git a/polymlArm64.pyp b/polymlArm64.pyp index a1c77b18..bb679404 100644 --- a/polymlArm64.pyp +++ b/polymlArm64.pyp @@ -1,221 +1,223 @@ + +