diff --git a/libpolyml/arm64.cpp b/libpolyml/arm64.cpp index a89abbc0..7f02e0a7 100644 --- a/libpolyml/arm64.cpp +++ b/libpolyml/arm64.cpp @@ -1,870 +1,871 @@ /* Machine-dependent code for ARM64 Copyright David C.J. Matthews 2020-21. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ // Currently this is just copied from the interpreted version. #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) 0 #endif #ifdef HAVE_STRING_H #include #endif #include "globals.h" #include "machine_dep.h" #include "sys.h" #include "profiling.h" #include "arb.h" #include "processes.h" #include "run_time.h" #include "gc.h" #include "diagnostics.h" #include "polystring.h" #include "save_vec.h" #include "memmgr.h" #include "scanaddrs.h" #include "rtsentry.h" #include "bytecode.h" #include "int_opcodes.h" /* * ARM64 register use: * X0 First argument and return value * X1-X7 Second-eighth argument * X8 Indirect result (C), ML closure pointer on entry * X9-X15 Volatile scratch registers * X16-17 Intra-procedure-call (C). Only used for special cases in ML. * X18 Platform register. Not used in ML. * X19-X23 Non-volatile (C). Scratch registers (ML). * X24 Non-volatile (C). Scratch register (ML). Heap base in 32-in-64. 
* X25 ML Heap limit pointer * X26 ML assembly interface pointer. Non-volatile (C). * X27 ML Heap allocation pointer. Non-volatile (C). * X28 ML Stack pointer. Non-volatile (C). * X29 Frame pointer (C). Not used in ML * X30 Link register. Not used in ML. * X31 Stack pointer (C). Not used in ML. Also zero register. * * Floating point registers: * V0 First argument and return value * V1-V7 Second-eighth argument * V8-V15 Non volatile. Not currently used in ML. * V16-V31 Volatile. Not currently used in ML. * * The ML calling conventions generally follow the C ABI except that * all registers are volatile and X28 is used for the stack. */ /* the amount of ML stack space to reserve for registers, C exception handling etc. The compiler requires us to reserve 2 stack-frames worth (2 * 20 words). We actually reserve slightly more than this. */ // Arm64 instructions are all 32-bit values. typedef uint32_t arm64Instr, *arm64CodePointer; // Each function checks for space on the stack at the start. To reduce the // code size it assumes there are at least 10 words on the stack and only // checks the exact space if it requires more than that. For safety we // always make sure there are 50 words spare. #define OVERFLOW_STACK_SIZE 50 // X26 always points at this area when executing ML code. // The offsets are built into the assembly code and some are built into // the code generator so this must not be changed without checking these. typedef struct _AssemblyArgs { public: byte* enterInterpreter; // These are filled in with the functions. byte* heapOverFlowCall; byte* stackOverFlowCall; byte* stackOverFlowCallEx; byte* trapHandlerEntry; stackItem* handlerRegister; // Current exception handler stackItem* stackLimit; // Lower limit of stack stackItem exceptionPacket; // Set if there is an exception PolyWord threadId; // My thread id. Saves having to call into RTS for it. 
stackItem registers[25]; // Save/load area for registers X0-X24 inclusive double fpRegisters[8]; // Save/load area for floating point regs D0-D7 PolyWord* localMbottom; // Base of memory + 1 word PolyWord* localMpointer; // X27 Allocation ptr + 1 word stackItem* stackPtr; // X28 Current stack pointer arm64CodePointer linkRegister; // X30 - Link register (return address) arm64CodePointer entryPoint; // PC address to return to byte returnReason; // Reason for returning from ML - Set by assembly code. } AssemblyArgs; class Arm64TaskData: public TaskData, ByteCodeInterpreter { public: Arm64TaskData(); ~Arm64TaskData() {} unsigned allocReg; // The register to take the allocated space. POLYUNSIGNED allocWords; // The words to allocate. AssemblyArgs assemblyInterface; uint32_t saveRegisterMask; // Registers that need to be updated by a GC. virtual void GarbageCollect(ScanAddress *process); void ScanStackAddress(ScanAddress *process, stackItem& val, StackSpace *stack); virtual void EnterPolyCode(); // Start running ML virtual void SetException(poly_exn *exc) { assemblyInterface.exceptionPacket = (PolyWord)exc; } virtual void InterruptCode(); // AddTimeProfileCount is used in time profiling. virtual bool AddTimeProfileCount(SIGNALCONTEXT *context); virtual void InitStackFrame(TaskData *newTask, Handle proc); // Atomically release a mutex using hardware interlock. virtual bool AtomicallyReleaseMutex(PolyObject* mutexp); // Return the minimum space occupied by the stack. Used when setting a limit. // N.B. This is PolyWords not native words. 
virtual uintptr_t currentStackSpace(void) const { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE; } virtual void addProfileCount(POLYUNSIGNED words) { addSynchronousCount(interpreterPc, words); } // PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer virtual void PreRTSCall(void) { TaskData::PreRTSCall(); SaveMemRegisters(); } // PostRTSCall: Before returning we need to restore the heap pointer. // If there has been a GC in the RTS call we need to create a new heap area. virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall(); } virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length); void SetMemRegisters(); void SaveMemRegisters(); void HandleTrap(); // ByteCode overrides. The interpreter and native code states need to be in sync. // The interpreter is only used during the initial bootstrap. virtual void ClearExceptionPacket() { assemblyInterface.exceptionPacket = TAGGED(0); } virtual PolyWord GetExceptionPacket() { return assemblyInterface.exceptionPacket; } virtual stackItem* GetHandlerRegister() { return assemblyInterface.handlerRegister; } virtual void SetHandlerRegister(stackItem* hr) { assemblyInterface.handlerRegister = hr; } void Interpret(); void EndBootStrap() { mixedCode = true; } PLock interruptLock; virtual void HandleStackOverflow(uintptr_t space); }; class Arm64Dependent : public MachineDependent { public: Arm64Dependent() : mustInterpret(false) {} // Create a task data object. virtual TaskData* CreateTaskData(void) { return new Arm64TaskData(); } virtual Architectures MachineArchitecture(void); virtual void SetBootArchitecture(char arch, unsigned wordLength); // The ARM has separate instruction and data caches. virtual void FlushInstructionCache(void* p, POLYUNSIGNED bytes); // During the first bootstrap phase this is interpreted. 
bool mustInterpret; }; static Arm64Dependent arm64Dependent; MachineDependent* machineDependent = &arm64Dependent; Architectures Arm64Dependent::MachineArchitecture(void) { // During the first phase of the bootstrap we // compile the interpreted version. if (mustInterpret) return MA_Interpreted; #if defined(POLYML32IN64) return MA_Arm64_32; #else return MA_Arm64; #endif } // Values for the returnReason byte. These values are put into returnReason by the assembly code // depending on which of the "trap" functions has been called. enum RETURN_REASON { RETURN_HEAP_OVERFLOW = 1, // Heap space check has failed. RETURN_STACK_OVERFLOW = 2, // Stack space check has failed (<= 10 words). RETURN_STACK_OVERFLOWEX = 3, // Stack space check has failed. Adjusted SP is in X9. RETURN_ENTER_INTERPRETER = 4 // Native code has entered interpreted code. }; extern "C" { // These are declared in the assembly code segment. void Arm64AsmEnterCompiledCode(void*); int Arm64AsmCallExtraRETURN_ENTER_INTERPRETER(void); int Arm64AsmCallExtraRETURN_HEAP_OVERFLOW(void); int Arm64AsmCallExtraRETURN_STACK_OVERFLOW(void); int Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX(void); // This is declared here and called from the assembly code. // It avoids having a call to an external in the assembly code // which sometimes gives problems with position-indepent code. 
void Arm64TrapHandler(PolyWord threadId); }; Arm64TaskData::Arm64TaskData() : ByteCodeInterpreter(&assemblyInterface.stackPtr, &assemblyInterface.stackLimit), allocReg(0), allocWords(0), saveRegisterMask(0) { assemblyInterface.enterInterpreter = (byte*)Arm64AsmCallExtraRETURN_ENTER_INTERPRETER; assemblyInterface.heapOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_HEAP_OVERFLOW; assemblyInterface.stackOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOW; assemblyInterface.stackOverFlowCallEx = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX; assemblyInterface.trapHandlerEntry = (byte*)Arm64TrapHandler; interpreterPc = 0; mixedCode = !arm64Dependent.mustInterpret; } void Arm64Dependent::SetBootArchitecture(char arch, unsigned wordLength) { if (arch == 'I') mustInterpret = true; else if (arch != 'A') Crash("Boot file has unexpected architecture code: %c", arch); } // The ARM has separate instruction and data caches so we must flush // the cache when creating or modifying code. void Arm64Dependent::FlushInstructionCache(void* p, POLYUNSIGNED bytes) { #ifdef _WIN32 ::FlushInstructionCache(GetCurrentProcess(), p, bytes); #elif defined (__GNUC__) __builtin___clear_cache(p, (char*)p + bytes); #elif (defined (__clang__) && defined (__APPLE__)) sys_icache_invalidate(p, bytes); #else #error "No code to flush the instruction cache." #endif } void Arm64TaskData::GarbageCollect(ScanAddress *process) { TaskData::GarbageCollect(process); ByteCodeInterpreter::GarbageCollect(process); assemblyInterface.threadId = threadObject; // threadObject updated by TaskData::GarbageCollect if (assemblyInterface.exceptionPacket.w().IsDataPtr()) { PolyObject* obj = assemblyInterface.exceptionPacket.w().AsObjPtr(); obj = process->ScanObjectAddress(obj); assemblyInterface.exceptionPacket = (PolyWord)obj; } if (stack != 0) { stackItem*stackPtr = assemblyInterface.stackPtr; // Now the values on the stack. 
for (stackItem* q = stackPtr; q < (stackItem*)stack->top; q++) ScanStackAddress(process, *q, stack); } // Register mask. There is a bit for each of the registers up to X24. for (int i = 0; i < 25; i++) { if (saveRegisterMask & (1 << i)) ScanStackAddress(process, assemblyInterface.registers[i], stack); } // Make sure the code is still reachable. Code addresses aren't updated. { stackItem code; code.codeAddr = (POLYCODEPTR)assemblyInterface.linkRegister; ScanStackAddress(process, code, stack); code.codeAddr = (POLYCODEPTR)assemblyInterface.entryPoint; ScanStackAddress(process, code, stack); } } // Process a value within the stack. void Arm64TaskData::ScanStackAddress(ScanAddress *process, stackItem& stackItem, StackSpace *stack) { // We may have return addresses on the stack which could look like // tagged values. Check whether the value is in the code area before // checking whether it is untagged. #ifdef POLYML32IN64 // In 32-in-64 return addresses always have the top 32 bits non-zero. if (stackItem.argValue < ((uintptr_t)1 << 32)) { // It's either a tagged integer or an object pointer. if (stackItem.w().IsDataPtr()) { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } } else { // Could be a code address or a stack address. MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space == 0 || space->spaceType != ST_CODE) return; PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); ASSERT(obj != 0); // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } #else // The -1 here is because we may have a zero-sized cell in the last // word of a space. MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space->spaceType == ST_CODE) { PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); // If it is actually an integer it might be outside a valid code object. 
if (obj == 0) { ASSERT(stackItem.w().IsTagged()); // It must be an integer } else // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } else if (space->spaceType == ST_LOCAL && stackItem.w().IsDataPtr()) // Local values must be word addresses. { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } #endif } // Copy a stack void Arm64TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length) { #ifdef POLYML32IN64 old_length = old_length / 2; new_length = new_length / 2; #endif /* Moves a stack, updating all references within the stack */ stackItem*old_base = (stackItem*)old_stack; stackItem*new_base = (stackItem*)new_stack; stackItem*old_top = old_base + old_length; /* Calculate the offset of the new stack from the old. If the frame is being extended objects in the new frame will be further up the stack than in the old one. */ uintptr_t offset = new_base - old_base + new_length - old_length; stackItem *oldSp = assemblyInterface.stackPtr; assemblyInterface.stackPtr = oldSp + offset; assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset; /* Skip the unused part of the stack. 
*/ uintptr_t i = oldSp - old_base; ASSERT(i <= old_length); i = old_length - i; stackItem *old = oldSp; stackItem *newp = assemblyInterface.stackPtr; while (i--) { stackItem old_word = *old++; if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top) old_word.stackAddr = old_word.stackAddr + offset; else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr)) { stackItem* addr = (stackItem*)old_word.w().AsStackAddr(); if (addr >= old_base && addr <= old_top) { addr += offset; old_word = PolyWord::FromStackAddr((PolyWord*)addr); } } *newp++ = old_word; } ASSERT(old == ((stackItem*)old_stack) + old_length); ASSERT(newp == ((stackItem*)new_stack) + new_length); } void Arm64TaskData::EnterPolyCode() /* Called from "main" to enter the code. */ { assemblyInterface.stackLimit = (stackItem*)((PolyWord*)this->stack->stack() + OVERFLOW_STACK_SIZE); if (arm64Dependent.mustInterpret) { PolyWord closure = assemblyInterface.registers[8]; *(--assemblyInterface.stackPtr) = closure; /* Closure address */ interpreterPc = *(POLYCODEPTR*)closure.AsObjPtr(); Interpret(); ASSERT(0); // Should never return } SetMemRegisters(); // Jump into the ML code. This code sets up the registers and puts the // address of the assemblyInterface into X26 Arm64AsmEnterCompiledCode(&assemblyInterface); // This should never return ASSERT(0); } void Arm64TaskData::Interpret() { while (true) { switch (RunInterpreter(this)) { case ReturnCall: // After the call there will be an enter-int instruction so that when this // returns we will re-enter the interpreter. The number of arguments for // this call is after that. while ((uintptr_t)interpreterPc & 3) { ASSERT(interpreterPc[0] == INSTR_no_op); interpreterPc++; } ASSERT(interpreterPc[0] == 0xe9); numTailArguments = interpreterPc[12]; case ReturnTailCall: { ClearExceptionPacket(); // Pop the closure. 
PolyWord closureWord = *assemblyInterface.stackPtr++; PolyObject* closure = closureWord.AsObjPtr(); arm64CodePointer cp = *(arm64CodePointer*)closure; if (cp[0] == 0xAA1E03E9 && cp[1] == 0xF9400350 && cp[2] == 0xD63F0200) { // If the code we're going to is interpreted push back the closure and // continue. interpreterPc = (POLYCODEPTR)cp; assemblyInterface.stackPtr--; HandleStackOverflow(128); // Make sure we have space since we're bypassing the check. continue; } assemblyInterface.registers[8] = closureWord; // Put closure in the closure reg. // Pop the return address. We may need to align this to a word boundary. POLYCODEPTR originalReturn = (POLYCODEPTR)((assemblyInterface.stackPtr++)->codeAddr); while ((uintptr_t)originalReturn & 3) { ASSERT(originalReturn[0] == INSTR_no_op); originalReturn++; } // Get the arguments into the correct registers. // Load the register arguments. The first 8 arguments go into X0-X7. // These will have been the first arguments to be pushed so will be // furthest away on the stack. // Note: we don't currently pass any arguments in the FP regs. for (unsigned i = 0; i < numTailArguments && i < 8; i++) assemblyInterface.registers[i] = assemblyInterface.stackPtr[numTailArguments - i - 1]; // If there are any more arguments these need to be shifted down the stack. while (numTailArguments > 8) { numTailArguments--; assemblyInterface.stackPtr[numTailArguments] = assemblyInterface.stackPtr[numTailArguments - 8]; } // Remove the register arguments assemblyInterface.stackPtr += numTailArguments > 8 ? 8 : numTailArguments; assemblyInterface.linkRegister = (arm64CodePointer)originalReturn; // Set the return address to caller assemblyInterface.entryPoint = *(arm64CodePointer*)closure; // Entry point to callee interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } case ReturnReturn: { ClearExceptionPacket(); // Returning from an interpreted function. Normally we'll be returning to // interpreted code. 
if ((uintptr_t)interpreterPc & 3) // ARM64 addresses will always be 4-byte aligned. continue; arm64CodePointer cp = (arm64CodePointer)interpreterPc; if (cp[0] == 0xAA1E03E9 && cp[1] == 0xF9400350 && cp[2] == 0xD63F0200) continue; // Pop the value we're returning. Set the entry point to the code we're returning to. assemblyInterface.registers[0] = *assemblyInterface.stackPtr++; assemblyInterface.entryPoint = cp; interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } case ReturnRaise: { // This never occurs with the normal bootstrap but can happen during development. assemblyInterface.stackPtr = GetHandlerRegister(); arm64CodePointer cp = (arm64CodePointer)(assemblyInterface.stackPtr[0].codeAddr); if (cp[0] == 0xAA1E03E9 && cp[1] == 0xF9400350 && cp[2] == 0xD63F0200) continue; interpreterPc = 0; return; } } } } // Called from the assembly code as a result of a trap i.e. a request for // a GC or to extend the stack. void Arm64TrapHandler(PolyWord threadId) { Arm64TaskData* taskData = (Arm64TaskData*)TaskData::FindTaskForId(threadId); taskData->HandleTrap(); } void Arm64TaskData::HandleTrap() { SaveMemRegisters(); // Update globals from the memory registers. switch (this->assemblyInterface.returnReason) { case RETURN_HEAP_OVERFLOW: { // The heap has overflowed. // The register mask is the word after the return. saveRegisterMask = *assemblyInterface.entryPoint++; // The generated code first subtracts the space required from x27 and puts the // result into a separate register. It then compares this with x25 and comes here if // it is not above that. Either way it is going to execute an instruction to put // this value back into x27. // Look at that instruction to find out the register. 
arm64Instr moveInstr = *assemblyInterface.entryPoint; ASSERT((moveInstr & 0xffe0ffff) == 0xaa0003fb); // mov x27,xN allocReg = (moveInstr >> 16) & 0x1f; allocWords = (allocPointer - (PolyWord*)assemblyInterface.registers[allocReg].stackAddr) + 1; assemblyInterface.registers[allocReg] = TAGGED(0); // Clear this - it's not a valid address. if (profileMode == kProfileStoreAllocation) addProfileCount(allocWords); // The actual allocation is done in SetMemRegisters. break; } case RETURN_STACK_OVERFLOW: case RETURN_STACK_OVERFLOWEX: { // The register mask is the word after the return. saveRegisterMask = *assemblyInterface.entryPoint++; uintptr_t min_size = 0; // Size in PolyWords if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW) { min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } else { // Stack limit overflow. If the required stack space is larger than // the fixed overflow size the code will calculate the limit in X9. stackItem* stackP = assemblyInterface.registers[9].stackAddr; min_size = (this->stack->top - (PolyWord*)stackP) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } HandleStackOverflow(min_size); break; } case RETURN_ENTER_INTERPRETER: { interpreterPc = (POLYCODEPTR)assemblyInterface.linkRegister; byte reasonCode = *interpreterPc++; // Sort out arguments. assemblyInterface.exceptionPacket = TAGGED(0); if (reasonCode == 0xff) { // Exception handler. assemblyInterface.exceptionPacket = assemblyInterface.registers[0]; // Get the exception packet // We need to leave the current handler in place. When we enter the interpreter it will // check the exception packet and if it is non-null will raise it. } else if (reasonCode >= 128) { // Start of function. unsigned numArgs = reasonCode - 128; // We need the stack to contain: // The closure, the return address, the arguments. // The stack will currently contain the stack arguments. 
// Add space for the register arguments if (numArgs > 8) assemblyInterface.stackPtr -= 8; else assemblyInterface.stackPtr -= numArgs; // Move up any stack arguments. for (unsigned n = 8; n < numArgs; n++) { assemblyInterface.stackPtr[n - 8] = assemblyInterface.stackPtr[n]; } // Store the register arguments for (unsigned n = 0; n < numArgs && n < 8; n++) assemblyInterface.stackPtr[numArgs - n - 1] = assemblyInterface.registers[n]; // Finally push the return address and closure pointer *(--assemblyInterface.stackPtr) = assemblyInterface.registers[9]; // Return address - value of X30 before enter-int *(--assemblyInterface.stackPtr) = assemblyInterface.registers[8]; // Closure } else { // Return from call. Push X0 *(--assemblyInterface.stackPtr) = assemblyInterface.registers[0]; } Interpret(); break; } default: Crash("Unknown return reason code %u", this->assemblyInterface.returnReason); } SetMemRegisters(); } void Arm64TaskData::HandleStackOverflow(uintptr_t space) { uintptr_t min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE + space; try { // The stack check has failed. This may either be because we really have // overflowed the stack or because the stack limit value has been adjusted // to result in a call here. CheckAndGrowStack(this, min_size); } catch (IOException&) { // We may get an exception while handling this if we run out of store } { PLocker l(&interruptLock); // Set the stack limit. This clears any interrupt and also sets the // correct value if we've grown the stack. assemblyInterface.stackLimit = (stackItem*)stack->bottom + OVERFLOW_STACK_SIZE; } try { processes->ProcessAsynchRequests(this); // Release and re-acquire use of the ML memory to allow another thread // to GC. processes->ThreadReleaseMLMemory(this); processes->ThreadUseMLMemory(this); } catch (IOException&) { } } void Arm64TaskData::InitStackFrame(TaskData* parentTask, Handle proc) /* Initialise stack frame. 
*/ { StackSpace* space = this->stack; StackObject* stack = (StackObject*)space->stack(); uintptr_t stack_size = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem); assemblyInterface.stackPtr = (stackItem*)stack + stack_size; assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE; assemblyInterface.handlerRegister = assemblyInterface.stackPtr; // Store the argument and the closure. assemblyInterface.registers[8] = proc->Word(); // Closure assemblyInterface.registers[0] = TAGGED(0); // Argument - assemblyInterface.linkRegister = 0; // We never return + assemblyInterface.linkRegister = (arm64CodePointer)1; // We never return. Use a tagged value because it may be pushed + assemblyInterface.entryPoint = (arm64CodePointer)1; // Have to set the register mask in case we get a GC before the thread starts. saveRegisterMask = (1 << 8) | 1; // X8 and X0 #ifdef POLYML32IN64 // In 32-in-64 X24 always contains the heap base address. assemblyInterface.registers[24].stackAddr = (stackItem*)globalHeapBase; #endif } // This is called from a different thread so we have to be careful. void Arm64TaskData::InterruptCode() { PLocker l(&interruptLock); // Set the stack limit pointer to the top of the stack to cause // a trap when we next check for stack overflow. // We use a lock here to ensure that we always use the current value of the // stack. The thread we're interrupting could be growing the stack at this point. if (stack != 0) assemblyInterface.stackLimit = (stackItem*)(stack->top - 1); } // Called before entering ML code from the run-time system void Arm64TaskData::SetMemRegisters() { // Copy the current store limits into variables before we go into the assembly code. // If we haven't yet set the allocation area or we don't have enough we need // to create one (or a new one). if (allocPointer <= allocLimit + allocWords) { if (allocPointer < allocLimit) Crash("Bad length in heap overflow trap"); // Find some space to allocate in.
Updates taskData->allocPointer and // returns a pointer to the newly allocated space (if allocWords != 0) PolyWord* space = processes->FindAllocationSpace(this, allocWords, true); if (space == 0) { // We will now raise an exception instead of returning. // Set allocWords to zero so we don't set the allocation register // since that could be holding the exception packet. allocWords = 0; } // Undo the allocation just now. allocPointer += allocWords; } if (this->allocWords != 0) { // If we have had a heap trap we actually do the allocation here. // We will have already garbage collected and recovered sufficient space. // This also happens if we have just trapped because of store profiling. allocPointer -= allocWords; // Now allocate // Set the allocation register to this area. N.B. This is an absolute address. assemblyInterface.registers[allocReg].codeAddr = (POLYCODEPTR)(allocPointer + 1); /* remember: it's off-by-one */ allocWords = 0; } // If we have run out of store, either just above or while allocating in the RTS, // allocPointer and allocLimit will have been set to zero as part of the GC. We will // now be raising an exception which may free some store but we need to come back here // before we allocate anything. The compiled code uses unsigned arithmetic to check for // heap overflow but only after subtracting the space required. We need to make sure // that the values are still non-negative after subtracting any object size. if (allocPointer == 0) allocPointer += MAX_OBJECT_SIZE; if (allocLimit == 0) allocLimit += MAX_OBJECT_SIZE; assemblyInterface.localMbottom = allocLimit + 1; assemblyInterface.localMpointer = allocPointer + 1; // If we are profiling store allocation we set localMbottom so that a trap // will be generated. if (profileMode == kProfileStoreAllocation) assemblyInterface.localMbottom = assemblyInterface.localMpointer; assemblyInterface.threadId = threadObject; } // This is called whenever we have returned from ML to C.
void Arm64TaskData::SaveMemRegisters() { if (interpreterPc == 0) { // Not if we're already in the interpreter // The normal return is to the link register address. assemblyInterface.entryPoint = assemblyInterface.linkRegister; allocPointer = assemblyInterface.localMpointer - 1; } allocWords = 0; assemblyInterface.exceptionPacket = TAGGED(0); saveRegisterMask = 0; } // As far as possible we want locking and unlocking an ML mutex to be fast so // we try to implement the code in the assembly code using appropriate // interlocked instructions. That does mean that if we need to lock and // unlock an ML mutex in this code we have to use the same, machine-dependent, // code to do it. These are defaults that are used where there is no // machine-specific code. #if defined(_MSC_VER) // This saves having to define it in the MASM assembly code. static uintptr_t Arm64AsmAtomicExchange(PolyObject* mutexp, uintptr_t value) { # if (SIZEOF_POLYWORD == 8) return InterlockedExchange64((LONG64*)mutexp, value); # else return InterlockedExchange((LONG*)mutexp, value); # endif } #else extern "C" { // This is only defined in the GAS assembly code uintptr_t Arm646AsmAtomicExchange(PolyObject*, uintptr_t); } #endif bool Arm64TaskData::AtomicallyReleaseMutex(PolyObject* mutexp) { uintptr_t oldValue = Arm64AsmAtomicExchange(mutexp, 0); return oldValue == 1; } bool Arm64TaskData::AddTimeProfileCount(SIGNALCONTEXT *context) { if (interpreterPc != 0) { // See if the PC we've got is an ML code address. MemSpace *space = gMem.SpaceForAddress(interpreterPc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { incrementCountAsynch(interpreterPc); return true; } } return false; } extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolyInterpretedEnterIntMode(); POLYEXTERNALSYMBOL POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function); } // Do we require EnterInt instructions and if so for which architecture? // 0 = > None; 1 => X86_32, 2 => X86_64. 
3 => X86_32_in_64. 4 => ARM_64. // ARM_64 in 32 is the same as ARM64. POLYUNSIGNED PolyInterpretedEnterIntMode() { return TAGGED(4).AsUnsigned(); } // End the first stage of bootstrap mode and run a new function. // The first stage is always interpreted. Once that is complete every function will have // at least an executable "enter-interpreter" stub so it can be called as machine code. POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function) { TaskData* taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle pushedFunction = taskData->saveVec.push(function); arm64Dependent.mustInterpret = false; ((Arm64TaskData*)taskData)->EndBootStrap(); taskData->InitStackFrame(taskData, pushedFunction); taskData->EnterPolyCode(); // Should never return. ASSERT(0); return TAGGED(0).AsUnsigned(); } // No machine-specific calls in the interpreter. struct _entrypts machineSpecificEPT[] = { { "PolyInterpretedEnterIntMode", (polyRTSFunction)&PolyInterpretedEnterIntMode }, { "PolyEndBootstrapMode", (polyRTSFunction)&PolyEndBootstrapMode }, { NULL, NULL} // End of list. }; diff --git a/libpolyml/arm64assembly.S b/libpolyml/arm64assembly.S index 8fee6b6c..c14113b7 100644 --- a/libpolyml/arm64assembly.S +++ b/libpolyml/arm64assembly.S @@ -1,218 +1,220 @@ // // Assembly code for the ARM64 for Poly/ML // Author: David Matthews // Copyright (c) David C. J. Matthews 2021 // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License version 2.1 as published by the Free Software Foundation. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. 
// // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // The syntax of directives in the GNU assembler and in the MS ARMASM // are somewhat different. ARMASMSYNTAX is defined in the VS project files. /* LABEL(x) expands to a bare name for ARMASM and to "x:" for the GNU assembler. */ #ifdef ARMASMSYNTAX #define LABEL(x) x #else #define LABEL(x) x: #endif #ifdef ARMASMSYNTAX AREA |.text|, CODE, READONLY #else .section .text,"x" .balign 4 #endif // Offsets into the assembly code interface. These are byte offsets from X26; // RegisterArray holds one 8-byte slot per general register, indexed by register number. #define TrapHandlerEntry 32 #define HandlerRegister 40 #define ExceptionPacket 56 #define ThreadId 64 #define RegisterArray 72 #define FPRegisterArray 272 #define LocalMBottom 336 #define LocalMPointer 344 #define MLStackPointer 352 #define LinkRegister 360 #define EntryPoint 368 #define ReturnReason 376 #ifdef ARMASMSYNTAX EXPORT Arm64AsmEnterCompiledCode Arm64AsmEnterCompiledCode PROC #else .global Arm64AsmEnterCompiledCode Arm64AsmEnterCompiledCode: #endif // This is called once the thread has been initialised to run the ML code. // It never returns. The RTS may be entered either by a compiled RTS call // or by a call to a "trap" function. // We only need to load a subset of the registers. mov x26,x0 // Copy the address of the assembly-code section into X26 ldr x0,[x26, RegisterArray] // Argument ldr x8,[x26, RegisterArray+8*8] // Closure address #ifdef POLYML32IN64 - ldr x16,[x8,x24, LSL #2] // TODO: This doesn't work. 
+ ldr x24,[x26, RegisterArray+24 * 8] + add x16,x24,x8,LSL #2 + ldr x16,[x16] /* 32-in-64: reload x24 from its save slot, form x24 + (x8 << 2) and load the code address from there. */ #else ldr x16,[x8] // Code address - first word of closure #endif ldr x25,[x26, LocalMBottom] // Limit of heap ldp x27,x28,[x26, LocalMPointer] // Allocation pointer and stack pointer ldr x30,[x26, LinkRegister] // Link register - always zero because we don't return br x16 // Jump to code #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_HEAP_OVERFLOW Arm64AsmCallExtraRETURN_HEAP_OVERFLOW PROC #else .global Arm64AsmCallExtraRETURN_HEAP_OVERFLOW Arm64AsmCallExtraRETURN_HEAP_OVERFLOW : #endif mov x16, 1 /* Return reason 1; trapHandle stores it as a byte at ReturnReason. */ // Common code to call into the RTS LABEL(trapHandle) strb w16,[x26, ReturnReason] /* Save x0-x15, x19-x24 and d0-d7 in their slots, then the allocation pointer, ML stack pointer and link register. */ stp x0,x1,[x26, RegisterArray] stp x2,x3, [x26, RegisterArray+2*8] stp x4,x5, [x26, RegisterArray + 4*8] stp x6, x7, [x26, RegisterArray + 6*8] stp x8, x9, [x26, RegisterArray + 8*8] stp x10, x11, [x26, RegisterArray + 10*8] stp x12, x13, [x26, RegisterArray + 12*8] stp x14, x15, [x26, RegisterArray + 14*8] stp x19, x20, [x26, RegisterArray + 19 * 8] stp x21,x22,[x26, RegisterArray + 21 * 8] stp x23,x24, [x26, RegisterArray + 23 * 8] stp d0,d1,[x26, FPRegisterArray] stp d2,d3,[x26, FPRegisterArray+2*8] stp d4,d5,[x26, FPRegisterArray+4*8] stp d6,d7,[x26, FPRegisterArray+6*8] str x27,[x26,LocalMPointer] str x28,[x26,MLStackPointer] str x30,[x26,LinkRegister] ldr x0,[x26,ThreadId] // Pass the thread id as an argument so that we can get the task data ldr x16,[x26,TrapHandlerEntry] blr x16 // Load the registers. Even though some are callee-save the RTS may have updated them. // x26, though, should have been preserved. 
ldr x1,[x26, RegisterArray+1*8] /* x0 is reloaded last, after it has been used to test the exception packet. */ ldp x2,x3, [x26, RegisterArray+2*8] ldp x4,x5, [x26, RegisterArray + 4*8] ldp x6, x7, [x26, RegisterArray + 6*8] ldp x8, x9, [x26, RegisterArray + 8*8] ldp x10, x11, [x26, RegisterArray + 10*8] ldp x12, x13, [x26, RegisterArray + 12*8] ldp x14, x15, [x26, RegisterArray + 14*8] ldp x19, x20, [x26, RegisterArray + 19 * 8] ldp x21,x22,[x26, RegisterArray + 21 * 8] ldp x23,x24, [x26, RegisterArray + 23 * 8] ldp d0,d1,[x26, FPRegisterArray] ldp d2,d3,[x26, FPRegisterArray+2*8] ldp d4,d5,[x26, FPRegisterArray+4*8] ldp d6,d7,[x26, FPRegisterArray+6*8] ldr x25,[x26, LocalMBottom] ldp x27,x28,[x26,LocalMPointer] ldr x30,[x26,LinkRegister] // Check whether we've raised an exception e.g. Interrupt ldr x0,[x26,ExceptionPacket] cmp x0,#1 /* NOTE(review): 1 is presumably TAGGED(0), the "no exception" value written by SaveMemRegisters - confirm. */ bne raiseexcept ldr x0,[x26, RegisterArray] ldr x16,[x26,EntryPoint] // Normally this will be x30 but not always br x16 LABEL(raiseexcept) /* x0 still holds the exception packet. The ML stack pointer is reset to the handler register and control passes to the address stored in the first word there. */ ldr x28,[x26,HandlerRegister] // Set the stack ptr to this ldr x16,[x28] br x16 #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_STACK_OVERFLOW Arm64AsmCallExtraRETURN_STACK_OVERFLOW PROC #else .global Arm64AsmCallExtraRETURN_STACK_OVERFLOW Arm64AsmCallExtraRETURN_STACK_OVERFLOW : #endif mov x16, 2 /* Return reason 2 */ b trapHandle #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX PROC #else .global Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX : #endif mov x16, 3 /* Return reason 3 */ b trapHandle #ifdef ARMASMSYNTAX ENDP #endif #ifdef ARMASMSYNTAX EXPORT Arm64AsmCallExtraRETURN_ENTER_INTERPRETER Arm64AsmCallExtraRETURN_ENTER_INTERPRETER PROC #else .global Arm64AsmCallExtraRETURN_ENTER_INTERPRETER Arm64AsmCallExtraRETURN_ENTER_INTERPRETER : #endif mov x16,4 /* Return reason 4 */ b trapHandle #ifdef ARMASMSYNTAX ENDP #endif // POLYUNSIGNED Arm64AsmAtomicExchange(PolyObject*, POLYSIGNED); // This is not actually used with the VS build. 
#ifdef ARMASMSYNTAX EXPORT Arm64AsmAtomicExchange Arm64AsmAtomicExchange PROC #else .global Arm64AsmAtomicExchange Arm64AsmAtomicExchange: #endif // The easiest way to do this is with swpal but that is only available // in ARM 8.1 and above. For the moment we use the old version. // swpal x0,xzr,[x0] /* Exclusive load/store retry loop: x3 receives the old value, zero (xzr) is stored, and the loop repeats while the store status in w4 is non-zero. The old value is returned in x0. NOTE(review): the second argument (x1) is never read - zero is always stored. */ LABEL(aaea1) ldaxr x3,[x0] stlxr w4,xzr,[x0] cbnz w4,aaea1 dmb ish mov x0,x3 ret #ifdef ARMASMSYNTAX ENDP END #endif diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml index 458603cc..d4ea9b3a 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sml @@ -1,2097 +1,2100 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. 
You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor ARM64ASSEMBLY ( structure Debug: DEBUG and Pretty: PRETTYSIG and CodeArray: CODEARRAYSIG ) : Arm64Assembly = struct open CodeArray Address exception InternalError = Misc.InternalError infix 5 << <<+ <<- >> >>+ >>- ~>> ~>>+ ~>>- (* Shift operators *) infix 3 andb orb xorb andbL orbL xorbL andb8 orb8 xorb8 (* Bitwise operators *) - val op << = Word.<< and op >> = Word.>> and op ~>> = Word.~>> - and op andb = Word.andb and op orb = Word.orb + val op << = Word32.<< and op >> = Word32.>> and op ~>> = Word32.~>> + and op andb = Word32.andb and op orb = Word32.orb (* The unqualified shift and bitwise operators work on Word32 values, the type of an instruction word. *) - val wordToWord8 = Word8.fromLargeWord o Word.toLargeWord + val word32ToWord8 = Word8.fromLargeWord o Word32.toLargeWord + and word8ToWord32 = Word32.fromLargeWord o Word8.toLargeWord + and word32ToWord = Word.fromLargeWord o Word32.toLargeWord + and wordToWord32 = Word32.fromLargeWord o Word.toLargeWord and word8ToWord = Word.fromLargeWord o Word8.toLargeWord (* Conversions between word widths, all routed through LargeWord. *) - + (* XReg is used for fixed point registers since X0 and W0 are the same register. *) datatype xReg = XReg of Word8.word | XZero | XSP (* VReg is used for the floating point registers since V0, D0 and S0 are the same register. *) and vReg = VReg of Word8.word (* A Label is a ref that is later set to the location. Several labels can be linked together so that they are only set at a single point. Only forward jumps are linked so when we come to finally set the label we will have the full list. *) type labels = Word.word ref list ref (* Condition codes. The encoding is standard. 
*) datatype condition = CCode of Word8.word val condEqual = CCode 0wx0 (* Z=1 *) and condNotEqual = CCode 0wx1 (* Z=0 *) and condCarrySet = CCode 0wx2 (* C=1 *) and condCarryClear = CCode 0wx3 (* C=0 *) and condNegative = CCode 0wx4 (* N=1 *) and condPositive = CCode 0wx5 (* N=0 includes zero *) and condOverflow = CCode 0wx6 (* V=1 *) and condNoOverflow = CCode 0wx7 (* V=0 *) and condUnsignedHigher = CCode 0wx8 (* C=1 && Z=0 *) and condUnsignedLowOrEq = CCode 0wx9 (* ! (C=1 && Z=0) *) and condSignedGreaterEq = CCode 0wxa (* N=V *) and condSignedLess = CCode 0wxb (* N<>V *) and condSignedGreater = CCode 0wxc (* Z==0 && N=V *) and condSignedLessEq = CCode 0wxd (* !(Z==0 && N=V) *) (* use unconditional branches for the "always" cases. *) (* N.B. On subtraction and comparison the ARM uses an inverted carry flag for borrow. The C flag is set if there is NO borrow. This is the reverse of the X86. *) (* Offsets in the assembly code interface pointed at by X26 These are in units of 64-bits NOT bytes. They must agree with the byte offsets defined in arm64assembly.S, e.g. 42*8 = 336 = LocalMBottom. *) val heapOverflowCallOffset = 1 and stackOverflowCallOffset = 2 and stackOverflowXCallOffset= 3 and exceptionHandlerOffset = 5 and stackLimitOffset = 6 and exceptionPacketOffset = 7 and threadIdOffset = 8 and heapLimitPtrOffset = 42 and heapAllocPtrOffset = 43 and mlStackPtrOffset = 44 (* 31 in the register field can either mean the zero register or the hardware stack pointer. Which meaning depends on the instruction. Each function below returns the 5-bit register number and raises InternalError for the form that is not valid in that position. *) fun xRegOrXZ(XReg w) = w | xRegOrXZ XZero = 0w31 | xRegOrXZ XSP = raise InternalError "XSP not valid here" and xRegOrXSP(XReg w) = w | xRegOrXSP XZero = raise InternalError "XZero not valid here" | xRegOrXSP XSP = 0w31 (* There are cases where it isn't clear. 
*) and xRegOnly (XReg w) = w | xRegOnly XZero = raise InternalError "XZero not valid here" | xRegOnly XSP = raise InternalError "XSP not valid here" val X0 = XReg 0w0 and X1 = XReg 0w1 and X2 = XReg 0w2 and X3 = XReg 0w3 and X4 = XReg 0w4 and X5 = XReg 0w5 and X6 = XReg 0w6 and X7 = XReg 0w7 and X8 = XReg 0w8 and X9 = XReg 0w9 and X10= XReg 0w10 and X11 = XReg 0w11 and X12 = XReg 0w12 and X13 = XReg 0w13 and X14= XReg 0w14 and X15 = XReg 0w15 and X16 = XReg 0w16 and X17 = XReg 0w17 and X18= XReg 0w18 and X19 = XReg 0w19 and X20 = XReg 0w20 and X21 = XReg 0w21 and X22= XReg 0w22 and X23 = XReg 0w23 and X24 = XReg 0w24 and X25 = XReg 0w25 and X26= XReg 0w26 and X27 = XReg 0w27 and X28 = XReg 0w28 and X29 = XReg 0w29 and X30= XReg 0w30 (* Symbolic names for the registers with a fixed role in ML code. *) val X_MLHeapLimit = X25 (* ML Heap limit pointer *) and X_MLAssemblyInt = X26 (* ML assembly interface pointer. *) and X_MLHeapAllocPtr = X27 (* ML Heap allocation pointer. *) and X_MLStackPtr = X28 (* ML Stack pointer. *) and X_LinkReg = X30 (* Link reg - return address *) (* Extract the register number from a vReg. *) fun vReg(VReg v) = v (* Only the first eight registers are currently used by ML. *) val V0 = VReg 0w0 and V1 = VReg 0w1 and V2 = VReg 0w2 and V3 = VReg 0w3 and V4 = VReg 0w4 and V5 = VReg 0w5 and V6 = VReg 0w6 and V7 = VReg 0w7 (* Some data instructions include a possible shift. *) datatype shiftType = - ShiftLSL of word - | ShiftLSR of word - | ShiftASR of word + ShiftLSL of Word8.word + | ShiftLSR of Word8.word + | ShiftASR of Word8.word | ShiftNone (* shiftEncode returns (shift-type code, shift amount). Amounts above 63 raise InternalError; ShiftNone is encoded as LSL by zero. *) local fun checkImm6 w = if w > 0w63 then raise InternalError "shift > 63" else w in fun shiftEncode(ShiftLSL w) = (0w0, checkImm6 w) | shiftEncode(ShiftLSR w) = (0w1, checkImm6 w) | shiftEncode(ShiftASR w) = (0w2, checkImm6 w) | shiftEncode ShiftNone = (0w0, 0w0) end (* Other instructions include an extension i.e. a sign- or zero-extended value from one of the argument registers. When an extension is encoded there can also be a left shift which applies after the extension. 
I don't understand what difference, if any, there is between UXTX and SXTX. There's no ExtNone because we need to use either UXTW or UXTX depending on the length *) datatype 'a extend = ExtUXTB of 'a (* Unsigned extend byte *) | ExtUXTH of 'a (* Unsigned extend halfword *) | ExtUXTW of 'a (* Unsigned extend word *) | ExtUXTX of 'a (* Left shift *) | ExtSXTB of 'a (* Sign extend byte *) | ExtSXTH of 'a (* Sign extend halfword *) | ExtSXTW of 'a (* Sign extend word *) | ExtSXTX of 'a (* Left shift *) (* Load/store instructions have only a single bit for the shift. For byte operations this is one bit shift; for others it scales by the size of the operand if set. *) datatype scale = ScaleOrShift | NoScale (* Both encoders return (3-bit option code, payload). For arithmetic the payload is a shift amount checked to be at most 4; for load/store it is passed through unchanged. *) local (* Although there are three bits it seems that the shift is limited to 0 to 4. *) fun checkImm3 w = if w > 0w4 then raise InternalError "extend shift > 4" else w in fun extendArithEncode(ExtUXTB w) = (0w0, checkImm3 w) | extendArithEncode(ExtUXTH w) = (0w1, checkImm3 w) | extendArithEncode(ExtUXTW w) = (0w2, checkImm3 w) | extendArithEncode(ExtUXTX w) = (0w3, checkImm3 w) | extendArithEncode(ExtSXTB w) = (0w4, checkImm3 w) | extendArithEncode(ExtSXTH w) = (0w5, checkImm3 w) | extendArithEncode(ExtSXTW w) = (0w6, checkImm3 w) | extendArithEncode(ExtSXTX w) = (0w7, checkImm3 w) fun extendLSEncode(ExtUXTB v) = (0w0, v) | extendLSEncode(ExtUXTH v) = (0w1, v) | extendLSEncode(ExtUXTW v) = (0w2, v) | extendLSEncode(ExtUXTX v) = (0w3, v) | extendLSEncode(ExtSXTB v) = (0w4, v) | extendLSEncode(ExtSXTH v) = (0w5, v) | extendLSEncode(ExtSXTW v) = (0w6, v) | extendLSEncode(ExtSXTX v) = (0w7, v) end datatype wordSize = WordSize32 | WordSize64 (* Bit patterns on the ARM64 are encoded using a complicated scheme and only certain values can be encoded. An element can be 2, 4, 8, 16, 32 or 64 bits and must be a sequence of at least one zero bits followed by at least one one bit. This sequence can then be rotated within the element. 
Finally the element is replicated within the register up to 32 or 64 bits. All this information is encoded in 13 bits. N.B. Bit patterns of all zeros or all ones cannot be encoded. *) (* Encode the value if it is possible. Returns SOME{n, imms, immr} or NONE if the value has no encoding. *) fun encodeBitPattern(value, sf (* size flag *)) = (* Can't encode 0 or all ones. *) if value = 0w0 orelse value = Word64.notb 0w0 then NONE (* If this is 32-bits we can't encode all ones in the low-order 32-bits or any value that won't fit in 32-bits, *) else if sf = WordSize32 andalso value >= 0wxffffffff then NONE else let val regSize = case sf of WordSize32 => 0w32 | WordSize64 => 0w64 (* Get the element size. Look for the repeat of the pattern. The size is halved while the two halves are equal, down to a minimum of 2. *) fun getElemSize size = let val ns = size div 0w2 val mask = Word64.<<(0w1, ns) - 0w1 in if Word64.andb(value, mask) <> Word64.andb(Word64.>>(value, ns), mask) then size else if ns <= 0w2 then ns else getElemSize ns end val elemSize = getElemSize regSize (* Base-2 logarithm; elemSize is always a power of two. *) fun log2 0w1 = 0w0 | log2 n = 0w1 + log2(Word.>>(n, 0w1)) val elemBits = log2 elemSize (* Find the rotation that puts as many of the zero bits in the element at the top. *) val elemMask = Word64.>>(Word64.notb 0w0, 0w64-elemSize) (* ror and rol rotate by one bit within the element. *) fun ror elt = Word64.orb((Word64.<<(Word64.andb(elt, 0w1), elemSize-0w1), Word64.>>(elt, 0w1))) and rol elt = Word64.orb(Word64.andb(elemMask, Word64.<<(elt, 0w1)), Word64.>>(elt, elemSize-0w1)) (* Rotate in whichever direction makes the value numerically smaller until neither does, tracking the net rotation in n. *) fun findRotation(v, n) = if ror v < v then findRotation(ror v, (n-0w1) mod elemSize) else if rol v < v then findRotation(rol v, n+0w1) else (v, n) val (rotated, rotation) = findRotation(Word64.andb(value, elemMask), 0w0) (* Count out the low order ones. If the result is zero then we've got a valid sequence of zeros followed by ones but if we discover a zero bit and the result isn't zero then we can't encode this. 
*) fun countLowOrderOnes(v, n) = if v = 0w0 then SOME n else if Word64.andb(v, 0w1) = 0w1 then countLowOrderOnes(Word64.>>(v, 0w1), n+0w1) else NONE in case countLowOrderOnes(rotated, 0w0) of NONE => NONE | SOME lowOrderOnes => let (* Encode the element size. The n flag is set when bit 6 of the size encoding is clear; the low six bits are combined with (number of ones - 1) to give imms. *) val elemSizeEnc = 0wx7f - (Word.<<(0w1, elemBits+0w1) - 0w1) val n = if Word.andb(elemSizeEnc, 0wx40) = 0w0 then 0w1 else 0w0 val imms = Word.andb(Word.orb(elemSizeEnc, lowOrderOnes-0w1), 0wx3f) in SOME{n=n, imms=imms, immr=rotation} end end; (* Decode a pattern for printing. Raises InternalError if the fields do not describe a valid pattern. *) fun decodeBitPattern{sf, n, immr, imms} = let (* Find the highest bit set in N:NOT(imms) *) fun highestBitSet 0w0 = 0 - | highestBitSet n = 1+highestBitSet(Word.>>(n, 0w1)) - val len = highestBitSet(Word.orb(Word.<<(n, 0w6), Word.xorb(imms, 0wx3f))) - 1 + | highestBitSet n = 1+highestBitSet(Word32.>>(n, 0w1)) + val len = highestBitSet(Word32.orb(Word32.<<(n, 0w6), Word32.xorb(imms, 0wx3f))) - 1 val _ = if len < 0 then raise InternalError "decodeBitPattern: invalid" else () (* size is the element size in bits; r and s are the rotation and (ones - 1) within the element. *) - val size = Word.<<(0w1, Word.fromInt len) - val r = Word.andb(immr, size-0w1) - and s = Word.andb(imms, size-0w1) + val size = Word32.<<(0w1, Word.fromInt len) + val r = Word32.andb(immr, size-0w1) + and s = Word32.andb(imms, size-0w1) val _ = if s = size-0w1 then raise InternalError "decodeBitPattern: invalid" else () - val pattern = Word64.<<(0w1, s+0w1) - 0w1 + val pattern = Word64.<<(0w1, word32ToWord(s+0w1)) - 0w1 (* Rotate right by one within the element: shift right and move the low-order bit to the top bit of the element. *) fun ror elt = - Word64.orb((Word64.<<(Word64.andb(elt, 0w1), size-0w1), + Word64.orb((Word64.<<(Word64.andb(elt, 0w1), word32ToWord(size-0w1)), Word64.>>(elt, 0w1))) fun rotateBits(value, 0w0) = value | rotateBits(value, n) = rotateBits(ror value, n-0w1) val rotated = rotateBits(pattern, r) val regSize = if sf = 0w0 then 0w32 else 0w64 (* Replicate the rotated pattern to fill the register. 
*) fun replicate(pattern, size) = if size >= regSize then pattern - else replicate(Word64.orb(pattern, Word64.<<(pattern, size)), size * 0w2) + else replicate(Word64.orb(pattern, Word64.<<(pattern, word32ToWord size)), size * 0w2) in replicate(rotated, size) end (* True if encodeBitPattern yields SOME for the (value, size) pair. *) val isEncodableBitPattern = isSome o encodeBitPattern (* Items in the instruction stream. SimpleInstr carries an already encoded 32-bit instruction word; the other constructors carry the operands of literal loads, labels and branches. *) datatype instr = - SimpleInstr of word + SimpleInstr of Word32.word | LoadAddressLiteral of {reg: xReg, value: machineWord} | LoadNonAddressLiteral of {reg: xReg, value: Word64.word} | Label of labels | UnconditionalBranch of labels | ConditionalBranch of { label: labels, jumpCondition: condition, length: brLength ref } | LoadLabelAddress of { label: labels, reg: xReg } | TestBitBranch of { label: labels, bitNo: Word8.word, brNonZero: bool, reg: xReg, length: brLength ref } | CompareBranch of { label: labels, brNonZero: bool, size: wordSize, reg: xReg, length: brLength ref } and brLength = BrShort | BrExtended val nopCode = 0wxD503201F (* Instruction word for a no-op. *) (* Add/subtract an optionally shifted 12-bit immediate (i.e. constant) to/from a register. The constant is zero-extended. The versions that do not set the flags can use XSP as the destination; the versions that set the flags can use XZero as the destination i.e. they discard the result and act as a comparison. 
*) local (* sf goes in bit 31, oper in bit 30 (0 = add, 1 = subtract) and s in bit 29 (1 = set flags). xdOp decides how register 31 is interpreted for regD. immed must be below 0x1000; "shifted" sets bit 22. *) fun addSubRegImmediate(sf, oper, s, xdOp) ({regN, regD, immed, shifted}) = let val () = if immed >= 0wx1000 then raise InternalError "addSubRegImmediate: immed > 12 bits" else () in SimpleInstr( 0wx11000000 orb (sf << 0w31) orb (oper << 0w30) orb (s << 0w29) orb (if shifted then 0wx400000 else 0w0) orb - (immed << 0w10) orb (word8ToWord(xRegOrXSP regN) << 0w5) orb - word8ToWord(xdOp regD)) + (wordToWord32 immed << 0w10) orb (word8ToWord32(xRegOrXSP regN) << 0w5) orb + word8ToWord32(xdOp regD)) end in val addImmediate = addSubRegImmediate(0w1, 0w0, 0w0, xRegOrXSP) and addSImmediate = addSubRegImmediate(0w1, 0w0, 0w1, xRegOrXZ) and subImmediate = addSubRegImmediate(0w1, 0w1, 0w0, xRegOrXSP) and subSImmediate = addSubRegImmediate(0w1, 0w1, 0w1, xRegOrXZ) end (* Add/subtract a shifted register, optionally setting the flags. The shift is applied to regM. *) local (* X31 is XZ here unlike the extended version.*) fun addSubtractShiftedReg (sf, oper, s) ({regM, regN, regD, shift}) = let val (shift, imm6) = shiftEncode shift in SimpleInstr(0wx0b000000 orb (sf << 0w31) orb (oper << 0w30) orb (s << 0w29) orb - (shift << 0w22) orb (word8ToWord(xRegOnly regM) << 0w16) orb - (imm6 << 0w10) orb (word8ToWord(xRegOrXZ regN) << 0w5) orb - word8ToWord(xRegOrXZ regD)) + (shift << 0w22) orb (word8ToWord32(xRegOnly regM) << 0w16) orb + (word8ToWord32 imm6 << 0w10) orb (word8ToWord32(xRegOrXZ regN) << 0w5) orb + word8ToWord32(xRegOrXZ regD)) end in val addShiftedReg = addSubtractShiftedReg(0w1, 0w0, 0w0) and addSShiftedReg = addSubtractShiftedReg(0w1, 0w0, 0w1) and subShiftedReg = addSubtractShiftedReg(0w1, 0w1, 0w0) and subSShiftedReg = addSubtractShiftedReg(0w1, 0w1, 0w1) end (* Add/subtract an extended register, optionally setting the flags. *) local (* SP can be used as Xn and also for Xd for the non-flags versions. 
*) fun addSubtractExtendedReg (sf, oper, s, opt, xD) ({regM, regN, regD, extend}) = let (* option is the 3-bit extend kind and imm3 the left shift applied after extension. *) val (option, imm3) = extendArithEncode extend in SimpleInstr(0wx0b200000 orb (sf << 0w31) orb (oper << 0w30) orb (s << 0w29) orb - (opt << 0w22) orb (word8ToWord(xRegOnly regM) << 0w16) orb - (option << 0w13) orb (imm3 << 0w10) orb - (word8ToWord(xRegOrXSP regN) << 0w5) orb - word8ToWord(xD regD)) + (opt << 0w22) orb (word8ToWord32(xRegOnly regM) << 0w16) orb + (option << 0w13) orb (word8ToWord32 imm3 << 0w10) orb + (word8ToWord32(xRegOrXSP regN) << 0w5) orb + word8ToWord32(xD regD)) end in val addExtendedReg = addSubtractExtendedReg(0w1, 0w0, 0w0, 0w0, xRegOrXSP) and addSExtendedReg = addSubtractExtendedReg(0w1, 0w0, 0w1, 0w0, xRegOrXZ) and subExtendedReg = addSubtractExtendedReg(0w1, 0w1, 0w0, 0w0, xRegOrXSP) and subSExtendedReg = addSubtractExtendedReg(0w1, 0w1, 0w1, 0w0, xRegOrXZ) end (* Logical operations on a shifted register. oper selects the operation: 0 = and, 1 = orr, 2 = eor, 3 = ands. n goes in bit 21 and is always zero in the versions defined here. *) local fun logicalShiftedReg (sf, oper, n) ({regM, regN, regD, shift}) = let val (shift, imm6) = shiftEncode shift in SimpleInstr(0wx0a000000 orb (sf << 0w31) orb (oper << 0w29) orb - (shift << 0w22) orb (n << 0w21) orb (word8ToWord(xRegOrXZ regM) << 0w16) orb - (imm6 << 0w10) orb (word8ToWord(xRegOrXZ regN) << 0w5) orb - word8ToWord(xRegOrXZ regD)) + (shift << 0w22) orb (n << 0w21) orb (word8ToWord32(xRegOrXZ regM) << 0w16) orb + (word8ToWord32 imm6 << 0w10) orb (word8ToWord32(xRegOrXZ regN) << 0w5) orb + word8ToWord32(xRegOrXZ regD)) end in val andShiftedReg = logicalShiftedReg(0w1, 0w0, 0w0) and orrShiftedReg = logicalShiftedReg(0w1, 0w1, 0w0) and eorShiftedReg = logicalShiftedReg(0w1, 0w2, 0w0) and andsShiftedReg = logicalShiftedReg(0w1, 0w3, 0w0) (* There are also versions that operate with an inverted version of the argument. *) end (* Two-source operations. 
*) local (* All three registers must be real registers: xRegOnly rejects both XZero and XSP. opcode goes in bits 10-15. *) fun twoSourceInstr (sf, s, opcode) ({regM, regN, regD}) = SimpleInstr(0wx1ac00000 orb (sf << 0w31) orb (s << 0w29) orb - (word8ToWord(xRegOnly regM) << 0w16) orb (opcode << 0w10) orb - (word8ToWord(xRegOnly regN) << 0w5) orb - word8ToWord(xRegOnly regD)) + (word8ToWord32(xRegOnly regM) << 0w16) orb (opcode << 0w10) orb + (word8ToWord32(xRegOnly regN) << 0w5) orb + word8ToWord32(xRegOnly regD)) in (* Signed and unsigned division. *) val unsignedDivide = twoSourceInstr(0w1, 0w0, 0wx2) and signedDivide = twoSourceInstr(0w1, 0w0, 0wx3) (* Logical shift left Rd = Rn << (Rm mod 0w64) *) and logicalShiftLeftVariable = twoSourceInstr(0w1, 0w0, 0wx8) (* Logical shift right Rd = Rn >> (Rm mod 0w64) *) and logicalShiftRightVariable = twoSourceInstr(0w1, 0w0, 0wx9) (* Arithmetic shift right Rd = Rn ~>> (Rm mod 0w64) *) and arithmeticShiftRightVariable = twoSourceInstr(0w1, 0w0, 0wxa) end (* Three source operations. These are all variations of multiply. *) local (* regA may be XZero; the other registers must be real registers. *) fun threeSourceInstr (sf, op54, op31, o0) ({regM, regA, regN, regD}) = SimpleInstr(0wx1b000000 orb (sf << 0w31) orb (op54 << 0w29) orb - (op31 << 0w21) orb (word8ToWord(xRegOnly regM) << 0w16) orb - (o0 << 0w15) orb (word8ToWord(xRegOrXZ regA) << 0w10) orb - (word8ToWord(xRegOnly regN) << 0w5) orb - word8ToWord(xRegOnly regD)) + (op31 << 0w21) orb (word8ToWord32(xRegOnly regM) << 0w16) orb + (o0 << 0w15) orb (word8ToWord32(xRegOrXZ regA) << 0w10) orb + (word8ToWord32(xRegOnly regN) << 0w5) orb + word8ToWord32(xRegOnly regD)) in (* regD = regA + regN * regM *) val multiplyAndAdd = threeSourceInstr(0w1, 0w0, 0w0, 0w0) (* regD = regA - regN * regM *) and multiplyAndSub = threeSourceInstr(0w1, 0w0, 0w0, 0w1) (* Return the high-order part of a signed multiplication. There is no accumulator operand so the regA field is filled with XZero. *) fun signedMultiplyHigh({regM, regN, regD}) = threeSourceInstr(0w1, 0w0, 0w2, 0w0) { regM=regM, regN=regN, regD=regD, regA=XZero} end (* Loads: There are two versions of this on the ARM. 
There is a version that takes a signed 9-bit byte offset and a version that takes an unsigned 12-bit word offset. *) local (* size selects the operand width (0 = byte, 1 = 16 bits, 2 = 32 bits, 3 = 64 bits), v = 1 selects a floating point register and opc is 1 for a load, 0 for a store. unitOffset must be in the range 0..0xfff; it is presumably counted in operand-sized units - confirm with callers. *) fun loadStoreRegScaled (size, v, opc, xD) ({regT, regN, unitOffset}) = let val _ = (unitOffset >= 0 andalso unitOffset < 0x1000) orelse raise InternalError "loadStoreRegScaled: value out of range" in SimpleInstr(0wx39000000 orb (size << 0w30) orb (opc << 0w22) orb - (v << 0w26) orb (Word.fromInt unitOffset << 0w10) orb - (word8ToWord(xRegOrXSP regN) << 0w5) orb word8ToWord(xD regT)) + (v << 0w26) orb (Word32.fromInt unitOffset << 0w10) orb + (word8ToWord32(xRegOrXSP regN) << 0w5) orb word8ToWord32(xD regT)) end in val loadRegScaled = loadStoreRegScaled(0w3, 0w0, 0w1, xRegOrXZ) and storeRegScaled = loadStoreRegScaled(0w3, 0w0, 0w0, xRegOrXZ) (* (Unsigned) byte operations. There are also signed versions. *) and loadRegScaledByte = loadStoreRegScaled (0w0, 0w0, 0w1, xRegOrXZ) and storeRegScaledByte = loadStoreRegScaled (0w0, 0w0, 0w0, xRegOrXZ) and loadRegScaled16 = loadStoreRegScaled (0w1, 0w0, 0w1, xRegOrXZ) and storeRegScaled16 = loadStoreRegScaled (0w1, 0w0, 0w0, xRegOrXZ) and loadRegScaled32 = loadStoreRegScaled (0w2, 0w0, 0w1, xRegOrXZ) and storeRegScaled32 = loadStoreRegScaled (0w2, 0w0, 0w0, xRegOrXZ) and loadRegScaledDouble = loadStoreRegScaled(0w3, 0w1, 0w1, vReg) and storeRegScaledDouble = loadStoreRegScaled(0w3, 0w1, 0w0, vReg) and loadRegScaledFloat = loadStoreRegScaled(0w2, 0w1, 0w1, vReg) and storeRegScaledFloat = loadStoreRegScaled(0w2, 0w1, 0w0, vReg) end local (* Loads and stores with a signed byte offset. This includes simple unscaled addresses, pre-indexing and post-indexing. 
*) fun loadStoreByteAddress (op4, xD) (size, v, opc) ({regT, regN, byteOffset}) = let val _ = (byteOffset >= ~256 andalso byteOffset < 256) orelse raise InternalError "loadStoreUnscaled: value out of range" - val imm9 = Word.fromInt byteOffset andb 0wx1ff + val imm9 = Word32.fromInt byteOffset andb 0wx1ff (* Signed offset truncated to a 9-bit field. *) in SimpleInstr(0wx38000000 orb (size << 0w30) orb (opc << 0w22) orb (v << 0w26) orb (imm9 << 0w12) orb (op4 << 0w10) orb - (word8ToWord(xRegOrXSP regN) << 0w5) orb word8ToWord(xD regT)) + (word8ToWord32(xRegOrXSP regN) << 0w5) orb word8ToWord32(xD regT)) end (* op4 selects the addressing form: 0 = plain unscaled offset, 1 = post-index, 3 = pre-index. *) val loadStoreUnscaled = loadStoreByteAddress (0w0, xRegOrXZ) and loadStoreUnscaledSIMD = loadStoreByteAddress (0w0, vReg) and loadStorePostIndex = loadStoreByteAddress (0w1, xRegOrXZ) and loadStorePreIndex = loadStoreByteAddress (0w3, xRegOrXZ) in (* The triples below are (size, v, opc) with the same meaning as in the scaled versions. *) val loadRegUnscaled = loadStoreUnscaled (0w3, 0w0, 0w1) and storeRegUnscaled = loadStoreUnscaled (0w3, 0w0, 0w0) (* (Unsigned) byte operations. There are also signed versions. *) and loadRegUnscaledByte = loadStoreUnscaled (0w0, 0w0, 0w1) and storeRegUnscaledByte = loadStoreUnscaled (0w0, 0w0, 0w0) and loadRegUnscaled16 = loadStoreUnscaled (0w1, 0w0, 0w1) and storeRegUnscaled16 = loadStoreUnscaled (0w1, 0w0, 0w0) and loadRegUnscaled32 = loadStoreUnscaled (0w2, 0w0, 0w1) and storeRegUnscaled32 = loadStoreUnscaled (0w2, 0w0, 0w0) and loadRegUnscaledFloat = loadStoreUnscaledSIMD (0w2, 0w1, 0w1) and storeRegUnscaledFloat = loadStoreUnscaledSIMD (0w2, 0w1, 0w0) and loadRegUnscaledDouble = loadStoreUnscaledSIMD (0w3, 0w1, 0w1) and storeRegUnscaledDouble = loadStoreUnscaledSIMD (0w3, 0w1, 0w0) val loadRegPostIndex = loadStorePostIndex (0w3, 0w0, 0w1) and storeRegPostIndex = loadStorePostIndex (0w3, 0w0, 0w0) and loadRegPostIndexByte = loadStorePostIndex (0w0, 0w0, 0w1) and storeRegPostIndexByte = loadStorePostIndex (0w0, 0w0, 0w0) val loadRegPreIndex = loadStorePreIndex (0w3, 0w0, 0w1) and storeRegPreIndex = loadStorePreIndex (0w3, 0w0, 0w0) and loadRegPreIndexByte = 
loadStorePreIndex (0w0, 0w0, 0w1) and storeRegPreIndexByte = loadStorePreIndex (0w0, 0w0, 0w0) end (* Load/store with a register offset i.e. an index register. regM is the index register. "option" is an extend value wrapping a scale: the extend kind goes in bits 13-15 and the scale flag in bit 12. *) local fun loadStoreRegRegisterOffset (size, v, opc, xD) ({regT, regN, regM, option}) = let val (opt, s) = case extendLSEncode option of (opt, ScaleOrShift) => (opt, 0w1) | (opt, NoScale) => (opt, 0w0) in SimpleInstr(0wx38200800 orb (size << 0w30) orb (v << 0w26) orb (opc << 0w22) orb - (word8ToWord(xRegOnly regM) << 0w16) orb (opt << 0w13) orb (s << 0w12) orb - (word8ToWord(xRegOrXSP regN) << 0w5) orb word8ToWord(xD regT)) + (word8ToWord32(xRegOnly regM) << 0w16) orb (opt << 0w13) orb (s << 0w12) orb + (word8ToWord32(xRegOrXSP regN) << 0w5) orb word8ToWord32(xD regT)) end in val loadRegIndexed = loadStoreRegRegisterOffset(0w3, 0w0, 0w1, xRegOrXZ) and storeRegIndexed = loadStoreRegRegisterOffset(0w3, 0w0, 0w0, xRegOrXZ) and loadRegIndexedByte = loadStoreRegRegisterOffset(0w0, 0w0, 0w1, xRegOrXZ) and storeRegIndexedByte = loadStoreRegRegisterOffset(0w0, 0w0, 0w0, xRegOrXZ) and loadRegIndexed16 = loadStoreRegRegisterOffset(0w1, 0w0, 0w1, xRegOrXZ) and storeRegIndexed16 = loadStoreRegRegisterOffset(0w1, 0w0, 0w0, xRegOrXZ) and loadRegIndexed32 = loadStoreRegRegisterOffset(0w2, 0w0, 0w1, xRegOrXZ) and storeRegIndexed32 = loadStoreRegRegisterOffset(0w2, 0w0, 0w0, xRegOrXZ) and loadRegIndexedFloat = loadStoreRegRegisterOffset(0w2, 0w1, 0w1, vReg) and storeRegIndexedFloat = loadStoreRegRegisterOffset(0w2, 0w1, 0w0, vReg) and loadRegIndexedDouble = loadStoreRegRegisterOffset(0w3, 0w1, 0w1, vReg) and storeRegIndexedDouble = loadStoreRegRegisterOffset(0w3, 0w1, 0w0, vReg) end local (* Loads and stores with special ordering. 
*) fun loadStoreExclusive(size, o2, l, o1, o0) {regS, regT2, regN, regT} = SimpleInstr(0wx08000000 orb (size << 0w30) orb (o2 << 0w23) orb (l << 0w22) orb - (o1 << 0w21) orb (word8ToWord(xRegOrXZ regS) << 0w16) orb (o0 << 0w15) orb - (word8ToWord(xRegOrXZ regT2) << 0w10) orb (word8ToWord(xRegOrXZ regN) << 0w5) orb - word8ToWord(xRegOrXZ regT)) + (o1 << 0w21) orb (word8ToWord32(xRegOrXZ regS) << 0w16) orb (o0 << 0w15) orb + (word8ToWord32(xRegOrXZ regT2) << 0w10) orb (word8ToWord32(xRegOrXZ regN) << 0w5) orb + word8ToWord32(xRegOrXZ regT)) in (* In the versions below l is 1 for a load and 0 for a store; o2 is 1 for the plain acquire/release forms and 0 for the exclusive forms. Register fields that are not used are filled with XZero. *) fun loadAcquire{regN, regT} = loadStoreExclusive(0w3, 0w1, 0w1, 0w0, 0w1) {regS=XZero, regT2=XZero, regN=regN, regT=regT} and storeRelease{regN, regT} = loadStoreExclusive(0w3, 0w1, 0w0, 0w0, 0w1) {regS=XZero, regT2=XZero, regN=regN, regT=regT} (* Acquire exclusive access to a memory location and load its current value *) and loadAcquireExclusiveRegister{regN, regT} = loadStoreExclusive(0w3, 0w0, 0w1, 0w0, 0w1) {regS=XZero, regT2=XZero, regN=regN, regT=regT} (* Release exclusive access and test whether it succeeded. Sets regS to 0 if successful otherwise 1, in which case we have to repeat the operation. *) and storeReleaseExclusiveRegister{regN, regS, regT} = loadStoreExclusive(0w3, 0w0, 0w0, 0w0, 0w1) {regS=regS, regT2=XZero, regN=regN, regT=regT} end (* Addresses must go in the constant area at the end of the code where they can be found by the GC. *) fun loadAddressConstant(xReg, valu) = LoadAddressLiteral{reg=xReg, value=valu} (* Non-address constants. These may or may not be tagged values. 
*)
fun loadNonAddressConstant(xReg, valu) = LoadNonAddressLiteral{reg=xReg, value=valu}

(* Move-wide immediates: place a 16-bit immediate in one 16-bit lane of the register.
   sf selects the register width (0 = 32 bits, 1 = 64 bits) and opc the operation.
   "shift" is the bit position of the lane and must be a multiple of 16: 0 or 16 for
   either width, 32 or 48 only when sf = 1.  It is encoded in the hw field as shift/16.
   Raises InternalError for any other shift or if the immediate needs more than 16 bits. *)
local
    fun moveWideImmediate(sf, opc) {regD, immediate, shift} =
    let
        val hw =
            case (shift, sf) of
                (0w0, _) => 0w0
            |   (0w16, _) => 0w1
            |   (0w32, 0w1) => 0w2 (* hw = 2 is the lane at bit 32, not 24. *)
            |   (0w48, 0w1) => 0w3
            |   _ => raise InternalError "moveWideImmediate: invalid shift"
        val _ =
            immediate <= 0wxffff orelse raise InternalError "moveWideImmediate: immediate too large"
    in
        SimpleInstr(0wx12800000 orb (sf << 0w31) orb (opc << 0w29) orb
-           (hw << 0w21) orb (immediate << 0w5) orb word8ToWord(xRegOnly regD))
+           (hw << 0w21) orb (wordToWord32 immediate << 0w5) orb word8ToWord32(xRegOnly regD))
    end
in
    (* opc: 0 = "Not", 2 = "Zero", 3 = "Keep" variants, in 32-bit and 64-bit forms. *)
    val moveNot32 = moveWideImmediate(0w0, 0w0)
    and moveZero32 = moveWideImmediate(0w0, 0w2)
    and moveKeep32 = moveWideImmediate(0w0, 0w3)
    and moveNot = moveWideImmediate(0w1, 0w0)
    and moveZero = moveWideImmediate(0w1, 0w2)
    and moveKeep = moveWideImmediate(0w1, 0w3)
end

(* Instructions involved in thread synchronisation. *)
val yield = SimpleInstr 0wxD503203F (* Yield inside a spin-lock. *)
and dmbIsh = SimpleInstr 0wxD5033BBF (* Memory barrier. *)

(* Jump to the address in the register and put the address of the
   next instruction into X30. *)
fun branchAndLinkReg(dest) =
-   SimpleInstr(0wxD63F0000 orb (word8ToWord(xRegOnly dest) << 0w5))
+   SimpleInstr(0wxD63F0000 orb (word8ToWord32(xRegOnly dest) << 0w5))

(* Jump to the address in the register. *)
fun branchRegister(dest) =
-   SimpleInstr(0wxD61F0000 orb (word8ToWord(xRegOnly dest) << 0w5))
+   SimpleInstr(0wxD61F0000 orb (word8ToWord32(xRegOnly dest) << 0w5))

(* Jump to the address in the register and hint this is a return. *)
fun returnRegister(dest) =
-   SimpleInstr(0wxD65F0000 orb (word8ToWord(xRegOnly dest) << 0w5))
+   SimpleInstr(0wxD65F0000 orb (word8ToWord32(xRegOnly dest) << 0w5))

(* Put a label into the code. *)
val setLabel = Label

(* Create a label.  It starts as a one-element list holding a zero location. *)
fun createLabel () = ref [ref 0w0]

(* A conditional or unconditional branch.
*) and conditionalBranch(cond, label) = ConditionalBranch{label=label, jumpCondition=cond, length=ref BrExtended } and unconditionalBranch label = UnconditionalBranch label (* Put the address of a label into a register - used for handlers and cases. *) and loadLabelAddress(reg, label) = LoadLabelAddress{label=label, reg=reg} (* Test a bit in a register and branch if zero/nonzero *) and testBitBranchZero(reg, bit, label) = TestBitBranch{label=label, bitNo=bit, brNonZero=false, reg=reg, length=ref BrExtended} and testBitBranchNonZero(reg, bit, label) = TestBitBranch{label=label, bitNo=bit, brNonZero=true, reg=reg, length=ref BrExtended} (* Compare a register with zero and branch if zero/nonzero *) and compareBranchZero(reg, size, label) = CompareBranch{label=label, brNonZero=false, size=size, reg=reg, length=ref BrExtended} and compareBranchNonZero(reg, size, label) = CompareBranch{label=label, brNonZero=true, size=size, reg=reg, length=ref BrExtended} (* Set the destination register to the value of the first reg if the condition is true otherwise to a, possibly modified, version of the second argument. There are variants that set it unmodified, incremented, inverted and negated. 
*) local fun conditionalSelect (sf, opc, op2) {regD, regFalse, regTrue, cond=CCode cond} = SimpleInstr(0wx1A800000 orb (sf << 0w31) orb (opc << 0w30) orb - (word8ToWord(xRegOrXZ regFalse) << 0w16) orb (word8ToWord cond << 0w12) orb - (op2 << 0w10) orb (word8ToWord(xRegOrXZ regTrue) << 0w5) orb - word8ToWord(xRegOrXZ regD)) + (word8ToWord32(xRegOrXZ regFalse) << 0w16) orb (word8ToWord32 cond << 0w12) orb + (op2 << 0w10) orb (word8ToWord32(xRegOrXZ regTrue) << 0w5) orb + word8ToWord32(xRegOrXZ regD)) in val conditionalSet = conditionalSelect(0w1, 0w0, 0w0) and conditionalSetIncrement = conditionalSelect(0w1, 0w0, 0w1) and conditionalSetInverted = conditionalSelect(0w1, 0w1, 0w0) and conditionalSetNegated = conditionalSelect(0w1, 0w1, 0w1) end (* This combines the effect of a left and right shift. There are various derived forms of this depending on the relative values of immr and imms. if imms >= immr copies imms-immr-1 bits from bit position immr to the lsb bits of the destination. if imms < immr copies imms+1 bits from the lsb bit to bit position regsize-immr. How the remaining bits are affected depends on the instruction. BitField instructions do not affect other bits. UnsignedBitField instructions zero other bits. SignedBitField instructions set the high order bits to a copy of the high order bit copied and zero the low order bits. 
*) local fun bitfield (sf, opc, n) {immr, imms, regN, regD} = SimpleInstr(0wx13000000 orb (sf << 0w31) orb (opc << 0w29) orb (n << 0w22) orb - (immr << 0w16) orb (imms << 0w10) orb (word8ToWord(xRegOrXZ regN) << 0w5) orb - word8ToWord(xRegOrXZ regD)) + (wordToWord32 immr << 0w16) orb (wordToWord32 imms << 0w10) orb (word8ToWord32(xRegOrXZ regN) << 0w5) orb + word8ToWord32(xRegOrXZ regD)) val signedBitfieldMove32 = bitfield(0w0, 0w0, 0w0) and bitfieldMove32 = bitfield(0w0, 0w1, 0w0) and unsignedBitfieldMove32 = bitfield(0w0, 0w2, 0w0) and signedBitfieldMove64 = bitfield(0w1, 0w0, 0w1) and bitfieldMove64 = bitfield(0w1, 0w1, 0w1) and unsignedBitfieldMove64 = bitfield(0w1, 0w2, 0w1) in fun logicalShiftLeft{wordSize=WordSize64, shift, regN, regD} = unsignedBitfieldMove64{immr=Word.~ shift mod 0w64, imms=0w64-0w1-shift, regN=regN, regD=regD} | logicalShiftLeft{wordSize=WordSize32, shift, regN, regD} = unsignedBitfieldMove32{immr=Word.~ shift mod 0w32, imms=0w32-0w1-shift, regN=regN, regD=regD} and logicalShiftRight{wordSize=WordSize64, shift, regN, regD} = unsignedBitfieldMove64{immr=shift, imms=0wx3f, regN=regN, regD=regD} | logicalShiftRight{wordSize=WordSize32, shift, regN, regD} = unsignedBitfieldMove32{immr=shift, imms=0wx1f, regN=regN, regD=regD} and unsignedBitfieldInsertinZeros{wordSize=WordSize64, lsb, width, regN, regD} = unsignedBitfieldMove64{immr=Word.~ lsb mod 0w64, imms=width-0w1, regN=regN, regD=regD} | unsignedBitfieldInsertinZeros{wordSize=WordSize32, lsb, width, regN, regD} = unsignedBitfieldMove32{immr=Word.~ lsb mod 0w32, imms=width-0w1, regN=regN, regD=regD} and arithmeticShiftRight{wordSize=WordSize64, shift, regN, regD} = signedBitfieldMove64{immr=shift, imms=0wx3f, regN=regN, regD=regD} | arithmeticShiftRight{wordSize=WordSize32, shift, regN, regD} = signedBitfieldMove32{immr=shift, imms=0wx1f, regN=regN, regD=regD} and bitfieldInsert{wordSize=WordSize64, lsb, width, regN, regD} = bitfieldMove64{immr=Word.~ lsb mod 0w64, imms=width-0w1, 
regN=regN, regD=regD} | bitfieldInsert{wordSize=WordSize32, lsb, width, regN, regD} = bitfieldMove32{immr=Word.~ lsb mod 0w32, imms=width-0w1, regN=regN, regD=regD} end local (* Logical immediates. AND, OR, XOR and ANDS. Assumes that the immediate value has already been checked as valid. The non-flags versions can use SP as the destination. *) fun logicalImmediate (opc, xD) {wordSize, bits, regN, regD} = let val s = case wordSize of WordSize32 => 0w0 | WordSize64 => 0w1 val {n, imms, immr} = case encodeBitPattern(bits, wordSize) of NONE => raise InternalError "testBitPattern: unable to encode bit pattern" | SOME res => res in SimpleInstr(0wx12000000 orb (opc << 0w29) orb (s << 0w31) orb (n << 0w22) orb - (immr << 0w16) orb (imms << 0w10) orb (word8ToWord(xRegOrXZ regN) << 0w5) orb - word8ToWord(xD regD)) + (wordToWord32 immr << 0w16) orb (wordToWord32 imms << 0w10) orb (word8ToWord32(xRegOrXZ regN) << 0w5) orb + word8ToWord32(xD regD)) end in val bitwiseAndImmediate = logicalImmediate (0w0, xRegOrXSP) and bitwiseOrImmediate = logicalImmediate (0w1, xRegOrXSP) and bitwiseXorImmediate = logicalImmediate (0w2, xRegOrXSP) and bitwiseAndSImmediate = logicalImmediate (0w3, xRegOrXZ) (* Test a bit pattern in a register. If the pattern is within the low-order 32-bits we use a 32-bit test. 
*) fun testBitPattern(reg, bits) = let val w = if bits <= 0wxffffffff then WordSize32 else WordSize64 in bitwiseAndSImmediate({wordSize=w, bits=bits, regN=reg, regD=XZero}) end end local (* Floating point operations - 2 source *) fun floatingPoint2Source (pt, opc) {regM, regN, regD} = - SimpleInstr(0wx1E200800 orb (pt << 0w22) orb (word8ToWord(vReg regM) << 0w16) orb - (opc << 0w12) orb (word8ToWord(vReg regN) << 0w5) orb word8ToWord(vReg regD)) + SimpleInstr(0wx1E200800 orb (pt << 0w22) orb (word8ToWord32(vReg regM) << 0w16) orb + (opc << 0w12) orb (word8ToWord32(vReg regN) << 0w5) orb word8ToWord32(vReg regD)) in val multiplyFloat = floatingPoint2Source(0w0, 0wx0) and divideFloat = floatingPoint2Source(0w0, 0wx1) and addFloat = floatingPoint2Source(0w0, 0wx2) and subtractFloat = floatingPoint2Source(0w0, 0wx3) and multiplyDouble = floatingPoint2Source(0w1, 0wx0) and divideDouble = floatingPoint2Source(0w1, 0wx1) and addDouble = floatingPoint2Source(0w1, 0wx2) and subtractDouble = floatingPoint2Source(0w1, 0wx3) end local (* Move between a floating point and a general register with or without conversion. *) fun fmoveGeneral (sf, s, ptype, mode, opcode, rN, rD) {regN, regD} = SimpleInstr(0wx1E200000 orb (sf << 0w31) orb (s << 0w29) orb (ptype << 0w22) orb (mode << 0w19) orb (opcode << 0w16) orb - (word8ToWord(rN regN) << 0w5) orb word8ToWord(rD regD)) + (word8ToWord32(rN regN) << 0w5) orb word8ToWord32(rD regD)) open IEEEReal in (* Moves without conversion *) val moveGeneralToFloat = fmoveGeneral(0w0, 0w0, 0w0, 0w0, 0w7, xRegOrXZ, vReg) and moveFloatToGeneral = fmoveGeneral(0w0, 0w0, 0w0, 0w0, 0w6, vReg, xRegOnly) and moveGeneralToDouble = fmoveGeneral(0w1, 0w0, 0w1, 0w0, 0w7, xRegOrXZ, vReg) and moveDoubleToGeneral = fmoveGeneral(0w1, 0w0, 0w1, 0w0, 0w6, vReg, xRegOnly) (* Moves with conversion - signed. The argument is a 64-bit value. 
*) and convertIntToFloat = fmoveGeneral(0w1, 0w0, 0w0, 0w0, 0w2, xRegOrXZ, vReg) and convertIntToDouble = fmoveGeneral(0w1, 0w0, 0w1, 0w0, 0w2, xRegOrXZ, vReg) fun convertFloatToInt TO_NEAREST = fmoveGeneral(0w1, 0w0, 0w0, 0w0, 0w4, vReg, xRegOnly) (* fcvtas *) | convertFloatToInt TO_NEGINF = fmoveGeneral(0w1, 0w0, 0w0, 0w2, 0w0, vReg, xRegOnly) (* fcvtms *) | convertFloatToInt TO_POSINF = fmoveGeneral(0w1, 0w0, 0w0, 0w1, 0w0, vReg, xRegOnly) (* fcvtps *) | convertFloatToInt TO_ZERO = fmoveGeneral(0w1, 0w0, 0w0, 0w3, 0w0, vReg, xRegOnly) (* fcvtzs *) and convertDoubleToInt TO_NEAREST = fmoveGeneral(0w1, 0w0, 0w1, 0w0, 0w4, vReg, xRegOnly) (* fcvtas *) | convertDoubleToInt TO_NEGINF = fmoveGeneral(0w1, 0w0, 0w1, 0w2, 0w0, vReg, xRegOnly) (* fcvtms *) | convertDoubleToInt TO_POSINF = fmoveGeneral(0w1, 0w0, 0w1, 0w1, 0w0, vReg, xRegOnly) (* fcvtps *) | convertDoubleToInt TO_ZERO = fmoveGeneral(0w1, 0w0, 0w1, 0w3, 0w0, vReg, xRegOnly) (* fcvtzs *) end local fun floatingPtCompare(ptype, opc) {regM, regN} = SimpleInstr(0wx1E202000 orb (ptype << 0w22) orb - (word8ToWord(vReg regM) << 0w16) orb (word8ToWord(vReg regN) << 0w5) orb + (word8ToWord32(vReg regM) << 0w16) orb (word8ToWord32(vReg regN) << 0w5) orb (opc << 0w3)) in val compareFloat = floatingPtCompare(0w0, 0w0) (* fcmp *) and compareDouble = floatingPtCompare(0w1, 0w0) (* It is also possible to compare a single register with zero using opc=1/3 *) end local (* Floating point single source. 
*) fun floatingPtSingle (ptype, opc) {regN, regD} = SimpleInstr(0wx1E204000 orb (ptype << 0w22) orb (opc << 0w15) orb - (word8ToWord(vReg regN) << 0w5) orb word8ToWord(vReg regD)) + (word8ToWord32(vReg regN) << 0w5) orb word8ToWord32(vReg regD)) in val moveFloatToFloat = floatingPtSingle(0w0, 0wx0) and absFloat = floatingPtSingle(0w0, 0wx1) and negFloat = floatingPtSingle(0w0, 0wx2) and convertFloatToDouble = floatingPtSingle(0w0, 0wx5) and moveDoubleToDouble = floatingPtSingle(0w1, 0wx0) and absDouble = floatingPtSingle(0w1, 0wx1) and negDouble = floatingPtSingle(0w1, 0wx2) and convertDoubleToFloat = floatingPtSingle(0w1, 0wx4) end (* This word is put in after a call to the RTS trap-handler. All the registers are saved and restored across a call to the trap-handler; the register mask contains those that may contain an address and so need to be scanned and possibly updated if there is a GC. *) fun registerMask(regs) = let fun addToMask(r, mask) = mask orb (0w1 << word8ToWord(xRegOnly r)) val maskWord = List.foldl addToMask 0w0 regs in SimpleInstr(0wx02000000 (* Reserved instr range. *) orb maskWord) end (* Size of each code word. 
*) fun codeSize (SimpleInstr _) = 1 (* Number of 32-bit words *) | codeSize (LoadAddressLiteral _) = 1 | codeSize (LoadNonAddressLiteral _) = 1 | codeSize (Label _) = 0 | codeSize (UnconditionalBranch _) = 1 | codeSize (LoadLabelAddress _) = 1 | codeSize (ConditionalBranch { length=ref BrShort, ...}) = 1 | codeSize (ConditionalBranch { length=ref BrExtended, ...}) = 2 | codeSize (TestBitBranch { length=ref BrShort, ...}) = 1 | codeSize (TestBitBranch { length=ref BrExtended, ...}) = 2 | codeSize (CompareBranch { length=ref BrShort, ...}) = 1 | codeSize (CompareBranch { length=ref BrExtended, ...}) = 2 (* Store a 32-bit value in the code *) fun writeInstr(value, wordAddr, seg) = let fun putBytes(value, a, seg, i) = if i = 0w4 then () else ( - byteVecSet(seg, a+i, wordToWord8(value andb 0wxff)); + byteVecSet(seg, a+i, word32ToWord8(value andb 0wxff)); putBytes(value >> 0w8, a, seg, i+0w1) ) in - putBytes(value, wordAddr << 0w2, seg, 0w0) + putBytes(value, Word.<<(wordAddr, 0w2), seg, 0w0) end (* Store a 64-bit constant in the code area. *) fun write64Bit(value, word64Addr, seg) = let fun putBytes(value, a, seg, i) = if i = 0w8 then () else ( byteVecSet(seg, a+i, Word8.fromLarge(Word64.toLarge value)); putBytes(Word64.>>(value, 0w8), a, seg, i+0w1) ) in - putBytes(value, word64Addr << 0w3, seg, 0w0) + putBytes(value, Word.<<(word64Addr, 0w3), seg, 0w0) end (* Set the sizes of branches depending on the distance to the destination. *) fun setLabelsAndSizes ops = let (* Set the labels and get the current size of the code. *) fun setLabels(Label(ref labs) :: ops, ic) = (List.app(fn d => d := ic) labs; setLabels(ops, ic)) | setLabels(oper :: ops, ic) = setLabels(ops, ic + Word.fromInt(codeSize oper)) | setLabels([], ic) = ic (* Set the labels and adjust the sizes, repeating until it never gets smaller *) fun setLabAndSize(ops, lastSize) = let (* See if we can shorten any branches. The "addr" is the original address since that's what we've used to set the labels. 
*) fun adjust([], _) = () | adjust(ConditionalBranch { length as ref BrExtended, label=ref labs, ...} :: instrs, addr) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt addr in - if offset < Word.toInt(0w1 << 0w18) andalso offset >= ~ (Word.toInt(0w1 << 0w18)) + if offset < Word32.toInt(0w1 << 0w18) andalso offset >= ~ (Word32.toInt(0w1 << 0w18)) then length := BrShort else (); adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *) end | adjust(TestBitBranch { length as ref BrExtended, label=ref labs, ...} :: instrs, addr) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt addr in if offset < 0x2000 andalso offset >= ~ 0x2000 then length := BrShort else (); adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *) end | adjust(CompareBranch { length as ref BrExtended, label=ref labs, ...} :: instrs, addr) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt addr in if offset < 0x40000 andalso offset >= ~ 0x40000 then length := BrShort else (); adjust(instrs, addr + 0w2) (* N.B. 
Size BEFORE any adjustment *) end | adjust(instr :: instrs, addr) = adjust(instrs, addr + Word.fromInt(codeSize instr)) val () = adjust(ops, 0w0) val nextSize = setLabels(ops, 0w0) in if nextSize < lastSize then setLabAndSize(ops, nextSize) else if nextSize = lastSize then lastSize else raise InternalError "setLabAndSize - size increased" end in setLabAndSize(ops, setLabels(ops, 0w0)) end fun genCode(ops, addressConsts, nonAddressConsts) = let val codeSize = setLabelsAndSizes ops (* Number of 32-bit instructions *) val wordsOfCode = (codeSize + 0w1) div 0w2 (* Round up to 64-bits *) val paddingWord = if Word.andb(codeSize, 0w1) = 0w1 then [SimpleInstr nopCode] else [] val numNonAddrConsts = Word.fromInt(List.length nonAddressConsts) and numAddrConsts = Word.fromInt(List.length addressConsts) val segSize = wordsOfCode + numAddrConsts + numNonAddrConsts + 0w4 (* 4 extra words *) val codeVec = byteVecMake segSize fun testBit(bitNo, brNonZero, offset, reg) = 0wx36000000 orb (if bitNo >= 0w32 then 0wx80000000 else 0w0) orb (if brNonZero then 0wx01000000 else 0w0) orb - (word8ToWord(Word8.andb(bitNo, 0wx3f)) << 0w19) orb - ((offset andb 0wx3fff) << 0w5) orb word8ToWord(xRegOnly reg) + (word8ToWord32(Word8.andb(bitNo, 0wx3f)) << 0w19) orb + ((offset andb 0wx3fff) << 0w5) orb word8ToWord32(xRegOnly reg) and compareBranch(size, brNonZero, offset, reg) = 0wx34000000 orb (case size of WordSize64 => 0wx80000000 | WordSize32 => 0w0) orb (if brNonZero then 0wx01000000 else 0w0) orb - ((offset andb 0wx7ffff) << 0w5) orb word8ToWord(xRegOnly reg) + ((offset andb 0wx7ffff) << 0w5) orb word8ToWord32(xRegOnly reg) fun genCodeWords([], _ , _, _) = () | genCodeWords(SimpleInstr code :: tail, wordNo, aConstNum, nonAConstNum) = ( writeInstr(code, wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum) ) | genCodeWords(LoadAddressLiteral{reg, ...} :: tail, wordNo, aConstNum, nonAConstNum) = let (* The offset is in 32-bit words. 
The first of the constants is at offset wordsOfCode+3 *) val offsetOfConstant = (wordsOfCode+numNonAddrConsts+0w3+aConstNum)*0w2 - wordNo val _ = offsetOfConstant < 0wx100000 orelse raise InternalError "Offset to constant is too large" - val code = 0wx58000000 orb (offsetOfConstant << 0w5) orb word8ToWord(xRegOnly reg) + val code = 0wx58000000 orb (wordToWord32 offsetOfConstant << 0w5) orb word8ToWord32(xRegOnly reg) in writeInstr(code, wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum+0w1, nonAConstNum) end | genCodeWords(LoadNonAddressLiteral{reg, ...} :: tail, wordNo, aConstNum, nonAConstNum) = let (* The offset is in 32-bit words. *) val offsetOfConstant = (wordsOfCode+nonAConstNum)*0w2 - wordNo val _ = offsetOfConstant < 0wx100000 orelse raise InternalError "Offset to constant is too large" - val code = 0wx58000000 orb (offsetOfConstant << 0w5) orb word8ToWord(xRegOnly reg) + val code = 0wx58000000 orb (wordToWord32 offsetOfConstant << 0w5) orb word8ToWord32(xRegOnly reg) in writeInstr(code, wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum+0w1) end | genCodeWords(Label _ :: tail, wordNo, aConstNum, nonAConstNum) = genCodeWords(tail, wordNo, aConstNum, nonAConstNum) (* No code. 
*) | genCodeWords(UnconditionalBranch(ref labs) :: tail, wordNo, aConstNum, nonAConstNum) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt wordNo - val _ = (offset < Word.toInt(0w1 << 0w25) andalso offset >= ~ (Word.toInt(0w1 << 0w25))) + val _ = (offset < Word32.toInt(0w1 << 0w25) andalso offset >= ~ (Word32.toInt(0w1 << 0w25))) orelse raise InternalError "genCodeWords: branch too far"; in - writeInstr(0wx14000000 orb (Word.fromInt offset andb 0wx03ffffff), wordNo, codeVec); + writeInstr(0wx14000000 orb (Word32.fromInt offset andb 0wx03ffffff), wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum) end | genCodeWords(ConditionalBranch{ label=ref labs, jumpCondition=CCode cond, length=ref BrShort }:: tail, wordNo, aConstNum, nonAConstNum) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt wordNo - val _ = (offset < Word.toInt(0w1 << 0w18) andalso offset >= ~ (Word.toInt(0w1 << 0w18))) + val _ = (offset < Word32.toInt(0w1 << 0w18) andalso offset >= ~ (Word32.toInt(0w1 << 0w18))) orelse raise InternalError "genCodeWords: branch too far" in - writeInstr(0wx54000000 orb ((Word.fromInt offset andb 0wx07ffff) << 0w5) - orb word8ToWord cond, wordNo, codeVec); + writeInstr(0wx54000000 orb ((Word32.fromInt offset andb 0wx07ffff) << 0w5) + orb word8ToWord32 cond, wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum) end | genCodeWords(ConditionalBranch{ label=ref labs, jumpCondition=CCode cond, length=ref BrExtended }:: tail, wordNo, aConstNum, nonAConstNum) = let (* Long form - put a conditional branch with reversed sense round an unconditional branch. *) val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt (wordNo + 0w1) (* Next instruction. 
*) - val _ = (offset < Word.toInt(0w1 << 0w25) andalso offset >= ~ (Word.toInt(0w1 << 0w25))) + val _ = (offset < Word32.toInt(0w1 << 0w25) andalso offset >= ~ (Word32.toInt(0w1 << 0w25))) orelse raise InternalError "genCodeWords: branch too far" val revCond = Word8.xorb(cond, 0w1) in - writeInstr(0wx54000000 orb (0w2 << 0w5) orb word8ToWord revCond, wordNo, codeVec); - writeInstr(0wx14000000 orb (Word.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec); + writeInstr(0wx54000000 orb (0w2 << 0w5) orb word8ToWord32 revCond, wordNo, codeVec); + writeInstr(0wx14000000 orb (Word32.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec); genCodeWords(tail, wordNo+0w2, aConstNum, nonAConstNum) end | genCodeWords(LoadLabelAddress{label=ref labs, reg} :: tail, wordNo, aConstNum, nonAConstNum) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt wordNo val _ = offset < 0x100000 orelse offset >= ~ 0x100000 orelse raise InternalError "Offset to label address is too large" - val code = 0wx10000000 orb ((Word.fromInt offset andb 0wx7ffff) << 0w5) orb word8ToWord(xRegOnly reg) + val code = 0wx10000000 orb ((Word32.fromInt offset andb 0wx7ffff) << 0w5) orb word8ToWord32(xRegOnly reg) in writeInstr(code, wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum) end | genCodeWords(TestBitBranch{label=ref labs, bitNo, brNonZero, reg, length=ref BrExtended} :: tail, wordNo, aConstNum, nonAConstNum) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt (wordNo + 0w1) (* Next instruction *) - val _ = (offset < Word.toInt(0w1 << 0w25) andalso offset >= ~ (Word.toInt(0w1 << 0w25))) + val _ = (offset < Word32.toInt(0w1 << 0w25) andalso offset >= ~ (Word32.toInt(0w1 << 0w25))) orelse raise InternalError "genCodeWords: branch too far" val _ = bitNo <= 0w63 orelse raise InternalError "TestBitBranch: bit number > 63" val code = testBit(bitNo, (* Invert test *) not brNonZero, 0w2 (* Skip branch *), reg) in writeInstr(code, wordNo, codeVec); - 
writeInstr(0wx14000000 orb (Word.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec); + writeInstr(0wx14000000 orb (Word32.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec); genCodeWords(tail, wordNo+0w2, aConstNum, nonAConstNum) end | genCodeWords(TestBitBranch{label=ref labs, bitNo, brNonZero, reg, length=ref BrShort} :: tail, wordNo, aConstNum, nonAConstNum) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt wordNo val _ = (offset < 0x2000 andalso offset >= ~ 0x2000) orelse raise InternalError "TestBitBranch: Offset to label address is too large" val _ = bitNo <= 0w63 orelse raise InternalError "TestBitBranch: bit number > 63" - val code = testBit(bitNo, brNonZero, Word.fromInt offset, reg) + val code = testBit(bitNo, brNonZero, Word32.fromInt offset, reg) in writeInstr(code, wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum) end | genCodeWords(CompareBranch{label=ref labs, brNonZero, size, reg, length=ref BrExtended} :: tail, wordNo, aConstNum, nonAConstNum) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt (wordNo+0w1) - val _ = (offset < Word.toInt(0w1 << 0w25) andalso offset >= ~ (Word.toInt(0w1 << 0w25))) + val _ = (offset < Word32.toInt(0w1 << 0w25) andalso offset >= ~ (Word32.toInt(0w1 << 0w25))) orelse raise InternalError "genCodeWords: branch too far" val code = compareBranch(size, (* Invert test *) not brNonZero, 0w2, reg) in writeInstr(code, wordNo, codeVec); - writeInstr(0wx14000000 orb (Word.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec); + writeInstr(0wx14000000 orb (Word32.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec); genCodeWords(tail, wordNo+0w2, aConstNum, nonAConstNum) end | genCodeWords(CompareBranch{label=ref labs, brNonZero, size, reg, length=ref BrShort} :: tail, wordNo, aConstNum, nonAConstNum) = let val dest = !(hd labs) val offset = Word.toInt dest - Word.toInt wordNo val _ = (offset < 0x40000 andalso offset >= ~ 0x40000) orelse raise InternalError 
"CompareBranch: Offset to label address is too large" - val code = compareBranch(size, brNonZero, Word.fromInt offset, reg) + val code = compareBranch(size, brNonZero, Word32.fromInt offset, reg) in writeInstr(code, wordNo, codeVec); genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum) end in genCodeWords (ops @ paddingWord, 0w0, 0w0, 0w0); (* Copy in the non-address constants. *) List.foldl(fn (cVal, addr) => (write64Bit(cVal, addr, codeVec); addr+0w1)) wordsOfCode nonAddressConsts; (codeVec (* Return the completed code. *), wordsOfCode+numNonAddrConsts (* And the size in 64-bit words. *)) end (* Store a 64-bit value in the code *) fun set64(value, wordNo, seg) = let val addrs = wordNo * 0w8 fun putBytes(value, a, seg, i) = if i = 0w8 then () else ( byteVecSet(seg, a+i, Word8.fromInt(value mod 256)); putBytes(value div 256, a, seg, i+0w1) ) in putBytes(value, addrs, seg, 0w0) end (* Print the instructions in the code. *) fun printCode (codeVec, functionName, wordsOfCode, printStream) = let val numInstructions = wordsOfCode * 0w2 (* Words is number of 64-bit words *) fun printHex (v, n) = let val s = Word.fmt StringCvt.HEX v val pad = CharVector.tabulate(Int.max(0, n-size s), fn _ => #"0") in printStream pad; printStream s end fun printCondition 0wx0 = printStream "eq" | printCondition 0wx1 = printStream "ne" | printCondition 0wx2 = printStream "cs" | printCondition 0wx3 = printStream "cc" | printCondition 0wx4 = printStream "mi" | printCondition 0wx5 = printStream "pl" | printCondition 0wx6 = printStream "vs" | printCondition 0wx7 = printStream "vc" | printCondition 0wx8 = printStream "hi" | printCondition 0wx9 = printStream "ls" | printCondition 0wxa = printStream "ge" | printCondition 0wxb = printStream "lt" | printCondition 0wxc = printStream "gt" | printCondition 0wxd = printStream "le" | printCondition 0wxe = printStream "al" | printCondition _ = printStream "nv" (* Normal XReg with 31 being XZ *) fun prXReg 0w31 = printStream "xz" - | prXReg r = 
printStream("x" ^ Word.fmt StringCvt.DEC r) + | prXReg r = printStream("x" ^ Word32.fmt StringCvt.DEC r) (* XReg when 31 is SP *) fun prXRegOrSP 0w31 = printStream "sp" - | prXRegOrSP r = printStream("x" ^ Word.fmt StringCvt.DEC r) + | prXRegOrSP r = printStream("x" ^ Word32.fmt StringCvt.DEC r) (* Normal WReg with 31 being WZ *) fun prWReg 0w31 = printStream "wz" - | prWReg r = printStream("w" ^ Word.fmt StringCvt.DEC r) + | prWReg r = printStream("w" ^ Word32.fmt StringCvt.DEC r) (* WReg when 31 is WSP *) fun prWRegOrSP 0w31 = printStream "wsp" - | prWRegOrSP r = printStream("w" ^ Word.fmt StringCvt.DEC r) + | prWRegOrSP r = printStream("w" ^ Word32.fmt StringCvt.DEC r) (* Each instruction is 32-bytes. *) fun printWordAt wordNo = let - val byteNo = wordNo << 0w2 + val byteNo = Word.<<(wordNo, 0w2) val () = printHex(byteNo, 6) (* Address *) val () = printStream "\t" val wordValue = - word8ToWord (codeVecGet (codeVec, byteNo)) orb - (word8ToWord (codeVecGet (codeVec, byteNo+0w1)) << 0w8) orb - (word8ToWord (codeVecGet (codeVec, byteNo+0w2)) << 0w16) orb - (word8ToWord (codeVecGet (codeVec, byteNo+0w3)) << 0w24) - val () = printHex(wordValue, 8) (* Instr as hex *) + word8ToWord32 (codeVecGet (codeVec, byteNo)) orb + (word8ToWord32 (codeVecGet (codeVec, byteNo+0w1)) << 0w8) orb + (word8ToWord32 (codeVecGet (codeVec, byteNo+0w2)) << 0w16) orb + (word8ToWord32 (codeVecGet (codeVec, byteNo+0w3)) << 0w24) + val () = printHex(word32ToWord wordValue, 8) (* Instr as hex *) val () = printStream "\t" in if (wordValue andb 0wxfffffc1f) = 0wxD61F0000 then let val rN = (wordValue andb 0wx3e0) >> 0w5 in printStream "br\tx"; - printStream(Word.fmt StringCvt.DEC rN) + printStream(Word32.fmt StringCvt.DEC rN) end else if (wordValue andb 0wxfffffc1f) = 0wxD63F0000 then let val rN = (wordValue andb 0wx3e0) >> 0w5 in printStream "blr\tx"; - printStream(Word.fmt StringCvt.DEC rN) + printStream(Word32.fmt StringCvt.DEC rN) end else if (wordValue andb 0wxfffffc1f) = 0wxD65F0000 then let 
val rN = (wordValue andb 0wx3e0) >> 0w5 in printStream "ret\tx"; - printStream(Word.fmt StringCvt.DEC rN) + printStream(Word32.fmt StringCvt.DEC rN) end else if wordValue = 0wxD503201F then printStream "nop" else if wordValue = 0wxD503203F then printStream "yield" else if wordValue = 0wxD5033BBF then printStream "dmb\tish" else if (wordValue andb 0wx1f800000) = 0wx12800000 then (* Move of constants. Includes movn and movk. *) let val rD = wordValue andb 0wx1f - val imm16 = Word.toInt((wordValue >> 0w5) andb 0wxffff) + val imm16 = Word32.toInt((wordValue >> 0w5) andb 0wxffff) val isXReg = (wordValue andb 0wx80000000) <> 0w0 val opc = (wordValue >> 0w29) andb 0w3 val shift = (wordValue >> 0w21) andb 0w3 in printStream (if opc = 0w3 then "movk\t" else "mov\t"); printStream (if isXReg then "x" else "w"); - printStream(Word.fmt StringCvt.DEC rD); + printStream(Word32.fmt StringCvt.DEC rD); printStream ",#"; printStream(Int.toString(if opc = 0w0 then ~1 - imm16 else imm16)); if shift = 0w0 then () - else (printStream ",lsl #"; printStream(Word.fmt StringCvt.DEC (shift*0w16))) + else (printStream ",lsl #"; printStream(Word32.fmt StringCvt.DEC (shift*0w16))) end else if (wordValue andb 0wx3b000000) = 0wx39000000 then (* Load/Store with unsigned, scaled offset. *) let (* The offset is in units of the size of the operand. 
*) val size = wordValue >> 0w30 and v = (wordValue >> 0w26) andb 0w1 and opc = (wordValue >> 0w22) andb 0w3 val rT = wordValue andb 0wx1f and rN = (wordValue andb 0wx3e0) >> 0w5 and imm12 = (wordValue andb 0wx3ffc00) >> 0w10 val (opcode, r, scale) = case (size, v, opc) of (0w0, 0w0, 0w0) => ("strb", "w", 0w0) | (0w0, 0w0, 0w1) => ("ldrb", "w", 0w0) | (0w1, 0w0, 0w0) => ("strh", "w", 0w2) | (0w1, 0w0, 0w1) => ("ldrh", "w", 0w2) | (0w2, 0w0, 0w0) => ("str", "w", 0w4) | (0w2, 0w0, 0w1) => ("ldr", "w", 0w4) | (0w3, 0w0, 0w0) => ("str", "x", 0w8) | (0w3, 0w0, 0w1) => ("ldr", "x", 0w8) | (0w2, 0w1, 0w0) => ("str", "s", 0w4) | (0w2, 0w1, 0w1) => ("ldr", "s", 0w4) | (0w3, 0w1, 0w0) => ("str", "d", 0w8) | (0w3, 0w1, 0w1) => ("ldr", "d", 0w8) | _ => ("??", "?", 0w1) in - printStream opcode; printStream "\t"; printStream r; printStream(Word.fmt StringCvt.DEC rT); - printStream ",[x"; printStream(Word.fmt StringCvt.DEC rN); - printStream ",#"; printStream(Word.fmt StringCvt.DEC(imm12*scale)); + printStream opcode; printStream "\t"; printStream r; printStream(Word32.fmt StringCvt.DEC rT); + printStream ",[x"; printStream(Word32.fmt StringCvt.DEC rN); + printStream ",#"; printStream(Word32.fmt StringCvt.DEC(imm12*scale)); printStream "]" end else if (wordValue andb 0wx3b200c00) = 0wx38000000 then (* Load/store unscaled immediate *) let val size = wordValue >> 0w30 and v = (wordValue >> 0w26) andb 0w1 and opc = (wordValue >> 0w22) andb 0w3 val rT = wordValue andb 0wx1f and rN = (wordValue andb 0wx3e0) >> 0w5 and imm9 = (wordValue andb 0wx1ff000) >> 0w12 val imm9Text = if imm9 > 0wxff - then "-" ^ Word.fmt StringCvt.DEC (0wx200 - imm9) - else Word.fmt StringCvt.DEC imm9 + then "-" ^ Word32.fmt StringCvt.DEC (0wx200 - imm9) + else Word32.fmt StringCvt.DEC imm9 val (opcode, r) = case (size, v, opc) of (0w0, 0w0, 0w0) => ("strub", "w") | (0w0, 0w0, 0w1) => ("ldrub", "w") | (0w1, 0w0, 0w0) => ("struh", "w") | (0w1, 0w0, 0w1) => ("ldruh", "w") | (0w2, 0w0, 0w0) => ("stur", "w") | (0w2, 
0w0, 0w1) => ("ldur", "w") | (0w3, 0w0, 0w0) => ("stur", "x") | (0w3, 0w0, 0w1) => ("ldur", "x") | (0w2, 0w1, 0w0) => ("stur", "s") | (0w2, 0w1, 0w1) => ("ldur", "s") | (0w3, 0w1, 0w0) => ("stur", "d") | (0w3, 0w1, 0w1) => ("ldur", "d") | _ => ("???", "?") in printStream opcode; printStream "\t"; printStream r; - printStream(Word.fmt StringCvt.DEC rT); - printStream ",[x"; printStream(Word.fmt StringCvt.DEC rN); + printStream(Word32.fmt StringCvt.DEC rT); + printStream ",[x"; printStream(Word32.fmt StringCvt.DEC rN); printStream ",#"; printStream imm9Text; printStream "]" end else if (wordValue andb 0wx3b200c00) = 0wx38000400 then (* Load/store immediate post-indexed *) let val size = wordValue >> 0w30 and v = (wordValue >> 0w26) andb 0w1 and opc = (wordValue >> 0w22) andb 0w3 val rT = wordValue andb 0wx1f and rN = (wordValue andb 0wx3e0) >> 0w5 and imm9 = (wordValue andb 0wx1ff000) >> 0w12 val imm9Text = if imm9 > 0wxff - then "-" ^ Word.fmt StringCvt.DEC (0wx200 - imm9) - else Word.fmt StringCvt.DEC imm9 + then "-" ^ Word32.fmt StringCvt.DEC (0wx200 - imm9) + else Word32.fmt StringCvt.DEC imm9 val (opcode, r) = case (size, v, opc) of (0w0, 0w0, 0w0) => ("strb", "w") | (0w0, 0w0, 0w1) => ("ldrb", "w") | (0w3, 0w0, 0w0) => ("str", "x") | (0w3, 0w0, 0w1) => ("ldr", "x") | _ => ("???", "?") in printStream opcode; printStream "\t"; printStream r; - printStream(Word.fmt StringCvt.DEC rT); - printStream ",[x"; printStream(Word.fmt StringCvt.DEC rN); + printStream(Word32.fmt StringCvt.DEC rT); + printStream ",[x"; printStream(Word32.fmt StringCvt.DEC rN); printStream "],#"; printStream imm9Text end else if (wordValue andb 0wx3b200c00) = 0wx38000c00 then (* Load/store immediate pre-indexed *) let val size = wordValue >> 0w30 and v = (wordValue >> 0w26) andb 0w1 and opc = (wordValue >> 0w22) andb 0w3 val rT = wordValue andb 0wx1f and rN = (wordValue andb 0wx3e0) >> 0w5 and imm9 = (wordValue andb 0wx1ff000) >> 0w12 val imm9Text = if imm9 > 0wxff - then "-" ^ Word.fmt 
StringCvt.DEC (0wx200 - imm9) - else Word.fmt StringCvt.DEC imm9 + then "-" ^ Word32.fmt StringCvt.DEC (0wx200 - imm9) + else Word32.fmt StringCvt.DEC imm9 val (opcode, r) = case (size, v, opc) of (0w0, 0w0, 0w0) => ("strb", "w") | (0w0, 0w0, 0w1) => ("ldrb", "w") | (0w3, 0w0, 0w0) => ("str", "x") | (0w3, 0w0, 0w1) => ("ldr", "x") | _ => ("???", "?") in printStream opcode; printStream "\t"; printStream r; - printStream(Word.fmt StringCvt.DEC rT); - printStream ",[x"; printStream(Word.fmt StringCvt.DEC rN); + printStream(Word32.fmt StringCvt.DEC rT); + printStream ",[x"; printStream(Word32.fmt StringCvt.DEC rN); printStream ",#"; printStream imm9Text; printStream "]!" end else if (wordValue andb 0wx3b200c00) = 0wx38200800 then (* Load/store with register offset i.e. an index register. *) let val size = wordValue >> 0w30 and v = (wordValue >> 0w26) andb 0w1 and opc = (wordValue >> 0w22) andb 0w3 val rT = wordValue andb 0wx1f and rN = (wordValue >> 0w5) andb 0wx1f and rM = (wordValue >> 0w16) andb 0wx1f val option = (wordValue >> 0w13) andb 0w7 val s = (wordValue andb 0wx1000) <> 0w0 val (opcode, r) = case (size, v, opc) of (0w0, 0w0, 0w0) => ("strb", "w") | (0w0, 0w0, 0w1) => ("ldrb", "w") | (0w1, 0w0, 0w0) => ("strh", "w") | (0w1, 0w0, 0w1) => ("ldrh", "w") | (0w2, 0w0, 0w0) => ("str", "w") | (0w2, 0w0, 0w1) => ("ldr", "w") | (0w3, 0w0, 0w0) => ("str", "x") | (0w3, 0w0, 0w1) => ("ldr", "x") | (0w2, 0w1, 0w0) => ("str", "s") | (0w2, 0w1, 0w1) => ("ldr", "s") | (0w3, 0w1, 0w0) => ("str", "d") | (0w3, 0w1, 0w1) => ("ldr", "d") | _ => ("???", "?") val (extend, xr) = case option of 0w2 => (" uxtw", "w") | 0w3 => if s then (" lsl", "x") else ("", "x") | 0w6 => (" sxtw", "w") | 0w7 => (" sxtx", "x") | _ => ("?", "?") val indexShift = case (size, s) of (0w0, true) => " #1" | (0w1, true) => " #1" | (0w2, true) => " #2" | (0w3, true) => " #3" | _ => "" in printStream opcode; printStream "\t"; printStream r; - printStream(Word.fmt StringCvt.DEC rT); - printStream ",[x"; 
printStream(Word.fmt StringCvt.DEC rN); - printStream ","; printStream xr; printStream(Word.fmt StringCvt.DEC rM); + printStream(Word32.fmt StringCvt.DEC rT); + printStream ",[x"; printStream(Word32.fmt StringCvt.DEC rN); + printStream ","; printStream xr; printStream(Word32.fmt StringCvt.DEC rM); printStream extend; printStream indexShift; printStream "]" end else if (wordValue andb 0wx3f000000) = 0wx08000000 then (* Loads and stores with special ordering. *) let val size = (wordValue >> 0w30) andb 0w3 and o2 = (wordValue >> 0w23) andb 0w1 and l = (wordValue >> 0w22) andb 0w1 and o1 = (wordValue >> 0w21) andb 0w1 and o0 = (wordValue >> 0w15) andb 0w1 val rT = wordValue andb 0wx1f and rN = (wordValue >> 0w5) andb 0wx1f and rS = (wordValue >> 0w16) andb 0wx1f val (opcode, r) = case (size, o2, l, o1, o0) of (0w3, 0w1, 0w1, 0w0, 0w1) => ("ldar", "x") | (0w3, 0w1, 0w0, 0w0, 0w1) => ("stlr", "x") | (0w3, 0w0, 0w1, 0w0, 0w1) => ("ldaxr", "x") | (0w3, 0w0, 0w0, 0w0, 0w1) => ("stlxr", "x") | _ => ("??", "?") in printStream opcode; printStream "\t"; if opcode = "stlxr" - then (printStream "w"; printStream(Word.fmt StringCvt.DEC rS); printStream ",") + then (printStream "w"; printStream(Word32.fmt StringCvt.DEC rS); printStream ",") else (); printStream r; - printStream(Word.fmt StringCvt.DEC rT); - printStream ",[x"; printStream(Word.fmt StringCvt.DEC rN); printStream "]" + printStream(Word32.fmt StringCvt.DEC rT); + printStream ",[x"; printStream(Word32.fmt StringCvt.DEC rN); printStream "]" end else if (wordValue andb 0wxbf800000) = 0wx91000000 then let (* Add/Subtract a 12-bit immediate with possible shift. 
*) val rD = wordValue andb 0wx1f and rN = (wordValue andb 0wx3e0) >> 0w5 and imm12 = (wordValue andb 0wx3ffc00) >> 0w10 and shiftBit = wordValue andb 0wx400000 val imm = if shiftBit <> 0w0 then imm12 << 0w12 else imm12 val opr = if (wordValue andb 0wx40000000) = 0w0 then "add" else "sub" in printStream opr; printStream "\t"; prXRegOrSP rD; printStream ","; prXRegOrSP rN; - printStream ",#"; printStream(Word.fmt StringCvt.DEC imm) + printStream ",#"; printStream(Word32.fmt StringCvt.DEC imm) end else if (wordValue andb 0wxff800000) = 0wxF1000000 then let (* Subtract a 12-bit immediate with possible shift, setting flags. *) val rD = wordValue andb 0wx1f and rN = (wordValue andb 0wx3e0) >> 0w5 and imm12 = (wordValue andb 0wx3ffc00) >> 0w10 and shiftBit = wordValue andb 0wx400000 val imm = if shiftBit <> 0w0 then imm12 << 0w12 else imm12 in if rD = 0w31 then printStream "cmp\t" else (printStream "subs\t"; prXReg rD; printStream ","); - prXRegOrSP rN; printStream ",#"; printStream(Word.fmt StringCvt.DEC imm) + prXRegOrSP rN; printStream ",#"; printStream(Word32.fmt StringCvt.DEC imm) end else if (wordValue andb 0wx7fe0ffe0) = 0wx2A0003E0 then (* Move reg,reg. This is a subset of ORR shifted register. *) let val reg = if (wordValue andb 0wx80000000) <> 0w0 then "x" else "w" in printStream "mov\t"; printStream reg; - printStream(Word.fmt StringCvt.DEC(wordValue andb 0wx1f)); + printStream(Word32.fmt StringCvt.DEC(wordValue andb 0wx1f)); printStream ","; printStream reg; - printStream(Word.fmt StringCvt.DEC((wordValue >> 0w16) andb 0wx1f)) + printStream(Word32.fmt StringCvt.DEC((wordValue >> 0w16) andb 0wx1f)) end else if (wordValue andb 0wx1f000000) = 0wx0A000000 then let (* Logical operations with shifted register. 
*) val rD = wordValue andb 0wx1f and rN = (wordValue >> 0w5) andb 0wx1f and rM = (wordValue >> 0w16) andb 0wx1f and imm6 = (wordValue >> 0w10) andb 0wx3f and shiftCode = (wordValue >> 0w22) andb 0wx3 val opc = (wordValue >> 0w29) andb 0wx3 val nBit = (wordValue >> 0w21) andb 0w1 val reg = if (wordValue andb 0wx80000000) <> 0w0 then "x" else "w" val opcode = case (opc, nBit) of (0w0, 0w0) => "and" | (0w1, 0w0) => "orr" | (0w2, 0w0) => "eor" | (0w3, 0w0) => "ands" | _ => "??" in printStream opcode; printStream"\t"; printStream reg; - printStream(Word.fmt StringCvt.DEC rD); printStream ","; - printStream reg; printStream(Word.fmt StringCvt.DEC rN); - printStream ","; printStream reg; printStream(Word.fmt StringCvt.DEC rM); + printStream(Word32.fmt StringCvt.DEC rD); printStream ","; + printStream reg; printStream(Word32.fmt StringCvt.DEC rN); + printStream ","; printStream reg; printStream(Word32.fmt StringCvt.DEC rM); if imm6 <> 0w0 then ( case shiftCode of 0w0 => printStream ",lsl #" | 0w1 => printStream ",lsr #" | 0w2 => printStream ",asr #" | _ => printStream ",?? #"; - printStream(Word.fmt StringCvt.DEC imm6) + printStream(Word32.fmt StringCvt.DEC imm6) ) else () end else if (wordValue andb 0wx1f200000) = 0wx0B000000 then let (* Add/subtract shifted register. 
*) val rD = wordValue andb 0wx1f and rN = (wordValue >> 0w5) andb 0wx1f and rM = (wordValue >> 0w16) andb 0wx1f and imm6 = (wordValue >> 0w10) andb 0wx3f and shiftCode = (wordValue >> 0w22) andb 0wx3 val oper = (wordValue andb 0wx40000000) = 0w0 val isS = (wordValue andb 0wx20000000) <> 0w0 val pReg = if (wordValue andb 0wx80000000) <> 0w0 then prXReg else prWReg in if isS andalso rD = 0w31 then printStream(if oper then "cmn\t" else "cmp\t") else ( printStream(if oper then "add" else "sub"); printStream(if isS then "s\t" else "\t"); pReg rD; printStream "," ); pReg rN; printStream ","; pReg rM; if imm6 <> 0w0 then ( case shiftCode of 0w0 => printStream ",lsl #" | 0w1 => printStream ",lsr #" | 0w2 => printStream ",asr #" | _ => printStream ",?? #"; - printStream(Word.fmt StringCvt.DEC imm6) + printStream(Word32.fmt StringCvt.DEC imm6) ) else () end else if (wordValue andb 0wx1fe00000) = 0wx0b200000 then let (* Add/subtract extended register. *) val rD = wordValue andb 0wx1f and rN = (wordValue >> 0w5) andb 0wx1f and rM = (wordValue >> 0w16) andb 0wx1f and extend = (wordValue >> 0w13) andb 0w7 and amount = (wordValue >> 0w10) andb 0w7 and sf = (wordValue >> 0w31) andb 0w1 and p = (wordValue >> 0w30) andb 0w1 and s = (wordValue >> 0w29) andb 0w1 in if s = 0w1 andalso rD = 0w31 then printStream(if p = 0w0 then "cmn\t" else "cmp\t") else ( printStream(if p = 0w0 then "add" else "sub"); printStream(if s = 0w1 then "s\t" else "\t"); (if sf = 0w1 then prXRegOrSP else prWRegOrSP) rD; printStream "," ); (if sf = 0w1 then prXRegOrSP else prWRegOrSP) rN; printStream ","; (if extend = 0w3 orelse extend = 0w7 then prXReg else prWReg) rM; case extend of 0w0 => printStream ",uxtb" | 0w1 => printStream ",uxth" | 0w2 => if amount = 0w0 andalso sf = 0w0 then () else printStream ",uxtw" | 0w3 => if amount = 0w0 andalso sf = 0w1 then () else printStream ",uxtx" | 0w4 => printStream ",sxtb" | 0w5 => printStream ",sxth" | 0w6 => printStream ",sxtw" | 0w7 => printStream ",sxtx" | _ => 
printStream "?"; if amount <> 0w0 - then printStream(" #" ^ Word.fmt StringCvt.DEC amount) + then printStream(" #" ^ Word32.fmt StringCvt.DEC amount) else () end else if (wordValue andb 0wxff000000) = 0wx58000000 then let (* Load from a PC-relative address. This may refer to the address constant area or the non-address constant area. *) val rT = wordValue andb 0wx1f (* The offset is in 32-bit words *) - val byteAddr = ((wordValue andb 0wx00ffffe0) >> (0w5-0w2)) + byteNo - val word64Addr = byteAddr >> 0w3 + val byteAddr = word32ToWord(((wordValue andb 0wx00ffffe0) >> (0w5-0w2))) + byteNo + val word64Addr = Word.>>(byteAddr, 0w3) (* We must NOT use codeVecGetWord if this is in the non-address area. It may well not be a tagged value. *) local fun getConstant(cVal, 0w0) = cVal | getConstant(cVal, offset) = let val byteVal = Word64.fromLarge(Word8.toLarge(codeVecGet (codeVec, byteAddr+offset-0w1))) in getConstant(Word64.orb(Word64.<<(cVal, 0w8), byteVal), offset-0w1) end in val constantValue = if word64Addr <= wordsOfCode then "0x" ^ Word64.toString(getConstant(0w0, 0w8)) (* It's a non-address constant *) else stringOfWord(codeVecGetWord(codeVec, word64Addr)) end in - printStream "ldr\tx"; printStream(Word.fmt StringCvt.DEC rT); + printStream "ldr\tx"; printStream(Word32.fmt StringCvt.DEC rT); printStream ",0x"; printStream(Word.fmt StringCvt.HEX byteAddr); printStream "\t// "; printStream constantValue end else if (wordValue andb 0wxbf000000) = 0wx10000000 then let (* Put a pc-relative address into a register. 
*) val rT = wordValue andb 0wx1f val byteOffset = ((wordValue andb 0wx00ffffe0) << (Word.fromInt Word.wordSize - 0w23) ~>> (Word.fromInt Word.wordSize - 0w20)) + ((wordValue >> 0w29) andb 0w3) in - printStream "adr\tx"; printStream(Word.fmt StringCvt.DEC rT); - printStream ",0x"; printStream(Word.fmt StringCvt.HEX (byteNo+byteOffset)) + printStream "adr\tx"; printStream(Word32.fmt StringCvt.DEC rT); + printStream ",0x"; printStream(Word.fmt StringCvt.HEX (byteNo+word32ToWord byteOffset)) end else if (wordValue andb 0wxfc000000) = 0wx14000000 then (* Unconditional branch. *) let (* The offset is signed and the destination may be earlier. *) val byteOffset = (wordValue andb 0wx03ffffff) << (Word.fromInt Word.wordSize - 0w26) ~>> (Word.fromInt Word.wordSize - 0w28) in printStream "b\t0x"; - printStream(Word.fmt StringCvt.HEX (byteNo+byteOffset)) + printStream(Word.fmt StringCvt.HEX (byteNo+word32ToWord byteOffset)) end else if (wordValue andb 0wxff000000) = 0wx54000000 then (* Conditional branch *) let val byteOffset = (wordValue andb 0wx00ffffe0) << (Word.fromInt Word.wordSize - 0w24) ~>> (Word.fromInt Word.wordSize - 0w21) in printStream "b."; printCondition(wordValue andb 0wxf); printStream "\t0x"; - printStream(Word.fmt StringCvt.HEX (byteNo+byteOffset)) + printStream(Word.fmt StringCvt.HEX (byteNo+word32ToWord byteOffset)) end else if (wordValue andb 0wx7e000000) = 0wx34000000 then (* Compare and branch *) let val byteOffset = (wordValue andb 0wx00ffffe0) << (Word.fromInt Word.wordSize - 0w24) ~>> (Word.fromInt Word.wordSize - 0w21) val oper = if (wordValue andb 0wx01000000) = 0w0 then "cbz" else "cbnz" val r = if (wordValue andb 0wx80000000) = 0w0 then "w" else "x" in printStream oper; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC (wordValue andb 0wx1f)); + printStream r; printStream(Word32.fmt StringCvt.DEC (wordValue andb 0wx1f)); printStream ",0x"; - printStream(Word.fmt StringCvt.HEX (byteNo+byteOffset)) + printStream(Word.fmt 
StringCvt.HEX (byteNo+word32ToWord byteOffset)) end else if (wordValue andb 0wx7e000000) = 0wx36000000 then (* Test bit and branch *) let val byteOffset = (wordValue andb 0wx00ffffe0) << (Word.fromInt Word.wordSize - 0w19) ~>> (Word.fromInt Word.wordSize - 0w16) val oper = if (wordValue andb 0wx01000000) = 0w0 then "tbz" else "tbnz" val b40 = (wordValue >> 0w19) andb 0wx1f val bitNo = b40 orb ((wordValue >> 0w26) andb 0wx20) val r = if bitNo < 0w32 then "w" else "x" in printStream oper; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC (wordValue andb 0wx1f)); - printStream ",#"; printStream(Word.fmt StringCvt.DEC bitNo); printStream ",0x"; - printStream(Word.fmt StringCvt.HEX (byteNo+byteOffset)) + printStream r; printStream(Word32.fmt StringCvt.DEC (wordValue andb 0wx1f)); + printStream ",#"; printStream(Word32.fmt StringCvt.DEC bitNo); printStream ",0x"; + printStream(Word.fmt StringCvt.HEX (byteNo+word32ToWord byteOffset)) end else if (wordValue andb 0wx3fe00000) = 0wx1A800000 then let val sf = wordValue >> 0w31 val opc = (wordValue >> 0w30) andb 0w1 val op2 = (wordValue >> 0w10) andb 0w3 val rT = wordValue andb 0wx1f val rN = (wordValue >> 0w5) andb 0wx1f val rM = (wordValue >> 0w16) andb 0wx1f val cond = (wordValue >> 0w12) andb 0wxf val opcode = case (opc, op2) of (0w0, 0w0) => "csel" | (0w0, 0w1) => "csinc" | (0w1, 0w0) => "csinv" | (0w1, 0w1) => "csneg" | _ => "??" 
val r = if sf = 0w0 then "w" else "x" in printStream opcode; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC rT); - printStream ","; printStream r; printStream(Word.fmt StringCvt.DEC rN); - printStream ","; printStream r; printStream(Word.fmt StringCvt.DEC rM); + printStream r; printStream(Word32.fmt StringCvt.DEC rT); + printStream ","; printStream r; printStream(Word32.fmt StringCvt.DEC rN); + printStream ","; printStream r; printStream(Word32.fmt StringCvt.DEC rM); printStream ","; printCondition cond end else if (wordValue andb 0wx7f800000) = 0wx13000000 then (* signed bitfield *) let val sf = wordValue >> 0w31 (* N is always the same as sf. *) (*val nBit = (wordValue >> 0w22) andb 0w1*) val immr = (wordValue >> 0w16) andb 0wx3f val imms = (wordValue >> 0w10) andb 0wx3f val rN = (wordValue >> 0w5) andb 0wx1f val rD = wordValue andb 0wx1f val (r, wordSize) = if sf = 0w0 then ("w", 0w32) else ("x", 0w64) in if imms = wordSize - 0w1 then printStream "asr\t" else printStream "sbfm\t"; printStream r; - printStream(Word.fmt StringCvt.DEC rD); + printStream(Word32.fmt StringCvt.DEC rD); printStream ","; printStream r; - printStream(Word.fmt StringCvt.DEC rN); + printStream(Word32.fmt StringCvt.DEC rN); if imms = wordSize - 0w1 - then (printStream ",#0x"; printStream(Word.toString immr)) + then (printStream ",#0x"; printStream(Word32.toString immr)) else ( - printStream ",#0x"; printStream(Word.toString immr); - printStream ",#0x"; printStream(Word.toString imms) + printStream ",#0x"; printStream(Word32.toString immr); + printStream ",#0x"; printStream(Word32.toString imms) ) end else if (wordValue andb 0wx7f800000) = 0wx53000000 then (* unsigned bitfield move *) let val sf = wordValue >> 0w31 (* N is always the same as sf. 
*) (*val nBit = (wordValue >> 0w22) andb 0w1*) val immr = (wordValue >> 0w16) andb 0wx3f val imms = (wordValue >> 0w10) andb 0wx3f val rN = (wordValue >> 0w5) andb 0wx1f val rD = wordValue andb 0wx1f val (r, wordSize) = if sf = 0w0 then ("w", 0w32) else ("x", 0w64) in if imms + 0w1 = immr then printStream "lsl\t" else if imms = wordSize - 0w1 then printStream "lsr\t" else printStream "ubfm\t"; printStream r; - printStream(Word.fmt StringCvt.DEC rD); + printStream(Word32.fmt StringCvt.DEC rD); printStream ","; printStream r; - printStream(Word.fmt StringCvt.DEC rN); + printStream(Word32.fmt StringCvt.DEC rN); if imms + 0w1 = immr - then (printStream ",#0x"; printStream(Word.toString(wordSize - immr))) + then (printStream ",#0x"; printStream(Word32.toString(wordSize - immr))) else if imms = wordSize - 0w1 - then (printStream ",#0x"; printStream(Word.toString immr)) + then (printStream ",#0x"; printStream(Word32.toString immr)) else ( - printStream ",#0x"; printStream(Word.toString immr); - printStream ",#0x"; printStream(Word.toString imms) + printStream ",#0x"; printStream(Word32.toString immr); + printStream ",#0x"; printStream(Word32.toString imms) ) end else if (wordValue andb 0wx1f800000) = 0wx12000000 then (* logical immediate *) let val sf = wordValue >> 0w31 val opc = (wordValue >> 0w29) andb 0w3 val nBit = (wordValue >> 0w22) andb 0w1 val immr = (wordValue >> 0w16) andb 0wx3f val imms = (wordValue >> 0w10) andb 0wx3f val rN = (wordValue >> 0w5) andb 0wx1f val rD = wordValue andb 0wx1f val (opcode, r) = case (sf, opc, nBit) of (0w0, 0w0, 0w0) => ("and", "w") | (0w0, 0w1, 0w0) => ("orr", "w") | (0w0, 0w2, 0w0) => ("eor", "w") | (0w0, 0w3, 0w0) => ("ands", "w") | (0w1, 0w0, _) => ("and", "x") | (0w1, 0w1, _) => ("orr", "x") | (0w1, 0w2, _) => ("eor", "x") | (0w1, 0w3, _) => ("ands", "x") | _ => ("??", "?") in printStream opcode; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC rD); printStream ","; - printStream r; printStream(Word.fmt 
StringCvt.DEC rN); printStream ",#0x"; + printStream r; printStream(Word32.fmt StringCvt.DEC rD); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ",#0x"; printStream(Word64.toString(decodeBitPattern{sf=sf, n=nBit, immr=immr, imms=imms})) end else if (wordValue andb 0wx5fe00000) = 0wx1ac00000 then (* Two source operations - shifts and divide. *) let val sf = wordValue >> 0w31 val s = (wordValue >> 0w29) andb 0w1 val rM = (wordValue >> 0w16) andb 0wx1f val opcode = (wordValue >> 0w10) andb 0wx3f val rN = (wordValue >> 0w5) andb 0wx1f val rD = wordValue andb 0wx1f val (oper, r) = case (sf, s, opcode) of (0w1, 0w0, 0wx2) => ("udiv", "x") | (0w1, 0w0, 0wx3) => ("sdiv", "x") | (0w1, 0w0, 0wx8) => ("lsl", "x") | (0w1, 0w0, 0wx9) => ("lsr", "x") | (0w1, 0w0, 0wxa) => ("asr", "x") | _ => ("??", "?") in printStream oper; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC rD); printStream ","; - printStream r; printStream(Word.fmt StringCvt.DEC rN); printStream ","; - printStream r; printStream(Word.fmt StringCvt.DEC rM) + printStream r; printStream(Word32.fmt StringCvt.DEC rD); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rM) end else if (wordValue andb 0wx1f000000) = 0wx1b000000 then (* Three source operations - multiply add/subtract. 
*) let val sf = wordValue >> 0w31 val op54 = (wordValue >> 0w29) andb 0w3 val op31 = (wordValue >> 0w21) andb 0w7 val o0 = (wordValue >> 0w15) andb 0w1 val rM = (wordValue >> 0w16) andb 0wx1f val rA = (wordValue >> 0w10) andb 0wx1f val rN = (wordValue >> 0w5) andb 0wx1f val rD = wordValue andb 0wx1f val (oper, r) = case (sf, op54, op31, o0, rA) of (0w1, 0w0, 0w0, 0w0, 0w31) => ("mul", "x") | (0w1, 0w0, 0w0, 0w0, _) => ("madd", "x") | (0w1, 0w0, 0w0, 0w1, 0w31) => ("mneg", "x") | (0w1, 0w0, 0w0, 0w1, _) => ("msub", "x") | (0w1, 0w0, 0w2, 0w0, 0w31) => ("smulh", "x") | _ => ("??", "?") in printStream oper; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC rD); printStream ","; - printStream r; printStream(Word.fmt StringCvt.DEC rN); printStream ","; - printStream r; printStream(Word.fmt StringCvt.DEC rM); + printStream r; printStream(Word32.fmt StringCvt.DEC rD); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rM); if rA = 0w31 then () - else (printStream ","; printStream r; printStream(Word.fmt StringCvt.DEC rA)) + else (printStream ","; printStream r; printStream(Word32.fmt StringCvt.DEC rA)) end else if (wordValue andb 0wxffe0fc00) = 0wxC800FC00 then let val rS = (wordValue >> 0w16) andb 0wx1f val rN = (wordValue >> 0w5) andb 0wx1f val rT = wordValue andb 0wx1f in - printStream "stlxr\tw"; printStream(Word.fmt StringCvt.DEC rS); - printStream ",x"; printStream(Word.fmt StringCvt.DEC rT); - printStream ".[x"; printStream(Word.fmt StringCvt.DEC rN); printStream "]" + printStream "stlxr\tw"; printStream(Word32.fmt StringCvt.DEC rS); + printStream ",x"; printStream(Word32.fmt StringCvt.DEC rT); + printStream ".[x"; printStream(Word32.fmt StringCvt.DEC rN); printStream "]" end else if (wordValue andb 0wx7f20fc00) = 0wx1E200000 then (* Moves between floating point and general regs. 
*) let val sf = (wordValue >> 0w31) andb 0w1 and s = (wordValue >> 0w29) andb 0w1 and ptype = (wordValue >> 0w22) andb 0w3 and mode = (wordValue >> 0w19) andb 0w3 and opcode = (wordValue >> 0w16) andb 0w7 and rN = (wordValue >> 0w5) andb 0wx1f and rD = wordValue andb 0wx1f val (opc, dr, nr) = case (sf, s, ptype, mode, opcode) of (0w0, 0w0, 0w0, 0w0, 0w7) => ("fmov", "s", "w") (* w -> s *) | (0w0, 0w0, 0w0, 0w0, 0w6) => ("fmov", "w", "s") (* s -> w *) | (0w1, 0w0, 0w1, 0w0, 0w7) => ("fmov", "d", "x") (* d -> x *) | (0w1, 0w0, 0w1, 0w0, 0w6) => ("fmov", "x", "d") (* x -> d *) | (0w1, 0w0, 0w0, 0w0, 0w2) => ("scvtf", "x", "s") | (0w1, 0w0, 0w1, 0w0, 0w2) => ("scvtf", "x", "d") | (0w1, 0w0, 0w0, 0w0, 0w4) => ("fcvtas", "w", "s") (* s -> w *) | (0w1, 0w0, 0w0, 0w2, 0w0) => ("fcvtms", "w", "s") (* s -> w *) | (0w1, 0w0, 0w0, 0w1, 0w0) => ("fcvtps", "w", "s") (* s -> w *) | (0w1, 0w0, 0w0, 0w3, 0w0) => ("fcvtzs", "w", "s") (* s -> w *) | (0w1, 0w0, 0w1, 0w0, 0w4) => ("fcvtas", "x", "s") (* s -> x *) | (0w1, 0w0, 0w1, 0w2, 0w0) => ("fcvtms", "x", "s") (* s -> x *) | (0w1, 0w0, 0w1, 0w1, 0w0) => ("fcvtps", "x", "s") (* s -> x *) | (0w1, 0w0, 0w1, 0w3, 0w0) => ("fcvtzs", "x", "s") (* s -> x *) | _ => ("?", "?", "?") in printStream opc; printStream "\t"; - printStream dr; printStream(Word.fmt StringCvt.DEC rD); printStream ","; - printStream nr; printStream(Word.fmt StringCvt.DEC rN) + printStream dr; printStream(Word32.fmt StringCvt.DEC rD); printStream ","; + printStream nr; printStream(Word32.fmt StringCvt.DEC rN) end else if (wordValue andb 0wxff200c00) = 0wx1E200800 then (* Floating point two source operations. 
*) let val pt = (wordValue >> 0w22) andb 0w3 and rM = (wordValue >> 0w16) andb 0wx1f and opc = (wordValue >> 0w12) andb 0wxf and rN = (wordValue >> 0w5) andb 0wx1f and rT = wordValue andb 0wx1f val (opcode, r) = case (pt, opc) of (0w0, 0wx0) => ("fmul", "s") | (0w0, 0wx1) => ("fdiv", "s") | (0w0, 0wx2) => ("fadd", "s") | (0w0, 0wx3) => ("fsub", "s") | (0w1, 0wx0) => ("fmul", "d") | (0w1, 0wx1) => ("fdiv", "d") | (0w1, 0wx2) => ("fadd", "d") | (0w1, 0wx3) => ("fsub", "d") | _ => ("??", "?") in printStream opcode; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC rT); printStream ","; - printStream r; printStream(Word.fmt StringCvt.DEC rN); printStream ","; - printStream r; printStream(Word.fmt StringCvt.DEC rM) + printStream r; printStream(Word32.fmt StringCvt.DEC rT); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rM) end else if (wordValue andb 0wxff207c00) = 0wx1E204000 then (* Floating point single source. 
*) let val pt = (wordValue >> 0w22) andb 0w3 and opc = (wordValue >> 0w15) andb 0wx3f and rN = (wordValue >> 0w5) andb 0wx1f and rT = wordValue andb 0wx1f val (opcode, rS, rD) = case (pt, opc) of (0w0, 0wx0) => ("fmov", "s", "s") | (0w0, 0wx1) => ("fabs", "s", "s") | (0w0, 0wx2) => ("fneg", "s", "s") | (0w0, 0wx5) => ("fcvt", "s", "d") | (0w1, 0wx0) => ("fmov", "d", "d") | (0w1, 0wx1) => ("fabs", "d", "d") | (0w1, 0wx2) => ("fneg", "d", "d") | (0w1, 0wx4) => ("fcvt", "d", "s") | _ => ("??", "?", "?") in printStream opcode; printStream "\t"; - printStream rD; printStream(Word.fmt StringCvt.DEC rT); printStream ","; - printStream rS; printStream(Word.fmt StringCvt.DEC rN) + printStream rD; printStream(Word32.fmt StringCvt.DEC rT); printStream ","; + printStream rS; printStream(Word32.fmt StringCvt.DEC rN) end else if (wordValue andb 0wxff20fc07) = 0wx1E202000 then (* Floating point comparison *) let val pt = (wordValue >> 0w22) andb 0w3 and rM = (wordValue >> 0w16) andb 0wx1f and rN = (wordValue >> 0w5) andb 0wx1f and opc = (wordValue >> 0w3) andb 0w3 val (opcode, r) = case (pt, opc) of (0w0, 0wx0) => ("fcmp", "s") | (0w1, 0wx0) => ("fcmp", "d") | (0w0, 0wx2) => ("fcmpe", "s") | (0w1, 0wx2) => ("fcmpe", "d") | _ => ("??", "?") in printStream opcode; printStream "\t"; - printStream r; printStream(Word.fmt StringCvt.DEC rN); printStream ","; - printStream r; printStream(Word.fmt StringCvt.DEC rM) + printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ","; + printStream r; printStream(Word32.fmt StringCvt.DEC rM) end else if (wordValue andb 0wx1e000000) = 0wx02000000 then (* This is an unallocated range. We use it for the register mask. 
*) let fun printMask (0w25, _) = () | printMask (i, comma) = if ((0w1 << i) andb wordValue) <> 0w0 then ( if comma then printStream ", " else (); printStream "x"; printStream(Word.fmt StringCvt.DEC i); printMask(i+0w1, true) ) else printMask(i+0w1, comma) in printStream "["; printMask(0w0, false); printStream "]" end else printStream "?" ; printStream "\n" end fun printAll i = if i = numInstructions then () else (printWordAt i; printAll(i+0w1)) in printStream functionName; printStream ":\n"; printAll 0w0 end (* Adds the constants onto the code, and copies the code into a new segment. Takes the instruction list, the function name (stored in the segment and used as the heading of the listing), the compiler parameters and the resultClosure that is passed on to byteVecToCodeVec and codeVecLock. If Debug.assemblyCodeTag is set in the parameters the generated code is printed before the code is locked. *) fun generateCode {instrs, name=functionName, parameters, resultClosure} = let val printStream = Pretty.getSimplePrinter(parameters, []) and printAssemblyCode = Debug.getParameter Debug.assemblyCodeTag parameters local (* Extract the constants. Address and non-address literals are collected separately; foldl conses them in reverse so both lists are reversed below to restore instruction order. *) fun getConsts(LoadAddressLiteral {value, ...}, (addrs, nonAddrs)) = (value::addrs, nonAddrs) | getConsts(LoadNonAddressLiteral {value, ...}, (addrs, nonAddrs)) = (addrs, value::nonAddrs) | getConsts(_, consts) = consts val (addrConsts, nonAddrConsts) = List.foldl getConsts ([], []) instrs in val addressConsts = List.rev addrConsts and nonAddressConsts = List.rev nonAddrConsts end val (byteVec, wordsOfCode) = genCode(instrs, addressConsts, nonAddressConsts) (* The segment has four words besides the code and the address constants: the constants count, the function name, the profile ref and the final offset word. *) val numOfConst = List.length addressConsts val segSize = wordsOfCode + Word.fromInt numOfConst + 0w4 val firstConstant = wordsOfCode + 0w3 (* Add 3 for no of consts, fn name and profile count. *) (* Put in the number of constants. This must go in before we actually put in any constants. The count written is numOfConst + 2 because it also covers the function name and the profile ref. *) local val lastWord = segSize - 0w1 in val () = set64(numOfConst + 2, wordsOfCode, byteVec) (* Set the last word of the code to the (negative) byte offset of the start of the code area from the end of this word.
*) val () = set64((numOfConst + 3) * ~8, lastWord, byteVec) end (* Now we've filled in all the size info we need to convert the segment into a proper code segment before it's safe to put in any ML values. *) val codeVec = byteVecToCodeVec(byteVec, resultClosure) (* Function name: stored in the word immediately after the constants count. *) local val name : string = functionName val nameWord : machineWord = toMachineWord name in val () = codeVecPutWord (codeVec, wordsOfCode+0w1, nameWord) end (* Profile ref. A byte ref used by the profiler in the RTS. It is allocated as one word of mutable byte memory and every byte is cleared before it is stored. *) local val v = RunCall.allocateByteMemory(0w1, Word.fromLargeWord(Word8.toLargeWord(Word8.orb(F_mutable, F_bytes)))) fun clear 0w0 = () | clear i = (assignByte(v, i-0w1, 0w0); clear (i-0w1)) val () = clear(wordSize) in val () = codeVecPutWord (codeVec, wordsOfCode+0w2, toMachineWord v) end (* and then copy the constants from the constant list. Only the address constants are stored here; the fold accumulator is the index of the next constant slot. *) local fun setConstant(value, num) = ( codeVecPutWord (codeVec, firstConstant + num, value); num+0w1 ) in val _ = List.foldl setConstant 0w0 addressConsts end in if printAssemblyCode then (* print out the code *) (printCode (codeVec, functionName, wordsOfCode, printStream); printStream"\n") else (); codeVecLock(codeVec, resultClosure) end (* generateCode *) (* Re-binds the types of this structure inside a Sharing substructure. *) structure Sharing = struct type closureRef = closureRef type instr = instr type xReg = xReg type vReg = vReg type labels = labels type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig index 5e2e6c0a..1e5212c3 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sig @@ -1,356 +1,356 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation.
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature Arm64Assembly = sig type closureRef type instr type machineWord = Address.machineWord type labels type condition (* XZero and XSP are both encoded as 31 but the interpretation depends on the instruction The datatype definition is included here to allow for pattern matching on XSP and XZero. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val X_MLHeapLimit: xReg (* ML Heap limit pointer *) and X_MLAssemblyInt: xReg (* ML assembly interface pointer. *) and X_MLHeapAllocPtr: xReg (* ML Heap allocation pointer. *) and X_MLStackPtr: xReg (* ML Stack pointer. *) and X_LinkReg: xReg (* Link reg - return address *) val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg (* Condition for conditional branches etc. 
*) val condEqual: condition and condNotEqual: condition and condCarrySet: condition and condCarryClear: condition and condNegative: condition and condPositive: condition and condOverflow: condition and condNoOverflow: condition and condUnsignedHigher: condition and condUnsignedLowOrEq: condition and condSignedGreaterEq: condition and condSignedLess: condition and condSignedGreater: condition and condSignedLessEq: condition datatype shiftType = - ShiftLSL of word - | ShiftLSR of word - | ShiftASR of word + ShiftLSL of Word8.word + | ShiftLSR of Word8.word + | ShiftASR of Word8.word | ShiftNone datatype wordSize = WordSize32 | WordSize64 datatype 'a extend = ExtUXTB of 'a (* Unsigned extend byte *) | ExtUXTH of 'a (* Unsigned extend halfword *) | ExtUXTW of 'a (* Unsigned extend word *) | ExtUXTX of 'a (* Left shift *) | ExtSXTB of 'a (* Sign extend byte *) | ExtSXTH of 'a (* Sign extend halfword *) | ExtSXTW of 'a (* Sign extend word *) | ExtSXTX of 'a (* Left shift *) (* Load/store instructions have only a single bit for the shift. For byte operations this is one bit shift; for others it scales by the size of the operand if set. *) datatype scale = ScaleOrShift | NoScale (* Jump to the address in the register and put the address of the next instruction into X30. *) val branchAndLinkReg: xReg -> instr (* Jump to the address in the register. *) and branchRegister: xReg -> instr (* Jump to the address in the register and hint this is a return. *) and returnRegister: xReg -> instr (* Move an address constant to a register. *) val loadAddressConstant: xReg * machineWord -> instr (* Move a constant into a register that is not an address. The argument is the actual bit pattern to be copied. For tagged integers that means that the value must have been shifted and the tag bit set. *) and loadNonAddressConstant: xReg * Word64.word -> instr (* Move a value into a register. The immediate is 16-bits and the shift is 0, 16, 32, or 48.
moveKeep affects only the specific 16-bits and leaves the remainder unchanged. *) val moveNot32: {regD: xReg, immediate: word, shift: word} -> instr and moveZero32: {regD: xReg, immediate: word, shift: word} -> instr and moveKeep32: {regD: xReg, immediate: word, shift: word} -> instr val moveNot: {regD: xReg, immediate: word, shift: word} -> instr and moveZero: {regD: xReg, immediate: word, shift: word} -> instr and moveKeep: {regD: xReg, immediate: word, shift: word} -> instr (* Add/subtract an optionally shifted 12-bit immediate (i.e. constant) to/from a register. The constant is zero-extended. *) val addImmediate: {regN: xReg, regD: xReg, immed: word, shifted: bool} -> instr and addSImmediate: {regN: xReg, regD: xReg, immed: word, shifted: bool} -> instr and subImmediate: {regN: xReg, regD: xReg, immed: word, shifted: bool} -> instr and subSImmediate: {regN: xReg, regD: xReg, immed: word, shifted: bool} -> instr (* Add/subtract a shifted register, optionally setting the flags. *) val addShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr and addSShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr and subShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr and subSShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr (* Add/subtract an extended register, optionally setting the flags.
*) - val addExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: word extend} -> instr - and addSExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: word extend} -> instr - and subExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: word extend} -> instr - and subSExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: word extend} -> instr + val addExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend} -> instr + and addSExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend} -> instr + and subExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend} -> instr + and subSExtendedReg: {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend} -> instr (* Multiplication *) (* regD = regA + regN * regM *) val multiplyAndAdd: {regM: xReg, regN: xReg, regA: xReg, regD: xReg} -> instr (* regD = regA - regN * regM *) and multiplyAndSub: {regM: xReg, regN: xReg, regA: xReg, regD: xReg} -> instr (* Return the high-order part of a signed multiplication. *) and signedMultiplyHigh: {regM: xReg, regN: xReg, regD: xReg} -> instr (* Division *) val unsignedDivide: {regM: xReg, regN: xReg, regD: xReg} -> instr and signedDivide: {regM: xReg, regN: xReg, regD: xReg} -> instr (* Logical operations on a shifted register, optionally setting the flags. *) val andShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr and orrShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr and eorShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr and andsShiftedReg: {regM: xReg, regN: xReg, regD: xReg, shift: shiftType} -> instr (* And a register with a bit pattern, discarding the results but setting the condition codes. The bit pattern must be encodable. *) val testBitPattern: xReg * Word64.word -> instr (* Check whether a constant can be encoded. 
*) val isEncodableBitPattern: Word64.word * wordSize -> bool (* Load/Store an aligned word using a 12-bit offset. The offset is in units of the size of the operand. *) val loadRegScaled: {regT: xReg, regN: xReg, unitOffset: int} -> instr and storeRegScaled: {regT: xReg, regN: xReg, unitOffset: int} -> instr and loadRegScaledByte: {regT: xReg, regN: xReg, unitOffset: int} -> instr and storeRegScaledByte: {regT: xReg, regN: xReg, unitOffset: int} -> instr and loadRegScaled16: {regT: xReg, regN: xReg, unitOffset: int} -> instr and storeRegScaled16: {regT: xReg, regN: xReg, unitOffset: int} -> instr and loadRegScaled32: {regT: xReg, regN: xReg, unitOffset: int} -> instr and storeRegScaled32: {regT: xReg, regN: xReg, unitOffset: int} -> instr and loadRegScaledDouble: {regT: vReg, regN: xReg, unitOffset: int} -> instr and storeRegScaledDouble: {regT: vReg, regN: xReg, unitOffset: int} -> instr and loadRegScaledFloat: {regT: vReg, regN: xReg, unitOffset: int} -> instr and storeRegScaledFloat: {regT: vReg, regN: xReg, unitOffset: int} -> instr (* Load/Store a value using a signed byte offset. 
*) val loadRegUnscaled: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegUnscaled: {regT: xReg, regN: xReg, byteOffset: int} -> instr and loadRegUnscaledByte: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegUnscaledByte: {regT: xReg, regN: xReg, byteOffset: int} -> instr and loadRegUnscaled16: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegUnscaled16: {regT: xReg, regN: xReg, byteOffset: int} -> instr and loadRegUnscaled32: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegUnscaled32: {regT: xReg, regN: xReg, byteOffset: int} -> instr and loadRegUnscaledFloat: {regT: vReg, regN: xReg, byteOffset: int} -> instr and storeRegUnscaledFloat: {regT: vReg, regN: xReg, byteOffset: int} -> instr and loadRegUnscaledDouble: {regT: vReg, regN: xReg, byteOffset: int} -> instr and storeRegUnscaledDouble: {regT: vReg, regN: xReg, byteOffset: int} -> instr (* Load/store with a register offset i.e. an index register. *) val loadRegIndexed: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and storeRegIndexed: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and loadRegIndexedByte: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and storeRegIndexedByte: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and loadRegIndexed16: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and storeRegIndexed16: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and loadRegIndexed32: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and storeRegIndexed32: {regN: xReg, regM: xReg, regT: xReg, option: scale extend} -> instr and loadRegIndexedFloat: {regN: xReg, regM: xReg, regT: vReg, option: scale extend} -> instr and storeRegIndexedFloat: {regN: xReg, regM: xReg, regT: vReg, option: scale extend} -> instr and loadRegIndexedDouble: {regN: xReg, regM: xReg, regT: vReg, option: scale extend} -> instr and 
storeRegIndexedDouble: {regN: xReg, regM: xReg, regT: vReg, option: scale extend} -> instr (* Load/Store a value using a signed byte offset and post-indexing (post-increment). *) (* The terminology is confusing. Pre-indexing means adding the offset into base address before loading the value, typically used for push, and post-index means using the original value of the base register as the address and adding in the offset after the value has been loaded, e.g. pop. *) val loadRegPostIndex: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegPostIndex: {regT: xReg, regN: xReg, byteOffset: int} -> instr and loadRegPostIndexByte: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegPostIndexByte: {regT: xReg, regN: xReg, byteOffset: int} -> instr (* Load/Store a value using a signed byte offset and pre-indexing (pre-increment). *) val loadRegPreIndex: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegPreIndex: {regT: xReg, regN: xReg, byteOffset: int} -> instr and loadRegPreIndexByte: {regT: xReg, regN: xReg, byteOffset: int} -> instr and storeRegPreIndexByte: {regT: xReg, regN: xReg, byteOffset: int} -> instr (* Loads and stores with special ordering. *) val loadAcquire: {regN: xReg, regT: xReg} -> instr and storeRelease: {regN: xReg, regT: xReg} -> instr (* This word is put in after a call to the RTS trap-handler. All the registers are saved and restored across a call to the trap-handler; the register mask contains those that may contain an address and so need to be scanned and possibly updated if there is a GC. *) val registerMask: xReg list -> instr (* Create a label. *) val createLabel: unit -> labels (* Put a label into the code. *) val setLabel: labels -> instr (* A conditional branch. *) val conditionalBranch: condition * labels -> instr (* Unconditional branch *) and unconditionalBranch: labels -> instr (* Put the address of a label into a register - used for handlers and cases. 
*) and loadLabelAddress: xReg * labels -> instr (* Test a bit in a register and branch if zero/nonzero *) and testBitBranchZero: xReg * Word8.word * labels -> instr and testBitBranchNonZero: xReg * Word8.word * labels -> instr (* Compare a register with zero and branch if zero/nonzero *) and compareBranchZero: xReg * wordSize * labels -> instr and compareBranchNonZero: xReg * wordSize * labels -> instr (* Set the destination register to the value of the first reg if the condition is true otherwise to a, possibly modified, version of the second argument. There are variants that set it unmodified, incremented, inverted and negated. *) val conditionalSet: {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition} -> instr val conditionalSetIncrement: {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition} -> instr val conditionalSetInverted: {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition} -> instr val conditionalSetNegated: {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition} -> instr (* Various shifts *) val logicalShiftLeft: {wordSize: wordSize, shift: word, regN: xReg, regD: xReg} -> instr and logicalShiftRight: {wordSize: wordSize, shift: word, regN: xReg, regD: xReg} -> instr and arithmeticShiftRight: {wordSize: wordSize, shift: word, regN: xReg, regD: xReg} -> instr (* Extract bits and set the rest of the register to zero. *) and unsignedBitfieldInsertinZeros: {wordSize: wordSize, lsb: word, width: word, regN: xReg, regD: xReg} -> instr (* Extract bits but leave the rest of the register unchanged. Can be used to clear a specific range of bits by using XZero as the source. 
*) and bitfieldInsert: {wordSize: wordSize, lsb: word, width: word, regN: xReg, regD: xReg} -> instr (* Logical shift left Rd = Rn << (Rm mod 0w64) *) val logicalShiftLeftVariable: {regM: xReg, regN: xReg, regD: xReg} -> instr (* Logical shift right Rd = Rn >> (Rm mod 0w64) *) and logicalShiftRightVariable: {regM: xReg, regN: xReg, regD: xReg} -> instr (* Arithmetic shift right Rd = Rn ~>> (Rm mod 0w64) *) and arithmeticShiftRightVariable: {regM: xReg, regN: xReg, regD: xReg} -> instr (* Logical operations on bit patterns. The pattern must be valid. ANDS is an AND that also sets the flags, typically used for a test. *) val bitwiseAndImmediate: {wordSize: wordSize, bits: Word64.word, regN: xReg, regD: xReg} -> instr and bitwiseOrImmediate: {wordSize: wordSize, bits: Word64.word, regN: xReg, regD: xReg} -> instr and bitwiseXorImmediate: {wordSize: wordSize, bits: Word64.word, regN: xReg, regD: xReg} -> instr and bitwiseAndSImmediate: {wordSize: wordSize, bits: Word64.word, regN: xReg, regD: xReg} -> instr (* Instructions involved in thread synchronisation. *) val yield: instr and dmbIsh: instr val loadAcquireExclusiveRegister: {regN: xReg, regT: xReg} -> instr val storeReleaseExclusiveRegister: {regN: xReg, regS: xReg, regT: xReg} -> instr (* Floating point moves and conversions. Moves simply copy the bits. In all cases the integer argument is signed 64-bits. *) val moveGeneralToDouble: {regN: xReg, regD: vReg} -> instr and moveGeneralToFloat: {regN: xReg, regD: vReg} -> instr and moveDoubleToGeneral: {regN: vReg, regD: xReg} -> instr and moveFloatToGeneral: {regN: vReg, regD: xReg} -> instr and convertIntToDouble: {regN: xReg, regD: vReg} -> instr and convertIntToFloat: {regN: xReg, regD: vReg} -> instr and convertFloatToInt: IEEEReal.rounding_mode -> {regN: vReg, regD: xReg} -> instr and convertDoubleToInt: IEEEReal.rounding_mode -> {regN: vReg, regD: xReg} -> instr (* Floating point operations. 
*) val multiplyFloat: {regM: vReg, regN: vReg, regD: vReg} -> instr and divideFloat: {regM: vReg, regN: vReg, regD: vReg} -> instr and addFloat: {regM: vReg, regN: vReg, regD: vReg} -> instr and subtractFloat: {regM: vReg, regN: vReg, regD: vReg} -> instr and multiplyDouble: {regM: vReg, regN: vReg, regD: vReg} -> instr and divideDouble: {regM: vReg, regN: vReg, regD: vReg} -> instr and addDouble: {regM: vReg, regN: vReg, regD: vReg} -> instr and subtractDouble: {regM: vReg, regN: vReg, regD: vReg} -> instr val compareFloat: {regM: vReg, regN: vReg} -> instr and compareDouble: {regM: vReg, regN: vReg} -> instr val moveFloatToFloat: {regN: vReg, regD: vReg} -> instr and absFloat: {regN: vReg, regD: vReg} -> instr and negFloat: {regN: vReg, regD: vReg} -> instr and convertFloatToDouble: {regN: vReg, regD: vReg} -> instr and moveDoubleToDouble: {regN: vReg, regD: vReg} -> instr and absDouble: {regN: vReg, regD: vReg} -> instr and negDouble: {regN: vReg, regD: vReg} -> instr and convertDoubleToFloat: {regN: vReg, regD: vReg} -> instr (* Create the vector of code from the list of instructions and update the closure reference to point to it. *) val generateCode: {instrs: instr list, name: string, parameters: Universal.universal list, resultClosure: closureRef} -> unit (* Offsets in the assembly code interface pointed at by X26 These are in units of 64-bits NOT bytes. 
*) val heapOverflowCallOffset: int and stackOverflowCallOffset: int and stackOverflowXCallOffset: int and exceptionHandlerOffset: int and stackLimitOffset: int and exceptionPacketOffset: int and threadIdOffset: int and heapLimitPtrOffset: int and heapAllocPtrOffset: int and mlStackPtrOffset: int structure Sharing: sig type closureRef = closureRef type instr = instr type xReg = xReg type vReg = vReg type labels = labels type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml index 678bbda0..d8ca101e 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml @@ -1,293 +1,298 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. 
You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ForeignCall( structure FallBackCG: GENCODESIG and CodeArray: CODEARRAYSIG and Arm64Assembly: Arm64Assembly and Debug: DEBUG and Arm64Sequences: Arm64Sequences sharing FallBackCG.Sharing = CodeArray.Sharing = Arm64Assembly.Sharing = Arm64Sequences.Sharing ): FOREIGNCALLSIG = struct open CodeArray Arm64Assembly Arm64Sequences exception InternalError = Misc.InternalError datatype fastArgs = FastArgFixed | FastArgDouble | FastArgFloat val makeEntryPoint: string -> machineWord = RunCall.rtsCallFull1 "PolyCreateEntryPointObject" (* Store a double into memory. *) fun boxDouble(floatReg, fixedReg, workReg) = let val label = createLabel() in [ (* Subtract the number of bytes required from the heap pointer. *) subImmediate{regN=X_MLHeapAllocPtr, regD=fixedReg, immed=0w16, shifted=false}, (* Compare the result with the heap limit. *) subSShiftedReg{regM=X_MLHeapLimit, regN=fixedReg, regD=XZero, shift=ShiftNone}, conditionalBranch(condCarrySet, label), loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset}, branchAndLinkReg X16, registerMask [], (* Not used at the moment. *) setLabel label, (* Update the heap pointer. *) moveRegToReg{sReg=fixedReg, dReg=X_MLHeapAllocPtr} ] @ loadNonAddress(workReg, Word64.orb(0w1, Word64.<<(Word64.fromLarge(Word8.toLarge Address.F_bytes), 0w56))) @ [ (* Store the length word. Have to use the unaligned version because offset is -ve. *) storeRegUnscaled{regT=workReg, regN=fixedReg, byteOffset= ~8}, (* Store the floating pt reg. 
*) storeRegScaledDouble{regT=floatReg, regN=fixedReg, unitOffset=0} ] end fun rtsCallFastGeneral (functionName, argFormats, resultFormat, debugSwitches) = let val entryPointAddr = makeEntryPoint functionName val nArgs = List.length argFormats (* The maximum we currently have is five. *) val noRtsException = createLabel() fun loadArgs([], _, _, _) = [] | loadArgs(FastArgFixed :: argTypes, srcReg :: srcRegs, fixed :: fixedRegs, fpRegs) = if srcReg = fixed then loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) (* Already in the right reg *) else moveRegToReg{sReg=srcReg, dReg=fixed} :: loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) | loadArgs(FastArgDouble :: argTypes, srcReg :: srcRegs, fixedRegs, fp :: fpRegs) = (* Unbox the value into a fp reg. *) loadRegScaledDouble{regT=fp, regN=srcReg, unitOffset=0} :: loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) | loadArgs(FastArgFloat :: argTypes, srcReg :: srcRegs, fixedRegs, fp :: fpRegs) = (* Untag and move into the fp reg *) logicalShiftRight{wordSize=WordSize64, shift=0w32, regN=srcReg, regD=srcReg} :: moveGeneralToFloat{regN=srcReg, regD=fp} :: loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) | loadArgs _ = raise InternalError "rtsCall: Too many arguments" (* Temporarily we need to check for RTS exceptions here. The interpreter assumes they are checked for as part of the RTS call. *) val instructions = loadArgs(argFormats, (* ML Arguments *) [X0, X1, X2, X3, X4, X5, X6, X7], (* C fixed pt args *) [X0, X1, X2, X3, X4, X5, X6, X7], (* C floating pt args *) [V0, V1, V2, V3, V4, V5, V6, V7]) @ [ (* Move X30 to X24, a callee-save register. *) (* Note: maybe we should push X24 just in case this is the only reachable reference to the code. *) orrShiftedReg{regN=XZero, regM=X_LinkReg, regD=X24, shift=ShiftNone} (* Clear the RTS exception before we enter. "Full" RTS calls clear it anyway but "fast" calls don't. 
*) ] @ loadNonAddress(X8, 0w1) @ [ storeRegScaled{regT=X8, regN=X_MLAssemblyInt, unitOffset=exceptionPacketOffset}, loadAddressConstant(X16, entryPointAddr), (* Load entry point *) loadRegScaled{regT=X16, regN=X16, unitOffset=0}, (* Load the actual address. *) (* Store the current heap allocation pointer. *) storeRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, (* For the moment save and restore the ML stack pointer. No RTS call should change it and it's callee-save but just in case... *) storeRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, branchAndLinkReg X16, (* Call the function. *) (* Restore the ML stack pointer. *) loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, (* Load the heap allocation ptr and limit. We could have GCed in the RTS call. *) loadRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, loadRegScaled{regT=X_MLHeapLimit, regN=X_MLAssemblyInt, unitOffset=heapLimitPtrOffset}, (* Check for RTS exception. *) loadRegScaled{regT=X8, regN=X_MLAssemblyInt, unitOffset=exceptionPacketOffset}, subSImmediate{regN=X8, regD=XZero, immed=0w1, shifted=false}, conditionalBranch(condEqual, noRtsException), (* If it isn't then raise the exception. *) orrShiftedReg{regN=XZero, regM=X8, regD=X0, shift=ShiftNone}, loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, loadRegScaled{regT=X1, regN=X_MLStackPtr, unitOffset=0}, branchRegister X1, setLabel noRtsException ] @ ( case resultFormat of FastArgFixed => [] | FastArgDouble => (* This must be boxed. 
*) boxDouble(V0, X0, X1) | FastArgFloat => (* This must be tagged *) [ moveFloatToGeneral{regN=V0, regD=X0}, logicalShiftLeft{wordSize=WordSize64, shift=0w32, regN=X0, regD=X0}, bitwiseOrImmediate{regN=X0, regD=X0, wordSize=WordSize64, bits=0w1} ] ) @ [ returnRegister X24 ] val closure = makeConstantClosure() val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure} in closureAsAddress closure end fun rtsCallFast (functionName, nArgs, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFast(functionName, nArgs, debugSwitches) else rtsCallFastGeneral (functionName, List.tabulate(nArgs, fn _ => FastArgFixed), FastArgFixed, debugSwitches) (* RTS call with one double-precision floating point argument and a floating point result. *) fun rtsCallFastRealtoReal (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastRealtoReal(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgDouble], FastArgDouble, debugSwitches) (* RTS call with two double-precision floating point arguments and a floating point result. *) fun rtsCallFastRealRealtoReal (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastRealRealtoReal(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgDouble, FastArgDouble], FastArgDouble, debugSwitches) (* RTS call with one double-precision floating point argument, one fixed point argument and a floating point result. *) fun rtsCallFastRealGeneraltoReal (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastRealGeneraltoReal(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgDouble, FastArgFixed], FastArgDouble, debugSwitches) (* RTS call with one general (i.e. 
ML word) argument and a floating point result. This is used only to convert arbitrary precision values to floats. *) fun rtsCallFastGeneraltoReal (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastGeneraltoReal(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgFixed], FastArgDouble, debugSwitches) (* Operations on Real32.real values. *) fun rtsCallFastFloattoFloat (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastFloattoFloat(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgFloat], FastArgFloat, debugSwitches) fun rtsCallFastFloatFloattoFloat (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastFloatFloattoFloat(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgFloat, FastArgFloat], FastArgFloat, debugSwitches) (* RTS call with one single-precision floating point argument, one fixed point argument and a single-precision floating point result. *) fun rtsCallFastFloatGeneraltoFloat (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastFloatGeneraltoFloat(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgFloat, FastArgFixed], FastArgFloat, debugSwitches) (* RTS call with one general (i.e. ML word) argument and a floating point result. This is used only to convert arbitrary precision values to floats. *) fun rtsCallFastGeneraltoFloat (functionName, debugSwitches) = if Debug.getParameter Debug.compilerDebugTag debugSwitches = 0 then FallBackCG.Foreign.rtsCallFastGeneraltoFloat(functionName, debugSwitches) else rtsCallFastGeneral (functionName, [FastArgFixed], FastArgFloat, debugSwitches) (* There is only one ABI value. 
*) datatype abi = ARM64Abi fun abiList () = [("default", ARM64Abi)] (* This must match the type in Foreign.LowLevel. Once this is bootstrapped we could use that type but note that this is the type we use within the compiler and we build Foreign.LowLevel AFTER compiling this. *) datatype cTypeForm = CTypeFloatingPt | CTypePointer | CTypeSignedInt | CTypeUnsignedInt | CTypeStruct of cType list | CTypeVoid withtype cType = { typeForm: cTypeForm, align: word, size: word } fun foreignCall(_: abi, args: cType list, result: cType): Address.machineWord = let (* TODO: Just raise an exception for now. *) val exceptionPacket = Foreign.Foreign "TODO: ARM64 FFI call" val instructions = [ loadAddressConstant(X0, Address.toMachineWord exceptionPacket), loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, loadRegScaled{regT=X1, regN=X_MLStackPtr, unitOffset=0}, branchRegister X1 ] val functionName = "foreignCall" val debugSwitches = [(*Universal.tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), Universal.tagInject DEBUG.assemblyCodeTag true*)] val closure = makeConstantClosure() val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure} in closureAsAddress closure end (* Build a callback function. The arguments are the abi, the list of argument types and the result type. The result is an ML function that takes an ML function, f, as its argument, registers it as a callback and returns the C function as its result. When the C function is called the arguments are copied into temporary memory and the vector passed to f along with the address of the memory for the result. "f" stores the result in it when it returns and the result is then passed back as the result of the callback. N.B. This returns a closure cell which contains the address of the code. It can be used as a SysWord.word value except that while it exists the code will not be GCed. 
*) fun buildCallBack(_: abi, args: cType list, result: cType): Address.machineWord = let val _ = raise Foreign.Foreign "TODO: ARM64 FFI callback" val instructions = [] val functionName = "foreignCallBack(2)" val debugSwitches = [(*Universal.tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), Universal.tagInject DEBUG.assemblyCodeTag true*)] val closure = makeConstantClosure() val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure} val stage2Code = closureAsAddress closure fun resultFunction f = let (* Generate a small function to load the address of f into a register and then jump to stage2. The idea is that it should be possible to generate this eventually in a single RTS call. That could be done by using a version of this as a model. *) val instructions = [] val functionName = "foreignCallBack(1)" val debugSwitches = [(*Universal.tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), Universal.tagInject DEBUG.assemblyCodeTag true*)] val closure = makeConstantClosure() val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure} val res = closureAsAddress closure (*val _ = print("Address is " ^ (LargeWord.toString(RunCall.unsafeCast res)) ^ "\n")*) in res end in Address.toMachineWord resultFunction end + + (* Temporarily replace all of the above. *) + open FallBackCG.Foreign + + end; diff --git a/mlsource/MLCompiler/CodeTree/GCode.arm64_32.ML b/mlsource/MLCompiler/CodeTree/GCode.arm64_32.ML new file mode 100644 index 00000000..d071dfc6 --- /dev/null +++ b/mlsource/MLCompiler/CodeTree/GCode.arm64_32.ML @@ -0,0 +1,18 @@ +(* + Copyright (c) 2021 David C. J. Matthews + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + Licence version 2.1 as published by the Free Software Foundation. 
+ + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public Licence for more details. + + You should have received a copy of the GNU Lesser General Public + Licence along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*) + +structure GCode = Arm64Code;