diff --git a/libpolyml/arm64.cpp b/libpolyml/arm64.cpp
index c6701589..deebf06e 100644
--- a/libpolyml/arm64.cpp
+++ b/libpolyml/arm64.cpp
@@ -1,1099 +1,1102 @@
/*
    Machine-dependent code for ARM64

-   Copyright David C.J. Matthews 2020-21.
+   Copyright David C.J. Matthews 2020-22.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License version 2.1 as published by the Free Software Foundation.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

// Currently this is just copied from the interpreted version.

#ifdef HAVE_CONFIG_H
#include "config.h"
#elif defined(_WIN32)
#include "winconfig.h"
#else
#error "No configuration file"
#endif

#ifdef HAVE_ASSERT_H
#include <assert.h>
#define ASSERT(x) assert(x)
#else
#define ASSERT(x) 0
#endif

#ifdef HAVE_STRING_H
#include <string.h>
#endif

#include "globals.h"
#include "machine_dep.h"
#include "sys.h"
#include "profiling.h"
#include "arb.h"
#include "processes.h"
#include "run_time.h"
#include "gc.h"
#include "diagnostics.h"
#include "polystring.h"
#include "save_vec.h"
#include "memmgr.h"
#include "scanaddrs.h"
#include "rtsentry.h"
#include "bytecode.h"
#include "int_opcodes.h"

/*
* ARM64 register use:
* X0       First argument and return value
* X1-X7    Second-eighth argument
* X8       Indirect result (C), ML closure pointer on entry
* X9-X15   Volatile scratch registers
* X16-17   Intra-procedure-call (C). Only used for special cases in ML.
* X18      Platform register. Not used in ML.
* X19-X23  Non-volatile (C). Scratch registers (ML).
* X24      Non-volatile (C). Scratch register (ML). Heap base in 32-in-64.
* X25      ML Heap limit pointer
* X26      ML assembly interface pointer. Non-volatile (C).
* X27      ML Heap allocation pointer. Non-volatile (C).
* X28      ML Stack pointer. Non-volatile (C).
* X29      Frame pointer (C). Not used in ML
* X30      Link register.
* X31      Stack pointer (C). Only used when calling C. Also zero register.
*
* Floating point registers:
* V0       First argument and return value
* V1-V7    Second-eighth argument
* V8-V15   Non volatile. Not currently used in ML.
* V16-V31  Volatile. Not currently used in ML.
*
* The ML calling conventions generally follow the C ABI except that
* all registers are volatile and X28 is used for the stack.
*/

/* the amount of ML stack space to reserve for registers,
   C exception handling etc. The compiler requires us to
   reserve 2 stack-frames worth (2 * 20 words). We actually
   reserve slightly more than this.
*/

// Arm64 instructions are all 32-bit values.
typedef uint32_t arm64Instr, *arm64CodePointer;

// Each function checks for space on the stack at the start. To reduce the
// code size it assumes there are at least 10 words on the stack and only
// checks the exact space if it requires more than that. For safety we
// always make sure there are 50 words spare.
#define OVERFLOW_STACK_SIZE 50

// X26 always points at this area when executing ML code.
// The offsets are built into the assembly code and some are built into
// the code generator so this must not be changed without checking these.
typedef struct _AssemblyArgs {
public:
    byte*       enterInterpreter;  // These are filled in with the functions.
byte* heapOverFlowCall; byte* stackOverFlowCall; byte* stackOverFlowCallEx; byte* trapHandlerEntry; stackItem* handlerRegister; // Current exception handler stackItem* stackLimit; // Lower limit of stack stackItem exceptionPacket; // Set if there is an exception stackItem threadId; // My thread id. Saves having to call into RTS for it. (stackItem so it's 64-bits) stackItem registers[25]; // Save/load area for registers X0-X24 inclusive double fpRegisters[8]; // Save/load area for floating point regs D0-D7 PolyWord* localMbottom; // Base of memory + 1 word PolyWord* localMpointer; // X27 Allocation ptr + 1 word stackItem* stackPtr; // X28 Current stack pointer arm64CodePointer linkRegister; // X30 - Link register (return address) arm64CodePointer entryPoint; // PC address to return to byte returnReason; // Reason for returning from ML - Set by assembly code. } AssemblyArgs; class Arm64TaskData: public TaskData, ByteCodeInterpreter { public: Arm64TaskData(); ~Arm64TaskData() {} unsigned allocReg; // The register to take the allocated space. POLYUNSIGNED allocWords; // The words to allocate. AssemblyArgs assemblyInterface; uint32_t saveRegisterMask; // Registers that need to be updated by a GC. virtual void GarbageCollect(ScanAddress *process); void ScanStackAddress(ScanAddress *process, stackItem& val, StackSpace *stack); virtual void EnterPolyCode(); // Start running ML virtual void SetException(poly_exn *exc) { assemblyInterface.exceptionPacket = (PolyWord)exc; } virtual void InterruptCode(); // AddTimeProfileCount is used in time profiling. virtual bool AddTimeProfileCount(SIGNALCONTEXT *context); virtual void InitStackFrame(TaskData *newTask, Handle proc); // Atomically release a mutex using hardware interlock. virtual bool AtomicallyReleaseMutex(PolyObject* mutexp); // Return the minimum space occupied by the stack. Used when setting a limit. // N.B. This is PolyWords not native words. virtual uintptr_t currentStackSpace(void) const { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE; } virtual void addProfileCount(POLYUNSIGNED words) { addSynchronousCount((POLYCODEPTR)assemblyInterface.entryPoint, words); } // PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer virtual void PreRTSCall(void) { TaskData::PreRTSCall(); SaveMemRegisters(); } // PostRTSCall: Before returning we need to restore the heap pointer. // If there has been a GC in the RTS call we need to create a new heap area. virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall(); } virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length); void SetMemRegisters(); void SaveMemRegisters(); void HandleTrap(); // ByteCode overrides. The interpreter and native code states need to be in sync. // The interpreter is only used during the initial bootstrap. virtual void ClearExceptionPacket() { assemblyInterface.exceptionPacket = TAGGED(0); } virtual PolyWord GetExceptionPacket() { return assemblyInterface.exceptionPacket; } virtual stackItem* GetHandlerRegister() { return assemblyInterface.handlerRegister; } virtual void SetHandlerRegister(stackItem* hr) { assemblyInterface.handlerRegister = hr; } void Interpret(); void EndBootStrap() { mixedCode = true; } PLock interruptLock; virtual void HandleStackOverflow(uintptr_t space); }; class Arm64Dependent : public MachineDependent { public: Arm64Dependent() : mustInterpret(false) {} // Create a task data object. 
    virtual TaskData* CreateTaskData(void) { return new Arm64TaskData(); }

    virtual void ScanConstantsWithinCode(PolyObject* addr, PolyObject* oldAddr, POLYUNSIGNED length,
        PolyWord* newConstAddr, PolyWord* oldConstAddr, POLYUNSIGNED numConsts, ScanAddress* process);

    virtual void RelocateConstantsWithinCode(PolyObject* addr, ScanAddress* process);

    virtual Architectures MachineArchitecture(void);

    virtual void SetBootArchitecture(char arch, unsigned wordLength);

    // The ARM has separate instruction and data caches.
    virtual void FlushInstructionCache(void* p, POLYUNSIGNED bytes);

    // During the first bootstrap phase this is interpreted.
    bool mustInterpret;

#if defined(POLYML32IN64)
    virtual void UpdateGlobalHeapReference(PolyObject* addr);
#else
    // Address of the constant segment from the code segment. This is complicated because
    // some OSs require the code to be position-independent which means the code can only
    // contain relative offsets. This isn't a problem for 32-in-64 because the code is
    // copied before it is executed.

    // Set the address of the constant area. If this is within the code segment itself we use the
    // default, negative, byte offset. If the constant area has been split off we use a pair of
    // dummy ADRP/LDR instructions. They aren't ever executed but allow us to use relative addressing.
    virtual void SetAddressOfConstants(PolyObject* objAddr, PolyObject* writable, POLYUNSIGNED length, PolyWord* constAddr)
    {
        if (constAddr > (PolyWord*)objAddr && constAddr < (PolyWord*)objAddr + length)
        {
            int64_t offset = (byte*)constAddr - (byte*)objAddr - length * sizeof(PolyWord);
            writable->Set(length - 1, PolyWord::FromSigned(offset));
        }
        else
        {
            PolyWord* last_word = objAddr->Offset(length - 1); // Last word in the code
            MemSpace* space = gMem.SpaceForAddress(last_word);
            uint32_t* pt = (uint32_t*)space->writeAble(last_word);
            pt[0] = toARMInstr(0x90000000); // Insert dummy ADRP and LDR
            pt[1] = toARMInstr(0xf9400000);
            ScanAddress::SetConstantValue((byte*)last_word, (PolyObject*)constAddr, PROCESS_RELOC_ARM64ADRPLDR64);
        }
    }

    virtual void GetConstSegmentForCode(PolyObject* obj, POLYUNSIGNED obj_length, PolyWord*& cp, POLYUNSIGNED& count) const
    {
        PolyWord* last_word = obj->Offset(obj_length - 1); // Last word in the code
        if ((last_word[0].AsUnsigned() >> 56) == 0xff)
        {
            // If the high-order byte is 0xff it's a (-ve) byte offset.
            POLYSIGNED offset = last_word->AsSigned();
            cp = last_word + 1 + offset / sizeof(PolyWord);
            count = cp[-1].AsUnsigned();
        }
        else
        {
            PolyObject* addr = ScanAddress::GetConstantValue((byte*)last_word, PROCESS_RELOC_ARM64ADRPLDR64, 0);
            cp = (PolyWord*)addr;
            count = addr->Length();
        }
    }
#endif
};

static Arm64Dependent arm64Dependent;

MachineDependent* machineDependent = &arm64Dependent;

Architectures Arm64Dependent::MachineArchitecture(void)
{
    // During the first phase of the bootstrap we
    // compile the interpreted version.
    if (mustInterpret) return MA_Interpreted;
#if defined(POLYML32IN64)
    return MA_Arm64_32;
#else
    return MA_Arm64;
#endif
}

// Values for the returnReason byte. These values are put into returnReason by the assembly code
// depending on which of the "trap" functions has been called.
enum RETURN_REASON {
    RETURN_HEAP_OVERFLOW = 1,       // Heap space check has failed.
    RETURN_STACK_OVERFLOW = 2,      // Stack space check has failed (<= 10 words).
    RETURN_STACK_OVERFLOWEX = 3,    // Stack space check has failed. Adjusted SP is in X9.
    RETURN_ENTER_INTERPRETER = 4    // Native code has entered interpreted code.
};

extern "C" {
    // These are declared in the assembly code segment.
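    // Roughly, each of the RETURN_* entries below is a small assembly stub
    // that records why ML code is trapping back into the RTS and then calls
    // Arm64TrapHandler through trapHandlerEntry. A sketch of the protocol in
    // C++ terms (the helper names here are illustrative, not the actual
    // assembly source):
    //
    //     assemblyInterface.returnReason = RETURN_HEAP_OVERFLOW; // why we trapped
    //     saveMLStateIntoAssemblyInterface();  // X0-X24, D0-D7, SP, link reg
    //     Arm64TrapHandler(assemblyInterface.threadId);
    //     restoreMLStateAndReenterML();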
    void Arm64AsmEnterCompiledCode(void*);
    int Arm64AsmCallExtraRETURN_ENTER_INTERPRETER(void);
    int Arm64AsmCallExtraRETURN_HEAP_OVERFLOW(void);
    int Arm64AsmCallExtraRETURN_STACK_OVERFLOW(void);
    int Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX(void);

    // This is declared here and called from the assembly code.
    // It avoids having a call to an external in the assembly code
    // which sometimes gives problems with position-independent code.
    void Arm64TrapHandler(stackItem threadId);
};

Arm64TaskData::Arm64TaskData() : ByteCodeInterpreter(&assemblyInterface.stackPtr, &assemblyInterface.stackLimit),
    allocReg(0), allocWords(0), saveRegisterMask(0)
{
    assemblyInterface.enterInterpreter = (byte*)Arm64AsmCallExtraRETURN_ENTER_INTERPRETER;
    assemblyInterface.heapOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_HEAP_OVERFLOW;
    assemblyInterface.stackOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOW;
    assemblyInterface.stackOverFlowCallEx = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX;
    assemblyInterface.trapHandlerEntry = (byte*)Arm64TrapHandler;
    interpreterPc = 0;
    mixedCode = !arm64Dependent.mustInterpret;
}

void Arm64Dependent::SetBootArchitecture(char arch, unsigned wordLength)
{
    if (arch == 'I')
        mustInterpret = true;
    else if (arch != 'A')
        Crash("Boot file has unexpected architecture code: %c", arch);
}

// The ARM has separate instruction and data caches so we must flush
// the cache when creating or modifying code.
void Arm64Dependent::FlushInstructionCache(void* p, POLYUNSIGNED bytes)
{
#ifdef _WIN32
    ::FlushInstructionCache(GetCurrentProcess(), p, bytes);
#elif defined (__GNUC__)
    __builtin___clear_cache((char*)p, (char*)p + bytes);
#elif (defined (__clang__) && defined (__APPLE__))
    sys_icache_invalidate(p, bytes);
#else
#error "No code to flush the instruction cache."
#endif
}

void Arm64TaskData::GarbageCollect(ScanAddress *process)
{
    TaskData::GarbageCollect(process);
    ByteCodeInterpreter::GarbageCollect(process);
    assemblyInterface.threadId = stackItem(threadObject); // threadObject updated by TaskData::GarbageCollect

    if (assemblyInterface.exceptionPacket.w().IsDataPtr())
    {
        PolyObject* obj = assemblyInterface.exceptionPacket.w().AsObjPtr();
        obj = process->ScanObjectAddress(obj);
        assemblyInterface.exceptionPacket = (PolyWord)obj;
    }

    if (stack != 0)
    {
        stackItem*stackPtr = assemblyInterface.stackPtr;
        // Now the values on the stack.
        for (stackItem* q = stackPtr; q < (stackItem*)stack->top; q++)
            ScanStackAddress(process, *q, stack);
    }

    // Register mask. There is a bit for each of the registers up to X24.
    for (int i = 0; i < 25; i++)
    {
        if (saveRegisterMask & (1 << i))
            ScanStackAddress(process, assemblyInterface.registers[i], stack);
    }

    // Make sure the code is still reachable. Code addresses aren't updated.
    {
        stackItem code;
        code.codeAddr = (POLYCODEPTR)assemblyInterface.linkRegister;
        ScanStackAddress(process, code, stack);
        code.codeAddr = (POLYCODEPTR)assemblyInterface.entryPoint;
        ScanStackAddress(process, code, stack);
    }
}

// Process a value within the stack.
void Arm64TaskData::ScanStackAddress(ScanAddress *process, stackItem& stackItem, StackSpace *stack)
{
    // Code addresses on the ARM are always even, unlike the X86, so if it's tagged
    // it can't be an address.
    if (stackItem.w().IsTagged()) return;

#ifdef POLYML32IN64
    // In 32-in-64 we can have either absolute addresses or object indexes.
// Absolute addresses always have the top 32-bits non-zero if (stackItem.argValue < ((uintptr_t)1 << 32)) { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } else { - // Could be a code address or a stack address. + // Could be a code address, a stack address or a heap address that has been + // converted from an object pointer. Currently local addresses only occur + // in registers, not on the stack. MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); - if (space == 0 || space->spaceType != ST_CODE) return; - PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); - ASSERT(obj != 0); - // Process the address of the start. Don't update anything. - process->ScanObjectAddress(obj); + if (space->spaceType == ST_CODE) + { + PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); + ASSERT(obj != 0); + // Process the address of the start. Don't update anything. + process->ScanObjectAddress(obj); + } + else if (space->spaceType == ST_LOCAL) + stackItem.absAddress = process->ScanObjectAddress(stackItem.absAddress); } #else MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space->spaceType == ST_CODE) { PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } else if (space->spaceType == ST_LOCAL) // Local values must be word addresses. { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } #endif } // Copy a stack void Arm64TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length) { #ifdef POLYML32IN64 old_length = old_length / 2; new_length = new_length / 2; #endif /* Moves a stack, updating all references within the stack */ stackItem*old_base = (stackItem*)old_stack; stackItem*new_base = (stackItem*)new_stack; stackItem*old_top = old_base + old_length; /* Calculate the offset of the new stack from the old. If the frame is being extended objects in the new frame will be further up the stack than in the old one. */ uintptr_t offset = new_base - old_base + new_length - old_length; stackItem *oldSp = assemblyInterface.stackPtr; assemblyInterface.stackPtr = oldSp + offset; assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset; /* Skip the unused part of the stack. 
*/ uintptr_t i = oldSp - old_base; ASSERT(i <= old_length); i = old_length - i; stackItem *old = oldSp; stackItem *newp = assemblyInterface.stackPtr; while (i--) { stackItem old_word = *old++; if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top) old_word.stackAddr = old_word.stackAddr + offset; else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr)) { stackItem* addr = (stackItem*)old_word.w().AsStackAddr(); if (addr >= old_base && addr <= old_top) { addr += offset; old_word = PolyWord::FromStackAddr((PolyWord*)addr); } } *newp++ = old_word; } ASSERT(old == ((stackItem*)old_stack) + old_length); ASSERT(newp == ((stackItem*)new_stack) + new_length); // And change any registers that pointed into the old stack for (int j = 0; j < 25; j++) { if (saveRegisterMask & (1 << j)) { stackItem* regAddr = &(assemblyInterface.registers[j]); stackItem old_word = *regAddr; if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top) old_word.stackAddr = old_word.stackAddr + offset; else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr)) { stackItem* addr = (stackItem*)old_word.w().AsStackAddr(); if (addr >= old_base && addr <= old_top) { addr += offset; old_word = PolyWord::FromStackAddr((PolyWord*)addr); } } *regAddr = old_word; } } } void Arm64TaskData::EnterPolyCode() /* Called from "main" to enter the code. */ { assemblyInterface.stackLimit = (stackItem*)((PolyWord*)this->stack->stack() + OVERFLOW_STACK_SIZE); if (arm64Dependent.mustInterpret) { PolyWord closure = assemblyInterface.registers[8]; *(--assemblyInterface.stackPtr) = closure; /* Closure address */ interpreterPc = *(POLYCODEPTR*)closure.AsObjPtr(); Interpret(); ASSERT(0); // Should never return } SetMemRegisters(); // Jump into the ML code. This code sets up the registers and puts the // address of the assemblyInterface into X26 Arm64AsmEnterCompiledCode(&assemblyInterface); // This should never return ASSERT(0); } void Arm64TaskData::Interpret() { while (true) { switch (RunInterpreter(this)) { case ReturnCall: // After the call there will be an enter-int instruction so that when this // returns we will re-enter the interpreter. The number of arguments for // this call is after that. while ((uintptr_t)interpreterPc & 3) { ASSERT(interpreterPc[0] == INSTR_no_op); interpreterPc++; } ASSERT(interpreterPc[0] == 0xe9); numTailArguments = interpreterPc[12]; case ReturnTailCall: { ClearExceptionPacket(); // Pop the closure. PolyWord closureWord = *assemblyInterface.stackPtr++; PolyObject* closure = closureWord.AsObjPtr(); arm64CodePointer cp = *(arm64CodePointer*)closure; if (fromARMInstr(cp[0]) == 0xAA1E03E9 && fromARMInstr(cp[1]) == 0xF9400350 && fromARMInstr(cp[2]) == 0xD63F0200) { // If the code we're going to is interpreted push back the closure and // continue. interpreterPc = (POLYCODEPTR)cp; assemblyInterface.stackPtr--; HandleStackOverflow(128); // Make sure we have space since we're bypassing the check. continue; } assemblyInterface.registers[8] = closureWord; // Put closure in the closure reg. // Pop the return address. We may need to align this to a word boundary. POLYCODEPTR originalReturn = (POLYCODEPTR)((assemblyInterface.stackPtr++)->codeAddr); while ((uintptr_t)originalReturn & 3) { ASSERT(originalReturn[0] == INSTR_no_op); originalReturn++; } // Get the arguments into the correct registers. // Load the register arguments. The first 8 arguments go into X0-X7. 
// These will have been the first arguments to be pushed so will be // furthest away on the stack. // Note: we don't currently pass any arguments in the FP regs. for (unsigned i = 0; i < numTailArguments && i < 8; i++) assemblyInterface.registers[i] = assemblyInterface.stackPtr[numTailArguments - i - 1]; // If there are any more arguments these need to be shifted down the stack. while (numTailArguments > 8) { numTailArguments--; assemblyInterface.stackPtr[numTailArguments] = assemblyInterface.stackPtr[numTailArguments - 8]; } // Remove the register arguments assemblyInterface.stackPtr += numTailArguments > 8 ? 8 : numTailArguments; assemblyInterface.linkRegister = (arm64CodePointer)originalReturn; // Set the return address to caller assemblyInterface.entryPoint = *(arm64CodePointer*)closure; // Entry point to callee interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } case ReturnReturn: { ClearExceptionPacket(); // Returning from an interpreted function. Normally we'll be returning to // interpreted code. if ((uintptr_t)interpreterPc & 3) // ARM64 addresses will always be 4-byte aligned. continue; arm64CodePointer cp = (arm64CodePointer)interpreterPc; if (fromARMInstr(cp[0]) == 0xAA1E03E9 && fromARMInstr(cp[1]) == 0xF9400350 && fromARMInstr(cp[2]) == 0xD63F0200) continue; // Pop the value we're returning. Set the entry point to the code we're returning to. assemblyInterface.registers[0] = *assemblyInterface.stackPtr++; assemblyInterface.entryPoint = cp; interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } } } } // Called from the assembly code as a result of a trap i.e. a request for // a GC or to extend the stack. // N.B. Argument must be stackItem not PolyWord so that it's compatible with // big-endian 32-in-64. void Arm64TrapHandler(stackItem threadId) { Arm64TaskData* taskData = (Arm64TaskData*)TaskData::FindTaskForId(threadId); taskData->HandleTrap(); } void Arm64TaskData::HandleTrap() { SaveMemRegisters(); // Update globals from the memory registers. switch (this->assemblyInterface.returnReason) { case RETURN_HEAP_OVERFLOW: { // The heap has overflowed. // The register mask is the word after the return. saveRegisterMask = fromARMInstr(*assemblyInterface.entryPoint++); // The generated code first subtracts the space required from x27 and puts the // result into a separate register. It then compares this with x25 and comes here if // it is not above that. Either way it is going to execute an instruction to put // this value back into x27. // Look at that instruction to find out the register. arm64Instr moveInstr = fromARMInstr(*assemblyInterface.entryPoint); // We may have an instruction to pop X30 first. if (moveInstr == 0xF840879E) moveInstr = fromARMInstr(assemblyInterface.entryPoint[1]); ASSERT((moveInstr & 0xffe0ffff) == 0xaa0003fb); // mov x27,xN allocReg = (moveInstr >> 16) & 0x1f; - allocWords = (allocPointer - (PolyWord*)assemblyInterface.registers[allocReg].stackAddr) + 1; + allocWords = (POLYUNSIGNED)((allocPointer - (PolyWord*)assemblyInterface.registers[allocReg].stackAddr) + 1); assemblyInterface.registers[allocReg] = TAGGED(0); // Clear this - it's not a valid address. if (profileMode == kProfileStoreAllocation) addProfileCount(allocWords); // The actual allocation is done in SetMemRegisters. break; } case RETURN_STACK_OVERFLOW: case RETURN_STACK_OVERFLOWEX: { // The register mask is the word after the return. 
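            // The mask is a data word that the code-generator plants immediately
            // after the call to the trap function: bit N is set if XN (N in 0..24)
            // holds a live value that the GC may need to update. For example
            // (values for illustration only), a function with live values in
            // X0, X8 and X19 would plant the word (1 << 0) | (1 << 8) | (1 << 19),
            // i.e. 0x80101.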
saveRegisterMask = fromARMInstr(*assemblyInterface.entryPoint++); uintptr_t min_size = 0; // Size in PolyWords if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW) { min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } else { // Stack limit overflow. If the required stack space is larger than // the fixed overflow size the code will calculate the limit in X9. stackItem* stackP = assemblyInterface.registers[9].stackAddr; min_size = (this->stack->top - (PolyWord*)stackP) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } HandleStackOverflow(min_size); break; } case RETURN_ENTER_INTERPRETER: { interpreterPc = (POLYCODEPTR)assemblyInterface.linkRegister; byte reasonCode = *interpreterPc++; // Sort out arguments. assemblyInterface.exceptionPacket = TAGGED(0); if (reasonCode == 0xff) { // Exception handler. assemblyInterface.exceptionPacket = assemblyInterface.registers[0]; // Get the exception packet // We need to leave the current handler in place. When we enter the interpreter it will // check the exception packet and if it is non-null will raise it. } else if (reasonCode >= 128) { // Start of function. unsigned numArgs = reasonCode - 128; // We need the stack to contain: // The closure, the return address, the arguments. // The stack will currently contain the stack arguments. // Add space for the register arguments if (numArgs > 8) assemblyInterface.stackPtr -= 8; else assemblyInterface.stackPtr -= numArgs; // Move up any stack arguments. for (unsigned n = 8; n < numArgs; n++) { assemblyInterface.stackPtr[n - 8] = assemblyInterface.stackPtr[n]; } // Store the register arguments for (unsigned n = 0; n < numArgs && n < 8; n++) assemblyInterface.stackPtr[numArgs - n - 1] = assemblyInterface.registers[n]; // Finally push the return address and closure pointer *(--assemblyInterface.stackPtr) = assemblyInterface.registers[9]; // Return address - value of X30 before enter-int *(--assemblyInterface.stackPtr) = assemblyInterface.registers[8]; // Closure } else { // Return from call. Push X0 *(--assemblyInterface.stackPtr) = assemblyInterface.registers[0]; } Interpret(); break; } default: Crash("Unknown return reason code %u", this->assemblyInterface.returnReason); } SetMemRegisters(); } void Arm64TaskData::HandleStackOverflow(uintptr_t space) { uintptr_t min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE + space; try { // The stack check has failed. This may either be because we really have // overflowed the stack or because the stack limit value has been adjusted // to result in a call here. CheckAndGrowStack(this, min_size); } catch (IOException&) { // We may get an exception while handling this if we run out of store } { PLocker l(&interruptLock); // Set the stack limit. This clears any interrupt and also sets the // correct value if we've grown the stack. assemblyInterface.stackLimit = (stackItem*)stack->bottom + OVERFLOW_STACK_SIZE; } try { processes->ProcessAsynchRequests(this); // Release and re-acquire use of the ML memory to allow another thread // to GC. processes->ThreadReleaseMLMemory(this); processes->ThreadUseMLMemory(this); } catch (IOException&) { } catch (KillException&) { processes->ThreadExit(this); } } void Arm64TaskData::InitStackFrame(TaskData* parentTask, Handle proc) /* Initialise stack frame. 
*/
{
    StackSpace* space = this->stack;
    StackObject* stack = (StackObject*)space->stack();
    uintptr_t stack_size = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem);
    assemblyInterface.stackPtr = (stackItem*)stack + stack_size;
    assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE;
    assemblyInterface.handlerRegister = assemblyInterface.stackPtr;

    // Store the argument and the closure.
    assemblyInterface.registers[8] = proc->Word(); // Closure
    assemblyInterface.registers[0] = TAGGED(0); // Argument
    assemblyInterface.linkRegister = (arm64CodePointer)1; // We never return. Use a tagged value because it may be pushed
    assemblyInterface.entryPoint = (arm64CodePointer)1;
    // Have to set the register mask in case we get a GC before the thread starts.
    saveRegisterMask = (1 << 8) | 1; // X8 and X0

#ifdef POLYML32IN64
    // In 32-in-64 X24 always contains the heap base address.
    assemblyInterface.registers[24].stackAddr = (stackItem*)globalHeapBase;
#endif
}

// This is called from a different thread so we have to be careful.
void Arm64TaskData::InterruptCode()
{
    PLocker l(&interruptLock);
    // Set the stack limit pointer to the top of the stack to cause
    // a trap when we next check for stack overflow.
    // We use a lock here to ensure that we always use the current value of the
    // stack. The thread we're interrupting could be growing the stack at this point.
    if (stack != 0)
        assemblyInterface.stackLimit = (stackItem*)(stack->top - 1);
}

// Called before entering ML code from the run-time system
void Arm64TaskData::SetMemRegisters()
{
    // Copy the current store limits into variables before we go into the assembly code.

    // If we haven't yet set the allocation area or we don't have enough we need
    // to create one (or a new one).
    if (allocPointer <= allocLimit + allocWords)
    {
        if (allocPointer < allocLimit)
            Crash("Bad length in heap overflow trap");

        // Find some space to allocate in. Updates taskData->allocPointer and
        // returns a pointer to the newly allocated space (if allocWords != 0)
        PolyWord* space = processes->FindAllocationSpace(this, allocWords, true);
        if (space == 0)
        {
            // We will now raise an exception instead of returning.
            // Set allocWords to zero so we don't set the allocation register
            // since that could be holding the exception packet.
            allocWords = 0;
        }
        // Undo the allocation just now.
        allocPointer += allocWords;
    }
    if (this->allocWords != 0)
    {
        // If we have had a heap trap we actually do the allocation here.
        // We will have already garbage collected and recovered sufficient space.
        // This also happens if we have just trapped because of store profiling.
        allocPointer -= allocWords; // Now allocate
        // Set the allocation register to this area. N.B. This is an absolute address.
        assemblyInterface.registers[allocReg].codeAddr = (POLYCODEPTR)(allocPointer + 1); /* remember: it's off-by-one */
        allocWords = 0;
    }
    // If we have run out of store, either just above or while allocating in the RTS,
    // allocPointer and allocLimit will have been set to zero as part of the GC. We will
    // now be raising an exception which may free some store but we need to come back here
    // before we allocate anything. The compiled code uses unsigned arithmetic to check for
    // heap overflow but only after subtracting the space required. We need to make sure
    // that the values are still non-negative after subtracting any object size.
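    // A concrete example (illustrative numbers): with allocPointer == 0 a
    // request for three words computes 0 - 24, which wraps to a huge unsigned
    // value that appears to be well above allocLimit, so the heap check would
    // wrongly succeed. With both values set to MAX_OBJECT_SIZE the subtraction
    // stays non-negative and never exceeds the limit, so the check fails and
    // we trap back here instead of scribbling on unallocated store.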
if (allocPointer == 0) allocPointer += MAX_OBJECT_SIZE; if (allocLimit == 0) allocLimit += MAX_OBJECT_SIZE; assemblyInterface.localMbottom = allocLimit + 1; assemblyInterface.localMpointer = allocPointer + 1; // If we are profiling store allocation we set mem_hl so that a trap // will be generated. if (profileMode == kProfileStoreAllocation) assemblyInterface.localMbottom = assemblyInterface.localMpointer; assemblyInterface.threadId = stackItem(threadObject); } // This is called whenever we have returned from ML to C. void Arm64TaskData::SaveMemRegisters() { if (interpreterPc == 0) { // Not if we're already in the interpreter // The normal return is to the link register address. assemblyInterface.entryPoint = assemblyInterface.linkRegister; allocPointer = assemblyInterface.localMpointer - 1; } allocWords = 0; assemblyInterface.exceptionPacket = TAGGED(0); saveRegisterMask = 0; } // Process addresses in the code // Because we don't have constants actually in the code we only have to process this in two // cases. If we are exporting the code we first copy it to a new location. We have to update // the ADRP+LDR/ADD pairs at that point. // When we construct the relocations we need to identify the points where the relocations apply in // the code. This applies both to exporting to object code and to saved states. void Arm64Dependent::ScanConstantsWithinCode(PolyObject* addr, PolyObject* oldAddr, POLYUNSIGNED length, PolyWord* newConstAddr, PolyWord* oldConstAddr, POLYUNSIGNED numConsts, ScanAddress* process) { arm64CodePointer pt = (arm64CodePointer)addr; if (addr == oldAddr && newConstAddr == oldConstAddr) return; // If it begins with the enter-int sequence it's interpreted code. if (fromARMInstr(pt[0]) == 0xAA1E03E9 && fromARMInstr(pt[1]) == 0xF9400350 && fromARMInstr(pt[2]) == 0xD63F0200) return; while (*pt != 0) // The code ends with a UDF instruction (0) { arm64Instr instr0 = fromARMInstr(pt[0]); if ((instr0 & 0x9f000000) == 0x90000000) // ADRP instruction { // Look at the instruction at the original location, before it was copied, to // find out the address it referred to. byte* oldInstrAddress = (byte*)pt - (byte*)addr + (byte*)oldAddr; arm64Instr instr1 = fromARMInstr(pt[1]); ScanRelocationKind scanKind; if ((instr1 & 0xffc00000) == 0xf9400000) scanKind = PROCESS_RELOC_ARM64ADRPLDR64; // LDR of 64-bit quantity else if ((instr1 & 0xffc00000) == 0xb9400000) scanKind = PROCESS_RELOC_ARM64ADRPLDR32; // LDR of 32-bit quantity else if ((instr1 & 0xff800000) == 0x91000000) scanKind = PROCESS_RELOC_ARM64ADRPADD; // ADD else ASSERT(0); // Invalid instruction byte* constAddress = (byte*)ScanAddress::GetConstantValue(oldInstrAddress, scanKind, 0); // This could be a reference to the code itself or the non-constant area. // If it's in the code we relocate it to the new code; if it's in the constant // area to the new constant area. byte* newAddress; if (constAddress > oldInstrAddress && constAddress < ((byte*)oldConstAddr)) newAddress = (byte*)addr + (constAddress - (byte*)oldAddr); else newAddress = (byte*)newConstAddr + (constAddress - (byte*)oldConstAddr); ScanAddress::SetConstantValue((byte*)pt, (PolyObject*)newAddress, scanKind); } pt++; } } void Arm64Dependent::RelocateConstantsWithinCode(PolyObject* addr, ScanAddress* process) { arm64CodePointer pt = (arm64CodePointer)addr; // If it begins with the enter-int sequence it's interpreted code. 
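    // For reference, the three instructions of that sequence, tested for below
    // and elsewhere in this file, decode as (illustrative disassembly):
    //    0xAA1E03E9    mov  x9,x30      ; save the return address in X9
    //    0xF9400350    ldr  x16,[x26]   ; load enterInterpreter from the
    //                                   ; AssemblyArgs block pointed at by X26
    //    0xD63F0200    blr  x16         ; call into the interpreter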
    if (fromARMInstr(pt[0]) == 0xAA1E03E9 && fromARMInstr(pt[1]) == 0xF9400350 && fromARMInstr(pt[2]) == 0xD63F0200)
        return;
#ifndef POLYML32IN64
    POLYUNSIGNED length = addr->Length();
    // If we have replaced the offset with a dummy ADRP/LDR pair we have to add a relocation.
    PolyWord* end = addr->Offset(length - 1);
    if ((end[0].AsUnsigned() >> 56) != 0xff)
        process->RelocateOnly(addr, (byte*)end, PROCESS_RELOC_ARM64ADRPLDR64);
#endif
    while (*pt != 0) // The code ends with a UDF instruction (0)
    {
        arm64Instr instr0 = fromARMInstr(pt[0]);
        if ((instr0 & 0x9f000000) == 0x90000000) // ADRP instruction
        {
            arm64Instr instr1 = fromARMInstr(pt[1]);
            ScanRelocationKind scanKind;
            if ((instr1 & 0xffc00000) == 0xf9400000)
                scanKind = PROCESS_RELOC_ARM64ADRPLDR64; // LDR of 64-bit quantity
            else if ((instr1 & 0xffc00000) == 0xb9400000)
                scanKind = PROCESS_RELOC_ARM64ADRPLDR32; // LDR of 32-bit quantity
            else if ((instr1 & 0xff800000) == 0x91000000)
                scanKind = PROCESS_RELOC_ARM64ADRPADD; // ADD
            else ASSERT(0); // Invalid instruction
            process->RelocateOnly(addr, (byte*)pt, scanKind);
        }
        pt++;
    }
}

// This is a special hack for FFI callbacks in 32-in-64. This is called
// to update the reference to the global heap base held in the callback code.
#ifdef POLYML32IN64
void Arm64Dependent::UpdateGlobalHeapReference(PolyObject* addr)
{
    arm64CodePointer pt = (arm64CodePointer)addr;
    if (fromARMInstr(pt[0]) == 0xD503201F && (fromARMInstr(pt[1]) & 0xff000000) == 0x58000000)
    {
        // nop (special marker) followed by LDR Xn,pc-relative
        uint32_t pcOffset = (fromARMInstr(pt[1]) >> 5) & 0x3ffff; // This is a number of 32-bit words
        PolyWord* gHeapAddr = ((PolyWord*)addr) + pcOffset + 1; // PolyWords are 32-bits
        if (((PolyWord**)gHeapAddr)[0] != globalHeapBase)
            ((PolyWord**)gMem.SpaceForAddress(gHeapAddr)->writeAble(gHeapAddr))[0] = globalHeapBase;
    }
}
#endif

// As far as possible we want locking and unlocking an ML mutex to be fast so
// we try to implement the code in the assembly code using appropriate
// interlocked instructions. That does mean that if we need to lock and
// unlock an ML mutex in this code we have to use the same, machine-dependent,
// code to do it. These are defaults that are used where there is no
// machine-specific code.

#if defined(_MSC_VER)
// This saves having to define it in the MASM assembly code.
static uintptr_t Arm64AsmAtomicExchange(PolyObject* mutexp, uintptr_t value)
{
-# if (SIZEOF_POLYWORD == 8)
+    // Mutexes are always 64-bit values even on 32-in-64.
    return InterlockedExchange64((LONG64*)mutexp, value);
-# else
-    return InterlockedExchange((LONG*)mutexp, value);
-# endif
}

#else
extern "C" {
    // This is only defined in the GAS assembly code
    uintptr_t Arm64AsmAtomicExchange(PolyObject*, uintptr_t);
}
#endif

bool Arm64TaskData::AtomicallyReleaseMutex(PolyObject* mutexp)
{
    uintptr_t oldValue = Arm64AsmAtomicExchange(mutexp, 0);
    return oldValue == 1;
}

bool Arm64TaskData::AddTimeProfileCount(SIGNALCONTEXT *context)
{
    stackItem* sp = 0;
    POLYCODEPTR pc = 0;
    if (context != 0)
    {
#if defined(HAVE_WINDOWS_H)
        sp = (stackItem*)context->Sp;
        pc = (POLYCODEPTR)context->Pc;
#elif defined(HAVE_UCONTEXT_T)
#ifdef HAVE_MCONTEXT_T_REGS
        // Linux
        sp = (stackItem*)context->uc_mcontext.sp;
        pc = (POLYCODEPTR)context->uc_mcontext.pc;
#endif
#endif
    }
    if (pc != 0)
    {
        // See if the PC we've got is an ML code address.
        MemSpace* space = gMem.SpaceForAddress(pc);
        if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
        {
            incrementCountAsynch(pc);
            return true;
        }
    }
    // See if the sp value is in the current stack.
    if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
    {
        // We may be in the assembly code. The top of the stack will be a return address.
        pc = sp[0].w().AsCodePtr();
        MemSpace* space = gMem.SpaceForAddress(pc);
        if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
        {
            incrementCountAsynch(pc);
            return true;
        }
    }
    // See if the value of regSP is a valid stack pointer.
    // This works if we happen to be in an RTS call using a "Full" call.
    // It doesn't work if we've used a "Fast" call because that doesn't save the SP.
    sp = assemblyInterface.stackPtr;
    if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
    {
        // We may be in the run-time system.
        pc = sp[0].w().AsCodePtr();
        MemSpace* space = gMem.SpaceForAddress(pc);
        if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
        {
            incrementCountAsynch(pc);
            return true;
        }
    }
    return false;
}

extern "C" {
    POLYEXTERNALSYMBOL void* PolyArm64GetThreadData();
    POLYEXTERNALSYMBOL POLYUNSIGNED PolyInterpretedEnterIntMode();
    POLYEXTERNALSYMBOL POLYUNSIGNED PolyEndBootstrapMode(POLYUNSIGNED threadId, POLYUNSIGNED function);
}

// Return the address of assembly data for the current thread. This is normally in
// X26 except if we are in a callback.
void* PolyArm64GetThreadData()
{
    // We should get the task data for the thread that is running this code.
    // If this thread has been created by the foreign code we will have to
    // create a new one here.
    TaskData* taskData = processes->GetTaskDataForThread();
    if (taskData == 0)
    {
        try {
            taskData = processes->CreateNewTaskData();
        }
        catch (std::bad_alloc&) {
            ::Exit("Unable to create thread data - insufficient memory");
        }
        catch (MemoryException&) {
            ::Exit("Unable to create thread data - insufficient memory");
        }
    }
    return &((Arm64TaskData*)taskData)->assemblyInterface;
}

// Do we require EnterInt instructions and if so for which architecture?
// 0 => None; 1 => X86_32; 2 => X86_64; 3 => X86_32_in_64; 4 => ARM_64.
// ARM_64 in 32-in-64 is the same as ARM_64.
POLYUNSIGNED PolyInterpretedEnterIntMode()
{
    return TAGGED(4).AsUnsigned();
}

// End the first stage of bootstrap mode and run a new function.
// The first stage is always interpreted. Once that is complete every function will have
// at least an executable "enter-interpreter" stub so it can be called as machine code.
POLYUNSIGNED PolyEndBootstrapMode(POLYUNSIGNED threadId, POLYUNSIGNED function)
{
    TaskData* taskData = TaskData::FindTaskForId(threadId);
    ASSERT(taskData != 0);
    taskData->PreRTSCall();
    Handle pushedFunction = taskData->saveVec.push(function);
    arm64Dependent.mustInterpret = false;
    ((Arm64TaskData*)taskData)->EndBootStrap();
    taskData->InitStackFrame(taskData, pushedFunction);
    taskData->EnterPolyCode();
    // Should never return.
    ASSERT(0);
    return TAGGED(0).AsUnsigned();
}

// No machine-specific calls in the interpreter.
struct _entrypts machineSpecificEPT[] =
{
    { "PolyArm64GetThreadData",      (polyRTSFunction)&PolyArm64GetThreadData },
    { "PolyInterpretedEnterIntMode", (polyRTSFunction)&PolyInterpretedEnterIntMode },
    { "PolyEndBootstrapMode",        (polyRTSFunction)&PolyEndBootstrapMode },
    { NULL, NULL } // End of list.
};

diff --git a/libpolyml/globals.h b/libpolyml/globals.h
index 313d5038..c30c8ada 100644
--- a/libpolyml/globals.h
+++ b/libpolyml/globals.h
@@ -1,426 +1,427 @@
/*
    Title:  Globals for the system.
    Author: Dave Matthews, Cambridge University Computer Laboratory

-   Copyright David C. J. Matthews 2017-21
+   Copyright David C. J. Matthews 2017-22
    Copyright (c) 2000-7 Cambridge University Technical Services Limited

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License version 2.1 as published by the Free Software Foundation.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/

#ifndef _GLOBALS_H
#define _GLOBALS_H

/*
    Poly words, pointers and cells (objects).

    The garbage collector needs to be able to distinguish different uses of a
    memory word. We need to be able to find which words are pointers to other
    objects and which are simple integers. The simple distinction is between
    integers, which are tagged by having the bottom bit set, and Addresses
    which are word aligned (bottom 2 bits zero on a 32 bit machine, bottom 3
    bits on a 64 bit machine, bottom bit in 32-in-64).

    Addresses always point to the start of cells. The preceding word of a
    cell is the length word. This contains the length of the cell in words in
    the low-order 3 (7 in native 64-bits) bytes and a flag byte in the top
    byte. The flags give information about the type of the object. The length
    word is also used by the garbage collector and other object processors.
*/

#if HAVE_STDINT_H
#  include <stdint.h>
#endif

#if HAVE_INTTYPES_H
#  ifndef __STDC_FORMAT_MACROS
#  define __STDC_FORMAT_MACROS
#  endif
#  include <inttypes.h>
#elif (defined(_MSC_VER) && (_MSC_VER >= 1900))
// In VS 2015 and later we need to use <cinttypes>
#  include <cinttypes>
#endif

#ifdef HAVE_STDDEF_H
#  include <stddef.h>
#endif

#define POLY_TAGSHIFT 1

#if (defined(_WIN32))
#  include <windows.h>
#endif

#ifdef POLYML32IN64
typedef int32_t        POLYSIGNED;
typedef uint32_t       POLYUNSIGNED;
#define SIZEOF_POLYWORD 4
#else
typedef intptr_t       POLYSIGNED;
typedef uintptr_t      POLYUNSIGNED;
#define SIZEOF_POLYWORD SIZEOF_VOIDP
#endif

// libpolyml uses printf-style I/O instead of C++ standard IOstreams,
// so we need a specifier to format POLYUNSIGNED/POLYSIGNED values.
#ifdef POLYML32IN64
#if (defined(PRIu32))
#  define POLYUFMT PRIu32
#  define POLYSFMT PRId32
#elif (defined(_MSC_VER))
#  define POLYUFMT "lu"
#  define POLYSFMT "ld"
#else
#  define POLYUFMT "u"
#  define POLYSFMT "d"
#endif
#elif (defined(PRIuPTR))
#  define POLYUFMT PRIuPTR
#  define POLYSFMT PRIdPTR
#elif (defined(_MSC_VER) && (SIZEOF_POLYWORD == 8))
#  define POLYUFMT "llu"
#  define POLYSFMT "lld"
#else
#  define POLYUFMT "lu"         // as before. Cross your fingers.
#  define POLYSFMT "ld"         // idem.
#endif

// We can use the C99 %zu in most cases except MingW since it uses
// the old msvcrt and that only supports C89.
#if (defined(_WIN32) && (! defined(_MSC_VER) || _MSC_VER < 1800))
#  if (SIZEOF_VOIDP == 8)
#    define PRI_SIZET PRIu64
#  else
#    define PRI_SIZET PRIu32
#  endif
#else
#  define PRI_SIZET "zu"
#endif

typedef unsigned char byte;

class PolyObject;
typedef PolyObject *POLYOBJPTR;

#ifdef POLYML32IN64
class PolyWord;
extern PolyWord *globalHeapBase, *globalCodeBase;
typedef uint32_t POLYOBJECTPTR; // This is an index into globalHeapBase

// If a 64-bit value is in the range of the object pointers.
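// For example (illustrative addresses): with globalHeapBase at 0x100000000,
// the object index 0x200 denotes the absolute address
// 0x100000000 + 0x200 * sizeof(PolyWord) = 0x100000800. Indexes are always
// less than 2^32, so anything above 0xffffffff must be an absolute (e.g.
// code or C stack) address rather than an object index.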
inline bool IsHeapAddress(void *addr) { return (uintptr_t)addr <= 0xffffffff; } #else typedef POLYOBJPTR POLYOBJECTPTR; inline bool IsHeapAddress(void *) { return true; } #endif typedef byte *POLYCODEPTR; class PolyWord { public: // Initialise to TAGGED(0). This is very rarely used. PolyWord() { contents.unsignedInt = 1; } // Integers need to be tagged. static PolyWord TaggedInt(POLYSIGNED s) { return PolyWord((s << POLY_TAGSHIFT) | (POLYSIGNED)0x01); } static PolyWord TaggedUnsigned(POLYUNSIGNED u) { return PolyWord((u << POLY_TAGSHIFT) | 0x01); } static PolyWord FromStackAddr(PolyWord *sp) { return PolyWord(sp); } static PolyWord FromCodePtr(POLYCODEPTR p) { return PolyWord(p); } // Tests for the various cases. bool IsTagged(void) const { return (contents.unsignedInt & 1) != 0; } #ifndef POLYML32IN64 // In native 32-bit and 64-bit addresses are on word boundaries bool IsDataPtr(void) const { return (contents.unsignedInt & (sizeof(PolyWord) - 1)) == 0; } #else // In 32-in-64 addresses are anything that isn't tagged. bool IsDataPtr(void) const { return (contents.unsignedInt & 1) == 0; } #ifdef POLYML32IN64DEBUG static POLYOBJECTPTR AddressToObjectPtr(void *address); #else static POLYOBJECTPTR AddressToObjectPtr(void *address) { return (POLYOBJECTPTR)((PolyWord*)address - globalHeapBase); } #endif #endif // Extract the various cases. POLYSIGNED UnTagged(void) const { return contents.signedInt >> POLY_TAGSHIFT; } POLYUNSIGNED UnTaggedUnsigned(void) const { return contents.unsignedInt >> POLY_TAGSHIFT; } #ifdef POLYML32IN64 PolyWord(POLYOBJPTR p) { contents.objectPtr = AddressToObjectPtr(p); } PolyWord *AsStackAddr(PolyWord *base = globalHeapBase) const { return base + contents.objectPtr; } POLYOBJPTR AsObjPtr(PolyWord *base = globalHeapBase) const { return (POLYOBJPTR)AsStackAddr(base); } #else // An object pointer can become a word directly. PolyWord(POLYOBJPTR p) { contents.objectPtr = p; } POLYOBJPTR AsObjPtr(PolyWord *base = 0) const { return contents.objectPtr; } PolyWord *AsStackAddr(PolyWord *base=0) const { return (PolyWord *)contents.objectPtr; } #endif POLYCODEPTR AsCodePtr(void) const { return (POLYCODEPTR)AsObjPtr(); } void *AsAddress(void)const { return AsCodePtr(); } // There are a few cases where we need to store and extract untagged values static PolyWord FromUnsigned(POLYUNSIGNED u) { return PolyWord(u); } static PolyWord FromSigned(POLYSIGNED s) { return PolyWord(s); } POLYUNSIGNED AsUnsigned(void) const { return contents.unsignedInt; } POLYSIGNED AsSigned(void) const { return contents.signedInt; } protected: PolyWord(POLYSIGNED s) { contents.signedInt = s; } PolyWord(POLYUNSIGNED u) { contents.unsignedInt = u; } public: bool operator == (PolyWord b) const { return contents.unsignedInt == b.contents.unsignedInt; } bool operator != (PolyWord b) const { return contents.unsignedInt != b.contents.unsignedInt; } protected: #ifdef POLYML32IN64 PolyWord(PolyWord *sp) { contents.objectPtr = AddressToObjectPtr(sp); } PolyWord(POLYCODEPTR p) { contents.objectPtr = AddressToObjectPtr(p); } #else PolyWord(PolyWord *sp) { contents.objectPtr = (PolyObject*)sp; } PolyWord(POLYCODEPTR p) { contents.objectPtr = (PolyObject*)p; } #endif union { POLYSIGNED signedInt; // A tagged integer - lowest bit set POLYUNSIGNED unsignedInt; // A tagged integer - lowest bit set POLYOBJECTPTR objectPtr; // Object pointer - lowest bit clear. 
    } contents;
};

//typedef PolyWord POLYWORD;

inline bool OBJ_IS_AN_INTEGER(const PolyWord & a)   { return a.IsTagged(); }
inline bool OBJ_IS_DATAPTR(const PolyWord & a)      { return a.IsDataPtr(); }
// The maximum tagged signed number is one less than 0x80 shifted into the top byte then shifted down
// by the tag shift.
#define MAXTAGGED (((POLYSIGNED)0x80 << (POLYSIGNED)(8*(sizeof(PolyWord)-1) -POLY_TAGSHIFT)) -1)
inline PolyWord TAGGED(POLYSIGNED a)                { return PolyWord::TaggedInt(a); }
inline POLYSIGNED UNTAGGED(PolyWord a)              { return a.UnTagged(); }
inline POLYUNSIGNED UNTAGGED_UNSIGNED(PolyWord a)   { return a.UnTaggedUnsigned(); }

#define IS_INT(x) ((x).IsTagged())

/* length word flags */
#define OBJ_PRIVATE_FLAGS_SHIFT     (8 * (sizeof(PolyWord) - 1))

#define _TOP_BYTE(x)                ((POLYUNSIGNED)(x) << OBJ_PRIVATE_FLAGS_SHIFT)

// Bottom two bits define the content format.
// Zero bits mean ordinary word object containing addresses or tagged integers.
#define F_BYTE_OBJ                  0x01 /* byte object (contains no pointers) */
#define F_CODE_OBJ                  0x02 /* code object (mixed bytes and words) */
#define F_CLOSURE_OBJ               0x03 /* closure (32-in-64 only). First word is code addr. */
#define F_GC_MARK                   0x04 // Used during the GC marking phase
#define F_NO_OVERWRITE              0x08 /* don't overwrite when loading - mutables only. */
// This bit is overloaded and has different meanings depending on what other bits are set.
// For byte objects it is the sign bit for arbitrary precision ints.
// For other data it indicates either that the object is a profile block or contains
// information for allocation profiling.
#define F_NEGATIVE_BIT              0x10 // Sign bit for arbitrary precision ints (byte segs only)
#define F_PROFILE_BIT               0x10 // Object has a profile pointer (word segs only)
#define F_WEAK_BIT                  0x20 /* object contains weak references to option values. */
// The Weak bit is only used on mutables. The data sharing (sharedata.cpp) uses this with
// immutables to indicate that the length field is being used to store the "depth".
#define F_MUTABLE_BIT               0x40 /* object is mutable */
#define F_TOMBSTONE_BIT             0x80 // Object is a forwarding pointer
#define F_PRIVATE_FLAGS_MASK        0xFF

// Shifted bits
#define _OBJ_BYTE_OBJ               _TOP_BYTE(F_BYTE_OBJ)       /* byte object (contains no pointers) */
#define _OBJ_CODE_OBJ               _TOP_BYTE(F_CODE_OBJ)       /* code object (mixed bytes and words) */
#define _OBJ_CLOSURE_OBJ            _TOP_BYTE(F_CLOSURE_OBJ)    // closure (32-in-64 only). First word is code addr.
#define _OBJ_GC_MARK                _TOP_BYTE(F_GC_MARK)        // Mark bit
#define _OBJ_NO_OVERWRITE           _TOP_BYTE(F_NO_OVERWRITE)   /* don't overwrite when loading - mutables only. */
#define _OBJ_NEGATIVE_BIT           _TOP_BYTE(F_NEGATIVE_BIT)   /* sign bit for arbitrary precision ints */
#define _OBJ_PROFILE_BIT            _TOP_BYTE(F_PROFILE_BIT)    /* object has a profile pointer */
#define _OBJ_WEAK_BIT               _TOP_BYTE(F_WEAK_BIT)
#define _OBJ_MUTABLE_BIT            _TOP_BYTE(F_MUTABLE_BIT)    /* object is mutable */
#define _OBJ_TOMBSTONE_BIT          _TOP_BYTE(F_TOMBSTONE_BIT)  // object is a tombstone.
#define _OBJ_PRIVATE_FLAGS_MASK     _TOP_BYTE(F_PRIVATE_FLAGS_MASK)

#define _OBJ_PRIVATE_LENGTH_MASK    ((-1) ^ _OBJ_PRIVATE_FLAGS_MASK)
#define MAX_OBJECT_SIZE             _OBJ_PRIVATE_LENGTH_MASK

//
inline bool OBJ_IS_LENGTH(POLYUNSIGNED L)           { return ((L & _OBJ_TOMBSTONE_BIT) == 0); }

/* these should only be applied to proper length words */
/* discards GC flag, mutable bit and weak bit.
*/ inline byte GetTypeBits(POLYUNSIGNED L) { return (byte)(L >> OBJ_PRIVATE_FLAGS_SHIFT) & 0x03; } inline POLYUNSIGNED OBJ_OBJECT_LENGTH(POLYUNSIGNED L) { return L & _OBJ_PRIVATE_LENGTH_MASK; } inline bool OBJ_IS_BYTE_OBJECT(POLYUNSIGNED L) { return (GetTypeBits(L) == F_BYTE_OBJ); } inline bool OBJ_IS_CODE_OBJECT(POLYUNSIGNED L) { return (GetTypeBits(L) == F_CODE_OBJ); } inline bool OBJ_IS_CLOSURE_OBJECT(POLYUNSIGNED L) { return (GetTypeBits(L) == F_CLOSURE_OBJ); } inline bool OBJ_IS_NO_OVERWRITE(POLYUNSIGNED L) { return ((L & _OBJ_NO_OVERWRITE) != 0); } inline bool OBJ_IS_NEGATIVE(POLYUNSIGNED L) { return ((L & _OBJ_NEGATIVE_BIT) != 0); } inline bool OBJ_HAS_PROFILE(POLYUNSIGNED L) { return ((L & _OBJ_PROFILE_BIT) != 0); } inline bool OBJ_IS_MUTABLE_OBJECT(POLYUNSIGNED L) { return ((L & _OBJ_MUTABLE_BIT) != 0); } inline bool OBJ_IS_WEAKREF_OBJECT(POLYUNSIGNED L) { return ((L & _OBJ_WEAK_BIT) != 0); } /* Don't need to worry about whether shift is signed, because OBJ_PRIVATE_USER_FLAGS_MASK removes the sign bit. We don't want the GC bit (which should be 0) anyway. */ #define OBJ_PRIVATE_USER_FLAGS_MASK _TOP_BYTE(0x7F) #define OBJ_IS_WORD_OBJECT(L) (GetTypeBits(L) == 0) /* case 2 - forwarding pointer */ inline bool OBJ_IS_POINTER(POLYUNSIGNED L) { return (L & _OBJ_TOMBSTONE_BIT) != 0; } #ifdef POLYML32IN64 inline PolyObject *OBJ_GET_POINTER(POLYUNSIGNED L) { return (PolyObject*)(globalHeapBase + ((L & ~_OBJ_TOMBSTONE_BIT) << 1)); } inline POLYUNSIGNED OBJ_SET_POINTER(PolyObject *pt) { return PolyWord::AddressToObjectPtr(pt) >> 1 | _OBJ_TOMBSTONE_BIT; } #else inline PolyObject *OBJ_GET_POINTER(POLYUNSIGNED L) { return (PolyObject*)(( L & ~_OBJ_TOMBSTONE_BIT) <<2); } inline POLYUNSIGNED OBJ_SET_POINTER(PolyObject *pt) { return ((POLYUNSIGNED)pt >> 2) | _OBJ_TOMBSTONE_BIT; } #endif // An object i.e. a piece of allocated memory in the heap. In the simplest case this is a // tuple, a list cons cell, a string or a ref. Every object has a length word in the word before // where its address points. The top byte of this contains flags. class PolyObject { public: byte *AsBytePtr(void)const { return (byte*)this; } PolyWord *AsWordPtr(void)const { return (PolyWord*)this; } POLYUNSIGNED LengthWord(void)const { return ((PolyWord*)this)[-1].AsUnsigned(); } POLYUNSIGNED Length(void)const { return OBJ_OBJECT_LENGTH(LengthWord()); } // Get and set a word PolyWord Get(POLYUNSIGNED i) const { return ((PolyWord*)this)[i]; } void Set(POLYUNSIGNED i, PolyWord v) { ((PolyWord*)this)[i] = v; } PolyWord *Offset(POLYUNSIGNED i) const { return ((PolyWord*)this)+i; } // Create a length word from a length and the flags in the top byte. 
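    // For example (values for illustration only): on native 64-bit,
    // SetLengthWord(3, F_BYTE_OBJ | F_MUTABLE_BIT) stores 0x4100000000000003
    // in the word before the object: length 3 in the low-order bytes and the
    // flag byte 0x41 in the top byte.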
void SetLengthWord(POLYUNSIGNED l, byte f) { ((POLYUNSIGNED*)this)[-1] = l | ((POLYUNSIGNED)f << OBJ_PRIVATE_FLAGS_SHIFT); } void SetLengthWord(POLYUNSIGNED l) { ((PolyWord*)this)[-1] = PolyWord::FromUnsigned(l); } bool IsByteObject(void) const { return OBJ_IS_BYTE_OBJECT(LengthWord()); } bool IsCodeObject(void) const { return OBJ_IS_CODE_OBJECT(LengthWord()); } bool IsClosureObject(void) const { return OBJ_IS_CLOSURE_OBJECT(LengthWord()); } bool IsWordObject(void) const { return OBJ_IS_WORD_OBJECT(LengthWord()); } bool IsMutable(void) const { return OBJ_IS_MUTABLE_OBJECT(LengthWord()); } bool IsWeakRefObject(void) const { return OBJ_IS_WEAKREF_OBJECT(LengthWord()); } bool IsNoOverwriteObject(void) const { return OBJ_IS_NO_OVERWRITE(LengthWord()); } bool ContainsForwardingPtr(void) const { return OBJ_IS_POINTER(LengthWord()); } PolyObject *GetForwardingPtr(void) const { return OBJ_GET_POINTER(LengthWord()); } void SetForwardingPtr(PolyObject *newp) { ((PolyWord*)this)[-1] = PolyWord::FromUnsigned(OBJ_SET_POINTER(newp)); } bool ContainsNormalLengthWord(void) const { return OBJ_IS_LENGTH(LengthWord()); } // Follow a chain of forwarding pointers PolyObject *FollowForwardingChain(void) { if (ContainsForwardingPtr()) return GetForwardingPtr()->FollowForwardingChain(); else return this; } }; // Stacks are native-words size even in 32-in-64. union stackItem { stackItem(PolyWord v) { argValue = v.AsUnsigned(); } stackItem() { argValue = TAGGED(0).AsUnsigned(); } // These return the low order word. PolyWord w()const { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); } operator PolyWord () { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); } POLYCODEPTR codeAddr; // Return addresses stackItem* stackAddr; // Stack addresses uintptr_t argValue; // Treat an address as an int + PolyObject* absAddress; // It could be an absolute address to the heap }; /* There was a problem with version 2.95 on Sparc/Solaris at least. The PolyObject class has no members so classes derived from it e.g. ML_Cons_Cell should begin at the beginning of the object. Later versions of GCC get this right. */ #if defined(__GNUC__) && (__GNUC__ <= 2) #error Poly/ML requires GCC version 3 or newer #endif inline POLYUNSIGNED GetLengthWord(PolyWord p) { return p.AsObjPtr()->LengthWord(); } // Get the length of an object. inline POLYUNSIGNED OBJECT_LENGTH(PolyWord p) { return OBJ_OBJECT_LENGTH(GetLengthWord(p)); } // A list cell. This can be passed to or returned from certain RTS functions. class ML_Cons_Cell: public PolyObject { public: PolyWord h; PolyWord t; #define ListNull (TAGGED(0)) static bool IsNull(PolyWord p) { return p == ListNull; } }; /* An exception packet. This contains an identifier (either a tagged integer for RTS exceptions or the address of a mutable for those created within ML), a string name for printing and an exception argument value. */ class PolyException: public PolyObject { public: PolyWord ex_id; /* Exc identifier */ PolyWord ex_name;/* Exc name */ PolyWord arg; /* Exc arguments */ PolyWord ex_location; // Location of "raise". Always zero for RTS exceptions. }; typedef PolyException poly_exn; /* Macro to round a number of bytes up to a number of words. */ #define WORDS(s) ((s+sizeof(PolyWord)-1)/sizeof(PolyWord)) /********************************************************************** * * Representation of option type. * **********************************************************************/ #define NONE_VALUE (TAGGED(0)) /* SOME x is represented by a single word cell containing x. 
*/

#if (defined(_WIN32))
/* Windows doesn't include 0x in %p format. */
#define ZERO_X  "0x"
#else
#define ZERO_X  ""
#endif

// ARM instructions are always little-endian even in big-endian mode
#ifdef WORDS_BIGENDIAN
inline uint32_t reverseBytes32(uint32_t value)
{
    return (((value & 0x000000ff) << 24) | ((value & 0x0000ff00) << 8) |
            ((value & 0x00ff0000) >> 8) | ((value & 0xff000000) >> 24));
}
inline uint32_t fromARMInstr(uint32_t instr) { return reverseBytes32(instr); }
inline uint32_t toARMInstr(uint32_t instr) { return reverseBytes32(instr); }
#else
inline uint32_t fromARMInstr(uint32_t instr) { return instr; }
inline uint32_t toARMInstr(uint32_t instr) { return instr; }
#endif

#endif

diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig
index f4ca93a4..8b7e57e3 100644
--- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig
+++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig
@@ -1,438 +1,438 @@
(*
    Signature for the high-level ARM64 code

-    Copyright David C. J. Matthews 2021
+    Copyright David C. J. Matthews 2021-2

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License version 2.1 as published by the Free Software Foundation.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*)

signature ARM64ICODE =
sig
    type machineWord = Address.machineWord
    type address = Address.address

    type closureRef

    (* Registers. *)
    datatype xReg = XReg of Word8.word | XZero | XSP
    and vReg = VReg of Word8.word

    (* It is simpler to use a single type for all registers. *)
    datatype reg = GenReg of xReg | FPReg of vReg

    val X0:  xReg   and X1:  xReg   and X2:  xReg   and X3:  xReg
    and X4:  xReg   and X5:  xReg   and X6:  xReg   and X7:  xReg
    and X8:  xReg   and X9:  xReg   and X10: xReg   and X11: xReg
    and X12: xReg   and X13: xReg   and X14: xReg   and X15: xReg
    and X16: xReg   and X17: xReg   and X18: xReg   and X19: xReg
    and X20: xReg   and X21: xReg   and X22: xReg   and X23: xReg
    and X24: xReg   and X25: xReg   and X26: xReg   and X27: xReg
    and X28: xReg   and X29: xReg   and X30: xReg

    val V0: vReg    and V1: vReg    and V2: vReg    and V3: vReg
    and V4: vReg    and V5: vReg    and V6: vReg    and V7: vReg

    val is32in64: bool and isBigEndian: bool

    (* Condition for conditional branches etc. *)
    datatype condition =
        CondEqual            (* Z=1 *)
    |   CondNotEqual         (* Z=0 *)
    |   CondCarrySet         (* C=1 *)
    |   CondCarryClear       (* C=0 *)
    |   CondNegative         (* N=1 *)
    |   CondPositive         (* N=0 includes zero *)
    |   CondOverflow         (* V=1 *)
    |   CondNoOverflow       (* V=0 *)
    |   CondUnsignedHigher   (* C=1 && Z=0 *)
    |   CondUnsignedLowOrEq  (* ! (C=1 && Z=0) *)
    |   CondSignedGreaterEq  (* N=V *)
    |   CondSignedLess       (* N<>V *)
    |   CondSignedGreater    (* Z==0 && N=V *)
    |   CondSignedLessEq     (* !(Z==0 && N=V) *)

    (* The shift used in arithmetic operations. *)
    and shiftType =
        ShiftLSL of Word8.word
    |   ShiftLSR of Word8.word
    |   ShiftASR of Word8.word
    |   ShiftNone

    datatype preg = PReg of int (* A pseudo-register - an abstract register. *)

    (* If the value is zero we can use X0/W0. *)
    datatype pregOrZero = SomeReg of preg | ZeroReg

    (* A location on the stack. May be more than a word if this is a container or a handler entry.
*) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and callKind = Recursive | ConstantCode of machineWord | FullCall and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64bit *) | SignedMultHigh (* High order part of 64bit*64bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize } (* Returns the current thread ID. Always a 64-bit value. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocates a fixed-size piece of memory and puts the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words.
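For example (an illustration, not from the original comment): clearing a freshly allocated three-word mutable cell uses the untagged value 3 both as the size passed to AllocateMemoryVariable and as the size passed to InitialiseMem, and the same untagged 3 goes into the length word.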
*) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd. *) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The result is stored in the destination register. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function.
*) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dest: 'genReg, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, stackAdjust: int, currStackSize: int } (* Return from the function. resultReg is the preg that contains the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { resultReg: 'genReg, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version.
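Illustration (added here, not part of the original comment; the register choices are arbitrary): the pairing compiles down to something like
        writer: str x1, [x0, #8]    ; initialise the object
                stlr x0, [x2]       ; StoreRelease of its address into the ref
        reader: ldar x3, [x2]       ; LoadAcquire of the ref
                ldr x4, [x3, #8]    ; sees the writer's str once ldar has returned the new address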
*) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to 0 if it succeeds and to 1 if it fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } (* Debugging - fault if values don't match. *) | CacheCheck of { arg1: 'genReg, arg2: 'genReg } (* Destinations at the end of a basic block.
*) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true and to falseJump if it is false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. Used if an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } (* Return the successor blocks from a control flow. *) val successorBlocks: controlFlow -> int list type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock val printICodeAbstract: basicBlockAbstract vector * (string -> unit) -> unit and printICodeConcrete: basicBlockConcrete vector * (string -> unit) -> unit (* Check whether this value is acceptable for LogicalImmediate. *) val isEncodableBitPattern: Word64.word * opSize -> bool (* This generates a BitField instruction with the appropriate values for immr and imms. *) val shiftConstant: { direction: shiftDirection, dest: preg, source: preg, shift: word, opSize: opSize } -> iCodeAbstract structure Sharing: sig type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML index 2a783a5e..26faaf83 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML @@ -1,1094 +1,1113 @@ (* Copyright David C. J. Matthews 2016-21 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64AllocateRegisters( structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ALLOCATEREGISTERS = struct open Arm64ICode open Identify open IntSet open Address exception InternalError = Misc.InternalError + val checkCache = true (* Check the cache rather than use it *) + datatype allocateResult = AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list (* General registers. X24 is used as the global heap base in 32-in-64. X30 is the return address set by blr but is otherwise a general register. Put the argument registers at the end of the list so they'll only be used when hinted. *) val generalRegisters = map GenReg ([X9, X10, X11, X12, X13, X14, X15, X19, X20, X21, X22, X23, X0, X1, X2, X3, X4, X5, X6, X7, X8, X30] @ (if is32in64 then [] else [X24])) val floatingPtRegisters = map FPReg [V7, V6, V5, V4, V3, V2, V1] type conflictState = { conflicts: intSet, realConflicts: reg list } type triple = {instr: iCodeAbstract, current: intSet, active: intSet} exception InternalError = Misc.InternalError (* Get the conflict states, allocate registers and return the code with the allocated registers if it is successful. *) fun allocateRegisters{blocks, regProps, maxPRegs, ...} = let (* Other registers that conflict with this i.e. cannot share the same real register. *) val regConflicts = Array.array(maxPRegs, emptySet) (* Real registers that cannot be used for this because they are needed for an instruction. Only X30 in calls and RTS traps. *) and regRealConflicts = Array.array(maxPRegs, []: reg list) fun addConflictsTo(addTo, conflicts) = List.app(fn aReg => Array.update(regConflicts, aReg, union(Array.sub(regConflicts, aReg), conflicts))) addTo (* To reserve a register we need to add the real register to the real conflict sets of all the abstract conflicts. *) local fun isInset reg set = List.exists (fn r => r = reg) set in fun reserveRegister(reserveFor, reg) = let fun reserveAReg r = let val absConflicts = Array.sub(regConflicts, r) fun addConflict i = if isInset i reserveFor then () else addRealConflict (i, reg) in List.app addConflict (setToList absConflicts) end in List.app reserveAReg reserveFor end and addRealConflict (i, reg) = let val currentConflicts = Array.sub(regRealConflicts, i) in if isInset reg currentConflicts then () else Array.update(regRealConflicts, i, reg :: currentConflicts) end end fun conflictsForInstr passThrough {instr, current, ...} = let val {sources, dests} = getInstructionRegisters instr fun regNo(PReg i) = i val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos val afterRemoveDests = minus(current, destSet) local (* In almost all circumstances the destination and sources don't conflict and the same register can be used as a destination and a source. BoxLarge can only store the value after the memory has been allocated. BitFieldInsert has to copy the "destAsSource" value into the destination so cannot use the same register for the "source". *) val postInstruction = case instr of BoxLarge _ => destRegNos @ sourceRegNos | BoxTagFloat _ => destRegNos @ sourceRegNos (* Not sure about this. 
*) | BitFieldInsert{source, ...} => regNo source :: destRegNos | _ => destRegNos in (* If there is more than one destination they conflict with each other. *) val () = addConflictsTo(postInstruction, listToSet postInstruction); (* Mark conflicts for the destinations, i.e. after the instruction. The destinations conflict with the registers that are used subsequently. *) val () = addConflictsTo(postInstruction, current); val () = addConflictsTo(postInstruction, passThrough); (* Mark conflicts for the sources i.e. before the instruction. *) (* Sources must be set up as conflicts with each other i.e. when we come to allocate registers we must choose different real registers for different abstract registers. *) val () = addConflictsTo(sourceRegNos, listToSet sourceRegNos) val () = addConflictsTo(sourceRegNos, afterRemoveDests); val () = addConflictsTo(sourceRegNos, passThrough) end (* I'm not sure if this is needed. There was a check in the old code to ensure that different registers were used for loop variables even if they were actually unused. This may happen anyway. Comment and code copied from X86 version. Retain it for the moment. *) val () = case instr of JumpLoop{regArgs, ...} => let val destRegs = List.foldl(fn ({dst=PReg loopReg, ...}, dests) => loopReg :: dests) [] regArgs in addConflictsTo(destRegs, listToSet destRegs); addConflictsTo(destRegs, current); addConflictsTo(destRegs, passThrough) end | _ => () (* Certain instructions are specific as to the real registers. *) val () = case instr of ReturnResultFromFunction{ returnReg=PReg retReg, ... } => (* We're going to put the return value in X0 so we can't use that for the return address. *) addRealConflict(retReg, GenReg X0) | RaiseExceptionPacket{ packetReg } => (* This wasn't needed previously because we always pushed the registers across an exception. *) reserveRegister([regNo packetReg], GenReg X0) | BeginHandler { packetReg, ...} => reserveRegister([regNo packetReg], GenReg X0) | FunctionCall { dest, regArgs, ...} => (* This is only needed if we are saving the registers rather than marking them as "must push". *) let val destReg = regNo dest in reserveRegister([destReg], GenReg X0); (* The argument registers also conflict. In order to execute this call we need to load the arguments into specific registers so we can't use them for values that we want after the call. We use regNo dest here because that will conflict with everything immediately afterwards. *) List.app(fn (_, r) => reserveRegister([destReg], GenReg r)) regArgs; (* Likewise X30 since that's the return address. *) addRealConflict(destReg, GenReg X30) end (* We can't use X30 as the result because it's needed for the return addr if we have to GC. *) | AllocateMemoryFixed{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | AllocateMemoryVariable{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxLarge{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxTagFloat{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) (* Could exclude floats on native addr. *) | _ => () in () end (* Process the block. *) fun conflictsForBlock(ExtendedBasicBlock{block, passThrough, exports, ...}) = let (* We need to establish conflicts between all the registers active at the end of the block since they may not be established elsewhere. This isn't necessary for an unconditional branch since the same registers will be included in the block that is the target of the branch, possibly along with others. 
However if this is a conditional or indexed branch we may have different sets at each of the targets and we have to ensure that all the registers differ. *) val united = union(exports, passThrough) val () = addConflictsTo(setToList united, united) val () = List.app (conflictsForInstr passThrough) block in () end val () = Vector.app conflictsForBlock blocks (* Hint values. The idea of hints is that by using a hinted register we may avoid an unnecessary move instruction. realHints is set when a pseudo-register is going to be loaded from a specific register e.g. a register argument, or moved into one e.g. X0 for the function result. friends is set to the other pReg that may be associated with the pReg. Typically this is where we have a merge register that we move some value into. *) val realHints = Array.array(maxPRegs, NONE: reg option) (* Sources and destinations. These indicate the registers that are the sources and destinations of the indexing register and are used as hints. If a register has been allocated for a source or destination we may be able to reuse it. *) val sourceRegs = Array.array(maxPRegs, []: int list) and destinationRegs = Array.array(maxPRegs, []: int list) local (* Real hints. If this is the source of a value e.g. a function argument in a register, we'll use it directly. If, though, this is the result of a function and we want the result to end up in a specific register we want to propagate it to any pReg that moves its value into this. *) fun addRealHint(r, reg) = case Array.sub(realHints, r) of SOME _ => () | NONE => ( (* Add to this pReg *) Array.update(realHints, r, SOME reg); (* and to any other pReg that moves here. *) List.app(fn r => addRealHint(r, reg)) (Array.sub(sourceRegs, r)) ) fun addSourceAndDestinationHint{src, dst} = let val conflicts = Array.sub(regConflicts, src) in (* If they conflict we can't add them. *) if member(dst, conflicts) then () else let val currentDests = Array.sub(destinationRegs, src) val currentSources = Array.sub(sourceRegs, dst) in (* Add the destination for this source i.e. the registers we move this source into. *) if List.exists(fn i => i=dst) currentDests then () else Array.update(destinationRegs, src, dst :: currentDests); (* Add the source to the list of sources for this destination. A merge register may have several sources, a different one for each path. If the destination has a real hint we want to propagate that back. That isn't needed for the destinations because we allocate the registers from the start forward. *) if List.exists(fn i => i=src) currentSources then () else let val sources = src :: currentSources val () = Array.update(sourceRegs, dst, sources) in case Array.sub(realHints, dst) of NONE => () | SOME real => List.app(fn r => addRealHint(r, real)) sources end end end (* Add the hints to steer the register allocation. The idea is to avoid moves between registers by getting values into the appropriate register in advance. We don't actually need to add real hints where the real register is providing the value, e.g. BeginFunction, because the allocation process will take care of that. *) fun addHints{instr=MoveRegister{source=PReg sreg, dest=PReg dreg, ...}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints{instr=BitFieldInsert{destAsSource=PReg dsReg, dest=PReg dReg, ...}, ...} = (* The "destAsSource" is the destination if some bits are retained. *) addSourceAndDestinationHint {src=dsReg, dst=dReg} | addHints{instr=ReturnResultFromFunction { resultReg=PReg resReg, ... 
}, ...} = addRealHint(resReg, GenReg X0) | addHints{instr=JumpLoop{regArgs, ...}, ...} = let fun addRegArg {src=ArgInReg(PReg argReg), dst=PReg resReg} = addSourceAndDestinationHint {dst=resReg, src=argReg} | addRegArg {src=ArgOnStack _, ...} = () in List.app addRegArg regArgs end | addHints{instr=BeginFunction{regArgs, ...}, ...} = List.app (fn (PReg pr, reg) => addRealHint(pr, GenReg reg)) regArgs | addHints{instr=TailRecursiveCall{regArgs, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app setHint regArgs end | addHints{instr=FunctionCall{regArgs, dest=PReg dreg, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in addRealHint(dreg, GenReg X0); List.app setHint regArgs end (* Exception packets are in X0 *) | addHints{instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints{instr=BeginHandler{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints _ = () in val () = Vector.app(fn ExtendedBasicBlock { block, ...} => List.app addHints block) blocks end val allocatedRegs = Array.array(maxPRegs, NONE: reg option) val failures = ref []: intSet list ref (* Find a real register for a preg. 1. If a register is already allocated use that. 2. Try the "preferred" register if one has been given. 3. Try the realHints value if there is one. 4. See if there is a "friend" that has an appropriate register 5. Look at all the registers and find one. *) fun findRegister(r, pref, regSet, cache) = case Array.sub(allocatedRegs, r) of SOME reg => reg | NONE => let val conflicts = Array.sub(regConflicts, r) and realConflicts = Array.sub(regRealConflicts, r) (* Find the registers we've already allocated that may conflict. *) val conflictingRegs = List.mapPartial(fn i => Array.sub(allocatedRegs, i)) (setToList conflicts) @ realConflicts fun isFree aReg = not (List.exists(fn i => i=aReg) conflictingRegs) fun tryAReg NONE = NONE | tryAReg (somePref as SOME prefReg) = if isFree prefReg then (Array.update(allocatedRegs, r, somePref); somePref) else NONE (* Search the sources and destinations to see if a register has already been allocated or there is a hint. *) fun findAFriend([], [], _) = NONE | findAFriend(aDest :: otherDests, sources, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aDest) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aDest)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the destinations of the destinations to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(destinationRegs, aDest)) in findAFriend(otherDests @ fOfF, sources, aDest :: alreadySeen) end end | findAFriend([], aSrc :: otherSrcs, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aSrc) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aSrc)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the sources of the sources to the list if they don't conflict and haven't been seen. 
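Illustration (added, not from the original source): the friend search is in effect a breadth-first walk over the move graph looking for the first node that already has a register or a real hint. A self-contained toy version, where the allocated and friends functions stand in for the array lookups used here:
    fun firstAllocated allocated friends start =
    let
        fun bfs([], _) = NONE
          | bfs(f :: rest, seen) =
                if List.exists (fn s => s = f) seen then bfs(rest, seen)
                else (case allocated f of
                          SOME r => SOME r
                        | NONE => bfs(rest @ friends f, f :: seen))
    in
        bfs(friends start, [start])
    end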
*) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(sourceRegs, aSrc)) in findAFriend([], otherSrcs @ fOfF, aSrc :: alreadySeen) end in case tryAReg pref of SOME r => r | NONE => ( case tryAReg (Array.sub(realHints, r)) of SOME r => r | NONE => ( case findAFriend(Array.sub(destinationRegs, r), Array.sub(sourceRegs, r), []) of SOME r => r (* Look through the registers to find one that's free. First try excluding the cache registers. *) | NONE => let (* First try filtering out all the cache registers to see if we can find a register. If that fails see if it works when one of the cache registers is allowed. *) fun filterCache(filteredRegset, []) = List.find isFree filteredRegset | filterCache(filteredRegset, (cReg, _) :: cache) = ( case filterCache(List.filter(fn r => r <> cReg) filteredRegset, cache) of NONE => if isFree cReg then SOME cReg else NONE | result => result ) val pick = case filterCache(regSet, cache) of SOME reg => reg | NONE => ( (* This failed. We're going to have to spill something. *) failures := conflicts :: ! failures; hd regSet (* Return something to allow this pass to complete *) ) val () = Array.update(allocatedRegs, r, SOME pick) in pick end ) ) end (* Turn the abstract icode into a concrete version by allocating the registers. *) local + fun asGenReg(GenReg reg) = reg | asGenReg _ = raise InternalError "asGenReg" + and asFPReg(FPReg reg) = reg | asFPReg _ = raise InternalError "asFPReg" + datatype cacheItem = CacheStack of stackLocn | CacheObjectAddress of preg (* Remove any reference to newly allocated registers from the cache. Also used after block move and comparison that modify registers *) fun pruneCache(reg: reg, cache) = List.filter(fn (r, _) => r <> reg) cache + (* Return the cache registers that contain valid addresses. *) + fun cachedAddressRegs cache = List.map (asGenReg o #1) cache + fun allocateNewDestination(PReg r, pref, regSet, cacheList) = case Array.sub(allocatedRegs, r) of SOME reg => ( case Vector.sub(regProps, r) of RegPropMultiple => (reg, pruneCache(reg, cacheList)) (* This is allowed for merge registers *) | _ => raise InternalError "Register defined at multiple points" ) | NONE => let val reg = findRegister(r, pref, regSet, cacheList) in (reg, pruneCache(reg, cacheList)) end - fun asGenReg(GenReg reg) = reg | asGenReg _ = raise InternalError "asGenReg" - and asFPReg(FPReg reg) = reg | asFPReg _ = raise InternalError "asFPReg" - fun allocateGenReg(r, cache) = let val (reg, newCache) = allocateNewDestination(r, NONE, generalRegisters, cache) in (asGenReg reg, newCache) end and allocateFPReg(r, cache) = let val (reg, newCache) = allocateNewDestination(r, NONE, floatingPtRegisters, cache) in (asFPReg reg, newCache) end and allocateGenRegOrZero(ZeroReg, cache) = (XZero, cache) | allocateGenRegOrZero(SomeReg reg, cache) = allocateGenReg(reg, cache) fun getAllocatedGenReg(PReg r) = case Array.sub(allocatedRegs, r) of SOME(GenReg reg) => reg | _ => raise InternalError "getAllocatedGenReg" and getAllocatedFPReg(PReg r) = case Array.sub(allocatedRegs, r) of SOME(FPReg reg) => reg | _ => raise InternalError "getAllocatedFPReg" fun getAllocatedGenRegOrZero ZeroReg = XZero | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg fun getAllocatedArg(ArgInReg reg) = ArgInReg(getAllocatedGenReg reg) | getAllocatedArg(ArgOnStack stackLoc) = ArgOnStack stackLoc val getSaveRegs = List.map getAllocatedGenReg (* Reversed cons and append to make the code easier to read.
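For example, [] <::> i1 <::> i2 builds [i2, i1]; List.rev is applied once the block is complete to restore the original instruction order.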
*) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) fun absToConcrete([], context, code) = (context, code) | absToConcrete({instr=MoveRegister{ source, dest}, ...} :: rest, cache, code) = let (* Try to use the register we've allocated for the source as the destination so that we can eliminate this instruction altogether. *) val sourceReg = getAllocatedGenReg source val (destReg, newCache) = allocateNewDestination(dest, SOME(GenReg sourceReg), generalRegisters, cache) val dReg = asGenReg destReg in if sourceReg = dReg then absToConcrete(rest, newCache, code) else absToConcrete(rest, newCache, code <::> MoveRegister { source=sourceReg, dest=dReg}) end | absToConcrete({instr=LoadNonAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadNonAddressConstant { dest=destReg, source=source}) end | absToConcrete({instr=LoadAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadAddressConstant { dest=destReg, source=source}) end | absToConcrete({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, loadType=loadType}) end | absToConcrete({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, floatSize=floatSize}) end | absToConcrete({instr=LoadWithIndexedOffset { base, dest, index, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, loadType=loadType}) end | absToConcrete({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, floatSize=floatSize}) end | absToConcrete({instr=GetThreadId { dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> GetThreadId { dest=destReg}) end | absToConcrete({instr=ObjectIndexAddressToAbsolute { source as PReg srcNo, dest}, kill, ...} :: rest, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this source entry we don't want it in the cache any longer. *) val killThis = member(srcNo, kill) val (newCode, destReg, newCache) = case List.find(fn (_, CacheObjectAddress c) => c=source | _ => false) cache of SOME (srcReg, _) => + if checkCache + then let val (destReg, newCache) = allocateGenReg(dest, cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg} <::> CacheCheck{ arg1=destReg, arg2=asGenReg srcReg }, GenReg destReg, newCache) end -(* let + else + let (* Try to use the cache register as the destination if we can. 
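For example, if the cache records that X9 already holds the absolute address, choosing X9 as the destination makes the move below a no-op and it is dropped altogether.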
*) val (destReg, newCache) = allocateNewDestination(dest, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if dReg = sReg then (code, destReg, newCache) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) - end*) + end + | NONE => let val (destReg, newCache) = allocateGenReg(dest, cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, GenReg destReg, newCache) end in absToConcrete(rest, if killThis then newCache else (destReg, CacheObjectAddress source) :: newCache, newCode) end | absToConcrete({instr=AbsoluteToObjectIndex { source, dest}, ...} :: rest, cache, code) = let (* Don't make an entry in the cache for this; it won't be used. *) val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> AbsoluteToObjectIndex { source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=AllocateMemoryFixed { bytesRequired, dest, saveRegs }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) val filteredCache = pruneCache(GenReg X30, newCache) + val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> - AllocateMemoryFixed { dest=destReg, bytesRequired=bytesRequired, saveRegs=getSaveRegs saveRegs}) + AllocateMemoryFixed { dest=destReg, bytesRequired=bytesRequired, saveRegs=saved}) end | absToConcrete({instr=AllocateMemoryVariable{size, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) val filteredCache = pruneCache(GenReg X30, newCache) + val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> - AllocateMemoryVariable{size=getAllocatedGenReg size, dest=destReg, saveRegs=getSaveRegs saveRegs}) + AllocateMemoryVariable{size=getAllocatedGenReg size, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=InitialiseMem{size, addr, init}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> InitialiseMem{size=getAllocatedGenReg size, addr=getAllocatedGenReg addr, init=getAllocatedGenReg init}) | absToConcrete({instr=BeginLoop, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> BeginLoop) | absToConcrete({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...} :: rest, context, code) = let fun getStackArg{src, wordOffset, stackloc} = {src=getAllocatedArg src, wordOffset=wordOffset, stackloc=stackloc} and getRegArg{src, dst} = {src=getAllocatedArg src, dst=getAllocatedGenReg dst} in absToConcrete(rest, context, code <::> JumpLoop{ regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, checkInterrupt=Option.map getSaveRegs checkInterrupt}) end | absToConcrete({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, byteOffset=byteOffset, loadType=loadType}) | absToConcrete({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, byteOffset=byteOffset, floatSize=floatSize}) | absToConcrete({instr=StoreWithIndexedOffset { base, source, 
index, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, index=getAllocatedGenReg index, loadType=loadType}) | absToConcrete({instr=StoreFPWithIndexedOffset { base, source, index, floatSize}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, index=getAllocatedGenReg index, floatSize=floatSize}) | absToConcrete({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, cache) in absToConcrete(rest, newCache, code <::> AddSubImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length}) end | absToConcrete({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, cache) in absToConcrete(rest, newCache, code <::> AddSubRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift}) end | absToConcrete({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, cache) in absToConcrete(rest, newCache, code <::> LogicalImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, logOp=logOp, length=length}) end | absToConcrete({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, cache) in absToConcrete(rest, newCache, code <::> LogicalRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, logOp=logOp, length=length, shift=shift}) end | absToConcrete({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> ShiftRegister { source=getAllocatedGenReg source, shift=getAllocatedGenReg shift, dest=destReg, direction=direction, opSize=opSize}) end | absToConcrete({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> Multiplication { kind=kind, sourceA=getAllocatedGenRegOrZero sourceA, sourceM=getAllocatedGenReg sourceM, sourceN=getAllocatedGenReg sourceN, dest=destReg}) end | absToConcrete({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> Division { isSigned=isSigned, dividend=getAllocatedGenReg dividend, divisor=getAllocatedGenReg divisor, dest=destReg, opSize=opSize}) end | absToConcrete({instr=BeginFunction {regArgs, stackArgs}, ...} :: rest, _, code) = let (* Allocate the register arguments. At this point all the registers are free and the cache is empty. However we may have a "real conflict" that means that the allocated register is different. e.g. we need this argument some time after an arbitrary precision operation that may call a function. 
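When that happens findRegister rejects the hinted argument register, say X0, because of the real conflict and chooses another, say X19, and the value is moved out of the argument register when the BeginFunction is translated.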
*) fun allocReg(src, dst) = let val (destReg, _) = allocateNewDestination(src, SOME(GenReg dst), generalRegisters, []) in (asGenReg destReg, dst) end in absToConcrete(rest, [], code <::> BeginFunction {regArgs=map allocReg regArgs, stackArgs=stackArgs}) end | absToConcrete({instr=FunctionCall{callKind, regArgs, stackArgs, dest, containers, saveRegs, ...}, ...} :: rest, _, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) val (destReg, _) = allocateGenReg(dest, []) - (* Currently we empty the cache at this point. At the very least we need to remove - any registers that could be modified e.g. X0 and X30. *) + (* Currently we empty the cache at this point. Saving the registers across + the call would probably be more expensive than reloading them. *) in absToConcrete(rest, [] (* Empty after a function call. *), code <::> FunctionCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getAllocatedArg stackArgs, dest=destReg, saveRegs=getSaveRegs saveRegs, containers=containers}) end | absToConcrete({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, ...} :: rest, context, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) and getStackArg{src, stack} = {src=getAllocatedArg src, stack=stack} in absToConcrete(rest, context, code <::> TailRecursiveCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, stackAdjust=stackAdjust, currStackSize=currStackSize}) end | absToConcrete({instr=ReturnResultFromFunction{resultReg, returnReg, numStackArgs}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> ReturnResultFromFunction{resultReg=getAllocatedGenReg resultReg, returnReg=getAllocatedGenReg returnReg, numStackArgs=numStackArgs}) | absToConcrete({instr=RaiseExceptionPacket{packetReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> RaiseExceptionPacket{packetReg=getAllocatedGenReg packetReg}) | absToConcrete({instr=PushToStack{ source, container, copies }, ...} :: rest, cache, code) = let val srcReg = getAllocatedGenReg source val newCache = (GenReg srcReg, CacheStack container) :: cache in absToConcrete(rest, newCache, code <::> PushToStack{source=srcReg, container=container, copies=copies}) end | absToConcrete({instr=LoadStack{ dest, container as StackLoc{rno, ...} , field=0, wordOffset}, kill, ...} :: rest, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this stack entry we don't want it in the cache any longer. *) val killThis = member(rno, kill) val (newCode, destReg, newCache) = case List.find(fn (_, CacheStack c) => c=container | _ => false) cache of SOME (srcReg, _) => + if checkCache + then let val (destReg, newCache) = allocateGenReg(dest, cache) in (code <::> LoadStack{ dest=destReg, container=container, field=0, wordOffset=wordOffset } <::> CacheCheck{ arg1=destReg, arg2=asGenReg srcReg }, GenReg destReg, newCache) end - (* + else let (* Try to use the cache register as the destination if we can. *) val (destReg, newCache) = allocateNewDestination(dest, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if dReg = sReg then (code, destReg, newCache) (* We will have pruned this since it's the destination.
*) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) - end*) + end + | NONE => let val (destReg, newCache) = allocateGenReg(dest, cache) in (code <::> LoadStack{ dest=destReg, container=container, field=0, wordOffset=wordOffset }, GenReg destReg, newCache) end in absToConcrete(rest, if killThis then newCache else (destReg, CacheStack container) :: newCache, newCode) end | absToConcrete({instr=LoadStack{ dest, container, field, wordOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadStack{ dest=destReg, container=container, field=field, wordOffset=wordOffset }) end | absToConcrete({instr=StoreToStack{source, container, field, stackOffset}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreToStack{source=getAllocatedGenReg source, container=container, field=field, stackOffset=stackOffset}) | absToConcrete({instr=ContainerAddress{ dest, container, stackOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> ContainerAddress{ dest=destReg, container=container, stackOffset=stackOffset }) end | absToConcrete({instr=ResetStackPtr {numWords}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> ResetStackPtr {numWords=numWords}) | absToConcrete({instr=TagValue{source, dest, isSigned, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> TagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> UntagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=BoxLarge{source, dest, saveRegs, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) val filteredCache = pruneCache(GenReg X30, newCache) + val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> - BoxLarge{source=getAllocatedGenReg source, dest=destReg, saveRegs=getSaveRegs saveRegs}) + BoxLarge{source=getAllocatedGenReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxLarge{source, dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> UnboxLarge{source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=BoxTagFloat{floatSize, source, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) val filteredCache = pruneCache(GenReg X30, newCache) + val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in - absToConcrete(rest, filteredCache, code <::> BoxTagFloat{floatSize=floatSize, source=getAllocatedFPReg source, - dest=destReg, saveRegs=getSaveRegs saveRegs}) + absToConcrete(rest, filteredCache, code <::> + BoxTagFloat{floatSize=floatSize, source=getAllocatedFPReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxTagFloat{floatSize, source, dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, cache) in absToConcrete(rest, newCache, code <::> 
UnboxTagFloat{floatSize=floatSize, source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=LoadAcquire { base, dest, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadAcquire { base=getAllocatedGenReg base, dest=destReg, loadType=loadType}) end | absToConcrete({instr=StoreRelease { base, source, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreRelease{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, loadType=loadType}) | absToConcrete({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> BitFieldShift { source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=BitFieldInsert{source, destAsSource, dest, length, immr, imms}, ...} :: rest, cache, code) = let val destAsSourceReg = getAllocatedGenReg destAsSource val (destReg, newCache) = allocateNewDestination(dest, SOME(GenReg destAsSourceReg), generalRegisters, cache) in absToConcrete(rest, newCache, code <::> BitFieldInsert { source=getAllocatedGenReg source, destAsSource=destAsSourceReg, dest=asGenReg destReg, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=IndexedCaseOperation{testReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> IndexedCaseOperation{testReg=getAllocatedGenReg testReg}) | absToConcrete({instr=PushExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PushExceptionHandler) | absToConcrete({instr=PopExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PopExceptionHandler) | absToConcrete({instr=BeginHandler{packetReg}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(packetReg, cache) in absToConcrete(rest, newCache, code <::> BeginHandler{packetReg=destReg}) end | absToConcrete({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val vec1Reg = getAllocatedGenReg vec1Addr and vec2Reg = getAllocatedGenReg vec2Addr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg vec1Reg, pruneCache(GenReg vec2Reg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> CompareByteVectors{vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lenReg, ccRef=ccRef}) end | absToConcrete({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val srcAReg = getAllocatedGenReg srcAddr and dstAReg = getAllocatedGenReg destAddr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg srcAReg, pruneCache(GenReg dstAReg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> BlockMove{srcAddr=srcAReg, destAddr=dstAReg, length=lenReg, isByteMove=isByteMove}) end | absToConcrete({instr=AddSubXSP{source, dest, isAdd}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, cache) in absToConcrete(rest, newCache, code <::> AddSubXSP { source=getAllocatedGenReg source, dest=destReg, isAdd=isAdd}) end | absToConcrete({instr=TouchValue{source}, ...} :: rest, context, code) = absToConcrete(rest, context, 
code <::> TouchValue { source=getAllocatedGenReg source}) | absToConcrete({instr=LoadAcquireExclusive{ base, dest }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> LoadAcquireExclusive { base=getAllocatedGenReg base, dest=destReg}) end | absToConcrete({instr=StoreReleaseExclusive{ base, source, result }, ...} :: rest, cache, code) = let val (resultReg, newCache) = allocateGenReg(result, cache) in absToConcrete(rest, newCache, code <::> StoreReleaseExclusive{ base=getAllocatedGenReg base, source=getAllocatedGenRegOrZero source, result=resultReg}) end | absToConcrete({instr=MemoryBarrier, ...} :: rest, cache, code) = absToConcrete(rest, cache, code <::> MemoryBarrier) | absToConcrete({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, cache) in absToConcrete(rest, newCache, code <::> ConvertIntToFloat{ source=getAllocatedGenReg source, dest=destReg, srcSize=srcSize, destSize=destSize}) end | absToConcrete({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, cache) in absToConcrete(rest, newCache, code <::> ConvertFloatToInt{ source=getAllocatedFPReg source, dest=destReg, srcSize=srcSize, destSize=destSize, rounding=rounding}) end | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, cache) in absToConcrete(rest, newCache, code <::> UnaryFloatingPt{ source=getAllocatedFPReg source, dest=destReg, fpOp=fpOp}) end | absToConcrete({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, cache) in absToConcrete(rest, newCache, code <::> BinaryFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, dest=destReg, fpOp=fpOp, opSize=opSize}) end | absToConcrete({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> CompareFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, opSize=opSize, ccRef=ccRef}) + | absToConcrete({instr=CacheCheck _, ...} :: _, _, _) = (* Concrete only. *) + raise InternalError "absToConcrete: CacheCheck" + in fun concreteBlock(ExtendedBasicBlock{ block, flow, ...}) = let val (_, code) = absToConcrete(block, [], []) in BasicBlock{block=List.rev code, flow=flow} end end val numBlocks = Vector.length blocks val resultArray = Array.array(numBlocks, NONE) fun processBlocks blockNo = case Array.sub(resultArray, blockNo) of SOME _ => () (* Done. *) | NONE => let val thisBlock as ExtendedBasicBlock { flow, ...} = Vector.sub(blocks, blockNo) in (* Process this block and add it to the results. *) Array.update(resultArray, blockNo, SOME(concreteBlock thisBlock)); (* Now the blocks that depend on this. *) case flow of ExitCode => () | Unconditional m => processBlocks m | Conditional {trueJump, falseJump, ...} => (processBlocks trueJump; processBlocks falseJump) | IndexedBr cases => List.app processBlocks cases | SetHandler{ handler, continue } => (processBlocks handler; processBlocks continue) | UnconditionalHandle _ => () | ConditionalHandle { continue, ...} => processBlocks continue end in processBlocks 0; (* If the failures list is empty we succeeded. *) case !failures of [] => (* Return the allocation vector.
We may have unused registers, *) AllocateSuccess(Vector.tabulate(numBlocks, fn i => valOf(Array.sub(resultArray, i))) ) (* Else we'll have to spill something. *) | l => AllocateFailure l end val nGenRegs = List.length generalRegisters structure Sharing = struct type intSet = intSet and extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg and xReg = xReg and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML index b36d4681..b75c6257 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML @@ -1,973 +1,976 @@ (* - Copyright David C. J. Matthews 2021 + Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICode( structure Arm64Code: ARM64PREASSEMBLY ): ARM64ICODE = struct open Arm64Code open Address datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than word if this is a container or a handler entry. *) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype reg = GenReg of xReg | FPReg of vReg datatype callKind = Recursive | ConstantCode of machineWord | FullCall (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. 
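An index register is needed when the offset is computed at run time or cannot be encoded in the constant-offset form above, which covers ARM64's signed 9-bit unscaled encoding (-256..255) and its unsigned, scaled 12-bit encoding (0..4095 units). A minimal sketch of that range check, using the purely illustrative name isValidConstOffset (not part of this commit):
    (* hypothetical helper, for illustration only *)
    fun isValidConstOffset(byteOffset: int, unitSize: int): bool =
        (byteOffset >= ~256 andalso byteOffset <= 255) orelse
        (byteOffset >= 0 andalso byteOffset mod unitSize = 0
            andalso byteOffset div unitSize <= 4095)
e.g. isValidConstOffset(~8, 8) holds via the unscaled form and isValidConstOffset(32760, 8) via the scaled form; anything outside these ranges needs this indexed form or a scratch register.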
*) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize } (* Returns the current thread ID. Always a 64-bit value. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocates a fixed-size piece of memory and puts the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMem and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. *) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations.
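These map onto the AND, ORR and EOR instructions; only AND has a flag-setting form (ANDS, with its TST alias), which is why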
ccRef can only be SOME if logOp is LogAnd.*) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A.. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The result is stored in the destination register. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dest: 'genReg, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, stackAdjust: int, currStackSize: int } (* Return from the function. resultReg is the preg that contains the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { resultReg: 'genReg, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack.. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. 
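For an integer n the tagged form is 2*n + 1: the value is shifted left one place and the bottom (tag) bit is set, so tagged values are always odd while addresses, being word-aligned, are always even. As a model (hypothetical helper names, for illustration only): fun tag n = n * 2 + 1 and untag v = v div 2, so tag 21 = 43 and untag 43 = 21.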
*) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. 
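Typically the source is something like a container address whose last visible use is before a call that can still read it; the artificial reference here keeps the register allocator from treating it as dead too early (compare the "containers" field of FunctionCall above).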
*) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to 0 if it succeeds or to 1 if it fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } (* Debugging - fault if values don't match. *) | CacheCheck of { arg1: 'genReg, arg2: 'genReg } (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true, falseJump if false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tail call. *) | ExitCode (* Indexed case - this branches to one of a number of labels. *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. Used if an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock (* Return the list of blocks that are the immediate successors of this. *) fun successorBlocks(Unconditional l) = [l] | successorBlocks(Conditional{trueJump, falseJump, ...}) = [trueJump, falseJump] | successorBlocks ExitCode = [] | successorBlocks(IndexedBr cases) = cases | successorBlocks(SetHandler{handler, continue, ...}) = [handler, continue] (* We only need "handler" in SetHandler because we may have a handler that is never actually jumped to.
*) | successorBlocks(UnconditionalHandle handler) = [handler] | successorBlocks(ConditionalHandle{handler, continue, ...}) = [handler, continue] local fun printCC(CcRef ccRef, stream) = stream ("CC" ^ Int.toString ccRef) fun printStackLoc(StackLoc{size, rno}, stream) = (stream "S"; stream(Int.toString rno); stream "("; stream(Int.toString size); stream ")") fun regRepr(XReg w) = "X" ^ Int.toString(Word8.toInt w) | regRepr XZero = "XZ" | regRepr XSP = "SP" fun arithRepr OpSize64 = "64" | arithRepr OpSize32 = "32" fun printLoadType(Load64, stream) = stream "64" | printLoadType(Load32, stream) = stream "32" | printLoadType(Load16, stream) = stream "16" | printLoadType(Load8, stream) = stream "8" fun printSaves([], _, _) = () | printSaves([areg], _, printReg) = printReg areg | printSaves(areg::more, stream, printReg) = (printReg areg; stream ","; printSaves(more, stream, printReg)) fun printArg(ArgInReg reg, _, printReg) = printReg reg | printArg(ArgOnStack{wordOffset, container, field, ...}, stream, _) = ( printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")" ) fun printShift(ShiftLSL w, stream) = stream(" LSL " ^ Word8.toString w) | printShift(ShiftLSR w, stream) = stream(" LSR " ^ Word8.toString w) | printShift(ShiftASR w, stream) = stream(" ASR " ^ Word8.toString w) | printShift(ShiftNone, _) = () fun printFloatSize(Float32, stream) = stream "F" | printFloatSize(Double64, stream) = stream "D" fun printICode (stream, printGenReg:'a -> unit, _: 'b->unit, _: 'c->unit) (MoveRegister{ source, dest }: ('a, 'b, 'c) arm64ICode) = ( stream "\tMove\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadNonAddressConstant{ source, dest }) = ( stream "\tLoadNonAddress\t"; stream(Word64.toString source); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadAddressConstant{ source, dest }) = ( stream "\tLoadAddress\t"; stream(Address.stringOfWord source); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadWithConstantOffset{ base, dest, byteOffset, loadType }) = ( stream "\tLoadConstOffset"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (LoadFPWithConstantOffset{ base, dest, byteOffset, floatSize }) = ( stream "\tLoadConstOffset"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (LoadWithIndexedOffset{ base, dest, index, loadType }) = ( stream "\tLoadIndexed"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; stream "] => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (LoadFPWithIndexedOffset{ base, dest, index, floatSize }) = ( stream "\tLoadIndexed"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; stream "] => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (GetThreadId { dest}) = ( stream "\tGetThreadId\t"; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (ObjectIndexAddressToAbsolute{ source, dest }) = ( stream "\tObjectAddrToAbs\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (AbsoluteToObjectIndex{ source, dest }) = ( stream 
"\tAbsToObjectAddr\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (AllocateMemoryFixed{bytesRequired, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream(Word64.fmt StringCvt.DEC bytesRequired); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (AllocateMemoryVariable{size, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream "s="; printGenReg(size); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (InitialiseMem{size, addr, init}) = ( stream "\tInitialiseMem\t"; stream "s="; printGenReg(size); stream ",i="; printGenReg(init); stream ",a="; printGenReg(addr) ) | printICode (stream, _, _, _) BeginLoop = stream "\tBeginLoop" | printICode (stream, printGenReg, _, _) (JumpLoop{regArgs, stackArgs, checkInterrupt, ... }) = ( stream "\tJumpLoop\t"; List.app(fn {src, dst} => (printGenReg(dst); stream "="; printArg(src, stream, printGenReg); stream " ")) regArgs; List.app( fn {src, wordOffset, stackloc} => (printStackLoc(stackloc, stream); stream("(sp" ^ Int.toString wordOffset); stream ")="; printArg(src, stream, printGenReg); stream " ") ) stackArgs; case checkInterrupt of NONE => () | SOME saveRegs => (stream " Check:save="; printSaves(saveRegs, stream, printGenReg)) ) | printICode (stream, printGenReg, _, _) (StoreWithConstantOffset{ base, source, byteOffset, loadType }) = ( stream "\tStoreConstOffset"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode (stream, printGenReg, _, printFPReg) (StoreFPWithConstantOffset{ base, source, byteOffset, floatSize }) = ( stream "\tStoreConstOffset"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode (stream, printGenReg, _, _) (StoreWithIndexedOffset{ base, source, index, loadType }) = ( stream "\tStoreIndexed"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; stream "]" ) | printICode (stream, printGenReg, _, printFPReg) (StoreFPWithIndexedOffset{ base, source, index, floatSize }) = ( stream "\tStoreIndexed"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; stream "]" ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }) = ( stream (if isAdd then "\tAddImmediate" else "\tSubImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift }) = ( stream (if isAdd then "\tAddRegister" else "\tSubRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (LogicalImmediate{ source, dest, ccRef, immed, logOp, length }) = ( stream (case logOp of LogAnd => 
"\tAndImmediate" | LogOr => "\tOrImmediate" | LogXor => "\tXorImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word64.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift }) = ( stream (case logOp of LogAnd => "\tAndRegister" | LogOr => "\tOrRegister" | LogXor => "\tXorRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, _, _) (ShiftRegister{ direction, dest, source, shift, opSize }) = ( stream ( case direction of ShiftLeft => "\tShiftLeft" | ShiftRightLogical => "\tShiftRightLog" | ShiftRightArithmetic => "\tShiftRightArith"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " by "; printGenReg(shift); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, printOptGenReg, _) (Multiplication{ kind, dest, sourceA, sourceM, sourceN }) = ( stream ( case kind of MultAdd32 => "\tMultAdd32\t" | MultSub32 => "\tMultSub32\t" | MultAdd64 => "\tMultAdd64\t" | MultSub64 => "\tMultSub64\t" | SignedMultAddLong => "\tSignedMultAddLong\t" | SignedMultHigh => "\tSignedMultHigh\t"); printGenReg(sourceM); stream " * "; printGenReg(sourceN); stream " +/- "; printOptGenReg sourceA; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (Division{ isSigned, dest, dividend, divisor, opSize }) = ( stream (if isSigned then "\tSignedDivide" else "\tUnsignedDivide"); stream(arithRepr opSize); stream "\t"; printGenReg(dividend); stream " by "; printGenReg(divisor); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (BeginFunction{ regArgs, stackArgs }) = ( stream "\tBeginFunction\t"; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printGenReg(arg); stream " ")) regArgs; List.app(fn s => printStackLoc(s, stream)) stackArgs ) | printICode (stream, printGenReg, _, _) (FunctionCall{callKind, regArgs, stackArgs, dest, saveRegs, containers}) = ( stream "\tFunctionCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn arg => (stream "p="; printArg(arg, stream, printGenReg); stream " ")) stackArgs; stream "=> "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg); if null containers then () else (stream " containers="; List.app (fn c => (printStackLoc(c, stream); stream " ")) containers) ) | printICode (stream, printGenReg, _, _) (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize, ...}) = ( stream "\tTailCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn {src, stack} => (stream (Int.toString stack); stream "<="; printArg(src, stream, printGenReg); stream " ")) stackArgs; stream "adjust="; stream(Int.toString stackAdjust); stream " stackSize="; stream(Int.toString currStackSize) ) | printICode (stream, printGenReg, _, _) 
(ReturnResultFromFunction{ resultReg, returnReg, numStackArgs }) = ( stream "\tReturnFromFunction\t"; printGenReg(returnReg); stream " with "; printGenReg(resultReg); stream("," ^ Int.toString numStackArgs) ) | printICode (stream, printGenReg, _, _) (RaiseExceptionPacket{ packetReg }) = ( stream "\tRaiseException\t"; printGenReg(packetReg) ) | printICode (stream, printGenReg, _, _) (PushToStack{ source, copies, container }) = ( stream "\tPushToStack\t"; printGenReg source; if copies > 1 then (stream " * "; stream(Int.toString copies)) else (); stream " => "; printStackLoc(container, stream) ) | printICode (stream, printGenReg, _, _) (LoadStack{ dest, wordOffset, container, field }) = ( stream "\tLoadStack\t"; printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")"; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (StoreToStack{ source, container, field, stackOffset }) = ( stream "\tStoreToStack\t"; printGenReg source; stream " => "; printStackLoc(container, stream); stream "+"; stream (Int.toString field); stream "("; stream(Int.toString stackOffset); stream ")" ) | printICode (stream, printGenReg, _, _) (ContainerAddress{ dest, container, stackOffset }) = ( stream "\tContainerAddress\t"; stream "@"; printStackLoc(container, stream); stream " ("; stream(Int.toString stackOffset); stream ") => "; printGenReg dest ) | printICode (stream, _, _, _) (ResetStackPtr{ numWords }) = ( stream "\tResetStackPtr\t"; stream(Int.toString numWords) ) | printICode (stream, printGenReg, _, _) (TagValue{ source, dest, isSigned, opSize }) = ( stream "\tTag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (UntagValue{ source, dest, isSigned, opSize }) = ( stream "\tUntag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (BoxLarge{source, dest, saveRegs}) = ( stream "\tBoxLarge\t"; printGenReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (UnboxLarge{source, dest}) = ( stream "\tUnboxLarge\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (BoxTagFloat{floatSize, source, dest, saveRegs}) = ( stream "\tBoxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, printFPReg) (UnboxTagFloat{floatSize, source, dest}) = ( stream "\tUnboxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (LoadAcquire{ base, dest, loadType }) = ( stream "\tLoadAcquire"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (StoreRelease{ base, source, loadType }) = ( stream "\tStoreRelease"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "]" ) | printICode (stream, printGenReg, _, _) (BitFieldShift{ source, dest, isSigned, length, immr, imms }) = ( stream "\tBitShift"; stream(if isSigned then "Signed" else "Unsigned"); 
stream(arithRepr length); stream "\t"; printGenReg source; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode (stream, printGenReg, _, _) (BitFieldInsert{ source, dest, destAsSource, length, immr, imms }) = ( stream "\tBitInsert"; stream(arithRepr length); stream "\t"; printGenReg source; stream " with "; printGenReg destAsSource; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode (stream, printGenReg, _, _) (IndexedCaseOperation{testReg}) = ( stream "\tIndexedCase\t"; printGenReg testReg ) | printICode (stream, _, _, _) PushExceptionHandler = stream "\tPushExcHandler" | printICode (stream, _, _, _) PopExceptionHandler = stream "\tPopExcHandler" | printICode (stream, printGenReg, _, _) (BeginHandler{packetReg}) = ( stream "\tBeginHandler\t"; printGenReg packetReg ) | printICode (stream, printGenReg, _, _) (CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = ( stream "\tCompareByteVectors\t"; printGenReg(vec1Addr); stream ","; printGenReg(vec2Addr); stream ","; printGenReg(length); stream " => "; printCC(ccRef, stream) ) | printICode (stream, printGenReg, _, _) (BlockMove{srcAddr, destAddr, length, isByteMove}) = ( stream(if isByteMove then "\tBlockByteMove\t" else "\tBlockWordMove\t"); stream "src="; printGenReg(srcAddr); stream ",dest="; printGenReg(destAddr); stream ",len="; printGenReg(length) ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubXSP{ source, dest, isAdd }) = ( stream(if isAdd then "\tAdd\t" else "\tSubtract\t"); printGenReg source; stream " XSP => "; printOptGenReg dest ) | printICode (stream, printGenReg, _, _) (TouchValue{ source }) = ( stream "\tTouchValue\t"; printGenReg source ) | printICode (stream, printGenReg, _, _) (LoadAcquireExclusive{ base, dest }) = ( stream "\tLoadExclusive\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode (stream, printGenReg, printOptGenReg, _) (StoreReleaseExclusive{ base, source, result }) = ( stream "\tStoreExclusive\t"; printOptGenReg source; stream " => ["; printGenReg base; stream "] result => "; printGenReg result ) | printICode (stream, _, _, _) MemoryBarrier = stream "\tMemoryBarrier" | printICode (stream, printGenReg, _, printFPReg) (ConvertIntToFloat{ source, dest, srcSize, destSize}) = ( stream "\tConvert"; stream(arithRepr srcSize); stream "To"; printFloatSize(destSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, printFPReg) (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}) = let open IEEEReal in stream "\tConvert"; printFloatSize(srcSize, stream); stream "To"; stream(arithRepr destSize); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream( case rounding of TO_NEAREST => " rounding" | TO_NEGINF => " rounding down" | TO_POSINF => " rounding up" | TO_ZERO => " truncating" ) end | printICode (stream, _, _, printFPReg) (UnaryFloatingPt{ source, dest, fpOp}) = ( stream( case fpOp of NegFloat => "\tNegFloat\t" | NegDouble => "\tNegDouble\t" | AbsFloat => "\tAbsFloat\t" | AbsDouble => "\tAbsDouble\t" | ConvFloatToDble => "\tFloatToDble\t" | ConvDbleToFloat => "\tDbleToFloat\t" ); printFPReg source; stream " => "; printFPReg dest ) | printICode (stream, _, _, printFPReg) (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}) = ( stream( case fpOp of MultiplyFP => "\tMultiply" | DivideFP =>
"\tDivide" | AddFP => "\tAdd" | SubtractFP => "\tSubtract" ); printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream " => "; printFPReg dest ) | printICode (stream, _, _, printFPReg) (CompareFloatingPoint{ arg1, arg2, opSize, ccRef}) = ( stream "\tCompare"; printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream ", "; printCC(ccRef, stream) ) + | printICode (stream, printGenReg, _, _) (CacheCheck{ arg1, arg2}) = + ( stream "\tCacheCheck\t"; printGenReg arg1; stream ", "; printGenReg arg2 ) + and printCondition(cond, stream) = stream(condToString cond) (* Print a basic block. *) fun printBlock (stream, printGenReg, printOptGenReg, printFPReg) (blockNo, BasicBlock{block, flow, ...}) = ( (* Put a label on all but the first. *) if blockNo <> 0 then stream("L" ^ Int.toString blockNo ^ ":") else (); List.app (fn icode => (printICode (stream, printGenReg, printOptGenReg, printFPReg) (icode); stream "\n")) block; case flow of Unconditional l => stream("\tJump\tL" ^ Int.toString l ^ "\n") | Conditional {condition, trueJump, falseJump, ccRef, ...} => ( stream "\tJump"; printCondition(condition, stream); stream "\t"; printCC(ccRef, stream); stream " L"; stream (Int.toString trueJump); stream " else L"; stream (Int.toString falseJump); stream "\n" ) | ExitCode => () | IndexedBr _ => () | SetHandler{handler, continue} => stream(concat["\tSetHandler\tH", Int.toString handler, "\n", "\tJump\tL", Int.toString continue, "\n"]) | UnconditionalHandle handler => stream("\tJump\tH" ^ Int.toString handler ^ "\n") | ConditionalHandle{handler, continue} => stream(concat["\tJump\tL", Int.toString continue, " or H", Int.toString handler, "\n"]) ) in fun printPReg stream (PReg i) = stream("R" ^ Int.toString i) fun printOptPReg stream ZeroReg = stream "Zero" | printOptPReg stream (SomeReg reg) = printPReg stream reg fun printXReg stream (XReg w) = stream("X" ^ Int.toString(Word8.toInt w)) | printXReg stream XZero = stream "XZ" | printXReg stream XSP = stream "XSP" fun printVReg stream (VReg w) = stream("V" ^ Int.toString(Word8.toInt w)) fun printICodeAbstract(blockVec, stream) = Vector.appi(printBlock(stream, printPReg stream, printOptPReg stream, printPReg stream)) blockVec and printICodeConcrete(blockVec, stream) = Vector.appi(printBlock(stream, printXReg stream, printXReg stream, printVReg stream)) blockVec end (* Only certain bit patterns are allowed in a logical immediate instruction but the encoding is complex so it's easiest to inherit the test from the assembler layer. *) local fun optow OpSize32 = WordSize32 | optow OpSize64 = WordSize64 in fun isEncodableBitPattern(v, w) = Arm64Code.isEncodableBitPattern(v, optow w) end (* This generates a BitField instruction with the appropriate values for immr and imms. 
*) fun shiftConstant{ direction, dest, source, shift, opSize } = let val (isSigned, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (false, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (false, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (false, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (false, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (true, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (true, shift, 0wx1f) in BitFieldShift{ source=source, dest=dest, isSigned=isSigned, length=opSize, immr=immr, imms=imms } end structure Sharing = struct type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML index 0ccc3b38..f0f511a6 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML @@ -1,875 +1,877 @@ (* - Copyright (c) 2021 David C.J. Matthews + Copyright (c) 2021-2 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64IdentifyReferences( structure Arm64ICode: ARM64ICODE structure Debug: DEBUG structure IntSet: INTSET ): ARM64IDENTIFYREFERENCES = struct open Arm64ICode open IntSet type regState = { active: int, refs: int, pushState: bool, prop: regProperty } (* CC states before and after. Currently no instruction uses the condition; conditional branches are handled at the block level. The result of executing the instruction may be to set the condition code to a defined state, an undefined state or leave it unchanged. *) datatype outCCState = CCSet of ccRef | CCIndeterminate | CCUnchanged and inCCState = CCNeeded of ccRef | CCUnused datatype extendedBasicBlock = ExtendedBasicBlock of { block: {instr: iCodeAbstract, current: intSet, active: intSet, kill: intSet } list, flow: controlFlow, locals: intSet, (* Defined and used entirely within the block. *) imports: intSet, (* Defined outside the block, used inside it, but not needed afterwards. *) exports: intSet, (* Defined within the block, possibly used inside, but used outside. *) passThrough: intSet, (* Active throughout the block. 
May be referred to by it but needed afterwards. *) loopRegs: intSet, (* Destination registers for a loop. They will be updated by this block. *) initialStacks: intSet, (* Stack items required at the start i.e. imports+passThrough for stack items. *) inCCState: ccRef option, (* The state this block assumes. If SOME _ all predecessors must set it. *) outCCState: ccRef option (* The condition code set by this block. SOME _ if at least one successor needs it. *) } exception InternalError = Misc.InternalError (* Return the list of blocks that are the immediate successor of this. *) fun blockSuccessors(BasicBlock{flow, ...}) = successorBlocks flow fun getOptReg(SomeReg reg) = [reg] | getOptReg ZeroReg = [] fun getInstructionState(MoveRegister { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadNonAddressConstant { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAddressConstant { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadWithConstantOffset { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadFPWithConstantOffset { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadWithIndexedOffset { base, dest, index, ...}) = { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadFPWithIndexedOffset { base, dest, index, ...}) = { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(GetThreadId { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ObjectIndexAddressToAbsolute { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AbsoluteToObjectIndex { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AllocateMemoryFixed { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(AllocateMemoryVariable{size, dest, ...}) = { sources=[size], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(InitialiseMem{size, addr, init}) = { sources=[size, addr, init], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginLoop) = (* This is just a marker. It doesn't actually generate any code. 
*) { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(JumpLoop{regArgs, stackArgs, ...}) = let fun getSourceFromRegs({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (regSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=regSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(StoreWithConstantOffset { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreFPWithConstantOffset { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreWithIndexedOffset { base, source, index, ...}) = { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreFPWithIndexedOffset { base, source, index, ...}) = { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AddSubImmediate{ source, dest, ccRef, ... }) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(AddSubRegister{ base, shifted, dest, ccRef, ... }) = { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(LogicalImmediate{ source, dest, ccRef, ... }) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(LogicalRegister{ base, shifted, dest, ccRef, ... }) = { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(ShiftRegister{ source, shift, dest, ... }) = { sources=[source, shift], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(Multiplication{ dest, sourceA, sourceM, sourceN, ... }) = { sources=getOptReg sourceA @ [sourceM, sourceN], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(Division{ dest, dividend, divisor, ... }) = { sources=[dividend, divisor], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginFunction {regArgs, stackArgs, ...}) = { sources=[], dests=map #1 regArgs, sStacks=[], dStacks=stackArgs, ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(FunctionCall{regArgs, stackArgs, dest, containers, ...}) = let (* Non-tail-recursive. Behaves as a normal reference to sources. 
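Unlike TailRecursiveCall below, it also defines its destination register, and the containers are added to the stack sources so that they remain live until the call is made.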
*) fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack(ArgInReg reg, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack(ArgOnStack { container, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (argSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=argSources, dests=[dest], sStacks=stackSources @ containers, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(TailRecursiveCall{regArgs, stackArgs, ...}) = let (* Tail recursive call. References the argument sources but exits. *) fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (argSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=argSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(ReturnResultFromFunction{resultReg, returnReg, ...}) = { sources=[resultReg, returnReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(RaiseExceptionPacket{packetReg}) = { sources=[packetReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(PushToStack{ source, container, ... }) = { sources=[source], dests=[], sStacks=[], dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadStack{ dest, container, ... }) = { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreToStack{ source, container, ... }) = (* Although this stores into the container it must already exist. *) { sources=[source], dests=[], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ContainerAddress{ dest, container, ... 
}) = { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ResetStackPtr _) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(TagValue{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(UntagValue{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BoxLarge{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(UnboxLarge{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BoxTagFloat{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(UnboxTagFloat{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAcquire { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreRelease { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BitFieldShift{ source, dest, ... }) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BitFieldInsert{ source, destAsSource, dest, ... }) = { sources=[source, destAsSource], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(IndexedCaseOperation{ testReg }) = { sources=[testReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(PushExceptionHandler) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(PopExceptionHandler) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginHandler{packetReg}) = (* The packet register is a destination since this provides its definition. 
*) { sources=[], dests=[packetReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = { sources=[vec1Addr, vec2Addr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } | getInstructionState(BlockMove{srcAddr, destAddr, length, ...}) = { sources=[srcAddr, destAddr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(AddSubXSP{source, dest, ...}) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(TouchValue{source}) = { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAcquireExclusive{base, dest}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreReleaseExclusive{base, source, result}) = { sources=[base] @ getOptReg source, dests=[result], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(MemoryBarrier) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ConvertIntToFloat{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ConvertFloatToInt{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(UnaryFloatingPt{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BinaryFloatingPoint{ arg1, arg2, dest, ...}) = { sources=[arg1, arg2], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(CompareFloatingPoint{ arg1, arg2, ccRef, ...}) = { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } + | getInstructionState(CacheCheck{ arg1, arg2}) = + { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } (* These instructions can be eliminated if their register sources are not used. There may be other cases. *) fun eliminateable(MoveRegister _) = true | eliminateable(LoadNonAddressConstant _) = true | eliminateable(LoadAddressConstant _) = true | eliminateable(LoadWithConstantOffset _) = true | eliminateable(LoadWithIndexedOffset _) = true | eliminateable(ObjectIndexAddressToAbsolute _) = true | eliminateable(TagValue _) = true | eliminateable(UntagValue _) = true | eliminateable(BoxLarge _) = true | eliminateable(UnboxLarge _) = true | eliminateable _ = false fun identifyRegs(blockVector, pregProps): extendedBasicBlock vector * regState vector = let val maxPRegs = Vector.length pregProps val vectorLength = Vector.length blockVector (* Initial arrays - declarationArray is the set of registers given values by the block, importArray is the set of registers referenced by the block and not declared locally. *) val declarationArray = Array.array(vectorLength, emptySet) and importArray = Array.array(vectorLength, emptySet) val stackDecArray = Array.array(vectorLength, emptySet) and stackImportArray = Array.array(vectorLength, emptySet) and localLoopRegArray = Array.array(vectorLength, emptySet) (* References - this is used locally to see if a register is ever actually used and also included in the result which uses it as part of the choice of which register to spill. 
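A destination register whose count is still zero when its defining instruction is reached marks that instruction as removable (see eliminateable below), while a heavily referenced register is a correspondingly poor choice to spill.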
*) val regRefs = Array.array(maxPRegs, 0) (* Registers that must be pushed because they are required after a function call. For cache registers this means "discard". *) and requirePushOrDiscard = Array.array(maxPRegs, false) fun incrRef r = Array.update(regRefs, r, Array.sub(regRefs, r)+1) (* Contains the, possibly filtered, code for each block. *) val resultCode = Array.array(vectorLength, NONE) val ccInStates = Array.array(vectorLength, CCUnused) and ccOutStates = Array.array(vectorLength, CCIndeterminate) (* First pass - for each block build up the sets of registers defined and used in the block. We do this depth-first so that we can use "refs" to see if a register is used. If this is an instruction that can be eliminated we don't need to generate it and can ignore any references it makes. *) local fun blockScan blockNo = if isSome(Array.sub(resultCode, blockNo)) then () else let val () = Array.update(resultCode, blockNo, SOME []) (* Prevent looping. *) val thisBlock as BasicBlock { block, flow, ...} = Vector.sub(blockVector, blockNo) val successors = blockSuccessors thisBlock (* Visit everything reachable first. *) val () = List.app blockScan successors fun scanCode(instr, original as { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... }) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ccIn, ccOut, ... } = getInstructionState instr fun regNo(PReg i) = i and stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs (* If this instruction requires a cc i.e. is SetToCondition or X87FPGetCondition we need to set this as a requirement earlier. If this sets the CC and it is the condition we've been expecting we've satisfied it and can set the previous condition to Unused. We could use this to decide if a comparison is no longer required. That can only happen in very specific circumstances e.g. some tests in Test176.ML so it's not worthwhile. *) val newInCC = case (ccIn, ccOut, occIn) of (cc as CCNeeded _, _, _) => cc (* This instr needs a particular cc. *) | (CCUnused, CCSet _, _) => CCUnused | (CCUnused, _, occIn) => occIn (* If this instruction modifies the CC check to see if it is setting an requirement. *) val _ = case (occIn, ccOut) of (CCNeeded ccRIn, CCSet ccRout) => if ccRIn = ccRout then () else raise InternalError "CCCheck failed" | (CCNeeded _, CCIndeterminate) => raise InternalError "CCCheck failed" | _ => () (* The output CC is the last CC set. Tail instructions that don't change the CC state are ignored until we reach an instruction that sets it. *) val newOutCC = case occOut of CCUnchanged => ccOut | _ => occOut val instrLoopRegs = case instr of JumpLoop{regArgs, ...} => listToSet (map (regNo o #dst) regArgs) | _ => emptySet in if eliminateable instr andalso List.all(fn dReg => Array.sub(regRefs, dReg) = 0) destRegNos then original (* Don't include this instruction. *) else let (* Only mark the sources as referred after we know we're going to need this. In that way we may eliminate the instruction that created this source. 
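Because the block is folded with List.foldr the scan runs backwards over the code, so a dead MoveRegister is discarded before the reference count of its source is inspected; if that source came from, say, an otherwise unused LoadNonAddressConstant, the constant load is then dropped too, removing whole chains of dead instructions in a single pass.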
*) val () = List.app incrRef sourceRegNos in { code = instr :: code, decs = union(listToSet destRegNos, decs), refs = union(listToSet sourceRegNos, refs), sDecs = union(listToSet stackDestRegNos, sDecs), sRefs = union(listToSet stackSourceRegNos, sRefs), occIn = newInCC, occOut = newOutCC, loopRegs = union(loopRegs, instrLoopRegs)} end end (* If we have a conditional branch at the end we need the condition code. It should either be set here or in a preceding block. *) val inCC = case flow of Conditional { ccRef, ...} => CCNeeded ccRef | _ => CCUnused val { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... } = List.foldr scanCode {code=[], decs=emptySet, refs=emptySet, sDecs=emptySet, sRefs=emptySet, occIn=inCC, occOut=CCUnchanged, loopRegs=emptySet} block in Array.update(declarationArray, blockNo, decs); (* refs includes local declarations. Remove before adding to the result. *) Array.update(importArray, blockNo, minus(refs, decs)); Array.update(localLoopRegArray, blockNo, loopRegs); Array.update(stackDecArray, blockNo, sDecs); Array.update(stackImportArray, blockNo, minus(sRefs, sDecs)); Array.update(resultCode, blockNo, SOME code); Array.update(ccInStates, blockNo, occIn); Array.update(ccOutStates, blockNo, occOut) end in val () = blockScan 0 (* Start with the root block. *) end (* Second phase - Propagate reference information between the blocks. We need to consider loops here. Do a depth-first scan marking each block. If we find a loop we save the import information we've used. If when we come to process that block we find the import information is different we need to reprocess. *) (* Pass through array - values used in other blocks after this that are not declared in this block. *) val passThroughArray = Array.array(vectorLength, emptySet) val stackPassThroughArray = Array.array(vectorLength, emptySet) (* Exports - those of our declarations that are used in other blocks. *) val exportArray = Array.array(vectorLength, emptySet) val stackExportArray = Array.array(vectorLength, emptySet) (* Loop registers. This contains the registers that are not exported from or passed through this block but are used subsequently as loop registers. *) val loopRegArray = Array.array(vectorLength, emptySet) val () = Array.copy{src=localLoopRegArray, dst=loopRegArray, di=0} (* If any one of the successors requires the CC then this is set. Otherwise we leave it as Unused. *) val ccRequiredOut = Array.array(vectorLength, CCUnused) local datatype loopData = Unprocessed | Processing | Processed | Looped of { regSet: intSet, loopSet: intSet, stackSet: intSet, ccState: inCCState } fun reprocessLoop () = let val reprocess = ref false val loopArray = Array.array(vectorLength, Unprocessed) fun processBlocks blockNo = case Array.sub(loopArray, blockNo) of Processed => (* Already seen this by a different route. *) { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } | Looped s => s (* We've already seen this in a loop. *) | Processing => (* We have a loop. *) let (* Use the existing input array. 
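(* The input sets recorded here for a loop are only the values accumulated so
   far, i.e. a guess. The check at the end of the Unprocessed case compares
   that guess with the final sets and, if they differ, sets "reprocess" so that
   reprocessLoop runs the whole scan again. This is an ordinary fixpoint
   iteration; schematically, with the arrays as implicit state:
       fun fixpoint step = if step () then fixpoint step else ()
   where step () returns true when some looped block's assumed inputs changed. *)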
*) val inputs = { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } val () = Array.update(loopArray, blockNo, Looped inputs) in inputs end | Unprocessed => (* Normal case - not visited yet. *) let val () = Array.update(loopArray, blockNo, Processing) val thisBlock = Vector.sub(blockVector, blockNo) val ourDeclarations = Array.sub(declarationArray, blockNo) and ourStackDeclarations = Array.sub(stackDecArray, blockNo) and ourLocalLoopRegs = Array.sub(localLoopRegArray, blockNo) val successors = blockSuccessors thisBlock fun addSuccessor b = let val {regSet=theirImports, stackSet=theirStackImports, ccState=theirInState, loopSet=theirLoops} = processBlocks b (* Remove loop regs from the imports if they are actually given new values by this block. We don't want to pass the old loop regs through here. *) val theirImports = minus(theirImports, ourLocalLoopRegs) (* Split the imports. If a register is a local declaration then it becomes an export. If it is not it becomes part of our passThrough. *) val (addToExp, addToImp) = IntSet.partition (fn i => member(i, ourDeclarations)) theirImports val (addToStackExp, addToStackImp) = IntSet.partition (fn i => member(i, ourStackDeclarations)) theirStackImports (* Merge the input states from each of the successors. *) val () = case (theirInState, Array.sub(ccRequiredOut, blockNo)) of (CCNeeded ts, CCNeeded req) => if ts = req then () else raise InternalError "Mismatched states" | (ts as CCNeeded _, _) => Array.update(ccRequiredOut, blockNo, ts) | _ => () (* Add loop registers to the set if they are not declared here. The only place they are declared is at the entry to the loop so that stops them being propagated further. *) val addToLoops = minus(theirLoops, ourDeclarations) in Array.update(exportArray, blockNo, union(Array.sub(exportArray, blockNo), addToExp)); Array.update(passThroughArray, blockNo, union(Array.sub(passThroughArray, blockNo), addToImp)); Array.update(stackExportArray, blockNo, union(Array.sub(stackExportArray, blockNo), addToStackExp)); Array.update(stackPassThroughArray, blockNo, union(Array.sub(stackPassThroughArray, blockNo), addToStackImp)); Array.update(loopRegArray, blockNo, union(Array.sub(loopRegArray, blockNo), addToLoops)) end val () = List.app addSuccessor successors val ourInputs = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)) val ourStackInputs = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)) in (* Check that we supply the required state. *) case (Array.sub(ccRequiredOut, blockNo), Array.sub(ccOutStates, blockNo)) of (CCNeeded ccReq, CCSet ccSet) => if ccReq = ccSet then () else raise InternalError "Mismatched cc states" | (CCNeeded _, CCIndeterminate) => raise InternalError "Mismatched cc states" | (cc as CCNeeded needOut, CCUnchanged) => ( (* We pass through the state. If we don't use the state then we need to set this as the input. If we do use the state it must be the same. *) case Array.sub(ccInStates, blockNo) of CCUnused => Array.update(ccInStates, blockNo, cc) | CCNeeded needIn => if needOut = needIn then () else raise InternalError "Mismatched cc states" ) | _ => (); (* Was this block used in a loop? If so we should not be requiring a CC. 
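(* The split performed in addSuccessor above can be seen in isolation: with
   ourDeclarations = {1, 2} and a successor importing {2, 3},
       val (toExport, toPassThrough) =
           List.partition (fn r => List.exists (fn d => d = r) [1, 2]) [2, 3]
   gives toExport = [2], since we define it here and it becomes one of our
   exports, and toPassThrough = [3], which is defined further back and merely
   passes through this block. *)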
*) case Array.sub(loopArray, blockNo) of Looped {regSet, stackSet, ...} => ( case Array.sub(ccInStates, blockNo) of CCNeeded _ => raise InternalError "Looped state needs cc" | _ => (); if setToList regSet = setToList ourInputs andalso setToList stackSet = setToList ourStackInputs then () else reprocess := true ) | _ => (); Array.update(loopArray, blockNo, Processed); { regSet = ourInputs, stackSet = ourStackInputs, ccState = Array.sub(ccInStates, blockNo), loopSet=Array.sub(loopRegArray, blockNo)} end in reprocess := false; processBlocks 0; if !reprocess then reprocessLoop () else () end in val () = reprocessLoop () end (* Third pass - Build the result list with the active registers for each instruction. We don't include registers in the passThrough set since they are active throughout the block. *) local (* Number of instrs for which this is active. We use this to try to select a register to push to the stack if we have too many. Registers that have only a short lifetime are less likely to be pushed than those that are active longer. *) val regActive = Array.array(maxPRegs, 0) fun addActivity n r = Array.update(regActive, r, Array.sub(regActive, r)+n) fun createResultInstrs (passThrough, stackPassThrough) (instr, (tail, activeAfterThis, stackActiveAfterThis)) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ... } = getInstructionState instr in (* Eliminate instructions if their results are not required. The earlier check for this will remove most cases but if we have duplicated a block we may have a register that is required elsewhere but not in this particular branch. *) if not(List.exists(fn PReg d => member(d, activeAfterThis)) dests) andalso eliminateable instr then (tail, activeAfterThis, stackActiveAfterThis) else let fun regNo(PReg i) = i fun stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos (* Remove any sources that are present in passThrough since they are going to be active throughout the block. *) and sourceSet = minus(listToSet sourceRegNos, passThrough) val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs val stackDestSet = listToSet stackDestRegNos and stackSourceSet = minus(listToSet stackSourceRegNos, stackPassThrough) (* To compute the active set for the PREVIOUS instruction (we're processing from the end back to the start) we remove any registers that have been given values in this instruction and add anything that we are using in this instruction since they will now need to have values. *) val afterRemoveDests = minus(activeAfterThis, destSet) val stackAfterRemoveDests = minus(stackActiveAfterThis, stackDestSet) val activeForPrevious = union(sourceSet, afterRemoveDests) val stackActiveForPrevious = union(stackSourceSet, stackAfterRemoveDests) (* The "active" set is the set of registers that need to be active DURING the instruction. It includes destinations, which will usually be in "activeAfterThis", because there may be destinations that are not actually used subsequently but still need a register. *) val activeForInstr = case instr of FunctionCall _ => sourceSet (* Is this still needed? *) | TailRecursiveCall _ => (* Set the active set to the total set of registers we require including the work register. This ensures that we will spill as many registers as we require when we look at the size of the active set. *) union(sourceSet, destSet) | BoxLarge _ => (* We can only store the value in the box after the box is allocated. 
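(* The activeForPrevious computation above is the standard backward liveness
   step, union(use, minus(liveOut, def)). As a list-based sketch, ignoring the
   passThrough filtering the real code applies to the sources:
       fun liveBefore (liveAfter, sources, dests) =
           sources @ List.filter (fn r => not (List.exists (fn d => d = r) dests)) liveAfter
   The real code works on intSet values so duplicates and ordering do not arise. *)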
*) union(activeAfterThis, union(sourceSet, destSet)) | BoxTagFloat _ => (* Since the source must be a V register and the destination an X register there isn't actually a problem here, but do this anyway. *) union(activeAfterThis, union(sourceSet, destSet)) | _ => union(activeAfterThis, destSet) val () = List.app(addActivity 1) (setToList activeForInstr) local (* If we are allocating memory we have to save the current registers if they could contain an address. We mustn't push untagged registers and we mustn't push the destination. *) fun getSaveSet dReg = let val activeAfter = union(activeAfterThis, passThrough) (* Remove any registers marked - must-not-push. These are registers holding non-address values. They will actually be saved by the RTS across any GC but not checked or modified by the GC. Exclude the result register. *) fun getSave i = if i = dReg then NONE else case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" in List.mapPartial getSave (setToList activeAfter) end in (* Sometimes we need to modify the instruction e.g. to include the set of registers to save. *) val convertedInstr = case instr of AllocateMemoryFixed{bytesRequired, dest, saveRegs=_} => AllocateMemoryFixed{bytesRequired=bytesRequired, dest=dest, saveRegs=getSaveSet(regNo dest)} | AllocateMemoryVariable{size, dest, saveRegs=_} => AllocateMemoryVariable{size=size, dest=dest, saveRegs=getSaveSet(regNo dest)} | BoxLarge{source, dest, saveRegs=_} => BoxLarge{source=source, dest=dest, saveRegs=getSaveSet(regNo dest)} | BoxTagFloat{source, dest, floatSize, saveRegs=_} => BoxTagFloat{source=source, dest=dest, floatSize=floatSize, saveRegs=getSaveSet(regNo dest)} | JumpLoop{regArgs, stackArgs, checkInterrupt = SOME _, ...} => let (* If we have to check for interrupts we must preserve registers across the RTS call. *) fun getSave i = case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" val currentRegs = union(activeAfterThis, passThrough) (* Have to include the loop registers. These were previously included automatically because they were part of the import set. *) val check = List.mapPartial getSave (map (regNo o #dst) regArgs @ setToList currentRegs) in JumpLoop{regArgs=regArgs, stackArgs=stackArgs, checkInterrupt=SOME check} end | FunctionCall{regArgs, stackArgs=[], dest, callKind as ConstantCode m, saveRegs=_, containers} => (* If this is arbitrary precision push the registers rather than marking them as "save". stringOfWord returns 'CODE "PolyAddArbitrary"' etc. *) if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then FunctionCall{regArgs=regArgs, stackArgs=[], callKind=callKind, dest=dest, containers=containers, saveRegs=getSaveSet(regNo dest) } else instr | _ => instr end (* FunctionCall must mark all registers as "push". 
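(* getSaveSet above reduces to something like this two-case sketch, where
   Tagged stands for the kinds that are saved (RegPropGeneral and
   RegPropCacheTagged) and Untagged for those that are not:
       datatype prop = Tagged | Untagged
       fun saveSet (props: prop vector) resultReg active =
           List.mapPartial
               (fn i => if i = resultReg then NONE
                        else case Vector.sub (props, i) of
                                 Tagged => SOME i
                               | Untagged => NONE)
               active
   Only registers that can hold addresses need to be visible to the GC; as
   noted above, untagged values are saved by the RTS across the call but not
   scanned or modified. *)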
*) local fun pushRegisters () = let val activeAfter = union(activeAfterThis, passThrough) fun pushAllButDests i = if List.exists(fn j => i=j) destRegNos then () else case Vector.sub(pregProps, i) of RegPropCacheTagged => raise InternalError "pushRegisters: cache reg" | RegPropCacheUntagged => raise InternalError "pushRegisters: cache reg" | _ => Array.update(requirePushOrDiscard, i, true) in (* We need to push everything active after this except the result register. *) List.app pushAllButDests (setToList activeAfter) end in val () = case instr of FunctionCall{ stackArgs=[], callKind=ConstantCode m, ...} => if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then () else pushRegisters () | FunctionCall _ => pushRegisters () (* It should no longer be necessary to push across a handler but there still seem to be cases that need it. *) (*| BeginHandler _ => pushRegisters ()*) | _ => () end (* Which entries are active in this instruction but not afterwards? *) val kill = union(minus(stackSourceSet, stackActiveAfterThis), minus(sourceSet, activeAfterThis)) in ({instr=convertedInstr, active=activeForInstr, current=activeAfterThis, kill=kill} :: tail, activeForPrevious, stackActiveForPrevious) end end fun createResult blockNo = let val BasicBlock{ flow, ...} = Vector.sub(blockVector, blockNo) val declSet = Array.sub(declarationArray, blockNo) and importSet = Array.sub(importArray, blockNo) and passSet = Array.sub(passThroughArray, blockNo) and loopSet = Array.sub(loopRegArray, blockNo) and exportSet = Array.sub(exportArray, blockNo) and stackPassSet = Array.sub(stackPassThroughArray, blockNo) and stackImportSet = Array.sub(stackImportArray, blockNo) and stackExportSet = Array.sub(stackExportArray, blockNo) val filteredCode = getOpt(Array.sub(resultCode, blockNo), []) (* At the end of the block we should have the exports active. *) val (resultInstrs, _, _) = List.foldr (createResultInstrs (passSet, stackPassSet)) ([], exportSet, stackExportSet) filteredCode (* Set the active count for the pass through. *) val instrCount = List.length filteredCode val () = List.app(addActivity instrCount) (setToList passSet) val inCCState = case Array.sub(ccInStates, blockNo) of CCNeeded s => SOME s | CCUnused => NONE val outCCState = case Array.sub(ccRequiredOut, blockNo) of CCNeeded s => SOME s | CCUnused => NONE in ExtendedBasicBlock { block = resultInstrs, flow=flow, locals = minus(declSet, exportSet), imports = importSet, exports = exportSet, passThrough = passSet, loopRegs = loopSet, initialStacks = union(stackPassSet, stackImportSet), inCCState = inCCState, outCCState = outCCState } end in val resultBlocks = Vector.tabulate(vectorLength, createResult) val regActive = regActive end val registerState: regState vector = Vector.tabulate(maxPRegs, fn i => { active = Array.sub(regActive, i), refs = Array.sub(regRefs, i), pushState = Array.sub(requirePushOrDiscard, i), prop = Vector.sub(pregProps, i) } ) in (resultBlocks, registerState) end (* Exported function. First filter out unreferenced blocks then process the registers themselves. *) fun identifyRegisters(blockVector, pregProps) = let val vectorLength = Vector.length blockVector val mapArray = Array.array(vectorLength, NONE) and resArray = Array.array(vectorLength, NONE) val count = ref 0 fun setReferences label = case Array.sub(mapArray, label) of NONE => (* Not yet visited *) let val BasicBlock{flow, block} = Vector.sub(blockVector, label) (* Create a new entry for it. *) val newLabel = ! count before count := !count + 1 (* Add it to the map. 
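(* In isolation the reachable-block renumbering here can be sketched as
   follows, omitting the rebuilding of the flow field:
       fun renumber (successors: int -> int list) n =
       let
           val labels = Array.array (n, NONE)
           val count = ref 0
           fun visit b =
               case Array.sub (labels, b) of
                   SOME lab => lab
                 | NONE =>
                   let
                       val lab = !count before count := !count + 1
                       val () = Array.update (labels, b, SOME lab)
                       val _ = List.map visit (successors b)
                   in lab end
       in visit 0; labels end
   Unreachable blocks keep NONE and are dropped from the result. *)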
Any other references will use this without reprocessing. *) val () = Array.update(mapArray, label, SOME newLabel) val newFlow = case flow of Unconditional l => Unconditional(setReferences l) | Conditional{trueJump, falseJump, ccRef, condition} => Conditional{trueJump=setReferences trueJump, falseJump=setReferences falseJump, ccRef=ccRef, condition=condition} | ExitCode => ExitCode | IndexedBr list => IndexedBr(map setReferences list) | SetHandler{handler, continue} => SetHandler{handler=setReferences handler, continue=setReferences continue} | UnconditionalHandle l => UnconditionalHandle(setReferences l) | ConditionalHandle{handler, continue} => ConditionalHandle{handler=setReferences handler, continue=setReferences continue} val () = Array.update(resArray, newLabel, SOME(BasicBlock{flow=newFlow, block=block})) in newLabel end | SOME lab => lab val _ = setReferences 0 val newBlockVector = Vector.tabulate(!count, fn i => valOf(Array.sub(resArray, i))) in identifyRegs(newBlockVector, pregProps) end (* Exported for use in GetConflictSets *) fun getInstructionRegisters instr = let val {sources, dests, ...} = getInstructionState instr in {sources=sources, dests=dests} end (* Exported for use in ICodeOptimise *) val getInstructionCC = #ccOut o getInstructionState structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and intSet = intSet and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and extendedBasicBlock = extendedBasicBlock and controlFlow = controlFlow and regProperty = regProperty and ccRef = ccRef and outCCState = outCCState end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML index 16a7da9c..c8d9620e 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML @@ -1,1139 +1,1141 @@ (* - Copyright David C. J. Matthews 2021 + Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64PushRegisters( structure Arm64ICode: ARM64ICODE structure IntSet: INTSET structure Identify: ARM64IDENTIFYREFERENCES sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ) : ARM64PUSHREGISTERS = struct open Arm64ICode open IntSet open Identify type basicBlockAbstract = (preg, pregOrZero, preg) basicBlock (* Curried subscript functions *) fun asub a i = Array.sub(a, i) and vsub v i = Vector.sub(v, i) exception InternalError = Misc.InternalError (* Each preg in the input is mapped to either a new preg or the stack. *) datatype pregMapType = Unset | ToPReg of preg | ToStack of int * stackLocn (* The stack contains both entries in the input code and entries added here. 
It is really used to ensure that the stack at run time is the same size at the start of a block whichever block has jumped to it. *) datatype stackEntry = NewEntry of {pregNo: int} (* pregNo is the original preg that has been pushed here. *) | OriginalEntry of { stackLoc: stackLocn } | HandlerEntry fun addRegisterPushes{code: extendedBasicBlock vector, pushVec: bool vector, pregProps, firstPass=_} = let val maxPRegs = Vector.length pregProps val numberOfBlocks = Vector.length code (* Output registers and properties. *) val pregCounter = ref 0 val pregPropList = ref [] val pregMap = Array.array(maxPRegs, Unset) val maxStack = ref 0 (* The stack size we've assumed for the block. Also indicates if a block has already been processed. *) val inputStackSizes = Array.array(numberOfBlocks, NONE) (* The result of processing a block. *) val blockOutput = Array.array(numberOfBlocks, {code=[], stackCount=0}) (* Extra blocks to adjust the stack are added here. *) val extraBlocks: basicBlockAbstract list ref = ref [] val blockCounter = ref numberOfBlocks (* Get the blocks that are inputs for each one. *) local val blockRefs = Array.array(numberOfBlocks, []) fun setReferences fromBlock = let val ExtendedBasicBlock{ flow, ...} = vsub code fromBlock val refs = successorBlocks flow fun setRefs toBlock = let val oldRefs = asub blockRefs toBlock in Array.update(blockRefs, toBlock, fromBlock :: oldRefs); if null oldRefs then setReferences toBlock else () end in List.app setRefs refs end val () = setReferences 0 in val blockRefs = blockRefs end (* Recursive scan of the blocks. For each block we produce an input and output state. The input state is the output state of the predecessor i.e. some block that jumps to this, but with any entries removed that are not used in this block. It is then necessary to match the input state, if necessary by adding extra blocks that just do the matching. *) local val haveProcessed = isSome o asub inputStackSizes fun processBlocks toDo = case List.filter (fn (n, _) => not(haveProcessed n)) toDo of [] => () (* Nothing left to do *) | stillToDo as head :: _ => let (* Try to find a block all of whose predecessors have been processed. That increases the chances that we will have cached items. *) fun available(dest, _) = List.all haveProcessed (Array.sub(blockRefs, dest)) val (blockNo, lastOutputState) = case List.find available stillToDo of SOME c => c | NONE => head (* This is the first time we've come to this block. *) val ExtendedBasicBlock{ block, flow, imports, passThrough, loopRegs, initialStacks, ...} = vsub code blockNo (* Remove any items from the input state that are no longer needed for this block. They could be local to the previous block or needed by a different successor. Although the values in loopRegs are not required the stack space is so that they can be updated. 
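(* removeItems below only ever trims from the top of the abstract stack: it
   pops entries until it reaches one this block still needs, and everything
   underneath a needed entry is kept even if unused, because only the top of
   the real stack can be popped. As a list-only sketch (the real code also
   maintains the word count):
       fun trimTop needed [] = []
         | trimTop needed (stack as item :: rest) =
               if needed item then stack else trimTop needed rest *)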
*) fun removeItems(result as {stack=[], stackCount=0}) = result | removeItems{stack=[], ...} = raise InternalError "removeItems - stack size" | removeItems (thisStack as {stack=NewEntry{pregNo} :: rest, stackCount}) = if member(pregNo, imports) orelse member(pregNo, passThrough) orelse member(pregNo, loopRegs) then thisStack else removeItems{stack=rest, stackCount=stackCount-1} | removeItems (thisStack as {stack=OriginalEntry{stackLoc=StackLoc{rno, size}, ...} :: rest, stackCount}) = if member(rno, initialStacks) then thisStack else removeItems{stack=rest, stackCount=stackCount-size} | removeItems result = result val {stackCount=newSp, stack=newStack} = removeItems lastOutputState (* References to hold the current stack count (number of words on the stack) and the list of items on the stack. The list is not used directly to map stack addresses. Instead it is used to match the stack at the beginning and end of a block. *) val stackCount = ref newSp val stack = ref newStack (* Items from the stack that have been marked as deleted but not yet removed. We only remove items from the top of the stack to avoid quadratic behaviour with a very deep stack. *) val deletedItems = ref [] (* Save the stack size in case we come by a different route. *) val () = Array.update(inputStackSizes, blockNo, SOME newSp) fun pushItemToStack item = let val size = case item of NewEntry _ => 1 | OriginalEntry{stackLoc=StackLoc{size, ...}, ...} => size | HandlerEntry => 2 in stackCount := ! stackCount+size; stack := item :: ! stack; maxStack := Int.max(!maxStack, !stackCount) end fun newPReg propKind = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := propKind :: !pregPropList in PReg regNo end and newStackLoc size = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropStack size :: !pregPropList in StackLoc{size=size, rno=regNo} end (* Map a source register. This always loads the argument. *) fun mapSrcReg(PReg n) = case Array.sub(pregMap, n) of Unset => raise InternalError "mapSrcReg - unset" | ToPReg preg => (preg, []) | ToStack(stackLoc, container as StackLoc{size, ...}) => let (* Make a new untagged register. That will prevent us pushing it if we have to spill registers. *) val newReg = newPReg RegPropUntagged in (newReg, [LoadStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0, dest=newReg}]) end fun mapDestReg(PReg n) = let val currentLocation = Array.sub(pregMap, n) val kind = Vector.sub(pregProps, n) in if Vector.sub(pushVec, n) then let (* This should not have been seen before. *) val _ = case currentLocation of Unset => () | _ => raise InternalError "mapDestReg - already set" val newReg = newPReg kind val newContainer = newStackLoc 1 val () = Array.update(pregMap, n, ToStack (!stackCount, newContainer)) val () = pushItemToStack(NewEntry{pregNo=n}) in (newReg, [PushToStack{source= newReg, container=newContainer, copies=1}]) end else let (* See if we already have a number for it. We may encounter the same preg as a destination when returning the result from a conditional in which case we have to use the same number. We shouldn't have pushed it. *) val newReg = case (currentLocation, kind) of (Unset, _) => let val newReg = newPReg kind val () = Array.update(pregMap, n, ToPReg newReg) in newReg end | (ToPReg preg, RegPropMultiple) => preg | _ => raise InternalError "mapDestReg - multiply defined non-merge reg" in (newReg, []) end end (* Optional destination for arithmetic and logical ops. 
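(* The offset arithmetic in mapSrcReg above, and in mapContainerAndStack below,
   can be stated on its own: a container pushed when the depth was pushedAt,
   occupying size words, has field f at
       wordOffset = currentDepth - pushedAt - size + f
   from the stack pointer. For example, an entry pushed at depth 3 with size 1,
   read at a current depth of 7, has field 0 at word offset 3. *)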
*) fun mapOptDest ZeroReg = (ZeroReg, []) | mapOptDest (SomeReg destReg) = let val (destVal, destCode) = mapDestReg destReg in (SomeReg destVal, destCode) end fun mapOptSrc ZeroReg = (ZeroReg, []) | mapOptSrc (SomeReg srcReg) = let val (srcVal, srcCode) = mapSrcReg srcReg in (SomeReg srcVal, srcCode) end (* Adjust a stack offset from the old state to the new state. *) fun mapContainerAndStack(StackLoc{rno, size}, field) = let val (newStackAddr, newContainer) = case Array.sub(pregMap, rno) of Unset => raise InternalError "mapContainer - unset" | ToPReg _ => raise InternalError "mapContainer - ToPReg" | ToStack stackContainer => stackContainer val newOffset = !stackCount-(newStackAddr+size) + field in (newOffset, newContainer) end (* Add an entry for an existing stack entry. *) fun mapDestContainer(StackLoc{rno, size}, locn) = ( case Array.sub(pregMap, rno) of Unset => let val newContainer = newStackLoc size val () = Array.update(pregMap, rno, ToStack(locn, newContainer)) in newContainer end | _ => raise InternalError "mapDestContainer: already set" ) (* Map a function argument which could be a register or a stack entry. A register entry could have been pushed. *) fun mapArgument(ArgInReg (PReg r)) = ( case Array.sub(pregMap, r) of Unset => raise InternalError "mapSource - unset" | ToPReg preg => ArgInReg preg | ToStack(stackLoc, container as StackLoc{size, ...}) => ArgOnStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0} ) | mapArgument(ArgOnStack{container, field, ...}) = let val (newOffset, newContainer) = mapContainerAndStack(container, field) in ArgOnStack{container=newContainer, wordOffset=newOffset, field=field} end (* Rewrite the code, replacing any registers that need to be pushed with references to the stack. The result is built up in reverse order and then reversed. *) fun pushRegisters({instr=MoveRegister{ source, dest as PReg dReg }, ...}, code) = if Vector.sub(pushVec, dReg) then (* We're going to push this. *) let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest (* TODO: Since we're pushing it we don't need to move it first. 
*) in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end else let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=LoadNonAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadNonAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ code end | pushRegisters({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ code end | pushRegisters({instr=LoadWithIndexedOffset { base, dest, index, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, loadType=loadType} :: indexCode @ baseCode @ code end | pushRegisters({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, floatSize=floatSize} :: indexCode @ baseCode @ code end | pushRegisters({instr=GetThreadId { dest}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ GetThreadId { dest=destVal} :: code end | pushRegisters({instr=ObjectIndexAddressToAbsolute { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ObjectIndexAddressToAbsolute { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=AbsoluteToObjectIndex { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ AbsoluteToObjectIndex { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=AllocateMemoryFixed { bytesRequired, dest, ...}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryFixed { dest=destVal, bytesRequired=bytesRequired, saveRegs=[]} :: code end | pushRegisters({instr=AllocateMemoryVariable{size, dest, ...}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryVariable{size=sizeVal, dest=destVal, saveRegs=[]} :: sizeCode @ code end | pushRegisters({instr=InitialiseMem{size, addr, init}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (addrVal, addrCode) = mapSrcReg addr val (initVal, initCode) = mapSrcReg init in InitialiseMem{size=sizeVal, addr=addrVal, init=initVal} :: initCode @ addrCode @ sizeCode @ code end | 
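(* Each case of pushRegisters follows the same pattern: map the sources,
   possibly emitting LoadStack instructions, map the destination, possibly
   emitting a PushToStack, and cons onto the accumulator in reverse as
       destCode @ instr' :: sourceCode @ code
   Because createBlock finally applies List.rev, the executed order is the
   source loads, then the rewritten instruction, then the push of its result. *)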
pushRegisters({instr=BeginLoop, ...}, code) = BeginLoop :: code | pushRegisters({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}, code) = let (* Normally JumpLoop will be the last item in a block but it is possible that we've added a reset-stack after it. *) fun getValues [] = ([], []) | getValues ({src=source, dst=PReg n} :: rest) = let val (otherRegArgs, otherStackArgs) = getValues rest in case Array.sub(pregMap, n) of ToPReg lReg => ({src=mapArgument source, dst=lReg} :: otherRegArgs, otherStackArgs) | ToStack(stackloc, stackC as StackLoc{size, ...}) => let val sourceVal = mapArgument source val stackOff = !stackCount - stackloc - size in (otherRegArgs, {src=sourceVal, wordOffset=stackOff, stackloc=stackC} :: otherStackArgs) end | Unset => (* Drop it. It's never used. This can happen if we are folding a function over a list such that it always returns the last value and then discard the result of the fold. *) (otherRegArgs, otherStackArgs) end val (newRegArguments, newStackArgs) = getValues regArgs fun loadStackArg({src=source, stackloc=destC, ...}, otherArgs) = let val sourceVal = mapArgument source val (newOffset, newContainer) = mapContainerAndStack(destC, 0) in {src=sourceVal, wordOffset=newOffset, stackloc=newContainer} :: otherArgs end val oldStackArgs = List.foldr loadStackArg [] stackArgs val check = case checkInterrupt of NONE => NONE | SOME _ => SOME [] in JumpLoop{ regArgs=newRegArguments, stackArgs=oldStackArgs @ newStackArgs, checkInterrupt=check} :: code end | pushRegisters({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreFPWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreWithIndexedOffset { base, source, index, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, loadType=loadType} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithIndexedOffset { base, source, index, floatSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreFPWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, floatSize=floatSize} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ AddSubImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length} :: sourceCode @ code end | pushRegisters({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, ...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ AddSubRegister { base=op1Val, shifted=op2Val, 
dest=destVal, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ LogicalImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, logOp=logOp, length=length} :: sourceCode @ code end | pushRegisters({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ LogicalRegister { base=op1Val, shifted=op2Val, dest=destVal, ccRef=ccRef, logOp=logOp, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...}, code) = let val (srcVal, op1Code) = mapSrcReg source val (shiftVal, op2Code) = mapSrcReg shift val (destVal, destCode) = mapDestReg dest in destCode @ ShiftRegister { source=srcVal, shift=shiftVal, dest=destVal, direction=direction, opSize=opSize} :: op2Code @ op1Code @ code end | pushRegisters({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...}, code) = let val (srcAVal, srcACode) = mapOptSrc sourceA val (srcMVal, srcMCode) = mapSrcReg sourceM val (srcNVal, srcNCode) = mapSrcReg sourceN val (destVal, destCode) = mapDestReg dest in destCode @ Multiplication { kind=kind, sourceA=srcAVal, sourceM=srcMVal, sourceN=srcNVal, dest=destVal} :: srcNCode @ srcMCode @ srcACode @ code end | pushRegisters({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...}, code) = let val (dividendVal, dividendCode) = mapSrcReg dividend val (divisorVal, divisorCode) = mapSrcReg divisor val (destVal, destCode) = mapDestReg dest in destCode @ Division { isSigned=isSigned, dividend=dividendVal, divisor=divisorVal, dest=destVal, opSize=opSize} :: divisorCode @ dividendCode @ code end | pushRegisters({instr=BeginFunction {regArgs, stackArgs}, ...}, code) = let (* Create a new container list. The offsets begin at -numArgs. *) fun newContainers(src :: srcs, offset) = let val newContainer = mapDestContainer(src, offset) in newContainer :: newContainers(srcs, offset+1) end | newContainers _ = [] val newStackArgs = newContainers(stackArgs, ~ (List.length stackArgs)) (* Push any registers that need to be pushed. *) fun pushReg((preg, rreg), (others, code)) = let val (newReg, newCode) = mapDestReg(preg) in ((newReg, rreg) :: others, newCode @ code) end val (newRegArgs, pushCode) = List.foldl pushReg ([], []) regArgs in pushCode @ BeginFunction {regArgs=newRegArgs, stackArgs=newStackArgs} :: code end | pushRegisters({instr=FunctionCall{callKind, regArgs, stackArgs, dest, containers, ...}, ...}, code) = let (* It's possible that this could lead to having to spill registers in order to load others. Leave that problem for the moment. 
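(* Restating the test used in loadStackArg in the TailRecursiveCall case below:
   an outgoing stack argument that is already on the stack can be left where it
   is when
       wordOffset = stack + newStackOffset          (it is already in its final slot)
       orelse stack + newStackOffset < 0            (it is being stored above the current top)
       orelse newStackOffset - wordOffset > ~stackAdjust   (it comes from above the last argument)
   and otherwise it must first be loaded into a fresh tagged register, which a
   later pass may in turn decide to push. *)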
*) fun loadStackArg (arg, otherArgs) = let val argVal = mapArgument arg in argVal :: otherArgs end val newStackArgs = List.foldr loadStackArg [] stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs val (destVal, destCode) = mapDestReg dest val newContainers = List.map(fn c => #2(mapContainerAndStack(c, 0))) containers in destCode @ FunctionCall{ callKind=callKind, regArgs=newRegArgs, stackArgs=newStackArgs, dest=destVal, saveRegs=[], containers=newContainers} :: code end | pushRegisters({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, ...}, ...}, code) = let val newStackOffset = !stackCount fun loadStackArg ({src, stack}, (otherLoads, otherArgs)) = let val (argVal, loadCode) = case mapArgument src of (source as ArgOnStack{wordOffset, container, field}) => (* If we're leaving it in its old location or we're pushing it above the current top we're ok. We're also ok if we're moving it from somewhere above the last argument. Otherwise we have to load it. It goes into a normal tagged register which may mean that it could be pushed onto the stack in a subsequent pass. *) if wordOffset = stack+newStackOffset orelse stack+newStackOffset < 0 orelse newStackOffset-wordOffset > ~ stackAdjust then (source, []) else let val preg = newPReg RegPropGeneral in (ArgInReg preg, [LoadStack{wordOffset=wordOffset, container=container, field=field, dest=preg}]) end | argCode => (argCode, []) in (loadCode @ otherLoads, {src=argVal, stack=stack} :: otherArgs) end val (stackArgLoads, newStackArgs) = List.foldr loadStackArg ([], []) stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs in TailRecursiveCall{ callKind=callKind, regArgs=newRegArgs, stackArgs=newStackArgs, stackAdjust=stackAdjust, currStackSize=newStackOffset} :: stackArgLoads @ code end | pushRegisters({instr=ReturnResultFromFunction{resultReg, returnReg, numStackArgs}, ...}, code) = let val (resultValue, loadResult) = mapSrcReg resultReg val (returnValue, loadReturn) = mapSrcReg returnReg val resetCode = if !stackCount = 0 then [] else [ResetStackPtr{numWords= !stackCount}] in ReturnResultFromFunction{resultReg=resultValue, returnReg=returnValue, numStackArgs=numStackArgs} :: resetCode @ loadReturn @ loadResult @ code end | pushRegisters({instr=RaiseExceptionPacket{packetReg}, ...}, code) = let val (packetVal, packetCode) = mapSrcReg packetReg in RaiseExceptionPacket{packetReg=packetVal} :: packetCode @ code end | pushRegisters({instr=PushToStack{ source, container, copies }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source (* This was a push from a previous pass. Treat as a container of size "copies". *) val newContainer = mapDestContainer(container, !stackCount) val () = pushItemToStack(OriginalEntry{stackLoc=container}) in PushToStack{source=sourceVal, container=newContainer, copies=copies} :: sourceCode @ code end | pushRegisters({instr=LoadStack{ dest, container, field, ... 
}, ...}, code) = let val (newOffset, newContainer) = mapContainerAndStack(container, field) val (destVal, destCode) = mapDestReg dest in destCode @ LoadStack{ dest=destVal, container=newContainer, field=field, wordOffset=newOffset } :: code end | pushRegisters({instr=StoreToStack{source, container, field, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (newOffset, newContainer) = mapContainerAndStack(container, field) in StoreToStack{source=sourceVal, container=newContainer, field=field, stackOffset=newOffset} :: sourceCode @ code end | pushRegisters({instr=ContainerAddress{ dest, container, ... }, ...}, code) = let val (newOffset, newContainer) = mapContainerAndStack(container, 0) val (destVal, destCode) = mapDestReg dest in destCode @ ContainerAddress{ dest=destVal, container=newContainer, stackOffset=newOffset } :: code end | pushRegisters({instr=ResetStackPtr _, ...}, code) = code (* Added in a previous pass - discard it. *) | pushRegisters({instr=TagValue{source, dest, isSigned, opSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ TagValue{source=sourceVal, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code end | pushRegisters({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...}, code) = let val (loadedSource, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UntagValue{source=loadedSource, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code end | pushRegisters({instr=BoxLarge{source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BoxLarge{source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code end | pushRegisters({instr=UnboxLarge{source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnboxLarge{source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=BoxTagFloat{floatSize, source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BoxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code end | pushRegisters({instr=UnboxTagFloat{floatSize, source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnboxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=LoadAcquire { base, dest, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadAcquire { base=baseVal, dest=destVal, loadType=loadType} :: baseCode @ code end | pushRegisters({instr=StoreRelease { base, source, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreRelease{ base=baseVal, source=sourceVal, loadType=loadType} :: baseCode @ sourceCode @ code end | pushRegisters({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BitFieldShift { source=sourceVal, dest=destVal, isSigned=isSigned, immr=immr, imms=imms, length=length} :: sourceCode @ code end | pushRegisters({instr=BitFieldInsert{source, destAsSource, dest, 
length, immr, imms}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destSrcVal, destSrcCode) = mapSrcReg destAsSource val (destVal, destCode) = mapDestReg dest in destCode @ BitFieldInsert { source=sourceVal, destAsSource=destSrcVal, dest=destVal, immr=immr, imms=imms, length=length} :: destSrcCode @ sourceCode @ code end | pushRegisters({instr=IndexedCaseOperation{testReg}, ...}, code) = let val (testVal, testCode) = mapSrcReg testReg in IndexedCaseOperation{testReg=testVal} :: testCode @ code end | pushRegisters({instr=PushExceptionHandler, ...}, code) = let (* Add a handler entry to the stack. *) val () = pushItemToStack HandlerEntry in PushExceptionHandler :: code end | pushRegisters({instr=PopExceptionHandler, ...}, code) = let (* Appears at the end of the block whose exceptions are being handled. Delete the handler and anything above it. *) (* Get the state after removing the handler. *) fun popContext ([], _) = raise InternalError "pushRegisters - pop handler" | popContext (HandlerEntry :: tl, new) = (tl, new-2) | popContext (OriginalEntry{stackLoc=StackLoc{size, ...}, ...} :: tl, new) = popContext(tl, new-size) | popContext (NewEntry _ :: tl, new) = popContext(tl, new-1) val (newStack, nnCount) = popContext(!stack, !stackCount) val () = stack := newStack val oldStackPtr = ! stackCount val () = stackCount := nnCount (* Reset the stack to just above the two words of the handler. *) val resetCode = if oldStackPtr <> nnCount+2 then [ResetStackPtr{numWords=oldStackPtr-nnCount-2}] else [] in PopExceptionHandler :: resetCode @ code end | pushRegisters({instr=BeginHandler{packetReg}, ...}, code) = let (* Start of a handler. The top active entry should be the handler. *) val () = case !stack of HandlerEntry :: tl => stack := tl | _ => raise InternalError "pushRegisters: BeginHandler" val () = stackCount := !stackCount - 2 val (packetVal, packetCode) = mapDestReg packetReg in packetCode @ BeginHandler{packetReg=packetVal} :: code end | pushRegisters({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...}, code) = let val (vec1Val, vec1Code) = mapSrcReg vec1Addr val (vec2Val, vec2Code) = mapSrcReg vec2Addr val (lenVal, lenCode) = mapSrcReg length in CompareByteVectors{vec1Addr=vec1Val, vec2Addr=vec2Val, length=lenVal, ccRef=ccRef} :: lenCode @ vec2Code @ vec1Code @ code end | pushRegisters({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...}, code) = let val (srcVal, srcCode) = mapSrcReg srcAddr val (destVal, destCode) = mapSrcReg destAddr val (lenVal, lenCode) = mapSrcReg length in BlockMove{srcAddr=srcVal, destAddr=destVal, length=lenVal, isByteMove=isByteMove} :: lenCode @ destCode @ srcCode @ code end | pushRegisters({instr=AddSubXSP{source, dest, isAdd}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ AddSubXSP { source=sourceVal, dest=destVal, isAdd=isAdd} :: sourceCode @ code end | pushRegisters({instr=TouchValue{source, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source in TouchValue { source=sourceVal} :: sourceCode @ code end | pushRegisters({instr=LoadAcquireExclusive{ base, dest }, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadAcquireExclusive { base=baseVal, dest=destVal} :: baseCode @ code end | pushRegisters({instr=StoreReleaseExclusive{ base, source, result }, ...}, code) = let val (sourceVal, sourceCode) = mapOptSrc source val (baseVal, baseCode) = mapSrcReg base val 
(resVal, resCode) = mapDestReg result in resCode @ StoreReleaseExclusive{ base=baseVal, source=sourceVal, result=resVal} :: baseCode @ sourceCode @ code end | pushRegisters({instr=MemoryBarrier, ...}, code) = MemoryBarrier :: code | pushRegisters({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ConvertIntToFloat{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize} :: sourceCode @ code end | pushRegisters({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ConvertFloatToInt{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize, rounding=rounding} :: sourceCode @ code end | pushRegisters({instr=UnaryFloatingPt{ source, dest, fpOp}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnaryFloatingPt{ source=sourceVal, dest=destVal, fpOp=fpOp} :: sourceCode @ code end | pushRegisters({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...}, code) = let val (arg1Val, arg1Code) = mapSrcReg arg1 val (arg2Val, arg2Code) = mapSrcReg arg2 val (destVal, destCode) = mapDestReg dest in destCode @ BinaryFloatingPoint{ arg1=arg1Val, arg2=arg2Val, dest=destVal, fpOp=fpOp, opSize=opSize} :: arg2Code @ arg1Code @ code end | pushRegisters({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...}, code) = let val (arg1Val, arg1Code) = mapSrcReg arg1 val (arg2Val, arg2Code) = mapSrcReg arg2 in CompareFloatingPoint{ arg1=arg1Val, arg2=arg2Val, opSize=opSize, ccRef=ccRef} :: arg2Code @ arg1Code @ code end + | pushRegisters({instr=CacheCheck _, ...}, _) = raise InternalError "pushRegisters: CacheCheck" + local fun doPush(instr as {kill, ...}, code) = let val newCode = pushRegisters(instr, code) (* Can we pop the stack? *) val stackReset = case setToList (minus(kill, loopRegs)) of [] => [] | killList => let (* See if any of the kill items are at the top of the stack. If they are we can pop them and perhaps items we've previously marked for deletion but not been able to pop. *) val oldStack = !stackCount fun checkAndAdd(r, output) = case Array.sub(pregMap, r) of ToStack(stackLoc, StackLoc{size, ...}) => if stackLoc < 0 then r :: output (* We can have arguments and return address. *) else if !stackCount = stackLoc+size then ( stack := tl (!stack); stackCount := stackLoc; output ) else r :: output | _ => r :: output val toAdd = List.foldl checkAndAdd [] killList fun reprocess list = let val prevStack = !stackCount val outlist = List.foldl checkAndAdd [] list in if !stackCount = prevStack then list else reprocess outlist end val () = if !stackCount = oldStack then deletedItems := toAdd @ !deletedItems else deletedItems := reprocess(toAdd @ !deletedItems) val _ = oldStack >= !stackCount orelse raise InternalError "negative stack offset" in if !stackCount = oldStack then [] else [ResetStackPtr{numWords=oldStack - !stackCount}] end in stackReset @ newCode end in val codeResult = List.foldl doPush [] block val outputCount = ! 
stackCount val results = {code=codeResult, stackCount= outputCount} val stateResult = { stackCount= outputCount, stack= !stack } val () = Array.update(blockOutput, blockNo, results) end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val addItems = List.map(fn m => (m, stateResult)) addSet in processBlocks(addItems @ stillToDo) end in val () = processBlocks([(0, {stack=[], stackCount=0})]) end (* Put together the result code and blocks. *) local fun createBlock blockNo = (* Skip unreferenced blocks apart from block 0. *) if blockNo <> 0 andalso null (asub blockRefs blockNo) then BasicBlock{block=[], flow=ExitCode} else let val ExtendedBasicBlock{ flow, ...} = vsub code blockNo val {code=codeResult, stackCount=outputCount, ...} = asub blockOutput blockNo (* Process the successor. If we need a stack adjustment this will require an adjustment block. TODO: We could put a pre-adjustment if we only have one branch to this block. *) fun matchStacks targetBlock = let (* Process the destination. If it hasn't been processed. *) val expectedInput = valOf (asub inputStackSizes targetBlock) in if expectedInput = outputCount then targetBlock else let val _ = outputCount > expectedInput orelse raise InternalError "adjustStack" val adjustCode = [ResetStackPtr{numWords=outputCount-expectedInput}] val newBlock = BasicBlock{block=adjustCode, flow=Unconditional targetBlock} val newBlockNo = !blockCounter before blockCounter := !blockCounter+1 val () = extraBlocks := newBlock :: !extraBlocks in newBlockNo end end val (finalCode, newFlow) = case flow of ExitCode => (codeResult, ExitCode) | Unconditional m => let (* Process the block. Since we're making an unconditional jump we can include any stack adjustment needed to match the destination in here. In particular this includes loops. *) val expectedInput = valOf (asub inputStackSizes m) val _ = outputCount >= expectedInput orelse raise InternalError "negative reset" val resultCode = if expectedInput = outputCount then codeResult else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult in (resultCode, Unconditional m) end (* For any of these, if we need to adjust the stack we have to add an adjustment block. *) | Conditional {trueJump, falseJump, ccRef, condition} => (codeResult, Conditional{trueJump=matchStacks trueJump, falseJump=matchStacks falseJump, ccRef=ccRef, condition=condition}) | SetHandler{ handler, continue } => (codeResult, SetHandler{ handler=matchStacks handler, continue=matchStacks continue}) | IndexedBr cases => (codeResult, IndexedBr(map matchStacks cases)) | u as UnconditionalHandle _ => (codeResult, u) | c as ConditionalHandle{ continue, ... } => let (* As for unconditional branch *) val expectedInput = valOf (asub inputStackSizes continue) val _ = outputCount >= expectedInput orelse raise InternalError "negative reset" val resultCode = if expectedInput = outputCount then codeResult else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult in (resultCode, c) end in BasicBlock{block=List.rev finalCode, flow=newFlow} end in val resultBlocks = Vector.tabulate(numberOfBlocks, createBlock) end (* Add any extra blocks to the result. 
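(* matchStacks above maintains the invariant that every block is entered with
   exactly the stack depth recorded for it in inputStackSizes. For example, if
   this block ends at depth 5 and a conditional successor expects depth 3, a
   fresh block containing ResetStackPtr{numWords=2} and jumping unconditionally
   to the successor is added via extraBlocks and the branch is redirected to
   it; for an unconditional jump the adjustment is emitted inline instead. *)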
*) val finalResult = case !extraBlocks of [] => resultBlocks | blocks => Vector.concat[resultBlocks, Vector.fromList(List.rev blocks)] val pregProperties = Vector.fromList(List.rev(! pregPropList)) in {code=finalResult, pregProps=pregProperties, maxStack= !maxStack} end structure Sharing = struct type extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and preg = preg and pregOrZero = pregOrZero end end;