diff --git a/libpolyml/arm64.cpp b/libpolyml/arm64.cpp index 273a6fe5..889212e8 100644 --- a/libpolyml/arm64.cpp +++ b/libpolyml/arm64.cpp @@ -1,1053 +1,1038 @@ /* Machine-dependent code for ARM64 Copyright David C.J. Matthews 2020-21. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ // Currently this is just copied from the interpreted version. #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) 0 #endif #ifdef HAVE_STRING_H #include #endif #include "globals.h" #include "machine_dep.h" #include "sys.h" #include "profiling.h" #include "arb.h" #include "processes.h" #include "run_time.h" #include "gc.h" #include "diagnostics.h" #include "polystring.h" #include "save_vec.h" #include "memmgr.h" #include "scanaddrs.h" #include "rtsentry.h" #include "bytecode.h" #include "int_opcodes.h" /* * ARM64 register use: * X0 First argument and return value * X1-X7 Second-eighth argument * X8 Indirect result (C), ML closure pointer on entry * X9-X15 Volatile scratch registers * X16-17 Intra-procedure-call (C). Only used for special cases in ML. * X18 Platform register. Not used in ML. * X19-X23 Non-volatile (C). Scratch registers (ML). * X24 Non-volatile (C). Scratch register (ML). Heap base in 32-in-64. * X25 ML Heap limit pointer * X26 ML assembly interface pointer. Non-volatile (C). * X27 ML Heap allocation pointer. Non-volatile (C). * X28 ML Stack pointer. Non-volatile (C). * X29 Frame pointer (C). Not used in ML * X30 Link register. * X31 Stack pointer (C). Only used when calling C. Also zero register. * * Floating point registers: * V0 First argument and return value * V1-V7 Second-eighth argument * V8-V15 Non volatile. Not currently used in ML. * V16-V31 Volatile. Not currently used in ML. * * The ML calling conventions generally follow the C ABI except that * all registers are volatile and X28 is used for the stack. */ /* the amount of ML stack space to reserve for registers, C exception handling etc. The compiler requires us to reserve 2 stack-frames worth (2 * 20 words). We actually reserve slightly more than this. */ // Arm64 instructions are all 32-bit values. typedef uint32_t arm64Instr, *arm64CodePointer; // Each function checks for space on the stack at the start. To reduce the // code size it assumes there are at least 10 words on the stack and only // checks the exact space if it requires more than that. For safety we // always make sure there are 50 words spare. #define OVERFLOW_STACK_SIZE 50 // X26 always points at this area when executing ML code. // The offsets are built into the assembly code and some are built into // the code generator so this must not be changed without checking these. typedef struct _AssemblyArgs { public: byte* enterInterpreter; // These are filled in with the functions. byte* heapOverFlowCall; byte* stackOverFlowCall; byte* stackOverFlowCallEx; byte* trapHandlerEntry; stackItem* handlerRegister; // Current exception handler stackItem* stackLimit; // Lower limit of stack stackItem exceptionPacket; // Set if there is an exception stackItem threadId; // My thread id. Saves having to call into RTS for it. (stackItem so it's 64-bits) stackItem registers[25]; // Save/load area for registers X0-X24 inclusive double fpRegisters[8]; // Save/load area for floating point regs D0-D7 PolyWord* localMbottom; // Base of memory + 1 word PolyWord* localMpointer; // X27 Allocation ptr + 1 word stackItem* stackPtr; // X28 Current stack pointer arm64CodePointer linkRegister; // X30 - Link register (return address) arm64CodePointer entryPoint; // PC address to return to byte returnReason; // Reason for returning from ML - Set by assembly code. } AssemblyArgs; class Arm64TaskData: public TaskData, ByteCodeInterpreter { public: Arm64TaskData(); ~Arm64TaskData() {} unsigned allocReg; // The register to take the allocated space. POLYUNSIGNED allocWords; // The words to allocate. AssemblyArgs assemblyInterface; uint32_t saveRegisterMask; // Registers that need to be updated by a GC. virtual void GarbageCollect(ScanAddress *process); void ScanStackAddress(ScanAddress *process, stackItem& val, StackSpace *stack); virtual void EnterPolyCode(); // Start running ML virtual void SetException(poly_exn *exc) { assemblyInterface.exceptionPacket = (PolyWord)exc; } virtual void InterruptCode(); // AddTimeProfileCount is used in time profiling. virtual bool AddTimeProfileCount(SIGNALCONTEXT *context); virtual void InitStackFrame(TaskData *newTask, Handle proc); // Atomically release a mutex using hardware interlock. virtual bool AtomicallyReleaseMutex(PolyObject* mutexp); // Return the minimum space occupied by the stack. Used when setting a limit. // N.B. This is PolyWords not native words. virtual uintptr_t currentStackSpace(void) const { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE; } virtual void addProfileCount(POLYUNSIGNED words) { addSynchronousCount(interpreterPc, words); } // PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer virtual void PreRTSCall(void) { TaskData::PreRTSCall(); SaveMemRegisters(); } // PostRTSCall: Before returning we need to restore the heap pointer. // If there has been a GC in the RTS call we need to create a new heap area. virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall(); } virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length); void SetMemRegisters(); void SaveMemRegisters(); void HandleTrap(); // ByteCode overrides. The interpreter and native code states need to be in sync. // The interpreter is only used during the initial bootstrap. virtual void ClearExceptionPacket() { assemblyInterface.exceptionPacket = TAGGED(0); } virtual PolyWord GetExceptionPacket() { return assemblyInterface.exceptionPacket; } virtual stackItem* GetHandlerRegister() { return assemblyInterface.handlerRegister; } virtual void SetHandlerRegister(stackItem* hr) { assemblyInterface.handlerRegister = hr; } void Interpret(); void EndBootStrap() { mixedCode = true; } PLock interruptLock; virtual void HandleStackOverflow(uintptr_t space); }; class Arm64Dependent : public MachineDependent { public: Arm64Dependent() : mustInterpret(false) {} // Create a task data object. virtual TaskData* CreateTaskData(void) { return new Arm64TaskData(); } virtual void ScanConstantsWithinCode(PolyObject* addr, PolyObject* oldAddr, POLYUNSIGNED length, PolyWord* newConstAddr, PolyWord* oldConstAddr, POLYUNSIGNED numConsts, ScanAddress* process); virtual Architectures MachineArchitecture(void); virtual void SetBootArchitecture(char arch, unsigned wordLength); // The ARM has separate instruction and data caches. virtual void FlushInstructionCache(void* p, POLYUNSIGNED bytes); // During the first bootstrap phase this is interpreted. bool mustInterpret; -#if (! defined(POLYML32IN64)) +#if defined(POLYML32IN64) + virtual void UpdateGlobalHeapReference(PolyObject* addr); +#else // Address of the constant segment from the code segment. This is complicated because // some OSs require the code to position-independent which means the code can only // contain relative offsets. This isn't a problem for 32-in-64 because the code is // copied before it is executed. // Set the address of the constant area. If this is within the code segment itself we use the // default, negative, byte offset. If the constant area has been split off we use a pair of // dummy ADRP/LDR instructions. They aren't ever executed but allow us to use relative addressing. virtual void SetAddressOfConstants(PolyObject* objAddr, PolyObject* writable, POLYUNSIGNED length, PolyWord* constAddr) { if (constAddr > (PolyWord*)objAddr && constAddr < (PolyWord*)objAddr + length) { int64_t offset = (byte*)constAddr - (byte*)objAddr - length * sizeof(PolyWord); writable->Set(length - 1, PolyWord::FromSigned(offset)); } else { PolyWord* last_word = objAddr->Offset(length - 1); // Last word in the code MemSpace* space = gMem.SpaceForAddress(last_word); uint32_t* pt = (uint32_t*)space->writeAble(last_word); pt[0] = 0x90000000; // Insert dummy ADRP and LDR pt[1] = 0xf9400000; ScanAddress::SetConstantValue((byte*)last_word, (PolyObject*)constAddr, PROCESS_RELOC_ARM64ADRPLDR); } } virtual void GetConstSegmentForCode(PolyObject* obj, POLYUNSIGNED obj_length, PolyWord*& cp, POLYUNSIGNED& count) const { PolyWord* last_word = obj->Offset(obj_length - 1); // Last word in the code if ((last_word[0].AsUnsigned() >> 56) == 0xff) { // If the high-order byte is 0xff it's a (-ve) byte offset. POLYSIGNED offset = last_word->AsSigned(); cp = last_word + 1 + offset / sizeof(PolyWord); count = cp[-1].AsUnsigned(); } else { PolyObject* addr = ScanAddress::GetConstantValue((byte*)last_word, PROCESS_RELOC_ARM64ADRPLDR, 0); cp = (PolyWord*)addr; count = addr->Length(); } } #endif - - // Override for X86-64 because of the need for position-independent code. -#if (defined(HOSTARCHITECTURE_X86_64) && !defined(POLYML32IN64)) - // Find the start of the constant section for a piece of code. - virtual void GetConstSegmentForCode(PolyObject* obj, POLYUNSIGNED obj_length, PolyWord*& cp, POLYUNSIGNED& count) const - { - PolyWord* last_word = obj->Offset(obj_length - 1); // Last word in the code - // Only the low order 32-bits are valid since this may be - // set by a 32-bit relative relocation. - int32_t offset = (int32_t)last_word->AsSigned(); - POLYSIGNED offset = last_word->AsSigned(); - cp = last_word + 1 + offset / sizeof(PolyWord); - count = cp[-1].AsUnsigned(); - } - // Set the address of the constant area. The default is a relative byte offset. - virtual void SetAddressOfConstants(PolyObject* objAddr, PolyObject* writable, POLYUNSIGNED length, PolyWord* constAddr) - { - int64_t offset = (byte*)constAddr - (byte*)objAddr - length * sizeof(PolyWord); - ASSERT(offset >= -(int64_t)0x80000000 && offset <= (int64_t)0x7fffffff); - ASSERT(offset < ((int64_t)1) << 32 && offset >((int64_t)(-1)) << 32); - writable->Set(length - 1, PolyWord::FromSigned(offset & 0xffffffff)); - } -#endif }; static Arm64Dependent arm64Dependent; MachineDependent* machineDependent = &arm64Dependent; Architectures Arm64Dependent::MachineArchitecture(void) { // During the first phase of the bootstrap we // compile the interpreted version. if (mustInterpret) return MA_Interpreted; #if defined(POLYML32IN64) return MA_Arm64_32; #else return MA_Arm64; #endif } // Values for the returnReason byte. These values are put into returnReason by the assembly code // depending on which of the "trap" functions has been called. enum RETURN_REASON { RETURN_HEAP_OVERFLOW = 1, // Heap space check has failed. RETURN_STACK_OVERFLOW = 2, // Stack space check has failed (<= 10 words). RETURN_STACK_OVERFLOWEX = 3, // Stack space check has failed. Adjusted SP is in X9. RETURN_ENTER_INTERPRETER = 4 // Native code has entered interpreted code. }; extern "C" { // These are declared in the assembly code segment. void Arm64AsmEnterCompiledCode(void*); int Arm64AsmCallExtraRETURN_ENTER_INTERPRETER(void); int Arm64AsmCallExtraRETURN_HEAP_OVERFLOW(void); int Arm64AsmCallExtraRETURN_STACK_OVERFLOW(void); int Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX(void); // This is declared here and called from the assembly code. // It avoids having a call to an external in the assembly code // which sometimes gives problems with position-indepent code. void Arm64TrapHandler(PolyWord threadId); }; Arm64TaskData::Arm64TaskData() : ByteCodeInterpreter(&assemblyInterface.stackPtr, &assemblyInterface.stackLimit), allocReg(0), allocWords(0), saveRegisterMask(0) { assemblyInterface.enterInterpreter = (byte*)Arm64AsmCallExtraRETURN_ENTER_INTERPRETER; assemblyInterface.heapOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_HEAP_OVERFLOW; assemblyInterface.stackOverFlowCall = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOW; assemblyInterface.stackOverFlowCallEx = (byte*)Arm64AsmCallExtraRETURN_STACK_OVERFLOWEX; assemblyInterface.trapHandlerEntry = (byte*)Arm64TrapHandler; interpreterPc = 0; mixedCode = !arm64Dependent.mustInterpret; } void Arm64Dependent::SetBootArchitecture(char arch, unsigned wordLength) { if (arch == 'I') mustInterpret = true; else if (arch != 'A') Crash("Boot file has unexpected architecture code: %c", arch); } // The ARM has separate instruction and data caches so we must flush // the cache when creating or modifying code. void Arm64Dependent::FlushInstructionCache(void* p, POLYUNSIGNED bytes) { #ifdef _WIN32 ::FlushInstructionCache(GetCurrentProcess(), p, bytes); #elif defined (__GNUC__) __builtin___clear_cache((char*)p, (char*)p + bytes); #elif (defined (__clang__) && defined (__APPLE__)) sys_icache_invalidate(p, bytes); #else #error "No code to flush the instruction cache." #endif } void Arm64TaskData::GarbageCollect(ScanAddress *process) { TaskData::GarbageCollect(process); ByteCodeInterpreter::GarbageCollect(process); assemblyInterface.threadId = stackItem(threadObject); // threadObject updated by TaskData::GarbageCollect if (assemblyInterface.exceptionPacket.w().IsDataPtr()) { PolyObject* obj = assemblyInterface.exceptionPacket.w().AsObjPtr(); obj = process->ScanObjectAddress(obj); assemblyInterface.exceptionPacket = (PolyWord)obj; } if (stack != 0) { stackItem*stackPtr = assemblyInterface.stackPtr; // Now the values on the stack. for (stackItem* q = stackPtr; q < (stackItem*)stack->top; q++) ScanStackAddress(process, *q, stack); } // Register mask. There is a bit for each of the registers up to X24. for (int i = 0; i < 25; i++) { if (saveRegisterMask & (1 << i)) ScanStackAddress(process, assemblyInterface.registers[i], stack); } // Make sure the code is still reachable. Code addresses aren't updated. { stackItem code; code.codeAddr = (POLYCODEPTR)assemblyInterface.linkRegister; ScanStackAddress(process, code, stack); code.codeAddr = (POLYCODEPTR)assemblyInterface.entryPoint; ScanStackAddress(process, code, stack); } } // Process a value within the stack. void Arm64TaskData::ScanStackAddress(ScanAddress *process, stackItem& stackItem, StackSpace *stack) { // Code addresses on the ARM are always even, unlike the X86, so if it's tagged // it can't be an address. if (stackItem.w().IsTagged()) return; #ifdef POLYML32IN64 // In 32-in-64 we can have either absolute addresses or object indexes. // Absolute addresses always have the top 32-bits non-zero if (stackItem.argValue < ((uintptr_t)1 << 32)) { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } else { // Could be a code address or a stack address. MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space == 0 || space->spaceType != ST_CODE) return; PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); ASSERT(obj != 0); // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } #else MemSpace* space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space->spaceType == ST_CODE) { PolyObject* obj = gMem.FindCodeObject(stackItem.codeAddr); // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } else if (space->spaceType == ST_LOCAL) // Local values must be word addresses. { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } #endif } // Copy a stack void Arm64TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length) { #ifdef POLYML32IN64 old_length = old_length / 2; new_length = new_length / 2; #endif /* Moves a stack, updating all references within the stack */ stackItem*old_base = (stackItem*)old_stack; stackItem*new_base = (stackItem*)new_stack; stackItem*old_top = old_base + old_length; /* Calculate the offset of the new stack from the old. If the frame is being extended objects in the new frame will be further up the stack than in the old one. */ uintptr_t offset = new_base - old_base + new_length - old_length; stackItem *oldSp = assemblyInterface.stackPtr; assemblyInterface.stackPtr = oldSp + offset; assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset; /* Skip the unused part of the stack. */ uintptr_t i = oldSp - old_base; ASSERT(i <= old_length); i = old_length - i; stackItem *old = oldSp; stackItem *newp = assemblyInterface.stackPtr; while (i--) { stackItem old_word = *old++; if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top) old_word.stackAddr = old_word.stackAddr + offset; else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr)) { stackItem* addr = (stackItem*)old_word.w().AsStackAddr(); if (addr >= old_base && addr <= old_top) { addr += offset; old_word = PolyWord::FromStackAddr((PolyWord*)addr); } } *newp++ = old_word; } ASSERT(old == ((stackItem*)old_stack) + old_length); ASSERT(newp == ((stackItem*)new_stack) + new_length); } void Arm64TaskData::EnterPolyCode() /* Called from "main" to enter the code. */ { assemblyInterface.stackLimit = (stackItem*)((PolyWord*)this->stack->stack() + OVERFLOW_STACK_SIZE); if (arm64Dependent.mustInterpret) { PolyWord closure = assemblyInterface.registers[8]; *(--assemblyInterface.stackPtr) = closure; /* Closure address */ interpreterPc = *(POLYCODEPTR*)closure.AsObjPtr(); Interpret(); ASSERT(0); // Should never return } SetMemRegisters(); // Jump into the ML code. This code sets up the registers and puts the // address of the assemblyInterface into X26 Arm64AsmEnterCompiledCode(&assemblyInterface); // This should never return ASSERT(0); } void Arm64TaskData::Interpret() { while (true) { switch (RunInterpreter(this)) { case ReturnCall: // After the call there will be an enter-int instruction so that when this // returns we will re-enter the interpreter. The number of arguments for // this call is after that. while ((uintptr_t)interpreterPc & 3) { ASSERT(interpreterPc[0] == INSTR_no_op); interpreterPc++; } ASSERT(interpreterPc[0] == 0xe9); numTailArguments = interpreterPc[12]; case ReturnTailCall: { ClearExceptionPacket(); // Pop the closure. PolyWord closureWord = *assemblyInterface.stackPtr++; PolyObject* closure = closureWord.AsObjPtr(); arm64CodePointer cp = *(arm64CodePointer*)closure; if (cp[0] == 0xAA1E03E9 && cp[1] == 0xF9400350 && cp[2] == 0xD63F0200) { // If the code we're going to is interpreted push back the closure and // continue. interpreterPc = (POLYCODEPTR)cp; assemblyInterface.stackPtr--; HandleStackOverflow(128); // Make sure we have space since we're bypassing the check. continue; } assemblyInterface.registers[8] = closureWord; // Put closure in the closure reg. // Pop the return address. We may need to align this to a word boundary. POLYCODEPTR originalReturn = (POLYCODEPTR)((assemblyInterface.stackPtr++)->codeAddr); while ((uintptr_t)originalReturn & 3) { ASSERT(originalReturn[0] == INSTR_no_op); originalReturn++; } // Get the arguments into the correct registers. // Load the register arguments. The first 8 arguments go into X0-X7. // These will have been the first arguments to be pushed so will be // furthest away on the stack. // Note: we don't currently pass any arguments in the FP regs. for (unsigned i = 0; i < numTailArguments && i < 8; i++) assemblyInterface.registers[i] = assemblyInterface.stackPtr[numTailArguments - i - 1]; // If there are any more arguments these need to be shifted down the stack. while (numTailArguments > 8) { numTailArguments--; assemblyInterface.stackPtr[numTailArguments] = assemblyInterface.stackPtr[numTailArguments - 8]; } // Remove the register arguments assemblyInterface.stackPtr += numTailArguments > 8 ? 8 : numTailArguments; assemblyInterface.linkRegister = (arm64CodePointer)originalReturn; // Set the return address to caller assemblyInterface.entryPoint = *(arm64CodePointer*)closure; // Entry point to callee interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } case ReturnReturn: { ClearExceptionPacket(); // Returning from an interpreted function. Normally we'll be returning to // interpreted code. if ((uintptr_t)interpreterPc & 3) // ARM64 addresses will always be 4-byte aligned. continue; arm64CodePointer cp = (arm64CodePointer)interpreterPc; if (cp[0] == 0xAA1E03E9 && cp[1] == 0xF9400350 && cp[2] == 0xD63F0200) continue; // Pop the value we're returning. Set the entry point to the code we're returning to. assemblyInterface.registers[0] = *assemblyInterface.stackPtr++; assemblyInterface.entryPoint = cp; interpreterPc = 0; // No longer in the interpreter (See SaveMemRegs) return; } } } } // Called from the assembly code as a result of a trap i.e. a request for // a GC or to extend the stack. void Arm64TrapHandler(PolyWord threadId) { Arm64TaskData* taskData = (Arm64TaskData*)TaskData::FindTaskForId(threadId); taskData->HandleTrap(); } void Arm64TaskData::HandleTrap() { SaveMemRegisters(); // Update globals from the memory registers. switch (this->assemblyInterface.returnReason) { case RETURN_HEAP_OVERFLOW: { // The heap has overflowed. // The register mask is the word after the return. saveRegisterMask = *assemblyInterface.entryPoint++; // The generated code first subtracts the space required from x27 and puts the // result into a separate register. It then compares this with x25 and comes here if // it is not above that. Either way it is going to execute an instruction to put // this value back into x27. // Look at that instruction to find out the register. arm64Instr moveInstr = *assemblyInterface.entryPoint; ASSERT((moveInstr & 0xffe0ffff) == 0xaa0003fb); // mov x27,xN allocReg = (moveInstr >> 16) & 0x1f; allocWords = (allocPointer - (PolyWord*)assemblyInterface.registers[allocReg].stackAddr) + 1; assemblyInterface.registers[allocReg] = TAGGED(0); // Clear this - it's not a valid address. if (profileMode == kProfileStoreAllocation) addProfileCount(allocWords); // The actual allocation is done in SetMemRegisters. break; } case RETURN_STACK_OVERFLOW: case RETURN_STACK_OVERFLOWEX: { // The register mask is the word after the return. saveRegisterMask = *assemblyInterface.entryPoint++; uintptr_t min_size = 0; // Size in PolyWords if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW) { min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } else { // Stack limit overflow. If the required stack space is larger than // the fixed overflow size the code will calculate the limit in X9. stackItem* stackP = assemblyInterface.registers[9].stackAddr; min_size = (this->stack->top - (PolyWord*)stackP) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } HandleStackOverflow(min_size); break; } case RETURN_ENTER_INTERPRETER: { interpreterPc = (POLYCODEPTR)assemblyInterface.linkRegister; byte reasonCode = *interpreterPc++; // Sort out arguments. assemblyInterface.exceptionPacket = TAGGED(0); if (reasonCode == 0xff) { // Exception handler. assemblyInterface.exceptionPacket = assemblyInterface.registers[0]; // Get the exception packet // We need to leave the current handler in place. When we enter the interpreter it will // check the exception packet and if it is non-null will raise it. } else if (reasonCode >= 128) { // Start of function. unsigned numArgs = reasonCode - 128; // We need the stack to contain: // The closure, the return address, the arguments. // The stack will currently contain the stack arguments. // Add space for the register arguments if (numArgs > 8) assemblyInterface.stackPtr -= 8; else assemblyInterface.stackPtr -= numArgs; // Move up any stack arguments. for (unsigned n = 8; n < numArgs; n++) { assemblyInterface.stackPtr[n - 8] = assemblyInterface.stackPtr[n]; } // Store the register arguments for (unsigned n = 0; n < numArgs && n < 8; n++) assemblyInterface.stackPtr[numArgs - n - 1] = assemblyInterface.registers[n]; // Finally push the return address and closure pointer *(--assemblyInterface.stackPtr) = assemblyInterface.registers[9]; // Return address - value of X30 before enter-int *(--assemblyInterface.stackPtr) = assemblyInterface.registers[8]; // Closure } else { // Return from call. Push X0 *(--assemblyInterface.stackPtr) = assemblyInterface.registers[0]; } Interpret(); break; } default: Crash("Unknown return reason code %u", this->assemblyInterface.returnReason); } SetMemRegisters(); } void Arm64TaskData::HandleStackOverflow(uintptr_t space) { uintptr_t min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE + space; try { // The stack check has failed. This may either be because we really have // overflowed the stack or because the stack limit value has been adjusted // to result in a call here. CheckAndGrowStack(this, min_size); } catch (IOException&) { // We may get an exception while handling this if we run out of store } { PLocker l(&interruptLock); // Set the stack limit. This clears any interrupt and also sets the // correct value if we've grown the stack. assemblyInterface.stackLimit = (stackItem*)stack->bottom + OVERFLOW_STACK_SIZE; } try { processes->ProcessAsynchRequests(this); // Release and re-acquire use of the ML memory to allow another thread // to GC. processes->ThreadReleaseMLMemory(this); processes->ThreadUseMLMemory(this); } catch (IOException&) { } } void Arm64TaskData::InitStackFrame(TaskData* parentTask, Handle proc) /* Initialise stack frame. */ { StackSpace* space = this->stack; StackObject* stack = (StackObject*)space->stack(); uintptr_t stack_size = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem); assemblyInterface.stackPtr = (stackItem*)stack + stack_size; assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE; assemblyInterface.handlerRegister = assemblyInterface.stackPtr; // Store the argument and the closure. assemblyInterface.registers[8] = proc->Word(); // Closure assemblyInterface.registers[0] = TAGGED(0); // Argument assemblyInterface.linkRegister = (arm64CodePointer)1; // We never return. Use a tagged value because it may be pushed assemblyInterface.entryPoint = (arm64CodePointer)1; // Have to set the register mask in case we get a GC before the thread starts. saveRegisterMask = (1 << 8) | 1; // X8 and X0 #ifdef POLYML32IN64 // In 32-in-64 RBX always contains the heap base address. assemblyInterface.registers[24].stackAddr = (stackItem*)globalHeapBase; #endif } // This is called from a different thread so we have to be careful. void Arm64TaskData::InterruptCode() { PLocker l(&interruptLock); // Set the stack limit pointer to the top of the stack to cause // a trap when we next check for stack overflow. // We use a lock here to ensure that we always use the current value of the // stack. The thread we're interrupting could be growing the stack at this point. if (stack != 0) assemblyInterface.stackLimit = (stackItem*)(stack->top - 1); } // Called before entering ML code from the run-time system void Arm64TaskData::SetMemRegisters() { // Copy the current store limits into variables before we go into the assembly code. // If we haven't yet set the allocation area or we don't have enough we need // to create one (or a new one). if (allocPointer <= allocLimit + allocWords) { if (allocPointer < allocLimit) Crash("Bad length in heap overflow trap"); // Find some space to allocate in. Updates taskData->allocPointer and // returns a pointer to the newly allocated space (if allocWords != 0) PolyWord* space = processes->FindAllocationSpace(this, allocWords, true); if (space == 0) { // We will now raise an exception instead of returning. // Set allocWords to zero so we don't set the allocation register // since that could be holding the exception packet. allocWords = 0; } // Undo the allocation just now. allocPointer += allocWords; } if (this->allocWords != 0) { // If we have had a heap trap we actually do the allocation here. // We will have already garbage collected and recovered sufficient space. // This also happens if we have just trapped because of store profiling. allocPointer -= allocWords; // Now allocate // Set the allocation register to this area. N.B. This is an absolute address. assemblyInterface.registers[allocReg].codeAddr = (POLYCODEPTR)(allocPointer + 1); /* remember: it's off-by-one */ allocWords = 0; } // If we have run out of store, either just above or while allocating in the RTS, // allocPointer and allocLimit will have been set to zero as part of the GC. We will // now be raising an exception which may free some store but we need to come back here // before we allocate anything. The compiled code uses unsigned arithmetic to check for // heap overflow but only after subtracting the space required. We need to make sure // that the values are still non-negative after substracting any object size. if (allocPointer == 0) allocPointer += MAX_OBJECT_SIZE; if (allocLimit == 0) allocLimit += MAX_OBJECT_SIZE; assemblyInterface.localMbottom = allocLimit + 1; assemblyInterface.localMpointer = allocPointer + 1; // If we are profiling store allocation we set mem_hl so that a trap // will be generated. if (profileMode == kProfileStoreAllocation) assemblyInterface.localMbottom = assemblyInterface.localMpointer; assemblyInterface.threadId = stackItem(threadObject); } // This is called whenever we have returned from ML to C. void Arm64TaskData::SaveMemRegisters() { if (interpreterPc == 0) { // Not if we're already in the interpreter // The normal return is to the link register address. assemblyInterface.entryPoint = assemblyInterface.linkRegister; allocPointer = assemblyInterface.localMpointer - 1; } allocWords = 0; assemblyInterface.exceptionPacket = TAGGED(0); saveRegisterMask = 0; } // Process addresses in the code. The only case where we need to do that on the ARM64 is to deal // with spltting the constant area from the code in order to make the code position-independent. // We need to convert pc-relative LDR instructions into ADRP/LDR pairs. void Arm64Dependent::ScanConstantsWithinCode(PolyObject* addr, PolyObject* oldAddr, POLYUNSIGNED length, PolyWord* newConstAddr, PolyWord* oldConstAddr, POLYUNSIGNED numConsts, ScanAddress* process) { +#ifndef POLYML32IN64 arm64CodePointer pt = (arm64CodePointer)addr; -#ifdef POLYML32IN64 - // The only case we have to consider in 32-in-64 is the special hack for - // the global heap base in callbacks. - if (pt[0] == 0xD503201F && (pt[1] & 0xff000000) == 0x58000000) - { - // nop (special marker) followed by LDR Xn,pc-relative - uint32_t pcOffset = (pt[1] >> 5) & 0x3ffff; // This is a number of 32-bit words - PolyWord* gHeapAddr = ((PolyWord*)addr) + pcOffset + 1; // PolyWords are 32-bits - if (((PolyWord**)gHeapAddr)[0] != globalHeapBase) - ((PolyWord**)gMem.SpaceForAddress(gHeapAddr)->writeAble(gHeapAddr))[0] = globalHeapBase; - } -#else - // If it begins with the enter-int sequence it's interpreted code. + // If it begins with the enter-int sequence it's interpreted code. if (pt[0] == 0xAA1E03E9 && pt[1] == 0xF9400350 && pt[2] == 0xD63F0200) return; // We only need a split if the constant area is not at the original offset. POLYSIGNED constAdjustment = (byte*)newConstAddr - (byte*)addr - ((byte*)oldConstAddr - (byte*)oldAddr); // If we have replaced the offset with a dummy ADRP/LDR pair we have to add a relocation. PolyWord* end = addr->Offset(length - 1); if ((end[0].AsUnsigned() >> 56) != 0xff) process->RelocateOnly(addr, (byte*)end, PROCESS_RELOC_ARM64ADRPLDR); while (*pt != 0) // The code ends with a UDF instruction (0) { if ((*pt & 0xbf000000) == 0x18000000) // LDR with pc-relative offset { // This could be a reference to the constant area or to the non-address area. // References to the constant area are followed by a nop if (constAdjustment != 0 && pt[1] == 0xd503201f) { unsigned reg = pt[0] & 0x1f; // The displacement is a signed multiple of 4 bytes but it will always be +ve ASSERT((pt[0] & 0x00800000) == 0); // The constant address is relative to the new location of the code. byte* constAddress = (byte*)(pt + ((pt[0] >> 5) & 0x7ffff)); byte* newAddress = (byte*)constAddress + constAdjustment; pt[0] = 0x90000000 + reg; // ADRP Xn, 0 pt[1] = 0xf9400000 + (reg << 5) + reg; // LDR Xn,[Xn+#0] ScanAddress::SetConstantValue((byte*)pt, (PolyObject*)newAddress, PROCESS_RELOC_ARM64ADRPLDR); } } else if ((*pt & 0x9f000000) == 0x90000000) // ADRP instruction { // These only occur after we have converted LDRs above ASSERT((pt[1] & 0xffc00000) == 0xf9400000); // The next should be the Load // For the moment assume that this does not require a move. ASSERT(addr == oldAddr && newConstAddr == oldConstAddr); process->RelocateOnly(addr, (byte*)pt, PROCESS_RELOC_ARM64ADRPLDR); } pt++; } #endif } +// This is a special hack for FFI callbacks in 32-in-64. This is called +// +#ifdef POLYML32IN64 +void Arm64Dependent::UpdateGlobalHeapReference(PolyObject* addr) +{ + arm64CodePointer pt = (arm64CodePointer)addr; + if (pt[0] == 0xD503201F && (pt[1] & 0xff000000) == 0x58000000) + { + // nop (special marker) followed by LDR Xn,pc-relative + uint32_t pcOffset = (pt[1] >> 5) & 0x3ffff; // This is a number of 32-bit words + PolyWord* gHeapAddr = ((PolyWord*)addr) + pcOffset + 1; // PolyWords are 32-bits + if (((PolyWord**)gHeapAddr)[0] != globalHeapBase) + ((PolyWord**)gMem.SpaceForAddress(gHeapAddr)->writeAble(gHeapAddr))[0] = globalHeapBase; + } +} +#endif + // As far as possible we want locking and unlocking an ML mutex to be fast so // we try to implement the code in the assembly code using appropriate // interlocked instructions. That does mean that if we need to lock and // unlock an ML mutex in this code we have to use the same, machine-dependent, // code to do it. These are defaults that are used where there is no // machine-specific code. #if defined(_MSC_VER) // This saves having to define it in the MASM assembly code. static uintptr_t Arm64AsmAtomicExchange(PolyObject* mutexp, uintptr_t value) { # if (SIZEOF_POLYWORD == 8) return InterlockedExchange64((LONG64*)mutexp, value); # else return InterlockedExchange((LONG*)mutexp, value); # endif } #else extern "C" { // This is only defined in the GAS assembly code uintptr_t Arm64AsmAtomicExchange(PolyObject*, uintptr_t); } #endif bool Arm64TaskData::AtomicallyReleaseMutex(PolyObject* mutexp) { uintptr_t oldValue = Arm64AsmAtomicExchange(mutexp, 0); return oldValue == 1; } bool Arm64TaskData::AddTimeProfileCount(SIGNALCONTEXT *context) { stackItem* sp = 0; POLYCODEPTR pc = 0; if (context != 0) { #if defined(HAVE_WINDOWS_H) sp = (stackItem*)context->Sp; pc = (POLYCODEPTR)context->Pc; #elif defined(HAVE_UCONTEXT_T) #ifdef HAVE_MCONTEXT_T_REGS // Linux sp = (stackItem*)context->uc_mcontext.sp; pc = (POLYCODEPTR)context->uc_mcontext.pc; #endif #endif } if (pc != 0) { // See if the PC we've got is an ML code address. MemSpace* space = gMem.SpaceForAddress(pc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { incrementCountAsynch(pc); return true; } } // See if the sp value is in the current stack. if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top) { // We may be in the assembly code. The top of the stack will be a return address. pc = sp[0].w().AsCodePtr(); MemSpace* space = gMem.SpaceForAddress(pc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { incrementCountAsynch(pc); return true; } } // See if the value of regSP is a valid stack pointer. // This works if we happen to be in an RTS call using a "Full" call. // It doesn't work if we've used a "Fast" call because that doesn't save the SP. sp = assemblyInterface.stackPtr; if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top) { // We may be in the run-time system. pc = sp[0].w().AsCodePtr(); MemSpace* space = gMem.SpaceForAddress(pc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { incrementCountAsynch(pc); return true; } } return false; } extern "C" { POLYEXTERNALSYMBOL void* PolyArm64GetThreadData(); POLYEXTERNALSYMBOL POLYUNSIGNED PolyInterpretedEnterIntMode(); POLYEXTERNALSYMBOL POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function); } // Return the address of assembly data for the current thread. This is normally in // X26 except if we are in a callback. void* PolyArm64GetThreadData() { // We should get the task data for the thread that is running this code. // If this thread has been created by the foreign code we will have to // create a new one here. TaskData* taskData = processes->GetTaskDataForThread(); if (taskData == 0) { try { taskData = processes->CreateNewTaskData(); } catch (std::bad_alloc&) { ::Exit("Unable to create thread data - insufficient memory"); } catch (MemoryException&) { ::Exit("Unable to create thread data - insufficient memory"); } } return &((Arm64TaskData*)taskData)->assemblyInterface; } // Do we require EnterInt instructions and if so for which architecture? // 0 = > None; 1 => X86_32, 2 => X86_64. 3 => X86_32_in_64. 4 => ARM_64. // ARM_64 in 32 is the same as ARM64. POLYUNSIGNED PolyInterpretedEnterIntMode() { return TAGGED(4).AsUnsigned(); } // End the first stage of bootstrap mode and run a new function. // The first stage is always interpreted. Once that is complete every function will have // at least an executable "enter-interpreter" stub so it can be called as machine code. POLYUNSIGNED PolyEndBootstrapMode(FirstArgument threadId, PolyWord function) { TaskData* taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle pushedFunction = taskData->saveVec.push(function); arm64Dependent.mustInterpret = false; ((Arm64TaskData*)taskData)->EndBootStrap(); taskData->InitStackFrame(taskData, pushedFunction); taskData->EnterPolyCode(); // Should never return. ASSERT(0); return TAGGED(0).AsUnsigned(); } // No machine-specific calls in the interpreter. struct _entrypts machineSpecificEPT[] = { { "PolyArm64GetThreadData", (polyRTSFunction)&PolyArm64GetThreadData }, { "PolyInterpretedEnterIntMode", (polyRTSFunction)&PolyInterpretedEnterIntMode }, { "PolyEndBootstrapMode", (polyRTSFunction)&PolyEndBootstrapMode }, { NULL, NULL} // End of list. }; diff --git a/libpolyml/machine_dep.h b/libpolyml/machine_dep.h index 42fd52a3..e811efdc 100644 --- a/libpolyml/machine_dep.h +++ b/libpolyml/machine_dep.h @@ -1,111 +1,115 @@ /* Title: machine_dep.h - exports signature for machine_dep.c Copyright (c) 2000 Cambridge University Technical Services Limited Further development Copyright 2020-21 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef _MACHINE_DEP_H #define _MACHINE_DEP_H class ScanAddress; class TaskData; class SaveVecEntry; typedef SaveVecEntry *Handle; class StackSpace; // Machine architecture values. typedef enum { MA_Interpreted = 0, MA_I386, MA_X86_64, MA_X86_64_32, MA_Arm64, MA_Arm64_32 } Architectures; // Machine-dependent module. class MachineDependent { public: virtual ~MachineDependent() {} // Keep the compiler happy // Create the machine-specific task data object. virtual TaskData *CreateTaskData(void) = 0; virtual unsigned InitialStackSize(void) { return 128; } // Initial size of a stack // Must be > 40 (i.e. 2*min_stack_check) + base area in each stack frame // Find the start of the constant section for a piece of code. // This is the default version which uses the whole of the last word as a // byte offset. // Normally the constant area is located within the code object and the offset is a small // negative value. When creating position-independent code we need to put the constants in a // separate area. We have to use a relative offset to the constants rather than an absolute // address to ensure that the code is position-independent. virtual void GetConstSegmentForCode(PolyObject *obj, POLYUNSIGNED obj_length, PolyWord*& cp, POLYUNSIGNED& count) const { PolyWord* last_word = obj->Offset(obj_length - 1); // Last word in the code POLYSIGNED offset = last_word->AsSigned(); cp = last_word + 1 + offset / sizeof(PolyWord); count = cp[-1].AsUnsigned(); } void GetConstSegmentForCode(PolyObject* obj, PolyWord*& cp, POLYUNSIGNED& count) const { GetConstSegmentForCode(obj, obj->Length(), cp, count); } PolyWord* ConstPtrForCode(PolyObject* obj) const { PolyWord* cp; POLYUNSIGNED count; GetConstSegmentForCode(obj, cp, count); return cp; } /* ScanConstantsWithinCode - update addresses within a code segment.*/ virtual void ScanConstantsWithinCode(PolyObject* addr, PolyObject* old, POLYUNSIGNED length, PolyWord* newConstAddr, PolyWord* oldConstAddr, POLYUNSIGNED numConsts, ScanAddress* process) {} void ScanConstantsWithinCode(PolyObject* addr, POLYUNSIGNED length, ScanAddress* process) { PolyWord* constAddr; POLYUNSIGNED count; GetConstSegmentForCode(addr, length, constAddr, count); ScanConstantsWithinCode(addr, addr, length, constAddr, constAddr, count, process); } void ScanConstantsWithinCode(PolyObject* addr, ScanAddress* process) { ScanConstantsWithinCode(addr, addr->Length(), process); } // Common case // Set the address of the constant area. The default is a relative byte offset. virtual void SetAddressOfConstants(PolyObject* objAddr, PolyObject* writable, POLYUNSIGNED length, PolyWord* constAddr) { POLYSIGNED offset = (POLYSIGNED)((constAddr - (PolyWord*)objAddr - length) * sizeof(PolyWord)); writable->Set(length - 1, PolyWord::FromSigned(offset)); } virtual void FlushInstructionCache(void *p, POLYUNSIGNED bytes) {} virtual Architectures MachineArchitecture(void) = 0; virtual void SetBootArchitecture(char arch, unsigned wordLength) {} + + // Update the global heap base in ARM64 32-in-64 FFI callbacks. + // This is a very special case. + virtual void UpdateGlobalHeapReference(PolyObject* addr) {} }; extern MachineDependent *machineDependent; extern struct _entrypts machineSpecificEPT[]; #endif /* _MACHINE_DEP_H */ diff --git a/libpolyml/savestate.cpp b/libpolyml/savestate.cpp index a560ccdd..833e0411 100644 --- a/libpolyml/savestate.cpp +++ b/libpolyml/savestate.cpp @@ -1,2262 +1,2264 @@ /* Title: savestate.cpp - Save and Load state Copyright (c) 2007, 2015, 2017-19, 2021 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_STDIO_H #include #endif #ifdef HAVE_WINDOWS_H #include // For MAX_PATH #endif #ifdef HAVE_SYS_PARAM_H #include // For MAX_PATH #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_TIME_H #include #endif #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #if (defined(_WIN32)) #include #define ERRORNUMBER _doserrno #define NOMEMORY ERROR_NOT_ENOUGH_MEMORY #else typedef char TCHAR; #define _T(x) x #define _tfopen fopen #define _tcscpy strcpy #define _tcsdup strdup #define _tcslen strlen #define _fputtc fputc #define _fputts fputs #ifndef lstrcmpi #define lstrcmpi strcasecmp #endif #define ERRORNUMBER errno #define NOMEMORY ENOMEM #endif #include "globals.h" #include "savestate.h" #include "processes.h" #include "run_time.h" #include "polystring.h" #include "scanaddrs.h" #include "arb.h" #include "memmgr.h" #include "mpoly.h" // For exportTimeStamp #include "exporter.h" // For CopyScan #include "machine_dep.h" #include "osmem.h" #include "gc.h" // For FullGC. #include "timing.h" #include "rtsentry.h" #include "check_objects.h" #include "rtsentry.h" #include "../polyexports.h" // For InitHeaderFromExport #include "version.h" // For InitHeaderFromExport #ifdef _MSC_VER // Don't tell me about ISO C++ changes. #pragma warning(disable:4996) #endif extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolySaveState(FirstArgument threadId, PolyWord fileName, PolyWord depth); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadState(FirstArgument threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowHierarchy(FirstArgument threadId); POLYEXTERNALSYMBOL POLYUNSIGNED PolyRenameParent(FirstArgument threadId, PolyWord childName, PolyWord parentName); POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowParent(FirstArgument threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyStoreModule(FirstArgument threadId, PolyWord name, PolyWord contents); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadModule(FirstArgument threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadHierarchy(FirstArgument threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetModuleDirectory(FirstArgument threadId); } // Helper class to close files on exit. class AutoClose { public: AutoClose(FILE *f = 0): m_file(f) {} ~AutoClose() { if (m_file) ::fclose(m_file); } operator FILE*() { return m_file; } FILE* operator = (FILE* p) { return (m_file = p); } private: FILE *m_file; }; // This is probably generally useful so may be moved into // a general header file. template class AutoFree { public: AutoFree(BASE p = 0): m_value(p) {} ~AutoFree() { free(m_value); } // Automatic conversions to the base type. operator BASE() { return m_value; } BASE operator = (BASE p) { return (m_value = p); } private: BASE m_value; }; #ifdef HAVE__FTELLI64 // fseek and ftell are only 32-bits in Windows. #define off_t __int64 #define fseek _fseeki64 #define ftell _ftelli64 #endif /* * Structure definitions for the saved state files. */ #define SAVEDSTATESIGNATURE "POLYSAVE" #define SAVEDSTATEVERSION 2 // File header for a saved state file. This appears as the first entry // in the file. typedef struct _savedStateHeader { // These entries are primarily to check that we have a valid // saved state file before we try to interpret anything else. char headerSignature[8]; // Should contain SAVEDSTATESIGNATURE unsigned headerVersion; // Should contain SAVEDSTATEVERSION unsigned headerLength; // Number of bytes in the header unsigned segmentDescrLength; // Number of bytes in a descriptor // These entries contain the real data. off_t segmentDescr; // Position of segment descriptor table unsigned segmentDescrCount; // Number of segment descriptors in the table off_t stringTable; // Pointer to the string table (zero if none) size_t stringTableSize; // Size of string table unsigned parentNameEntry; // Position of parent name in string table (0 if top) time_t timeStamp; // The time stamp for this file. time_t parentTimeStamp; // The time stamp for the parent. void *originalBaseAddr; // Original base address (32-in-64 only) } SavedStateHeader; // Entry for segment table. This describes the segments on the disc that // need to be loaded into memory. typedef struct _savedStateSegmentDescr { off_t segmentData; // Position of the segment data size_t segmentSize; // Size of the segment data off_t relocations; // Position of the relocation table unsigned relocationCount; // Number of entries in relocation table unsigned relocationSize; // Size of a relocation entry unsigned segmentFlags; // Segment flags (see SSF_ values) unsigned segmentIndex; // The index of this segment or the segment it overwrites void *originalAddress; // The base address when the segment was written. } SavedStateSegmentDescr; #define SSF_WRITABLE 1 // The segment contains mutable data #define SSF_OVERWRITE 2 // The segment overwrites the data (mutable) in a parent. #define SSF_NOOVERWRITE 4 // The segment must not be further overwritten #define SSF_BYTES 8 // The segment contains only byte data #define SSF_CODE 16 // The segment contains only code typedef struct _relocationEntry { // Each entry indicates a location that has to be set to an address. // The location to be set is determined by adding "relocAddress" to the base address of // this segment (the one to which these relocations apply) and the value to store // by adding "targetAddress" to the base address of the segment indicated by "targetSegment". POLYUNSIGNED relocAddress; // The (byte) offset in this segment that we will set POLYUNSIGNED targetAddress; // The value to add to the base of the destination segment unsigned targetSegment; // The base segment. 0 is IO segment. ScanRelocationKind relKind; // The kind of relocation (processor dependent). } RelocationEntry; #define SAVE(x) taskData->saveVec.push(x) /* * Hierarchy table: contains information about last loaded or saved state. */ // Pointer to list of files loaded in last load. // There's no need for a lock since the update is only made when all // the ML threads have stopped. class HierarchyTable { public: HierarchyTable(const TCHAR *file, time_t time): fileName(_tcsdup(file)), timeStamp(time) { } AutoFree fileName; time_t timeStamp; }; HierarchyTable **hierarchyTable; static unsigned hierarchyDepth; static bool AddHierarchyEntry(const TCHAR *fileName, time_t timeStamp) { // Add an entry to the hierarchy table for this file. HierarchyTable *newEntry = new HierarchyTable(fileName, timeStamp); if (newEntry == 0) return false; HierarchyTable **newTable = (HierarchyTable **)realloc(hierarchyTable, sizeof(HierarchyTable *)*(hierarchyDepth+1)); if (newTable == 0) return false; hierarchyTable = newTable; hierarchyTable[hierarchyDepth++] = newEntry; return true; } // Test whether we're overwriting a parent of ourself. #if (defined(_WIN32) || defined(__CYGWIN__)) static bool sameFile(const TCHAR *x, const TCHAR *y) { HANDLE hXFile = INVALID_HANDLE_VALUE, hYFile = INVALID_HANDLE_VALUE; bool result = false; hXFile = CreateFile(x, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hXFile == INVALID_HANDLE_VALUE) goto closeAndExit; hYFile = CreateFile(y, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hYFile == INVALID_HANDLE_VALUE) goto closeAndExit; BY_HANDLE_FILE_INFORMATION fileInfoX, fileInfoY; if (! GetFileInformationByHandle(hXFile, &fileInfoX)) goto closeAndExit; if (! GetFileInformationByHandle(hYFile, &fileInfoY)) goto closeAndExit; result = fileInfoX.dwVolumeSerialNumber == fileInfoY.dwVolumeSerialNumber && fileInfoX.nFileIndexLow == fileInfoY.nFileIndexLow && fileInfoX.nFileIndexHigh == fileInfoY.nFileIndexHigh; closeAndExit: if (hXFile != INVALID_HANDLE_VALUE) CloseHandle(hXFile); if (hYFile != INVALID_HANDLE_VALUE) CloseHandle(hYFile); return result; } #else static bool sameFile(const char *x, const char *y) { struct stat xStat, yStat; // If either file does not exist that's fine. if (stat(x, &xStat) != 0 || stat(y, &yStat) != 0) return false; return (xStat.st_dev == yStat.st_dev && xStat.st_ino == yStat.st_ino); } #endif /* * Saving state. */ // This class is used to create the relocations. It uses Exporter // for this but this may perhaps be too heavyweight. class SaveStateExport: public Exporter, public ScanAddress { public: SaveStateExport(unsigned int h=0): Exporter(h), relocationCount(0) {} public: virtual void exportStore(void) {} // Not used. private: // ScanAddress overrides virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code, intptr_t displacement); // At the moment we should only get calls to ScanConstant. virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; } protected: void setRelocationAddress(void *p, POLYUNSIGNED *reloc); PolyWord createRelocation(PolyWord p, void *relocAddr); unsigned relocationCount; friend class SaveRequest; }; // Generate the address relative to the start of the segment. void SaveStateExport::setRelocationAddress(void *p, POLYUNSIGNED *reloc) { unsigned area = findArea(p); POLYUNSIGNED offset = (POLYUNSIGNED)((char*)p - (char*)memTable[area].mtOriginalAddr); *reloc = offset; } // Create a relocation entry for an address at a given location. PolyWord SaveStateExport::createRelocation(PolyWord p, void *relocAddr) { RelocationEntry reloc; // Set the offset within the section we're scanning. setRelocationAddress(relocAddr, &reloc.relocAddress); void *addr = p.AsAddress(); unsigned addrArea = findArea(addr); reloc.targetAddress = (POLYUNSIGNED)((char*)addr - (char*)memTable[addrArea].mtOriginalAddr); reloc.targetSegment = (unsigned)memTable[addrArea].mtIndex; reloc.relKind = PROCESS_RELOC_DIRECT; fwrite(&reloc, sizeof(reloc), 1, exportFile); relocationCount++; return p; // Don't change the contents } /* This is called for each constant within the code. Print a relocation entry for the word and return a value that means that the offset is saved in original word. */ void SaveStateExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code, intptr_t displacement) { PolyObject *p = GetConstantValue(addr, code, displacement); if (p == 0) return; void *a = p; unsigned aArea = findArea(a); // We don't need a relocation if this is relative to the current segment // since the relative address will already be right. if (code == PROCESS_RELOC_I386RELATIVE && aArea == findArea(addr)) return; // Set the value at the address to the offset relative to the symbol. RelocationEntry reloc; setRelocationAddress(addr, &reloc.relocAddress); reloc.targetAddress = (POLYUNSIGNED)((char*)a - (char*)memTable[aArea].mtOriginalAddr); reloc.targetSegment = (unsigned)memTable[aArea].mtIndex; reloc.relKind = code; fwrite(&reloc, sizeof(reloc), 1, exportFile); relocationCount++; } // Request to the main thread to save data. class SaveRequest: public MainThreadRequest { public: SaveRequest(const TCHAR *name, unsigned h): MainThreadRequest(MTP_SAVESTATE), fileName(name), newHierarchy(h), errorMessage(0), errCode(0) {} virtual void Perform(); const TCHAR *fileName; unsigned newHierarchy; const char *errorMessage; int errCode; }; // This class is used to update references to objects that have moved. If // we have copied an object into the area to be exported we may still have references // to it from the stack or from RTS data structures. We have to ensure that these // are updated. // This is very similar to ProcessFixupAddress in sharedata.cpp class SaveFixupAddress: public ScanAddress { protected: virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt); virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { *pt = ScanObjectAddress(*pt); return 0; } virtual PolyObject *ScanObjectAddress(PolyObject *base); public: void ScanCodeSpace(CodeSpace *space); }; POLYUNSIGNED SaveFixupAddress::ScanAddressAt(PolyWord *pt) { PolyWord val = *pt; if (val.IsDataPtr() && val != PolyWord::FromUnsigned(0)) *pt = ScanObjectAddress(val.AsObjPtr()); return 0; } // Returns the new address if the argument is the address of an object that // has moved, otherwise returns the original. PolyObject *SaveFixupAddress::ScanObjectAddress(PolyObject *obj) { if (obj->ContainsForwardingPtr()) // tombstone is a pointer to a moved object { #ifdef POLYML32IN64 MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1); PolyObject *newp; if (space->isCode) newp = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); else newp = obj->GetForwardingPtr(); #else PolyObject *newp = obj->GetForwardingPtr(); #endif ASSERT (newp->ContainsNormalLengthWord()); return newp; } ASSERT (obj->ContainsNormalLengthWord()); // object is not moved return obj; } // Fix up addresses in the code area. Unlike ScanAddressesInRegion this updates // cells that have been moved. We need to do that because we may still have // return addresses into those cells and we don't move return addresses. We // do want the code to see updated constant addresses. void SaveFixupAddress::ScanCodeSpace(CodeSpace *space) { for (PolyWord *pt = space->bottom; pt < space->top; ) { pt++; PolyObject *obj = (PolyObject*)pt; #ifdef POLYML32IN64 PolyObject *dest = obj; while (dest->ContainsForwardingPtr()) { MemSpace *space = gMem.SpaceForObjectAddress(dest); if (space->isCode) dest = (PolyObject*)(globalCodeBase + ((dest->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); else dest = dest->GetForwardingPtr(); } #else PolyObject *dest = obj->FollowForwardingChain(); #endif POLYUNSIGNED length = dest->Length(); if (length != 0) ScanAddressesInObject(obj, dest->LengthWord()); pt += length; } } // Called by the root thread to actually save the state and write the file. void SaveRequest::Perform() { if (debugOptions & DEBUG_SAVING) Log("SAVE: Beginning saving state.\n"); // Check that we aren't overwriting our own parent. for (unsigned q = 0; q < newHierarchy-1; q++) { if (sameFile(hierarchyTable[q]->fileName, fileName)) { errorMessage = "File being saved is used as a parent of this file"; errCode = 0; if (debugOptions & DEBUG_SAVING) Log("SAVE: File being saved is used as a parent of this file.\n"); return; } } SaveStateExport exports; // Open the file. This could quite reasonably fail if the path is wrong. exports.exportFile = _tfopen(fileName, _T("wb")); if (exports.exportFile == NULL) { errorMessage = "Cannot open save file"; errCode = ERRORNUMBER; if (debugOptions & DEBUG_SAVING) Log("SAVE: Cannot open save file.\n"); return; } // Scan over the permanent mutable area copying all reachable data that is // not in a lower hierarchy into new permanent segments. CopyScan copyScan(newHierarchy); copyScan.initialise(false); bool success = true; try { for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->isMutable && !space->noOverwrite && !space->byteOnly) { if (debugOptions & DEBUG_SAVING) Log("SAVE: Scanning permanent mutable area %p allocated at %p size %lu\n", space, space->bottom, space->spaceSize()); copyScan.ScanAddressesInRegion(space->bottom, space->top); } } } catch (MemoryException &) { success = false; if (debugOptions & DEBUG_SAVING) Log("SAVE: Scan of permanent mutable area raised memory exception.\n"); } // Copy the areas into the export object. Make sufficient space for // the largest possible number of entries. exports.memTable = new memoryTableEntry[gMem.eSpaces.size()+gMem.pSpaces.size()+1]; unsigned memTableCount = 0; // Permanent spaces at higher level. These have to have entries although // only the mutable entries will be written. for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->hierarchy < newHierarchy) { memoryTableEntry *entry = &exports.memTable[memTableCount++]; entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom; entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord); entry->mtIndex = space->index; entry->mtFlags = 0; if (space->isMutable) { entry->mtFlags |= MTF_WRITEABLE; if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE; if (space->byteOnly) entry->mtFlags |= MTF_BYTES; } if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE; } } unsigned permanentEntries = memTableCount; // Remember where new entries start. // Newly created spaces. for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++) { memoryTableEntry *entry = &exports.memTable[memTableCount++]; PermanentMemSpace *space = *i; entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom; entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord); entry->mtIndex = space->index; entry->mtFlags = 0; if (space->isMutable) { entry->mtFlags |= MTF_WRITEABLE; if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE; if (space->byteOnly) entry->mtFlags |= MTF_BYTES; } if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE; } exports.memTableEntries = memTableCount; if (debugOptions & DEBUG_SAVING) Log("SAVE: Updating references to moved objects.\n"); // Update references to moved objects. SaveFixupAddress fixup; for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { LocalMemSpace *space = *i; fixup.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr); fixup.ScanAddressesInRegion(space->upperAllocPtr, space->top); } for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) fixup.ScanCodeSpace(*i); GCModules(&fixup); // Restore the length words in the code areas. // Although we've updated any pointers to the start of the code we could have return addresses // pointing to the original code. GCModules updates the stack but doesn't update return addresses. for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) { CodeSpace *space = *i; for (PolyWord *pt = space->bottom; pt < space->top; ) { pt++; PolyObject *obj = (PolyObject*)pt; if (obj->ContainsForwardingPtr()) { #ifdef POLYML32IN64 PolyObject *forwardedTo = obj; while (forwardedTo->ContainsForwardingPtr()) forwardedTo = (PolyObject*)(globalCodeBase + ((forwardedTo->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); #else PolyObject *forwardedTo = obj->FollowForwardingChain(); #endif POLYUNSIGNED lengthWord = forwardedTo->LengthWord(); space->writeAble(obj)->SetLengthWord(lengthWord); } pt += obj->Length(); } } // Update the global memory space table. Old segments at the same level // or lower are removed. The new segments become permanent. // Try to promote the spaces even if we've had a failure because export // spaces are deleted in ~CopyScan and we may have already copied // some objects there. if (debugOptions & DEBUG_SAVING) Log("SAVE: Promoting export spaces to permanent spaces.\n"); if (! gMem.PromoteExportSpaces(newHierarchy) || ! success) { errorMessage = "Out of Memory"; errCode = NOMEMORY; if (debugOptions & DEBUG_SAVING) Log("SAVE: Unable to promote export spaces.\n"); return; } // Remove any deeper entries from the hierarchy table. while (hierarchyDepth > newHierarchy-1) { hierarchyDepth--; delete(hierarchyTable[hierarchyDepth]); hierarchyTable[hierarchyDepth] = 0; } if (debugOptions & DEBUG_SAVING) Log("SAVE: Writing out data.\n"); // Write out the file header. SavedStateHeader saveHeader; memset(&saveHeader, 0, sizeof(saveHeader)); saveHeader.headerLength = sizeof(saveHeader); memcpy(saveHeader.headerSignature, SAVEDSTATESIGNATURE, sizeof(saveHeader.headerSignature)); saveHeader.headerVersion = SAVEDSTATEVERSION; saveHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr); if (newHierarchy == 1) saveHeader.parentTimeStamp = exportTimeStamp; else { saveHeader.parentTimeStamp = hierarchyTable[newHierarchy-2]->timeStamp; saveHeader.parentNameEntry = sizeof(TCHAR); // Always the first entry. } saveHeader.timeStamp = getBuildTime(); saveHeader.segmentDescrCount = exports.memTableEntries; // One segment for each space. #ifdef POLYML32IN64 saveHeader.originalBaseAddr = globalHeapBase; #endif // Write out the header. fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile); // We need a segment header for each permanent area whether it is // actually in this file or not. SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [exports.memTableEntries]; for (unsigned j = 0; j < exports.memTableEntries; j++) { memoryTableEntry *entry = &exports.memTable[j]; memset(&descrs[j], 0, sizeof(SavedStateSegmentDescr)); descrs[j].relocationSize = sizeof(RelocationEntry); descrs[j].segmentIndex = (unsigned)entry->mtIndex; descrs[j].segmentSize = entry->mtLength; // Set this even if we don't write it. descrs[j].originalAddress = entry->mtOriginalAddr; if (entry->mtFlags & MTF_WRITEABLE) { descrs[j].segmentFlags |= SSF_WRITABLE; if (entry->mtFlags & MTF_NO_OVERWRITE) descrs[j].segmentFlags |= SSF_NOOVERWRITE; if (j < permanentEntries && (entry->mtFlags & MTF_NO_OVERWRITE) == 0) descrs[j].segmentFlags |= SSF_OVERWRITE; if (entry->mtFlags & MTF_BYTES) descrs[j].segmentFlags |= SSF_BYTES; } if (entry->mtFlags & MTF_EXECUTABLE) descrs[j].segmentFlags |= SSF_CODE; } // Write out temporarily. Will be overwritten at the end. saveHeader.segmentDescr = ftell(exports.exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile); // Write out the relocations and the data. for (unsigned k = 1 /* Not IO area */; k < exports.memTableEntries; k++) { memoryTableEntry *entry = &exports.memTable[k]; // Write out the contents if this is new or if it is a normal, overwritable // mutable area. if (k >= permanentEntries || (entry->mtFlags & (MTF_WRITEABLE|MTF_NO_OVERWRITE)) == MTF_WRITEABLE) { descrs[k].relocations = ftell(exports.exportFile); // Have to write this out. exports.relocationCount = 0; // Create the relocation table. char *start = (char*)entry->mtOriginalAddr; char *end = start + entry->mtLength; for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); // Most relocations can be computed when the saved state is // loaded so we only write out the difficult ones: those that // occur within compiled code. // exports.relocateObject(obj); if (length != 0 && obj->IsCodeObject()) machineDependent->ScanConstantsWithinCode(obj, &exports); p += length; } descrs[k].relocationCount = exports.relocationCount; // Write out the data. descrs[k].segmentData = ftell(exports.exportFile); fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exports.exportFile); } } // If this is a child we need to write a string table containing the parent name. if (newHierarchy > 1) { saveHeader.stringTable = ftell(exports.exportFile); _fputtc(0, exports.exportFile); // First byte of string table is zero _fputts(hierarchyTable[newHierarchy-2]->fileName, exports.exportFile); _fputtc(0, exports.exportFile); // A terminating null. saveHeader.stringTableSize = (_tcslen(hierarchyTable[newHierarchy-2]->fileName) + 2)*sizeof(TCHAR); } // Rewrite the header and the segment tables now they're complete. fseek(exports.exportFile, 0, SEEK_SET); fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile); if (debugOptions & DEBUG_SAVING) Log("SAVE: Writing complete.\n"); // Add an entry to the hierarchy table for this file. (void)AddHierarchyEntry(fileName, saveHeader.timeStamp); delete[](descrs); CheckMemory(); } // Write a saved state file. POLYUNSIGNED PolySaveState(FirstArgument threadId, PolyWord fileName, PolyWord depth) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); try { TempString fileNameBuff(Poly_string_to_T_alloc(fileName)); // The value of depth is zero for top-level save so we need to add one for hierarchy. unsigned newHierarchy = get_C_unsigned(taskData, depth) + 1; if (newHierarchy > hierarchyDepth + 1) raise_fail(taskData, "Depth must be no more than the current hierarchy plus one"); // Request a full GC first. The main reason is to avoid running out of memory as a // result of repeated saves. Old export spaces are turned into local spaces and // the GC will delete them if they are completely empty FullGC(taskData); SaveRequest request(fileNameBuff, newHierarchy); processes->MakeRootRequest(taskData, &request); if (request.errorMessage) raise_syscall(taskData, request.errorMessage, request.errCode); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } /* * Loading saved state files. */ class StateLoader: public MainThreadRequest { public: StateLoader(bool isH, Handle files): MainThreadRequest(MTP_LOADSTATE), isHierarchy(isH), fileNameList(files), errorResult(0), errNumber(0) { } virtual void Perform(void); bool LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail); bool isHierarchy; Handle fileNameList; const char *errorResult; // The fileName here is the last file loaded. As well as using it // to load the name can also be printed out at the end to identify the // particular file in the hierarchy that failed. AutoFree fileName; int errNumber; }; // Called by the main thread once all the ML threads have stopped. void StateLoader::Perform(void) { // Copy the first file name into the buffer. if (isHierarchy) { if (ML_Cons_Cell::IsNull(fileNameList->Word())) { errorResult = "Hierarchy list is empty"; return; } ML_Cons_Cell *p = DEREFLISTHANDLE(fileNameList); fileName = Poly_string_to_T_alloc(p->h); if (fileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return; } (void)LoadFile(true, 0, p->t); } else { fileName = Poly_string_to_T_alloc(fileNameList->Word()); if (fileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return; } (void)LoadFile(true, 0, TAGGED(0)); } } class ClearVolatile: public ScanAddress { public: ClearVolatile() {} virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; } virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord); }; // Set the values of external references and clear the values of other weak byte refs. void ClearVolatile::ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord) { if (OBJ_IS_MUTABLE_OBJECT(lengthWord) && OBJ_IS_NO_OVERWRITE(lengthWord)) { if (OBJ_IS_BYTE_OBJECT(lengthWord)) { if (OBJ_IS_WEAKREF_OBJECT(lengthWord)) { POLYUNSIGNED len = OBJ_OBJECT_LENGTH(lengthWord); if (len >= sizeof(uintptr_t) / sizeof(PolyWord)) *((uintptr_t*)base) = 0; setEntryPoint(base); } } else { // Clear volatile refs POLYUNSIGNED len = OBJ_OBJECT_LENGTH(lengthWord); for (POLYUNSIGNED i = 0; i < len; i++) base->Set(i, TAGGED(0)); } } } // This is copied from the B-tree in MemMgr. It probably should be // merged but will do for the moment. It's intended to reduce the // cost of finding the segment for relocation. class SpaceBTree { public: SpaceBTree(bool is, unsigned i = 0) : isLeaf(is), index(i) { } virtual ~SpaceBTree() {} bool isLeaf; unsigned index; // The index if this is a leaf }; // A non-leaf node in the B-tree class SpaceBTreeTree : public SpaceBTree { public: SpaceBTreeTree(); virtual ~SpaceBTreeTree(); SpaceBTree *tree[256]; }; SpaceBTreeTree::SpaceBTreeTree() : SpaceBTree(false) { for (unsigned i = 0; i < 256; i++) tree[i] = 0; } SpaceBTreeTree::~SpaceBTreeTree() { for (unsigned i = 0; i < 256; i++) delete(tree[i]); } // This class is used to relocate addresses in areas that have been loaded. class LoadRelocate: public ScanAddress { public: LoadRelocate(bool pcc = false): processCodeConstants(pcc), originalBaseAddr(0), descrs(0), targetAddresses(0), nDescrs(0), spaceTree(0) {} ~LoadRelocate(); void RelocateObject(PolyObject *p); virtual PolyObject *ScanObjectAddress(PolyObject *base) { ASSERT(0); return base; } // Not used virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement); void RelocateAddressAt(PolyWord *pt); PolyObject *RelocateAddress(PolyObject *obj); void AddTreeRange(SpaceBTree **t, unsigned index, uintptr_t startS, uintptr_t endS); bool processCodeConstants; PolyWord *originalBaseAddr; SavedStateSegmentDescr *descrs; PolyWord **targetAddresses; unsigned nDescrs; SpaceBTree *spaceTree; intptr_t relativeOffset; }; LoadRelocate::~LoadRelocate() { if (descrs) delete[](descrs); if (targetAddresses) delete[](targetAddresses); delete(spaceTree); } // Add an entry to the space B-tree. void LoadRelocate::AddTreeRange(SpaceBTree **tt, unsigned index, uintptr_t startS, uintptr_t endS) { if (*tt == 0) *tt = new SpaceBTreeTree; ASSERT(!(*tt)->isLeaf); SpaceBTreeTree *t = (SpaceBTreeTree*)*tt; const unsigned shift = (sizeof(void*) - 1) * 8; // Takes the high-order byte uintptr_t r = startS >> shift; ASSERT(r < 256); const uintptr_t s = endS == 0 ? 256 : endS >> shift; ASSERT(s >= r && s <= 256); if (r == s) // Wholly within this entry AddTreeRange(&(t->tree[r]), index, startS << 8, endS << 8); else { // Deal with any remainder at the start. if ((r << shift) != startS) { AddTreeRange(&(t->tree[r]), index, startS << 8, 0 /*End of range*/); r++; } // Whole entries. while (r < s) { ASSERT(t->tree[r] == 0); t->tree[r] = new SpaceBTree(true, index); r++; } // Remainder at the end. if ((s << shift) != endS) AddTreeRange(&(t->tree[r]), index, 0, endS << 8); } } // Update the addresses in a group of words. void LoadRelocate::RelocateAddressAt(PolyWord *pt) { PolyWord val = *pt; if (! val.IsTagged()) *gMem.SpaceForAddress(pt)->writeAble(pt) = RelocateAddress(val.AsObjPtr(originalBaseAddr)); } PolyObject *LoadRelocate::RelocateAddress(PolyObject *obj) { // Which segment is this address in? // N.B. As with SpaceForAddress we need to subtract 1 to point to the length word. uintptr_t t = (uintptr_t)((PolyWord*)obj - 1); SpaceBTree *tr = spaceTree; // Each level of the tree is either a leaf or a vector of trees. unsigned j = sizeof(void *) * 8; for (;;) { if (tr == 0) break; if (tr->isLeaf) { // It's in this segment: relocate it to the current position. unsigned i = tr->index; SavedStateSegmentDescr *descr = &descrs[i]; PolyWord *newAddress = targetAddresses[descr->segmentIndex]; ASSERT((char*)obj > descr->originalAddress && (char*)obj <= (char*)descr->originalAddress + descr->segmentSize); ASSERT(newAddress != 0); byte *setAddress = (byte*)newAddress + ((char*)obj - (char*)descr->originalAddress); return (PolyObject*)setAddress; } j -= 8; tr = ((SpaceBTreeTree*)tr)->tree[(t >> j) & 0xff]; } // This should never happen. ASSERT(0); return 0; } // This is based on Exporter::relocateObject but does the reverse. // It attempts to adjust all the addresses in the object when it has // been read in. void LoadRelocate::RelocateObject(PolyObject *p) { if (p->IsByteObject()) { } else if (p->IsCodeObject()) { POLYUNSIGNED constCount; PolyWord *cp; ASSERT(! p->IsMutable() ); machineDependent->GetConstSegmentForCode(p, cp, constCount); /* Now the constant area. */ for (POLYUNSIGNED i = 0; i < constCount; i++) RelocateAddressAt(&(cp[i])); // Saved states and modules have relocation entries for constants in the code. // We can't use them when loading object files in 32-in-64 so have to process the // constants here. if (processCodeConstants) machineDependent->ScanConstantsWithinCode(p, this); + // On 32-in-64 ARM we may have to update the global heap base in an FFI callback. + machineDependent->UpdateGlobalHeapReference(p); } else if (p->IsClosureObject()) { // The first word is the address of the code. POLYUNSIGNED length = p->Length(); *(PolyObject**)p = RelocateAddress(*(PolyObject**)p); for (POLYUNSIGNED i = sizeof(PolyObject*)/sizeof(PolyWord); i < length; i++) RelocateAddressAt(p->Offset(i)); } else /* Ordinary objects, essentially tuples. */ { POLYUNSIGNED length = p->Length(); for (POLYUNSIGNED i = 0; i < length; i++) RelocateAddressAt(p->Offset(i)); } } // Update addresses as constants within the code. void LoadRelocate::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code, intptr_t displacement) { PolyObject *p = GetConstantValue(addressOfConstant, code, displacement); if (p != 0) { // Relative addresses are computed by adding the CURRENT address. // We have to convert them into addresses in original space before we // can relocate them. if (code == PROCESS_RELOC_I386RELATIVE) p = (PolyObject*)((PolyWord*)p + relativeOffset); PolyObject *newValue = RelocateAddress(p); SetConstantValue(addressOfConstant, newValue, code); } } // Work around bug in Mac OS when reading into MAP_JIT memory. static size_t readData(void *ptr, size_t size, FILE *stream) { #ifndef MACOSX return fread(ptr, size, 1, stream); #else char buff[1024]; for (size_t s = 0; s < size; ) { size_t unit = sizeof(buff); if (size - s < unit) unit = size-s; if (fread(buff, unit, 1, stream) != 1) return 0; memcpy((char*)ptr+s, buff, unit); s += unit; } return 1; // Succeeded #endif } // Load a saved state file. Calls itself to handle parent files. bool StateLoader::LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail) { LoadRelocate relocate; AutoFree thisFile(_tcsdup(fileName)); AutoClose loadFile(_tfopen(fileName, _T("rb"))); if ((FILE*)loadFile == NULL) { errorResult = "Cannot open load file"; errNumber = ERRORNUMBER; return false; } SavedStateHeader header; // Read the header and check the signature. if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1) { errorResult = "Unable to load header"; return false; } if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0) { errorResult = "File is not a saved state"; return false; } if (header.headerVersion != SAVEDSTATEVERSION || header.headerLength != sizeof(SavedStateHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { errorResult = "Unsupported version of saved state file"; return false; } // Check that we have the required stamp before loading any children. // If a parent has been overwritten we could get a loop. if (! isInitial && header.timeStamp != requiredStamp) { // Time-stamps don't match. errorResult = "The parent for this saved state does not match or has been changed"; return false; } // Have verified that this is a reasonable saved state file. If it isn't a // top-level file we have to load the parents first. if (header.parentNameEntry != 0) { if (isHierarchy) { // Take the file name from the list if (ML_Cons_Cell::IsNull(tail)) { errorResult = "Missing parent name in argument list"; return false; } ML_Cons_Cell *p = (ML_Cons_Cell *)tail.AsObjPtr(); fileName = Poly_string_to_T_alloc(p->h); if (fileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return false; } if (! LoadFile(false, header.parentTimeStamp, p->t)) return false; } else { size_t toRead = header.stringTableSize-header.parentNameEntry; size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR)); // Always allow space for null terminator size_t roundedBytes = (elems + 1) * sizeof(TCHAR); TCHAR *newFileName = (TCHAR *)realloc(fileName, roundedBytes); if (newFileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return false; } fileName = newFileName; if (header.parentNameEntry >= header.stringTableSize /* Bad entry */ || fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 || fread(fileName, 1, toRead, loadFile) != toRead) { errorResult = "Unable to read parent file name"; return false; } fileName[elems] = 0; // Should already be null-terminated, but just in case. if (! LoadFile(false, header.parentTimeStamp, TAGGED(0))) return false; } ASSERT(hierarchyDepth > 0 && hierarchyTable[hierarchyDepth-1] != 0); } else // Top-level file { if (isHierarchy && ! ML_Cons_Cell::IsNull(tail)) { // There should be no further file names if this is really the top. errorResult = "Too many file names in the list"; return false; } if (header.parentTimeStamp != exportTimeStamp) { // Time-stamp does not match executable. errorResult = "Saved state was exported from a different executable or the executable has changed"; return false; } // Any existing spaces at this level or greater must be turned // into local spaces. We may have references from the stack to objects that // have previously been imported but otherwise these spaces are no longer // needed. gMem.DemoteImportSpaces(); // Clean out the hierarchy table. for (unsigned h = 0; h < hierarchyDepth; h++) { delete(hierarchyTable[h]); hierarchyTable[h] = 0; } hierarchyDepth = 0; } // Now have a valid, matching saved state. // Load the segment descriptors. relocate.nDescrs = header.segmentDescrCount; relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs]; relocate.originalBaseAddr = (PolyWord*)header.originalBaseAddr; if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 || fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs) { errorResult = "Unable to read segment descriptors"; return false; } { unsigned maxIndex = 0; for (unsigned i = 0; i < relocate.nDescrs; i++) { if (relocate.descrs[i].segmentIndex > maxIndex) maxIndex = relocate.descrs[i].segmentIndex; relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress, (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize-1)); } relocate.targetAddresses = new PolyWord*[maxIndex+1]; for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0; } // Read in and create the new segments first. If we have problems, // in particular if we have run out of memory, then it's easier to recover. for (unsigned i = 0; i < relocate.nDescrs; i++) { SavedStateSegmentDescr *descr = &relocate.descrs[i]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); if (space != NULL) relocate.targetAddresses[descr->segmentIndex] = space->bottom; if (descr->segmentData == 0) { // No data - just an entry in the index. if (space == NULL/* || descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/) { errorResult = "Mismatch for existing memory space"; return false; } } else if ((descr->segmentFlags & SSF_OVERWRITE) == 0) { // New segment. if (space != NULL) { errorResult = "Segment already exists"; return false; } // Allocate memory for the new segment. unsigned mFlags = (descr->segmentFlags & SSF_WRITABLE ? MTF_WRITEABLE : 0) | (descr->segmentFlags & SSF_NOOVERWRITE ? MTF_NO_OVERWRITE : 0) | (descr->segmentFlags & SSF_BYTES ? MTF_BYTES : 0) | (descr->segmentFlags & SSF_CODE ? MTF_EXECUTABLE : 0); PermanentMemSpace *newSpace = gMem.AllocateNewPermanentSpace(descr->segmentSize, mFlags, descr->segmentIndex, hierarchyDepth + 1); if (newSpace == 0) { errorResult = "Unable to allocate memory"; return false; } PolyWord *mem = newSpace->bottom; PolyWord* writeAble = newSpace->writeAble(mem); if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0) { errorResult = "Unable to seek segment"; return false; } if (readData(writeAble, descr->segmentSize, loadFile) != 1) { errorResult = "Unable to read segment"; return false; } // Fill unused space to the top of the area. gMem.FillUnusedSpace(writeAble +descr->segmentSize/sizeof(PolyWord), newSpace->spaceSize() - descr->segmentSize/sizeof(PolyWord)); // Leave it writable until we've done the relocations. relocate.targetAddresses[descr->segmentIndex] = mem; if (newSpace->noOverwrite) { ClearVolatile cwbr; cwbr.ScanAddressesInRegion(newSpace->bottom, newSpace->topPointer); } } } // Now read in the mutable overwrites and relocate. for (unsigned j = 0; j < relocate.nDescrs; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); ASSERT(space != NULL); // We should have created it. if (descr->segmentFlags & SSF_OVERWRITE) { if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 || fread(space->bottom, descr->segmentSize, 1, loadFile) != 1) { errorResult = "Unable to read segment"; return false; } } // Relocation. if (descr->segmentData != 0) { // Adjust the addresses in the loaded segment. for (PolyWord *p = space->bottom; p < space->top; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); relocate.RelocateObject(obj); p += length; } } // Process explicit relocations. // If we get errors just skip the error and continue rather than leave // everything in an unstable state. if (descr->relocations) { if (fseek(loadFile, descr->relocations, SEEK_SET) != 0) { errorResult = "Unable to read relocation segment"; return false; } for (unsigned k = 0; k < descr->relocationCount; k++) { RelocationEntry reloc; if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1) { errorResult = "Unable to read relocation segment"; return false; } MemSpace *toSpace = gMem.SpaceForIndex(reloc.targetSegment); if (toSpace == NULL) { errorResult = "Unknown space reference in relocation"; continue; } byte *setAddress = (byte*)space->bottom + reloc.relocAddress; byte *targetAddress = (byte*)toSpace->bottom + reloc.targetAddress; if (setAddress >= (byte*)space->top || targetAddress >= (byte*)toSpace->top) { errorResult = "Bad relocation"; continue; } ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind); } } } // Set the final permissions. for (unsigned j = 0; j < relocate.nDescrs; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; if (descr->segmentData != 0) { PermanentMemSpace* space = gMem.SpaceForIndex(descr->segmentIndex); gMem.CompletePermanentSpaceAllocation(space); } } // Add an entry to the hierarchy table for this file. if (! AddHierarchyEntry(thisFile, header.timeStamp)) return false; return true; // Succeeded } static void LoadState(TaskData *taskData, bool isHierarchy, Handle hFileList) // Load a saved state or a hierarchy. // hFileList is a list if this is a hierarchy and a single name if it is not. { StateLoader loader(isHierarchy, hFileList); // Request the main thread to do the load. This may set the error string if it failed. processes->MakeRootRequest(taskData, &loader); if (loader.errorResult != 0) { if (loader.errNumber == 0) raise_fail(taskData, loader.errorResult); else { AutoFree buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1)); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "%s: %S", loader.errorResult, (TCHAR *)loader.fileName); #else sprintf(buff, "%s: %s", loader.errorResult, (TCHAR *)loader.fileName); #endif raise_syscall(taskData, buff, loader.errNumber); } } } // Load a saved state file and any ancestors. POLYUNSIGNED PolyLoadState(FirstArgument threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); try { LoadState(taskData, false, pushedArg); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Load hierarchy. This provides a complete list of children and parents. POLYUNSIGNED PolyLoadHierarchy(FirstArgument threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); try { LoadState(taskData, true, pushedArg); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } /* * Additional functions to provide information or change saved-state files. */ // These functions do not affect the global state so can be executed by // the ML threads directly. static Handle ShowHierarchy(TaskData *taskData) // Return the list of files in the hierarchy. { Handle saved = taskData->saveVec.mark(); Handle list = SAVE(ListNull); // Process this in reverse order. for (unsigned i = hierarchyDepth; i > 0; i--) { Handle value = SAVE(C_string_to_Poly(taskData, hierarchyTable[i-1]->fileName)); Handle next = alloc_and_save(taskData, sizeof(ML_Cons_Cell)/sizeof(PolyWord)); DEREFLISTHANDLE(next)->h = value->Word(); DEREFLISTHANDLE(next)->t = list->Word(); taskData->saveVec.reset(saved); list = SAVE(next->Word()); } return list; } // Show the hierarchy. POLYUNSIGNED PolyShowHierarchy(FirstArgument threadId) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle result = 0; try { result = ShowHierarchy(taskData); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } static void RenameParent(TaskData *taskData, PolyWord childName, PolyWord parentName) // Change the name of the immediate parent stored in a child { // The name of the file to modify. AutoFree fileNameBuff(Poly_string_to_T_alloc(childName)); if (fileNameBuff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); // The new parent name to insert. AutoFree parentNameBuff(Poly_string_to_T_alloc(parentName)); if (parentNameBuff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); AutoClose loadFile(_tfopen(fileNameBuff, _T("r+b"))); // Open for reading and writing if ((FILE*)loadFile == NULL) { AutoFree buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1)); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff); #else sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff); #endif raise_syscall(taskData, buff, ERRORNUMBER); } SavedStateHeader header; // Read the header and check the signature. if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1) raise_fail(taskData, "Unable to load header"); if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0) raise_fail(taskData, "File is not a saved state"); if (header.headerVersion != SAVEDSTATEVERSION || header.headerLength != sizeof(SavedStateHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { raise_fail(taskData, "Unsupported version of saved state file"); } // Does this actually have a parent? if (header.parentNameEntry == 0) raise_fail(taskData, "File does not have a parent"); // At the moment the only entry in the string table is the parent // name so we can simply write a new one on the end of the file. // This makes the file grow slightly each time but it shouldn't be // significant. fseek(loadFile, 0, SEEK_END); header.stringTable = ftell(loadFile); // Remember where this is _fputtc(0, loadFile); // First byte of string table is zero _fputts(parentNameBuff, loadFile); _fputtc(0, loadFile); // A terminating null. header.stringTableSize = (_tcslen(parentNameBuff) + 2)*sizeof(TCHAR); // Now rewind and write the header with the revised string table. fseek(loadFile, 0, SEEK_SET); fwrite(&header, sizeof(header), 1, loadFile); } POLYUNSIGNED PolyRenameParent(FirstArgument threadId, PolyWord childName, PolyWord parentName) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); try { RenameParent(taskData, childName, parentName); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } static Handle ShowParent(TaskData *taskData, Handle hFileName) // Return the name of the immediate parent stored in a child { AutoFree fileNameBuff(Poly_string_to_T_alloc(hFileName->Word())); if (fileNameBuff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); AutoClose loadFile(_tfopen(fileNameBuff, _T("rb"))); if ((FILE*)loadFile == NULL) { AutoFree buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1)); if (buff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff); #else sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff); #endif raise_syscall(taskData, buff, ERRORNUMBER); } SavedStateHeader header; // Read the header and check the signature. if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1) raise_fail(taskData, "Unable to load header"); if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0) raise_fail(taskData, "File is not a saved state"); if (header.headerVersion != SAVEDSTATEVERSION || header.headerLength != sizeof(SavedStateHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { raise_fail(taskData, "Unsupported version of saved state file"); } // Does this have a parent? if (header.parentNameEntry != 0) { size_t toRead = header.stringTableSize-header.parentNameEntry; size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR)); // Always allow space for null terminator size_t roundedBytes = (elems + 1) * sizeof(TCHAR); AutoFree parentFileName((TCHAR *)malloc(roundedBytes)); if (parentFileName == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); if (header.parentNameEntry >= header.stringTableSize /* Bad entry */ || fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 || fread(parentFileName, 1, toRead, loadFile) != toRead) { raise_fail(taskData, "Unable to read parent file name"); } parentFileName[elems] = 0; // Should already be null-terminated, but just in case. // Convert the name into a Poly string and then build a "Some" value. // It's possible, although silly, to have the empty string as a parent name. Handle resVal = SAVE(C_string_to_Poly(taskData, parentFileName)); Handle result = alloc_and_save(taskData, 1); DEREFHANDLE(result)->Set(0, resVal->Word()); return result; } else return SAVE(NONE_VALUE); } // Return the name of the immediate parent stored in a child POLYUNSIGNED PolyShowParent(FirstArgument threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); Handle result = 0; try { result = ShowParent(taskData, pushedArg); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } // Module system #define MODULESIGNATURE "POLYMODU" #define MODULEVERSION 2 typedef struct _moduleHeader { // These entries are primarily to check that we have a valid // saved state file before we try to interpret anything else. char headerSignature[8]; // Should contain MODULESIGNATURE unsigned headerVersion; // Should contain MODULEVERSION unsigned headerLength; // Number of bytes in the header unsigned segmentDescrLength; // Number of bytes in a descriptor // These entries contain the real data. off_t segmentDescr; // Position of segment descriptor table unsigned segmentDescrCount; // Number of segment descriptors in the table time_t timeStamp; // The time stamp for this file. time_t executableTimeStamp; // The time stamp for the parent executable. // Root uintptr_t rootSegment; POLYUNSIGNED rootOffset; } ModuleHeader; // Store a module class ModuleStorer: public MainThreadRequest { public: ModuleStorer(const TCHAR *file, Handle r): MainThreadRequest(MTP_STOREMODULE), fileName(file), root(r), errorMessage(0), errCode(0) {} virtual void Perform(); const TCHAR *fileName; Handle root; const char *errorMessage; int errCode; }; class ModuleExport: public SaveStateExport { public: ModuleExport(): SaveStateExport(1/* Everything EXCEPT the executable. */) {} virtual void exportStore(void); // Write the data out. }; void ModuleStorer::Perform() { ModuleExport exporter; #if (defined(_WIN32) && defined(UNICODE)) exporter.exportFile = _wfopen(fileName, L"wb"); #else exporter.exportFile = fopen(fileName, "wb"); #endif if (exporter.exportFile == NULL) { errorMessage = "Cannot open export file"; errCode = ERRORNUMBER; return; } // RunExport copies everything reachable from the root, except data from // the executable because we've set the hierarchy to 1, using CopyScan. // It builds the tables in the export data structure then calls exportStore // to actually write the data. if (! root->Word().IsDataPtr()) { // If we have a completely empty module the list may be null. // This needs to be dealt with at a higher level. errorMessage = "Module root is not an address"; return; } exporter.RunExport(root->WordP()); errorMessage = exporter.errorMessage; // This will be null unless there's been an error. } void ModuleExport::exportStore(void) { // What we need to do here is implement the export in a similar way to e.g. PECOFFExport::exportStore // This is copied from SaveRequest::Perform and should be common code. ModuleHeader modHeader; memset(&modHeader, 0, sizeof(modHeader)); modHeader.headerLength = sizeof(modHeader); memcpy(modHeader.headerSignature, MODULESIGNATURE, sizeof(modHeader.headerSignature)); modHeader.headerVersion = MODULEVERSION; modHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr); modHeader.executableTimeStamp = exportTimeStamp; { unsigned rootArea = findArea(this->rootFunction); struct _memTableEntry *mt = &memTable[rootArea]; modHeader.rootSegment = mt->mtIndex; modHeader.rootOffset = (POLYUNSIGNED)((char*)this->rootFunction - (char*)mt->mtOriginalAddr); } modHeader.timeStamp = getBuildTime(); modHeader.segmentDescrCount = this->memTableEntries; // One segment for each space. // Write out the header. fwrite(&modHeader, sizeof(modHeader), 1, this->exportFile); SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [this->memTableEntries]; // We need an entry in the descriptor tables for each segment in the executable because // we may have relocations that refer to addresses in it. for (unsigned j = 0; j < this->memTableEntries; j++) { SavedStateSegmentDescr *thisDescr = &descrs[j]; memoryTableEntry *entry = &this->memTable[j]; memset(thisDescr, 0, sizeof(SavedStateSegmentDescr)); thisDescr->relocationSize = sizeof(RelocationEntry); thisDescr->segmentIndex = (unsigned)entry->mtIndex; thisDescr->segmentSize = entry->mtLength; // Set this even if we don't write it. thisDescr->originalAddress = entry->mtOriginalAddr; if (entry->mtFlags & MTF_WRITEABLE) { thisDescr->segmentFlags |= SSF_WRITABLE; if (entry->mtFlags & MTF_NO_OVERWRITE) thisDescr->segmentFlags |= SSF_NOOVERWRITE; if ((entry->mtFlags & MTF_NO_OVERWRITE) == 0) thisDescr->segmentFlags |= SSF_OVERWRITE; if (entry->mtFlags & MTF_BYTES) thisDescr->segmentFlags |= SSF_BYTES; } if (entry->mtFlags & MTF_EXECUTABLE) thisDescr->segmentFlags |= SSF_CODE; } // Write out temporarily. Will be overwritten at the end. modHeader.segmentDescr = ftell(this->exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, this->exportFile); // Write out the relocations and the data. for (unsigned k = 0; k < this->memTableEntries; k++) { SavedStateSegmentDescr *thisDescr = &descrs[k]; memoryTableEntry *entry = &this->memTable[k]; if (k >= newAreas) // Not permanent areas { thisDescr->relocations = ftell(this->exportFile); // Have to write this out. this->relocationCount = 0; // Create the relocation table. char *start = (char*)entry->mtOriginalAddr; char *end = start + entry->mtLength; for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); // For saved states we don't include explicit relocations except // in code but it's easier if we do for modules. if (length != 0 && obj->IsCodeObject()) machineDependent->ScanConstantsWithinCode(obj, this); relocateObject(obj); p += length; } thisDescr->relocationCount = this->relocationCount; // Write out the data. thisDescr->segmentData = ftell(exportFile); fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exportFile); } } // Rewrite the header and the segment tables now they're complete. fseek(exportFile, 0, SEEK_SET); fwrite(&modHeader, sizeof(modHeader), 1, exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, exportFile); delete[](descrs); fclose(exportFile); exportFile = NULL; } // Store a module POLYUNSIGNED PolyStoreModule(FirstArgument threadId, PolyWord name, PolyWord contents) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedContents = taskData->saveVec.push(contents); try { TempString fileName(name); ModuleStorer storer(fileName, pushedContents); processes->MakeRootRequest(taskData, &storer); if (storer.errorMessage) raise_syscall(taskData, storer.errorMessage, storer.errCode); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Load a module. class ModuleLoader: public MainThreadRequest { public: ModuleLoader(TaskData *taskData, const TCHAR *file): MainThreadRequest(MTP_LOADMODULE), callerTaskData(taskData), fileName(file), errorResult(NULL), errNumber(0), rootHandle(0) {} virtual void Perform(); TaskData *callerTaskData; const TCHAR *fileName; const char *errorResult; int errNumber; Handle rootHandle; }; void ModuleLoader::Perform() { AutoClose loadFile(_tfopen(fileName, _T("rb"))); if ((FILE*)loadFile == NULL) { errorResult = "Cannot open load file"; errNumber = ERRORNUMBER; return; } ModuleHeader header; // Read the header and check the signature. if (fread(&header, sizeof(ModuleHeader), 1, loadFile) != 1) { errorResult = "Unable to load header"; return; } if (strncmp(header.headerSignature, MODULESIGNATURE, sizeof(header.headerSignature)) != 0) { errorResult = "File is not a Poly/ML module"; return; } if (header.headerVersion != MODULEVERSION || header.headerLength != sizeof(ModuleHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { errorResult = "Unsupported version of module file"; return; } if (header.executableTimeStamp != exportTimeStamp) { // Time-stamp does not match executable. errorResult = "Module was exported from a different executable or the executable has changed"; return; } LoadRelocate relocate; relocate.nDescrs = header.segmentDescrCount; relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs]; if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 || fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs) { errorResult = "Unable to read segment descriptors"; return; } { unsigned maxIndex = 0; for (unsigned i = 0; i < relocate.nDescrs; i++) if (relocate.descrs[i].segmentIndex > maxIndex) maxIndex = relocate.descrs[i].segmentIndex; relocate.targetAddresses = new PolyWord*[maxIndex+1]; for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0; } // Read in and create the new segments first. If we have problems, // in particular if we have run out of memory, then it's easier to recover. for (unsigned i = 0; i < relocate.nDescrs; i++) { SavedStateSegmentDescr *descr = &relocate.descrs[i]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); if (descr->segmentData == 0) { // No data - just an entry in the index. if (space == NULL/* || descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/) { errorResult = "Mismatch for existing memory space"; return; } else relocate.targetAddresses[descr->segmentIndex] = space->bottom; } else { // New segment. if (space != NULL) { errorResult = "Segment already exists"; return; } // Allocate memory for the new segment. size_t actualSize = descr->segmentSize; MemSpace *space; if (descr->segmentFlags & SSF_CODE) { CodeSpace *cSpace = gMem.NewCodeSpace(actualSize); if (cSpace == 0) { errorResult = "Unable to allocate memory"; return; } space = cSpace; cSpace->firstFree = (PolyWord*)((byte*)space->bottom + descr->segmentSize); if (cSpace->firstFree != cSpace->top) gMem.FillUnusedSpace(cSpace->firstFree, cSpace->top - cSpace->firstFree); } else { LocalMemSpace *lSpace = gMem.NewLocalSpace(actualSize, descr->segmentFlags & SSF_WRITABLE); if (lSpace == 0) { errorResult = "Unable to allocate memory"; return; } space = lSpace; lSpace->lowerAllocPtr = (PolyWord*)((byte*)lSpace->bottom + descr->segmentSize); } if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0) { errorResult = "Unable to seek to segment"; return; } if (readData(space->bottom, descr->segmentSize, loadFile) != 1) { errorResult = "Unable to read segment"; return; } relocate.targetAddresses[descr->segmentIndex] = space->bottom; if (space->isMutable && (descr->segmentFlags & SSF_BYTES) != 0) { ClearVolatile cwbr; cwbr.ScanAddressesInRegion(space->bottom, (PolyWord*)((byte*)space->bottom + descr->segmentSize)); } } } // Now deal with relocation. for (unsigned j = 0; j < relocate.nDescrs; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; PolyWord *baseAddr = relocate.targetAddresses[descr->segmentIndex]; ASSERT(baseAddr != NULL); // We should have created it. // Process explicit relocations. // If we get errors just skip the error and continue rather than leave // everything in an unstable state. if (descr->relocations) { if (fseek(loadFile, descr->relocations, SEEK_SET) != 0) errorResult = "Unable to read relocation segment"; for (unsigned k = 0; k < descr->relocationCount; k++) { RelocationEntry reloc; if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1) errorResult = "Unable to read relocation segment"; byte *setAddress = (byte*)baseAddr + reloc.relocAddress; byte *targetAddress = (byte*)relocate.targetAddresses[reloc.targetSegment] + reloc.targetAddress; ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind); } } } // Get the root address. Push this to the caller's save vec. If we put the // newly created areas into local memory we could get a GC as soon as we // complete this root request. { PolyWord *baseAddr = relocate.targetAddresses[header.rootSegment]; rootHandle = callerTaskData->saveVec.push((PolyObject*)((byte*)baseAddr + header.rootOffset)); } } static Handle LoadModule(TaskData *taskData, Handle args) { TempString fileName(args->Word()); ModuleLoader loader(taskData, fileName); processes->MakeRootRequest(taskData, &loader); if (loader.errorResult != 0) { if (loader.errNumber == 0) raise_fail(taskData, loader.errorResult); else { AutoFree buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1)); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "%s: %S", loader.errorResult, loader.fileName); #else sprintf(buff, "%s: %s", loader.errorResult, loader.fileName); #endif raise_syscall(taskData, buff, loader.errNumber); } } return loader.rootHandle; } // Load a module POLYUNSIGNED PolyLoadModule(FirstArgument threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); Handle result = 0; try { result = LoadModule(taskData, pushedArg); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } PolyObject *InitHeaderFromExport(struct _exportDescription *exports) { // Check the structure sizes stored in the export structure match the versions // used in this library. if (exports->structLength != sizeof(exportDescription) || exports->memTableSize != sizeof(memoryTableEntry) || exports->rtsVersion < FIRST_supported_version || exports->rtsVersion > LAST_supported_version) { #if (FIRST_supported_version == LAST_supported_version) Exit("The exported object file has version %0.2f but this library supports %0.2f", ((float)exports->rtsVersion) / 100.0, ((float)FIRST_supported_version) / 100.0); #else Exit("The exported object file has version %0.2f but this library supports %0.2f-%0.2f", ((float)exports->rtsVersion) / 100.0, ((float)FIRST_supported_version) / 100.0, ((float)LAST_supported_version) / 100.0); #endif } // We could also check the RTS version and the architecture. exportTimeStamp = exports->timeStamp; // Needed for load and save. memoryTableEntry *memTable = exports->memTable; #ifdef POLYML32IN64 // We need to copy this into the heap before beginning execution. // This is very like loading a saved state and the code should probably // be merged. LoadRelocate relocate(true); relocate.nDescrs = exports->memTableEntries; relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs]; relocate.targetAddresses = new PolyWord*[exports->memTableEntries]; relocate.originalBaseAddr = (PolyWord*)exports->originalBaseAddr; PolyObject *root = 0; for (unsigned i = 0; i < exports->memTableEntries; i++) { relocate.descrs[i].segmentIndex = memTable[i].mtIndex; relocate.descrs[i].originalAddress = memTable[i].mtOriginalAddr; relocate.descrs[i].segmentSize = memTable[i].mtLength; PermanentMemSpace *newSpace = gMem.AllocateNewPermanentSpace(memTable[i].mtLength, (unsigned)memTable[i].mtFlags, (unsigned)memTable[i].mtIndex); if (newSpace == 0) Exit("Unable to initialise a permanent memory space"); PolyWord *mem = newSpace->bottom; memcpy(newSpace->writeAble(mem), memTable[i].mtCurrentAddr, memTable[i].mtLength); PolyWord* unused = mem + memTable[i].mtLength / sizeof(PolyWord); gMem.FillUnusedSpace(newSpace->writeAble(unused), newSpace->spaceSize() - memTable[i].mtLength / sizeof(PolyWord)); if (newSpace == 0) Exit("Unable to initialise a permanent memory space"); relocate.targetAddresses[i] = mem; relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress, (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize - 1)); // Relocate the root function. if (exports->rootFunction >= memTable[i].mtCurrentAddr && exports->rootFunction < (char*)memTable[i].mtCurrentAddr + memTable[i].mtLength) { root = (PolyObject*)((char*)mem + ((char*)exports->rootFunction - (char*)memTable[i].mtCurrentAddr)); } } // Now relocate the addresses for (unsigned j = 0; j < exports->memTableEntries; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); // Any relative addresses have to be corrected by adding this. relocate.relativeOffset = (PolyWord*)descr->originalAddress - space->bottom; for (PolyWord *p = space->bottom; p < space->top; ) { #ifdef POLYML32IN64 if ((((uintptr_t)p) & 4) == 0) { // Skip any padding. The length word should be on an odd-word boundary. p++; continue; } #endif p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); relocate.RelocateObject(obj); p += length; } } // Set the final permissions. for (unsigned j = 0; j < exports->memTableEntries; j++) { PermanentMemSpace *space = gMem.SpaceForIndex(memTable[j].mtIndex); gMem.CompletePermanentSpaceAllocation(space); } return root; #else for (unsigned i = 0; i < exports->memTableEntries; i++) { // Construct a new space for each of the entries. if (gMem.NewPermanentSpace( (PolyWord*)memTable[i].mtCurrentAddr, memTable[i].mtLength / sizeof(PolyWord), (unsigned)memTable[i].mtFlags, (unsigned)memTable[i].mtIndex) == 0) Exit("Unable to initialise a permanent memory space"); } return (PolyObject *)exports->rootFunction; #endif } // Return the system directory for modules. This is configured differently // in Unix and in Windows. POLYUNSIGNED PolyGetModuleDirectory(FirstArgument threadId) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle result = 0; try { #if (defined(MODULEDIR)) result = SAVE(C_string_to_Poly(taskData, MODULEDIR)); #elif (defined(_WIN32)) { // This registry key is configured when Poly/ML is installed using the installer. // It gives the path to the Poly/ML installation directory. We return the // Modules subdirectory. HKEY hk; if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, _T("SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\PolyML.exe"), 0, KEY_QUERY_VALUE, &hk) == ERROR_SUCCESS) { DWORD valSize; if (RegQueryValueEx(hk, _T("Path"), 0, NULL, NULL, &valSize) == ERROR_SUCCESS) { #define MODULEDIR _T("Modules") TempString buff((TCHAR*)malloc(valSize + (_tcslen(MODULEDIR) + 1) * sizeof(TCHAR))); DWORD dwType; if (RegQueryValueEx(hk, _T("Path"), 0, &dwType, (LPBYTE)(LPTSTR)buff, &valSize) == ERROR_SUCCESS) { // The registry entry should end with a backslash. _tcscat(buff, MODULEDIR); result = SAVE(C_string_to_Poly(taskData, buff)); } } RegCloseKey(hk); } result = SAVE(C_string_to_Poly(taskData, "")); } #else result = SAVE(C_string_to_Poly(taskData, "")); #endif } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } struct _entrypts savestateEPT[] = { { "PolySaveState", (polyRTSFunction)&PolySaveState }, { "PolyLoadState", (polyRTSFunction)&PolyLoadState }, { "PolyShowHierarchy", (polyRTSFunction)&PolyShowHierarchy }, { "PolyRenameParent", (polyRTSFunction)&PolyRenameParent }, { "PolyShowParent", (polyRTSFunction)&PolyShowParent }, { "PolyStoreModule", (polyRTSFunction)&PolyStoreModule }, { "PolyLoadModule", (polyRTSFunction)&PolyLoadModule }, { "PolyLoadHierarchy", (polyRTSFunction)&PolyLoadHierarchy }, { "PolyGetModuleDirectory", (polyRTSFunction)&PolyGetModuleDirectory }, { NULL, NULL } // End of list. };