diff --git a/libpolyml/exporter.cpp b/libpolyml/exporter.cpp index 591903c4..f2649e4d 100644 --- a/libpolyml/exporter.cpp +++ b/libpolyml/exporter.cpp @@ -1,914 +1,924 @@ /* Title: exporter.cpp - Export a function as an object or C file - Copyright (c) 2006-7, 2015, 2016-19 David C.J. Matthews + Copyright (c) 2006-7, 2015, 2016-20 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_SYS_PARAM_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #if (defined(_WIN32)) #include #else #define _T(x) x #define _tcslen strlen #define _tcscmp strcmp #define _tcscat strcat #endif #include "exporter.h" #include "save_vec.h" #include "polystring.h" #include "run_time.h" #include "osmem.h" #include "scanaddrs.h" #include "gc.h" #include "machine_dep.h" #include "diagnostics.h" #include "memmgr.h" #include "processes.h" // For IO_SPACING #include "sys.h" // For EXC_Fail #include "rtsentry.h" #include "pexport.h" #ifdef HAVE_PECOFF #include "pecoffexport.h" #elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H) #include "elfexport.h" #elif defined(HAVE_MACH_O_RELOC_H) #include 
"machoexport.h" #endif #if (defined(_WIN32)) #define NOMEMORY ERROR_NOT_ENOUGH_MEMORY #define ERRORNUMBER _doserrno #else #define NOMEMORY ENOMEM #define ERRORNUMBER errno #endif extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolyExport(PolyObject *threadId, PolyWord fileName, PolyWord root); POLYEXTERNALSYMBOL POLYUNSIGNED PolyExportPortable(PolyObject *threadId, PolyWord fileName, PolyWord root); } /* To export the function and everything reachable from it we need to copy all the objects into a new area. We leave tombstones in the original objects by overwriting the length word. That prevents us from copying an object twice and breaks loops. Once we've copied the objects we then have to go back over the memory and turn the tombstones back into length words. */ GraveYard::~GraveYard() { free(graves); } // Used to calculate the space required for the ordinary mutables // and the no-overwrite mutables. They are interspersed in local space. class MutSizes : public ScanAddress { public: MutSizes() : mutSize(0), noOverSize(0) {} virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }// No Actually used virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord) { const POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord) + 1; // Include length word if (OBJ_IS_NO_OVERWRITE(lengthWord)) noOverSize += words; else mutSize += words; } POLYUNSIGNED mutSize, noOverSize; }; CopyScan::CopyScan(unsigned h/*=0*/): hierarchy(h) { defaultImmSize = defaultMutSize = defaultCodeSize = defaultNoOverSize = 0; tombs = 0; graveYard = 0; } void CopyScan::initialise(bool isExport/*=true*/) { ASSERT(gMem.eSpaces.size() == 0); // Set the space sizes to a proportion of the space currently in use. // Computing these sizes is not obvious because CopyScan is used both // for export and for saved states. 
For saved states in particular we // want to use a smaller size because they are retained after we save // the state and if we have many child saved states it's important not // to waste memory. if (hierarchy == 0) { graveYard = new GraveYard[gMem.pSpaces.size()]; if (graveYard == 0) { if (debugOptions & DEBUG_SAVING) Log("SAVE: Unable to allocate graveyard, size: %lu.\n", gMem.pSpaces.size()); throw MemoryException(); } } for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->hierarchy >= hierarchy) { // Include this if we're exporting (hierarchy=0) or if we're saving a state // and will include this in the new state. size_t size = (space->top-space->bottom)/4; if (space->noOverwrite) defaultNoOverSize += size; else if (space->isMutable) defaultMutSize += size; else if (space->isCode) defaultCodeSize += size; else defaultImmSize += size; if (space->hierarchy == 0 && ! space->isMutable) { // We need a separate area for the tombstones because this is read-only graveYard[tombs].graves = (PolyWord*)calloc(space->spaceSize(), sizeof(PolyWord)); if (graveYard[tombs].graves == 0) { if (debugOptions & DEBUG_SAVING) Log("SAVE: Unable to allocate graveyard for permanent space, size: %lu.\n", space->spaceSize() * sizeof(PolyWord)); throw MemoryException(); } if (debugOptions & DEBUG_SAVING) Log("SAVE: Allocated graveyard for permanent space, %p size: %lu.\n", graveYard[tombs].graves, space->spaceSize() * sizeof(PolyWord)); graveYard[tombs].startAddr = space->bottom; graveYard[tombs].endAddr = space->top; tombs++; } } } for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { LocalMemSpace *space = *i; uintptr_t size = space->allocatedSpace(); // It looks as though the mutable size generally gets // overestimated while the immutable size is correct. 
if (space->isMutable) { MutSizes sizeMut; sizeMut.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr); sizeMut.ScanAddressesInRegion(space->upperAllocPtr, space->top); defaultNoOverSize += sizeMut.noOverSize / 4; defaultMutSize += sizeMut.mutSize / 4; } else defaultImmSize += size/2; } for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) { CodeSpace *space = *i; uintptr_t size = space->spaceSize(); defaultCodeSize += size/2; } if (isExport) { // Minimum 1M words. if (defaultMutSize < 1024*1024) defaultMutSize = 1024*1024; if (defaultImmSize < 1024*1024) defaultImmSize = 1024*1024; if (defaultCodeSize < 1024*1024) defaultCodeSize = 1024*1024; #ifdef MACOSX // Limit the segment size for Mac OS X. The linker has a limit of 2^24 relocations // in a segment so this is a crude way of ensuring the limit isn't exceeded. // It's unlikely to be exceeded by the code itself. // Actually, from trial-and-error, the limit seems to be around 6M. if (defaultMutSize > 6 * 1024 * 1024) defaultMutSize = 6 * 1024 * 1024; if (defaultImmSize > 6 * 1024 * 1024) defaultImmSize = 6 * 1024 * 1024; #endif if (defaultNoOverSize < 4096) defaultNoOverSize = 4096; // Except for the no-overwrite area } else { // Much smaller minimum sizes for saved states. if (defaultMutSize < 1024) defaultMutSize = 1024; if (defaultImmSize < 4096) defaultImmSize = 4096; if (defaultCodeSize < 4096) defaultCodeSize = 4096; if (defaultNoOverSize < 4096) defaultNoOverSize = 4096; // Set maximum sizes as well. We may have insufficient contiguous space for // very large areas. 
if (defaultMutSize > 1024 * 1024) defaultMutSize = 1024 * 1024; if (defaultImmSize > 1024 * 1024) defaultImmSize = 1024 * 1024; if (defaultCodeSize > 1024 * 1024) defaultCodeSize = 1024 * 1024; if (defaultNoOverSize > 1024 * 1024) defaultNoOverSize = 1024 * 1024; } if (debugOptions & DEBUG_SAVING) Log("SAVE: Copyscan default sizes: Immutable: %" POLYUFMT ", Mutable: %" POLYUFMT ", Code: %" POLYUFMT ", No-overwrite %" POLYUFMT ".\n", defaultImmSize, defaultMutSize, defaultCodeSize, defaultNoOverSize); } CopyScan::~CopyScan() { gMem.DeleteExportSpaces(); if (graveYard) delete[](graveYard); } // This function is called for each address in an object // once it has been copied to its new location. We copy first // then scan to update the addresses. POLYUNSIGNED CopyScan::ScanAddressAt(PolyWord *pt) { PolyWord val = *pt; // Ignore integers. if (IS_INT(val) || val == PolyWord::FromUnsigned(0)) return 0; PolyObject *obj = val.AsObjPtr(); POLYUNSIGNED l = ScanAddress(&obj); *pt = obj; return l; } // This function is called for each address in an object // once it has been copied to its new location. We copy first // then scan to update the addresses. POLYUNSIGNED CopyScan::ScanAddress(PolyObject **pt) { PolyObject *obj = *pt; MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1); ASSERT(space != 0); // We may sometimes get addresses that have already been updated // to point to the new area. e.g. (only?) in the case of constants // that have been updated in ScanConstantsWithinCode. if (space->spaceType == ST_EXPORT) return 0; // If this is at a lower level than the hierarchy we are saving // then leave it untouched. if (space->spaceType == ST_PERMANENT) { PermanentMemSpace *pmSpace = (PermanentMemSpace*)space; if (pmSpace->hierarchy < hierarchy) return 0; } // Have we already scanned this? if (obj->ContainsForwardingPtr()) { // Update the address to the new value. 
#ifdef POLYML32IN64 PolyObject *newAddr; if (space->isCode) newAddr = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); else newAddr = obj->GetForwardingPtr(); #else PolyObject *newAddr = obj->GetForwardingPtr(); #endif *pt = newAddr; return 0; // No need to scan it again. } else if (space->spaceType == ST_PERMANENT) { // See if we have this in the grave-yard. for (unsigned i = 0; i < tombs; i++) { GraveYard *g = &graveYard[i]; if ((PolyWord*)obj >= g->startAddr && (PolyWord*)obj < g->endAddr) { PolyWord *tombAddr = g->graves + ((PolyWord*)obj - g->startAddr); PolyObject *tombObject = (PolyObject*)tombAddr; if (tombObject->ContainsForwardingPtr()) { #ifdef POLYML32IN64 PolyObject *newAddr; if (space->isCode) newAddr = (PolyObject*)(globalCodeBase + ((tombObject->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); else newAddr = tombObject->GetForwardingPtr(); #else PolyObject *newAddr = tombObject->GetForwardingPtr(); #endif *pt = newAddr; return 0; } break; // No need to look further } } } // No, we need to copy it. ASSERT(space->spaceType == ST_LOCAL || space->spaceType == ST_PERMANENT || space->spaceType == ST_CODE); POLYUNSIGNED lengthWord = obj->LengthWord(); POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord); PolyObject *newObj = 0; + PolyObject* writeAble = 0; bool isMutableObj = obj->IsMutable(); bool isNoOverwrite = false; bool isByteObj = false; bool isCodeObj = false; if (isMutableObj) { isNoOverwrite = obj->IsNoOverwriteObject(); isByteObj = obj->IsByteObject(); } else isCodeObj = obj->IsCodeObject(); // Allocate a new address for the object. 
for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++) { PermanentMemSpace *space = *i; if (isMutableObj == space->isMutable && isNoOverwrite == space->noOverwrite && isByteObj == space->byteOnly && isCodeObj == space->isCode) { ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom); size_t spaceLeft = space->top - space->topPointer; if (spaceLeft > words) { newObj = (PolyObject*)(space->topPointer + 1); + writeAble = space->writeAble(newObj); space->topPointer += words + 1; #ifdef POLYML32IN64 // Maintain the odd-word alignment of topPointer if ((words & 1) == 0 && space->topPointer < space->top) { - *space->topPointer = PolyWord::FromUnsigned(0); + *space->writeAble(space->topPointer) = PolyWord::FromUnsigned(0); space->topPointer++; } #endif break; } } } if (newObj == 0) { // Didn't find room in the existing spaces. Create a new space. uintptr_t spaceWords; if (isMutableObj) { if (isNoOverwrite) spaceWords = defaultNoOverSize; else spaceWords = defaultMutSize; } else { if (isCodeObj) spaceWords = defaultCodeSize; else spaceWords = defaultImmSize; } if (spaceWords <= words) spaceWords = words + 1; // Make sure there's space for this object. PermanentMemSpace *space = gMem.NewExportSpace(spaceWords, isMutableObj, isNoOverwrite, isCodeObj); if (isByteObj) space->byteOnly = true; if (space == 0) { if (debugOptions & DEBUG_SAVING) Log("SAVE: Unable to allocate export space, size: %lu.\n", spaceWords); // Unable to allocate this. 
throw MemoryException(); } newObj = (PolyObject*)(space->topPointer + 1); + writeAble = space->writeAble(newObj); space->topPointer += words + 1; #ifdef POLYML32IN64 // Maintain the odd-word alignment of topPointer if ((words & 1) == 0 && space->topPointer < space->top) { *space->topPointer = PolyWord::FromUnsigned(0); space->topPointer++; } #endif ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom); } - newObj->SetLengthWord(lengthWord); // copy length word + writeAble->SetLengthWord(lengthWord); // copy length word - memcpy(newObj, obj, words * sizeof(PolyWord)); + memcpy(writeAble, obj, words * sizeof(PolyWord)); if (space->spaceType == ST_PERMANENT && !space->isMutable && ((PermanentMemSpace*)space)->hierarchy == 0) { // The immutable permanent areas are read-only. unsigned m; for (m = 0; m < tombs; m++) { GraveYard *g = &graveYard[m]; if ((PolyWord*)obj >= g->startAddr && (PolyWord*)obj < g->endAddr) { PolyWord *tombAddr = g->graves + ((PolyWord*)obj - g->startAddr); PolyObject *tombObject = (PolyObject*)tombAddr; #ifdef POLYML32IN64 if (isCodeObj) { POLYUNSIGNED ll = (POLYUNSIGNED)(((PolyWord*)newObj - globalCodeBase) >> 1 | _OBJ_TOMBSTONE_BIT); tombObject->SetLengthWord(ll); } else tombObject->SetForwardingPtr(newObj); #else tombObject->SetForwardingPtr(newObj); #endif break; // No need to look further } } ASSERT(m < tombs); // Should be there. } + else if (isCodeObj) #ifdef POLYML32IN64 // If this is a code address we can't use the usual forwarding pointer format. // Instead we have to compute the offset relative to the base of the code. - else if (isCodeObj) { POLYUNSIGNED ll = (POLYUNSIGNED)(((PolyWord*)newObj-globalCodeBase) >> 1 | _OBJ_TOMBSTONE_BIT); - obj->SetLengthWord(ll); + gMem.SpaceForAddress(obj)->writeAble(obj)->SetLengthWord(ll); } +#else + gMem.SpaceForAddress(obj)->writeAble(obj)->SetForwardingPtr(newObj); #endif else obj->SetForwardingPtr(newObj); // Put forwarding pointer in old object. 
if (OBJ_IS_CODE_OBJECT(lengthWord)) { // We don't need to worry about flushing the instruction cache // since we're not going to execute this code here. // We do have to update any relative addresses within the code // to take account of its new position. We have to do that now // even though ScanAddressesInObject will do it again because this // is the only point where we have both the old and the new addresses. machineDependent->ScanConstantsWithinCode(newObj, obj, words, this); } *pt = newObj; // Update it to the newly copied object. return lengthWord; // This new object needs to be scanned. } // The address of code in the code area. We treat this as a normal heap cell. // We will probably need to copy this and to process addresses within it. POLYUNSIGNED CopyScan::ScanCodeAddressAt(PolyObject **pt) { POLYUNSIGNED lengthWord = ScanAddress(pt); if (lengthWord) ScanAddressesInObject(*pt, lengthWord); return 0; } PolyObject *CopyScan::ScanObjectAddress(PolyObject *base) { PolyWord val = base; // Scan this as an address. POLYUNSIGNED lengthWord = CopyScan::ScanAddressAt(&val); if (lengthWord) ScanAddressesInObject(val.AsObjPtr(), lengthWord); return val.AsObjPtr(); } #define MAX_EXTENSION 4 // The longest extension we may need to add is ".obj" // Convert the forwarding pointers in a region back into length words. // Generally if this object has a forwarding pointer that's // because we've moved it into the export region. We can, // though, get multiple levels of forwarding if there is an object // that has been shifted up by a garbage collection, leaving a forwarding // pointer and then that object has been moved to the export region. // We mustn't turn locally forwarded values back into ordinary objects // because they could contain addresses that are no longer valid. 
static POLYUNSIGNED GetObjLength(PolyObject *obj) { if (obj->ContainsForwardingPtr()) { PolyObject *forwardedTo; #ifdef POLYML32IN64 { MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1); if (space->isCode) forwardedTo = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); else forwardedTo = obj->GetForwardingPtr(); } #else forwardedTo = obj->GetForwardingPtr(); #endif POLYUNSIGNED length = GetObjLength(forwardedTo); MemSpace *space = gMem.SpaceForAddress((PolyWord*)forwardedTo-1); if (space->spaceType == ST_EXPORT) - obj->SetLengthWord(length); + gMem.SpaceForAddress(obj)->writeAble(obj)->SetLengthWord(length); return length; } else { ASSERT(obj->ContainsNormalLengthWord()); return obj->LengthWord(); } } static void FixForwarding(PolyWord *pt, size_t space) { while (space) { pt++; PolyObject *obj = (PolyObject*)pt; #ifdef POLYML32IN64 if ((uintptr_t)obj & 4) { // Skip filler words needed to align to an even word space--; continue; // We've added 1 to pt so just loop. } #endif size_t length = OBJ_OBJECT_LENGTH(GetObjLength(obj)); pt += length; ASSERT(space > length); space -= length+1; } } class ExportRequest: public MainThreadRequest { public: ExportRequest(Handle root, Exporter *exp): MainThreadRequest(MTP_EXPORTING), exportRoot(root), exporter(exp) {} virtual void Perform() { exporter->RunExport(exportRoot->WordP()); } Handle exportRoot; Exporter *exporter; }; static void exporter(TaskData *taskData, Handle fileName, Handle root, const TCHAR *extension, Exporter *exports) { size_t extLen = _tcslen(extension); TempString fileNameBuff(Poly_string_to_T_alloc(fileName->Word(), extLen)); if (fileNameBuff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); size_t length = _tcslen(fileNameBuff); // Does it already have the extension? If not add it on. 
if (length < extLen || _tcscmp(fileNameBuff + length - extLen, extension) != 0) _tcscat(fileNameBuff, extension); #if (defined(_WIN32) && defined(UNICODE)) exports->exportFile = _wfopen(fileNameBuff, L"wb"); #else exports->exportFile = fopen(fileNameBuff, "wb"); #endif if (exports->exportFile == NULL) raise_syscall(taskData, "Cannot open export file", ERRORNUMBER); // Request a full GC to reduce the size of fix-ups. FullGC(taskData); // Request the main thread to do the export. ExportRequest request(root, exports); processes->MakeRootRequest(taskData, &request); if (exports->errorMessage) raise_fail(taskData, exports->errorMessage); } // This is called by the initial thread to actually do the export. void Exporter::RunExport(PolyObject *rootFunction) { Exporter *exports = this; PolyObject *copiedRoot = 0; CopyScan copyScan(hierarchy); try { copyScan.initialise(); // Copy the root and everything reachable from it into the temporary area. copiedRoot = copyScan.ScanObjectAddress(rootFunction); } catch (MemoryException &) { // If we ran out of memory. copiedRoot = 0; } // Fix the forwarding pointers. for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { LocalMemSpace *space = *i; // Local areas only have objects from the allocation pointer to the top. FixForwarding(space->bottom, space->lowerAllocPtr - space->bottom); FixForwarding(space->upperAllocPtr, space->top - space->upperAllocPtr); } for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { MemSpace *space = *i; // Permanent areas are filled with objects from the bottom. FixForwarding(space->bottom, space->top - space->bottom); } for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) { MemSpace *space = *i; // Code areas are filled with objects from the bottom. FixForwarding(space->bottom, space->top - space->bottom); } // Reraise the exception after cleaning up the forwarding pointers. 
if (copiedRoot == 0) { exports->errorMessage = "Insufficient Memory"; return; } // Copy the areas into the export object. size_t tableEntries = gMem.eSpaces.size(); unsigned memEntry = 0; if (hierarchy != 0) tableEntries += gMem.pSpaces.size(); exports->memTable = new memoryTableEntry[tableEntries]; // If we're constructing a module we need to include the global spaces. if (hierarchy != 0) { // Permanent spaces from the executable. for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->hierarchy < hierarchy) { memoryTableEntry *entry = &exports->memTable[memEntry++]; entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom; entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord); entry->mtIndex = space->index; entry->mtFlags = 0; if (space->isMutable) entry->mtFlags |= MTF_WRITEABLE; if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE; } } newAreas = memEntry; } for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++) { memoryTableEntry *entry = &exports->memTable[memEntry++]; PermanentMemSpace *space = *i; entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom; entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord); entry->mtIndex = hierarchy == 0 ? memEntry-1 : space->index; entry->mtFlags = 0; if (space->isMutable) { entry->mtFlags = MTF_WRITEABLE; if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE; } if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE; if (space->byteOnly) entry->mtFlags |= MTF_BYTES; } ASSERT(memEntry == tableEntries); exports->memTableEntries = memEntry; exports->rootFunction = copiedRoot; try { // This can raise MemoryException at least in PExport::exportStore. exports->exportStore(); } catch (MemoryException &) { exports->errorMessage = "Insufficient Memory"; } } // Functions called via the RTS call. 
Handle exportNative(TaskData *taskData, Handle args) { #ifdef HAVE_PECOFF // Windows including Cygwin #if (defined(_WIN32)) const TCHAR *extension = _T(".obj"); // Windows #else const char *extension = ".o"; // Cygwin #endif PECOFFExport exports; exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)), taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports); #elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H) // Most Unix including Linux, FreeBSD and Solaris. const char *extension = ".o"; ELFExport exports; exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)), taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports); #elif defined(HAVE_MACH_O_RELOC_H) // Mac OS-X const char *extension = ".o"; MachoExport exports; exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)), taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports); #else raise_exception_string (taskData, EXC_Fail, "Native export not available for this platform"); #endif return taskData->saveVec.push(TAGGED(0)); } Handle exportPortable(TaskData *taskData, Handle args) { PExport exports; exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)), taskData->saveVec.push(args->WordP()->Get(1)), _T(".txt"), &exports); return taskData->saveVec.push(TAGGED(0)); } POLYUNSIGNED PolyExport(PolyObject *threadId, PolyWord fileName, PolyWord root) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedName = taskData->saveVec.push(fileName); Handle pushedRoot = taskData->saveVec.push(root); try { #ifdef HAVE_PECOFF // Windows including Cygwin #if (defined(_WIN32)) const TCHAR *extension = _T(".obj"); // Windows #else const char *extension = ".o"; // Cygwin #endif PECOFFExport exports; exporter(taskData, pushedName, pushedRoot, extension, &exports); #elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H) // Most Unix including Linux, FreeBSD and 
Solaris. const char *extension = ".o"; ELFExport exports; exporter(taskData, pushedName, pushedRoot, extension, &exports); #elif defined(HAVE_MACH_O_RELOC_H) // Mac OS-X const char *extension = ".o"; MachoExport exports; exporter(taskData, pushedName, pushedRoot, extension, &exports); #else raise_exception_string (taskData, EXC_Fail, "Native export not available for this platform"); #endif } catch (...) { } // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); // Returns unit } POLYUNSIGNED PolyExportPortable(PolyObject *threadId, PolyWord fileName, PolyWord root) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedName = taskData->saveVec.push(fileName); Handle pushedRoot = taskData->saveVec.push(root); try { PExport exports; exporter(taskData, pushedName, pushedRoot, _T(".txt"), &exports); } catch (...) { } // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); // Returns unit } // Helper functions for exporting. We need to produce relocation information // and this code is common to every method. Exporter::Exporter(unsigned int h): exportFile(NULL), errorMessage(0), hierarchy(h), memTable(0), newAreas(0) { } Exporter::~Exporter() { delete[](memTable); if (exportFile) fclose(exportFile); } void Exporter::relocateValue(PolyWord *pt) { #ifndef POLYML32IN64 PolyWord q = *pt; if (IS_INT(q) || q == PolyWord::FromUnsigned(0)) {} else createRelocation(pt); #endif } +void Exporter::createRelocation(PolyWord* pt) +{ + *gMem.SpaceForAddress(pt)->writeAble(pt) = createRelocation(*pt, pt); +} + // Check through the areas to see where the address is. It must be // in one of them. 
unsigned Exporter::findArea(void *p) { for (unsigned i = 0; i < memTableEntries; i++) { if (p > memTable[i].mtOriginalAddr && p <= (char*)memTable[i].mtOriginalAddr + memTable[i].mtLength) return i; } { ASSERT(0); } return 0; } void Exporter::relocateObject(PolyObject *p) { if (p->IsByteObject()) { if (p->IsMutable() && p->IsWeakRefObject()) { // Weak mutable byte refs are used for external references and // also in the FFI for non-persistent values. bool isFuncPtr = true; const char *entryName = getEntryPointName(p, &isFuncPtr); if (entryName != 0) addExternalReference(p, entryName, isFuncPtr); // Clear the first word of the data. ASSERT(p->Length() >= sizeof(uintptr_t)/sizeof(PolyWord)); *(uintptr_t*)p = 0; } } else if (p->IsCodeObject()) { POLYUNSIGNED constCount; PolyWord *cp; ASSERT(! p->IsMutable() ); p->GetConstSegmentForCode(cp, constCount); /* Now the constants. */ for (POLYUNSIGNED i = 0; i < constCount; i++) relocateValue(&(cp[i])); } else if (p->IsClosureObject()) { #ifndef POLYML32IN64 ASSERT(0); #endif // This should only be used in 32-in-64 where we don't use relocations. } else /* Ordinary objects, essentially tuples. */ { POLYUNSIGNED length = p->Length(); for (POLYUNSIGNED i = 0; i < length; i++) relocateValue(p->Offset(i)); } } ExportStringTable::ExportStringTable(): strings(0), stringSize(0), stringAvailable(0) { } ExportStringTable::~ExportStringTable() { free(strings); } // Add a string to the string table, growing it if necessary. 
unsigned long ExportStringTable::makeEntry(const char *str) { unsigned len = (unsigned)strlen(str); unsigned long entry = stringSize; if (stringSize + len + 1 > stringAvailable) { stringAvailable = stringAvailable+stringAvailable/2; if (stringAvailable < stringSize + len + 1) stringAvailable = stringSize + len + 1 + 500; char* newStrings = (char*)realloc(strings, stringAvailable); if (newStrings == 0) { if (debugOptions & DEBUG_SAVING) Log("SAVE: Unable to realloc string table, size: %lu.\n", stringAvailable); throw MemoryException(); } else strings = newStrings; } strcpy(strings + stringSize, str); stringSize += len + 1; return entry; } struct _entrypts exporterEPT[] = { { "PolyExport", (polyRTSFunction)&PolyExport}, { "PolyExportPortable", (polyRTSFunction)&PolyExportPortable}, { NULL, NULL} // End of list. }; diff --git a/libpolyml/exporter.h b/libpolyml/exporter.h index bc0a217d..02a3b6fb 100644 --- a/libpolyml/exporter.h +++ b/libpolyml/exporter.h @@ -1,120 +1,120 @@ /* Title: exporter.h - Export a function as an object or C file - Copyright (c) 2006, 2015-17 David C.J. Matthews + Copyright (c) 2006, 2015-17, 2020 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef EXPORTER_H_INCLUDED #define EXPORTER_H_INCLUDED #include "globals.h" // For PolyWord #include "../polyexports.h" // For struct _memTableEntry #ifdef HAVE_STDIO_H #include // For FILE #endif class SaveVecEntry; typedef SaveVecEntry *Handle; class TaskData; extern Handle exportNative(TaskData *mdTaskData, Handle args); extern Handle exportPortable(TaskData *mdTaskData, Handle args); // This is the base class for the exporters for the various object-code formats. class Exporter { public: Exporter(unsigned int h=0); virtual ~Exporter(); virtual void exportStore(void) = 0; // Called by the root thread to do the work. void RunExport(PolyObject *rootFunction); protected: virtual PolyWord createRelocation(PolyWord p, void *relocAddr) = 0; void relocateValue(PolyWord *pt); void relocateObject(PolyObject *p); - void createRelocation(PolyWord *pt) { *pt = createRelocation(*pt, pt); } + void createRelocation(PolyWord *pt); unsigned findArea(void *p); // Find index of area that address is in. virtual void addExternalReference(void *p, const char *entryPoint, bool isFuncPtr) {} public: FILE *exportFile; const char *errorMessage; protected: unsigned int hierarchy; struct _memTableEntry *memTable; unsigned memTableEntries; PolyObject *rootFunction; // Address of the root function. unsigned newAreas; }; // The object-code exporters all use a similar string table format // consisting of null-terminated C-strings. class ExportStringTable { public: ExportStringTable(); ~ExportStringTable(); unsigned long makeEntry(const char *str); char *strings; unsigned long stringSize, stringAvailable; }; #include "scanaddrs.h" // Because permanent immutable areas are read-only we need to // have somewhere else to hold the tomb-stones. 
class GraveYard { public: GraveYard() { graves = 0; } ~GraveYard(); PolyWord *graves; PolyWord *startAddr, *endAddr; }; class CopyScan: public ScanAddress { public: CopyScan(unsigned h=0); void initialise(bool isExport=true); ~CopyScan(); protected: virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt); // Have to follow pointers from closures into code. virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt); POLYUNSIGNED ScanAddress(PolyObject **pt); public: virtual PolyObject *ScanObjectAddress(PolyObject *base); // Default sizes of the segments. uintptr_t defaultImmSize, defaultCodeSize, defaultMutSize, defaultNoOverSize; unsigned hierarchy; GraveYard *graveYard; unsigned tombs; }; extern struct _entrypts exporterEPT[]; #endif diff --git a/libpolyml/gc_mark_phase.cpp b/libpolyml/gc_mark_phase.cpp index 18b06448..2af92606 100644 --- a/libpolyml/gc_mark_phase.cpp +++ b/libpolyml/gc_mark_phase.cpp @@ -1,873 +1,882 @@ /* Title: Multi-Threaded Garbage Collector - Mark phase Copyright (c) 2010-12, 2015-16, 2019 David C. J. Matthews Based on the original garbage collector code Copyright 2000-2008 Cambridge University Technical Services Limited This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* This is the first, mark, phase of the garbage collector. It detects all reachable cells in the area being collected. 
At the end of the phase the bit-maps associated with the areas will have ones for words belonging to cells that must be retained and zeros for words that can be reused. This is now multi-threaded. The mark phase involves setting a bit in the header of each live cell and then a pass over the memory building the bitmaps and clearing this bit. It is unfortunate that we cannot use the GC-bit that is used in forwarding pointers but we may well have forwarded pointers left over from a partially completed minor GC. Using a bit in the header avoids the need for locking since at worst it may involve two threads duplicating some marking. The code ensures that each reachable cell is marked at least once but with multiple threads a cell may be marked by more than one thread if the memory is not fully up to date. Each thread has a stack on which it remembers cells that have been marked but not fully scanned. If a thread runs out of cells of its own to scan it can pick a pointer off the stack of another thread and scan that. The original thread will still scan it some time later but it should find that the addresses in it have all been marked and it can simply pop this off. This is all done without locking. Stacks are only modified by the owning thread and when they pop anything they write zero in its place. Other threads only need to search for a zero to find if they are at the top and if they get a pointer that has already been scanned then this is safe. The only assumption made about the memory is that all the bits of a word are updated together so that a thread will always read a value that is a valid pointer. Many of the ideas are drawn from Flood, Detlefs, Shavit and Zhang 2001 "Parallel Garbage Collection for Shared Memory Multiprocessors".
*/ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #include "globals.h" #include "processes.h" #include "gc.h" #include "scanaddrs.h" #include "check_objects.h" #include "bitmap.h" #include "memmgr.h" #include "diagnostics.h" #include "gctaskfarm.h" #include "profiling.h" #include "heapsizing.h" #define MARK_STACK_SIZE 3000 #define LARGECACHE_SIZE 20 class MTGCProcessMarkPointers: public ScanAddress { public: MTGCProcessMarkPointers(); virtual void ScanRuntimeAddress(PolyObject **pt, RtsStrength weak); virtual PolyObject *ScanObjectAddress(PolyObject *base); virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord); // Have to redefine this for some reason. void ScanAddressesInObject(PolyObject *base) { ScanAddressesInObject(base, base->LengthWord()); } virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code); // ScanCodeAddressAt should never be called. POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { ASSERT(0); return 0; } static void MarkPointersTask(GCTaskId *, void *arg1, void *arg2); static void InitStatics(unsigned threads) { markStacks = new MTGCProcessMarkPointers[threads]; nInUse = 0; nThreads = threads; } static void MarkRoots(void); static bool RescanForStackOverflow(); private: bool TestForScan(PolyWord *pt); void MarkAndTestForScan(PolyWord *pt); void Reset(); void PushToStack(PolyObject *obj, PolyWord *currentPtr = 0) { // If we don't have all the threads running we start a new one but // only once we have several items on the stack. Otherwise we // can end up creating a task that terminates almost immediately. if (nInUse >= nThreads || msp < 2 || ! 
ForkNew(obj)) { if (msp < MARK_STACK_SIZE) { markStack[msp++] = obj; if (currentPtr != 0) { locPtr++; if (locPtr == LARGECACHE_SIZE) locPtr = 0; largeObjectCache[locPtr].base = obj; largeObjectCache[locPtr].current = currentPtr; } } else StackOverflow(obj); } // else the new task is processing it. } static void StackOverflow(PolyObject *obj); static bool ForkNew(PolyObject *obj); PolyObject *markStack[MARK_STACK_SIZE]; unsigned msp; bool active; // For the typical small cell it's easier just to rescan from the start // but that can be expensive for large cells. This caches the offset for // large cells. static const POLYUNSIGNED largeObjectSize = 50; struct { PolyObject *base; PolyWord *current; } largeObjectCache[LARGECACHE_SIZE]; unsigned locPtr; static MTGCProcessMarkPointers *markStacks; protected: static unsigned nThreads, nInUse; static PLock stackLock; }; // There is one mark-stack for each GC thread. markStacks[0] is used by the // main thread when marking the roots and rescanning after mark-stack overflow. // Once that work is done markStacks[0] is released and is available for a // worker thread. MTGCProcessMarkPointers *MTGCProcessMarkPointers::markStacks; unsigned MTGCProcessMarkPointers::nThreads, MTGCProcessMarkPointers::nInUse; PLock MTGCProcessMarkPointers::stackLock("GC mark stack"); // It is possible to have two levels of forwarding because // we could have a cell in the allocation area that has been moved // to the immutable area and then shared with another cell. inline PolyObject *FollowForwarding(PolyObject *obj) { while (obj->ContainsForwardingPtr()) obj = obj->GetForwardingPtr(); return obj; } MTGCProcessMarkPointers::MTGCProcessMarkPointers(): msp(0), active(false), locPtr(0) { // Clear the mark stack for (unsigned i = 0; i < MARK_STACK_SIZE; i++) markStack[i] = 0; // Clear the large object cache just to be sure. 
for (unsigned j = 0; j < LARGECACHE_SIZE; j++) { largeObjectCache[j].base = 0; largeObjectCache[j].current = 0; } } // Clear the state at the beginning of a new GC pass. void MTGCProcessMarkPointers::Reset() { locPtr = 0; //largeObjectCache[locPtr].base = 0; // Clear the cache completely just to be safe for (unsigned j = 0; j < LARGECACHE_SIZE; j++) { largeObjectCache[j].base = 0; largeObjectCache[j].current = 0; } } // Called when the stack has overflowed. We need to include this // in the range to be rescanned. void MTGCProcessMarkPointers::StackOverflow(PolyObject *obj) { MarkableSpace *space = (MarkableSpace*)gMem.SpaceForAddress((PolyWord*)obj-1); ASSERT(space != 0 && (space->spaceType == ST_LOCAL || space->spaceType == ST_CODE)); PLocker lock(&space->spaceLock); // Have to include this in the range to rescan. if (space->fullGCRescanStart > ((PolyWord*)obj) - 1) space->fullGCRescanStart = ((PolyWord*)obj) - 1; POLYUNSIGNED n = obj->Length(); if (space->fullGCRescanEnd < ((PolyWord*)obj) + n) space->fullGCRescanEnd = ((PolyWord*)obj) + n; ASSERT(obj->LengthWord() & _OBJ_GC_MARK); // Should have been marked. if (debugOptions & DEBUG_GC_ENHANCED) Log("GC: Mark: Stack overflow. Rescan for %p\n", obj); } // Fork a new task. Because we've checked nInUse without taking the lock // we may find that we can no longer create a new task. bool MTGCProcessMarkPointers::ForkNew(PolyObject *obj) { MTGCProcessMarkPointers *marker = 0; { PLocker lock(&stackLock); if (nInUse == nThreads) return false; for (unsigned i = 0; i < nThreads; i++) { if (! markStacks[i].active) { marker = &markStacks[i]; break; } } ASSERT(marker != 0); marker->active = true; nInUse++; } bool test = gpTaskFarm->AddWork(&MTGCProcessMarkPointers::MarkPointersTask, marker, obj); ASSERT(test); return true; } // Main marking task. This is forked off initially to scan a specific object and // anything reachable from it but once that has finished it tries to find objects // on other stacks to scan. 
void MTGCProcessMarkPointers::MarkPointersTask(GCTaskId *, void *arg1, void *arg2) { MTGCProcessMarkPointers *marker = (MTGCProcessMarkPointers*)arg1; marker->Reset(); marker->ScanAddressesInObject((PolyObject*)arg2); while (true) { // Look for a stack that has at least one item on it. MTGCProcessMarkPointers *steal = 0; for (unsigned i = 0; i < nThreads && steal == 0; i++) { if (markStacks[i].markStack[0] != 0) steal = &markStacks[i]; } // We're finished if they're all done. if (steal == 0) break; // Look for items on this stack for (unsigned j = 0; j < MARK_STACK_SIZE; j++) { // Pick the item off the stack. // N.B. The owning thread may update this to zero // at any time. PolyObject *toSteal = steal->markStack[j]; if (toSteal == 0) break; // Nothing more on the stack // The idea here is that the original thread pushed this // because there were at least two addresses it needed to // process. It started down one branch but left the other. // Since it will have marked cells in the branch it has // followed this thread will start on the unprocessed // address(es). marker->ScanAddressesInObject(toSteal); } } PLocker lock(&stackLock); marker->active = false; // It's finished nInUse--; ASSERT(marker->markStack[0] == 0); } // Tests if this needs to be scanned. It marks it if it has not been marked // unless it has to be scanned. bool MTGCProcessMarkPointers::TestForScan(PolyWord *pt) { if ((*pt).IsTagged()) return false; // This could contain a forwarding pointer if it points into an // allocation area and has been moved by the minor GC. // We have to be a little careful. Another thread could also // be following any forwarding pointers here. However it's safe // because they will update it with the same value. 
PolyObject *obj = (*pt).AsObjPtr(); if (obj->ContainsForwardingPtr()) { obj = FollowForwarding(obj); *pt = obj; } MemSpace *sp = gMem.SpaceForAddress((PolyWord*)obj-1); if (sp == 0 || (sp->spaceType != ST_LOCAL && sp->spaceType != ST_CODE)) return false; // Ignore it if it points to a permanent area POLYUNSIGNED L = obj->LengthWord(); if (L & _OBJ_GC_MARK) return false; // Already marked if (debugOptions & DEBUG_GC_DETAIL) Log("GC: Mark: %p %" POLYUFMT " %u\n", obj, OBJ_OBJECT_LENGTH(L), GetTypeBits(L)); if (OBJ_IS_BYTE_OBJECT(L)) { obj->SetLengthWord(L | _OBJ_GC_MARK); // Mark it return false; // We've done as much as we need } return true; } void MTGCProcessMarkPointers::MarkAndTestForScan(PolyWord *pt) { if (TestForScan(pt)) { PolyObject *obj = (*pt).AsObjPtr(); obj->SetLengthWord(obj->LengthWord() | _OBJ_GC_MARK); } } // The initial entry to process the roots. These may be RTS addresses or addresses in // a thread stack. Also called recursively to process the addresses of constants in // code segments. This is used in situations where a scanner may return the // updated address of an object. PolyObject *MTGCProcessMarkPointers::ScanObjectAddress(PolyObject *obj) { MemSpace *sp = gMem.SpaceForAddress((PolyWord*)obj-1); if (!(sp->spaceType == ST_LOCAL || sp->spaceType == ST_CODE)) return obj; // Ignore it if it points to a permanent area // We may have a forwarding pointer if this has been moved by the // minor GC. 
if (obj->ContainsForwardingPtr()) + { obj = FollowForwarding(obj); + sp = gMem.SpaceForAddress((PolyWord*)obj - 1); + } ASSERT(obj->ContainsNormalLengthWord()); POLYUNSIGNED L = obj->LengthWord(); if (L & _OBJ_GC_MARK) return obj; // Already marked - obj->SetLengthWord(L | _OBJ_GC_MARK); // Mark it + sp->writeAble(obj)->SetLengthWord(L | _OBJ_GC_MARK); // Mark it if (profileMode == kProfileLiveData || (profileMode == kProfileLiveMutables && obj->IsMutable())) AddObjectProfile(obj); POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L); if (debugOptions & DEBUG_GC_DETAIL) Log("GC: Mark: %p %" POLYUFMT " %u\n", obj, n, GetTypeBits(L)); if (OBJ_IS_BYTE_OBJECT(L)) return obj; // If we already have something on the stack we must being called // recursively to process a constant in a code segment. Just push // it on the stack and let the caller deal with it. if (msp != 0) PushToStack(obj); // Can't check this because it may have forwarding ptrs. else { MTGCProcessMarkPointers::ScanAddressesInObject(obj, L); // We can only check after we've processed it because if we // have addresses left over from an incomplete partial GC they // may need to forwarded. CheckObject (obj); } return obj; } // These functions are only called with pointers held by the runtime system. // Weak references can occur in the runtime system, eg. streams and windows. // Weak references are not marked and so unreferenced streams and windows // can be detected and closed. void MTGCProcessMarkPointers::ScanRuntimeAddress(PolyObject **pt, RtsStrength weak) { if (weak == STRENGTH_WEAK) return; *pt = ScanObjectAddress(*pt); CheckPointer (*pt); // Check it after any forwarding pointers have been followed. } // This is called via ScanAddressesInRegion to process the permanent mutables. It is // also called from ScanObjectAddress to process root addresses. // It processes all the addresses reachable from the object. // This is almost the same as RecursiveScan::ScanAddressesInObject. 
void MTGCProcessMarkPointers::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord) { if (OBJ_IS_BYTE_OBJECT(lengthWord)) return; while (true) { ASSERT (OBJ_IS_LENGTH(lengthWord)); POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord); PolyWord *baseAddr = (PolyWord*)obj; PolyWord *endWord = baseAddr + length; if (OBJ_IS_WEAKREF_OBJECT(lengthWord)) { // Special case. ASSERT(OBJ_IS_MUTABLE_OBJECT(lengthWord)); // Should be a mutable. ASSERT(OBJ_IS_WORD_OBJECT(lengthWord)); // Should be a plain object. // We need to mark the "SOME" values in this object but we don't mark // the references contained within the "SOME". // Mark every word but ignore the result. for (POLYUNSIGNED i = 0; i < length; i++) (void)MarkAndTestForScan(baseAddr+i); // We've finished with this. endWord = baseAddr; } else if (OBJ_IS_CODE_OBJECT(lengthWord)) { // Legacy: The code-generator now uses PolyCopyByteVecToClosure to allocate mutable // code cells in the code area. Previously they were allocated in the heap and copied // into the code area only when they were locked. // It's better to process the whole code object in one go. ScanAddress::ScanAddressesInObject(obj, lengthWord); endWord = baseAddr; // Finished } else if (OBJ_IS_CLOSURE_OBJECT(lengthWord)) { // The first word is the absolute address of the code ... PolyObject *codeAddr = *(PolyObject**)obj; // except that it is possible we haven't yet set it. if (((uintptr_t)codeAddr & 1) == 0) ScanObjectAddress(codeAddr); // The rest is a normal tuple. baseAddr += sizeof(PolyObject*) / sizeof(PolyWord); } // If there are only two addresses in this cell that need to be // followed we follow them immediately and treat this cell as done. // If there are more than two we push the address of this cell on // the stack, follow the first address and then rescan it. That way // list cells are processed once only but we don't overflow the // stack by pushing all the addresses in a very large vector. 
PolyObject *firstWord = 0; PolyObject *secondWord = 0; PolyWord *restartAddr = 0; if (obj == largeObjectCache[locPtr].base) { baseAddr = largeObjectCache[locPtr].current; ASSERT(baseAddr > (PolyWord*)obj && baseAddr < endWord); if (locPtr == 0) locPtr = LARGECACHE_SIZE - 1; else locPtr--; } while (baseAddr != endWord) { PolyWord wordAt = *baseAddr; if (wordAt.IsDataPtr() && wordAt != PolyWord::FromUnsigned(0)) { // Normal address. We can have words of all zeros at least in the // situation where we have a partially constructed code segment where // the constants at the end of the code have not yet been filled in. if (TestForScan(baseAddr)) { if (firstWord == 0) firstWord = baseAddr->AsObjPtr(); else if (secondWord == 0) { // If we need to rescan because there are three or more words to do // this is the place we need to restart (or the start of the cell if it's // small). restartAddr = baseAddr; secondWord = baseAddr->AsObjPtr(); } else break; // More than two words. } } baseAddr++; } if (baseAddr != endWord) // Put this back on the stack while we process the first word PushToStack(obj, length < largeObjectSize ? 0 : restartAddr); else if (secondWord != 0) { // Mark it now because we will process it. - secondWord->SetLengthWord(secondWord->LengthWord() | _OBJ_GC_MARK); + PolyObject* writeAble = secondWord; + if (secondWord->IsCodeObject()) + writeAble = gMem.SpaceForAddress(secondWord)->writeAble(secondWord); + writeAble->SetLengthWord(secondWord->LengthWord() | _OBJ_GC_MARK); // Put this on the stack. If this is a list node we will be // pushing the tail. PushToStack(secondWord); } if (firstWord != 0) { // Mark it and process it immediately. 
- firstWord->SetLengthWord(firstWord->LengthWord() | _OBJ_GC_MARK); + PolyObject* writeAble = firstWord; + if (firstWord->IsCodeObject()) + writeAble = gMem.SpaceForAddress(firstWord)->writeAble(firstWord); + writeAble->SetLengthWord(firstWord->LengthWord() | _OBJ_GC_MARK); obj = firstWord; } else if (msp == 0) { markStack[msp] = 0; // Really finished return; } else { // Clear the item above the top. This really is finished. if (msp < MARK_STACK_SIZE) markStack[msp] = 0; // Pop the item from the stack but don't overwrite it yet. // This allows another thread to steal it if there really // is nothing else to do. This is only really important // for large objects. obj = markStack[--msp]; // Pop something. } lengthWord = obj->LengthWord(); } } // Process a constant within the code. This is a direct copy of ScanAddress::ScanConstant // with the addition of the locking. void MTGCProcessMarkPointers::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code) { // If we have newly compiled code the constants may be in the // local heap. MTGCProcessMarkPointers::ScanObjectAddress can // return an updated address for a local address if there is a // forwarding pointer. // Constants can be aligned on any byte offset so another thread // scanning the same code could see an invalid address if it read // the constant while it was being updated. We put a lock round // this just in case. MemSpace *space = gMem.SpaceForAddress(addressOfConstant); PLock *lock = 0; if (space->spaceType == ST_CODE) lock = &((CodeSpace*)space)->spaceLock; if (lock != 0) lock->Lock(); PolyObject *p = GetConstantValue(addressOfConstant, code); if (lock != 0) lock->Unlock(); if (p != 0) { PolyObject *newVal = ScanObjectAddress(p); if (newVal != p) // Update it if it has changed. { if (lock != 0) lock->Lock(); SetConstantValue(addressOfConstant, newVal, code); if (lock != 0) lock->Unlock(); } } } // Mark all the roots. 
This is run in the main thread and has the effect // of starting new tasks as the scanning runs. void MTGCProcessMarkPointers::MarkRoots(void) { ASSERT(nThreads >= 1); ASSERT(nInUse == 0); MTGCProcessMarkPointers *marker = &markStacks[0]; marker->Reset(); marker->active = true; nInUse = 1; // Scan the permanent mutable areas. for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->isMutable && ! space->byteOnly) marker->ScanAddressesInRegion(space->bottom, space->top); } // Scan the RTS roots. GCModules(marker); ASSERT(marker->markStack[0] == 0); // When this has finished there may well be other tasks running. PLocker lock(&stackLock); marker->active = false; nInUse--; } // This class just allows us to use ScanAddress::ScanAddressesInRegion to call // ScanAddressesInObject for each object in the region. class Rescanner: public ScanAddress { public: Rescanner(MTGCProcessMarkPointers *marker): m_marker(marker) {} virtual void ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord) { // If it has previously been marked it is known to be reachable but // the contents may not have been scanned if the stack overflowed. if (lengthWord &_OBJ_GC_MARK) m_marker->ScanAddressesInObject(obj, lengthWord); } // Have to define this. virtual PolyObject *ScanObjectAddress(PolyObject *base) { ASSERT(false); return 0; } virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { ASSERT(false); return 0; } bool ScanSpace(MarkableSpace *space); private: MTGCProcessMarkPointers *m_marker; }; // Rescan any marked objects in the area between fullGCRescanStart and fullGCRescanEnd. // N.B. We may have threads already processing other areas and they could overflow // their stacks and change fullGCRescanStart or fullGCRescanEnd. 
bool Rescanner::ScanSpace(MarkableSpace *space) { PolyWord *start, *end; { PLocker lock(&space->spaceLock); start = space->fullGCRescanStart; end = space->fullGCRescanEnd; space->fullGCRescanStart = space->top; space->fullGCRescanEnd = space->bottom; } if (start < end) { if (debugOptions & DEBUG_GC_ENHANCED) Log("GC: Mark: Rescanning from %p to %p\n", start, end); ScanAddressesInRegion(start, end); return true; // Require rescan } else return false; } // When the threads created by marking the roots have completed we need to check that // the mark stack has not overflowed. If it has we need to rescan. This rescanning // pass may result in a further overflow so if we find we have to rescan we repeat. bool MTGCProcessMarkPointers::RescanForStackOverflow() { ASSERT(nThreads >= 1); ASSERT(nInUse == 0); MTGCProcessMarkPointers *marker = &markStacks[0]; marker->Reset(); marker->active = true; nInUse = 1; bool rescan = false; Rescanner rescanner(marker); for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { if (rescanner.ScanSpace(*i)) rescan = true; } for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) { if (rescanner.ScanSpace(*i)) rescan = true; } { PLocker lock(&stackLock); nInUse--; marker->active = false; } return rescan; } static void SetBitmaps(LocalMemSpace *space, PolyWord *pt, PolyWord *top) { while (pt < top) { #ifdef POLYML32IN64 if ((((uintptr_t)pt) & 4) == 0) { pt++; continue; } #endif PolyObject *obj = (PolyObject*)++pt; // If it has been copied by a minor collection skip it if (obj->ContainsForwardingPtr()) { obj = FollowForwarding(obj); ASSERT(obj->ContainsNormalLengthWord()); pt += obj->Length(); } else { POLYUNSIGNED L = obj->LengthWord(); POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L); if (L & _OBJ_GC_MARK) { obj->SetLengthWord(L & ~(_OBJ_GC_MARK)); uintptr_t bitno = space->wordNo(pt); space->bitmap.SetBits(bitno - 1, n + 1); if (OBJ_IS_MUTABLE_OBJECT(L)) space->m_marked += n + 1; else space->i_marked 
+= n + 1; if ((PolyWord*)obj <= space->fullGCLowerLimit) space->fullGCLowerLimit = (PolyWord*)obj-1; if (OBJ_IS_WEAKREF_OBJECT(L)) { // Add this to the limits for the containing area. PolyWord *baseAddr = (PolyWord*)obj; PolyWord *startAddr = baseAddr-1; // Must point AT length word. PolyWord *endObject = baseAddr + n; if (startAddr < space->lowestWeak) space->lowestWeak = startAddr; if (endObject > space->highestWeak) space->highestWeak = endObject; } } pt += n; } } } static void CreateBitmapsTask(GCTaskId *, void *arg1, void *arg2) { LocalMemSpace *lSpace = (LocalMemSpace *)arg1; lSpace->bitmap.ClearBits(0, lSpace->spaceSize()); SetBitmaps(lSpace, lSpace->bottom, lSpace->top); } // Parallel task to check the marks on cells in the code area and // turn them into byte areas if they are free. static void CheckMarksOnCodeTask(GCTaskId *, void *arg1, void *arg2) { CodeSpace *space = (CodeSpace*)arg1; #ifdef POLYML32IN64 PolyWord *pt = space->bottom+1; #else PolyWord *pt = space->bottom; #endif PolyWord *lastFree = 0; POLYUNSIGNED lastFreeSpace = 0; space->largestFree = 0; space->firstFree = 0; while (pt < space->top) { PolyObject *obj = (PolyObject*)(pt+1); // There should not be forwarding pointers ASSERT(obj->ContainsNormalLengthWord()); POLYUNSIGNED L = obj->LengthWord(); POLYUNSIGNED length = OBJ_OBJECT_LENGTH(L); if (L & _OBJ_GC_MARK) { // It's marked - retain it. ASSERT(L & _OBJ_CODE_OBJ); - obj->SetLengthWord(L & ~(_OBJ_GC_MARK)); // Clear the mark bit + space->writeAble(obj)->SetLengthWord(L & ~(_OBJ_GC_MARK)); // Clear the mark bit lastFree = 0; lastFreeSpace = 0; } #ifdef POLYML32IN64 else if (length == 0) { // We may have zero filler words to set the correct alignment. // Merge them into a previously free area otherwise leave // them if they're after something allocated. 
if (lastFree + lastFreeSpace == pt) { lastFreeSpace += length + 1; PolyObject *freeSpace = (PolyObject*)(lastFree + 1); - freeSpace->SetLengthWord(lastFreeSpace - 1, F_BYTE_OBJ); + space->writeAble(freeSpace)->SetLengthWord(lastFreeSpace - 1, F_BYTE_OBJ); } } #endif else { // Turn it into a byte area i.e. free. It may already be free. if (space->firstFree == 0) space->firstFree = pt; space->headerMap.ClearBit(pt-space->bottom); // Remove the "header" bit if (lastFree + lastFreeSpace == pt) // Merge free spaces. Speeds up subsequent scans. lastFreeSpace += length + 1; else { lastFree = pt; lastFreeSpace = length + 1; } PolyObject *freeSpace = (PolyObject*)(lastFree+1); - freeSpace->SetLengthWord(lastFreeSpace-1, F_BYTE_OBJ); + space->writeAble(freeSpace)->SetLengthWord(lastFreeSpace-1, F_BYTE_OBJ); if (lastFreeSpace > space->largestFree) space->largestFree = lastFreeSpace; } pt += length+1; } } void GCMarkPhase(void) { mainThreadPhase = MTP_GCPHASEMARK; // Clear the mark counters and set the rescan limits. for(std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { LocalMemSpace *lSpace = *i; lSpace->i_marked = lSpace->m_marked = 0; lSpace->fullGCRescanStart = lSpace->top; lSpace->fullGCRescanEnd = lSpace->bottom; } for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) { CodeSpace *space = *i; space->fullGCRescanStart = space->top; space->fullGCRescanEnd = space->bottom; } MTGCProcessMarkPointers::MarkRoots(); gpTaskFarm->WaitForCompletion(); // Do we have to rescan because the mark stack overflowed? bool rescan; do { rescan = MTGCProcessMarkPointers::RescanForStackOverflow(); gpTaskFarm->WaitForCompletion(); } while(rescan); gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Mark"); // Turn the marks into bitmap entries. for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) gpTaskFarm->AddWorkOrRunNow(&CreateBitmapsTask, *i, 0); // Process the code areas. 
for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) gpTaskFarm->AddWorkOrRunNow(&CheckMarksOnCodeTask, *i, 0); gpTaskFarm->WaitForCompletion(); // Wait for completion of the bitmaps gMem.RemoveEmptyCodeAreas(); gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Bitmap"); uintptr_t totalLive = 0; for(std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { LocalMemSpace *lSpace = *i; if (! lSpace->isMutable) ASSERT(lSpace->m_marked == 0); totalLive += lSpace->m_marked + lSpace->i_marked; if (debugOptions & DEBUG_GC_ENHANCED) Log("GC: Mark: %s space %p: %" POLYUFMT " immutable words marked, %" POLYUFMT " mutable words marked\n", lSpace->spaceTypeString(), lSpace, lSpace->i_marked, lSpace->m_marked); } if (debugOptions & DEBUG_GC) Log("GC: Mark: Total live data %" POLYUFMT " words\n", totalLive); } // Set up the stacks. void initialiseMarkerTables() { unsigned threads = gpTaskFarm->ThreadCount(); if (threads == 0) threads = 1; MTGCProcessMarkPointers::InitStatics(threads); } diff --git a/libpolyml/memmgr.cpp b/libpolyml/memmgr.cpp index 0c5ea905..84fba812 100644 --- a/libpolyml/memmgr.cpp +++ b/libpolyml/memmgr.cpp @@ -1,1377 +1,1378 @@ /* Title: memmgr.cpp Memory segment manager Copyright (c) 2006-7, 2011-12, 2016-18 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #include #include #include "globals.h" #include "memmgr.h" #include "osmem.h" #include "scanaddrs.h" #include "bitmap.h" #include "mpoly.h" #include "diagnostics.h" #include "statistics.h" #include "processes.h" #include "machine_dep.h" #ifdef POLYML32IN64 // This contains the address of the base of the heap. PolyWord *globalHeapBase, *globalCodeBase; #endif // heap resizing policy option requested on command line unsigned heapsizingOption = 0; MemSpace::MemSpace(OSMem *alloc): SpaceTree(true) { spaceType = ST_PERMANENT; isMutable = false; bottom = 0; top = 0; isCode = false; allocator = alloc; shadowSpace = 0; } MemSpace::~MemSpace() { if (allocator != 0 && bottom != 0) { if (isCode) allocator->FreeCodeArea(bottom, shadowSpace, (char*)top - (char*)bottom); else allocator->FreeDataArea(bottom, (char*)top - (char*)bottom); } } MarkableSpace::MarkableSpace(OSMem *alloc): MemSpace(alloc), spaceLock("Local space") { } LocalMemSpace::LocalMemSpace(OSMem *alloc): MarkableSpace(alloc) { spaceType = ST_LOCAL; upperAllocPtr = lowerAllocPtr = 0; for (unsigned i = 0; i < NSTARTS; i++) start[i] = 0; start_index = 0; i_marked = m_marked = updated = 0; allocationSpace = false; } bool LocalMemSpace::InitSpace(PolyWord *heapSpace, uintptr_t size, bool mut) { isMutable = mut; bottom = heapSpace; top = bottom + size; // Initialise all the fields. The partial GC in particular relies on this. 
upperAllocPtr = partialGCTop = fullGCRescanStart = fullGCLowerLimit = lowestWeak = top; lowerAllocPtr = partialGCScan = partialGCRootBase = partialGCRootTop = fullGCRescanEnd = highestWeak = bottom; #ifdef POLYML32IN64 // The address must be on an odd-word boundary so that after the length // word is put in the actual cell address is on an even-word boundary. lowerAllocPtr[0] = PolyWord::FromUnsigned(0); lowerAllocPtr = bottom + 1; #endif spaceOwner = 0; allocationSpace = false; // Bitmap for the space. return bitmap.Create(size); } MemMgr::MemMgr(): allocLock("Memmgr alloc"), codeBitmapLock("Code bitmap") { nextIndex = 0; reservedSpace = 0; nextAllocator = 0; defaultSpaceSize = 0; spaceBeforeMinorGC = 0; spaceForHeap = 0; currentAllocSpace = currentHeapSize = 0; defaultSpaceSize = 1024 * 1024 / sizeof(PolyWord); // 1Mbyte segments. spaceTree = new SpaceTreeTree; } MemMgr::~MemMgr() { delete(spaceTree); // Have to do this before we delete the spaces. for (std::vector::iterator i = pSpaces.begin(); i < pSpaces.end(); i++) delete(*i); for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++) delete(*i); for (std::vector::iterator i = eSpaces.begin(); i < eSpaces.end(); i++) delete(*i); for (std::vector::iterator i = sSpaces.begin(); i < sSpaces.end(); i++) delete(*i); for (std::vector::iterator i = cSpaces.begin(); i < cSpaces.end(); i++) delete(*i); } bool MemMgr::Initialise() { #ifdef POLYML32IN64 // Allocate a single 16G area but with no access. void *heapBase; if (!osHeapAlloc.Initialise(false, (size_t)16 * 1024 * 1024 * 1024, &heapBase)) return false; globalHeapBase = (PolyWord*)heapBase; // Allocate a 4 gbyte area for the stacks. // It's important that the stack and code areas have addresses with // non-zero top 32-bits. if (!osStackAlloc.Initialise(false, (size_t)4 * 1024 * 1024 * 1024)) return false; // Allocate a 2G area for the code. 
void *codeBase; if (!osCodeAlloc.Initialise(machineDependent->CodeMustBeExecutable(), (size_t)2 * 1024 * 1024 * 1024, &codeBase)) return false; globalCodeBase = (PolyWord*)codeBase; return true; #else return osHeapAlloc.Initialise(false) && osStackAlloc.Initialise(false) && osCodeAlloc.Initialise(machineDependent->CodeMustBeExecutable()); #endif } // Create and initialise a new local space and add it to the table. LocalMemSpace* MemMgr::NewLocalSpace(uintptr_t size, bool mut) { try { LocalMemSpace *space = new LocalMemSpace(&osHeapAlloc); // Before trying to allocate the heap temporarily allocate the // reserved space. This ensures that this much space will always // be available for C stacks and the C++ heap. void *reservation = 0; size_t rSpace = reservedSpace*sizeof(PolyWord); if (reservedSpace != 0) { reservation = osHeapAlloc.AllocateDataArea(rSpace); if (reservation == NULL) { // Insufficient space for the reservation. Can't allocate this local space. if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New local %smutable space: insufficient reservation space\n", mut ? "": "im"); delete space; return 0; } } // Allocate the heap itself. size_t iSpace = size * sizeof(PolyWord); PolyWord* heapSpace = (PolyWord*)osHeapAlloc.AllocateDataArea(iSpace); // The size may have been rounded up to a block boundary. size = iSpace / sizeof(PolyWord); bool success = heapSpace != 0 && space->InitSpace(heapSpace, size, mut) && AddLocalSpace(space); if (reservation != 0) osHeapAlloc.FreeDataArea(reservation, rSpace); if (success) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New local %smutable space %p, size=%luk words, bottom=%p, top=%p\n", mut ? "": "im", space, space->spaceSize()/1024, space->bottom, space->top); currentHeapSize += space->spaceSize(); globalStats.setSize(PSS_TOTAL_HEAP, currentHeapSize * sizeof(PolyWord)); return space; } // If something went wrong. delete space; if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New local %smutable space: insufficient space\n", mut ? 
"": "im"); return 0; } catch (std::bad_alloc&) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New local %smutable space: \"new\" failed\n", mut ? "": "im"); return 0; } } // Create a local space for initial allocation. LocalMemSpace *MemMgr::CreateAllocationSpace(uintptr_t size) { LocalMemSpace *result = NewLocalSpace(size, true); if (result) { result->allocationSpace = true; currentAllocSpace += result->spaceSize(); globalStats.incSize(PSS_ALLOCATION, result->spaceSize()*sizeof(PolyWord)); globalStats.incSize(PSS_ALLOCATION_FREE, result->freeSpace()*sizeof(PolyWord)); } return result; } // If an allocation space has a lot of data left in it after a GC, particularly // a single large object we should turn it into a local area. void MemMgr::ConvertAllocationSpaceToLocal(LocalMemSpace *space) { ASSERT(space->allocationSpace); space->allocationSpace = false; // Currently it is left as a mutable area but if the contents are all // immutable e.g. a large vector it could be better to turn it into an // immutable area. currentAllocSpace -= space->spaceSize(); } // Add a local memory space to the table. bool MemMgr::AddLocalSpace(LocalMemSpace *space) { // Add to the table. // Update the B-tree. try { AddTree(space); // The entries in the local table are ordered so that the copy phase of the full // GC simply has to copy to an entry earlier in the table. Immutable spaces come // first, followed by mutable spaces and finally allocation spaces. if (space->allocationSpace) lSpaces.push_back(space); // Just add at the end else if (space->isMutable) { // Add before the allocation spaces std::vector::iterator i = lSpaces.begin(); while (i != lSpaces.end() && ! (*i)->allocationSpace) i++; lSpaces.insert(i, space); } else { // Immutable space: Add before the mutable spaces std::vector::iterator i = lSpaces.begin(); while (i != lSpaces.end() && ! 
(*i)->isMutable) i++; lSpaces.insert(i, space); } } catch (std::bad_alloc&) { RemoveTree(space); return false; } return true; } // Create an entry for a permanent space. PermanentMemSpace* MemMgr::NewPermanentSpace(PolyWord *base, uintptr_t words, unsigned flags, unsigned index, unsigned hierarchy /*= 0*/) { try { PermanentMemSpace *space = new PermanentMemSpace(0/* Not freed */); space->bottom = base; space->topPointer = space->top = space->bottom + words; space->spaceType = ST_PERMANENT; space->isMutable = flags & MTF_WRITEABLE ? true : false; space->noOverwrite = flags & MTF_NO_OVERWRITE ? true : false; space->byteOnly = flags & MTF_BYTES ? true : false; space->isCode = flags & MTF_EXECUTABLE ? true : false; space->index = index; space->hierarchy = hierarchy; if (index >= nextIndex) nextIndex = index+1; // Extend the permanent memory table and add this space to it. try { AddTree(space); pSpaces.push_back(space); } catch (std::exception&) { RemoveTree(space); delete space; return 0; } return space; } catch (std::bad_alloc&) { return 0; } } PermanentMemSpace *MemMgr::AllocateNewPermanentSpace(uintptr_t byteSize, unsigned flags, unsigned index, unsigned hierarchy) { try { OSMem *alloc = flags & MTF_EXECUTABLE ? &osCodeAlloc : &osHeapAlloc; PermanentMemSpace *space = new PermanentMemSpace(alloc); size_t actualSize = byteSize; PolyWord* base; void* newShadow=0; if (flags & MTF_EXECUTABLE) base = (PolyWord*)alloc->AllocateCodeArea(actualSize, newShadow); else base = (PolyWord*)alloc->AllocateDataArea(actualSize); if (base == 0) { delete(space); return 0; } space->bottom = base; space->shadowSpace = (PolyWord*)newShadow; space->topPointer = space->top = space->bottom + actualSize/sizeof(PolyWord); space->spaceType = ST_PERMANENT; space->isMutable = flags & MTF_WRITEABLE ? true : false; space->noOverwrite = flags & MTF_NO_OVERWRITE ? true : false; space->byteOnly = flags & MTF_BYTES ? true : false; space->isCode = flags & MTF_EXECUTABLE ? 
true : false; space->index = index; space->hierarchy = hierarchy; if (index >= nextIndex) nextIndex = index + 1; // Extend the permanent memory table and add this space to it. try { AddTree(space); pSpaces.push_back(space); } catch (std::exception&) { RemoveTree(space); delete space; return 0; } return space; } catch (std::bad_alloc&) { return 0; } } bool MemMgr::CompletePermanentSpaceAllocation(PermanentMemSpace *space) { // Remove write access unless it is mutable. // Don't remove write access unless this is top-level. Share-data assumes only hierarchy 0 is write-protected. if (!space->isMutable && space->hierarchy == 0) { if (space->isCode) osCodeAlloc.DisableWriteForCode(space->bottom, space->shadowSpace, (char*)space->top - (char*)space->bottom); else osHeapAlloc.EnableWrite(false, space->bottom, (char*)space->top - (char*)space->bottom); } return true; } // Delete a local space and remove it from the table. void MemMgr::DeleteLocalSpace(std::vector::iterator &iter) { LocalMemSpace *sp = *iter; if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Deleted local %s space %p at %p size %zu\n", sp->spaceTypeString(), sp, sp->bottom, sp->spaceSize()); currentHeapSize -= sp->spaceSize(); globalStats.setSize(PSS_TOTAL_HEAP, currentHeapSize * sizeof(PolyWord)); if (sp->allocationSpace) currentAllocSpace -= sp->spaceSize(); RemoveTree(sp); delete(sp); iter = lSpaces.erase(iter); } // Remove local areas that are now empty after a GC. // It isn't clear if we always want to do this. void MemMgr::RemoveEmptyLocals() { for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); ) { LocalMemSpace *space = *i; if (space->isEmpty()) DeleteLocalSpace(i); else i++; } } // Create and initialise a new export space and add it to the table. PermanentMemSpace* MemMgr::NewExportSpace(uintptr_t size, bool mut, bool noOv, bool code) { try { OSMem *alloc = code ? 
&osCodeAlloc : &osHeapAlloc; PermanentMemSpace *space = new PermanentMemSpace(alloc); space->spaceType = ST_EXPORT; space->isMutable = mut; space->noOverwrite = noOv; space->isCode = code; space->index = nextIndex++; // Allocate the memory itself. size_t iSpace = size*sizeof(PolyWord); if (code) { void* shadow; space->bottom = (PolyWord*)alloc->AllocateCodeArea(iSpace, shadow); if (space->bottom != 0) space->shadowSpace = (PolyWord*)shadow; } else space->bottom = (PolyWord*)alloc->AllocateDataArea(iSpace); if (space->bottom == 0) { delete space; if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New export %smutable space: insufficient space\n", mut ? "" : "im"); return 0; } // The size may have been rounded up to a block boundary. size = iSpace/sizeof(PolyWord); space->top = space->bottom + size; space->topPointer = space->bottom; #ifdef POLYML32IN64 // The address must be on an odd-word boundary so that after the length // word is put in the actual cell address is on an even-word boundary. - space->topPointer[0] = PolyWord::FromUnsigned(0); + space->writeAble(space->topPointer)[0] = PolyWord::FromUnsigned(0); space->topPointer = space->bottom + 1; #endif if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New export %smutable %s%sspace %p, size=%luk words, bottom=%p, top=%p\n", mut ? "" : "im", noOv ? "no-overwrite " : "", code ? "code " : "", space, space->spaceSize() / 1024, space->bottom, space->top); // Add to the table. try { AddTree(space); eSpaces.push_back(space); } catch (std::exception&) { RemoveTree(space); delete space; if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New export %smutable space: Adding to tree failed\n", mut ? "" : "im"); return 0; } return space; } catch (std::bad_alloc&) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New export %smutable space: \"new\" failed\n", mut ? 
"" : "im"); return 0; } } void MemMgr::DeleteExportSpaces(void) { for (std::vector::iterator i = eSpaces.begin(); i < eSpaces.end(); i++) { PermanentMemSpace *space = *i; RemoveTree(space); delete(space); } eSpaces.clear(); } // If we have saved the state rather than exported a function we turn the exported // spaces into permanent ones, removing existing permanent spaces at the same or // lower level. bool MemMgr::PromoteExportSpaces(unsigned hierarchy) { // Save permanent spaces at a lower hierarchy. Others are converted into // local spaces. Most or all items will have been copied from these spaces // into an export space but there could be items reachable only from the stack. std::vector::iterator i = pSpaces.begin(); while (i != pSpaces.end()) { PermanentMemSpace *pSpace = *i; if (pSpace->hierarchy < hierarchy) i++; else { try { // Turn this into a local space or a code space // Remove this from the tree - AddLocalSpace will make an entry for the local version. RemoveTree(pSpace); if (pSpace->isCode) { // Enable write access. Permanent spaces are read-only. // osCodeAlloc.SetPermissions(pSpace->bottom, (char*)pSpace->top - (char*)pSpace->bottom, // PERMISSION_READ | PERMISSION_WRITE | PERMISSION_EXEC); - CodeSpace *space = new CodeSpace(pSpace->bottom, pSpace->spaceSize(), &osCodeAlloc); + CodeSpace *space = new CodeSpace(pSpace->bottom, pSpace->shadowSpace, pSpace->spaceSize(), &osCodeAlloc); if (! space->headerMap.Create(space->spaceSize())) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Unable to create header map for state space %p\n", pSpace); return false; } if (!AddCodeSpace(space)) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Unable to convert saved state space %p into code space\n", pSpace); return false; } if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Converted saved state space %p into code space %p\n", pSpace, space); // Set the bits in the header map. 
for (PolyWord *ptr = space->bottom; ptr < space->top; ) { PolyObject *obj = (PolyObject*)(ptr+1); // We may have forwarded this if this has been // copied to the exported area. Restore the original length word. if (obj->ContainsForwardingPtr()) { #ifdef POLYML32IN64 PolyObject *forwardedTo = obj; // This is relative to globalCodeBase not globalHeapBase while (forwardedTo->ContainsForwardingPtr()) forwardedTo = (PolyObject*)(globalCodeBase + ((forwardedTo->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); #else PolyObject *forwardedTo = obj->FollowForwardingChain(); #endif obj->SetLengthWord(forwardedTo->LengthWord()); } // Set the "start" bit if this is allocated. It will be a byte seg if not. if (obj->IsCodeObject()) space->headerMap.SetBit(ptr-space->bottom); ASSERT(!obj->IsClosureObject()); ptr += obj->Length() + 1; } } else { // Enable write access. Permanent spaces are read-only. // osHeapAlloc.SetPermissions(pSpace->bottom, (char*)pSpace->top - (char*)pSpace->bottom, // PERMISSION_READ | PERMISSION_WRITE); LocalMemSpace *space = new LocalMemSpace(&osHeapAlloc); space->top = pSpace->top; // Space is allocated in local areas from the top down. This area is full and // all data is in the old generation. The area can be recovered by a full GC. space->bottom = space->upperAllocPtr = space->lowerAllocPtr = space->fullGCLowerLimit = pSpace->bottom; space->isMutable = pSpace->isMutable; space->isCode = false; if (! space->bitmap.Create(space->top-space->bottom) || ! AddLocalSpace(space)) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Unable to convert saved state space %p into local space\n", pSpace); return false; } if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Converted saved state space %p into local %smutable space %p\n", pSpace, pSpace->isMutable ? 
"im": "", space); currentHeapSize += space->spaceSize(); globalStats.setSize(PSS_TOTAL_HEAP, currentHeapSize * sizeof(PolyWord)); } i = pSpaces.erase(i); } catch (std::bad_alloc&) { return false; } } } // Save newly exported spaces. for(std::vector::iterator j = eSpaces.begin(); j < eSpaces.end(); j++) { PermanentMemSpace *space = *j; space->hierarchy = hierarchy; // Set the hierarchy of the new spaces. space->spaceType = ST_PERMANENT; // Put a dummy object to fill up the unused space. if (space->topPointer != space->top) - FillUnusedSpace(space->topPointer, space->top - space->topPointer); + FillUnusedSpace(space->writeAble(space->topPointer), space->top - space->topPointer); // Put in a dummy object to fill the rest of the space. pSpaces.push_back(space); } eSpaces.clear(); return true; } // Before we import a hierarchical saved state we need to turn any previously imported // spaces into local spaces. bool MemMgr::DemoteImportSpaces() { return PromoteExportSpaces(1); // Only truly permanent spaces are retained. } // Return the space for a given index PermanentMemSpace *MemMgr::SpaceForIndex(unsigned index) { for (std::vector::iterator i = pSpaces.begin(); i < pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->index == index) return space; } return NULL; } // In several places we assume that segments are filled with valid // objects. This fills unused memory with one or more "byte" objects. void MemMgr::FillUnusedSpace(PolyWord *base, uintptr_t words) { PolyWord *pDummy = base+1; while (words > 0) { #ifdef POLYML32IN64 // Make sure that any dummy object we insert is properly aligned. if (((uintptr_t)pDummy) & 4) { *pDummy++ = PolyWord::FromUnsigned(0); words--; continue; } #endif POLYUNSIGNED oSize; // If the space is larger than the maximum object size // we will need several objects. if (words > MAX_OBJECT_SIZE) oSize = MAX_OBJECT_SIZE; else oSize = (POLYUNSIGNED)(words-1); // Make this a byte object so it's always skipped. 
((PolyObject*)pDummy)->SetLengthWord(oSize, F_BYTE_OBJ); words -= oSize+1; pDummy += oSize+1; } } // Allocate an area of the heap of at least minWords and at most maxWords. // This is used both when allocating single objects (when minWords and maxWords // are the same) and when allocating heap segments. If there is insufficient // space to satisfy the minimum it will return 0. PolyWord *MemMgr::AllocHeapSpace(uintptr_t minWords, uintptr_t &maxWords, bool doAllocation) { PLocker locker(&allocLock); // We try to distribute the allocations between the memory spaces // so that at the next GC we don't have all the most recent cells in // one space. The most recent cells will be more likely to survive a // GC so distibuting them improves the load balance for a multi-thread GC. nextAllocator++; if (nextAllocator > gMem.lSpaces.size()) nextAllocator = 0; unsigned j = nextAllocator; for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++) { if (j >= gMem.lSpaces.size()) j = 0; LocalMemSpace *space = gMem.lSpaces[j++]; if (space->allocationSpace) { uintptr_t available = space->freeSpace(); if (available > 0 && available >= minWords) { // Reduce the maximum value if we had less than that. if (available < maxWords) maxWords = available; #ifdef POLYML32IN64 // If necessary round down to an even boundary if (maxWords & 1) { maxWords--; space->lowerAllocPtr[maxWords] = PolyWord::FromUnsigned(0); } #endif PolyWord *result = space->lowerAllocPtr; // Return the address. if (doAllocation) space->lowerAllocPtr += maxWords; // Allocate it. #ifdef POLYML32IN64 ASSERT((uintptr_t)result & 4); // Must be odd-word aligned #endif return result; } } } // There isn't space in the existing areas - can we create a new area? // The reason we don't have enough space could simply be that we want to // allocate an object larger than the default space size. Try deleting // some other spaces to bring currentAllocSpace below spaceBeforeMinorGC - minWords. 
if (minWords > defaultSpaceSize && minWords < spaceBeforeMinorGC) RemoveExcessAllocation(spaceBeforeMinorGC - minWords); if (currentAllocSpace/* + minWords */ < spaceBeforeMinorGC) { // i.e. the current allocation space is less than the space allowed for the minor GC // but it may be that allocating this object will take us over the limit. We allow // that to happen so that we can successfully allocate very large objects even if // we have a new GC very shortly. uintptr_t spaceSize = defaultSpaceSize; #ifdef POLYML32IN64 // When we create the allocation space we take one word so that the first // length word is on an odd-word boundary. We need to allow for that otherwise // we may have available < minWords. if (minWords >= spaceSize) spaceSize = minWords+1; // If we really want a large space. #else if (minWords > spaceSize) spaceSize = minWords; // If we really want a large space. #endif LocalMemSpace *space = CreateAllocationSpace(spaceSize); if (space == 0) return 0; // Can't allocate it // Allocate our space in this new area. uintptr_t available = space->freeSpace(); ASSERT(available >= minWords); if (available < maxWords) { maxWords = available; #ifdef POLYML32IN64 // If necessary round down to an even boundary if (maxWords & 1) { maxWords--; space->lowerAllocPtr[maxWords] = PolyWord::FromUnsigned(0); } #endif } PolyWord *result = space->lowerAllocPtr; // Return the address. if (doAllocation) space->lowerAllocPtr += maxWords; // Allocate it. #ifdef POLYML32IN64 ASSERT((uintptr_t)result & 4); // Must be odd-word aligned #endif return result; } return 0; // There isn't space even for the minimum. } -CodeSpace::CodeSpace(PolyWord *start, uintptr_t spaceSize, OSMem *alloc): MarkableSpace(alloc) +CodeSpace::CodeSpace(PolyWord *start, PolyWord *shadow, uintptr_t spaceSize, OSMem *alloc): MarkableSpace(alloc) { bottom = start; + shadowSpace = shadow; top = start+spaceSize; isMutable = true; // Make it mutable just in case. This will cause it to be scanned. 
isCode = true; spaceType = ST_CODE; #ifdef POLYML32IN64 // Dummy word so that the cell itself, after the length word, is on an 8-byte boundary. - *start = PolyWord::FromUnsigned(0); + writeAble(start)[0] = PolyWord::FromUnsigned(0); largestFree = spaceSize - 2; firstFree = start+1; #else largestFree = spaceSize - 1; firstFree = start; #endif } CodeSpace *MemMgr::NewCodeSpace(uintptr_t size) { // Allocate a new area and add it at the end of the table. CodeSpace *allocSpace = 0; // Allocate a new mutable, code space. N.B. This may round up "actualSize". size_t actualSize = size * sizeof(PolyWord); void* shadow; PolyWord *mem = (PolyWord*)osCodeAlloc.AllocateCodeArea(actualSize, shadow); if (mem != 0) { try { - allocSpace = new CodeSpace(mem, actualSize / sizeof(PolyWord), &osCodeAlloc); + allocSpace = new CodeSpace(mem, (PolyWord*)shadow, actualSize / sizeof(PolyWord), &osCodeAlloc); allocSpace->shadowSpace = (PolyWord*)shadow; if (!allocSpace->headerMap.Create(allocSpace->spaceSize())) { delete allocSpace; allocSpace = 0; } else if (!AddCodeSpace(allocSpace)) { delete allocSpace; allocSpace = 0; } else if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New code space %p allocated at %p size %lu\n", allocSpace, allocSpace->bottom, allocSpace->spaceSize()); // Put in a byte cell to mark the area as unallocated. - FillUnusedSpace(allocSpace->firstFree, allocSpace->top- allocSpace->firstFree); + FillUnusedSpace(allocSpace->writeAble(allocSpace->firstFree), allocSpace->top- allocSpace->firstFree); } catch (std::bad_alloc&) { } if (allocSpace == 0) { osCodeAlloc.FreeCodeArea(mem, shadow, actualSize); mem = 0; } } return allocSpace; } // Allocate memory for a piece of code. This needs to be both mutable and executable, // at least for native code. The interpreted version need not (should not?) make the // area executable. It will not be executed until the mutable bit has been cleared. // Once code is allocated it is not GCed or moved. 
// initCell is a byte cell that is copied into the new code area. PolyObject* MemMgr::AllocCodeSpace(POLYUNSIGNED requiredSize) { PLocker locker(&codeSpaceLock); // Search the code spaces until we find a free area big enough. size_t i = 0; while (true) { if (i != cSpaces.size()) { CodeSpace *space = cSpaces[i]; if (space->largestFree >= requiredSize) { POLYUNSIGNED actualLargest = 0; while (space->firstFree < space->top) { PolyObject *obj = (PolyObject*)(space->firstFree+1); // Skip over allocated areas or free areas that are too small. if (obj->IsCodeObject() || obj->Length() < 8) space->firstFree += obj->Length()+1; else break; } PolyWord *pt = space->firstFree; while (pt < space->top) { PolyObject *obj = (PolyObject*)(pt+1); POLYUNSIGNED length = obj->Length(); if (obj->IsByteObject()) { if (length >= requiredSize) { // Free and large enough PolyWord *next = pt+requiredSize+1; POLYUNSIGNED spare = length - requiredSize; #ifdef POLYML32IN64 // Maintain alignment. if (((requiredSize + 1) & 1) && spare != 0) { - *next++ = PolyWord::FromUnsigned(0); + space->writeAble(next++)[0] = PolyWord::FromUnsigned(0); spare--; } #endif if (spare != 0) - FillUnusedSpace(next, spare); + FillUnusedSpace(space->writeAble(next), spare); space->isMutable = true; // Set this - it ensures the area is scanned on GC. space->headerMap.SetBit(pt-space->bottom); // Set the "header" bit // Set the length word of the code area and copy the byte cell in. // The code bit must be set before the lock is released to ensure // another thread doesn't reuse this. - obj->SetLengthWord(requiredSize, F_CODE_OBJ|F_MUTABLE_BIT); + space->writeAble(obj)->SetLengthWord(requiredSize, F_CODE_OBJ|F_MUTABLE_BIT); return obj; } else if (length >= actualLargest) actualLargest = length+1; } pt += length+1; } // Reached the end without finding what we wanted. Update the largest size. space->largestFree = actualLargest; } i++; // Next area } else { // Allocate a new area and add it at the end of the table. 
uintptr_t spaceSize = requiredSize + 1; #ifdef POLYML32IN64 // We need to allow for the extra alignment word otherwise we // may allocate less than we need. spaceSize += 1; #endif CodeSpace *allocSpace = NewCodeSpace(spaceSize); if (allocSpace == 0) return 0; // Try a GC. globalStats.incSize(PSS_CODE_SPACE, allocSpace->spaceSize() * sizeof(PolyWord)); } } } // Remove code areas that are completely empty. This is probably better than waiting to reuse them. // It's particularly important if we reload a saved state because the code areas for old saved states // are made into local code areas just in case they are currently in use or reachable. void MemMgr::RemoveEmptyCodeAreas() { for (std::vector::iterator i = cSpaces.begin(); i != cSpaces.end(); ) { CodeSpace *space = *i; PolyObject *start = (PolyObject *)(space->bottom+1); if (start->IsByteObject() && start->Length() == space->spaceSize()-1) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Deleted code space %p at %p size %zu\n", space, space->bottom, space->spaceSize()); globalStats.decSize(PSS_CODE_SPACE, space->spaceSize() * sizeof(PolyWord)); // We have an empty cell that fills the whole space. RemoveTree(space); delete(space); i = cSpaces.erase(i); } else i++; } } // Add a code space to the tables. Used both for newly compiled code and also demoted saved spaces. bool MemMgr::AddCodeSpace(CodeSpace *space) { try { AddTree(space); cSpaces.push_back(space); } catch (std::exception&) { RemoveTree(space); return false; } return true; } // Check that we have sufficient space for an allocation to succeed. // Called from the GC to ensure that we will not get into an infinite // loop trying to allocate, failing and garbage-collecting again. bool MemMgr::CheckForAllocation(uintptr_t words) { uintptr_t allocated = 0; return AllocHeapSpace(words, allocated, false) != 0; } // Adjust the allocation area by removing free areas so that the total // size of the allocation area is less than the required value. 
This // is used after the quick GC and also if we need to allocate a large // object. void MemMgr::RemoveExcessAllocation(uintptr_t words) { // First remove any non-standard allocation areas. for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end();) { LocalMemSpace *space = *i; if (space->allocationSpace && space->isEmpty() && space->spaceSize() != defaultSpaceSize) DeleteLocalSpace(i); else i++; } for (std::vector::iterator i = lSpaces.begin(); currentAllocSpace > words && i < lSpaces.end(); ) { LocalMemSpace *space = *i; if (space->allocationSpace && space->isEmpty()) DeleteLocalSpace(i); else i++; } } // Return number of words free in all allocation spaces. uintptr_t MemMgr::GetFreeAllocSpace() { uintptr_t freeSpace = 0; PLocker lock(&allocLock); for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++) { LocalMemSpace *space = *i; if (space->allocationSpace) freeSpace += space->freeSpace(); } return freeSpace; } StackSpace *MemMgr::NewStackSpace(uintptr_t size) { PLocker lock(&stackSpaceLock); try { StackSpace *space = new StackSpace(&osStackAlloc); size_t iSpace = size*sizeof(PolyWord); space->bottom = (PolyWord*)osStackAlloc.AllocateDataArea(iSpace); if (space->bottom == 0) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New stack space: insufficient space\n"); delete space; return 0; } // The size may have been rounded up to a block boundary. size = iSpace/sizeof(PolyWord); space->top = space->bottom + size; space->spaceType = ST_STACK; space->isMutable = true; // Add the stack space to the tree. This ensures that operations such as // LocalSpaceForAddress will work for addresses within the stack. We can // get them in the RTS with functions such as quot_rem and exception stack. // It's not clear whether they really appear in the GC. 
try { AddTree(space); sSpaces.push_back(space); } catch (std::exception&) { RemoveTree(space); delete space; return 0; } if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New stack space %p allocated at %p size %lu\n", space, space->bottom, space->spaceSize()); globalStats.incSize(PSS_STACK_SPACE, space->spaceSize() * sizeof(PolyWord)); return space; } catch (std::bad_alloc&) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: New stack space: \"new\" failed\n"); return 0; } } // If checkmem is given write protect the immutable areas except during a GC. void MemMgr::ProtectImmutable(bool on) { if (debugOptions & DEBUG_CHECK_OBJECTS) { for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++) { LocalMemSpace *space = *i; if (!space->isMutable) { if (!space->isCode) osHeapAlloc.EnableWrite(!on, space->bottom, (char*)space->top - (char*)space->bottom); } } } } bool MemMgr::GrowOrShrinkStack(TaskData *taskData, uintptr_t newSize) { StackSpace *space = taskData->stack; size_t iSpace = newSize*sizeof(PolyWord); PolyWord *newSpace = (PolyWord*)osStackAlloc.AllocateDataArea(iSpace); if (newSpace == 0) { if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Unable to change size of stack %p from %lu to %lu: insufficient space\n", space, space->spaceSize(), newSize); return false; } // The size may have been rounded up to a block boundary. 
newSize = iSpace/sizeof(PolyWord); try { AddTree(space, newSpace, newSpace+newSize); } catch (std::bad_alloc&) { RemoveTree(space, newSpace, newSpace+newSize); delete space; return 0; } taskData->CopyStackFrame(space->stack(), space->spaceSize(), (StackObject*)newSpace, newSize); if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Size of stack %p changed from %lu to %lu at %p\n", space, space->spaceSize(), newSize, newSpace); globalStats.incSize(PSS_STACK_SPACE, (newSize - space->spaceSize()) * sizeof(PolyWord)); RemoveTree(space); // Remove it BEFORE freeing the space - another thread may allocate it PolyWord *oldBottom = space->bottom; size_t oldSize = (char*)space->top - (char*)space->bottom; space->bottom = newSpace; // Switch this before freeing - We could get a profile trap during the free space->top = newSpace+newSize; osStackAlloc.FreeDataArea(oldBottom, oldSize); return true; } // Delete a stack when a thread has finished. // This can be called by an ML thread so needs an interlock. bool MemMgr::DeleteStackSpace(StackSpace *space) { PLocker lock(&stackSpaceLock); for (std::vector::iterator i = sSpaces.begin(); i < sSpaces.end(); i++) { if (*i == space) { globalStats.decSize(PSS_STACK_SPACE, space->spaceSize() * sizeof(PolyWord)); RemoveTree(space); delete space; sSpaces.erase(i); if (debugOptions & DEBUG_MEMMGR) Log("MMGR: Deleted stack space %p at %p size %zu\n", space, space->bottom, space->spaceSize()); return true; } } ASSERT(false); // It should always be in the table. return false; } SpaceTreeTree::SpaceTreeTree(): SpaceTree(false) { for (unsigned i = 0; i < 256; i++) tree[i] = 0; } SpaceTreeTree::~SpaceTreeTree() { for (unsigned i = 0; i < 256; i++) { if (tree[i] && ! tree[i]->isSpace) delete(tree[i]); } } // Add and remove entries in the space tree. void MemMgr::AddTree(MemSpace *space, PolyWord *startS, PolyWord *endS) { // It isn't clear we need to lock here but it's probably sensible. 
PLocker lock(&spaceTreeLock); AddTreeRange(&spaceTree, space, (uintptr_t)startS, (uintptr_t)endS); } void MemMgr::RemoveTree(MemSpace *space, PolyWord *startS, PolyWord *endS) { PLocker lock(&spaceTreeLock); RemoveTreeRange(&spaceTree, space, (uintptr_t)startS, (uintptr_t)endS); } void MemMgr::AddTreeRange(SpaceTree **tt, MemSpace *space, uintptr_t startS, uintptr_t endS) { if (*tt == 0) *tt = new SpaceTreeTree; ASSERT(! (*tt)->isSpace); SpaceTreeTree *t = (SpaceTreeTree*)*tt; const unsigned shift = (sizeof(void*)-1) * 8; // Takes the high-order byte uintptr_t r = startS >> shift; ASSERT(r < 256); const uintptr_t s = endS == 0 ? 256 : endS >> shift; ASSERT(s >= r && s <= 256); if (r == s) // Wholly within this entry AddTreeRange(&(t->tree[r]), space, startS << 8, endS << 8); else { // Deal with any remainder at the start. if ((r << shift) != startS) { AddTreeRange(&(t->tree[r]), space, startS << 8, 0 /*End of range*/); r++; } // Whole entries. while (r < s) { ASSERT(t->tree[r] == 0); t->tree[r] = space; r++; } // Remainder at the end. if ((s << shift) != endS) AddTreeRange(&(t->tree[r]), space, 0, endS << 8); } } // Remove an entry from the tree for a range. Strictly speaking we don't need the // space argument here but it's useful as a check. // This may be called to remove a partially installed structure if we have // run out of space in AddTreeRange. void MemMgr::RemoveTreeRange(SpaceTree **tt, MemSpace *space, uintptr_t startS, uintptr_t endS) { SpaceTreeTree *t = (SpaceTreeTree*)*tt; if (t == 0) return; // This can only occur if we're recovering. ASSERT(! t->isSpace); const unsigned shift = (sizeof(void*)-1) * 8; uintptr_t r = startS >> shift; const uintptr_t s = endS == 0 ? 256 : endS >> shift; if (r == s) RemoveTreeRange(&(t->tree[r]), space, startS << 8, endS << 8); else { // Deal with any remainder at the start. if ((r << shift) != startS) { RemoveTreeRange(&(t->tree[r]), space, startS << 8, 0); r++; } // Whole entries. 
while (r < s) { ASSERT(t->tree[r] == space || t->tree[r] == 0 /* Recovery only */); t->tree[r] = 0; r++; } // Remainder at the end. if ((s << shift) != endS) RemoveTreeRange(&(t->tree[r]), space, 0, endS << 8); } // See if the whole vector is now empty. for (unsigned j = 0; j < 256; j++) { if (t->tree[j]) return; // It's not empty - we're done. } delete(t); *tt = 0; } uintptr_t MemMgr::AllocatedInAlloc() { uintptr_t inAlloc = 0; for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++) { LocalMemSpace *sp = *i; if (sp->allocationSpace) inAlloc += sp->allocatedSpace(); } return inAlloc; } // Report heap sizes and occupancy before and after GC void MemMgr::ReportHeapSizes(const char *phase) { uintptr_t alloc = 0, nonAlloc = 0, inAlloc = 0, inNonAlloc = 0; for (std::vector::iterator i = lSpaces.begin(); i < lSpaces.end(); i++) { LocalMemSpace *sp = *i; if (sp->allocationSpace) { alloc += sp->spaceSize(); inAlloc += sp->allocatedSpace(); } else { nonAlloc += sp->spaceSize(); inNonAlloc += sp->allocatedSpace(); } } Log("Heap: %s Major heap used ", phase); LogSize(inNonAlloc); Log(" of "); LogSize(nonAlloc); Log(" (%1.0f%%). Alloc space used ", (float)inNonAlloc / (float)nonAlloc * 100.0F); LogSize(inAlloc); Log(" of "); LogSize(alloc); Log(" (%1.0f%%). 
// Profiling - Find a code object or return zero if not found.
// This can be called on a "user" thread.
// addr is an arbitrary byte address, typically inside a code cell.  The result
// is the object whose cell contains addr, or zero if addr is not in any known
// space, the space is not marked as code, or the space is neither a code space
// nor a permanent space.
// Code spaces use their headerMap; permanent spaces use the profileCode
// bitmap, which is created here the first time it is needed.
PolyObject *MemMgr::FindCodeObject(const byte *addr)
{
    MemSpace *space = SpaceForAddress(addr);
    if (space == 0) return 0;
    Bitmap *profMap = 0;
    if (! space->isCode) return 0;
    if (space->spaceType == ST_CODE)
    {
        CodeSpace *cSpace = (CodeSpace*)space;
        profMap = &cSpace->headerMap;
    }
    else if (space->spaceType == ST_PERMANENT)
    {
        PermanentMemSpace *pSpace = (PermanentMemSpace*)space;
        profMap = &pSpace->profileCode;
    }
    else return 0; // Must be in code or permanent code.

    // For the permanent areas the header maps are created and initialised on demand.
    // Created() is tested once without the lock and again with codeBitmapLock
    // held so that only one thread performs the creation.
    if (! profMap->Created())
    {
        PLocker lock(&codeBitmapLock);
        if (! profMap->Created()) // Second check now we've got the lock.
        {
            // Create the bitmap.  If it failed just say "not in this area"
            if (! profMap->Create(space->spaceSize()))
                return 0;
            // Set the first bit before releasing the lock.
            profMap->SetBit(0);
        }
    }

    // A bit is set if it is a length word.
    while ((uintptr_t)addr & (sizeof(POLYUNSIGNED)-1)) addr--; // Make it word aligned
    PolyWord *wordAddr = (PolyWord*)addr;
    // Work back to find the first set bit before this.
    // Normally we will find one but if we're looking up a value that
    // is actually an integer it might be in a piece of code that is now free.
    uintptr_t bitOffset = profMap->FindLastSet(wordAddr - space->bottom);
    if (space->spaceType == ST_CODE)
    {
        // ptr is the candidate length word; the object starts one word later.
        PolyWord *ptr = space->bottom+bitOffset;
        if (ptr >= space->top) return 0;
        // This will find the last non-free code cell or the first cell.
        // Return zero if the value was not actually in the cell or it wasn't code.
        PolyObject *obj = (PolyObject*)(ptr+1);
#ifdef POLYML32IN64
        PolyObject *lastObj = obj;
        // This is relative to globalCodeBase not globalHeapBase.
        while (lastObj->ContainsForwardingPtr())
            lastObj = (PolyObject*)(globalCodeBase + ((lastObj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
#else
        PolyObject *lastObj = obj->FollowForwardingChain();
#endif
        // We normally replace forwarding pointers but when scanning to update
        // addresses after a saved state we may not have yet done that.
        // The length and the code test come from the end of the forwarding
        // chain (lastObj) but the object returned is the one in this cell (obj).
        if (wordAddr > ptr && wordAddr < ptr + 1 + lastObj->Length() && lastObj->IsCodeObject())
            return obj;
        else return 0;
    }
    // Permanent area - the bits are set on demand.
    // Now work forward, setting any bits if necessary.  We don't need a lock
    // because this is monotonic.
    for (;;)
    {
        PolyWord *ptr = space->bottom+bitOffset;
        if (ptr >= space->top) return 0;
        PolyObject *obj = (PolyObject*)(ptr+1);
        ASSERT(obj->ContainsNormalLengthWord());
        // NOTE(review): the upper bound here is ptr + Length() whereas the code
        // space branch above uses ptr + 1 + Length(), so the final word of the
        // cell is excluded here only - confirm whether that is intentional.
        if (wordAddr > ptr && wordAddr < ptr + obj->Length())
            return obj;
        // Step over this cell (length word plus contents) and record where the
        // next length word is so that later searches can start from it.
        // NOTE(review): if this cell is the last one in the space bitOffset now
        // equals spaceSize(), the size the bitmap was created with - confirm
        // that SetBit tolerates that index.
        bitOffset += obj->Length()+1;
        profMap->SetBit(bitOffset);
    }
    return 0;
}
// utility conversion macros
// Words_to_K / Words_to_M convert a count of PolyWords to kilobytes / megabytes;
// B_to_M converts a byte count to megabytes.  All of them truncate.
// Every argument and every complete expansion is parenthesised so that the
// macros behave like function calls when passed a compound expression
// (e.g. a + b) or when used inside a larger expression (e.g. as a divisor).
#define Words_to_K(w)  (((w)*sizeof(PolyWord))/1024)
#define Words_to_M(w)  (((w)*sizeof(PolyWord))/(1<<20))
#define B_to_M(b)      ((b)/(1<<20))
// Permanent memory space.  Either linked into the executable program or
// loaded from a saved state file.
// The constructor is protected and MemMgr is declared a friend, so outside
// this class hierarchy only MemMgr can construct one.
class PermanentMemSpace: public MemSpace
{
protected:
    // Every field starts at zero/false; the two bitmaps are default constructed.
    PermanentMemSpace(OSMem *alloc): MemSpace(alloc), index(0), hierarchy(0), noOverwrite(false),
        byteOnly(false), topPointer(0) {}

public:
    unsigned    index; // An identifier for the space.  Used when saving and loading.
    unsigned    hierarchy; // The hierarchy number: 0=from executable, 1=top level saved state, ...
    bool        noOverwrite; // Don't save this in deeper hierarchies.
    bool        byteOnly; // Only contains byte data - no need to scan for addresses.

    // When exporting or saving state we copy data into a new area.
    // This area grows upwards unlike the local areas that grow down.
    PolyWord    *topPointer;

    Bitmap      shareBitmap; // Used in sharedata
    // One bit per word, set for length words.  Built on demand by
    // MemMgr::FindCodeObject and released by MemMgr::RemoveProfilingBitmaps.
    Bitmap      profileCode; // Used when profiling
    friend class MemMgr;
};
The reason for this // is that it's only possible to scan objects from the bottom up and the minor // GC combines scanning with allocation whereas the major GC compacts from the // bottom into the top of an area. PolyWord *upperAllocPtr; // Allocation pointer. Objects are allocated AFTER this. PolyWord *lowerAllocPtr; // Allocation pointer. Objects are allocated BEFORE this. PolyWord *fullGCLowerLimit;// Lowest object in area before copying. PolyWord *partialGCTop; // Value of upperAllocPtr before the current partial GC. PolyWord *partialGCScan; // Scan pointer used in minor GC PolyWord *partialGCRootBase; // Start of the root objects. PolyWord *partialGCRootTop;// Value of lowerAllocPtr after the roots have been copied. GCTaskId *spaceOwner; // The thread that "owns" this space during a GC. Bitmap bitmap; /* bitmap with one bit for each word in the GC area. */ PLock bitmapLock; // Lock used in GC sharing pass. bool allocationSpace; // True if this is (mutable) space for initial allocation uintptr_t start[NSTARTS]; /* starting points for bit searches. */ unsigned start_index; /* last index used to index start array */ uintptr_t i_marked; /* count of immutable words marked. */ uintptr_t m_marked; /* count of mutable words marked. */ uintptr_t updated; /* count of words updated. */ uintptr_t allocatedSpace(void)const // Words allocated { return (top-upperAllocPtr) + (lowerAllocPtr-bottom); } uintptr_t freeSpace(void)const // Words free { return upperAllocPtr-lowerAllocPtr; } #ifdef POLYML32IN64 // We will generally set a zero cell for alignment. bool isEmpty(void)const { return allocatedSpace() <= 1; } #else bool isEmpty(void)const { return allocatedSpace() == 0; } #endif virtual const char *spaceTypeString() { return allocationSpace ? 
"allocation" : MemSpace::spaceTypeString(); } // Used when converting to and from bit positions in the bitmap uintptr_t wordNo(PolyWord *pt) { return pt - bottom; } PolyWord *wordAddr(uintptr_t bitno) { return bottom + bitno; } friend class MemMgr; }; class StackObject; // Abstract - Architecture specific // Stack spaces. These are managed by the thread module class StackSpace: public MemSpace { public: StackSpace(OSMem *alloc): MemSpace(alloc) { } StackObject *stack()const { return (StackObject *)bottom; } }; // Code Space. These contain local code created by the compiler. class CodeSpace: public MarkableSpace { public: - CodeSpace(PolyWord *start, uintptr_t spaceSize, OSMem *alloc); + CodeSpace(PolyWord *start, PolyWord *shadow, uintptr_t spaceSize, OSMem *alloc); Bitmap headerMap; // Map to find the headers during GC or profiling. uintptr_t largestFree; // The largest free space in the area PolyWord *firstFree; // The start of the first free space in the area. }; class MemMgr { public: MemMgr(); ~MemMgr(); bool Initialise(); // Create a local space for initial allocation. LocalMemSpace *CreateAllocationSpace(uintptr_t size); // Create and initialise a new local space and add it to the table. LocalMemSpace *NewLocalSpace(uintptr_t size, bool mut); // Create an entry for a permanent space. PermanentMemSpace *NewPermanentSpace(PolyWord *base, uintptr_t words, unsigned flags, unsigned index, unsigned hierarchy = 0); // Create a permanent space but allocate memory for it. // Sets bottom and top to the actual memory size. PermanentMemSpace *AllocateNewPermanentSpace(uintptr_t byteSize, unsigned flags, unsigned index, unsigned hierarchy = 0); // Called after an allocated permanent area has been filled in. bool CompletePermanentSpaceAllocation(PermanentMemSpace *space); // Delete a local space. Takes the iterator position in lSpaces and returns the // iterator after deletion. 
void DeleteLocalSpace(std::vector::iterator &iter); // Allocate an area of the heap of at least minWords and at most maxWords. // This is used both when allocating single objects (when minWords and maxWords // are the same) and when allocating heap segments. If there is insufficient // space to satisfy the minimum it will return 0. Updates "maxWords" with // the space actually allocated PolyWord *AllocHeapSpace(uintptr_t minWords, uintptr_t &maxWords, bool doAllocation = true); PolyWord *AllocHeapSpace(uintptr_t words) { uintptr_t allocated = words; return AllocHeapSpace(words, allocated); } CodeSpace *NewCodeSpace(uintptr_t size); // Allocate space for code. This is initially mutable to allow the code to be built. PolyObject *AllocCodeSpace(POLYUNSIGNED size); // Check that a subsequent allocation will succeed. Called from the GC to ensure bool CheckForAllocation(uintptr_t words); // If an allocation space has a lot of data left in it, particularly a single // large object we should turn it into a local area. void ConvertAllocationSpaceToLocal(LocalMemSpace *space); // Allocate space for the initial stack for a thread. The caller must // initialise the new stack. Returns 0 if allocation fails. StackSpace *NewStackSpace(uintptr_t size); // Adjust the space for a stack. Returns true if it succeeded. If it failed // it leaves the stack untouched. bool GrowOrShrinkStack(TaskData *taskData, uintptr_t newSize); // Delete a stack when a thread has finished. bool DeleteStackSpace(StackSpace *space); // Create and delete export spaces PermanentMemSpace *NewExportSpace(uintptr_t size, bool mut, bool noOv, bool code); void DeleteExportSpaces(void); bool PromoteExportSpaces(unsigned hierarchy); // Turn export spaces into permanent spaces. bool DemoteImportSpaces(void); // Turn previously imported spaces into local. 
PermanentMemSpace *SpaceForIndex(unsigned index); // Return the space for a given index // As a debugging check, write protect the immutable areas apart from during the GC. void ProtectImmutable(bool on); // Find a space that contains a given address. This is called for every cell // during a GC so needs to be fast., // N.B. This must be called on an address at the beginning or within the cell. // Generally that means with a pointer to the length word. Pointing at the // first "data" word may give the wrong result if the length is zero. MemSpace *SpaceForAddress(const void *pt) const { uintptr_t t = (uintptr_t)pt; SpaceTree *tr = spaceTree; // Each level of the tree is either a leaf or a vector of trees. unsigned j = sizeof(void *)*8; for (;;) { if (tr == 0 || tr->isSpace) return (MemSpace*)tr; j -= 8; tr = ((SpaceTreeTree*)tr)->tree[(t >> j) & 0xff]; } return 0; } // Find a local address for a space. // N.B. The argument should generally be the length word. See // comment on SpaceForAddress. LocalMemSpace *LocalSpaceForAddress(const void *pt) const { MemSpace *s = SpaceForAddress(pt); if (s != 0 && s->spaceType == ST_LOCAL) return (LocalMemSpace*)s; else return 0; } void SetReservation(uintptr_t words) { reservedSpace = words; } // In several places we assume that segments are filled with valid // objects. This fills unused memory with one or more "byte" objects. void FillUnusedSpace(PolyWord *base, uintptr_t words); // Return number of words of free space for stats. uintptr_t GetFreeAllocSpace(); // Remove unused local areas. void RemoveEmptyLocals(); // Remove unused code areas. void RemoveEmptyCodeAreas(); // Remove unused allocation areas to reduce the space below the limit. 
void RemoveExcessAllocation(uintptr_t words); void RemoveExcessAllocation() { RemoveExcessAllocation(spaceBeforeMinorGC); } // Table for permanent spaces std::vector pSpaces; // Table for local spaces std::vector lSpaces; // Table for export spaces std::vector eSpaces; // Table for stack spaces std::vector sSpaces; PLock stackSpaceLock; // Table for code spaces std::vector cSpaces; PLock codeSpaceLock; // Storage manager lock. PLock allocLock; // Lock for creating new bitmaps for code profiling PLock codeBitmapLock; unsigned nextIndex; // Used when allocating new permanent spaces. uintptr_t SpaceBeforeMinorGC() const { return spaceBeforeMinorGC; } uintptr_t SpaceForHeap() const { return spaceForHeap; } void SetSpaceBeforeMinorGC(uintptr_t minorSize) { spaceBeforeMinorGC = minorSize; } void SetSpaceForHeap(uintptr_t heapSize) { spaceForHeap = heapSize; } uintptr_t CurrentAllocSpace() { return currentAllocSpace; } uintptr_t AllocatedInAlloc(); uintptr_t CurrentHeapSize() { return currentHeapSize; } uintptr_t DefaultSpaceSize() const { return defaultSpaceSize; } void ReportHeapSizes(const char *phase); // Profiling - Find a code object or return zero if not found. PolyObject *FindCodeObject(const byte *addr); // Profiling - Free bitmaps to indicate start of an object. void RemoveProfilingBitmaps(); private: bool AddLocalSpace(LocalMemSpace *space); bool AddCodeSpace(CodeSpace *space); uintptr_t reservedSpace; unsigned nextAllocator; // The default size in words when creating new segments. uintptr_t defaultSpaceSize; // The number of words that can be used for initial allocation. uintptr_t spaceBeforeMinorGC; // The number of words that can be used for the heap uintptr_t spaceForHeap; // The current sizes of the allocation space and the total heap size. 
uintptr_t currentAllocSpace, currentHeapSize; // LocalSpaceForAddress is a hot-spot so we use a B-tree to convert addresses; SpaceTree *spaceTree; PLock spaceTreeLock; void AddTree(MemSpace *space) { AddTree(space, space->bottom, space->top); } void RemoveTree(MemSpace *space) { RemoveTree(space, space->bottom, space->top); } void AddTree(MemSpace *space, PolyWord *startS, PolyWord *endS); void RemoveTree(MemSpace *space, PolyWord *startS, PolyWord *endS); void AddTreeRange(SpaceTree **t, MemSpace *space, uintptr_t startS, uintptr_t endS); void RemoveTreeRange(SpaceTree **t, MemSpace *space, uintptr_t startS, uintptr_t endS); OSMem osHeapAlloc, osStackAlloc, osCodeAlloc; }; extern MemMgr gMem; #endif diff --git a/libpolyml/osmem.cpp b/libpolyml/osmem.cpp index 827d8d46..b5216b6e 100644 --- a/libpolyml/osmem.cpp +++ b/libpolyml/osmem.cpp @@ -1,467 +1,481 @@ /* Title: osomem.cpp - Interface to OS memory management Copyright (c) 2006, 2017-18, 2020 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_MMAN_H #include #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #include "osmem.h" #include "bitmap.h" #include "locking.h" // Linux prefers MAP_ANONYMOUS to MAP_ANON #ifndef MAP_ANON #ifdef MAP_ANONYMOUS #define MAP_ANON MAP_ANONYMOUS #endif #endif #ifdef POLYML32IN64 +OSMem::OSMem() +{ + memBase = 0; +} + bool OSMem::Initialise(bool requiresExecute, size_t space /* = 0 */, void **pBase /* = 0 */) { needExecute = requiresExecute; pageSize = PageSize(); memBase = (char*)ReserveHeap(space); if (memBase == 0) return 0; if (pBase != 0) *pBase = memBase; // Create a bitmap with a bit for each page. if (!pageMap.Create(space / pageSize)) return false; lastAllocated = space / pageSize; // Beyond the last page in the area // Set the last bit in the area so that we don't use it. // This is effectively a work-around for a problem with the heap. // If we have a zero-sized cell at the end of the memory its address is // going to be zero. This causes problems with forwarding pointers. // There may be better ways of doing this. pageMap.SetBit(space / pageSize - 1); return true; } void *OSMem::AllocateDataArea(size_t& space) { char *baseAddr; { PLocker l(&bitmapLock); uintptr_t pages = (space + pageSize - 1) / pageSize; // Round up to an integral number of pages. space = pages * pageSize; // Find some space while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area. lastAllocated--; uintptr_t free = pageMap.FindFree(0, lastAllocated, pages); if (free == lastAllocated) return 0; // Can't find the space. 
pageMap.SetBits(free, pages); // TODO: Do we need to zero this? It may have previously been set. baseAddr = memBase + free * pageSize; } return CommitPages(baseAddr, space, false); } bool OSMem::FreeDataArea(void *p, size_t space) { char *addr = (char*)p; uintptr_t offset = (addr - memBase) / pageSize; if (!UncommitPages(p, space)) return false; uintptr_t pages = space / pageSize; { PLocker l(&bitmapLock); pageMap.ClearBits(offset, pages); if (offset + pages > lastAllocated) // We allocate from the top down. lastAllocated = offset + pages; } return true; } void * OSMem::AllocateCodeArea(size_t& space, void*& shadowArea) { char* baseAddr; { PLocker l(&bitmapLock); uintptr_t pages = (space + pageSize - 1) / pageSize; // Round up to an integral number of pages. space = pages * pageSize; // Find some space while (pageMap.TestBit(lastAllocated - 1)) // Skip the wholly allocated area. lastAllocated--; uintptr_t free = pageMap.FindFree(0, lastAllocated, pages); if (free == lastAllocated) return 0; // Can't find the space. pageMap.SetBits(free, pages); // TODO: Do we need to zero this? It may have previously been set. baseAddr = memBase + free * pageSize; } void *dataArea = CommitPages(baseAddr, space, needExecute); shadowArea = dataArea; return dataArea; } bool OSMem::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space) { ASSERT(codeAddr == dataAddr); char* addr = (char*)codeAddr; uintptr_t offset = (addr - memBase) / pageSize; if (!UncommitPages(codeAddr, space)) return false; uintptr_t pages = space / pageSize; { PLocker l(&bitmapLock); pageMap.ClearBits(offset, pages); if (offset + pages > lastAllocated) // We allocate from the top down. lastAllocated = offset + pages; } return true; } #endif #if (defined(HAVE_MMAP) && defined(MAP_ANON)) // We don't use autoconf's test for mmap here because that tests for // file mapping. Instead the test simply tests for the presence of an mmap // function. // We also insist that the OS supports MAP_ANON or MAP_ANONYMOUS. 
Older // versions of Solaris required the use of /dev/zero instead. We don't // support that. #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_SYS_PARAM_H #include #endif // How do we get the page size? #ifndef HAVE_GETPAGESIZE #ifdef _SC_PAGESIZE #define getpagesize() sysconf(_SC_PAGESIZE) #else // If this fails we're stuck #define getpagesize() PAGESIZE #endif #endif #ifdef SOLARIS #define FIXTYPE (caddr_t) #else #define FIXTYPE #endif #ifdef POLYML32IN64 // Unix-specific implementation of the subsidiary functions. size_t OSMem::PageSize() { return getpagesize(); } void *OSMem::ReserveHeap(size_t space) { return mmap(0, space, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); } bool OSMem::UnreserveHeap(void *p, size_t space) { return munmap(FIXTYPE p, space) == 0; } void *OSMem::CommitPages(void *baseAddr, size_t space, bool allowExecute) { int prot = PROT_READ | PROT_WRITE; if (allowExecute) prot |= PROT_EXEC; if (mmap(baseAddr, space, prot, MAP_FIXED|MAP_PRIVATE|MAP_ANON, -1, 0) == MAP_FAILED) return 0; msync(baseAddr, space, MS_SYNC|MS_INVALIDATE); return baseAddr; } bool OSMem::UncommitPages(void *p, size_t space) { // Remap the pages as new entries. This should remove the old versions. if (mmap(p, space, PROT_NONE, MAP_FIXED|MAP_PRIVATE|MAP_ANON, -1, 0) == MAP_FAILED) return false; msync(p, space, MS_SYNC|MS_INVALIDATE); return true; } bool OSMem::EnableWrite(bool enable, void* p, size_t space) { int res = mprotect(FIXTYPE p, space, enable ? PROT_READ|PROT_WRITE: PROT_READ); return res != -1; } bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space) { int prot = PROT_READ; if (needExecute) prot |= PROT_EXEC; int res = mprotect(FIXTYPE codeAddr, space, prot); return res != -1; } #else +OSMem::OSMem() +{ + allocPtr = 0; +} bool OSMem::Initialise(bool requiresExecute, size_t space /* = 0 */, void **pBase /* = 0 */) { needExecute = requiresExecute; pageSize = getpagesize(); return true; } // Allocate space and return a pointer to it. 
The size is the minimum // size requested and it is updated with the actual space allocated. // Returns NULL if it cannot allocate the space. void *OSMem::AllocateDataArea(size_t &space) { // Round up to an integral number of pages. space = (space + pageSize-1) & ~(pageSize-1); int fd = -1; // This value is required by FreeBSD. Linux doesn't care void *result = mmap(0, space, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, fd, 0); // Convert MAP_FAILED (-1) into NULL if (result == MAP_FAILED) return 0; return result; } // Release the space previously allocated. This must free the whole of // the segment. The space must be the size actually allocated. bool OSMem::FreeDataArea(void *p, size_t space) { return munmap(FIXTYPE p, space) == 0; } bool OSMem::EnableWrite(bool enable, void* p, size_t space) { int res = mprotect(FIXTYPE p, space, enable ? PROT_READ|PROT_WRITE: PROT_READ); return res != -1; } void *OSMem::AllocateCodeArea(size_t &space, void*& shadowArea) { // Round up to an integral number of pages. space = (space + pageSize-1) & ~(pageSize-1); int fd = -1; // This value is required by FreeBSD. Linux doesn't care int prot = PROT_READ | PROT_WRITE; if (needExecute) prot |= PROT_EXEC; void *result = mmap(0, space, prot, MAP_PRIVATE|MAP_ANON, fd, 0); // Convert MAP_FAILED (-1) into NULL if (result == MAP_FAILED) return 0; shadowArea = result; return result; } bool OSMem::FreeCodeArea(void *codeArea, void *dataArea, size_t space) { ASSERT(codeArea == dataArea); return munmap(FIXTYPE codeArea, space) == 0; } bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space) { int prot = PROT_READ; if (needExecute) prot |= PROT_EXEC; int res = mprotect(FIXTYPE codeAddr, space, prot); return res != -1; } #endif #elif defined(_WIN32) // Use Windows memory management. #include #ifdef POLYML32IN64 // Windows-specific implementations of the subsidiary functions. size_t OSMem::PageSize() { // Get the page size and round up to that multiple. 
// Reserve "space" bytes of address space with no access permission and
// return its base address, or 0 if the reservation fails.
// Only an address at or above 2^32 is accepted.  A lower one is rejected and
// the reservation is retried recursively.
void *OSMem::ReserveHeap(size_t space)
{
    // N.B. This is a local variable; it is not the memBase member of OSMem.
    void *memBase = VirtualAlloc(0, space, MEM_RESERVE, PAGE_NOACCESS);
    if (memBase == 0) return 0;
    // We need the heap to be such that the top 32-bits are non-zero.
    if ((uintptr_t)memBase >= ((uintptr_t)1 << 32)) return memBase;
    // Allocate again.
    // The unsuitable reservation is still held during the recursive call and
    // is only released afterwards, so the retry cannot be given the same region.
    void *newSpace = ReserveHeap(space);
    UnreserveHeap(memBase, space); // Free the old area that isn't suitable.
    // Return what we got, or zero if it failed.
    return newSpace;
}
The size is the minimum // size requested and it is updated with the actual space allocated. // Returns NULL if it cannot allocate the space. void *OSMem::AllocateDataArea(size_t &space) { space = (space + pageSize - 1) & ~(pageSize - 1); DWORD options = MEM_RESERVE | MEM_COMMIT; return VirtualAlloc(0, space, options, PAGE_READWRITE); } // Release the space previously allocated. This must free the whole of // the segment. The space must be the size actually allocated. bool OSMem::FreeDataArea(void *p, size_t space) { return VirtualFree(p, 0, MEM_RELEASE) == TRUE; } // Adjust the permissions on a segment. This must apply to the // whole of a segment. bool OSMem::EnableWrite(bool enable, void* p, size_t space) { DWORD oldProtect; return VirtualProtect(p, space, enable ? PAGE_READWRITE: PAGE_READONLY, &oldProtect) == TRUE; } void* OSMem::AllocateCodeArea(size_t& space, void*& shadowArea) { space = (space + pageSize - 1) & ~(pageSize - 1); DWORD options = MEM_RESERVE | MEM_COMMIT; void * dataAddr = VirtualAlloc(0, space, options, needExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE); shadowArea = dataAddr; return dataAddr; } bool OSMem::FreeCodeArea(void* codeAddr, void* dataAddr, size_t space) { ASSERT(codeAddr == dataAddr); return VirtualFree(codeAddr, 0, MEM_RELEASE) == TRUE; } bool OSMem::DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space) { ASSERT(codeAddr == dataAddr); DWORD oldProtect; return VirtualProtect(codeAddr, space, needExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldProtect) == TRUE; } #endif #else #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_MALLOC_H #include #endif #ifdef POLYML32IN64 #error "32 bit in 64-bits requires either mmap or VirtualAlloc" #endif // Use calloc to allocate the memory. Using calloc ensures the memory is // zeroed and is compatible with the other allocators. 
void *OSMem::Allocate(size_t &bytes, unsigned permissions) { return calloc(bytes, 1); } bool OSMem::Free(void *p, size_t/*space*/) { free(p); return true; } // We can't do this if we don't have mprotect. bool OSMem::SetPermissions(void *p, size_t space, unsigned permissions) { return true; // Let's hope this is all right. } #endif diff --git a/libpolyml/osmem.h b/libpolyml/osmem.h index b5a32231..0e8db884 100644 --- a/libpolyml/osmem.h +++ b/libpolyml/osmem.h @@ -1,96 +1,102 @@ /* Title: osomem.h - Interface to OS memory management Copyright (c) 2006, 2017-18, 2020 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef OS_MEM_H_INCLUDED #define OS_MEM_H_INCLUDED // We need size_t so include these two here. #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef POLYML32IN64 #include "bitmap.h" -#include "locking.h" #endif +#include "locking.h" + + // This class provides access to the memory management provided by the // operating system. It would be nice if we could always use malloc and // free for this but we need to have execute permission on the code // objects. class OSMem { public: - OSMem() {} + OSMem(); ~OSMem() {} bool Initialise(bool requiresExecute, size_t space = 0, void** pBase = 0); // Allocate space and return a pointer to it. The size is the minimum // size requested in bytes and it is updated with the actual space allocated. 
// Returns NULL if it cannot allocate the space. void *AllocateDataArea(size_t& bytes); // Release the space previously allocated. This must free the whole of // the segment. The space must be the size actually allocated. bool FreeDataArea(void* p, size_t space); // Enable/disable writing. This must apply to the whole of a segment. // Only for data areas. bool EnableWrite(bool enable, void* p, size_t space); // Allocate code area. Some systems will not allow both write and execute permissions // on the same page. On those systems we have to allocate two regions of shared memory, // one with read+execute permission and the other with read+write. void *AllocateCodeArea(size_t& bytes, void*& shadowArea); // Free the allocated areas. bool FreeCodeArea(void* codeAddr, void* dataAddr, size_t space); // Remove write access. This is used after the permanent code area has been created // either from importing a portable export file or copying the area in 32-in-64. bool DisableWriteForCode(void* codeAddr, void* dataAddr, size_t space); protected: size_t pageSize; bool needExecute; + // If we need to use dual areas because WRITE+EXECUTE permission is not allowed. + PLock allocLock; + size_t allocPtr; + #ifdef POLYML32IN64 size_t PageSize(); void* ReserveHeap(size_t space); bool UnreserveHeap(void* baseAddr, size_t space); void* CommitPages(void* baseAddr, size_t space, bool allowExecute); bool UncommitPages(void* baseAddr, size_t space); Bitmap pageMap; uintptr_t lastAllocated; char* memBase; PLock bitmapLock; #endif }; #endif diff --git a/libpolyml/pexport.cpp b/libpolyml/pexport.cpp index 6af4918c..ead4be7f 100644 --- a/libpolyml/pexport.cpp +++ b/libpolyml/pexport.cpp @@ -1,823 +1,826 @@ /* Title: Export and import memory in a portable format Author: David C. J. Matthews. Copyright (c) 2006-7, 2015-8 David C. J. 
Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR H PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_STDIO_H #include #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #include "globals.h" #include "pexport.h" #include "machine_dep.h" #include "scanaddrs.h" #include "run_time.h" #include "../polyexports.h" #include "version.h" #include "sys.h" #include "polystring.h" #include "memmgr.h" #include "rtsentry.h" #include "mpoly.h" // For polyStderr /* This file contains the code both to export the file and to import it in a new session. */ PExport::PExport() { } PExport::~PExport() { } // Get the index corresponding to an address. size_t PExport::getIndex(PolyObject *p) { // Binary chop to find the index from the address. size_t lower = 0, upper = pMap.size(); while (1) { ASSERT(lower < upper); size_t middle = (lower+upper)/2; ASSERT(middle < pMap.size()); if (p < pMap[middle]) { // Use lower to middle upper = middle; } else if (p > pMap[middle]) { // Use middle+1 to upper lower = middle+1; } else // Found it return middle; } } /* Get the index corresponding to an address. 
*/ void PExport::printAddress(void *p) { fprintf(exportFile, "@%" PRI_SIZET "", getIndex((PolyObject*)p)); } void PExport::printValue(PolyWord q) { if (IS_INT(q) || q == PolyWord::FromUnsigned(0)) fprintf(exportFile, "%" POLYSFMT, UNTAGGED(q)); else printAddress(q.AsAddress()); } void PExport::printObject(PolyObject *p) { POLYUNSIGNED length = p->Length(); POLYUNSIGNED i; size_t myIndex = getIndex(p); fprintf(exportFile, "%" PRI_SIZET ":", myIndex); if (p->IsMutable()) putc('M', exportFile); if (OBJ_IS_NEGATIVE(p->LengthWord())) putc('N', exportFile); if (OBJ_IS_WEAKREF_OBJECT(p->LengthWord())) putc('W', exportFile); if (OBJ_IS_NO_OVERWRITE(p->LengthWord())) putc('V', exportFile); if (p->IsByteObject()) { if (p->IsMutable() && p->IsWeakRefObject()) { // This is either an entry point or a weak ref used in the FFI. // Clear the first word if (p->Length() == 1) p->Set(0, PolyWord::FromSigned(0)); // Weak ref else if (p->Length() > 1) *(uintptr_t*)p = 0; // Entry point } /* May be a string, a long format arbitrary precision number or a real number. */ PolyStringObject* ps = (PolyStringObject*)p; /* This is not infallible but it seems to be good enough to detect the strings. */ POLYUNSIGNED bytes = length * sizeof(PolyWord); if (length >= 2 && ps->length <= bytes - sizeof(POLYUNSIGNED) && ps->length > bytes - 2 * sizeof(POLYUNSIGNED)) { /* Looks like a string. */ fprintf(exportFile, "S%" POLYUFMT "|", ps->length); for (unsigned i = 0; i < ps->length; i++) { char ch = ps->chars[i]; fprintf(exportFile, "%02x", ch & 0xff); } } else { /* Not a string. May be an arbitrary precision integer. If the source and destination word lengths differ we could find that some long-format arbitrary precision numbers could be represented in the tagged short form or vice-versa. The former case might give rise to errors because when comparing two arbitrary precision numbers for equality we assume that they are not equal if they have different representation. 
The latter case could be a problem because we wouldn't know whether to convert the tagged form to long form, which would be correct if the value has type "int" or to truncate it which would be correct for "word". It could also be a real number but that doesn't matter if we recompile everything on the new machine. */ byte *u = (byte*)p; putc('B', exportFile); fprintf(exportFile, "%" PRI_SIZET "|", length*sizeof(PolyWord)); for (unsigned i = 0; i < (unsigned)(length*sizeof(PolyWord)); i++) { fprintf(exportFile, "%02x", u[i]); } } } else if (p->IsCodeObject()) { POLYUNSIGNED constCount, i; PolyWord *cp; ASSERT(! p->IsMutable() ); /* Work out the number of bytes in the code and the number of constants. */ p->GetConstSegmentForCode(cp, constCount); /* The byte count is the length of the segment minus the number of constants minus one for the constant count. It includes the marker word, byte count, profile count and, on the X86/64 at least, any non-address constants. These are actually word values. */ POLYUNSIGNED byteCount = (length - constCount - 1) * sizeof(PolyWord); fprintf(exportFile, "D%" POLYUFMT ",%" POLYUFMT "|", constCount, byteCount); // First the code. byte *u = (byte*)p; for (i = 0; i < byteCount; i++) fprintf(exportFile, "%02x", u[i]); putc('|', exportFile); // Now the constants. for (i = 0; i < constCount; i++) { printValue(cp[i]); if (i < constCount-1) putc(',', exportFile); } putc('|', exportFile); // Finally any constants in the code object. machineDependent->ScanConstantsWithinCode(p, this); } else // Ordinary objects, essentially tuples, or closures. { fprintf(exportFile, "%c%" POLYUFMT "|", p->IsClosureObject() ? 'L' : 'O', length); if (p->IsClosureObject()) { // The first word is always a code address. 
printAddress(*(PolyObject**)p); i = sizeof(PolyObject*)/sizeof(PolyWord); if (i < length) putc(',', exportFile); } else i = 0; while (i < length) { printValue(p->Get(i)); if (i < length-1) putc(',', exportFile); i++; } } fprintf(exportFile, "\n"); } /* This is called for each constant within the code. Print a relocation entry for the word and return a value that means that the offset is saved in original word. */ void PExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code) { PolyObject *p = GetConstantValue(addr, code); if (p == 0) return; // Don't put in tagged constants // Put in the byte offset and the relocation type code. POLYUNSIGNED offset = (POLYUNSIGNED)(addr - (byte*)base); ASSERT (offset < base->Length() * sizeof(POLYUNSIGNED)); fprintf(exportFile, "%" POLYUFMT ",%d,", (POLYUNSIGNED)(addr - (byte*)base), code); printAddress(p); // The value to plug in. fprintf(exportFile, " "); } void PExport::exportStore(void) { // We want the entries in pMap to be in ascending // order of address to make searching easy so we need to process the areas // in order of increasing address, which may not be the order in memTable. std::vector indexOrder; indexOrder.reserve(memTableEntries); for (size_t i = 0; i < memTableEntries; i++) { std::vector::iterator it; for (it = indexOrder.begin(); it != indexOrder.end(); it++) { if (memTable[*it].mtOriginalAddr >= memTable[i].mtOriginalAddr) break; } indexOrder.insert(it, i); } // Process the area in order of ascending address. for (std::vector::iterator i = indexOrder.begin(); i != indexOrder.end(); i++) { size_t index = *i; char *start = (char*)memTable[index].mtOriginalAddr; char *end = start + memTable[index].mtLength; for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); pMap.push_back(obj); p += length; } } /* Start writing the information. 
*/ fprintf(exportFile, "Objects\t%" PRI_SIZET "\n", pMap.size()); fprintf(exportFile, "Root\t%" PRI_SIZET "\n", getIndex(rootFunction)); // Generate each of the areas. for (size_t i = 0; i < memTableEntries; i++) { char *start = (char*)memTable[i].mtOriginalAddr; char *end = start + memTable[i].mtLength; for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); #ifdef POLYML32IN64 // We may have filler cells to get the alignment right. // We mustn't try to print them. if (((uintptr_t)obj & 4) != 0 && length == 0) continue; #endif printObject(obj); p += length; } } fclose(exportFile); exportFile = NULL; } /* Import a portable export file and load it into memory. Creates "permanent" address entries in the global memory table. */ class SpaceAlloc { public: SpaceAlloc(unsigned *indexCtr, unsigned perms, POLYUNSIGNED def); PolyObject *NewObj(POLYUNSIGNED objWords); size_t defaultSize; PermanentMemSpace *memSpace; size_t used; unsigned permissions; unsigned *spaceIndexCtr; }; SpaceAlloc::SpaceAlloc(unsigned *indexCtr, unsigned perms, POLYUNSIGNED def) { permissions = perms; defaultSize = def; memSpace = 0; used = 0; spaceIndexCtr = indexCtr; } // Allocate a new object. May create a new space and add the old one to the permanent // memory table if this is exhausted. #ifndef POLYML32IN64 PolyObject *SpaceAlloc::NewObj(POLYUNSIGNED objWords) { if (memSpace == 0 || memSpace->spaceSize() - used <= objWords) { // Need some more space. 
size_t size = defaultSize; if (size <= objWords) size = objWords+1; memSpace = gMem.AllocateNewPermanentSpace(size * sizeof(PolyWord), permissions, *spaceIndexCtr); (*spaceIndexCtr)++; // The memory is writable until CompletePermanentSpaceAllocation is called if (memSpace == 0) { fprintf(polyStderr, "Unable to allocate memory\n"); return 0; } used = 0; } ASSERT(memSpace->spaceSize() - used > objWords); PolyObject *newObj = (PolyObject*)(memSpace->bottom + used+1); used += objWords+1; return newObj; } #else // With 32in64 we need to allocate on 8-byte boundaries. PolyObject *SpaceAlloc::NewObj(POLYUNSIGNED objWords) { size_t rounded = objWords; if ((objWords & 1) == 0) rounded++; if (memSpace == 0 || memSpace->spaceSize() - used <= rounded) { // Need some more space. size_t size = defaultSize; if (size <= rounded) size = rounded + 1; memSpace = gMem.AllocateNewPermanentSpace(size * sizeof(PolyWord), permissions, *spaceIndexCtr); (*spaceIndexCtr)++; // The memory is writable until CompletePermanentSpaceAllocation is called if (memSpace == 0) { fprintf(stderr, "Unable to allocate memory\n"); return 0; } - memSpace->bottom[0] = PolyWord::FromUnsigned(0); + memSpace->writeAble(memSpace->bottom)[0] = PolyWord::FromUnsigned(0); used = 1; } PolyObject *newObj = (PolyObject*)(memSpace->bottom + used + 1); - if (rounded != objWords) newObj->Set(objWords, PolyWord::FromUnsigned(0)); + if (rounded != objWords) memSpace->writeAble(newObj)->Set(objWords, PolyWord::FromUnsigned(0)); used += rounded + 1; ASSERT(((uintptr_t)newObj & 0x7) == 0); return newObj; } #endif class PImport { public: PImport(); ~PImport(); bool DoImport(void); FILE *f; PolyObject *Root(void) { return objMap[nRoot]; } private: bool ReadValue(PolyObject *p, POLYUNSIGNED i); bool GetValue(PolyWord *result); POLYUNSIGNED nObjects, nRoot; PolyObject **objMap; unsigned spaceIndex; SpaceAlloc mutSpace, immutSpace, codeSpace; }; PImport::PImport(): mutSpace(&spaceIndex, MTF_WRITEABLE, 1024*1024), 
immutSpace(&spaceIndex, 0, 1024*1024), codeSpace(&spaceIndex, MTF_EXECUTABLE, 1024 * 1024) { f = NULL; objMap = 0; spaceIndex = 1; } PImport::~PImport() { if (f) fclose(f); free(objMap); } bool PImport::GetValue(PolyWord *result) { int ch = getc(f); if (ch == '@') { /* Address of an object. */ POLYUNSIGNED obj; fscanf(f, "%" POLYUFMT, &obj); ASSERT(obj < nObjects); *result = objMap[obj]; } else if ((ch >= '0' && ch <= '9') || ch == '-') { /* Tagged integer. */ POLYSIGNED j; ungetc(ch, f); fscanf(f, "%" POLYSFMT, &j); /* The assertion may be false if we are porting to a machine with a shorter tagged representation. */ ASSERT(j >= -MAXTAGGED-1 && j <= MAXTAGGED); *result = TAGGED(j); } else { fprintf(polyStderr, "Unexpected character in stream"); return false; } return true; } /* Read a value and store it at the specified word. */ bool PImport::ReadValue(PolyObject *p, POLYUNSIGNED i) { PolyWord result = TAGGED(0); if (GetValue(&result)) { p->Set(i, result); return true; } else return false; } bool PImport::DoImport() { int ch; POLYUNSIGNED objNo; ASSERT(gMem.pSpaces.size() == 0); ASSERT(gMem.eSpaces.size() == 0); ch = getc(f); ASSERT(ch == 'O'); /* Number of objects. */ while (getc(f) != '\t') ; fscanf(f, "%" POLYUFMT, &nObjects); /* Create a mapping table. */ objMap = (PolyObject**)calloc(nObjects, sizeof(PolyObject*)); if (objMap == 0) { fprintf(polyStderr, "Unable to allocate memory\n"); return false; } do { ch = getc(f); } while (ch == '\n'); ASSERT(ch == 'R'); /* Root object number. */ while (getc(f) != '\t') ; fscanf(f, "%" POLYUFMT, &nRoot); /* Now the objects themselves. */ while (1) { unsigned objBits = 0; POLYUNSIGNED nWords, nBytes; do { ch = getc(f); } while (ch == '\r' || ch == '\n'); if (ch == EOF) break; ungetc(ch, f); fscanf(f, "%" POLYUFMT, &objNo); ch = getc(f); ASSERT(ch == ':'); ASSERT(objNo < nObjects); /* Modifiers, MNVW. 
*/ do { ch = getc(f); if (ch == 'M') objBits |= F_MUTABLE_BIT; else if (ch == 'N') objBits |= F_NEGATIVE_BIT; if (ch == 'V') objBits |= F_NO_OVERWRITE; if (ch == 'W') objBits |= F_WEAK_BIT; } while (ch == 'M' || ch == 'N' || ch == 'V' || ch == 'W'); /* Object type. */ switch (ch) { case 'O': /* Simple object. */ fscanf(f, "%" POLYUFMT, &nWords); break; case 'B': /* Byte segment. */ objBits |= F_BYTE_OBJ; fscanf(f, "%" POLYUFMT, &nBytes); /* Round up to appropriate number of words. */ nWords = (nBytes + sizeof(PolyWord) -1) / sizeof(PolyWord); break; case 'S': /* String. */ objBits |= F_BYTE_OBJ; /* The length is the number of characters. */ fscanf(f, "%" POLYUFMT, &nBytes); /* Round up to appropriate number of words. Need to add one PolyWord for the length PolyWord. */ nWords = (nBytes + sizeof(PolyWord) -1) / sizeof(PolyWord) + 1; break; case 'C': /* Code segment (old form). */ case 'D': /* Code segment (new form). */ objBits |= F_CODE_OBJ; /* Read the number of bytes of code and the number of words for constants. */ fscanf(f, "%" POLYUFMT ",%" POLYUFMT, &nWords, &nBytes); nWords += ch == 'C' ? 4 : 1; /* Add words for extras. */ /* Add in the size of the code itself. */ nWords += (nBytes + sizeof(PolyWord) -1) / sizeof(PolyWord); break; case 'L': // Closure objBits |= F_CLOSURE_OBJ; fscanf(f, "%" POLYUFMT, &nWords); break; default: fprintf(polyStderr, "Invalid object type\n"); return false; } - PolyObject *p; + SpaceAlloc* alloc; if (objBits & F_MUTABLE_BIT) - p = mutSpace.NewObj(nWords); + alloc = &mutSpace; else if ((objBits & 3) == F_CODE_OBJ) - p = codeSpace.NewObj(nWords); - else p = immutSpace.NewObj(nWords); + alloc = &codeSpace; + else alloc = &immutSpace; + PolyObject* p = alloc->NewObj(nWords); if (p == 0) return false; objMap[objNo] = p; /* Put in length PolyWord and flag bits. */ - p->SetLengthWord(nWords, objBits); + alloc->memSpace->writeAble(p)->SetLengthWord(nWords, objBits); /* Skip the object contents. 
*/ while (getc(f) != '\n') ; } /* Second pass - fill in the contents. */ fseek(f, 0, SEEK_SET); /* Skip the information at the start. */ ch = getc(f); ASSERT(ch == 'O'); /* Number of objects. */ while (getc(f) != '\n'); ch = getc(f); ASSERT(ch == 'R'); /* Root object number. */ while (getc(f) != '\n') ; while (1) { if (feof(f)) break; fscanf(f, "%" POLYUFMT, &objNo); if (feof(f)) break; ch = getc(f); ASSERT(ch == ':'); ASSERT(objNo < nObjects); PolyObject * p = objMap[objNo]; /* Modifiers, M or N. */ do { ch = getc(f); } while (ch == 'M' || ch == 'N' || ch == 'V' || ch == 'W'); /* Object type. */ switch (ch) { case 'O': /* Simple object. */ case 'L': // Closure { POLYUNSIGNED nWords; bool isClosure = ch == 'L'; fscanf(f, "%" POLYUFMT, &nWords); ch = getc(f); ASSERT(ch == '|'); ASSERT(nWords == p->Length()); POLYUNSIGNED i = 0; if (isClosure) { int ch = getc(f); // This should be an address if (ch != '@') return false; POLYUNSIGNED obj; fscanf(f, "%" POLYUFMT, &obj); ASSERT(obj < nObjects); *(PolyObject**)p = objMap[obj]; ch = getc(f); i = sizeof(PolyObject*) / sizeof(PolyWord); } while (i < nWords) { if (!ReadValue(p, i)) return false; ch = getc(f); ASSERT((ch == ',' && i < nWords - 1) || (ch == '\n' && i == nWords - 1)); i++; } break; } case 'B': /* Byte segment. */ { byte *u = (byte*)p; POLYUNSIGNED nBytes; fscanf(f, "%" POLYUFMT, &nBytes); ch = getc(f); ASSERT(ch == '|'); for (POLYUNSIGNED i = 0; i < nBytes; i++) { int n; fscanf(f, "%02x", &n); u[i] = n; } ch = getc(f); ASSERT(ch == '\n'); // If this is an entry point object set its value. //if (p->IsMutable() && p->IsWeakRefObject() && p->Length() > 2 && p->Get(2).AsUnsigned() != 0) if (p->IsMutable() && p->IsWeakRefObject() && p->Length() > sizeof(uintptr_t)/sizeof(PolyWord)) { bool loadEntryPt = setEntryPoint(p); ASSERT(loadEntryPt); } break; } case 'S': /* String. */ { PolyStringObject * ps = (PolyStringObject *)p; /* The length is the number of characters. 
*/ POLYUNSIGNED nBytes; fscanf(f, "%" POLYUFMT, &nBytes); ch = getc(f); ASSERT(ch == '|'); ps->length = nBytes; for (POLYUNSIGNED i = 0; i < nBytes; i++) { int n; fscanf(f, "%02x", &n); ps->chars[i] = n; } ch = getc(f); ASSERT(ch == '\n'); break; } case 'C': /* Code segment. */ case 'D': { bool oldForm = ch == 'C'; - byte *u = (byte*)p; POLYUNSIGNED length = p->Length(); POLYUNSIGNED nWords, nBytes; + MemSpace* space = gMem.SpaceForAddress(p); + PolyObject *wr = space->writeAble(p); + byte* u = (byte*)wr; /* Read the number of bytes of code and the number of words for constants. */ fscanf(f, "%" POLYUFMT ",%" POLYUFMT, &nWords, &nBytes); /* Read the code. */ ch = getc(f); ASSERT(ch == '|'); for (POLYUNSIGNED i = 0; i < nBytes; i++) { int n; fscanf(f, "%02x", &n); u[i] = n; } - machineDependent->FlushInstructionCache(u, nBytes); + machineDependent->FlushInstructionCache(p, nBytes); ch = getc(f); ASSERT(ch == '|'); /* Set the constant count. */ - p->Set(length-1, PolyWord::FromUnsigned(nWords)); + wr->Set(length-1, PolyWord::FromUnsigned(nWords)); if (oldForm) { - p->Set(length-1-nWords-1, PolyWord::FromUnsigned(0)); /* Profile count. */ - p->Set(length-1-nWords-3, PolyWord::FromUnsigned(0)); /* Marker word. */ - p->Set(length-1-nWords-2, PolyWord::FromUnsigned((length-1-nWords-2)*sizeof(PolyWord))); + wr->Set(length-1-nWords-1, PolyWord::FromUnsigned(0)); /* Profile count. */ + wr->Set(length-1-nWords-3, PolyWord::FromUnsigned(0)); /* Marker word. */ + wr->Set(length-1-nWords-2, PolyWord::FromUnsigned((length-1-nWords-2)*sizeof(PolyWord))); /* Check - the code should end at the marker word. */ ASSERT(nBytes == ((length-1-nWords-3)*sizeof(PolyWord))); } /* Read in the constants. */ for (POLYUNSIGNED i = 0; i < nWords; i++) { - if (! ReadValue(p, i+length-nWords-1)) + if (! ReadValue(wr, i+length-nWords-1)) return false; ch = getc(f); ASSERT((ch == ',' && i < nWords-1) || ((ch == '\n' || ch == '|') && i == nWords-1)); } // Read in any constants in the code. 
if (ch == '|') { ch = getc(f); while (ch != '\n') { ungetc(ch, f); POLYUNSIGNED offset; int code; fscanf(f, "%" POLYUFMT ",%d", &offset, &code); ch = getc(f); ASSERT(ch == ','); // This should be an address. ch = getc(f); if (ch == '@') { POLYUNSIGNED obj; fscanf(f, "%" POLYUFMT, &obj); ASSERT(obj < nObjects); PolyObject *addr = objMap[obj]; - byte *toPatch = (byte*)p + offset; + byte *toPatch = (byte*)p + offset; // Pass the execute address here. ScanAddress::SetConstantValue(toPatch, addr, (ScanRelocationKind)code); } else { // Previously we also included tagged constants but they are // already in the code. ungetc(ch, f); PolyWord w; if (!GetValue(&w)) return false; } do ch = getc(f); while (ch == ' '); } } // Clear the mutable bit - p->SetLengthWord(p->Length(), F_CODE_OBJ); + wr->SetLengthWord(p->Length(), F_CODE_OBJ); break; } default: fprintf(polyStderr, "Invalid object type\n"); return false; } } // Now remove write access from immutable spaces. for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) gMem.CompletePermanentSpaceAllocation(*i); return true; } // Import a file in the portable format and return a pointer to the root object. PolyObject *ImportPortable(const TCHAR *fileName) { PImport pImport; #if (defined(_WIN32) && defined(UNICODE)) pImport.f = _wfopen(fileName, L"r"); if (pImport.f == 0) { fprintf(polyStderr, "Unable to open file: %S\n", fileName); return 0; } #else pImport.f = fopen(fileName, "r"); if (pImport.f == 0) { fprintf(polyStderr, "Unable to open file: %s\n", fileName); return 0; } #endif if (pImport.DoImport()) return pImport.Root(); else return 0; } diff --git a/libpolyml/poly_specific.cpp b/libpolyml/poly_specific.cpp index b3e5630b..fbbc0c26 100644 --- a/libpolyml/poly_specific.cpp +++ b/libpolyml/poly_specific.cpp @@ -1,420 +1,422 @@ /* Title: poly_specific.cpp - Poly/ML specific RTS calls. Copyright (c) 2006, 2015-17, 2019 David C. J. 
Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* This module is used for various run-time calls that are either in the PolyML structure or otherwise specific to Poly/ML. */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) 0 #endif #ifdef HAVE_STRING_H #include #endif #include "globals.h" #include "poly_specific.h" #include "arb.h" #include "mpoly.h" #include "sys.h" #include "machine_dep.h" #include "polystring.h" #include "run_time.h" #include "version.h" #include "save_vec.h" #include "version.h" #include "memmgr.h" #include "processes.h" #include "gc.h" #include "rtsentry.h" extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolySpecificGeneral(PolyObject *threadId, PolyWord code, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetABI(); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableCode(PolyObject * threadId, PolyWord byteSeg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableClosure(PolyObject * threadId, PolyWord closure); POLYEXTERNALSYMBOL POLYUNSIGNED PolyCopyByteVecToCode(PolyObject *threadId, PolyWord byteVec); POLYEXTERNALSYMBOL POLYUNSIGNED PolyCopyByteVecToClosure(PolyObject *threadId, PolyWord byteVec, PolyWord closure); POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeConstant(PolyWord closure, PolyWord offset, 
PolyWord c, PolyWord flags); POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeByte(PolyWord closure, PolyWord offset, PolyWord c); POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCodeByte(PolyWord closure, PolyWord offset); POLYEXTERNALSYMBOL POLYUNSIGNED PolySortArrayOfAddresses(PolyWord array); } #define SAVE(x) taskData->saveVec.push(x) #ifndef GIT_VERSION #define GIT_VERSION "" #endif Handle poly_dispatch_c(TaskData *taskData, Handle args, Handle code) { unsigned c = get_C_unsigned(taskData, DEREFWORD(code)); switch (c) { case 9: // Return the GIT version if appropriate { return SAVE(C_string_to_Poly(taskData, GIT_VERSION)); } case 10: // Return the RTS version string. { const char *version; switch (machineDependent->MachineArchitecture()) { case MA_Interpreted: version = "Portable-" TextVersion; break; case MA_I386: version = "I386-" TextVersion; break; case MA_X86_64: version = "X86_64-" TextVersion; break; default: version = "Unknown-" TextVersion; break; } return SAVE(C_string_to_Poly(taskData, version)); } case 12: // Return the architecture // Used in InitialPolyML.ML for PolyML.architecture { const char *arch; switch (machineDependent->MachineArchitecture()) { case MA_Interpreted: arch = "Interpreted"; break; case MA_I386: arch = "I386"; break; case MA_X86_64: arch = "X86_64"; break; case MA_X86_64_32: arch = "X86_64_32"; break; default: arch = "Unknown"; break; } return SAVE(C_string_to_Poly(taskData, arch)); } case 19: // Return the RTS argument help string. return SAVE(C_string_to_Poly(taskData, RTSArgHelp())); default: { char msg[100]; sprintf(msg, "Unknown poly-specific function: %d", c); raise_exception_string(taskData, EXC_Fail, msg); return 0; } } } // General interface to poly-specific. Ideally the various cases will be made into // separate functions. 
POLYUNSIGNED PolySpecificGeneral(PolyObject *threadId, PolyWord code, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedCode = taskData->saveVec.push(code); Handle pushedArg = taskData->saveVec.push(arg); Handle result = 0; try { result = poly_dispatch_c(taskData, pushedArg, pushedCode); } catch (...) { } // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } // Return the ABI - i.e. the calling conventions used when calling external functions. POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetABI() { // Return the ABI. For 64-bit we need to know if this is Windows. #if (SIZEOF_VOIDP == 8) #if (defined(_WIN32) || defined(__CYGWIN__)) return TAGGED(2).AsUnsigned(); // 64-bit Windows #else return TAGGED(1).AsUnsigned(); // 64-bit Unix #endif #else return TAGGED(0).AsUnsigned(); // 32-bit Unix and Windows #endif } // Code generation - Code is initially allocated in a byte segment. When all the // values have been set apart from any addresses the byte segment is copied into // a mutable code segment. // PolyCopyByteVecToCode is now replaced by PolyCopyByteVecToClosure POLYEXTERNALSYMBOL POLYUNSIGNED PolyCopyByteVecToCode(PolyObject * threadId, PolyWord byteVec) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(byteVec); PolyObject *result = 0; try { if (!pushedArg->WordP()->IsByteObject()) raise_fail(taskData, "Not byte data area"); do { PolyObject *initCell = pushedArg->WordP(); POLYUNSIGNED requiredSize = initCell->Length(); result = gMem.AllocCodeSpace(requiredSize); if (result == 0) { // Could not allocate - must GC. 
if (!QuickGC(taskData, pushedArg->WordP()->Length())) raise_fail(taskData, "Insufficient memory"); } else memcpy(result, initCell, requiredSize * sizeof(PolyWord)); } while (result == 0); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return ((PolyWord)result).AsUnsigned(); } // Copy the byte vector into code space. POLYUNSIGNED PolyCopyByteVecToClosure(PolyObject *threadId, PolyWord byteVec, PolyWord closure) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedByteVec = taskData->saveVec.push(byteVec); Handle pushedClosure = taskData->saveVec.push(closure); PolyObject *result = 0; try { if (!pushedByteVec->WordP()->IsByteObject()) raise_fail(taskData, "Not byte data area"); if (pushedClosure->WordP()->Length() != sizeof(PolyObject*)/sizeof(PolyWord)) raise_fail(taskData, "Invalid closure size"); if (!pushedClosure->WordP()->IsMutable()) raise_fail(taskData, "Closure is not mutable"); do { PolyObject *initCell = pushedByteVec->WordP(); POLYUNSIGNED requiredSize = initCell->Length(); result = gMem.AllocCodeSpace(requiredSize); if (result == 0) { // Could not allocate - must GC. if (!QuickGC(taskData, pushedByteVec->WordP()->Length())) raise_fail(taskData, "Insufficient memory"); } - else memcpy(result, initCell, requiredSize * sizeof(PolyWord)); + else memcpy(gMem.SpaceForAddress(result)->writeAble((byte*)result), initCell, requiredSize * sizeof(PolyWord)); } while (result == 0); } catch (...) {} // If an ML exception is raised // Store the code address in the closure. *((PolyObject**)pushedClosure->WordP()) = result; // Lock the closure. pushedClosure->WordP()->SetLengthWord(pushedClosure->WordP()->LengthWord() & ~_OBJ_MUTABLE_BIT); taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Code generation - Lock a mutable code segment and return the original address. 
// Currently this does not allocate so other than the exception it could // be a fast call. POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableCode(PolyObject * threadId, PolyWord byteSeg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(byteSeg); Handle result = 0; try { PolyObject *codeObj = pushedArg->WordP(); if (!codeObj->IsCodeObject() || !codeObj->IsMutable()) raise_fail(taskData, "Not mutable code area"); POLYUNSIGNED segLength = codeObj->Length(); codeObj->SetLengthWord(segLength, F_CODE_OBJ); // This is really a legacy of the PPC code-generator. machineDependent->FlushInstructionCache(codeObj, segLength * sizeof(PolyWord)); // In the future it may be necessary to return a different address here. // N.B. The code area should only have execute permission in the native // code version, not the interpreted version. result = pushedArg; // Return the original address. } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } // Replacement for above POLYEXTERNALSYMBOL POLYUNSIGNED PolyLockMutableClosure(PolyObject * threadId, PolyWord closure) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); PolyObject *codeObj = *(PolyObject**)(closure.AsObjPtr()); try { if (!codeObj->IsCodeObject() || !codeObj->IsMutable()) raise_fail(taskData, "Not mutable code area"); POLYUNSIGNED segLength = codeObj->Length(); - codeObj->SetLengthWord(segLength, F_CODE_OBJ); + gMem.SpaceForAddress(codeObj)->writeAble(codeObj)->SetLengthWord(segLength, F_CODE_OBJ); // This is really a legacy of the PPC code-generator. 
machineDependent->FlushInstructionCache(codeObj, segLength * sizeof(PolyWord)); // In the future it may be necessary to return a different address here. // N.B. The code area should only have execute permission in the native // code version, not the interpreted version. } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Set code constant. This can be a fast call. // This is in the RTS both because we pass a closure in here and cannot have // code addresses in 32-in-64 and also because we need to ensure there is no // possibility of a GC while the code is an inconsistent state. POLYUNSIGNED PolySetCodeConstant(PolyWord closure, PolyWord offset, PolyWord cWord, PolyWord flags) { byte *pointer; // Previously we passed the code address in here and we need to // retain that for legacy code. This is now the closure. if (closure.AsObjPtr()->IsCodeObject()) pointer = closure.AsCodePtr(); else pointer = *(POLYCODEPTR*)(closure.AsObjPtr()); // pointer is the start of the code segment. // c will usually be an address. // offset is a byte offset pointer += offset.UnTaggedUnsigned(); + byte* writeable = gMem.SpaceForAddress(pointer)->writeAble(pointer); switch (UNTAGGED(flags)) { case 0: // Absolute constant - size PolyWord { POLYUNSIGNED c = cWord.AsUnsigned(); #ifdef WORDS_BIGENDIAN // This is used to store constants in the constant area // on the interpreted version. for (unsigned i = sizeof(PolyWord); i > 0; i--) { - pointer[i-1] = (byte)(c & 255); + writeable[i-1] = (byte)(c & 255); c >>= 8; } #else for (unsigned i = 0; i < sizeof(PolyWord); i++) { - pointer[i] = (byte)(c & 255); + writeable[i] = (byte)(c & 255); c >>= 8; } #endif break; } case 1: // Relative constant - X86 - size 4 bytes { // The offset is relative to the END of the constant. byte *target; // In 32-in-64 we pass in the closure address here // rather than the code address. 
if (cWord.AsObjPtr()->IsCodeObject()) target = cWord.AsCodePtr(); else target = *(POLYCODEPTR*)(cWord.AsObjPtr()); size_t c = target - pointer - 4; for (unsigned i = 0; i < sizeof(PolyWord); i++) { - pointer[i] = (byte)(c & 255); + writeable[i] = (byte)(c & 255); c >>= 8; } break; } } return TAGGED(0).AsUnsigned(); } // Set a code byte. This needs to be in the RTS because it uses the closure POLYEXTERNALSYMBOL POLYUNSIGNED PolySetCodeByte(PolyWord closure, PolyWord offset, PolyWord cWord) { byte *pointer = *(POLYCODEPTR*)(closure.AsObjPtr()); - pointer[UNTAGGED_UNSIGNED(offset)] = (byte)UNTAGGED_UNSIGNED(cWord); + byte* writable = gMem.SpaceForAddress(pointer)->writeAble(pointer); + writable[UNTAGGED_UNSIGNED(offset)] = (byte)UNTAGGED_UNSIGNED(cWord); return TAGGED(0).AsUnsigned(); } POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetCodeByte(PolyWord closure, PolyWord offset) { byte *pointer = *(POLYCODEPTR*)(closure.AsObjPtr()); return TAGGED(pointer[UNTAGGED_UNSIGNED(offset)]).AsUnsigned(); } static int compare(const void *a, const void *b) { PolyWord *av = (PolyWord*)a; PolyWord *bv = (PolyWord*)b; if ((*av).IsTagged() || (*bv).IsTagged()) return 0; // Shouldn't happen PolyObject *ao = (*av).AsObjPtr(), *bo = (*bv).AsObjPtr(); if (ao->Length() < 1 || bo->Length() < 1) return 0; // Shouldn't happen if (ao->Get(0).AsUnsigned() < bo->Get(0).AsUnsigned()) return -1; if (ao->Get(0).AsUnsigned() > bo->Get(0).AsUnsigned()) return 1; return 0; } // Sort an array of addresses. This is used in the code-generator to search for // duplicates in the address area. The argument is an array of pairs. The first // item of each pair is an address, the second is an identifier of some kind. 
POLYEXTERNALSYMBOL POLYUNSIGNED PolySortArrayOfAddresses(PolyWord array) { if (!array.IsDataPtr()) return(TAGGED(0)).AsUnsigned(); PolyObject *arrayP = array.AsObjPtr(); POLYUNSIGNED numberOfItems = arrayP->Length(); if (!arrayP->IsMutable()) return(TAGGED(0)).AsUnsigned(); qsort(arrayP, numberOfItems, sizeof(PolyWord), compare); return (TAGGED(1)).AsUnsigned(); } struct _entrypts polySpecificEPT[] = { { "PolySpecificGeneral", (polyRTSFunction)&PolySpecificGeneral}, { "PolyGetABI", (polyRTSFunction)&PolyGetABI }, { "PolyCopyByteVecToCode", (polyRTSFunction)&PolyCopyByteVecToCode }, { "PolyCopyByteVecToClosure", (polyRTSFunction)&PolyCopyByteVecToClosure }, { "PolyLockMutableCode", (polyRTSFunction)&PolyLockMutableCode }, { "PolyLockMutableClosure", (polyRTSFunction)&PolyLockMutableClosure }, { "PolySetCodeConstant", (polyRTSFunction)&PolySetCodeConstant }, { "PolySetCodeByte", (polyRTSFunction)&PolySetCodeByte }, { "PolyGetCodeByte", (polyRTSFunction)&PolyGetCodeByte }, { "PolySortArrayOfAddresses", (polyRTSFunction)&PolySortArrayOfAddresses }, { NULL, NULL} // End of list. }; diff --git a/libpolyml/savestate.cpp b/libpolyml/savestate.cpp index 6fa2b353..0798a8f3 100644 --- a/libpolyml/savestate.cpp +++ b/libpolyml/savestate.cpp @@ -1,2221 +1,2223 @@ /* Title: savestate.cpp - Save and Load state Copyright (c) 2007, 2015, 2017-19 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_STDIO_H #include #endif #ifdef HAVE_WINDOWS_H #include // For MAX_PATH #endif #ifdef HAVE_SYS_PARAM_H #include // For MAX_PATH #endif #ifdef HAVE_ERRNO_H #include #endif #ifdef HAVE_TIME_H #include #endif #ifdef HAVE_SYS_TYPES_H #include #endif #ifdef HAVE_SYS_STAT_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #if (defined(_WIN32)) #include #define ERRORNUMBER _doserrno #define NOMEMORY ERROR_NOT_ENOUGH_MEMORY #else typedef char TCHAR; #define _T(x) x #define _tfopen fopen #define _tcscpy strcpy #define _tcsdup strdup #define _tcslen strlen #define _fputtc fputc #define _fputts fputs #ifndef lstrcmpi #define lstrcmpi strcasecmp #endif #define ERRORNUMBER errno #define NOMEMORY ENOMEM #endif #include "globals.h" #include "savestate.h" #include "processes.h" #include "run_time.h" #include "polystring.h" #include "scanaddrs.h" #include "arb.h" #include "memmgr.h" #include "mpoly.h" // For exportTimeStamp #include "exporter.h" // For CopyScan #include "machine_dep.h" #include "osmem.h" #include "gc.h" // For FullGC. #include "timing.h" #include "rtsentry.h" #include "check_objects.h" #include "rtsentry.h" #include "../polyexports.h" // For InitHeaderFromExport #include "version.h" // For InitHeaderFromExport #ifdef _MSC_VER // Don't tell me about ISO C++ changes. 
#pragma warning(disable:4996) #endif extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolySaveState(PolyObject *threadId, PolyWord fileName, PolyWord depth); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadState(PolyObject *threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowHierarchy(PolyObject *threadId); POLYEXTERNALSYMBOL POLYUNSIGNED PolyRenameParent(PolyObject *threadId, PolyWord childName, PolyWord parentName); POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowParent(PolyObject *threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyStoreModule(PolyObject *threadId, PolyWord name, PolyWord contents); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadModule(PolyObject *threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadHierarchy(PolyObject *threadId, PolyWord arg); POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetModuleDirectory(PolyObject *threadId); } // Helper class to close files on exit. class AutoClose { public: AutoClose(FILE *f = 0): m_file(f) {} ~AutoClose() { if (m_file) ::fclose(m_file); } operator FILE*() { return m_file; } FILE* operator = (FILE* p) { return (m_file = p); } private: FILE *m_file; }; // This is probably generally useful so may be moved into // a general header file. template class AutoFree { public: AutoFree(BASE p = 0): m_value(p) {} ~AutoFree() { free(m_value); } // Automatic conversions to the base type. operator BASE() { return m_value; } BASE operator = (BASE p) { return (m_value = p); } private: BASE m_value; }; #ifdef HAVE__FTELLI64 // fseek and ftell are only 32-bits in Windows. #define off_t __int64 #define fseek _fseeki64 #define ftell _ftelli64 #endif /* * Structure definitions for the saved state files. */ #define SAVEDSTATESIGNATURE "POLYSAVE" #define SAVEDSTATEVERSION 2 // File header for a saved state file. This appears as the first entry // in the file. typedef struct _savedStateHeader { // These entries are primarily to check that we have a valid // saved state file before we try to interpret anything else. 
char headerSignature[8]; // Should contain SAVEDSTATESIGNATURE unsigned headerVersion; // Should contain SAVEDSTATEVERSION unsigned headerLength; // Number of bytes in the header unsigned segmentDescrLength; // Number of bytes in a descriptor // These entries contain the real data. off_t segmentDescr; // Position of segment descriptor table unsigned segmentDescrCount; // Number of segment descriptors in the table off_t stringTable; // Pointer to the string table (zero if none) size_t stringTableSize; // Size of string table unsigned parentNameEntry; // Position of parent name in string table (0 if top) time_t timeStamp; // The time stamp for this file. time_t parentTimeStamp; // The time stamp for the parent. void *originalBaseAddr; // Original base address (32-in-64 only) } SavedStateHeader; // Entry for segment table. This describes the segments on the disc that // need to be loaded into memory. typedef struct _savedStateSegmentDescr { off_t segmentData; // Position of the segment data size_t segmentSize; // Size of the segment data off_t relocations; // Position of the relocation table unsigned relocationCount; // Number of entries in relocation table unsigned relocationSize; // Size of a relocation entry unsigned segmentFlags; // Segment flags (see SSF_ values) unsigned segmentIndex; // The index of this segment or the segment it overwrites void *originalAddress; // The base address when the segment was written. } SavedStateSegmentDescr; #define SSF_WRITABLE 1 // The segment contains mutable data #define SSF_OVERWRITE 2 // The segment overwrites the data (mutable) in a parent. #define SSF_NOOVERWRITE 4 // The segment must not be further overwritten #define SSF_BYTES 8 // The segment contains only byte data #define SSF_CODE 16 // The segment contains only code typedef struct _relocationEntry { // Each entry indicates a location that has to be set to an address. 
// The location to be set is determined by adding "relocAddress" to the base address of // this segment (the one to which these relocations apply) and the value to store // by adding "targetAddress" to the base address of the segment indicated by "targetSegment". POLYUNSIGNED relocAddress; // The (byte) offset in this segment that we will set POLYUNSIGNED targetAddress; // The value to add to the base of the destination segment unsigned targetSegment; // The base segment. 0 is IO segment. ScanRelocationKind relKind; // The kind of relocation (processor dependent). } RelocationEntry; #define SAVE(x) taskData->saveVec.push(x) /* * Hierarchy table: contains information about last loaded or saved state. */ // Pointer to list of files loaded in last load. // There's no need for a lock since the update is only made when all // the ML threads have stopped. class HierarchyTable { public: HierarchyTable(const TCHAR *file, time_t time): fileName(_tcsdup(file)), timeStamp(time) { } AutoFree fileName; time_t timeStamp; }; HierarchyTable **hierarchyTable; static unsigned hierarchyDepth; static bool AddHierarchyEntry(const TCHAR *fileName, time_t timeStamp) { // Add an entry to the hierarchy table for this file. HierarchyTable *newEntry = new HierarchyTable(fileName, timeStamp); if (newEntry == 0) return false; HierarchyTable **newTable = (HierarchyTable **)realloc(hierarchyTable, sizeof(HierarchyTable *)*(hierarchyDepth+1)); if (newTable == 0) return false; hierarchyTable = newTable; hierarchyTable[hierarchyDepth++] = newEntry; return true; } // Test whether we're overwriting a parent of ourself. 
#if (defined(_WIN32) || defined(__CYGWIN__))
// Windows version: two paths name the same file if both can be opened and
// their volume serial numbers and file indexes match.  Returns false if
// either file cannot be opened or queried.
static bool sameFile(const TCHAR *x, const TCHAR *y)
{
    HANDLE hXFile = INVALID_HANDLE_VALUE, hYFile = INVALID_HANDLE_VALUE;
    bool result = false;

    hXFile = CreateFile(x, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hXFile == INVALID_HANDLE_VALUE) goto closeAndExit;
    hYFile = CreateFile(y, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hYFile == INVALID_HANDLE_VALUE) goto closeAndExit;
    BY_HANDLE_FILE_INFORMATION fileInfoX, fileInfoY;
    if (! GetFileInformationByHandle(hXFile, &fileInfoX)) goto closeAndExit;
    if (! GetFileInformationByHandle(hYFile, &fileInfoY)) goto closeAndExit;

    result = fileInfoX.dwVolumeSerialNumber == fileInfoY.dwVolumeSerialNumber &&
        fileInfoX.nFileIndexLow == fileInfoY.nFileIndexLow &&
        fileInfoX.nFileIndexHigh == fileInfoY.nFileIndexHigh;

closeAndExit:
    // Every exit path comes through here so that any handle that was opened
    // is closed.
    if (hXFile != INVALID_HANDLE_VALUE) CloseHandle(hXFile);
    if (hYFile != INVALID_HANDLE_VALUE) CloseHandle(hYFile);
    return result;
}
#else
// Other platforms: two paths name the same file if stat succeeds on both and
// they report the same device and inode numbers.
static bool sameFile(const char *x, const char *y)
{
    struct stat xStat, yStat;
    // If either file does not exist that's fine.
    if (stat(x, &xStat) != 0 || stat(y, &yStat) != 0)
        return false;
    return (xStat.st_dev == yStat.st_dev && xStat.st_ino == yStat.st_ino);
}
#endif

/*
 *  Saving state.
 */

// This class is used to create the relocations.  It uses Exporter
// for this but this may perhaps be too heavyweight.
class SaveStateExport: public Exporter, public ScanAddress
{
public:
    SaveStateExport(unsigned int h=0): Exporter(h), relocationCount(0) {}
public:
    virtual void exportStore(void) {} // Not used.

private:
    // ScanAddress overrides
    virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code);
    // At the moment we should only get calls to ScanConstant.
    virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }

protected:
    void setRelocationAddress(void *p, POLYUNSIGNED *reloc);
    PolyWord createRelocation(PolyWord p, void *relocAddr);
    // Number of relocation entries written so far; incremented after each
    // entry is written by createRelocation and ScanConstant.
    unsigned relocationCount;

    friend class SaveRequest;
};

// Generate the address relative to the start of the segment.
// p is an address inside one of the areas in memTable; *reloc receives its
// byte offset from that area's mtOriginalAddr.
void SaveStateExport::setRelocationAddress(void *p, POLYUNSIGNED *reloc)
{
    unsigned area = findArea(p);
    POLYUNSIGNED offset = (POLYUNSIGNED)((char*)p - (char*)memTable[area].mtOriginalAddr);
    *reloc = offset;
}


// Create a relocation entry for an address at a given location.
// p is the address value and relocAddr is where it is stored.  The entry is
// written to exportFile as a direct relocation.  Returns p unchanged.
PolyWord SaveStateExport::createRelocation(PolyWord p, void *relocAddr)
{
    RelocationEntry reloc;
    // Set the offset within the section we're scanning.
    setRelocationAddress(relocAddr, &reloc.relocAddress);
    void *addr = p.AsAddress();
    unsigned addrArea = findArea(addr);
    reloc.targetAddress = (POLYUNSIGNED)((char*)addr - (char*)memTable[addrArea].mtOriginalAddr);
    reloc.targetSegment = (unsigned)memTable[addrArea].mtIndex;
    reloc.relKind = PROCESS_RELOC_DIRECT;
    fwrite(&reloc, sizeof(reloc), 1, exportFile);
    relocationCount++;
    return p; // Don't change the contents
}


/* This is called for each constant within the code.
   Print a relocation entry for the word and return a value that means
   that the offset is saved in original word. */
// base is not used here.  addr is the location of the constant and code is
// its relocation kind.  Nothing is written if the constant decodes to zero,
// or if it is an I386 relative reference within its own area.
void SaveStateExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code)
{
    PolyObject *p = GetConstantValue(addr, code);

    if (p == 0)
        return;

    void *a = p;
    unsigned aArea = findArea(a);

    // We don't need a relocation if this is relative to the current segment
    // since the relative address will already be right.
    if (code == PROCESS_RELOC_I386RELATIVE && aArea == findArea(addr))
        return;

    // Set the value at the address to the offset relative to the symbol.
    RelocationEntry reloc;
    setRelocationAddress(addr, &reloc.relocAddress);
    reloc.targetAddress = (POLYUNSIGNED)((char*)a - (char*)memTable[aArea].mtOriginalAddr);
    reloc.targetSegment = (unsigned)memTable[aArea].mtIndex;
    reloc.relKind = code;
    fwrite(&reloc, sizeof(reloc), 1, exportFile);
    relocationCount++;
}

// Request to the main thread to save data.
// On return from Perform, errorMessage is null on success; otherwise it
// describes the failure and errCode holds the associated error number.
class SaveRequest: public MainThreadRequest
{
public:
    SaveRequest(const TCHAR *name, unsigned h): MainThreadRequest(MTP_SAVESTATE),
        fileName(name), newHierarchy(h),
        errorMessage(0), errCode(0) {}

    virtual void Perform();
    const TCHAR *fileName;      // File to write.
    unsigned newHierarchy;      // Hierarchy level of the state being saved.
    const char *errorMessage;   // Set by Perform on failure.
    int errCode;                // Error number accompanying errorMessage.
};

// This class is used to update references to objects that have moved.  If
// we have copied an object into the area to be exported we may still have references
// to it from the stack or from RTS data structures.  We have to ensure that these
// are updated.
// This is very similar to ProcessFixupAddress in sharedata.cpp
class SaveFixupAddress: public ScanAddress
{
protected:
    virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
    virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { *pt = ScanObjectAddress(*pt); return 0; }
    virtual PolyObject *ScanObjectAddress(PolyObject *base);

public:
    void ScanCodeSpace(CodeSpace *space);
};


// Replace the word at pt with the new address of the object it refers to.
// Words that are not data pointers, and zero words, are left alone.
// Always returns 0.
POLYUNSIGNED SaveFixupAddress::ScanAddressAt(PolyWord *pt)
{
    PolyWord val = *pt;
    if (val.IsDataPtr() && val != PolyWord::FromUnsigned(0))
        *pt = ScanObjectAddress(val.AsObjPtr());
    return 0;
}

// Returns the new address if the argument is the address of an object that
// has moved, otherwise returns the original.
PolyObject *SaveFixupAddress::ScanObjectAddress(PolyObject *obj) { if (obj->ContainsForwardingPtr()) // tombstone is a pointer to a moved object { #ifdef POLYML32IN64 MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1); PolyObject *newp; if (space->isCode) newp = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); else newp = obj->GetForwardingPtr(); #else PolyObject *newp = obj->GetForwardingPtr(); #endif ASSERT (newp->ContainsNormalLengthWord()); return newp; } ASSERT (obj->ContainsNormalLengthWord()); // object is not moved return obj; } // Fix up addresses in the code area. Unlike ScanAddressesInRegion this updates // cells that have been moved. We need to do that because we may still have // return addresses into those cells and we don't move return addresses. We // do want the code to see updated constant addresses. void SaveFixupAddress::ScanCodeSpace(CodeSpace *space) { for (PolyWord *pt = space->bottom; pt < space->top; ) { pt++; PolyObject *obj = (PolyObject*)pt; #ifdef POLYML32IN64 PolyObject *dest = obj; while (dest->ContainsForwardingPtr()) { MemSpace *space = gMem.SpaceForAddress((PolyWord*)dest - 1); if (space->isCode) dest = (PolyObject*)(globalCodeBase + ((dest->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); else dest = dest->GetForwardingPtr(); } #else PolyObject *dest = obj->FollowForwardingChain(); #endif POLYUNSIGNED length = dest->Length(); if (length != 0) ScanAddressesInObject(obj, dest->LengthWord()); pt += length; } } // Called by the root thread to actually save the state and write the file. void SaveRequest::Perform() { if (debugOptions & DEBUG_SAVING) Log("SAVE: Beginning saving state.\n"); // Check that we aren't overwriting our own parent. 
for (unsigned q = 0; q < newHierarchy-1; q++) { if (sameFile(hierarchyTable[q]->fileName, fileName)) { errorMessage = "File being saved is used as a parent of this file"; errCode = 0; if (debugOptions & DEBUG_SAVING) Log("SAVE: File being saved is used as a parent of this file.\n"); return; } } SaveStateExport exports; // Open the file. This could quite reasonably fail if the path is wrong. exports.exportFile = _tfopen(fileName, _T("wb")); if (exports.exportFile == NULL) { errorMessage = "Cannot open save file"; errCode = ERRORNUMBER; if (debugOptions & DEBUG_SAVING) Log("SAVE: Cannot open save file.\n"); return; } // Scan over the permanent mutable area copying all reachable data that is // not in a lower hierarchy into new permanent segments. CopyScan copyScan(newHierarchy); copyScan.initialise(false); bool success = true; try { for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->isMutable && !space->noOverwrite && !space->byteOnly) { if (debugOptions & DEBUG_SAVING) Log("SAVE: Scanning permanent mutable area %p allocated at %p size %lu\n", space, space->bottom, space->spaceSize()); copyScan.ScanAddressesInRegion(space->bottom, space->top); } } } catch (MemoryException &) { success = false; if (debugOptions & DEBUG_SAVING) Log("SAVE: Scan of permanent mutable area raised memory exception.\n"); } // Copy the areas into the export object. Make sufficient space for // the largest possible number of entries. exports.memTable = new memoryTableEntry[gMem.eSpaces.size()+gMem.pSpaces.size()+1]; unsigned memTableCount = 0; // Permanent spaces at higher level. These have to have entries although // only the mutable entries will be written. 
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->hierarchy < newHierarchy) { memoryTableEntry *entry = &exports.memTable[memTableCount++]; entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom; entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord); entry->mtIndex = space->index; entry->mtFlags = 0; if (space->isMutable) { entry->mtFlags |= MTF_WRITEABLE; if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE; if (space->byteOnly) entry->mtFlags |= MTF_BYTES; } if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE; } } unsigned permanentEntries = memTableCount; // Remember where new entries start. // Newly created spaces. for (std::vector::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++) { memoryTableEntry *entry = &exports.memTable[memTableCount++]; PermanentMemSpace *space = *i; entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom; entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord); entry->mtIndex = space->index; entry->mtFlags = 0; if (space->isMutable) { entry->mtFlags |= MTF_WRITEABLE; if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE; if (space->byteOnly) entry->mtFlags |= MTF_BYTES; } if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE; } exports.memTableEntries = memTableCount; if (debugOptions & DEBUG_SAVING) Log("SAVE: Updating references to moved objects.\n"); // Update references to moved objects. SaveFixupAddress fixup; for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { LocalMemSpace *space = *i; fixup.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr); fixup.ScanAddressesInRegion(space->upperAllocPtr, space->top); } for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) fixup.ScanCodeSpace(*i); GCModules(&fixup); // Restore the length words in the code areas. 
// Although we've updated any pointers to the start of the code we could have return addresses // pointing to the original code. GCModules updates the stack but doesn't update return addresses. for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) { CodeSpace *space = *i; for (PolyWord *pt = space->bottom; pt < space->top; ) { pt++; PolyObject *obj = (PolyObject*)pt; if (obj->ContainsForwardingPtr()) { #ifdef POLYML32IN64 PolyObject *forwardedTo = obj; while (forwardedTo->ContainsForwardingPtr()) forwardedTo = (PolyObject*)(globalCodeBase + ((forwardedTo->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1)); #else PolyObject *forwardedTo = obj->FollowForwardingChain(); #endif POLYUNSIGNED lengthWord = forwardedTo->LengthWord(); - obj->SetLengthWord(lengthWord); + space->writeAble(obj)->SetLengthWord(lengthWord); } pt += obj->Length(); } } // Update the global memory space table. Old segments at the same level // or lower are removed. The new segments become permanent. // Try to promote the spaces even if we've had a failure because export // spaces are deleted in ~CopyScan and we may have already copied // some objects there. if (debugOptions & DEBUG_SAVING) Log("SAVE: Promoting export spaces to permanent spaces.\n"); if (! gMem.PromoteExportSpaces(newHierarchy) || ! success) { errorMessage = "Out of Memory"; errCode = NOMEMORY; if (debugOptions & DEBUG_SAVING) Log("SAVE: Unable to promote export spaces.\n"); return; } // Remove any deeper entries from the hierarchy table. while (hierarchyDepth > newHierarchy-1) { hierarchyDepth--; delete(hierarchyTable[hierarchyDepth]); hierarchyTable[hierarchyDepth] = 0; } if (debugOptions & DEBUG_SAVING) Log("SAVE: Writing out data.\n"); // Write out the file header. 
SavedStateHeader saveHeader; memset(&saveHeader, 0, sizeof(saveHeader)); saveHeader.headerLength = sizeof(saveHeader); memcpy(saveHeader.headerSignature, SAVEDSTATESIGNATURE, sizeof(saveHeader.headerSignature)); saveHeader.headerVersion = SAVEDSTATEVERSION; saveHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr); if (newHierarchy == 1) saveHeader.parentTimeStamp = exportTimeStamp; else { saveHeader.parentTimeStamp = hierarchyTable[newHierarchy-2]->timeStamp; saveHeader.parentNameEntry = sizeof(TCHAR); // Always the first entry. } saveHeader.timeStamp = getBuildTime(); saveHeader.segmentDescrCount = exports.memTableEntries; // One segment for each space. #ifdef POLYML32IN64 saveHeader.originalBaseAddr = globalHeapBase; #endif // Write out the header. fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile); // We need a segment header for each permanent area whether it is // actually in this file or not. SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [exports.memTableEntries]; for (unsigned j = 0; j < exports.memTableEntries; j++) { memoryTableEntry *entry = &exports.memTable[j]; memset(&descrs[j], 0, sizeof(SavedStateSegmentDescr)); descrs[j].relocationSize = sizeof(RelocationEntry); descrs[j].segmentIndex = (unsigned)entry->mtIndex; descrs[j].segmentSize = entry->mtLength; // Set this even if we don't write it. descrs[j].originalAddress = entry->mtOriginalAddr; if (entry->mtFlags & MTF_WRITEABLE) { descrs[j].segmentFlags |= SSF_WRITABLE; if (entry->mtFlags & MTF_NO_OVERWRITE) descrs[j].segmentFlags |= SSF_NOOVERWRITE; if (j < permanentEntries && (entry->mtFlags & MTF_NO_OVERWRITE) == 0) descrs[j].segmentFlags |= SSF_OVERWRITE; if (entry->mtFlags & MTF_BYTES) descrs[j].segmentFlags |= SSF_BYTES; } if (entry->mtFlags & MTF_EXECUTABLE) descrs[j].segmentFlags |= SSF_CODE; } // Write out temporarily. Will be overwritten at the end. 
saveHeader.segmentDescr = ftell(exports.exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile); // Write out the relocations and the data. for (unsigned k = 1 /* Not IO area */; k < exports.memTableEntries; k++) { memoryTableEntry *entry = &exports.memTable[k]; // Write out the contents if this is new or if it is a normal, overwritable // mutable area. if (k >= permanentEntries || (entry->mtFlags & (MTF_WRITEABLE|MTF_NO_OVERWRITE)) == MTF_WRITEABLE) { descrs[k].relocations = ftell(exports.exportFile); // Have to write this out. exports.relocationCount = 0; // Create the relocation table. char *start = (char*)entry->mtOriginalAddr; char *end = start + entry->mtLength; for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); // Most relocations can be computed when the saved state is // loaded so we only write out the difficult ones: those that // occur within compiled code. // exports.relocateObject(obj); if (length != 0 && obj->IsCodeObject()) machineDependent->ScanConstantsWithinCode(obj, &exports); p += length; } descrs[k].relocationCount = exports.relocationCount; // Write out the data. descrs[k].segmentData = ftell(exports.exportFile); fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exports.exportFile); } } // If this is a child we need to write a string table containing the parent name. if (newHierarchy > 1) { saveHeader.stringTable = ftell(exports.exportFile); _fputtc(0, exports.exportFile); // First byte of string table is zero _fputts(hierarchyTable[newHierarchy-2]->fileName, exports.exportFile); _fputtc(0, exports.exportFile); // A terminating null. saveHeader.stringTableSize = (_tcslen(hierarchyTable[newHierarchy-2]->fileName) + 2)*sizeof(TCHAR); } // Rewrite the header and the segment tables now they're complete. 
fseek(exports.exportFile, 0, SEEK_SET); fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile); if (debugOptions & DEBUG_SAVING) Log("SAVE: Writing complete.\n"); // Add an entry to the hierarchy table for this file. (void)AddHierarchyEntry(fileName, saveHeader.timeStamp); delete[](descrs); CheckMemory(); } // Write a saved state file. POLYUNSIGNED PolySaveState(PolyObject *threadId, PolyWord fileName, PolyWord depth) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); try { TempString fileNameBuff(Poly_string_to_T_alloc(fileName)); // The value of depth is zero for top-level save so we need to add one for hierarchy. unsigned newHierarchy = get_C_unsigned(taskData, depth) + 1; if (newHierarchy > hierarchyDepth + 1) raise_fail(taskData, "Depth must be no more than the current hierarchy plus one"); // Request a full GC first. The main reason is to avoid running out of memory as a // result of repeated saves. Old export spaces are turned into local spaces and // the GC will delete them if they are completely empty FullGC(taskData); SaveRequest request(fileNameBuff, newHierarchy); processes->MakeRootRequest(taskData, &request); if (request.errorMessage) raise_syscall(taskData, request.errorMessage, request.errCode); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } /* * Loading saved state files. 
*/ class StateLoader: public MainThreadRequest { public: StateLoader(bool isH, Handle files): MainThreadRequest(MTP_LOADSTATE), isHierarchy(isH), fileNameList(files), errorResult(0), errNumber(0) { } virtual void Perform(void); bool LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail); bool isHierarchy; Handle fileNameList; const char *errorResult; // The fileName here is the last file loaded. As well as using it // to load the name can also be printed out at the end to identify the // particular file in the hierarchy that failed. AutoFree fileName; int errNumber; }; // Called by the main thread once all the ML threads have stopped. void StateLoader::Perform(void) { // Copy the first file name into the buffer. if (isHierarchy) { if (ML_Cons_Cell::IsNull(fileNameList->Word())) { errorResult = "Hierarchy list is empty"; return; } ML_Cons_Cell *p = DEREFLISTHANDLE(fileNameList); fileName = Poly_string_to_T_alloc(p->h); if (fileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return; } (void)LoadFile(true, 0, p->t); } else { fileName = Poly_string_to_T_alloc(fileNameList->Word()); if (fileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return; } (void)LoadFile(true, 0, TAGGED(0)); } } class ClearWeakByteRef: public ScanAddress { public: ClearWeakByteRef() {} virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; } virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord); }; // Set the values of external references and clear the values of other weak byte refs. void ClearWeakByteRef::ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord) { if (OBJ_IS_MUTABLE_OBJECT(lengthWord) && OBJ_IS_BYTE_OBJECT(lengthWord) && OBJ_IS_WEAKREF_OBJECT(lengthWord)) { POLYUNSIGNED len = OBJ_OBJECT_LENGTH(lengthWord); if (len > 0) base->Set(0, PolyWord::FromSigned(0)); setEntryPoint(base); } } // This is copied from the B-tree in MemMgr. 
It probably should be // merged but will do for the moment. It's intended to reduce the // cost of finding the segment for relocation. class SpaceBTree { public: SpaceBTree(bool is, unsigned i = 0) : isLeaf(is), index(i) { } virtual ~SpaceBTree() {} bool isLeaf; unsigned index; // The index if this is a leaf }; // A non-leaf node in the B-tree class SpaceBTreeTree : public SpaceBTree { public: SpaceBTreeTree(); virtual ~SpaceBTreeTree(); SpaceBTree *tree[256]; }; SpaceBTreeTree::SpaceBTreeTree() : SpaceBTree(false) { for (unsigned i = 0; i < 256; i++) tree[i] = 0; } SpaceBTreeTree::~SpaceBTreeTree() { for (unsigned i = 0; i < 256; i++) delete(tree[i]); } // This class is used to relocate addresses in areas that have been loaded. class LoadRelocate: public ScanAddress { public: LoadRelocate(bool pcc = false): processCodeConstants(pcc), originalBaseAddr(0), descrs(0), targetAddresses(0), nDescrs(0), spaceTree(0) {} ~LoadRelocate(); void RelocateObject(PolyObject *p); virtual PolyObject *ScanObjectAddress(PolyObject *base) { ASSERT(0); return base; } // Not used virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code); void RelocateAddressAt(PolyWord *pt); PolyObject *RelocateAddress(PolyObject *obj); void AddTreeRange(SpaceBTree **t, unsigned index, uintptr_t startS, uintptr_t endS); bool processCodeConstants; PolyWord *originalBaseAddr; SavedStateSegmentDescr *descrs; PolyWord **targetAddresses; unsigned nDescrs; SpaceBTree *spaceTree; intptr_t relativeOffset; }; LoadRelocate::~LoadRelocate() { if (descrs) delete[](descrs); if (targetAddresses) delete[](targetAddresses); delete(spaceTree); } // Add an entry to the space B-tree. 
// tt:     address of the pointer to the (sub-)tree to extend; a new non-leaf
//         node is created there if the pointer is currently null.
// index:  value stored in every leaf created for this range.
// startS, endS: the range of addresses.  At each level only the high-order
//         byte is examined; the values are shifted left by 8 bits before
//         recursing.  An endS of zero is treated as "to the end of this node".
void LoadRelocate::AddTreeRange(SpaceBTree **tt, unsigned index, uintptr_t startS, uintptr_t endS)
{
    if (*tt == 0)
        *tt = new SpaceBTreeTree;
    ASSERT(!(*tt)->isLeaf);
    SpaceBTreeTree *t = (SpaceBTreeTree*)*tt;

    const unsigned shift = (sizeof(void*) - 1) * 8; // Takes the high-order byte
    uintptr_t r = startS >> shift;
    ASSERT(r < 256);
    // s is the entry containing the end, or 256 if the range runs to the end of the node.
    const uintptr_t s = endS == 0 ? 256 : endS >> shift;
    ASSERT(s >= r && s <= 256);

    if (r == s) // Wholly within this entry
        AddTreeRange(&(t->tree[r]), index, startS << 8, endS << 8);
    else
    {
        // Deal with any remainder at the start.
        if ((r << shift) != startS)
        {
            AddTreeRange(&(t->tree[r]), index, startS << 8, 0 /*End of range*/);
            r++;
        }
        // Whole entries.
        while (r < s)
        {
            ASSERT(t->tree[r] == 0);
            t->tree[r] = new SpaceBTree(true, index);
            r++;
        }
        // Remainder at the end.
        if ((s << shift) != endS)
            AddTreeRange(&(t->tree[r]), index, 0, endS << 8);
    }
}

// Update the addresses in a group of words.
// A tagged value is left alone; anything else is treated as an address and
// replaced by its relocated value.
// N.B. The lines below that begin with '-' and '+' are unified-diff markers
// that are part of this text: '-' is the old statement and '+' its replacement.
void LoadRelocate::RelocateAddressAt(PolyWord *pt)
{
    PolyWord val = *pt;

    if (! val.IsTagged())
-        *pt = RelocateAddress(val.AsObjPtr(originalBaseAddr));
+        *gMem.SpaceForAddress(pt)->writeAble(pt) = RelocateAddress(val.AsObjPtr(originalBaseAddr));
}

// Translate an address in the original address space into the corresponding
// address in the newly loaded segment.  The segment is found by walking
// spaceTree one byte of the address at a time, starting with the high-order byte.
// Returns 0, after a failed assertion, if the address is not in any segment.
PolyObject *LoadRelocate::RelocateAddress(PolyObject *obj)
{
    // Which segment is this address in?
    // N.B. As with SpaceForAddress we need to subtract 1 to point to the length word.
    uintptr_t t = (uintptr_t)((PolyWord*)obj - 1);
    SpaceBTree *tr = spaceTree;

    // Each level of the tree is either a leaf or a vector of trees.
    unsigned j = sizeof(void *) * 8;
    for (;;)
    {
        if (tr == 0) break;
        if (tr->isLeaf) {
            // It's in this segment: relocate it to the current position.
            unsigned i = tr->index;
            SavedStateSegmentDescr *descr = &descrs[i];
            PolyWord *newAddress = targetAddresses[descr->segmentIndex];
            ASSERT((char*)obj > descr->originalAddress && (char*)obj <= (char*)descr->originalAddress + descr->segmentSize);
            ASSERT(newAddress != 0);
            // Keep the same byte offset from the start of the segment.
            byte *setAddress = (byte*)newAddress + ((char*)obj - (char*)descr->originalAddress);
            return (PolyObject*)setAddress;
        }
        // Select the next byte down as the index at the next level.
        j -= 8;
        tr = ((SpaceBTreeTree*)tr)->tree[(t >> j) & 0xff];
    }
    // This should never happen.
    ASSERT(0);
    return 0;
}

// This is based on Exporter::relocateObject but does the reverse.
// It attempts to adjust all the addresses in the object when it has
// been read in.
void LoadRelocate::RelocateObject(PolyObject *p)
{
    if (p->IsByteObject())
    {
        // Byte objects: nothing is relocated.
    }
    else if (p->IsCodeObject())
    {
        POLYUNSIGNED constCount;
        PolyWord *cp;
        ASSERT(! p->IsMutable() );
        p->GetConstSegmentForCode(cp, constCount);
        /* Now the constant area. */
        for (POLYUNSIGNED i = 0; i < constCount; i++) RelocateAddressAt(&(cp[i]));
        // Saved states and modules have relocation entries for constants in the code.
        // We can't use them when loading object files in 32-in-64 so have to process the
        // constants here.
        if (processCodeConstants)
        {
            POLYUNSIGNED length = p->Length();
            machineDependent->ScanConstantsWithinCode(p, p, length, this);
        }
    }
    else if (p->IsClosureObject())
    {
        // The first word is the address of the code.
        POLYUNSIGNED length = p->Length();
        // This is stored directly, as a full pointer, rather than through RelocateAddressAt.
        *(PolyObject**)p = RelocateAddress(*(PolyObject**)p);
        // The remaining words start after the space occupied by that pointer.
        for (POLYUNSIGNED i = sizeof(PolyObject*)/sizeof(PolyWord); i < length; i++)
            RelocateAddressAt(p->Offset(i));
    }
    else /* Ordinary objects, essentially tuples. */
    {
        POLYUNSIGNED length = p->Length();
        for (POLYUNSIGNED i = 0; i < length; i++) RelocateAddressAt(p->Offset(i));
    }
}

// Update addresses as constants within the code.
void LoadRelocate::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code) { PolyObject *p = GetConstantValue(addressOfConstant, code, originalBaseAddr); if (p != 0) { // Relative addresses are computed by adding the CURRENT address. // We have to convert them into addresses in original space before we // can relocate them. if (code == PROCESS_RELOC_I386RELATIVE) p = (PolyObject*)((PolyWord*)p + relativeOffset); PolyObject *newValue = RelocateAddress(p); SetConstantValue(addressOfConstant, newValue, code); } } // Load a saved state file. Calls itself to handle parent files. bool StateLoader::LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail) { LoadRelocate relocate; AutoFree thisFile(_tcsdup(fileName)); AutoClose loadFile(_tfopen(fileName, _T("rb"))); if ((FILE*)loadFile == NULL) { errorResult = "Cannot open load file"; errNumber = ERRORNUMBER; return false; } SavedStateHeader header; // Read the header and check the signature. if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1) { errorResult = "Unable to load header"; return false; } if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0) { errorResult = "File is not a saved state"; return false; } if (header.headerVersion != SAVEDSTATEVERSION || header.headerLength != sizeof(SavedStateHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { errorResult = "Unsupported version of saved state file"; return false; } // Check that we have the required stamp before loading any children. // If a parent has been overwritten we could get a loop. if (! isInitial && header.timeStamp != requiredStamp) { // Time-stamps don't match. errorResult = "The parent for this saved state does not match or has been changed"; return false; } // Have verified that this is a reasonable saved state file. If it isn't a // top-level file we have to load the parents first. 
if (header.parentNameEntry != 0) { if (isHierarchy) { // Take the file name from the list if (ML_Cons_Cell::IsNull(tail)) { errorResult = "Missing parent name in argument list"; return false; } ML_Cons_Cell *p = (ML_Cons_Cell *)tail.AsObjPtr(); fileName = Poly_string_to_T_alloc(p->h); if (fileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return false; } if (! LoadFile(false, header.parentTimeStamp, p->t)) return false; } else { size_t toRead = header.stringTableSize-header.parentNameEntry; size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR)); // Always allow space for null terminator size_t roundedBytes = (elems + 1) * sizeof(TCHAR); TCHAR *newFileName = (TCHAR *)realloc(fileName, roundedBytes); if (newFileName == NULL) { errorResult = "Insufficient memory"; errNumber = NOMEMORY; return false; } fileName = newFileName; if (header.parentNameEntry >= header.stringTableSize /* Bad entry */ || fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 || fread(fileName, 1, toRead, loadFile) != toRead) { errorResult = "Unable to read parent file name"; return false; } fileName[elems] = 0; // Should already be null-terminated, but just in case. if (! LoadFile(false, header.parentTimeStamp, TAGGED(0))) return false; } ASSERT(hierarchyDepth > 0 && hierarchyTable[hierarchyDepth-1] != 0); } else // Top-level file { if (isHierarchy && ! ML_Cons_Cell::IsNull(tail)) { // There should be no further file names if this is really the top. errorResult = "Too many file names in the list"; return false; } if (header.parentTimeStamp != exportTimeStamp) { // Time-stamp does not match executable. errorResult = "Saved state was exported from a different executable or the executable has changed"; return false; } // Any existing spaces at this level or greater must be turned // into local spaces. We may have references from the stack to objects that // have previously been imported but otherwise these spaces are no longer // needed. 
gMem.DemoteImportSpaces(); // Clean out the hierarchy table. for (unsigned h = 0; h < hierarchyDepth; h++) { delete(hierarchyTable[h]); hierarchyTable[h] = 0; } hierarchyDepth = 0; } // Now have a valid, matching saved state. // Load the segment descriptors. relocate.nDescrs = header.segmentDescrCount; relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs]; relocate.originalBaseAddr = (PolyWord*)header.originalBaseAddr; if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 || fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs) { errorResult = "Unable to read segment descriptors"; return false; } { unsigned maxIndex = 0; for (unsigned i = 0; i < relocate.nDescrs; i++) { if (relocate.descrs[i].segmentIndex > maxIndex) maxIndex = relocate.descrs[i].segmentIndex; relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress, (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize-1)); } relocate.targetAddresses = new PolyWord*[maxIndex+1]; for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0; } // Read in and create the new segments first. If we have problems, // in particular if we have run out of memory, then it's easier to recover. for (unsigned i = 0; i < relocate.nDescrs; i++) { SavedStateSegmentDescr *descr = &relocate.descrs[i]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); if (space != NULL) relocate.targetAddresses[descr->segmentIndex] = space->bottom; if (descr->segmentData == 0) { // No data - just an entry in the index. if (space == NULL/* || descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/) { errorResult = "Mismatch for existing memory space"; return false; } } else if ((descr->segmentFlags & SSF_OVERWRITE) == 0) { // New segment. if (space != NULL) { errorResult = "Segment already exists"; return false; } // Allocate memory for the new segment. 
unsigned mFlags = (descr->segmentFlags & SSF_WRITABLE ? MTF_WRITEABLE : 0) | (descr->segmentFlags & SSF_NOOVERWRITE ? MTF_NO_OVERWRITE : 0) | (descr->segmentFlags & SSF_BYTES ? MTF_BYTES : 0) | (descr->segmentFlags & SSF_CODE ? MTF_EXECUTABLE : 0); PermanentMemSpace *newSpace = gMem.AllocateNewPermanentSpace(descr->segmentSize, mFlags, descr->segmentIndex, hierarchyDepth + 1); if (newSpace == 0) { errorResult = "Unable to allocate memory"; return false; } PolyWord *mem = newSpace->bottom; + PolyWord* writeAble = newSpace->writeAble(mem); if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 || - fread(mem, descr->segmentSize, 1, loadFile) != 1) + fread(writeAble, descr->segmentSize, 1, loadFile) != 1) { errorResult = "Unable to read segment"; return false; } // Fill unused space to the top of the area. - gMem.FillUnusedSpace(mem+descr->segmentSize/sizeof(PolyWord), + gMem.FillUnusedSpace(writeAble +descr->segmentSize/sizeof(PolyWord), newSpace->spaceSize() - descr->segmentSize/sizeof(PolyWord)); // Leave it writable until we've done the relocations. relocate.targetAddresses[descr->segmentIndex] = mem; if (newSpace->isMutable && newSpace->byteOnly) { ClearWeakByteRef cwbr; cwbr.ScanAddressesInRegion(newSpace->bottom, newSpace->topPointer); } } } // Now read in the mutable overwrites and relocate. for (unsigned j = 0; j < relocate.nDescrs; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); ASSERT(space != NULL); // We should have created it. if (descr->segmentFlags & SSF_OVERWRITE) { if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 || fread(space->bottom, descr->segmentSize, 1, loadFile) != 1) { errorResult = "Unable to read segment"; return false; } } // Relocation. if (descr->segmentData != 0) { // Adjust the addresses in the loaded segment. 
for (PolyWord *p = space->bottom; p < space->top; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); relocate.RelocateObject(obj); p += length; } } // Process explicit relocations. // If we get errors just skip the error and continue rather than leave // everything in an unstable state. if (descr->relocations) { if (fseek(loadFile, descr->relocations, SEEK_SET) != 0) { errorResult = "Unable to read relocation segment"; return false; } for (unsigned k = 0; k < descr->relocationCount; k++) { RelocationEntry reloc; if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1) { errorResult = "Unable to read relocation segment"; return false; } MemSpace *toSpace = gMem.SpaceForIndex(reloc.targetSegment); if (toSpace == NULL) { errorResult = "Unknown space reference in relocation"; continue; } byte *setAddress = (byte*)space->bottom + reloc.relocAddress; byte *targetAddress = (byte*)toSpace->bottom + reloc.targetAddress; if (setAddress >= (byte*)space->top || targetAddress >= (byte*)toSpace->top) { errorResult = "Bad relocation"; continue; } ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind); } } } // Set the final permissions. for (unsigned j = 0; j < relocate.nDescrs; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; if (descr->segmentData != 0) { PermanentMemSpace* space = gMem.SpaceForIndex(descr->segmentIndex); gMem.CompletePermanentSpaceAllocation(space); } } // Add an entry to the hierarchy table for this file. if (! AddHierarchyEntry(thisFile, header.timeStamp)) return false; return true; // Succeeded } static void LoadState(TaskData *taskData, bool isHierarchy, Handle hFileList) // Load a saved state or a hierarchy. // hFileList is a list if this is a hierarchy and a single name if it is not. { StateLoader loader(isHierarchy, hFileList); // Request the main thread to do the load. This may set the error string if it failed. 
processes->MakeRootRequest(taskData, &loader); if (loader.errorResult != 0) { if (loader.errNumber == 0) raise_fail(taskData, loader.errorResult); else { AutoFree buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1)); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "%s: %S", loader.errorResult, (TCHAR *)loader.fileName); #else sprintf(buff, "%s: %s", loader.errorResult, (TCHAR *)loader.fileName); #endif raise_syscall(taskData, buff, loader.errNumber); } } } // Load a saved state file and any ancestors. POLYUNSIGNED PolyLoadState(PolyObject *threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); try { LoadState(taskData, false, pushedArg); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Load hierarchy. This provides a complete list of children and parents. POLYUNSIGNED PolyLoadHierarchy(PolyObject *threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); try { LoadState(taskData, true, pushedArg); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } /* * Additional functions to provide information or change saved-state files. */ // These functions do not affect the global state so can be executed by // the ML threads directly. static Handle ShowHierarchy(TaskData *taskData) // Return the list of files in the hierarchy. { Handle saved = taskData->saveVec.mark(); Handle list = SAVE(ListNull); // Process this in reverse order. 
for (unsigned i = hierarchyDepth; i > 0; i--) { Handle value = SAVE(C_string_to_Poly(taskData, hierarchyTable[i-1]->fileName)); Handle next = alloc_and_save(taskData, sizeof(ML_Cons_Cell)/sizeof(PolyWord)); DEREFLISTHANDLE(next)->h = value->Word(); DEREFLISTHANDLE(next)->t = list->Word(); taskData->saveVec.reset(saved); list = SAVE(next->Word()); } return list; } // Show the hierarchy. POLYUNSIGNED PolyShowHierarchy(PolyObject *threadId) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle result = 0; try { result = ShowHierarchy(taskData); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } static void RenameParent(TaskData *taskData, PolyWord childName, PolyWord parentName) // Change the name of the immediate parent stored in a child { // The name of the file to modify. AutoFree fileNameBuff(Poly_string_to_T_alloc(childName)); if (fileNameBuff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); // The new parent name to insert. AutoFree parentNameBuff(Poly_string_to_T_alloc(parentName)); if (parentNameBuff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); AutoClose loadFile(_tfopen(fileNameBuff, _T("r+b"))); // Open for reading and writing if ((FILE*)loadFile == NULL) { AutoFree buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1)); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff); #else sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff); #endif raise_syscall(taskData, buff, ERRORNUMBER); } SavedStateHeader header; // Read the header and check the signature. 
if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1) raise_fail(taskData, "Unable to load header"); if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0) raise_fail(taskData, "File is not a saved state"); if (header.headerVersion != SAVEDSTATEVERSION || header.headerLength != sizeof(SavedStateHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { raise_fail(taskData, "Unsupported version of saved state file"); } // Does this actually have a parent? if (header.parentNameEntry == 0) raise_fail(taskData, "File does not have a parent"); // At the moment the only entry in the string table is the parent // name so we can simply write a new one on the end of the file. // This makes the file grow slightly each time but it shouldn't be // significant. fseek(loadFile, 0, SEEK_END); header.stringTable = ftell(loadFile); // Remember where this is _fputtc(0, loadFile); // First byte of string table is zero _fputts(parentNameBuff, loadFile); _fputtc(0, loadFile); // A terminating null. header.stringTableSize = (_tcslen(parentNameBuff) + 2)*sizeof(TCHAR); // Now rewind and write the header with the revised string table. fseek(loadFile, 0, SEEK_SET); fwrite(&header, sizeof(header), 1, loadFile); } POLYUNSIGNED PolyRenameParent(PolyObject *threadId, PolyWord childName, PolyWord parentName) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); try { RenameParent(taskData, childName, parentName); } catch (...) 
{} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } static Handle ShowParent(TaskData *taskData, Handle hFileName) // Return the name of the immediate parent stored in a child { AutoFree fileNameBuff(Poly_string_to_T_alloc(hFileName->Word())); if (fileNameBuff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); AutoClose loadFile(_tfopen(fileNameBuff, _T("rb"))); if ((FILE*)loadFile == NULL) { AutoFree buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1)); if (buff == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff); #else sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff); #endif raise_syscall(taskData, buff, ERRORNUMBER); } SavedStateHeader header; // Read the header and check the signature. if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1) raise_fail(taskData, "Unable to load header"); if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0) raise_fail(taskData, "File is not a saved state"); if (header.headerVersion != SAVEDSTATEVERSION || header.headerLength != sizeof(SavedStateHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { raise_fail(taskData, "Unsupported version of saved state file"); } // Does this have a parent? 
if (header.parentNameEntry != 0) { size_t toRead = header.stringTableSize-header.parentNameEntry; size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR)); // Always allow space for null terminator size_t roundedBytes = (elems + 1) * sizeof(TCHAR); AutoFree parentFileName((TCHAR *)malloc(roundedBytes)); if (parentFileName == NULL) raise_syscall(taskData, "Insufficient memory", NOMEMORY); if (header.parentNameEntry >= header.stringTableSize /* Bad entry */ || fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 || fread(parentFileName, 1, toRead, loadFile) != toRead) { raise_fail(taskData, "Unable to read parent file name"); } parentFileName[elems] = 0; // Should already be null-terminated, but just in case. // Convert the name into a Poly string and then build a "Some" value. // It's possible, although silly, to have the empty string as a parent name. Handle resVal = SAVE(C_string_to_Poly(taskData, parentFileName)); Handle result = alloc_and_save(taskData, 1); DEREFHANDLE(result)->Set(0, resVal->Word()); return result; } else return SAVE(NONE_VALUE); } // Return the name of the immediate parent stored in a child POLYUNSIGNED PolyShowParent(PolyObject *threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); Handle result = 0; try { result = ShowParent(taskData, pushedArg); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } // Module system #define MODULESIGNATURE "POLYMODU" #define MODULEVERSION 2 typedef struct _moduleHeader { // These entries are primarily to check that we have a valid // saved state file before we try to interpret anything else. 
char headerSignature[8]; // Should contain MODULESIGNATURE unsigned headerVersion; // Should contain MODULEVERSION unsigned headerLength; // Number of bytes in the header unsigned segmentDescrLength; // Number of bytes in a descriptor // These entries contain the real data. off_t segmentDescr; // Position of segment descriptor table unsigned segmentDescrCount; // Number of segment descriptors in the table time_t timeStamp; // The time stamp for this file. time_t executableTimeStamp; // The time stamp for the parent executable. // Root uintptr_t rootSegment; POLYUNSIGNED rootOffset; } ModuleHeader; // Store a module class ModuleStorer: public MainThreadRequest { public: ModuleStorer(const TCHAR *file, Handle r): MainThreadRequest(MTP_STOREMODULE), fileName(file), root(r), errorMessage(0), errCode(0) {} virtual void Perform(); const TCHAR *fileName; Handle root; const char *errorMessage; int errCode; }; class ModuleExport: public SaveStateExport { public: ModuleExport(): SaveStateExport(1/* Everything EXCEPT the executable. */) {} virtual void exportStore(void); // Write the data out. }; void ModuleStorer::Perform() { ModuleExport exporter; #if (defined(_WIN32) && defined(UNICODE)) exporter.exportFile = _wfopen(fileName, L"wb"); #else exporter.exportFile = fopen(fileName, "wb"); #endif if (exporter.exportFile == NULL) { errorMessage = "Cannot open export file"; errCode = ERRORNUMBER; return; } // RunExport copies everything reachable from the root, except data from // the executable because we've set the hierarchy to 1, using CopyScan. // It builds the tables in the export data structure then calls exportStore // to actually write the data. if (! root->Word().IsDataPtr()) { // If we have a completely empty module the list may be null. // This needs to be dealt with at a higher level. errorMessage = "Module root is not an address"; return; } exporter.RunExport(root->WordP()); errorMessage = exporter.errorMessage; // This will be null unless there's been an error. 
} void ModuleExport::exportStore(void) { // What we need to do here is implement the export in a similar way to e.g. PECOFFExport::exportStore // This is copied from SaveRequest::Perform and should be common code. ModuleHeader modHeader; memset(&modHeader, 0, sizeof(modHeader)); modHeader.headerLength = sizeof(modHeader); memcpy(modHeader.headerSignature, MODULESIGNATURE, sizeof(modHeader.headerSignature)); modHeader.headerVersion = MODULEVERSION; modHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr); modHeader.executableTimeStamp = exportTimeStamp; { unsigned rootArea = findArea(this->rootFunction); struct _memTableEntry *mt = &memTable[rootArea]; modHeader.rootSegment = mt->mtIndex; modHeader.rootOffset = (POLYUNSIGNED)((char*)this->rootFunction - (char*)mt->mtOriginalAddr); } modHeader.timeStamp = getBuildTime(); modHeader.segmentDescrCount = this->memTableEntries; // One segment for each space. // Write out the header. fwrite(&modHeader, sizeof(modHeader), 1, this->exportFile); SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [this->memTableEntries]; // We need an entry in the descriptor tables for each segment in the executable because // we may have relocations that refer to addresses in it. for (unsigned j = 0; j < this->memTableEntries; j++) { SavedStateSegmentDescr *thisDescr = &descrs[j]; memoryTableEntry *entry = &this->memTable[j]; memset(thisDescr, 0, sizeof(SavedStateSegmentDescr)); thisDescr->relocationSize = sizeof(RelocationEntry); thisDescr->segmentIndex = (unsigned)entry->mtIndex; thisDescr->segmentSize = entry->mtLength; // Set this even if we don't write it. 
thisDescr->originalAddress = entry->mtOriginalAddr; if (entry->mtFlags & MTF_WRITEABLE) { thisDescr->segmentFlags |= SSF_WRITABLE; if (entry->mtFlags & MTF_NO_OVERWRITE) thisDescr->segmentFlags |= SSF_NOOVERWRITE; if ((entry->mtFlags & MTF_NO_OVERWRITE) == 0) thisDescr->segmentFlags |= SSF_OVERWRITE; if (entry->mtFlags & MTF_BYTES) thisDescr->segmentFlags |= SSF_BYTES; } if (entry->mtFlags & MTF_EXECUTABLE) thisDescr->segmentFlags |= SSF_CODE; } // Write out temporarily. Will be overwritten at the end. modHeader.segmentDescr = ftell(this->exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, this->exportFile); // Write out the relocations and the data. for (unsigned k = 0; k < this->memTableEntries; k++) { SavedStateSegmentDescr *thisDescr = &descrs[k]; memoryTableEntry *entry = &this->memTable[k]; if (k >= newAreas) // Not permanent areas { thisDescr->relocations = ftell(this->exportFile); // Have to write this out. this->relocationCount = 0; // Create the relocation table. char *start = (char*)entry->mtOriginalAddr; char *end = start + entry->mtLength; for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; ) { p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); // For saved states we don't include explicit relocations except // in code but it's easier if we do for modules. if (length != 0 && obj->IsCodeObject()) machineDependent->ScanConstantsWithinCode(obj, this); relocateObject(obj); p += length; } thisDescr->relocationCount = this->relocationCount; // Write out the data. thisDescr->segmentData = ftell(exportFile); fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exportFile); } } // Rewrite the header and the segment tables now they're complete. 
fseek(exportFile, 0, SEEK_SET); fwrite(&modHeader, sizeof(modHeader), 1, exportFile); fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, exportFile); delete[](descrs); fclose(exportFile); exportFile = NULL; } // Store a module POLYUNSIGNED PolyStoreModule(PolyObject *threadId, PolyWord name, PolyWord contents) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedContents = taskData->saveVec.push(contents); try { TempString fileName(name); ModuleStorer storer(fileName, pushedContents); processes->MakeRootRequest(taskData, &storer); if (storer.errorMessage) raise_syscall(taskData, storer.errorMessage, storer.errCode); } catch (...) {} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } // Load a module. class ModuleLoader: public MainThreadRequest { public: ModuleLoader(TaskData *taskData, const TCHAR *file): MainThreadRequest(MTP_LOADMODULE), callerTaskData(taskData), fileName(file), errorResult(NULL), errNumber(0), rootHandle(0) {} virtual void Perform(); TaskData *callerTaskData; const TCHAR *fileName; const char *errorResult; int errNumber; Handle rootHandle; }; void ModuleLoader::Perform() { AutoClose loadFile(_tfopen(fileName, _T("rb"))); if ((FILE*)loadFile == NULL) { errorResult = "Cannot open load file"; errNumber = ERRORNUMBER; return; } ModuleHeader header; // Read the header and check the signature. 
if (fread(&header, sizeof(ModuleHeader), 1, loadFile) != 1) { errorResult = "Unable to load header"; return; } if (strncmp(header.headerSignature, MODULESIGNATURE, sizeof(header.headerSignature)) != 0) { errorResult = "File is not a Poly/ML module"; return; } if (header.headerVersion != MODULEVERSION || header.headerLength != sizeof(ModuleHeader) || header.segmentDescrLength != sizeof(SavedStateSegmentDescr)) { errorResult = "Unsupported version of module file"; return; } if (header.executableTimeStamp != exportTimeStamp) { // Time-stamp does not match executable. errorResult = "Module was exported from a different executable or the executable has changed"; return; } LoadRelocate relocate; relocate.nDescrs = header.segmentDescrCount; relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs]; if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 || fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs) { errorResult = "Unable to read segment descriptors"; return; } { unsigned maxIndex = 0; for (unsigned i = 0; i < relocate.nDescrs; i++) if (relocate.descrs[i].segmentIndex > maxIndex) maxIndex = relocate.descrs[i].segmentIndex; relocate.targetAddresses = new PolyWord*[maxIndex+1]; for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0; } // Read in and create the new segments first. If we have problems, // in particular if we have run out of memory, then it's easier to recover. for (unsigned i = 0; i < relocate.nDescrs; i++) { SavedStateSegmentDescr *descr = &relocate.descrs[i]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); if (descr->segmentData == 0) { // No data - just an entry in the index. if (space == NULL/* || descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/) { errorResult = "Mismatch for existing memory space"; return; } else relocate.targetAddresses[descr->segmentIndex] = space->bottom; } else { // New segment. 
if (space != NULL) { errorResult = "Segment already exists"; return; } // Allocate memory for the new segment. size_t actualSize = descr->segmentSize; MemSpace *space; if (descr->segmentFlags & SSF_CODE) { CodeSpace *cSpace = gMem.NewCodeSpace(actualSize); if (cSpace == 0) { errorResult = "Unable to allocate memory"; return; } space = cSpace; cSpace->firstFree = (PolyWord*)((byte*)space->bottom + descr->segmentSize); if (cSpace->firstFree != cSpace->top) gMem.FillUnusedSpace(cSpace->firstFree, cSpace->top - cSpace->firstFree); } else { LocalMemSpace *lSpace = gMem.NewLocalSpace(actualSize, descr->segmentFlags & SSF_WRITABLE); if (lSpace == 0) { errorResult = "Unable to allocate memory"; return; } space = lSpace; lSpace->lowerAllocPtr = (PolyWord*)((byte*)lSpace->bottom + descr->segmentSize); } if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 || fread(space->bottom, descr->segmentSize, 1, loadFile) != 1) { errorResult = "Unable to read segment"; return; } relocate.targetAddresses[descr->segmentIndex] = space->bottom; if (space->isMutable && (descr->segmentFlags & SSF_BYTES) != 0) { ClearWeakByteRef cwbr; cwbr.ScanAddressesInRegion(space->bottom, (PolyWord*)((byte*)space->bottom + descr->segmentSize)); } } } // Now deal with relocation. for (unsigned j = 0; j < relocate.nDescrs; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; PolyWord *baseAddr = relocate.targetAddresses[descr->segmentIndex]; ASSERT(baseAddr != NULL); // We should have created it. // Process explicit relocations. // If we get errors just skip the error and continue rather than leave // everything in an unstable state. 
if (descr->relocations) { if (fseek(loadFile, descr->relocations, SEEK_SET) != 0) errorResult = "Unable to read relocation segment"; for (unsigned k = 0; k < descr->relocationCount; k++) { RelocationEntry reloc; if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1) errorResult = "Unable to read relocation segment"; byte *setAddress = (byte*)baseAddr + reloc.relocAddress; byte *targetAddress = (byte*)relocate.targetAddresses[reloc.targetSegment] + reloc.targetAddress; ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind); } } } // Get the root address. Push this to the caller's save vec. If we put the // newly created areas into local memory we could get a GC as soon as we // complete this root request. { PolyWord *baseAddr = relocate.targetAddresses[header.rootSegment]; rootHandle = callerTaskData->saveVec.push((PolyObject*)((byte*)baseAddr + header.rootOffset)); } } static Handle LoadModule(TaskData *taskData, Handle args) { TempString fileName(args->Word()); ModuleLoader loader(taskData, fileName); processes->MakeRootRequest(taskData, &loader); if (loader.errorResult != 0) { if (loader.errNumber == 0) raise_fail(taskData, loader.errorResult); else { AutoFree buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1)); #if (defined(_WIN32) && defined(UNICODE)) sprintf(buff, "%s: %S", loader.errorResult, loader.fileName); #else sprintf(buff, "%s: %s", loader.errorResult, loader.fileName); #endif raise_syscall(taskData, buff, loader.errNumber); } } return loader.rootHandle; } // Load a module POLYUNSIGNED PolyLoadModule(PolyObject *threadId, PolyWord arg) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle pushedArg = taskData->saveVec.push(arg); Handle result = 0; try { result = LoadModule(taskData, pushedArg); } catch (...) 
{} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } PolyObject *InitHeaderFromExport(struct _exportDescription *exports) { // Check the structure sizes stored in the export structure match the versions // used in this library. if (exports->structLength != sizeof(exportDescription) || exports->memTableSize != sizeof(memoryTableEntry) || exports->rtsVersion < FIRST_supported_version || exports->rtsVersion > LAST_supported_version) { #if (FIRST_supported_version == LAST_supported_version) Exit("The exported object file has version %0.2f but this library supports %0.2f", ((float)exports->rtsVersion) / 100.0, ((float)FIRST_supported_version) / 100.0); #else Exit("The exported object file has version %0.2f but this library supports %0.2f-%0.2f", ((float)exports->rtsVersion) / 100.0, ((float)FIRST_supported_version) / 100.0, ((float)LAST_supported_version) / 100.0); #endif } // We could also check the RTS version and the architecture. exportTimeStamp = exports->timeStamp; // Needed for load and save. memoryTableEntry *memTable = exports->memTable; #ifdef POLYML32IN64 // We need to copy this into the heap before beginning execution. // This is very like loading a saved state and the code should probably // be merged. 
LoadRelocate relocate(true); relocate.nDescrs = exports->memTableEntries; relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs]; relocate.targetAddresses = new PolyWord*[exports->memTableEntries]; relocate.originalBaseAddr = (PolyWord*)exports->originalBaseAddr; PolyObject *root = 0; for (unsigned i = 0; i < exports->memTableEntries; i++) { relocate.descrs[i].segmentIndex = memTable[i].mtIndex; relocate.descrs[i].originalAddress = memTable[i].mtOriginalAddr; relocate.descrs[i].segmentSize = memTable[i].mtLength; PermanentMemSpace *newSpace = gMem.AllocateNewPermanentSpace(memTable[i].mtLength, (unsigned)memTable[i].mtFlags, (unsigned)memTable[i].mtIndex); if (newSpace == 0) Exit("Unable to initialise a permanent memory space"); PolyWord *mem = newSpace->bottom; - memcpy(mem, memTable[i].mtCurrentAddr, memTable[i].mtLength); - gMem.FillUnusedSpace(mem + memTable[i].mtLength / sizeof(PolyWord), + memcpy(newSpace->writeAble(mem), memTable[i].mtCurrentAddr, memTable[i].mtLength); + PolyWord* unused = mem + memTable[i].mtLength / sizeof(PolyWord); + gMem.FillUnusedSpace(newSpace->writeAble(unused), newSpace->spaceSize() - memTable[i].mtLength / sizeof(PolyWord)); if (newSpace == 0) Exit("Unable to initialise a permanent memory space"); relocate.targetAddresses[i] = mem; relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress, (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize - 1)); // Relocate the root function. 
if (exports->rootFunction >= memTable[i].mtCurrentAddr && exports->rootFunction < (char*)memTable[i].mtCurrentAddr + memTable[i].mtLength) { root = (PolyObject*)((char*)mem + ((char*)exports->rootFunction - (char*)memTable[i].mtCurrentAddr)); } } // Now relocate the addresses for (unsigned j = 0; j < exports->memTableEntries; j++) { SavedStateSegmentDescr *descr = &relocate.descrs[j]; MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex); // Any relative addresses have to be corrected by adding this. relocate.relativeOffset = (PolyWord*)descr->originalAddress - space->bottom; for (PolyWord *p = space->bottom; p < space->top; ) { #ifdef POLYML32IN64 if ((((uintptr_t)p) & 4) == 0) { // Skip any padding. The length word should be on an odd-word boundary. p++; continue; } #endif p++; PolyObject *obj = (PolyObject*)p; POLYUNSIGNED length = obj->Length(); relocate.RelocateObject(obj); p += length; } } // Set the final permissions. for (unsigned j = 0; j < exports->memTableEntries; j++) { PermanentMemSpace *space = gMem.SpaceForIndex(memTable[j].mtIndex); gMem.CompletePermanentSpaceAllocation(space); } return root; #else for (unsigned i = 0; i < exports->memTableEntries; i++) { // Construct a new space for each of the entries. if (gMem.NewPermanentSpace( (PolyWord*)memTable[i].mtCurrentAddr, memTable[i].mtLength / sizeof(PolyWord), (unsigned)memTable[i].mtFlags, (unsigned)memTable[i].mtIndex) == 0) Exit("Unable to initialise a permanent memory space"); } return (PolyObject *)exports->rootFunction; #endif } // Return the system directory for modules. This is configured differently // in Unix and in Windows. 
POLYUNSIGNED PolyGetModuleDirectory(PolyObject *threadId) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); Handle result = 0; try { #if (defined(MODULEDIR)) result = SAVE(C_string_to_Poly(taskData, MODULEDIR)); #elif (defined(_WIN32)) { // This registry key is configured when Poly/ML is installed using the installer. // It gives the path to the Poly/ML installation directory. We return the // Modules subdirectory. HKEY hk; if (RegOpenKeyEx(HKEY_LOCAL_MACHINE, _T("SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\PolyML.exe"), 0, KEY_QUERY_VALUE, &hk) == ERROR_SUCCESS) { DWORD valSize; if (RegQueryValueEx(hk, _T("Path"), 0, NULL, NULL, &valSize) == ERROR_SUCCESS) { #define MODULEDIR _T("Modules") TempString buff((TCHAR*)malloc(valSize + (_tcslen(MODULEDIR) + 1) * sizeof(TCHAR))); DWORD dwType; if (RegQueryValueEx(hk, _T("Path"), 0, &dwType, (LPBYTE)(LPTSTR)buff, &valSize) == ERROR_SUCCESS) { // The registry entry should end with a backslash. _tcscat(buff, MODULEDIR); result = SAVE(C_string_to_Poly(taskData, buff)); } } RegCloseKey(hk); } result = SAVE(C_string_to_Poly(taskData, "")); } #else result = SAVE(C_string_to_Poly(taskData, "")); #endif } catch (...) 
{} // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); if (result == 0) return TAGGED(0).AsUnsigned(); else return result->Word().AsUnsigned(); } struct _entrypts savestateEPT[] = { { "PolySaveState", (polyRTSFunction)&PolySaveState }, { "PolyLoadState", (polyRTSFunction)&PolyLoadState }, { "PolyShowHierarchy", (polyRTSFunction)&PolyShowHierarchy }, { "PolyRenameParent", (polyRTSFunction)&PolyRenameParent }, { "PolyShowParent", (polyRTSFunction)&PolyShowParent }, { "PolyStoreModule", (polyRTSFunction)&PolyStoreModule }, { "PolyLoadModule", (polyRTSFunction)&PolyLoadModule }, { "PolyLoadHierarchy", (polyRTSFunction)&PolyLoadHierarchy }, { "PolyGetModuleDirectory", (polyRTSFunction)&PolyGetModuleDirectory }, { NULL, NULL } // End of list. }; diff --git a/libpolyml/scanaddrs.cpp b/libpolyml/scanaddrs.cpp index 479a6f88..c9d342d7 100644 --- a/libpolyml/scanaddrs.cpp +++ b/libpolyml/scanaddrs.cpp @@ -1,496 +1,499 @@ /* Title: Address scanner Copyright (c) 2006-8, 2012, 2019 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) 0 #endif #include #include "globals.h" #include "scanaddrs.h" #include "machine_dep.h" #include "diagnostics.h" #include "memmgr.h" // Process the value at a given location and update it as necessary. POLYUNSIGNED ScanAddress::ScanAddressAt(PolyWord *pt) { PolyWord val = *pt; PolyWord newVal = val; if (IS_INT(val) || val == PolyWord::FromUnsigned(0)) { // We can get zeros in the constant area if we garbage collect // while compiling some code. */ } else { ASSERT(OBJ_IS_DATAPTR(val)); // Any sort of address newVal = ScanObjectAddress(val.AsObjPtr()); } if (newVal != val) // Only update if we need to. *pt = newVal; return 0; } // General purpose object processor, Processes all the addresses in an object. // Handles the various kinds of object that may contain addresses. void ScanAddress::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord) { do { ASSERT (OBJ_IS_LENGTH(lengthWord)); if (OBJ_IS_BYTE_OBJECT(lengthWord)) return; /* Nothing more to do */ POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord); PolyWord *baseAddr = (PolyWord*)obj; if (OBJ_IS_CODE_OBJECT(lengthWord)) { // Scan constants within the code. machineDependent->ScanConstantsWithinCode(obj, obj, length, this); // Skip to the constants and get ready to scan them. obj->GetConstSegmentForCode(length, baseAddr, length); - + // Adjust to the read-write area if necessary. 
+ baseAddr = gMem.SpaceForAddress(baseAddr)->writeAble(baseAddr); } else if (OBJ_IS_CLOSURE_OBJECT(lengthWord)) { // The first word is a code pointer so we need to treat it specially // but it is possible it hasn't yet been set. if ((*(uintptr_t*)baseAddr & 1) == 0) { POLYUNSIGNED lengthWord = ScanCodeAddressAt((PolyObject**)baseAddr); // N.B. This could side-effect *baseAddr if (lengthWord != 0) ScanAddressesInObject(*(PolyObject**)baseAddr, lengthWord); } baseAddr += sizeof(PolyObject*) / sizeof(PolyWord); length -= sizeof(PolyObject*) / sizeof(PolyWord); } PolyWord *endWord = baseAddr + length; // We want to minimise the actual recursion we perform so we try to // use tail recursion if we can. We first scan from the end and // remove any words that don't need recursion. POLYUNSIGNED lastLengthWord = 0; while (endWord != baseAddr) { PolyWord wordAt = endWord[-1]; if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0)) endWord--; // Don't need to look at this. else if ((lastLengthWord = ScanAddressAt(endWord-1)) != 0) // We need to process this one break; else endWord--; // We're not interested in this. } if (endWord == baseAddr) return; // We've done everything. // There is at least one word that needs to be processed, the // one at endWord-1. // Now process from the beginning forward to see if there are // any words before this that need to be handled. This way we are more // likely to handle the head of a list by recursion and the // tail by looping (tail recursion). while (baseAddr < endWord-1) { PolyWord wordAt = *baseAddr; if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0)) baseAddr++; // Don't need to look at this. else { POLYUNSIGNED lengthWord = ScanAddressAt(baseAddr); if (lengthWord != 0) { wordAt = *baseAddr; // Reload because it may have been side-effected // We really have to process this recursively. 
ASSERT(wordAt.IsDataPtr()); ScanAddressesInObject(wordAt.AsObjPtr(), lengthWord); baseAddr++; } else baseAddr++; } } // Finally process the last word we found that has to be processed. // Do this by looping rather than recursion. PolyWord wordAt = *baseAddr; // Last word to do. // This must be an address ASSERT(wordAt.IsDataPtr()); obj = wordAt.AsObjPtr(); lengthWord = lastLengthWord; } while(1); } void ScanAddress::ScanAddressesInRegion(PolyWord *region, PolyWord *end) { PolyWord *pt = region; while (pt < end) { #ifdef POLYML32IN64 if ((((uintptr_t)pt) & 4) == 0) { // Skip any padding. The length word should be on an odd-word boundary. pt++; continue; } #endif pt++; // Skip length word. // pt actually points AT the object here. PolyObject *obj = (PolyObject*)pt; if (obj->ContainsForwardingPtr()) /* skip over moved object */ { // We can now get multiple forwarding pointers as a result // of applying ShareData repeatedly. Perhaps we should // turn the forwarding pointers back into normal words in // an extra pass. obj = obj->FollowForwardingChain(); ASSERT(obj->ContainsNormalLengthWord()); pt += obj->Length(); } else { ASSERT(obj->ContainsNormalLengthWord()); POLYUNSIGNED length = obj->Length(); if (pt+length > end) Crash("Malformed object at %p - length %lu\n", pt, length); if (length != 0) ScanAddressesInObject(obj); pt += length; } } } // Extract a constant from the code. 
// Reads the constant stored, least significant byte first, at addressOfConstant
// and turns it back into an object pointer.
//  - PROCESS_RELOC_DIRECT: sizeof(PolyWord) bytes holding a PolyWord.  Zero and
//    tagged values are not addresses and give a null result; otherwise the word
//    is converted with AsObjPtr(base).
//  - PROCESS_RELOC_I386RELATIVE: a signed 32-bit displacement measured from the
//    byte immediately after the four-byte constant.
// Any other kind trips the assertion and gives a null result.
PolyObject *ScanAddress::GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, PolyWord *base)
{
    if (code == PROCESS_RELOC_DIRECT) // 32 or 64 bit address of target
    {
        const byte *src = addressOfConstant;
        // Start from all ones when the top bit of byte 3 is set, zero otherwise.
        POLYUNSIGNED bits = (src[3] & 0x80) ? (POLYUNSIGNED)(0-1) : (POLYUNSIGNED)0;
        // Assemble the word from the most significant byte downwards.
        for (unsigned n = sizeof(PolyWord); n > 0; n--)
            bits = (bits << 8) | src[n-1];

        if (bits == 0)
            return 0;
        PolyWord decoded = PolyWord::FromUnsigned(bits);
        if (decoded.IsTagged())
            return 0;
        return decoded.AsObjPtr(base);
    }

    if (code == PROCESS_RELOC_I386RELATIVE) // 32 bit relative address
    {
        // Get the displacement. This is signed, so set the sign just in case.
        POLYSIGNED offset = (addressOfConstant[3] & 0x80) ? -1 : 0;
        for (unsigned n = 4; n > 0; n--)
            offset = (offset << 8) | addressOfConstant[n-1];
        // The address is relative to AFTER the constant
        return (PolyObject*)(addressOfConstant + offset + 4);
    }

    ASSERT(false);
    return 0;
}

// Store a constant value.  Also used with a patch table when importing a saved heap which has
// been exported using the C exporter.
void ScanAddress::SetConstantValue(byte *addressOfConstant, PolyObject *p, ScanRelocationKind code) { + MemSpace* space = gMem.SpaceForAddress(addressOfConstant); + byte* addressToWrite = space->writeAble(addressOfConstant); switch (code) { case PROCESS_RELOC_DIRECT: // 32 or 64 bit address of target { POLYUNSIGNED valu = ((PolyWord)p).AsUnsigned(); for (unsigned i = 0; i < sizeof(PolyWord); i++) { - addressOfConstant[i] = (byte)(valu & 255); + addressToWrite[i] = (byte)(valu & 255); valu >>= 8; } } break; case PROCESS_RELOC_I386RELATIVE: // 32 bit relative address { // This offset may be positive or negative intptr_t newDisp = (byte*)p - addressOfConstant - 4; #if (SIZEOF_VOIDP != 4) ASSERT(newDisp < (intptr_t)0x80000000 && newDisp >= -(intptr_t)0x80000000); #endif for (unsigned i = 0; i < 4; i++) { - addressOfConstant[i] = (byte)(newDisp & 0xff); + addressToWrite[i] = (byte)(newDisp & 0xff); newDisp >>= 8; } } break; } } void ScanAddress::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code) { PolyObject *p = GetConstantValue(addressOfConstant, code); if (p != 0) { PolyObject *oldValue = p; // If this was a relative address we must have a code address. if (code == PROCESS_RELOC_I386RELATIVE) ScanCodeAddressAt(&p); else p = ScanObjectAddress(p); if (p != oldValue) // Update it if it has changed. SetConstantValue(addressOfConstant, p, code); } } void ScanAddress::ScanRuntimeWord(PolyWord *w) { if (w->IsTagged()) {} // Don't need to do anything else { ASSERT(w->IsDataPtr()); *w = ScanObjectAddress(w->AsObjPtr()); } } // This gets called in two circumstances. It may be called for the roots // in which case the stack will be empty and we want to process it completely // or it is called for a constant address in which case it will have been // called from RecursiveScan::ScanAddressesInObject and that can process // any addresses. 
PolyObject *RecursiveScan::ScanObjectAddress(PolyObject *obj) { PolyWord pWord = obj; // Test to see if this needs to be scanned. // It may update the word. bool test = TestForScan(&pWord); obj = pWord.AsObjPtr(); if (test) { MarkAsScanning(obj); if (obj->IsByteObject()) Completed(obj); // Don't need to put it on the stack // If we already have something on the stack we must being called // recursively to process a constant in a code segment. Just push // it on the stack and let the caller deal with it. else if (StackIsEmpty()) RecursiveScan::ScanAddressesInObject(obj, obj->LengthWord()); else PushToStack(obj, (PolyWord*)obj); } return obj; } // This is called via ScanAddressesInRegion to process the permanent mutables. It is // also called from ScanObjectAddress to process root addresses. // It processes all the addresses reachable from the object. // This is almost the same as MTGCProcessMarkPointers::ScanAddressesInObject. void RecursiveScan::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord) { if (OBJ_IS_BYTE_OBJECT(lengthWord)) return; // Ignore byte cells and don't call Completed on them PolyWord *baseAddr = (PolyWord*)obj; while (true) { ASSERT (OBJ_IS_LENGTH(lengthWord)); // Get the length and base address. N.B. If this is a code segment // these will be side-effected by GetConstSegmentForCode. POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord); if (OBJ_IS_CODE_OBJECT(lengthWord) || OBJ_IS_CLOSURE_OBJECT(lengthWord)) { // It's better to process the whole code object in one go. // For the moment do that for closure objects as well. ScanAddress::ScanAddressesInObject(obj, lengthWord); length = 0; // Finished } // else it's a normal object, // If there are only two addresses in this cell that need to be // followed we follow them immediately and treat this cell as done. // If there are more than two we push the address of this cell on // the stack, follow the first address and then rescan it. 
That way // list cells are processed once only but we don't overflow the // stack by pushing all the addresses in a very large vector. PolyWord *endWord = (PolyWord*)obj + length; PolyObject *firstWord = 0; PolyObject *secondWord = 0; PolyWord *restartFrom = baseAddr; while (baseAddr != endWord) { PolyWord wordAt = *baseAddr; if (wordAt.IsDataPtr() && wordAt != PolyWord::FromUnsigned(0)) { // Normal address. We can have words of all zeros at least in the // situation where we have a partially constructed code segment where // the constants at the end of the code have not yet been filled in. if (TestForScan(baseAddr)) // Test value at baseAddr (may side-effect it) { PolyObject *wObj = (*baseAddr).AsObjPtr(); if (wObj->IsByteObject()) { // Can do this now - don't need to push it MarkAsScanning(wObj); Completed(wObj); } else if (firstWord == 0) { firstWord = wObj; // We mark the word immediately. We can have // two words in an object that are the same // and we don't want to process it again. MarkAsScanning(firstWord); } else if (secondWord == 0) { secondWord = wObj; restartFrom = baseAddr; } else break; // More than two words. } } baseAddr++; } if (baseAddr == endWord) { // We have done everything except possibly firstWord and secondWord. // Note: Unfortunately the way that ScanAddressesInRegion works means that // we call Completed on the addresses of cells in the permanent areas without // having called TestForScan. Completed(obj); if (secondWord != 0) { MarkAsScanning(secondWord); // Put this on the stack. If this is a list node we will be // pushing the tail. PushToStack(secondWord, (PolyWord*)secondWord); } } else // Put this back on the stack while we process the first word PushToStack(obj, restartFrom); if (firstWord != 0) { // Process it immediately. 
obj = firstWord; baseAddr = (PolyWord*)obj; } else if (StackIsEmpty()) return; else PopFromStack(obj, baseAddr); lengthWord = obj->LengthWord(); } } // The stack is allocated as a series of blocks chained together. #define RSTACK_SEGMENT_SIZE 1000 class RScanStack { public: RScanStack(): nextStack(0), lastStack(0), sp(0) {} ~RScanStack() { delete(nextStack); } RScanStack *nextStack; RScanStack *lastStack; unsigned sp; struct { PolyObject *obj; PolyWord *base; } stack[RSTACK_SEGMENT_SIZE]; }; RecursiveScanWithStack::~RecursiveScanWithStack() { delete(stack); } bool RecursiveScanWithStack::StackIsEmpty(void) { return stack == 0 || (stack->sp == 0 && stack->lastStack == 0); } void RecursiveScanWithStack::PushToStack(PolyObject *obj, PolyWord *base) { if (stack == 0 || stack->sp == RSTACK_SEGMENT_SIZE) { if (stack != 0 && stack->nextStack != 0) stack = stack->nextStack; else { // Need a new segment try { RScanStack *s = new RScanStack; s->lastStack = stack; if (stack != 0) stack->nextStack = s; stack = s; } catch (std::bad_alloc &) { StackOverflow(); return; } } } stack->stack[stack->sp].obj = obj; stack->stack[stack->sp].base = base; stack->sp++; } void RecursiveScanWithStack::PopFromStack(PolyObject *&obj, PolyWord *&base) { if (stack->sp == 0) { // Chain to the previous stack if any ASSERT(stack->lastStack != 0); // Before we do, delete any further one to free some memory delete(stack->nextStack); stack->nextStack = 0; stack = stack->lastStack; ASSERT(stack->sp == RSTACK_SEGMENT_SIZE); } --stack->sp; obj = stack->stack[stack->sp].obj; base = stack->stack[stack->sp].base; } diff --git a/libpolyml/sharedata.cpp b/libpolyml/sharedata.cpp index bc257189..d72f998f 100644 --- a/libpolyml/sharedata.cpp +++ b/libpolyml/sharedata.cpp @@ -1,1117 +1,1121 @@ /* Title: Share common immutable data Copyright (c) 2000 Cambridge University Technical Services Limited and David C. J. 
Matthews 2006, 2010-13, 2016-17 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_STDLIB_H #include #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #ifdef HAVE_STRING_H #include #endif #include #include "globals.h" #include "save_vec.h" #include "machine_dep.h" #include "scanaddrs.h" #include "run_time.h" #include "sys.h" #include "gc.h" #include "rtsentry.h" #include "memmgr.h" #include "processes.h" #include "gctaskfarm.h" #include "diagnostics.h" #include "sharedata.h" /* This code was largely written by Simon Finn as a database improver for the memory-mapped persistent store version. The aim is that where two immutable objects (cells) contain the same data (i.e. where ML equality would say they were equal) they should be merged so that only a single object is retained. The basic algorithm works like this: 1. From the root, recursively process all objects and calculate a "depth" for each object. Mutable data and code segments have depth 0 and cannot be merged. Byte segments (e.g. strings and long-format arbitrary precision values) have depth 1. Other cells have depths of 1 or greater, the depth being the maximum recursion depth until a byte segment or an object with depth 0 is reached. 
Cycles of immutable data don't arise normally in ML but could be produced as a result of locking mutable objects. To avoid infinite recursion cycles are broken by setting the depth of an object to zero before processing it. The depth of each object is stored in the length word of the object. This ensures each object is processed once only. 2. Vectors are created containing objects of the same depth, from 1 to the maximum depth found. 3. We begin a loop starting at depth 1. 4. The length words are restored, replacing the depth count in the header. 5. The objects are sorted by their contents so bringing together objects with the same contents. The contents are considered simply as uninterpreted bits. 6. The sorted vector is processed to find those objects that are actually bitwise equal. One object is selected to be retained and other objects have their length words turned into tombstones pointing at the retained object. 7. Objects at the next depth are first processed to find pointers to objects that moved in the previous step (or that step with a lower depth). The addresses are updated to point to the retained object. The effect of this step is to ensure that now two objects that are equal in ML terms have identical contents. e.g. If we have val a = ("abc", "def") and b = ("abc", "def") then we will have merged the two occurrences of "abc" and "def" in the previous pass of level 1 objects. This step ensures that the two cells containing the pairs both hold pointers to the same objects and so are bitwise equal. 8. Repeat with 4, 5 and 6 until all the levels have been processed. Each object is processed once and at the end most of the objects have been updated with the shared addresses. We have to scan all the mutable and code objects to update the addresses but also have to scan the immutables because of the possibility of missing an update as a result of breaking a loop (see SPF's comment below). 
DCJM 3/8/06 This has been substantially updated while retaining the basic algorithm. Sorting is now done in parallel by the GC task farm and the stack is now in dynamic memory. That avoids a possible segfault if the normal C stack overflows. A further problem is that the vectors can get very large and this can cause problems if there is insufficient contiguous space. The code has been modified to reduce the size of the vectors at the cost of increasing the total memory requirement. */ extern "C" { POLYEXTERNALSYMBOL POLYUNSIGNED PolyShareCommonData(PolyObject *threadId, PolyWord root); } // The depth is stored in the length field. If the Weak bit is set but the Mutable bit // is clear the value in the length word is a depth rather than a real length. // The tombstone bit is zero. // Previously "depth" values were encoded with the tombstone bit set but that isn't // possible in 32-in-64 because we need 31 bits in a forwarding pointer. inline bool OBJ_IS_DEPTH(POLYUNSIGNED L) { return (L & (_OBJ_WEAK_BIT| _OBJ_MUTABLE_BIT)) == _OBJ_WEAK_BIT; } inline POLYUNSIGNED OBJ_GET_DEPTH(POLYUNSIGNED L) { return OBJ_OBJECT_LENGTH(L); } inline POLYUNSIGNED OBJ_SET_DEPTH(POLYUNSIGNED n) { return n | _OBJ_WEAK_BIT; } // The DepthVector type contains all the items of a particular depth. // This is the abstract class. There are variants for the case where all // the cells have the same size and where they may vary. class DepthVector { public: DepthVector() : nitems(0), vsize(0), ptrVector(0) {} virtual ~DepthVector() { free(ptrVector); } virtual POLYUNSIGNED MergeSameItems(void); virtual void Sort(void); virtual POLYUNSIGNED ItemCount(void) { return nitems; } virtual void AddToVector(POLYUNSIGNED L, PolyObject *pt) = 0; void FixLengthAndAddresses(ScanAddress *scan); virtual void RestoreForwardingPointers() = 0; protected: POLYUNSIGNED nitems; POLYUNSIGNED vsize; PolyObject **ptrVector; // This must only be called BEFORE sorting. 
The pointer vector will be // modified by sorting but the length vector is not. virtual void RestoreLengthWords(void) = 0; static void SortRange(PolyObject * *first, PolyObject * *last); static int CompareItems(const PolyObject * const *a, const PolyObject * const *b); static int qsCompare(const void *a, const void *b) { return CompareItems((const PolyObject * const*)a, (const PolyObject *const *)b); } static void sortTask(GCTaskId*, void *s, void *l) { SortRange((PolyObject **)s, (PolyObject **)l); } }; // DepthVector where the size needs to be held for each item. class DepthVectorWithVariableLength: public DepthVector { public: DepthVectorWithVariableLength() : lengthVector(0) {} virtual ~DepthVectorWithVariableLength() { free(lengthVector); } virtual void RestoreLengthWords(void); virtual void AddToVector(POLYUNSIGNED L, PolyObject *pt); virtual void RestoreForwardingPointers(); protected: POLYUNSIGNED *lengthVector; // Same size as the ptrVector }; class DepthVectorWithFixedLength : public DepthVector { public: DepthVectorWithFixedLength(POLYUNSIGNED l) : length(l) {} virtual void RestoreLengthWords(void); virtual void AddToVector(POLYUNSIGNED L, PolyObject *pt); // It's safe to run this again for the fixed length vectors. virtual void RestoreForwardingPointers() { RestoreLengthWords(); } protected: POLYUNSIGNED length; }; // We have special vectors for the sizes from 1 to FIXEDLENGTHSIZE-1. // Zero-sized and large objects go in depthVectorArray[0]. #define FIXEDLENGTHSIZE 10 class ShareDataClass { public: ShareDataClass(); ~ShareDataClass(); bool RunShareData(PolyObject *root); void AddToVector(POLYUNSIGNED depth, POLYUNSIGNED length, PolyObject *pt); private: std::vector depthVectorArray[FIXEDLENGTHSIZE]; POLYUNSIGNED maxVectorSize; }; ShareDataClass::ShareDataClass() { maxVectorSize = 0; } ShareDataClass::~ShareDataClass() { // Free the bitmaps associated with the permanent spaces. 
for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) (*i)->shareBitmap.Destroy(); // Free the depth vectors. for (unsigned i = 0; i < FIXEDLENGTHSIZE; i++) { for (std::vector ::iterator j = depthVectorArray[i].begin(); j < depthVectorArray[i].end(); j++) delete(*j); } } // Grow the appropriate depth vector if necessary and add the item to it. void ShareDataClass::AddToVector(POLYUNSIGNED depth, POLYUNSIGNED length, PolyObject *pt) { // Select the appropriate vector. Element zero is the variable length vector and is // also used for the, rare, zero length objects. std::vector *vectorToUse = &(depthVectorArray[length < FIXEDLENGTHSIZE ? length : 0]); if (depth >= maxVectorSize) maxVectorSize = depth+1; while (vectorToUse->size() <= depth) { try { if (length != 0 && length < FIXEDLENGTHSIZE) vectorToUse->push_back(new DepthVectorWithFixedLength(length)); else vectorToUse->push_back(new DepthVectorWithVariableLength); } catch (std::bad_alloc&) { throw MemoryException(); } } (*vectorToUse)[depth]->AddToVector(length, pt); } // Add an object to a depth vector void DepthVectorWithVariableLength::AddToVector(POLYUNSIGNED L, PolyObject *pt) { ASSERT (this->nitems <= this->vsize); if (this->nitems == this->vsize) { // The vector is full or has not yet been allocated. Grow it by 50%. POLYUNSIGNED new_vsize = this->vsize + this->vsize / 2 + 1; if (new_vsize < 15) new_vsize = 15; // First the length vector. POLYUNSIGNED *newLength = (POLYUNSIGNED *)realloc(this->lengthVector, new_vsize * sizeof(POLYUNSIGNED)); if (newLength == 0) { // The vectors can get large and we may not be able to grow them // particularly if the address space is limited in 32-bit mode. // Try again with just a small increase. new_vsize = this->vsize + 15; newLength = (POLYUNSIGNED *)realloc(this->lengthVector, new_vsize * sizeof(POLYUNSIGNED)); // If that failed give up. 
if (newLength == 0) throw MemoryException(); } PolyObject **newPtrVector = (PolyObject * *)realloc (this->ptrVector, new_vsize*sizeof(PolyObject *)); if (newPtrVector == 0) { new_vsize = this->vsize + 15; newPtrVector = (PolyObject **)realloc (this->ptrVector, new_vsize*sizeof(PolyObject *)); // If that failed give up. if (newPtrVector == 0) throw MemoryException(); } this->lengthVector = newLength; this->ptrVector = newPtrVector; this->vsize = new_vsize; } ASSERT (this->nitems < this->vsize); this->lengthVector[this->nitems] = L; this->ptrVector[this->nitems] = pt; this->nitems++; ASSERT (this->nitems <= this->vsize); } // Add an object to a depth vector void DepthVectorWithFixedLength::AddToVector(POLYUNSIGNED L, PolyObject *pt) { ASSERT(this->nitems <= this->vsize); ASSERT(L == length); if (this->nitems == this->vsize) { // The vector is full or has not yet been allocated. Grow it by 50%. POLYUNSIGNED new_vsize = this->vsize + this->vsize / 2 + 1; if (new_vsize < 15) new_vsize = 15; PolyObject **newPtrVector = (PolyObject * *)realloc(this->ptrVector, new_vsize * sizeof(PolyObject *)); if (newPtrVector == 0) { new_vsize = this->vsize + 15; newPtrVector = (PolyObject **)realloc(this->ptrVector, new_vsize * sizeof(PolyObject *)); // If that failed give up. if (newPtrVector == 0) throw MemoryException(); } this->ptrVector = newPtrVector; this->vsize = new_vsize; } ASSERT(this->nitems < this->vsize); this->ptrVector[this->nitems] = pt; this->nitems++; ASSERT(this->nitems <= this->vsize); } // Comparison function used for sorting and also to test whether // two cells can be merged. int DepthVector::CompareItems(const PolyObject *const *a, const PolyObject *const *b) { const PolyObject *x = *a; const PolyObject *y = *b; POLYUNSIGNED lX = x->LengthWord(); POLYUNSIGNED lY = y->LengthWord(); // ASSERT (OBJ_IS_LENGTH(lX)); // ASSERT (OBJ_IS_LENGTH(lY)); if (lX > lY) return 1; // These tests include the flag bits if (lX < lY) return -1; // Return simple bitwise equality. 
return memcmp(x, y, OBJ_OBJECT_LENGTH(lX)*sizeof(PolyWord)); } // Merge cells with the same contents. POLYUNSIGNED DepthVector::MergeSameItems() { POLYUNSIGNED N = this->nitems; POLYUNSIGNED n = 0; POLYUNSIGNED i = 0; while (i < N) { PolyObject *bestShare = 0; // Candidate to share. MemSpace *bestSpace = 0; POLYUNSIGNED j; for (j = i; j < N; j++) { ASSERT (OBJ_IS_LENGTH(ptrVector[i]->LengthWord())); // Search for identical objects. Don't bother to compare it with itself. if (i != j && CompareItems (&ptrVector[i], &ptrVector[j]) != 0) break; // The order of sharing is significant. // Choose an object in the permanent memory if that is available. // This is necessary to retain the invariant that no object in // the permanent memory points to an object in the temporary heap. // (There may well be pointers to this object elsewhere in the permanent // heap). // Choose the lowest hierarchy value for preference since that // may reduce the size of saved state when resaving already saved // data. // If we can't find a permanent space choose a space that isn't // an allocation space. Otherwise we could break the invariant // that immutable areas never point into the allocation area. MemSpace *space = gMem.SpaceForAddress((PolyWord*)ptrVector[j]-1); if (bestSpace == 0) { bestShare = ptrVector[j]; bestSpace = space; } else if (bestSpace->spaceType == ST_PERMANENT) { // Only update if the current space is also permanent and a lower hierarchy if (space->spaceType == ST_PERMANENT && ((PermanentMemSpace *)space)->hierarchy < ((PermanentMemSpace *)bestSpace)->hierarchy) { bestShare = ptrVector[j]; bestSpace = space; } } else if (bestSpace->spaceType == ST_LOCAL) { // Update if the current space is not an allocation space if (space->spaceType != ST_LOCAL || ! ((LocalMemSpace*)space)->allocationSpace) { bestShare = ptrVector[j]; bestSpace = space; } } } POLYUNSIGNED k = j; // Remember the first object that didn't match. 
// For each identical object set all but the one we want to point to // the shared object. for (j = i; j < k; j++) { ASSERT (OBJ_IS_LENGTH(ptrVector[j]->LengthWord())); if (ptrVector[j] != bestShare) { ptrVector[j]->SetForwardingPtr(bestShare); /* an indirection */ n++; } } i = k; } return n; } // Sort this vector void DepthVector::Sort() { if (nitems > 1) { SortRange(ptrVector, ptrVector + (nitems - 1)); gpTaskFarm->WaitForCompletion(); } // Check // for (POLYUNSIGNED i = 0; i < nitems-1; i++) // ASSERT(CompareItems(vector+i, vector+i+1) <= 0); } inline void swapItems(PolyObject * *i, PolyObject * *j) { PolyObject * t = *i; *i = *j; *j = t; } // Simple parallel quick-sort. "first" and "last" are the first // and last items (inclusive) in the vector. void DepthVector::SortRange(PolyObject * *first, PolyObject * *last) { while (first < last) { if (last-first <= 100) { // Use the standard library function for small ranges. qsort(first, last-first+1, sizeof(PolyObject *), qsCompare); return; } // Select the best pivot from the first, last and middle item // by sorting these three items. We use the middle item as // the pivot and since the first and last items are sorted // by this we can skip them when we start the partitioning. PolyObject * *middle = first + (last-first)/2; if (CompareItems(first, middle) > 0) swapItems(first, middle); if (CompareItems(middle, last) > 0) { swapItems(middle, last); if (CompareItems(first, middle) > 0) swapItems(first, middle); } // Partition the data about the pivot. This divides the // vector into two partitions with all items <= pivot to // the left and all items >= pivot to the right. // Note: items equal to the pivot could be in either partition. PolyObject * *f = first+1; PolyObject * *l = last-1; do { // Find an item we have to move. These loops will always // terminate because testing the middle with itself // will return == 0. 
while (CompareItems(f, middle/* pivot*/) < 0) f++; while (CompareItems(middle/* pivot*/, l) < 0) l--; // If we haven't finished we need to swap the items. if (f < l) { swapItems(f, l); // If one of these was the pivot item it will have moved to // the other position. if (middle == f) middle = l; else if (middle == l) middle = f; f++; l--; } else if (f == l) { f++; l--; break; } } while (f <= l); // Process the larger partition as a separate task or // by recursion and do the smaller partition by tail // recursion. if (l-first > last-f) { // Lower part is larger gpTaskFarm->AddWorkOrRunNow(sortTask, first, l); first = f; } else { // Upper part is larger gpTaskFarm->AddWorkOrRunNow(sortTask, f, last); last = l; } } } // Set the genuine length word. This overwrites both depth words and forwarding pointers. void DepthVectorWithVariableLength::RestoreLengthWords() { for (POLYUNSIGNED i = 0; i < this->nitems; i++) - ptrVector[i]->SetLengthWord(lengthVector[i]); // restore genuine length word + { + PolyObject* obj = ptrVector[i]; + obj = gMem.SpaceForAddress(obj)->writeAble(obj); // This could be code. + obj->SetLengthWord(lengthVector[i]); // restore genuine length word + } } void DepthVectorWithFixedLength::RestoreLengthWords() { for (POLYUNSIGNED i = 0; i < this->nitems; i++) ptrVector[i]->SetLengthWord(length); // restore genuine length word } // Fix up the length word. Then update all addresses to their new location if // we have shared the original destination of the address with something else. void DepthVector::FixLengthAndAddresses(ScanAddress *scan) { RestoreLengthWords(); for (POLYUNSIGNED i = 0; i < this->nitems; i++) { // Fix up all addresses. scan->ScanAddressesInObject(ptrVector[i]); } } // Restore the original length words on forwarding pointers. // After sorting the pointer vector and length vector are no longer // matched so we have to follow the pointers. 
void DepthVectorWithVariableLength::RestoreForwardingPointers() { for (POLYUNSIGNED i = 0; i < this->nitems; i++) { PolyObject *obj = ptrVector[i]; if (obj->ContainsForwardingPtr()) obj->SetLengthWord(obj->GetForwardingPtr()->LengthWord()); } } // This class is used in two places and is called to ensure that all // object length words have been restored. // Before we actually try to share the immutable objects at a particular depth it // is called to update addresses in those objects to take account of // sharing at lower depths. // When all sharing is complete it is called to update the addresses in // level zero objects, i.e. mutables and code. class ProcessFixupAddress: public ScanAddress { protected: virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt); virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt); virtual PolyObject *ScanObjectAddress(PolyObject *base) { return GetNewAddress(base).AsObjPtr(); } PolyWord GetNewAddress(PolyWord old); }; POLYUNSIGNED ProcessFixupAddress::ScanAddressAt(PolyWord *pt) { *pt = GetNewAddress(*pt); return 0; } // Don't have to do anything for code since it isn't moved. POLYUNSIGNED ProcessFixupAddress::ScanCodeAddressAt(PolyObject **pt) { return 0; } // Returns the new address if the argument is the address of an object that // has moved, otherwise returns the original. PolyWord ProcessFixupAddress::GetNewAddress(PolyWord old) { if (old.IsTagged() || old == PolyWord::FromUnsigned(0)) return old; // Nothing to do. ASSERT(old.IsDataPtr()); PolyObject *obj = old.AsObjPtr(); POLYUNSIGNED L = obj->LengthWord(); if (obj->ContainsForwardingPtr()) // tombstone is a pointer to a shared object { PolyObject *newp = obj->GetForwardingPtr(); // ASSERT (newp->ContainsNormalLengthWord()); return newp; } // Generally each address will point to an object processed at a lower depth. // The exception is if we have a cycle and have assigned the rest of the // structure to a higher depth. // N.B. 
We return the original address here but this could actually share // with something else and not be retained. if (OBJ_IS_DEPTH(L)) return old; ASSERT (obj->ContainsNormalLengthWord()); // object is not shared return old; } // This class is used to set up the depth vectors for sorting. It subclasses ScanAddress // in order to be able to use that for code objects since they are complicated but it // handles all the other object types itself. It scans them depth-first using an explicit stack. class ProcessAddToVector: public ScanAddress { public: ProcessAddToVector(ShareDataClass *p): m_parent(p), addStack(0), stackSize(0), asp(0) {} ~ProcessAddToVector(); // These are used when scanning code areas. They return either // a length or a possibly updated address. virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt) { (void)AddPolyWordToDepthVectors(*pt); return 0; } virtual PolyObject *ScanObjectAddress(PolyObject *base) { (void)AddObjectToDepthVector(base); return base; } void ProcessRoot(PolyObject *root); protected: // Process an address and return the "depth". POLYUNSIGNED AddPolyWordToDepthVectors(PolyWord old); POLYUNSIGNED AddObjectToDepthVector(PolyObject *obj); void PushToStack(PolyObject *obj); ShareDataClass *m_parent; PolyObject **addStack; unsigned stackSize; unsigned asp; }; ProcessAddToVector::~ProcessAddToVector() { // Normally the stack will be empty. However if we have run out of // memory and thrown an exception we may well have items left. // We have to remove the mark bits otherwise it will mess up any // subsequent GC. for (unsigned i = 0; i < asp; i++) { PolyObject *obj = addStack[i]; if (obj->LengthWord() & _OBJ_GC_MARK) obj->SetLengthWord(obj->LengthWord() & (~_OBJ_GC_MARK)); } free(addStack); // Now free the stack } POLYUNSIGNED ProcessAddToVector::AddPolyWordToDepthVectors(PolyWord old) { // If this is a tagged integer or an IO pointer that's simply a constant. 
if (old.IsTagged() || old == PolyWord::FromUnsigned(0)) return 0; return AddObjectToDepthVector(old.AsObjPtr()); } // Either adds an object to the stack or, if its depth is known, adds it // to the depth vector and returns the depth. // We use _OBJ_GC_MARK to detect when we have visited a cell but not yet // computed the depth. We have to be careful that this bit is removed // before we finish in the case that we run out of memory and throw an // exception. PushToStack may throw the exception if the stack needs to // grow. POLYUNSIGNED ProcessAddToVector::AddObjectToDepthVector(PolyObject *obj) { MemSpace *space = gMem.SpaceForAddress(((PolyWord*)obj)-1); if (space == 0) return 0; POLYUNSIGNED L = obj->LengthWord(); if (OBJ_IS_DEPTH(L)) // tombstone contains genuine depth or 0. return OBJ_GET_DEPTH(L); if (obj->LengthWord() & _OBJ_GC_MARK) return 0; // Marked but not yet scanned. Circular structure. ASSERT (OBJ_IS_LENGTH(L)); if (obj->IsMutable()) { // Mutable data in the local or permanent areas. Ignore byte objects or // word objects containing only ints. if (obj->IsWordObject()) { bool containsAddress = false; for (POLYUNSIGNED j = 0; j < OBJ_OBJECT_LENGTH(L) && !containsAddress; j++) containsAddress = ! obj->Get(j).IsTagged(); if (containsAddress) { // Add it to the vector so we will update any addresses it contains. m_parent->AddToVector(0, L, obj); // and follow any addresses to try to merge those. PushToStack(obj); obj->SetLengthWord(L | _OBJ_GC_MARK); // To prevent rescan } // If we don't add it to the vector we mustn't set _OBJ_GC_MARK. } return 0; // Level is zero } if (space->spaceType == ST_PERMANENT && ((PermanentMemSpace*)space)->hierarchy == 0) { // Immutable data in the permanent area can't be merged // because it's read only. We need to follow the addresses // because they may point to mutable areas containing data // that can be. A typical case is the root function pointing // at the global name table containing new declarations. 
Bitmap *bm = &((PermanentMemSpace*)space)->shareBitmap; if (! bm->TestBit((PolyWord*)obj - space->bottom)) { bm->SetBit((PolyWord*)obj - space->bottom); if (! obj->IsByteObject()) PushToStack(obj); } return 0; } /* There's a problem sharing code objects if they have relative calls/jumps in them to other code. The code of two functions may be identical (e.g. they both call functions 100 bytes ahead) and so they will appear the same but if the functions they jump to are different they are actually different. For that reason we don't share code segments. DCJM 4/1/01 */ if (obj->IsCodeObject()) { // We want to update addresses in the code segment. m_parent->AddToVector(0, L, obj); PushToStack(obj); - obj->SetLengthWord(L | _OBJ_GC_MARK); // To prevent rescan + gMem.SpaceForAddress(obj)->writeAble(obj)->SetLengthWord(L | _OBJ_GC_MARK); // To prevent rescan return 0; } // Byte objects always have depth 1 and can't contain addresses. if (obj->IsByteObject()) { m_parent->AddToVector (1, L, obj);// add to vector at correct depth obj->SetLengthWord(OBJ_SET_DEPTH(1)); return 1; } ASSERT(OBJ_IS_WORD_OBJECT(L) || OBJ_IS_CLOSURE_OBJECT(L)); // That leaves immutable data objects. PushToStack(obj); obj->SetLengthWord(L | _OBJ_GC_MARK); // To prevent rescan return 0; } // Adds an object to the stack. void ProcessAddToVector::PushToStack(PolyObject *obj) { if (asp == stackSize) { if (addStack == 0) { addStack = (PolyObject**)malloc(sizeof(PolyObject*) * 100); if (addStack == 0) throw MemoryException(); stackSize = 100; } else { unsigned newSize = stackSize+100; PolyObject** newStack = (PolyObject**)realloc(addStack, sizeof(PolyObject*) * newSize); if (newStack == 0) throw MemoryException(); stackSize = newSize; addStack = newStack; } } ASSERT(asp < stackSize); addStack[asp++] = obj; } // Processes the root and anything reachable from it. Addresses are added to the // explicit stack if an object has not yet been processed. Most of this function // is about processing the stack. 
void ProcessAddToVector::ProcessRoot(PolyObject *root) { // Mark the initial object AddObjectToDepthVector(root); // Process the stack until it's empty. while (asp != 0) { // Pop it from the stack. PolyObject *obj = addStack[asp-1]; if (obj->IsCodeObject()) { // Code cells are now only found in the code area. /* There's a problem sharing code objects if they have relative calls/jumps in them to other code. The code of two functions may be identical (e.g. they both call functions 100 bytes ahead) and so they will appear the same but if the functions they jump to are different they are actually different. For that reason we don't share code segments. DCJM 4/1/01 */ asp--; // Pop it because we'll process it completely ScanAddressesInObject(obj); // If it's local set the depth with the value zero. It has already been // added to the zero depth vector. if (obj->LengthWord() & _OBJ_GC_MARK) - obj->SetLengthWord(OBJ_SET_DEPTH(0)); // Now scanned + gMem.SpaceForAddress(obj)->writeAble(obj)->SetLengthWord(OBJ_SET_DEPTH(0)); // Now scanned } else { POLYUNSIGNED length = obj->Length(); PolyWord *pt = (PolyWord*)obj; unsigned osp = asp; if (obj->IsClosureObject()) { // The first word of a closure is a code pointer. We don't share code but // we do want to share anything reachable from the constants. AddObjectToDepthVector(*(PolyObject**)pt); pt += sizeof(PolyObject*) / sizeof(PolyWord); length -= sizeof(PolyObject*) / sizeof(PolyWord); } if (((obj->LengthWord() & _OBJ_GC_MARK) && !obj->IsMutable())) { // Immutable local objects. These can be shared. We need to compute the // depth by computing the maximum of the depth of all the addresses in it. POLYUNSIGNED depth = 0; while (length != 0 && osp == asp) { POLYUNSIGNED d = AddPolyWordToDepthVectors(*pt); if (d > depth) depth = d; pt++; length--; } if (osp == asp) { // We've finished it asp--; // Pop this item. 
depth++; // One more for this object obj->SetLengthWord(obj->LengthWord() & (~_OBJ_GC_MARK)); m_parent->AddToVector(depth, obj->LengthWord() & (~_OBJ_GC_MARK), obj); obj->SetLengthWord(OBJ_SET_DEPTH(depth)); } } else { // Mutable or non-local objects. These have depth zero. Local objects have // _OBJ_GC_MARK in their header. Immutable permanent objects cannot be // modified so we don't set the depth. Mutable objects are added to the // depth vectors even though they aren't shared so that they will be // updated if they point to immutables that have been shared. while (length != 0) { if (!(*pt).IsTagged()) { // If we've already pushed an address break now if (osp != asp) break; // Process the address and possibly push it AddPolyWordToDepthVectors(*pt); } pt++; length--; } if (length == 0) { // We've finished it if (osp != asp) { ASSERT(osp == asp - 1); addStack[osp - 1] = addStack[osp]; } asp--; // Pop this item. if (obj->LengthWord() & _OBJ_GC_MARK) obj->SetLengthWord(OBJ_SET_DEPTH(0)); } } } } } // This is called by the root thread to do the work. bool ShareDataClass::RunShareData(PolyObject *root) { // We use a bitmap to indicate when we've visited an object to avoid // infinite recursion in cycles in the data. for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (!space->isMutable && space->hierarchy == 0) { if (! space->shareBitmap.Create(space->spaceSize())) return false; } } POLYUNSIGNED totalObjects = 0; POLYUNSIGNED totalShared = 0; // Build the vectors from the immutable objects. bool success = true; try { ProcessAddToVector addToVector(this); addToVector.ProcessRoot(root); } catch (MemoryException &) { // If we ran out of memory we may still be able to process what we have. // That will also do any clean-up. 
success = false; } ProcessFixupAddress fixup; for (POLYUNSIGNED depth = 1; depth < maxVectorSize; depth++) { for (unsigned j = 0; j < FIXEDLENGTHSIZE; j++) { if (depth < depthVectorArray[j].size()) { DepthVector *vec = depthVectorArray[j][depth]; // Set the length word and update all addresses. vec->FixLengthAndAddresses(&fixup); vec->Sort(); POLYUNSIGNED n = vec->MergeSameItems(); if ((debugOptions & DEBUG_SHARING) && n > 0) Log("Sharing: Level %4" POLYUFMT ", size %3u, Objects %6" POLYUFMT ", Shared %6" POLYUFMT " (%1.0f%%)\n", depth, j, vec->ItemCount(), n, (float)n / (float)vec->ItemCount() * 100.0); totalObjects += vec->ItemCount(); totalShared += n; } } } if (debugOptions & DEBUG_SHARING) Log("Sharing: Maximum level %4" POLYUFMT ",\n", maxVectorSize); /* At this stage, we have fixed up most but not all of the forwarding pointers. The ones that we haven't fixed up arise from situations such as the following: X -> Y <-> Z i.e. Y and Z form a loop, and X is isomorphic to Z. When we assigned the depths, we have to arbitrarily break the loop between Y and Z. Suppose Y is assigned to level 1, and Z is assigned to level 2. When we process level 1 and fixup Y, there's nothing to do, since Z is still an ordinary object. However when we process level 2, we find that X and Z are isomorphic so we arbitrarily choose one of them and turn it into a "tombstone" pointing at the other. If we change Z into the tombstone, then Y now contains a pointer that needs fixing up. That's why we need the second fixup pass. Note also that if we had broken the loop the other way, we would have assigned Z to level 1, Y to level 2 and X to level 3, so we would have missed the chance to share Z and X. Perhaps that's why running the program repeatedly sometimes finds extra things to share? SPF 26/1/95 */ /* We have updated the addresses in objects with non-zero level so they point to the single occurrence but we need to do the same with level 0 objects (mutables and code). 
*/ for (unsigned j = 0; j < FIXEDLENGTHSIZE; j++) { if (! depthVectorArray[j].empty()) { DepthVector *v = depthVectorArray[j][0]; // Log this because it could be very large. if (debugOptions & DEBUG_SHARING) Log("Sharing: Level %4" POLYUFMT ", size %3u, Objects %6" POLYUFMT "\n", 0ul, j, v->ItemCount()); v->FixLengthAndAddresses(&fixup); } } /* Previously we made a complete scan over the memory updating any addresses so that if we have shared two substructures within our root we would also share any external pointers. This has been removed but we have to reinstate the length words we've overwritten with forwarding pointers because there may be references to unshared objects from outside. */ for (POLYUNSIGNED d = 1; d < maxVectorSize; d++) { for (unsigned j = 0; j < FIXEDLENGTHSIZE; j++) { if (d < depthVectorArray[j].size()) { DepthVector *v = depthVectorArray[j][d]; v->RestoreForwardingPointers(); } } } if (debugOptions & DEBUG_SHARING) Log ("Sharing: Total Objects %6" POLYUFMT ", Total Shared %6" POLYUFMT " (%1.0f%%)\n", totalObjects, totalShared, (float)totalShared / (float)totalObjects * 100.0); return success; // Succeeded. } class ShareRequest: public MainThreadRequest { public: ShareRequest(Handle root): MainThreadRequest(MTP_SHARING), shareRoot(root), result(false) {} virtual void Perform() { ShareDataClass s; // Do a full GC. If we have a large heap the allocation of the vectors // can cause paging. Doing this now reduces the heap and discards the // allocation spaces. It may be overkill if we are applying the sharing // to a small root but generally it seems to be applied to the whole heap. FullGCForShareCommonData(); // Now do the sharing. result = s.RunShareData(shareRoot->WordP()); } Handle shareRoot; bool result; }; // ShareData. This is the main entry point. // Because this can recurse deeply it needs to be run by the main thread. // Also it manipulates the heap in ways that could mess up other threads // so we need to stop them before executing this. 
void ShareData(TaskData *taskData, Handle root) { if (! root->Word().IsDataPtr()) return; // Nothing to do. We could do handle a code pointer but it shouldn't occur. // Request the main thread to do the sharing. ShareRequest request(root); processes->MakeRootRequest(taskData, &request); // Raise an exception if it failed. if (! request.result) raise_exception_string(taskData, EXC_Fail, "Insufficient memory"); } // RTS call entry. POLYUNSIGNED PolyShareCommonData(PolyObject *threadId, PolyWord root) { TaskData *taskData = TaskData::FindTaskForId(threadId); ASSERT(taskData != 0); taskData->PreRTSCall(); Handle reset = taskData->saveVec.mark(); try { if (! root.IsDataPtr()) return TAGGED(0).AsUnsigned(); // Nothing to do. // Request the main thread to do the sharing. ShareRequest request(taskData->saveVec.push(root)); processes->MakeRootRequest(taskData, &request); // Raise an exception if it failed. if (! request.result) raise_exception_string(taskData, EXC_Fail, "Insufficient memory"); } catch (...) { } // If an ML exception is raised taskData->saveVec.reset(reset); taskData->PostRTSCall(); return TAGGED(0).AsUnsigned(); } struct _entrypts shareDataEPT[] = { { "PolyShareCommonData", (polyRTSFunction)&PolyShareCommonData}, { NULL, NULL} // End of list. }; diff --git a/libpolyml/x86_dep.cpp b/libpolyml/x86_dep.cpp index 91566c2a..3d03ac96 100644 --- a/libpolyml/x86_dep.cpp +++ b/libpolyml/x86_dep.cpp @@ -1,1462 +1,1464 @@ /* Title: Machine dependent code for i386 and X64 under Windows and Unix Copyright (c) 2000-7 Cambridge University Technical Services Limited Further work copyright David C. J. Matthews 2011-19 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. 
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_STDLIB_H #include #endif #include #ifdef HAVE_SIGNAL_H #include #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_ERRNO_H #include #endif #if (defined(_WIN32)) #include #include #endif #include "globals.h" #include "run_time.h" #include "diagnostics.h" #include "processes.h" #include "profiling.h" #include "machine_dep.h" #include "scanaddrs.h" #include "memmgr.h" #include "rtsentry.h" #include "sys.h" // Temporary /********************************************************************** * * Register usage: * * %Reax: First argument to function. Result of function call. * %Rebx: Second argument to function. * %Recx: General register * %Redx: Closure pointer in call. * %Rebp: Points to memory used for extra registers * %Resi: General register. * %Redi: General register. * %Resp: Stack pointer. * The following apply only on the X64 * %R8: Third argument to function * %R9: Fourth argument to function * %R10: Fifth argument to function * %R11: General register * %R12: General register * %R13: General register * %R14: General register * %R15: Memory allocation pointer * **********************************************************************/ #ifdef HOSTARCHITECTURE_X86_64 struct fpSaveArea { double fpregister[7]; // Save area for xmm0-6 }; #else // Structure of floating point save area. 
// This is dictated by the hardware. typedef byte fpregister[10]; struct fpSaveArea { unsigned short cw; unsigned short _unused0; unsigned short sw; unsigned short _unused1; unsigned short tw; unsigned short _unused2; unsigned fip; unsigned short fcs0; unsigned short _unused3; unsigned foo; unsigned short fcs1; unsigned short _unused4; fpregister registers[8]; }; #endif /* the amount of ML stack space to reserve for registers, C exception handling etc. The compiler requires us to reserve 2 stack-frames worth (2 * 20 words). We actually reserve slightly more than this. */ #if (!defined(_WIN32) && !defined(HAVE_SIGALTSTACK)) // If we can't handle signals on a separate stack make sure there's space // on the Poly stack. #define OVERFLOW_STACK_SIZE (50+1024) #else #define OVERFLOW_STACK_SIZE 50 #endif union stackItem { /* #ifndef POLYML32IN64 stackItem(PolyWord v) { words[0] = v.AsUnsigned(); }; stackItem() { words[0] = TAGGED(0).AsUnsigned(); } POLYUNSIGNED words[1]; #else // In 32-in-64 we need to clear the second PolyWord. This assumes little-endian. stackItem(PolyWord v) { words[0] = v.AsUnsigned(); words[1] = 0; }; stackItem() { words[0] = TAGGED(0).AsUnsigned(); words[1] = 0; } POLYUNSIGNED words[2]; #endif */ stackItem(PolyWord v) { argValue = v.AsUnsigned(); } stackItem() { argValue = TAGGED(0).AsUnsigned(); } // These return the low order word. PolyWord w()const { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); } operator PolyWord () { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); } POLYCODEPTR codeAddr; // Return addresses stackItem *stackAddr; // Stack addresses uintptr_t argValue; // Treat an address as an int }; class X86TaskData; // This is passed as the argument vector to X86AsmSwitchToPoly. // The offsets are built into the assembly code and the code-generator. // localMpointer and stackPtr are updated before control returns to C. 
typedef struct _AssemblyArgs { public: PolyWord *localMpointer; // Allocation ptr + 1 word stackItem *handlerRegister; // Current exception handler PolyWord *localMbottom; // Base of memory + 1 word stackItem *stackLimit; // Lower limit of stack stackItem exceptionPacket; // Set if there is an exception byte unusedRequestCode; // No longer used. byte unusedFlag; // No longer used byte returnReason; // Reason for returning from ML. byte unusedRestore; // No longer used. uintptr_t saveCStack; // Saved C stack frame. PolyWord threadId; // My thread id. Saves having to call into RTS for it. stackItem *stackPtr; // Current stack pointer byte *noLongerUsed; // Now removed byte *heapOverFlowCall; // These are filled in with the functions. byte *stackOverFlowCall; byte *stackOverFlowCallEx; // Saved registers, where applicable. stackItem p_rax; stackItem p_rbx; stackItem p_rcx; stackItem p_rdx; stackItem p_rsi; stackItem p_rdi; #ifdef HOSTARCHITECTURE_X86_64 stackItem p_r8; stackItem p_r9; stackItem p_r10; stackItem p_r11; stackItem p_r12; stackItem p_r13; stackItem p_r14; #endif struct fpSaveArea p_fp; } AssemblyArgs; // These next few are temporarily added for the interpreter // This duplicates some code in reals.cpp but is now updated. #define DOUBLESIZE (sizeof(double)/sizeof(POLYUNSIGNED)) union realdb { double dble; POLYUNSIGNED puns[DOUBLESIZE]; }; #define LGWORDSIZE (sizeof(uintptr_t) / sizeof(PolyWord)) class X86TaskData: public TaskData { public: X86TaskData(); unsigned allocReg; // The register to take the allocated space. POLYUNSIGNED allocWords; // The words to allocate. Handle callBackResult; AssemblyArgs assemblyInterface; int saveRegisterMask; // Registers that need to be updated by a GC. 
virtual void GarbageCollect(ScanAddress *process); void ScanStackAddress(ScanAddress *process, stackItem &val, StackSpace *stack); virtual Handle EnterPolyCode(); // Start running ML virtual void InterruptCode(); virtual bool AddTimeProfileCount(SIGNALCONTEXT *context); virtual void InitStackFrame(TaskData *parentTask, Handle proc, Handle arg); virtual void SetException(poly_exn *exc); // Release a mutex in exactly the same way as compiler code virtual Handle AtomicIncrement(Handle mutexp); virtual void AtomicReset(Handle mutexp); // Return the minimum space occupied by the stack. Used when setting a limit. // N.B. This is PolyWords not native words. virtual uintptr_t currentStackSpace(void) const { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE*sizeof(uintptr_t)/sizeof(PolyWord); } // Increment the profile count for an allocation. Also now used for mutex contention. virtual void addProfileCount(POLYUNSIGNED words) { add_count(this, assemblyInterface.stackPtr[0].codeAddr, words); } // PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer virtual void PreRTSCall(void) { TaskData::PreRTSCall(); SaveMemRegisters(); } // PostRTSCall: Before returning we need to restore the heap pointer. // If there has been a GC in the RTS call we need to create a new heap area. 
virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall(); } virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length); virtual Handle EnterCallbackFunction(Handle func, Handle args); int SwitchToPoly(); void HeapOverflowTrap(byte *pcPtr); void SetMemRegisters(); void SaveMemRegisters(); void SetRegisterMask(); void MakeTrampoline(byte **pointer, byte*entryPt); PLock interruptLock; stackItem *get_reg(int n); stackItem *®SP() { return assemblyInterface.stackPtr; } stackItem ®AX() { return assemblyInterface.p_rax; } stackItem ®BX() { return assemblyInterface.p_rbx; } stackItem ®CX() { return assemblyInterface.p_rcx; } stackItem ®DX() { return assemblyInterface.p_rdx; } stackItem ®SI() { return assemblyInterface.p_rsi; } stackItem ®DI() { return assemblyInterface.p_rdi; } #ifdef HOSTARCHITECTURE_X86_64 stackItem ®8() { return assemblyInterface.p_r8; } stackItem ®9() { return assemblyInterface.p_r9; } stackItem ®10() { return assemblyInterface.p_r10; } stackItem ®11() { return assemblyInterface.p_r11; } stackItem ®12() { return assemblyInterface.p_r12; } stackItem ®13() { return assemblyInterface.p_r13; } stackItem ®14() { return assemblyInterface.p_r14; } #endif #if (defined(_WIN32)) DWORD savedErrno; #else int savedErrno; #endif }; class X86Dependent: public MachineDependent { public: X86Dependent() {} // Create a task data object. 
virtual TaskData *CreateTaskData(void) { return new X86TaskData(); } // Initial size of stack in PolyWords virtual unsigned InitialStackSize(void) { return (128+OVERFLOW_STACK_SIZE) * sizeof(uintptr_t) / sizeof(PolyWord); } virtual void ScanConstantsWithinCode(PolyObject *addr, PolyObject *oldAddr, POLYUNSIGNED length, ScanAddress *process); virtual Architectures MachineArchitecture(void) #ifndef HOSTARCHITECTURE_X86_64 { return MA_I386; } #elif defined(POLYML32IN64) { return MA_X86_64_32; } #else { return MA_X86_64; } #endif }; // Values for the returnReason byte enum RETURN_REASON { RETURN_IO_CALL_NOW_UNUSED = 0, RETURN_HEAP_OVERFLOW = 1, RETURN_STACK_OVERFLOW = 2, RETURN_STACK_OVERFLOWEX = 3, RETURN_CALLBACK_RETURN = 6, RETURN_CALLBACK_EXCEPTION = 7, RETURN_KILL_SELF = 9 }; extern "C" { // These are declared in the assembly code segment. void X86AsmSwitchToPoly(void *); extern int X86AsmKillSelf(void); extern int X86AsmCallbackReturn(void); extern int X86AsmCallbackException(void); extern int X86AsmPopArgAndClosure(void); extern int X86AsmRaiseException(void); extern int X86AsmCallExtraRETURN_HEAP_OVERFLOW(void); extern int X86AsmCallExtraRETURN_STACK_OVERFLOW(void); extern int X86AsmCallExtraRETURN_STACK_OVERFLOWEX(void); POLYUNSIGNED X86AsmAtomicIncrement(PolyObject*); POLYUNSIGNED X86AsmAtomicDecrement(PolyObject*); }; // Pointers to assembly code or trampolines to assembly code. 
static byte *popArgAndClosure, *killSelf, *raiseException, *callbackException, *callbackReturn; X86TaskData::X86TaskData(): allocReg(0), allocWords(0), saveRegisterMask(0) { assemblyInterface.heapOverFlowCall = (byte*)X86AsmCallExtraRETURN_HEAP_OVERFLOW; assemblyInterface.stackOverFlowCall = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOW; assemblyInterface.stackOverFlowCallEx = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOWEX; savedErrno = 0; } void X86TaskData::GarbageCollect(ScanAddress *process) { TaskData::GarbageCollect(process); // Process the parent first assemblyInterface.threadId = threadObject; if (stack != 0) { // Now the values on the stack. for (stackItem *q = assemblyInterface.stackPtr; q < (stackItem*)stack->top; q++) ScanStackAddress(process, *q, stack); } // Register mask for (int i = 0; i < 16; i++) { if (saveRegisterMask & (1 << i)) ScanStackAddress(process, *get_reg(i), stack); } } // Process a value within the stack. void X86TaskData::ScanStackAddress(ScanAddress *process, stackItem &stackItem, StackSpace *stack) { // We may have return addresses on the stack which could look like // tagged values. Check whether the value is in the code area before // checking whether it is untagged. #ifdef POLYML32IN64 // In 32-in-64 return addresses always have the top 32 bits non-zero. if (stackItem.argValue < ((uintptr_t)1 << 32)) { // It's either a tagged integer or an object pointer. if (stackItem.w().IsDataPtr()) { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } } else { // Could be a code address or a stack address. MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr - 1); if (space == 0 || space->spaceType != ST_CODE) return; PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr); ASSERT(obj != 0); // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } #else // The -1 here is because we may have a zero-sized cell in the last // word of a space. 
MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr-1); if (space == 0) return; // In particular we may have one of the assembly code addresses. if (space->spaceType == ST_CODE) { PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr); // If it is actually an integer it might be outside a valid code object. if (obj == 0) { ASSERT(stackItem.w().IsTagged()); // It must be an integer } else // Process the address of the start. Don't update anything. process->ScanObjectAddress(obj); } else if (space->spaceType == ST_LOCAL && stackItem.w().IsDataPtr()) // Local values must be word addresses. { PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr()); stackItem = val; } #endif } // Copy a stack void X86TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length) { /* Moves a stack, updating all references within the stack */ #ifdef POLYML32IN64 old_length = old_length / 2; new_length = new_length / 2; #endif stackItem *old_base = (stackItem *)old_stack; stackItem *new_base = (stackItem*)new_stack; stackItem *old_top = old_base + old_length; /* Calculate the offset of the new stack from the old. If the frame is being extended objects in the new frame will be further up the stack than in the old one. */ uintptr_t offset = new_base - old_base + new_length - old_length; stackItem *oldStackPtr = assemblyInterface.stackPtr; // Adjust the stack pointer and handler pointer since these point into the stack. assemblyInterface.stackPtr = assemblyInterface.stackPtr + offset; assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset; // We need to adjust any values on the stack that are pointers within the stack. // Skip the unused part of the stack. 
size_t i = oldStackPtr - old_base; ASSERT (i <= old_length); i = old_length - i; stackItem *old = oldStackPtr; stackItem *newp = assemblyInterface.stackPtr; while (i--) { stackItem old_word = *old++; if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top) old_word.stackAddr = old_word.stackAddr + offset; else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr)) { stackItem *addr = (stackItem*)old_word.w().AsStackAddr(); if (addr >= old_base && addr <= old_top) { addr += offset; old_word = PolyWord::FromStackAddr((PolyWord*)addr); } } *newp++ = old_word; } ASSERT(old == ((stackItem*)old_stack)+old_length); ASSERT(newp == ((stackItem*)new_stack)+new_length); // And change any registers that pointed into the old stack for (int j = 0; j < 16; j++) { if (saveRegisterMask & (1 << j)) { stackItem *regAddr = get_reg(j); stackItem old_word = *regAddr; if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top) old_word.stackAddr = old_word.stackAddr + offset; else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr)) { stackItem *addr = (stackItem*)old_word.w().AsStackAddr(); if (addr >= old_base && addr <= old_top) { addr += offset; old_word = PolyWord::FromStackAddr((PolyWord*)addr); } } *regAddr = old_word; } } } Handle X86TaskData::EnterPolyCode() /* Called from "main" to enter the code. */ { Handle hOriginal = this->saveVec.mark(); // Set this up for the IO calls. while (1) { this->saveVec.reset(hOriginal); // Remove old RTS arguments and results. // Run the ML code and return with the function to call. this->inML = true; int ioFunction = SwitchToPoly(); this->inML = false; try { switch (ioFunction) { case -1: // We've been interrupted. This usually involves simulating a // stack overflow so we could come here because of a genuine // stack overflow. 
// Previously this code was executed on every RTS call but there // were problems on Mac OS X at least with contention on schedLock. // Process any asynchronous events i.e. interrupts or kill processes->ProcessAsynchRequests(this); // Release and re-acquire use of the ML memory to allow another thread // to GC. processes->ThreadReleaseMLMemory(this); processes->ThreadUseMLMemory(this); break; case -2: // A callback has returned. return callBackResult; // Return the saved value. Not used in the new interface. default: Crash("Unknown io operation %d\n", ioFunction); } } catch (IOException &) { } } } // Run the current ML process. X86AsmSwitchToPoly saves the C state so that // whenever the ML requires assistance from the rest of the RTS it simply // returns to C with the appropriate values set in assemblyInterface.requestCode and // int X86TaskData::SwitchToPoly() // (Re)-enter the Poly code from C. Returns with the io function to call or // -1 if we are responding to an interrupt. { Handle mark = this->saveVec.mark(); do { this->saveVec.reset(mark); // Remove old data e.g. from arbitrary precision. SetMemRegisters(); // We need to save the C stack entry across this call in case // we're making a callback and the previous C stack entry is // for the original call. uintptr_t savedCStack = this->assemblyInterface.saveCStack; // Restore the saved error state. #if (defined(_WIN32)) SetLastError(savedErrno); #else errno = savedErrno; #endif // Enter the ML code. X86AsmSwitchToPoly(&this->assemblyInterface); this->assemblyInterface.saveCStack = savedCStack; // Save the error codes. We may have made an RTS/FFI call that // has set these and we don't want to do anything to change them. #if (defined(_WIN32)) savedErrno = GetLastError(); #else savedErrno = errno; #endif SaveMemRegisters(); // Update globals from the memory registers. // Handle any heap/stack overflows or arbitrary precision traps. 
switch (this->assemblyInterface.returnReason) { case RETURN_HEAP_OVERFLOW: // The heap has overflowed. SetRegisterMask(); this->HeapOverflowTrap(assemblyInterface.stackPtr[0].codeAddr); // Computes a value for allocWords only break; case RETURN_STACK_OVERFLOW: case RETURN_STACK_OVERFLOWEX: { SetRegisterMask(); uintptr_t min_size; // Size in PolyWords if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW) { min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } else { // Stack limit overflow. If the required stack space is larger than // the fixed overflow size the code will calculate the limit in %EDI. stackItem *stackP = regDI().stackAddr; min_size = (this->stack->top - (PolyWord*)stackP) + OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord); } try { // The stack check has failed. This may either be because we really have // overflowed the stack or because the stack limit value has been adjusted // to result in a call here. CheckAndGrowStack(this, min_size); } catch (IOException &) { // We may get an exception while handling this if we run out of store } { PLocker l(&interruptLock); // Set the stack limit. This clears any interrupt and also sets the // correct value if we've grown the stack. this->assemblyInterface.stackLimit = (stackItem*)this->stack->bottom + OVERFLOW_STACK_SIZE; } return -1; // We're in a safe state to handle any interrupts. } case RETURN_CALLBACK_RETURN: // regSP has been set by the assembly code. N.B. This may not be the same value as when // EnterCallbackFunction was called because the callback may have grown and moved the stack. // Remove the extra exception handler we created in EnterCallbackFunction ASSERT(assemblyInterface.handlerRegister == regSP()); regSP() += 1; assemblyInterface.handlerRegister = (*(regSP()++)).stackAddr; // Restore the previous handler. this->callBackResult = this->saveVec.push(regAX()); // Argument to return is in RAX. 
return -2; case RETURN_CALLBACK_EXCEPTION: // An ML callback has raised an exception. // It isn't possible to do anything here except abort. Crash("An ML function called from foreign code raised an exception. Unable to continue."); case RETURN_KILL_SELF: exitThread(this); default: Crash("Unknown return reason code %u", this->assemblyInterface.returnReason); } } while (1); } void X86TaskData::MakeTrampoline(byte **pointer, byte *entryPt) { #ifdef POLYML32IN64 // In the native address versions we can store the address directly onto the stack. // We can't do that in 32-in-64 because it's likely that the address will be in the // bottom 32-bits and we can't distinguish it from an object ID. Instead we have to // build a small code segment which jumps to the code. unsigned requiredSize = 8; // 8 words i.e. 32 bytes PolyObject *result = gMem.AllocCodeSpace(requiredSize); - byte *p = (byte*)result; + PolyObject* writeAble = gMem.SpaceForAddress(result)->writeAble(result); + byte *p = (byte*)writeAble; *p++ = 0x48; // rex.w *p++ = 0x8b; // Movl *p++ = 0x0d; // rcx, pc relative *p++ = 0x09; // +2 bytes *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0xff; // jmp *p++ = 0xe1; // rcx *p++ = 0xf4; // hlt - needed to stop scan of constants for (unsigned i = 0; i < 6; i++) *p++ = 0; uintptr_t ep = (uintptr_t)entryPt; for (unsigned i = 0; i < 8; i++) { *p++ = ep & 0xff; ep >>= 8; } // Clear the remainder. In particular this sets the number // of address constants to zero. for (unsigned i = 0; i < 8; i++) *p++ = 0; - result->SetLengthWord(requiredSize, F_CODE_OBJ); + writeAble->SetLengthWord(requiredSize, F_CODE_OBJ); *pointer = (byte*)result; #else *pointer = entryPt; // Can go there directly #endif } void X86TaskData::InitStackFrame(TaskData *parentTaskData, Handle proc, Handle arg) /* Initialise stack frame. */ { // Set the assembly code addresses. 
if (popArgAndClosure == 0) MakeTrampoline(&popArgAndClosure, (byte*)&X86AsmPopArgAndClosure); if (killSelf == 0) MakeTrampoline(&killSelf, (byte*)&X86AsmKillSelf); if (raiseException == 0) MakeTrampoline(&raiseException, (byte*)&X86AsmRaiseException); if (callbackException == 0) MakeTrampoline(&callbackException, (byte*)&X86AsmCallbackException); if (callbackReturn == 0) MakeTrampoline(&callbackReturn, (byte*)&X86AsmCallbackReturn); StackSpace *space = this->stack; StackObject * newStack = space->stack(); uintptr_t stack_size = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem); uintptr_t topStack = stack_size-6; stackItem *stackTop = (stackItem*)newStack + topStack; assemblyInterface.stackPtr = stackTop; assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE; assemblyInterface.handlerRegister = (stackItem*)newStack+topStack+4; // Floating point save area. memset(&assemblyInterface.p_fp, 0, sizeof(struct fpSaveArea)); #ifndef HOSTARCHITECTURE_X86_64 // Set the control word for 64-bit precision otherwise we get inconsistent results. assemblyInterface.p_fp.cw = 0x027f ; // Control word assemblyInterface.p_fp.tw = 0xffff; // Tag registers - all unused #endif // Initial entry point - on the stack. stackTop[0].codeAddr = popArgAndClosure; // Push the argument and the closure on the stack. We can't put them into the registers // yet because we might get a GC before we actually start the code. stackTop[1] = proc->Word(); // Closure stackTop[2] = (arg == 0) ? TAGGED(0) : DEREFWORD(arg); // Argument /* We initialise the end of the stack with a sequence that will jump to kill_self whether the process ends with a normal return or by raising an exception. A bit of this was added to fix a bug when stacks were objects on the heap and could be scanned by the GC. */ stackTop[5] = TAGGED(0); // Probably no longer needed // Set the default handler and return address to point to this code. 
// PolyWord killJump(PolyWord::FromCodePtr((byte*)&X86AsmKillSelf)); // Exception handler. stackTop[4].codeAddr = killSelf; // Normal return address. We need a separate entry on the stack from // the exception handler because it is possible that the code we are entering // may replace this entry with an argument. The code-generator optimises tail-recursive // calls to functions with more args than the called function. stackTop[3].codeAddr = killSelf; #ifdef POLYML32IN64 // In 32-in-64 RBX always contains the heap base address. assemblyInterface.p_rbx.stackAddr = (stackItem*)globalHeapBase; #endif } // In Solaris-x86 the registers are named EIP and ESP. #if (!defined(REG_EIP) && defined(EIP)) #define REG_EIP EIP #endif #if (!defined(REG_ESP) && defined(ESP)) #define REG_ESP ESP #endif // Get the PC and SP(stack) from a signal context. This is needed for profiling. // This version gets the actual sp and pc if we are in ML. bool X86TaskData::AddTimeProfileCount(SIGNALCONTEXT *context) { stackItem * sp = 0; POLYCODEPTR pc = 0; if (context != 0) { // The tests for HAVE_UCONTEXT_T, HAVE_STRUCT_SIGCONTEXT and HAVE_WINDOWS_H need // to follow the tests in processes.h. #if defined(HAVE_WINDOWS_H) #ifdef _WIN64 sp = (stackItem *)context->Rsp; pc = (POLYCODEPTR)context->Rip; #else // Windows 32 including cygwin. 
sp = (stackItem *)context->Esp; pc = (POLYCODEPTR)context->Eip; #endif #elif defined(HAVE_UCONTEXT_T) #ifdef HAVE_MCONTEXT_T_GREGS // Linux #ifndef HOSTARCHITECTURE_X86_64 pc = (byte*)context->uc_mcontext.gregs[REG_EIP]; sp = (stackItem*)context->uc_mcontext.gregs[REG_ESP]; #else /* HOSTARCHITECTURE_X86_64 */ pc = (byte*)context->uc_mcontext.gregs[REG_RIP]; sp = (stackItem*)context->uc_mcontext.gregs[REG_RSP]; #endif /* HOSTARCHITECTURE_X86_64 */ #elif defined(HAVE_MCONTEXT_T_MC_ESP) // FreeBSD #ifndef HOSTARCHITECTURE_X86_64 pc = (byte*)context->uc_mcontext.mc_eip; sp = (stackItem*)context->uc_mcontext.mc_esp; #else /* HOSTARCHITECTURE_X86_64 */ pc = (byte*)context->uc_mcontext.mc_rip; sp = (stackItem*)context->uc_mcontext.mc_rsp; #endif /* HOSTARCHITECTURE_X86_64 */ #else // Mac OS X #ifndef HOSTARCHITECTURE_X86_64 #if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT32_SS)) pc = (byte*)context->uc_mcontext->ss.eip; sp = (stackItem*)context->uc_mcontext->ss.esp; #elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT32___SS)) pc = (byte*)context->uc_mcontext->__ss.__eip; sp = (stackItem*)context->uc_mcontext->__ss.__esp; #endif #else /* HOSTARCHITECTURE_X86_64 */ #if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT64_SS)) pc = (byte*)context->uc_mcontext->ss.rip; sp = (stackItem*)context->uc_mcontext->ss.rsp; #elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT64___SS)) pc = (byte*)context->uc_mcontext->__ss.__rip; sp = (stackItem*)context->uc_mcontext->__ss.__rsp; #endif #endif /* HOSTARCHITECTURE_X86_64 */ #endif #elif defined(HAVE_STRUCT_SIGCONTEXT) #if defined(HOSTARCHITECTURE_X86_64) && defined(__OpenBSD__) // CPP defines missing in amd64/signal.h in OpenBSD pc = (byte*)context->sc_rip; sp = (stackItem*)context->sc_rsp; #else // !HOSTARCHITEXTURE_X86_64 || !defined(__OpenBSD__) pc = (byte*)context->sc_pc; sp = (stackItem*)context->sc_sp; #endif #endif } if (pc != 0) { // See if the PC we've got is an ML code address. 
MemSpace *space = gMem.SpaceForAddress(pc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { add_count(this, pc, 1); return true; } } // See if the sp value is in the current stack. if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top) { // We may be in the assembly code. The top of the stack will be a return address. pc = sp[0].w().AsCodePtr(); MemSpace *space = gMem.SpaceForAddress(pc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { add_count(this, pc, 1); return true; } } // See if the value of regSP is a valid stack pointer. // This works if we happen to be in an RTS call using a "Full" call. // It doesn't work if we've used a "Fast" call because that doesn't save the SP. sp = assemblyInterface.stackPtr; if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top) { // We may be in the run-time system. pc = sp[0].w().AsCodePtr(); MemSpace *space = gMem.SpaceForAddress(pc); if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT)) { add_count(this, pc, 1); return true; } } // None of those worked return false; } // This is called from a different thread so we have to be careful. void X86TaskData::InterruptCode() { PLocker l(&interruptLock); // Set the stack limit pointer to the top of the stack to cause // a trap when we next check for stack overflow. // We use a lock here to ensure that we always use the current value of the // stack. The thread we're interrupting could be growing the stack at this point. if (this->stack != 0) this->assemblyInterface.stackLimit = (stackItem*)(this->stack->top-1); } // This is called from SwitchToPoly before we enter the ML code. void X86TaskData::SetMemRegisters() { // Copy the current store limits into variables before we go into the assembly code. // If we haven't yet set the allocation area or we don't have enough we need // to create one (or a new one). 
if (this->allocPointer <= this->allocLimit + this->allocWords) { if (this->allocPointer < this->allocLimit) Crash ("Bad length in heap overflow trap"); // Find some space to allocate in. Updates taskData->allocPointer and // returns a pointer to the newly allocated space (if allocWords != 0) PolyWord *space = processes->FindAllocationSpace(this, this->allocWords, true); if (space == 0) { // We will now raise an exception instead of returning. // Set allocWords to zero so we don't set the allocation register // since that could be holding the exception packet. this->allocWords = 0; } // Undo the allocation just now. this->allocPointer += this->allocWords; } if (this->allocWords != 0) { // If we have had a heap trap we actually do the allocation here. // We will have already garbage collected and recovered sufficient space. // This also happens if we have just trapped because of store profiling. this->allocPointer -= this->allocWords; // Now allocate // Set the allocation register to this area. N.B. This is an absolute address. if (this->allocReg < 15) get_reg(this->allocReg)[0].codeAddr = (POLYCODEPTR)(this->allocPointer + 1); /* remember: it's off-by-one */ this->allocWords = 0; } // If we have run out of store, either just above or while allocating in the RTS, // allocPointer and allocLimit will have been set to zero as part of the GC. We will // now be raising an exception which may free some store but we need to come back here // before we allocate anything. The compiled code uses unsigned arithmetic to check for // heap overflow but only after subtracting the space required. We need to make sure // that the values are still non-negative after substracting any object size. 
if (this->allocPointer == 0) this->allocPointer += MAX_OBJECT_SIZE; if (this->allocLimit == 0) this->allocLimit += MAX_OBJECT_SIZE; this->assemblyInterface.localMbottom = this->allocLimit + 1; this->assemblyInterface.localMpointer = this->allocPointer + 1; // If we are profiling store allocation we set mem_hl so that a trap // will be generated. if (profileMode == kProfileStoreAllocation) this->assemblyInterface.localMbottom = this->assemblyInterface.localMpointer; this->assemblyInterface.returnReason = RETURN_IO_CALL_NOW_UNUSED; this->assemblyInterface.threadId = this->threadObject; } // This is called whenever we have returned from ML to C. void X86TaskData::SaveMemRegisters() { this->allocPointer = this->assemblyInterface.localMpointer - 1; this->allocWords = 0; this->assemblyInterface.exceptionPacket = TAGGED(0); this->saveRegisterMask = 0; } // Called on a GC or stack overflow trap. The register mask // is in the bytes after the trap call. void X86TaskData::SetRegisterMask() { byte *pc = assemblyInterface.stackPtr[0].codeAddr; if (*pc == 0xcd) // CD - INT n is used for a single byte { pc++; saveRegisterMask = *pc++; } else if (*pc == 0xca) // CA - FAR RETURN is used for a two byte mask { pc++; saveRegisterMask = pc[0] | (pc[1] << 8); pc += 2; } assemblyInterface.stackPtr[0].codeAddr = pc; } stackItem *X86TaskData::get_reg(int n) /* Returns a pointer to the register given by n. */ { switch (n) { case 0: return &assemblyInterface.p_rax; case 1: return &assemblyInterface.p_rcx; case 2: return &assemblyInterface.p_rdx; case 3: return &assemblyInterface.p_rbx; // Should not have rsp or rbp. 
case 6: return &assemblyInterface.p_rsi; case 7: return &assemblyInterface.p_rdi; #ifdef HOSTARCHITECTURE_X86_64 case 8: return &assemblyInterface.p_r8; case 9: return &assemblyInterface.p_r9; case 10: return &assemblyInterface.p_r10; case 11: return &assemblyInterface.p_r11; case 12: return &assemblyInterface.p_r12; case 13: return &assemblyInterface.p_r13; case 14: return &assemblyInterface.p_r14; // R15 is the heap pointer so shouldn't occur here. #endif /* HOSTARCHITECTURE_X86_64 */ default: Crash("Unknown register %d\n", n); } } // Called as a result of a heap overflow trap void X86TaskData::HeapOverflowTrap(byte *pcPtr) { X86TaskData *mdTask = this; POLYUNSIGNED wordsNeeded = 0; // The next instruction, after any branches round forwarding pointers or pop // instructions, will be a store of register containing the adjusted heap pointer. // We need to find that register and the value in it in order to find out how big // the area we actually wanted is. N.B. The code-generator and assembly code // must generate the correct instruction sequence. // byte *pcPtr = assemblyInterface.programCtr; while (true) { if (pcPtr[0] == 0xeb) { // Forwarding pointer if (pcPtr[1] >= 128) pcPtr += 256 - pcPtr[1] + 2; else pcPtr += pcPtr[1] + 2; } else if ((pcPtr[0] & 0xf8) == 0x58) // Pop instruction. pcPtr++; else if (pcPtr[0] == 0x41 && ((pcPtr[1] & 0xf8) == 0x58)) // Pop with Rex prefix pcPtr += 2; else break; } #ifndef HOSTARCHITECTURE_X86_64 // This should be movl REG,0[%ebp]. ASSERT(pcPtr[0] == 0x89); mdTask->allocReg = (pcPtr[1] >> 3) & 7; // Remember this until we allocate the memory stackItem *reg = get_reg(mdTask->allocReg); stackItem reg_val = *reg; // The space we need is the difference between this register // and the current value of newptr. // The +1 here is because assemblyInterface.localMpointer is A.M.pointer +1. The reason // is that after the allocation we have the register pointing at the address we will // actually use. 
wordsNeeded = (this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1; *reg = TAGGED(0); // Clear this - it's not a valid address. /* length in words, including length word */ ASSERT (wordsNeeded <= (1<<24)); /* Max object size including length/flag word is 2^24 words. */ #else /* HOSTARCHITECTURE_X86_64 */ ASSERT(pcPtr[1] == 0x89 || pcPtr[1] == 0x8b); if (pcPtr[1] == 0x89) { // New (5.4) format. This should be movq REG,%r15 ASSERT(pcPtr[0] == 0x49 || pcPtr[0] == 0x4d); mdTask->allocReg = (pcPtr[2] >> 3) & 7; // Remember this until we allocate the memory if (pcPtr[0] & 0x4) mdTask->allocReg += 8; } else { // Alternative form of movq REG,%r15 ASSERT(pcPtr[0] == 0x4c || pcPtr[0] == 0x4d); mdTask->allocReg = pcPtr[2] & 7; // Remember this until we allocate the memory if (pcPtr[0] & 0x1) mdTask->allocReg += 8; } stackItem *reg = get_reg(this->allocReg); stackItem reg_val = *reg; wordsNeeded = (POLYUNSIGNED)((this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1); *reg = TAGGED(0); // Clear this - it's not a valid address. #endif /* HOSTARCHITECTURE_X86_64 */ if (profileMode == kProfileStoreAllocation) addProfileCount(wordsNeeded); mdTask->allocWords = wordsNeeded; // The actual allocation is done in SetMemRegisters. } void X86TaskData::SetException(poly_exn *exc) // Set up the stack to raise an exception. { // Do we need to set the PC value any longer? It may be necessary if // we have taken a trap because another thread has sent a broadcast interrupt. (--assemblyInterface.stackPtr)->codeAddr = raiseException; regAX() = (PolyWord)exc; /* put exception data into eax */ assemblyInterface.exceptionPacket = (PolyWord)exc; // Set for direct calls. } // Sets up a callback function on the current stack. The present state is that // the ML code has made a call in to foreign_dispatch. We need to set the stack // up so that we will enter the callback (as with CallCodeTupled) but when we return // the result we enter callback_return. 
Handle X86TaskData::EnterCallbackFunction(Handle func, Handle args) { // If we ever implement a light version of the FFI that allows a call to C // code without saving enough to allow allocation in C code we need to ensure // that this code doesn't do any allocation. Essentially we need the values // in localMpointer and localMbottom to be valid across a call to C. If we do // a callback the ML callback function would pick up the values saved in the // originating call. // However, it is essential that the light version still saves the stack pointer // and reloads it afterwards. // Set up an exception handler so we will enter callBackException if there is an exception. (--regSP())->stackAddr = assemblyInterface.handlerRegister; // Create a special handler entry (--regSP())->codeAddr = callbackException; assemblyInterface.handlerRegister = regSP(); // Push the call to callBackReturn onto the stack as the return address. (--regSP())->codeAddr = callbackReturn; // Set up the entry point of the callback. PolyObject *functToCall = func->WordP(); regDX() = (PolyWord)functToCall; // Closure address regAX() = args->Word(); // Push entry point address (--regSP())->codeAddr = *(POLYCODEPTR*)functToCall; // First word of closure is entry pt. return EnterPolyCode(); } // Decode and process an effective address. There may // be a constant address in here but in any case we need // to decode it to work out where the next instruction starts. // If this is an lea instruction any addresses are just constants // so must not be treated as addresses. static void skipea(PolyObject *base, byte **pt, ScanAddress *process, bool lea) { unsigned int modrm = *((*pt)++); unsigned int md = modrm >> 6; unsigned int rm = modrm & 7; if (md == 3) { } /* Register. */ else if (rm == 4) { /* s-i-b present. */ unsigned int sib = *((*pt)++); if (md == 0) { if ((sib & 7) == 5) { if (! 
lea) { #ifndef HOSTARCHITECTURE_X86_64 process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT); #endif /* HOSTARCHITECTURE_X86_64 */ } (*pt) += 4; } } else if (md == 1) (*pt)++; else if (md == 2) (*pt) += 4; } else if (md == 0 && rm == 5) { if (!lea) { #ifndef HOSTARCHITECTURE_X86_64 /* Absolute address. */ process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT); #endif /* HOSTARCHITECTURE_X86_64 */ } *pt += 4; } else { if (md == 1) *pt += 1; else if (md == 2) *pt += 4; } } /* Added to deal with constants within the code rather than in the constant area. The constant area is still needed for the function name. DCJM 2/1/2001 */ void X86Dependent::ScanConstantsWithinCode(PolyObject *addr, PolyObject *old, POLYUNSIGNED length, ScanAddress *process) { byte *pt = (byte*)addr; PolyWord *end = addr->Offset(length - 1); #ifdef POLYML32IN64 // If this begins with enter-int it's interpreted code - ignore if (pt[0] == 0xff && pt[1] == 0x55 && pt[2] == 0x48) return; #endif while (true) { // Escape prefixes come before any Rex byte if (*pt == 0xf2 || *pt == 0xf3 || *pt == 0x66) pt++; #ifdef HOSTARCHITECTURE_X86_64 // REX prefixes. Set this first. byte lastRex; if (*pt >= 0x40 && *pt <= 0x4f) lastRex = *pt++; else lastRex = 0; //printf("pt=%p *pt=%x\n", pt, *pt); #endif /* HOSTARCHITECTURE_X86_64 */ switch (*pt) { case 0x00: return; // This is actually the first byte of the old "marker" word. case 0xf4: return; // Halt - now used as a marker. case 0x50: case 0x51: case 0x52: case 0x53: case 0x54: case 0x55: case 0x56: case 0x57: /* Push */ case 0x58: case 0x59: case 0x5a: case 0x5b: case 0x5c: case 0x5d: case 0x5e: case 0x5f: /* Pop */ case 0x90: /* nop */ case 0xc3: /* ret */ case 0xf9: /* stc */ case 0xce: /* into */ case 0xf0: /* lock. 
*/ case 0xf3: /* rep/repe */ case 0xa4: case 0xa5: case 0xaa: case 0xab: /* movs/stos */ case 0xa6: /* cmpsb */ case 0x9e: /* sahf */ case 0x99: /* cqo/cdq */ pt++; break; case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77: case 0x78: case 0x79: case 0x7a: case 0x7b: case 0x7c: case 0x7d: case 0x7e: case 0x7f: case 0xeb: /* short jumps. */ case 0xcd: /* INT - now used for a register mask */ case 0xa8: /* TEST_ACC8 */ case 0x6a: /* PUSH_8 */ pt += 2; break; case 0xc2: /* RET_16 */ case 0xca: /* FAR RET 16 - used for a register mask */ pt += 3; break; case 0x8d: /* leal. */ pt++; skipea(addr, &pt, process, true); break; case 0x03: case 0x0b: case 0x13: case 0x1b: case 0x23: case 0x2b: case 0x33: case 0x3b: /* Add r,ea etc. */ case 0x88: /* MOVB_R_A */ case 0x89: /* MOVL_R_A */ case 0x8b: /* MOVL_A_R */ case 0x62: /* BOUNDL */ case 0xff: /* Group5 */ case 0xd1: /* Group2_1_A */ case 0x8f: /* POP_A */ case 0xd3: /* Group2_CL_A */ case 0x87: // XCHNG case 0x63: // MOVSXD pt++; skipea(addr, &pt, process, false); break; case 0xf6: /* Group3_a */ { int isTest = 0; pt++; /* The test instruction has an immediate operand. */ if ((*pt & 0x38) == 0) isTest = 1; skipea(addr, &pt, process, false); if (isTest) pt++; break; } case 0xf7: /* Group3_A */ { int isTest = 0; pt++; /* The test instruction has an immediate operand. */ if ((*pt & 0x38) == 0) isTest = 1; skipea(addr, &pt, process, false); if (isTest) pt += 4; break; } case 0xc1: /* Group2_8_A */ case 0xc6: /* MOVB_8_A */ case 0x83: /* Group1_8_A */ case 0x80: /* Group1_8_a */ case 0x6b: // IMUL Ev,Ib pt++; skipea(addr, &pt, process, false); pt++; break; case 0x69: // IMUL Ev,Iv pt++; skipea(addr, &pt, process, false); pt += 4; break; case 0x81: /* Group1_32_A */ { pt ++; #ifndef HOSTARCHITECTURE_X86_64 unsigned opCode = *pt; #endif skipea(addr, &pt, process, false); // Only check the 32 bit constant if this is a comparison. 
// For other operations this may be untagged and shouldn't be an address. #ifndef HOSTARCHITECTURE_X86_64 if ((opCode & 0x38) == 0x38) process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT); #endif pt += 4; break; } case 0xe8: case 0xe9: // Long jump and call. These are used to call constant (known) functions // and also long jumps within the function. { pt++; POLYSIGNED disp = (pt[3] & 0x80) ? -1 : 0; // Set the sign just in case. for(unsigned i = 4; i > 0; i--) disp = (disp << 8) | pt[i-1]; byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant // If the new address is within the current piece of code we don't do anything if (absAddr >= (byte*)addr && absAddr < (byte*)end) {} else { #ifdef HOSTARCHITECTURE_X86_64 ASSERT(sizeof(PolyWord) == 4); // Should only be used internally on x64 #endif /* HOSTARCHITECTURE_X86_64 */ if (addr != old) { // The old value of the displacement was relative to the old address before // we copied this code segment. // We have to correct it back to the original address. absAddr = absAddr - (byte*)addr + (byte*)old; // We have to correct the displacement for the new location and store // that away before we call ScanConstant. size_t newDisp = absAddr - pt - 4; + byte* wr = gMem.SpaceForAddress(pt)->writeAble(pt); for (unsigned i = 0; i < 4; i++) { - pt[i] = (byte)(newDisp & 0xff); + wr[i] = (byte)(newDisp & 0xff); newDisp >>= 8; } } process->ScanConstant(addr, pt, PROCESS_RELOC_I386RELATIVE); } pt += 4; break; } case 0xc7:/* MOVL_32_A */ { pt++; if ((*pt & 0xc0) == 0x40 /* Byte offset or sib present */ && ((*pt & 7) != 4) /* But not sib present */ && pt[1] == 256-sizeof(PolyWord)) { /* We may use a move instruction to set the length word on a new segment. We mustn't try to treat this as a constant. */ pt += 6; /* Skip the modrm byte, the offset and the constant. 
*/ } else { skipea(addr, &pt, process, false); #ifndef HOSTARCHITECTURE_X86_64 // This isn't used for addresses even in 32-in-64 process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT); #endif /* HOSTARCHITECTURE_X86_64 */ pt += 4; } break; } case 0xb8: case 0xb9: case 0xba: case 0xbb: case 0xbc: case 0xbd: case 0xbe: case 0xbf: /* MOVL_32_64_R */ pt ++; #ifdef HOSTARCHITECTURE_X86_64 if ((lastRex & 8) == 0) pt += 4; // 32-bit mode on 64-bits else #endif /* HOSTARCHITECTURE_X86_64 */ { // This is no longer generated in 64-bit mode but needs to // be retained in native 64-bit for backwards compatibility. #ifndef POLYML32IN64 // 32 bits in 32-bit mode, 64-bits in 64-bit mode. process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT); #endif pt += sizeof(PolyWord); } break; case 0x68: /* PUSH_32 */ pt ++; #if (!defined(HOSTARCHITECTURE_X86_64) || defined(POLYML32IN64)) // Currently the only inline constant in 32-in-64. process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT); #endif pt += 4; break; case 0x0f: /* ESCAPE */ { pt++; switch (*pt) { case 0xb6: /* movzl */ case 0xb7: // movzw case 0xc1: /* xaddl */ case 0xae: // ldmxcsr/stmxcsr case 0xaf: // imul case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47: case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f: // cmov pt++; skipea(addr, &pt, process, false); break; case 0x80: case 0x81: case 0x82: case 0x83: case 0x84: case 0x85: case 0x86: case 0x87: case 0x88: case 0x89: case 0x8a: case 0x8b: case 0x8c: case 0x8d: case 0x8e: case 0x8f: /* Conditional branches with 32-bit displacement. */ pt += 5; break; case 0x90: case 0x91: case 0x92: case 0x93: case 0x94: case 0x95: case 0x96: case 0x97: case 0x98: case 0x99: case 0x9a: case 0x9b: case 0x9c: case 0x9d: case 0x9e: case 0x9f: /* SetCC. 
*/ pt++; skipea(addr, &pt, process, false); break; // These are SSE2 instructions case 0x10: case 0x11: case 0x58: case 0x5c: case 0x59: case 0x5e: case 0x2e: case 0x2a: case 0x54: case 0x57: case 0x5a: case 0x6e: case 0x7e: case 0x2c: case 0x2d: pt++; skipea(addr, &pt, process, false); break; case 0x73: // PSRLDQ - EA,imm pt++; skipea(addr, &pt, process, false); pt++; break; default: Crash("Unknown opcode %d at %p\n", *pt, pt); } break; } case 0xd8: case 0xd9: case 0xda: case 0xdb: case 0xdc: case 0xdd: case 0xde: case 0xdf: // Floating point escape instructions { pt++; if ((*pt & 0xe0) == 0xe0) pt++; else skipea(addr, &pt, process, false); break; } default: Crash("Unknown opcode %d at %p\n", *pt, pt); } } } // Increment the value contained in the first word of the mutex. Handle X86TaskData::AtomicIncrement(Handle mutexp) { PolyObject *p = DEREFHANDLE(mutexp); POLYUNSIGNED result = X86AsmAtomicIncrement(p); return this->saveVec.push(PolyWord::FromUnsigned(result)); } // Release a mutex. Because the atomic increment and decrement // use the hardware LOCK prefix we can simply set this to one. void X86TaskData::AtomicReset(Handle mutexp) { DEREFHANDLE(mutexp)->Set(0, TAGGED(1)); } static X86Dependent x86Dependent; MachineDependent *machineDependent = &x86Dependent; class X86Module : public RtsModule { public: virtual void GarbageCollect(ScanAddress * /*process*/); }; // Declare this. It will be automatically added to the table. static X86Module x86Module; void X86Module::GarbageCollect(ScanAddress *process) { #ifdef POLYML32IN64 // These are trampolines in the code area rather than direct calls. 
if (popArgAndClosure != 0) process->ScanRuntimeAddress((PolyObject**)&popArgAndClosure, ScanAddress::STRENGTH_STRONG); if (killSelf != 0) process->ScanRuntimeAddress((PolyObject**)&killSelf, ScanAddress::STRENGTH_STRONG); if (raiseException != 0) process->ScanRuntimeAddress((PolyObject**)&raiseException, ScanAddress::STRENGTH_STRONG); if (callbackException != 0) process->ScanRuntimeAddress((PolyObject**)&callbackException, ScanAddress::STRENGTH_STRONG); if (callbackReturn != 0) process->ScanRuntimeAddress((PolyObject**)&callbackReturn, ScanAddress::STRENGTH_STRONG); #endif }