diff --git a/libpolyml/gc_share_phase.cpp b/libpolyml/gc_share_phase.cpp index f4c2bf80..42c87667 100644 --- a/libpolyml/gc_share_phase.cpp +++ b/libpolyml/gc_share_phase.cpp @@ -1,753 +1,1009 @@ /* Title: Multi-Threaded Garbage Collector - Data sharing phase Copyright (c) 2012, 2017, 2019 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ /* GC Sharing pass. This pass is invoked only if the heap sizing code detects that heap space is running very short because it adds a very considerable overhead to GC. It aims to reduce the size of the live data in a similar way to the data sharing function PolyML.shareCommonData by merging immutable cells that contain data that cannot be distinguished. This version of the code now does a deep structure merge in a similar way to the full sharing function. This code first does a full pass over the heap creating lists of cells that could possibly be merged. There are separate lists for byte and word objects up to a fixed size. Larger objects and other objects are not considered. Because all the items in a list have the same length and type (flag bits) we can use the length word to link the items in the list. A consequence of this is that positive long precision values can be shared but negative values cannot. There is a sharing function that first distributes items into a hash table. Then each hash table is sorted and as part of the sorting process cells with the same contents are merged. One cell is chosen and the length words on the others are set to be forwarding pointers to the chosen cell. Hashing allows for easy parallel processing. The structure sharing code works by first sharing the byte data which cannot contain pointers. Then the word data is processed to separate out "tail" cells that contain only tagged integers or pointers to cells that either cannot be merged, such as mutables, or those that have already been processed, such as the byte data. Any pointers to shared data are updated to point to the merged cell. The tail cells are then sorted and shared using the sharing function and become part of the "processed" set. This process is repeated to find cells that are now tails and so on. Compared with the full sharing code this is expensive since it requires repeated scans of the list of unprocessed cells. In particular there may be cells that form loops (basically closures for mutually recusive functions) and if they are present they and anything that points directly or indirectly at them will never be removed from the list. We stop when it appears that we are not making progress and simply do a final bit-wise share of the remainder. This now uses the forwarding pointer both to indicate that a cell shares with another and also to link together cells that have yet to be tested for sharing. To detect the difference the bitmap is used. The initial scan to create the sharing chains sets the bit for each visited cell so at the start of the sharing phase all reachable cells will be marked. We remove the mark if the cell is to be removed. This requires the bitmap to be locked. */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) #endif #ifdef HAVE_STRING_H #include #endif #ifdef HAVE_STDLIB_H #include #endif #include "globals.h" #include "processes.h" #include "gc.h" #include "scanaddrs.h" #include "bitmap.h" #include "memmgr.h" #include "diagnostics.h" #include "gctaskfarm.h" #include "heapsizing.h" #ifdef POLYML32IN64 #define ENDOFLIST ((PolyObject*)globalHeapBase) #else #define ENDOFLIST 0 #endif // Set the forwarding so that references to objToSet will be forwarded to // objToShare. objToSet will be garbage. void shareWith(PolyObject *objToSet, PolyObject *objToShare) { // We need to remove the bit from this so that we know it's not // a share chain. PolyWord *lengthWord = ((PolyWord*)objToSet) - 1; LocalMemSpace *space = gMem.LocalSpaceForAddress(lengthWord); ASSERT(space); PLocker locker(&space->bitmapLock); ASSERT(space->bitmap.TestBit(space->wordNo(lengthWord))); space->bitmap.ClearBit(space->wordNo(lengthWord)); // Actually do the forwarding objToSet->SetForwardingPtr(objToShare); } // When we find an address it could be a cell that: // 1. is never processed or one that is the copy to be retained, // 2. has been merged with another and contains a forwarding pointer or // 3. has not yet been processed. typedef enum { REALOBJECT, FORWARDED, CHAINED } objectState; objectState getObjectState(PolyObject *p) { PolyWord *lengthWord = ((PolyWord*)p) - 1; LocalMemSpace *space = gMem.LocalSpaceForAddress(lengthWord); if (space == 0) return REALOBJECT; // May be the address of a permanent or something else. PLocker locker(&space->bitmapLock); if (!p->ContainsForwardingPtr()) return REALOBJECT; if (space->bitmap.TestBit(space->wordNo(lengthWord))) return CHAINED; else return FORWARDED; } class ObjEntry { public: ObjEntry(): objList(ENDOFLIST), objCount(0), shareCount(0) {} PolyObject *objList; POLYUNSIGNED objCount; POLYUNSIGNED shareCount; }; // There is an instance of this class for each combination of size and // word/byte. class SortVector { public: SortVector(): totalCount(0), carryOver(0) {} void AddToVector(PolyObject *obj, POLYUNSIGNED length); void SortData(void); POLYUNSIGNED TotalCount() const { return totalCount; } POLYUNSIGNED CurrentCount() const { return baseObject.objCount; } POLYUNSIGNED Shared() const; void SetLengthWord(POLYUNSIGNED l) { lengthWord = l; } POLYUNSIGNED CarryOver() const { return carryOver; } static void hashAndSortAllTask(GCTaskId*, void *a, void *b); static void sharingTask(GCTaskId*, void *a, void *b); static void wordDataTask(GCTaskId*, void *a, void *b); private: void sortList(PolyObject *head, POLYUNSIGNED nItems, POLYUNSIGNED &count); ObjEntry baseObject, processObjects[256]; POLYUNSIGNED totalCount; POLYUNSIGNED lengthWord; POLYUNSIGNED carryOver; }; POLYUNSIGNED SortVector::Shared() const { // Add all the sharing counts POLYUNSIGNED shareCount = baseObject.shareCount; for (unsigned i = 0; i < 256; i++) shareCount += processObjects[i].shareCount; return shareCount; } void SortVector::AddToVector(PolyObject *obj, POLYUNSIGNED length) { obj->SetForwardingPtr(baseObject.objList); baseObject.objList = obj; baseObject.objCount++; totalCount++; } // The number of byte and word entries. // Objects of up to and including this size are shared. // Byte objects include strings so it is more likely that // larger objects will share. Word objects that share // are much more likely to be 2 or 3 words. #define NUM_BYTE_VECTORS 23 #define NUM_WORD_VECTORS 11 +// Recursive scan over a data structure. +class RecursiveScan : public ScanAddress +{ +public: + virtual PolyObject *ScanObjectAddress(PolyObject *base); + virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord); + // Have to redefine this for some reason. + void ScanAddressesInObject(PolyObject *base) + { + ScanAddressesInObject(base, base->LengthWord()); + } + +protected: + // The derived class must provide a stack. + virtual void PushToStack(PolyObject *obj, PolyWord *base) = 0; + virtual void PopFromStack(PolyObject *&obj, PolyWord *&base) = 0; + virtual bool StackIsEmpty(void) = 0; + + // Test the word at the location to see if it points to + // something that may have to be scanned. We pass in the + // pointer here because the called may side-effect it. + virtual bool TestForScan(PolyWord *) = 0; + // If we are definitely scanning the address we mark it. + virtual void MarkAsScanning(PolyObject *) = 0; + // Called when the object has been completed. + virtual void Completed(PolyObject *) {} +}; + +// Recursive scan with a dynamically allocated stack + +// The stack is allocated as a series of blocks chained together. +#define RSTACK_SEGMENT_SIZE 1000 + +class RScanStack { +public: + RScanStack() : nextStack(0), lastStack(0), sp(0) {} + ~RScanStack() { delete(nextStack); } + + RScanStack *nextStack; + RScanStack *lastStack; + unsigned sp; + struct { PolyObject *obj; PolyWord *base; } stack[RSTACK_SEGMENT_SIZE]; +}; + +class RecursiveScanWithStack : public RecursiveScan +{ +public: + RecursiveScanWithStack() : stack(0) {} + ~RecursiveScanWithStack(); + +protected: + // StackOverflow is called if allocating a new stack + // segment fails. + virtual void StackOverflow(void) = 0; + + virtual void PushToStack(PolyObject *obj, PolyWord *base); + virtual void PopFromStack(PolyObject *&obj, PolyWord *&base); + virtual bool StackIsEmpty(void); + + RScanStack *stack; +}; + +// This gets called in two circumstances. It may be called for the roots +// in which case the stack will be empty and we want to process it completely +// or it is called for a constant address in which case it will have been +// called from RecursiveScan::ScanAddressesInObject and that can process +// any addresses. +PolyObject *RecursiveScan::ScanObjectAddress(PolyObject *obj) +{ + PolyWord pWord = obj; + // Test to see if this needs to be scanned. + // It may update the word. + bool test = TestForScan(&pWord); + obj = pWord.AsObjPtr(); + + if (test) + { + MarkAsScanning(obj); + if (obj->IsByteObject()) + Completed(obj); // Don't need to put it on the stack + // If we already have something on the stack we must being called + // recursively to process a constant in a code segment. Just push + // it on the stack and let the caller deal with it. + else if (StackIsEmpty()) + RecursiveScan::ScanAddressesInObject(obj, obj->LengthWord()); + else + PushToStack(obj, (PolyWord*)obj); + } + + return obj; +} + +// This is called via ScanAddressesInRegion to process the permanent mutables. It is +// also called from ScanObjectAddress to process root addresses. +// It processes all the addresses reachable from the object. +// This is almost the same as MTGCProcessMarkPointers::ScanAddressesInObject. +void RecursiveScan::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord) +{ + if (OBJ_IS_BYTE_OBJECT(lengthWord)) + return; // Ignore byte cells and don't call Completed on them + + PolyWord *baseAddr = (PolyWord*)obj; + + while (true) + { + ASSERT(OBJ_IS_LENGTH(lengthWord)); + + // Get the length and base address. N.B. If this is a code segment + // these will be side-effected by GetConstSegmentForCode. + POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord); + + if (OBJ_IS_CODE_OBJECT(lengthWord) || OBJ_IS_CLOSURE_OBJECT(lengthWord)) + { + // It's better to process the whole code object in one go. + // For the moment do that for closure objects as well. + ScanAddress::ScanAddressesInObject(obj, lengthWord); + length = 0; // Finished + } + + // else it's a normal object, + + // If there are only two addresses in this cell that need to be + // followed we follow them immediately and treat this cell as done. + // If there are more than two we push the address of this cell on + // the stack, follow the first address and then rescan it. That way + // list cells are processed once only but we don't overflow the + // stack by pushing all the addresses in a very large vector. + PolyWord *endWord = (PolyWord*)obj + length; + PolyObject *firstWord = 0; + PolyObject *secondWord = 0; + PolyWord *restartFrom = baseAddr; + + while (baseAddr != endWord) + { + PolyWord wordAt = *baseAddr; + + if (wordAt.IsDataPtr() && wordAt != PolyWord::FromUnsigned(0)) + { + // Normal address. We can have words of all zeros at least in the + // situation where we have a partially constructed code segment where + // the constants at the end of the code have not yet been filled in. + if (TestForScan(baseAddr)) // Test value at baseAddr (may side-effect it) + { + PolyObject *wObj = (*baseAddr).AsObjPtr(); + if (wObj->IsByteObject()) + { + // Can do this now - don't need to push it + MarkAsScanning(wObj); + Completed(wObj); + } + else if (firstWord == 0) + { + firstWord = wObj; + // We mark the word immediately. We can have + // two words in an object that are the same + // and we don't want to process it again. + MarkAsScanning(firstWord); + } + else if (secondWord == 0) + { + secondWord = wObj; + restartFrom = baseAddr; + } + else break; // More than two words. + } + } + baseAddr++; + } + + if (baseAddr == endWord) + { + // We have done everything except possibly firstWord and secondWord. + // Note: Unfortunately the way that ScanAddressesInRegion works means that + // we call Completed on the addresses of cells in the permanent areas without + // having called TestForScan. + Completed(obj); + if (secondWord != 0) + { + MarkAsScanning(secondWord); + // Put this on the stack. If this is a list node we will be + // pushing the tail. + PushToStack(secondWord, (PolyWord*)secondWord); + } + } + else // Put this back on the stack while we process the first word + PushToStack(obj, restartFrom); + + if (firstWord != 0) + { + // Process it immediately. + obj = firstWord; + baseAddr = (PolyWord*)obj; + } + else if (StackIsEmpty()) + return; + else + PopFromStack(obj, baseAddr); + + lengthWord = obj->LengthWord(); + } +} + +RecursiveScanWithStack::~RecursiveScanWithStack() +{ + delete(stack); +} + +bool RecursiveScanWithStack::StackIsEmpty(void) +{ + return stack == 0 || (stack->sp == 0 && stack->lastStack == 0); +} + +void RecursiveScanWithStack::PushToStack(PolyObject *obj, PolyWord *base) +{ + if (stack == 0 || stack->sp == RSTACK_SEGMENT_SIZE) + { + if (stack != 0 && stack->nextStack != 0) + stack = stack->nextStack; + else + { + // Need a new segment + try { + RScanStack *s = new RScanStack; + s->lastStack = stack; + if (stack != 0) + stack->nextStack = s; + stack = s; + } + catch (std::bad_alloc &) { + StackOverflow(); + return; + } + } + } + stack->stack[stack->sp].obj = obj; + stack->stack[stack->sp].base = base; + stack->sp++; +} + +void RecursiveScanWithStack::PopFromStack(PolyObject *&obj, PolyWord *&base) +{ + if (stack->sp == 0) + { + // Chain to the previous stack if any + ASSERT(stack->lastStack != 0); + // Before we do, delete any further one to free some memory + delete(stack->nextStack); + stack->nextStack = 0; + stack = stack->lastStack; + ASSERT(stack->sp == RSTACK_SEGMENT_SIZE); + } + --stack->sp; + obj = stack->stack[stack->sp].obj; + base = stack->stack[stack->sp].base; +} + class GetSharing: public RecursiveScanWithStack { public: GetSharing(); void SortData(void); static void shareByteData(GCTaskId *, void *, void *); static void shareWordData(GCTaskId *, void *, void *); static void shareRemainingWordData(GCTaskId *, void *, void *); virtual PolyObject *ScanObjectAddress(PolyObject *obj); protected: virtual bool TestForScan(PolyWord *); virtual void MarkAsScanning(PolyObject *); virtual void StackOverflow(void) { } // Ignore stack overflow virtual void Completed(PolyObject *); private: // The head of chains of cells of the same size SortVector byteVectors[NUM_BYTE_VECTORS]; SortVector wordVectors[NUM_WORD_VECTORS]; POLYUNSIGNED largeWordCount, largeByteCount, excludedCount; public: POLYUNSIGNED totalVisited, byteAdded, wordAdded, totalSize; }; GetSharing::GetSharing() { for (unsigned i = 0; i < NUM_BYTE_VECTORS; i++) byteVectors[i].SetLengthWord((POLYUNSIGNED)i | _OBJ_BYTE_OBJ); for (unsigned j = 0; j < NUM_WORD_VECTORS; j++) wordVectors[j].SetLengthWord(j); largeWordCount = largeByteCount = excludedCount = 0; totalVisited = byteAdded = wordAdded = totalSize = 0; } // This is called for roots and also for constants in the constant area. // If we have a code address we MUSTN't call RecursiveScan::ScanObjectAddress // because that turns the address into a PolyWord and doesn't work in 32-in-64. // We process the code area explicitly so we can simply skip code addresses. PolyObject *GetSharing::ScanObjectAddress(PolyObject *obj) { LocalMemSpace *sp = gMem.LocalSpaceForAddress((PolyWord*)obj - 1); if (sp == 0) return obj; return RecursiveScanWithStack::ScanObjectAddress(obj); } bool GetSharing::TestForScan(PolyWord *pt) { PolyObject *obj; // This may be a forwarding pointer left over from a minor GC that did // not complete or it may be a sharing chain pointer that we've set up. while (1) { PolyWord p = *pt; ASSERT(p.IsDataPtr()); obj = p.AsObjPtr(); PolyWord *lengthWord = ((PolyWord*)obj) - 1; LocalMemSpace *space = gMem.LocalSpaceForAddress(lengthWord); if (space == 0) return false; // Ignore it if it points to a permanent area if (space->bitmap.TestBit(space->wordNo(lengthWord))) return false; // Wasn't marked - must be a forwarding pointer. if (obj->ContainsForwardingPtr()) { obj = obj->GetForwardingPtr(); *pt = obj; } else break; } ASSERT(obj->ContainsNormalLengthWord()); totalVisited += 1; totalSize += obj->Length() + 1; return true; } void GetSharing::MarkAsScanning(PolyObject *obj) { ASSERT(obj->ContainsNormalLengthWord()); PolyWord *lengthWord = ((PolyWord*)obj) - 1; LocalMemSpace *space = gMem.LocalSpaceForAddress(lengthWord); ASSERT(! space->bitmap.TestBit(space->wordNo(lengthWord))); space->bitmap.SetBit(space->wordNo(lengthWord)); } void GetSharing::Completed(PolyObject *obj) { // We mustn't include cells in the permanent area. // We scan the permanent mutable areas for local addresses // but we mustn't add the cells themselves. Normally they // will be mutable so would be ignored but cells that have been // locked will now be immutable. The test in TestForScan is bypassed // by ScanAddressesInRegion. PolyWord *lengthWord = ((PolyWord*)obj) - 1; if (gMem.LocalSpaceForAddress(lengthWord) == 0) return; POLYUNSIGNED L = obj->LengthWord(); // We have tables for word objects and byte objects // We chain entries together using the length word so it // is important that we only do this for objects that // have no other bits in the header, such as the sign bit. if ((L & _OBJ_PRIVATE_FLAGS_MASK) == 0) { POLYUNSIGNED length = obj->Length(); if (length < NUM_WORD_VECTORS) wordVectors[length].AddToVector(obj, length); else largeWordCount++; wordAdded++; } else if ((L & _OBJ_PRIVATE_FLAGS_MASK) == _OBJ_BYTE_OBJ) { POLYUNSIGNED length = obj->Length(); if (length < NUM_BYTE_VECTORS) byteVectors[length].AddToVector(obj, length); else largeByteCount++; byteAdded++; } else if (! OBJ_IS_CODE_OBJECT(L) && ! OBJ_IS_MUTABLE_OBJECT(L)) excludedCount++; // Code and mutables can't be shared - see what could be // TODO: We don't attempt to share closure cells in 32-in-64. } // Quicksort the list to detect cells with the same content. These are made // to share and removed from further sorting. void SortVector::sortList(PolyObject *head, POLYUNSIGNED nItems, POLYUNSIGNED &shareCount) { while (nItems > 2) { size_t bytesToCompare = OBJ_OBJECT_LENGTH(lengthWord)*sizeof(PolyWord); PolyObject *median = head; head = head->GetForwardingPtr(); median->SetLengthWord(lengthWord); PolyObject *left = ENDOFLIST, *right = ENDOFLIST; POLYUNSIGNED leftCount = 0, rightCount = 0; while (head != ENDOFLIST) { PolyObject *next = head->GetForwardingPtr(); int res = memcmp(median, head, bytesToCompare); if (res == 0) { // Equal - they can share shareWith(head, median); shareCount++; } else if (res < 0) { head->SetForwardingPtr(left); left = head; leftCount++; } else { head->SetForwardingPtr(right); right = head; rightCount++; } head = next; } // We can now drop the median and anything that shares with it. // Process the smaller partition recursively and the larger by // tail recursion. if (leftCount < rightCount) { sortList(left, leftCount, shareCount); head = right; nItems = rightCount; } else { sortList(right, rightCount, shareCount); head = left; nItems = leftCount; } } if (nItems == 1) head->SetLengthWord(lengthWord); else if (nItems == 2) { PolyObject *next = head->GetForwardingPtr(); head->SetLengthWord(lengthWord); if (memcmp(head, next, OBJ_OBJECT_LENGTH(lengthWord)*sizeof(PolyWord)) == 0) { shareWith(next, head); shareCount++; } else next->SetLengthWord(lengthWord); } } void SortVector::sharingTask(GCTaskId*, void *a, void *b) { SortVector *s = (SortVector *)a; ObjEntry *o = (ObjEntry*)b; s->sortList(o->objList, o->objCount, o->shareCount); } // Process one level of the word data. // N.B. The length words are updated without any locking. This is safe // because all length words are initially chain entries and a chain entry // can be replaced by another chain entry, a forwarding pointer or a normal // length word. Forwarding pointers and normal length words are only ever // set once. There is a small chance that we could lose some sharing as a // result of a race condition if a thread defers an object because it // contains a pointer with a chain entry and later sees an otherwise // equal object where another thread has replaced the chain with a // normal address, adds it to the list for immediate processing and // so never compares the two. void SortVector::wordDataTask(GCTaskId*, void *a, void *) { SortVector *s = (SortVector*)a; // Partition the objects between those that have pointers to objects that are // still to be processed and those that have been processed. if (s->baseObject.objList == ENDOFLIST) return; PolyObject *h = s->baseObject.objList; s->baseObject.objList = ENDOFLIST; s->baseObject.objCount = 0; POLYUNSIGNED words = OBJ_OBJECT_LENGTH(s->lengthWord); s->carryOver = 0; for (unsigned i = 0; i < 256; i++) { // Clear the entries in the hash table but not the sharing count. s->processObjects[i].objList = ENDOFLIST; s->processObjects[i].objCount = 0; } while (h != ENDOFLIST) { PolyObject *next = h->GetForwardingPtr(); bool deferred = false; for (POLYUNSIGNED i = 0; i < words; i++) { PolyWord w = h->Get(i); if (w.IsDataPtr()) { PolyObject *p = w.AsObjPtr(); objectState state = getObjectState(p); if (state == FORWARDED) { // Update the addresses of objects that have been merged h->Set(i, p->GetForwardingPtr()); s->carryOver++; break; } else if (state == CHAINED) { // If it is still to be shared leave it deferred = true; break; // from the loop } } } if (deferred) { // We can't do it yet: add it back to the list h->SetForwardingPtr(s->baseObject.objList); s->baseObject.objList = h; s->baseObject.objCount++; } else { // Add it to the hash table. unsigned char hash = 0; for (POLYUNSIGNED i = 0; i < words*sizeof(PolyWord); i++) hash += h->AsBytePtr()[i]; h->SetForwardingPtr(s->processObjects[hash].objList); s->processObjects[hash].objList = h; s->processObjects[hash].objCount++; } h = next; } s->SortData(); } // Sort the entries in the hash table. void SortVector::SortData() { for (unsigned j = 0; j < 256; j++) { ObjEntry *oentry = &processObjects[j]; // Sort this entry. If it's very small just process it now. switch (oentry->objCount) { case 0: break; // Nothing there case 1: // Singleton - just restore the length word oentry->objList->SetLengthWord(lengthWord); break; case 2: { // Two items - process now PolyObject *obj1 = oentry->objList; PolyObject *obj2 = obj1->GetForwardingPtr(); obj1->SetLengthWord(lengthWord); if (memcmp(obj1, obj2, OBJ_OBJECT_LENGTH(lengthWord)*sizeof(PolyWord)) == 0) { shareWith(obj2, obj1); oentry->shareCount++; } else obj2->SetLengthWord(lengthWord); break; } default: gpTaskFarm->AddWorkOrRunNow(sharingTask, this, oentry); } } } void SortVector::hashAndSortAllTask(GCTaskId*, void *a, void *b) { SortVector *s = (SortVector *)a; // Hash the contents of the base object then sort them. for (unsigned i = 0; i < 256; i++) { // Clear the entries in the hash table but not the sharing count. s->processObjects[i].objList = ENDOFLIST; s->processObjects[i].objCount = 0; } PolyObject *h = s->baseObject.objList; POLYUNSIGNED bytes = OBJ_OBJECT_LENGTH(s->lengthWord)*sizeof(PolyWord); while (h != ENDOFLIST) { PolyObject *next = h->GetForwardingPtr(); unsigned char hash = 0; for (POLYUNSIGNED j = 0; j < bytes; j++) hash += h->AsBytePtr()[j]; h->SetForwardingPtr(s->processObjects[hash].objList); s->processObjects[hash].objList = h; s->processObjects[hash].objCount++; h = next; } s->SortData(); } // Look for sharing between byte data. These cannot contain pointers // so they can all be processed together. void GetSharing::shareByteData(GCTaskId *, void *a, void *) { GetSharing *s = (GetSharing*)a; for (unsigned i = 0; i < NUM_BYTE_VECTORS; i++) { if (s->byteVectors[i].CurrentCount() != 0) gpTaskFarm->AddWorkOrRunNow(SortVector::hashAndSortAllTask, &(s->byteVectors[i]), 0); } } // Process word data at this particular level void GetSharing::shareWordData(GCTaskId *, void *a, void *) { GetSharing *s = (GetSharing*)a; for (unsigned i = 0; i < NUM_WORD_VECTORS; i++) { if (s->wordVectors[i].CurrentCount() != 0) gpTaskFarm->AddWorkOrRunNow(SortVector::wordDataTask, &(s->wordVectors[i]), 0); } } // Share any entries left. void GetSharing::shareRemainingWordData(GCTaskId *, void *a, void *) { GetSharing *s = (GetSharing*)a; for (unsigned i = 0; i < NUM_WORD_VECTORS; i++) { if (s->wordVectors[i].CurrentCount() != 0) gpTaskFarm->AddWorkOrRunNow(SortVector::hashAndSortAllTask, &(s->wordVectors[i]), 0); } } void GetSharing::SortData() { // First process the byte objects. They cannot contain pointers. // We create a task to do this so that we never have more threads // running than given with --gcthreads. gpTaskFarm->AddWorkOrRunNow(shareByteData, this, 0); gpTaskFarm->WaitForCompletion(); // Word data may contain pointers to other objects. If an object // has been processed its header will contain either a normal length // word or a forwarding pointer if it shares. We can process an // object if every word in it is either a tagged integer or an // address we have already processed. This works provided there // are no loops so when we reach a stage where we are unable to // process anything we simply run a final scan on the remainder. // Loops can arise from the closures of mutually recursive functions. // Now process the word entries until we have nothing left apart from loops. POLYUNSIGNED lastCount = 0, lastShared = 0; for (unsigned n = 0; n < NUM_WORD_VECTORS; n++) lastCount += wordVectors[n].CurrentCount(); for(unsigned pass = 1; lastCount != 0; pass++) { gpTaskFarm->AddWorkOrRunNow(shareWordData, this, 0); gpTaskFarm->WaitForCompletion(); // At each stage check that we have removed some items // from the lists. POLYUNSIGNED postCount = 0, postShared = 0, carryOver = 0; for (unsigned i = 0; i < NUM_WORD_VECTORS; i++) { postCount += wordVectors[i].CurrentCount(); postShared += wordVectors[i].Shared(); carryOver += wordVectors[i].CarryOver(); } if (debugOptions & DEBUG_GC) Log("GC: Share: Pass %u: %" POLYUFMT " removed (%1.1f%%) %" POLYUFMT " shared (%1.1f%%) %" POLYUFMT " remain. %" POLYUFMT " entries updated (%1.1f%%).\n", pass, lastCount-postCount, (double)(lastCount-postCount) / (double) lastCount * 100.0, postShared - lastShared, (double)(postShared - lastShared) / (double) (lastCount-postCount) * 100.0, postCount, carryOver, (double)carryOver / (double)(lastCount-postCount) * 100.0); // Condition for exiting the loop. There are some heuristics here. // If we remove less than 10% in a pass it's probably not worth continuing // unless the carry over is large. The "carry over" is the number of words updated as // a result of the last pass. It represents the extra sharing we gained in this pass // as a result of the last pass. If there are deep data structures that can be shared // we get better sharing with more passes. If the data structures are shallow we will // get as much sharing by just running the final pass. The first pass only carries // over any sharing from the byte objects so we need to run at least one more before // checking the carry over. if (pass > 1 && (lastCount - postCount) * 10 < lastCount && (carryOver*2 < (lastCount-postCount) || (lastCount - postCount) * 1000 < lastCount )) break; lastCount = postCount; lastShared = postShared; } // Process any remaining entries. There may be loops. gpTaskFarm->AddWorkOrRunNow(shareRemainingWordData, this, 0); gpTaskFarm->WaitForCompletion(); if (debugOptions & DEBUG_GC) { POLYUNSIGNED postShared = 0; for (unsigned i = 0; i < NUM_WORD_VECTORS; i++) postShared += wordVectors[i].Shared(); if (debugOptions & DEBUG_GC) Log("GC: Share: Final pass %" POLYUFMT " removed %" POLYUFMT " shared (%1.1f%%).\n", lastCount, postShared - lastShared, (double)(postShared - lastShared) / (double) lastCount * 100.0); } // Calculate the totals. POLYUNSIGNED totalSize = 0, totalShared = 0, totalRecovered = 0; for (unsigned k = 0; k < NUM_BYTE_VECTORS; k++) { totalSize += byteVectors[k].TotalCount(); POLYUNSIGNED shared = byteVectors[k].Shared(); totalShared += shared; totalRecovered += shared * (k+1); // Add 1 for the length word. if (debugOptions & DEBUG_GC) Log("GC: Share: Byte objects of size %u: %" POLYUFMT " objects %" POLYUFMT " shared\n", k, byteVectors[k].TotalCount(), byteVectors[k].Shared()); } for (unsigned l = 0; l < NUM_WORD_VECTORS; l++) { totalSize += wordVectors[l].TotalCount(); POLYUNSIGNED shared = wordVectors[l].Shared(); totalShared += shared; totalRecovered += shared * (l+1); if (debugOptions & DEBUG_GC) Log("GC: Share: Word objects of size %u: %" POLYUFMT " objects %" POLYUFMT " shared\n", l, wordVectors[l].TotalCount(), wordVectors[l].Shared()); } if (debugOptions & DEBUG_GC) { Log("GC: Share: Total %" POLYUFMT " objects, %" POLYUFMT " shared (%1.0f%%). %" POLYUFMT " words recovered.\n", totalSize, totalShared, (double)totalShared / (double)totalSize * 100.0, totalRecovered); Log("GC: Share: Excluding %" POLYUFMT " large word objects %" POLYUFMT " large byte objects and %" POLYUFMT " others\n", largeWordCount, largeByteCount, excludedCount); } gHeapSizeParameters.RecordSharingData(totalRecovered); } void GCSharingPhase(void) { mainThreadPhase = MTP_GCPHASESHARING; GetSharing sharer; for (std::vector::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++) { LocalMemSpace *lSpace = *i; lSpace->bitmap.ClearBits(0, lSpace->spaceSize()); } // Scan the code areas to share any constants. We don't share the code // cells themselves. for (std::vector::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++) { CodeSpace *space = *i; sharer.ScanAddressesInRegion(space->bottom, space->top); } if (debugOptions & DEBUG_GC) Log("GC: Share: After scanning code: Total %" POLYUFMT " (%" POLYUFMT " words) byte %" POLYUFMT " word %" POLYUFMT ".\n", sharer.totalVisited, sharer.totalSize, sharer.byteAdded, sharer.wordAdded); // Process the permanent mutable areas and the code areas for (std::vector::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++) { PermanentMemSpace *space = *i; if (space->isMutable && ! space->byteOnly) sharer.ScanAddressesInRegion(space->bottom, space->top); } if (debugOptions & DEBUG_GC) Log("GC: Share: After scanning permanent: Total %" POLYUFMT " (%" POLYUFMT " words) byte %" POLYUFMT " word %" POLYUFMT ".\n", sharer.totalVisited, sharer.totalSize, sharer.byteAdded, sharer.wordAdded); // Process the RTS roots. GCModules(&sharer); if (debugOptions & DEBUG_GC) Log("GC: Share: After scanning other roots: Total %" POLYUFMT " (%" POLYUFMT " words) byte %" POLYUFMT " word %" POLYUFMT ".\n", sharer.totalVisited, sharer.totalSize, sharer.byteAdded, sharer.wordAdded); gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Table"); // Sort and merge the data. sharer.SortData(); gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Sort"); } diff --git a/libpolyml/scanaddrs.cpp b/libpolyml/scanaddrs.cpp index 479a6f88..d6c5992c 100644 --- a/libpolyml/scanaddrs.cpp +++ b/libpolyml/scanaddrs.cpp @@ -1,496 +1,288 @@ /* Title: Address scanner Copyright (c) 2006-8, 2012, 2019 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifdef HAVE_CONFIG_H #include "config.h" #elif defined(_WIN32) #include "winconfig.h" #else #error "No configuration file" #endif #ifdef HAVE_ASSERT_H #include #define ASSERT(x) assert(x) #else #define ASSERT(x) 0 #endif #include #include "globals.h" #include "scanaddrs.h" #include "machine_dep.h" #include "diagnostics.h" #include "memmgr.h" // Process the value at a given location and update it as necessary. POLYUNSIGNED ScanAddress::ScanAddressAt(PolyWord *pt) { PolyWord val = *pt; PolyWord newVal = val; if (IS_INT(val) || val == PolyWord::FromUnsigned(0)) { // We can get zeros in the constant area if we garbage collect // while compiling some code. */ } else { ASSERT(OBJ_IS_DATAPTR(val)); // Any sort of address newVal = ScanObjectAddress(val.AsObjPtr()); } if (newVal != val) // Only update if we need to. *pt = newVal; return 0; } // General purpose object processor, Processes all the addresses in an object. // Handles the various kinds of object that may contain addresses. void ScanAddress::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord) { do { ASSERT (OBJ_IS_LENGTH(lengthWord)); if (OBJ_IS_BYTE_OBJECT(lengthWord)) return; /* Nothing more to do */ POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord); PolyWord *baseAddr = (PolyWord*)obj; if (OBJ_IS_CODE_OBJECT(lengthWord)) { // Scan constants within the code. machineDependent->ScanConstantsWithinCode(obj, obj, length, this); // Skip to the constants and get ready to scan them. obj->GetConstSegmentForCode(length, baseAddr, length); } else if (OBJ_IS_CLOSURE_OBJECT(lengthWord)) { // The first word is a code pointer so we need to treat it specially // but it is possible it hasn't yet been set. if ((*(uintptr_t*)baseAddr & 1) == 0) { POLYUNSIGNED lengthWord = ScanCodeAddressAt((PolyObject**)baseAddr); // N.B. This could side-effect *baseAddr if (lengthWord != 0) ScanAddressesInObject(*(PolyObject**)baseAddr, lengthWord); } baseAddr += sizeof(PolyObject*) / sizeof(PolyWord); length -= sizeof(PolyObject*) / sizeof(PolyWord); } PolyWord *endWord = baseAddr + length; // We want to minimise the actual recursion we perform so we try to // use tail recursion if we can. We first scan from the end and // remove any words that don't need recursion. POLYUNSIGNED lastLengthWord = 0; while (endWord != baseAddr) { PolyWord wordAt = endWord[-1]; if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0)) endWord--; // Don't need to look at this. else if ((lastLengthWord = ScanAddressAt(endWord-1)) != 0) // We need to process this one break; else endWord--; // We're not interested in this. } if (endWord == baseAddr) return; // We've done everything. // There is at least one word that needs to be processed, the // one at endWord-1. // Now process from the beginning forward to see if there are // any words before this that need to be handled. This way we are more // likely to handle the head of a list by recursion and the // tail by looping (tail recursion). while (baseAddr < endWord-1) { PolyWord wordAt = *baseAddr; if (IS_INT(wordAt) || wordAt == PolyWord::FromUnsigned(0)) baseAddr++; // Don't need to look at this. else { POLYUNSIGNED lengthWord = ScanAddressAt(baseAddr); if (lengthWord != 0) { wordAt = *baseAddr; // Reload because it may have been side-effected // We really have to process this recursively. ASSERT(wordAt.IsDataPtr()); ScanAddressesInObject(wordAt.AsObjPtr(), lengthWord); baseAddr++; } else baseAddr++; } } // Finally process the last word we found that has to be processed. // Do this by looping rather than recursion. PolyWord wordAt = *baseAddr; // Last word to do. // This must be an address ASSERT(wordAt.IsDataPtr()); obj = wordAt.AsObjPtr(); lengthWord = lastLengthWord; } while(1); } void ScanAddress::ScanAddressesInRegion(PolyWord *region, PolyWord *end) { PolyWord *pt = region; while (pt < end) { #ifdef POLYML32IN64 if ((((uintptr_t)pt) & 4) == 0) { // Skip any padding. The length word should be on an odd-word boundary. pt++; continue; } #endif pt++; // Skip length word. // pt actually points AT the object here. PolyObject *obj = (PolyObject*)pt; if (obj->ContainsForwardingPtr()) /* skip over moved object */ { // We can now get multiple forwarding pointers as a result // of applying ShareData repeatedly. Perhaps we should // turn the forwarding pointers back into normal words in // an extra pass. obj = obj->FollowForwardingChain(); ASSERT(obj->ContainsNormalLengthWord()); pt += obj->Length(); } else { ASSERT(obj->ContainsNormalLengthWord()); POLYUNSIGNED length = obj->Length(); if (pt+length > end) Crash("Malformed object at %p - length %lu\n", pt, length); if (length != 0) ScanAddressesInObject(obj); pt += length; } } } // Extract a constant from the code. PolyObject *ScanAddress::GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, PolyWord *base) { switch (code) { case PROCESS_RELOC_DIRECT: // 32 or 64 bit address of target { POLYUNSIGNED valu; unsigned i; byte *pt = addressOfConstant; if (pt[3] & 0x80) valu = 0-1; else valu = 0; for (i = sizeof(PolyWord); i > 0; i--) valu = (valu << 8) | pt[i-1]; if (valu == 0 || PolyWord::FromUnsigned(valu).IsTagged()) return 0; else return PolyWord::FromUnsigned(valu).AsObjPtr(base); } case PROCESS_RELOC_I386RELATIVE: // 32 bit relative address { POLYSIGNED disp; byte *pt = addressOfConstant; // Get the displacement. This is signed. if (pt[3] & 0x80) disp = -1; else disp = 0; // Set the sign just in case. for(unsigned i = 4; i > 0; i--) disp = (disp << 8) | pt[i-1]; byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant return (PolyObject*)absAddr; } default: ASSERT(false); return 0; } } // Store a constant value. Also used with a patch table when importing a saved heap which has // been exported using the C exporter. void ScanAddress::SetConstantValue(byte *addressOfConstant, PolyObject *p, ScanRelocationKind code) { switch (code) { case PROCESS_RELOC_DIRECT: // 32 or 64 bit address of target { POLYUNSIGNED valu = ((PolyWord)p).AsUnsigned(); for (unsigned i = 0; i < sizeof(PolyWord); i++) { addressOfConstant[i] = (byte)(valu & 255); valu >>= 8; } } break; case PROCESS_RELOC_I386RELATIVE: // 32 bit relative address { // This offset may be positive or negative intptr_t newDisp = (byte*)p - addressOfConstant - 4; #if (SIZEOF_VOIDP != 4) ASSERT(newDisp < (intptr_t)0x80000000 && newDisp >= -(intptr_t)0x80000000); #endif for (unsigned i = 0; i < 4; i++) { addressOfConstant[i] = (byte)(newDisp & 0xff); newDisp >>= 8; } } break; } } void ScanAddress::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code) { PolyObject *p = GetConstantValue(addressOfConstant, code); if (p != 0) { PolyObject *oldValue = p; // If this was a relative address we must have a code address. if (code == PROCESS_RELOC_I386RELATIVE) ScanCodeAddressAt(&p); else p = ScanObjectAddress(p); if (p != oldValue) // Update it if it has changed. SetConstantValue(addressOfConstant, p, code); } } void ScanAddress::ScanRuntimeWord(PolyWord *w) { if (w->IsTagged()) {} // Don't need to do anything else { ASSERT(w->IsDataPtr()); *w = ScanObjectAddress(w->AsObjPtr()); } } - -// This gets called in two circumstances. It may be called for the roots -// in which case the stack will be empty and we want to process it completely -// or it is called for a constant address in which case it will have been -// called from RecursiveScan::ScanAddressesInObject and that can process -// any addresses. -PolyObject *RecursiveScan::ScanObjectAddress(PolyObject *obj) -{ - PolyWord pWord = obj; - // Test to see if this needs to be scanned. - // It may update the word. - bool test = TestForScan(&pWord); - obj = pWord.AsObjPtr(); - - if (test) - { - MarkAsScanning(obj); - if (obj->IsByteObject()) - Completed(obj); // Don't need to put it on the stack - // If we already have something on the stack we must being called - // recursively to process a constant in a code segment. Just push - // it on the stack and let the caller deal with it. - else if (StackIsEmpty()) - RecursiveScan::ScanAddressesInObject(obj, obj->LengthWord()); - else - PushToStack(obj, (PolyWord*)obj); - } - - return obj; -} - -// This is called via ScanAddressesInRegion to process the permanent mutables. It is -// also called from ScanObjectAddress to process root addresses. -// It processes all the addresses reachable from the object. -// This is almost the same as MTGCProcessMarkPointers::ScanAddressesInObject. -void RecursiveScan::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord) -{ - if (OBJ_IS_BYTE_OBJECT(lengthWord)) - return; // Ignore byte cells and don't call Completed on them - - PolyWord *baseAddr = (PolyWord*)obj; - - while (true) - { - ASSERT (OBJ_IS_LENGTH(lengthWord)); - - // Get the length and base address. N.B. If this is a code segment - // these will be side-effected by GetConstSegmentForCode. - POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord); - - if (OBJ_IS_CODE_OBJECT(lengthWord) || OBJ_IS_CLOSURE_OBJECT(lengthWord)) - { - // It's better to process the whole code object in one go. - // For the moment do that for closure objects as well. - ScanAddress::ScanAddressesInObject(obj, lengthWord); - length = 0; // Finished - } - - // else it's a normal object, - - // If there are only two addresses in this cell that need to be - // followed we follow them immediately and treat this cell as done. - // If there are more than two we push the address of this cell on - // the stack, follow the first address and then rescan it. That way - // list cells are processed once only but we don't overflow the - // stack by pushing all the addresses in a very large vector. - PolyWord *endWord = (PolyWord*)obj + length; - PolyObject *firstWord = 0; - PolyObject *secondWord = 0; - PolyWord *restartFrom = baseAddr; - - while (baseAddr != endWord) - { - PolyWord wordAt = *baseAddr; - - if (wordAt.IsDataPtr() && wordAt != PolyWord::FromUnsigned(0)) - { - // Normal address. We can have words of all zeros at least in the - // situation where we have a partially constructed code segment where - // the constants at the end of the code have not yet been filled in. - if (TestForScan(baseAddr)) // Test value at baseAddr (may side-effect it) - { - PolyObject *wObj = (*baseAddr).AsObjPtr(); - if (wObj->IsByteObject()) - { - // Can do this now - don't need to push it - MarkAsScanning(wObj); - Completed(wObj); - } - else if (firstWord == 0) - { - firstWord = wObj; - // We mark the word immediately. We can have - // two words in an object that are the same - // and we don't want to process it again. - MarkAsScanning(firstWord); - } - else if (secondWord == 0) - { - secondWord = wObj; - restartFrom = baseAddr; - } - else break; // More than two words. - } - } - baseAddr++; - } - - if (baseAddr == endWord) - { - // We have done everything except possibly firstWord and secondWord. - // Note: Unfortunately the way that ScanAddressesInRegion works means that - // we call Completed on the addresses of cells in the permanent areas without - // having called TestForScan. - Completed(obj); - if (secondWord != 0) - { - MarkAsScanning(secondWord); - // Put this on the stack. If this is a list node we will be - // pushing the tail. - PushToStack(secondWord, (PolyWord*)secondWord); - } - } - else // Put this back on the stack while we process the first word - PushToStack(obj, restartFrom); - - if (firstWord != 0) - { - // Process it immediately. - obj = firstWord; - baseAddr = (PolyWord*)obj; - } - else if (StackIsEmpty()) - return; - else - PopFromStack(obj, baseAddr); - - lengthWord = obj->LengthWord(); - } -} - -// The stack is allocated as a series of blocks chained together. -#define RSTACK_SEGMENT_SIZE 1000 - -class RScanStack { -public: - RScanStack(): nextStack(0), lastStack(0), sp(0) {} - ~RScanStack() { delete(nextStack); } - - RScanStack *nextStack; - RScanStack *lastStack; - unsigned sp; - struct { PolyObject *obj; PolyWord *base; } stack[RSTACK_SEGMENT_SIZE]; -}; - -RecursiveScanWithStack::~RecursiveScanWithStack() -{ - delete(stack); -} - -bool RecursiveScanWithStack::StackIsEmpty(void) -{ - return stack == 0 || (stack->sp == 0 && stack->lastStack == 0); -} - -void RecursiveScanWithStack::PushToStack(PolyObject *obj, PolyWord *base) -{ - if (stack == 0 || stack->sp == RSTACK_SEGMENT_SIZE) - { - if (stack != 0 && stack->nextStack != 0) - stack = stack->nextStack; - else - { - // Need a new segment - try { - RScanStack *s = new RScanStack; - s->lastStack = stack; - if (stack != 0) - stack->nextStack = s; - stack = s; - } - catch (std::bad_alloc &) { - StackOverflow(); - return; - } - } - } - stack->stack[stack->sp].obj = obj; - stack->stack[stack->sp].base = base; - stack->sp++; -} - -void RecursiveScanWithStack::PopFromStack(PolyObject *&obj, PolyWord *&base) -{ - if (stack->sp == 0) - { - // Chain to the previous stack if any - ASSERT(stack->lastStack != 0); - // Before we do, delete any further one to free some memory - delete(stack->nextStack); - stack->nextStack = 0; - stack = stack->lastStack; - ASSERT(stack->sp == RSTACK_SEGMENT_SIZE); - } - --stack->sp; - obj = stack->stack[stack->sp].obj; - base = stack->stack[stack->sp].base; -} diff --git a/libpolyml/scanaddrs.h b/libpolyml/scanaddrs.h index adab0186..ff7c3482 100644 --- a/libpolyml/scanaddrs.h +++ b/libpolyml/scanaddrs.h @@ -1,149 +1,102 @@ /* Title: scanaddrs.h - Scan addresses in objects Copyright (c) 2006-8, 2012, 2015, 2018 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #ifndef SCANADDRS_H_INCLUDED #define SCANADDRS_H_INCLUDED #include "globals.h" // Type of relocations. typedef enum { PROCESS_RELOC_DIRECT = 0, // 32 or 64 bit address of target PROCESS_RELOC_I386RELATIVE // 32 or 64 bit relative address } ScanRelocationKind; class StackSpace; class ScanAddress { public: virtual ~ScanAddress() {} // Keeps gcc happy protected: // Scan an address in the memory. "pt" always points into an object. // It is not called with pt pointing at a C++ automatic variable. // Tagged integers have already been filtered out. // The result is the length word of the object to use if the object // is to be processed recursively or 0 if it should not be. // Default action - call ScanObjectAddress for the base object address of // the address. virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt); // As for ScanAddressAt except that the value is a pointer to the first word in a closure object. // In most cases we're just scanning the heap we don't need to do anything and we scan // the code area separately. virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { return 0; } public: // The fundamental overridable for this class. Takes the object address and returns // the updated address. If nothing else is overridden everything eventually comes here. virtual PolyObject *ScanObjectAddress(PolyObject *base) = 0;// { return base; } typedef enum { STRENGTH_STRONG = 0, STRENGTH_WEAK = 1 } RtsStrength; // Scan an address in the run-time system. This normally just applies ScanObjectAddress // but if this is a weak reference it can set *pt to NULL virtual void ScanRuntimeAddress(PolyObject **pt, RtsStrength weak) { *pt = ScanObjectAddress(*pt); } // Scan a word in the run-time system. This is the preferred call for non-weak // references and deals with the general case of a word. void ScanRuntimeWord(PolyWord *w); // Process a constant within the code. // The default action is to call the DEFAULT ScanAddressAt NOT the virtual which means that it calls // ScanObjectAddress for the base address of the object referred to. virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code); // Scan the objects in the region and process their addresses. Applies ScanAddressesInObject // to each of the objects. The "region" argument points AT the first length word. // Typically used to scan or update addresses in the mutable area. void ScanAddressesInRegion(PolyWord *region, PolyWord *endOfRegion); // General object processor. // If the object is a word object calls ScanAddressesAt for all the addresses. // // If the object is a code object calls ScanAddressesAt for the constant area and // calls (indirectly) ScanConstant, and by default ScanObjectAddress for addresses within // the code // // If the object is a stack calls ScanStackAddress which calls ScanObjectAddress for // addresses within the code. virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord); void ScanAddressesInObject(PolyObject *base) { ScanAddressesInObject(base, base->LengthWord()); } // Extract a constant from the code. #ifdef POLYML32IN64 static PolyObject *GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, PolyWord *base = globalHeapBase); #else static PolyObject *GetConstantValue(byte *addressOfConstant, ScanRelocationKind code, PolyWord *base = 0); #endif // Store a constant in the code. static void SetConstantValue(byte *addressOfConstant, PolyObject *p, ScanRelocationKind code); }; -// Recursive scan over a data structure. -class RecursiveScan: public ScanAddress -{ -public: - virtual PolyObject *ScanObjectAddress(PolyObject *base); - virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord); - // Have to redefine this for some reason. - void ScanAddressesInObject(PolyObject *base) - { ScanAddressesInObject(base, base->LengthWord()); } - -protected: - // The derived class must provide a stack. - virtual void PushToStack(PolyObject *obj, PolyWord *base) = 0; - virtual void PopFromStack(PolyObject *&obj, PolyWord *&base) = 0; - virtual bool StackIsEmpty(void) = 0; - - // Test the word at the location to see if it points to - // something that may have to be scanned. We pass in the - // pointer here because the called may side-effect it. - virtual bool TestForScan(PolyWord *) = 0; - // If we are definitely scanning the address we mark it. - virtual void MarkAsScanning(PolyObject *) = 0; - // Called when the object has been completed. - virtual void Completed(PolyObject *) {} -}; - -// Recursive scan with a dynamically allocated stack -class RScanStack; - -class RecursiveScanWithStack: public RecursiveScan -{ -public: - RecursiveScanWithStack(): stack(0) {} - ~RecursiveScanWithStack(); - -protected: - // StackOverflow is called if allocating a new stack - // segment fails. - virtual void StackOverflow(void) = 0; - - virtual void PushToStack(PolyObject *obj, PolyWord *base); - virtual void PopFromStack(PolyObject *&obj, PolyWord *&base); - virtual bool StackIsEmpty(void); - - RScanStack *stack; -}; - #endif