diff --git a/libpolyml/exporter.cpp b/libpolyml/exporter.cpp
index 4e0ee44b..861534a0 100644
--- a/libpolyml/exporter.cpp
+++ b/libpolyml/exporter.cpp
@@ -1,936 +1,936 @@
 /*
     Title:  exporter.cpp - Export a function as an object or C file
 
     Copyright (c) 2006-7, 2015, 2016-20 David C.J. Matthews
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License version 2.1 as published by the Free Software Foundation.
     
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.
     
     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #elif defined(_WIN32)
 #include "winconfig.h"
 #else
 #error "No configuration file"
 #endif
 
 #ifdef HAVE_ASSERT_H
 #include <assert.h>
 #define ASSERT(x) assert(x)
 
 #else
 #define ASSERT(x)
 #endif
 
 #ifdef HAVE_STRING_H
 #include <string.h>
 #endif
 
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 
 #ifdef HAVE_SYS_PARAM_H
 #include <sys/param.h>
 #endif
 
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 
 #if (defined(_WIN32))
 #include <tchar.h>
 #else
 #define _T(x) x
 #define _tcslen strlen
 #define _tcscmp strcmp
 #define _tcscat strcat
 #endif
 
 #include "exporter.h"
 #include "save_vec.h"
 #include "polystring.h"
 #include "run_time.h"
 #include "osmem.h"
 #include "scanaddrs.h"
 #include "gc.h"
 #include "machine_dep.h"
 #include "diagnostics.h"
 #include "memmgr.h"
 #include "processes.h" // For IO_SPACING
 #include "sys.h" // For EXC_Fail
 #include "rtsentry.h"
 
 #include "pexport.h"
 
 #ifdef HAVE_PECOFF
 #include "pecoffexport.h"
 #elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H)
 #include "elfexport.h"
 #elif defined(HAVE_MACH_O_RELOC_H)
 #include "machoexport.h"
 #endif
 
 #if (defined(_WIN32))
 #define NOMEMORY ERROR_NOT_ENOUGH_MEMORY
 #define ERRORNUMBER _doserrno
 #else
 #define NOMEMORY ENOMEM
 #define ERRORNUMBER errno
 #endif
 
 // C-linkage declarations of the two RTS entry points that are defined
 // towards the end of this file and listed in the exporterEPT table.
 extern "C" {
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyExport(FirstArgument threadId, PolyWord fileName, PolyWord root);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyExportPortable(FirstArgument threadId, PolyWord fileName, PolyWord root);
 }
 
 /*
 To export the function and everything reachable from it we need to copy
 all the objects into a new area.  We leave tombstones in the original
 objects by overwriting the length word.  That prevents us from copying an
 object twice and breaks loops.  Once we've copied the objects we then
 have to go back over the memory and turn the tombstones back into length
 words.
 */
 
 // Release the tombstone buffer.  CopyScan::initialise obtains it with
 // calloc so it has to be handed back with free rather than delete.
 GraveYard::~GraveYard()
 {
     ::free(this->graves);
 }
 
 // Used to calculate the space required for the ordinary mutables
 // and the no-overwrite mutables.  They are interspersed in local space.
 class MutSizes : public ScanAddress
 {
 public:
     MutSizes() : mutSize(0), noOverSize(0) {}
 
     virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }// No Actually used
 
     virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord)
     {
         const POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord) + 1; // Include length word
         if (OBJ_IS_NO_OVERWRITE(lengthWord))
             noOverSize += words;
         else mutSize += words;
     }
 
     POLYUNSIGNED mutSize, noOverSize;
 };
 
 // Construct a scanner for hierarchy level h.  The default space sizes and
 // the graveyard start out empty; initialise() fills them in.
 CopyScan::CopyScan(unsigned h/*=0*/): hierarchy(h)
 {
     graveYard = 0;
     tombs = 0;
     defaultImmSize = 0;
     defaultMutSize = 0;
     defaultCodeSize = 0;
     defaultNoOverSize = 0;
 }
 
 // Prepare the scanner for a copy.
 //  - When hierarchy is zero the graveyard table is allocated with one slot
 //    per permanent space.
 //  - For every immutable permanent space at hierarchy zero a zeroed shadow
 //    buffer is allocated to hold the tombstones for that space.
 //  - Default sizes for new export spaces are estimated from the permanent,
 //    local and code spaces and then clamped: large minimum sizes when
 //    isExport is true, small minimum and a maximum size otherwise.
 // Throws MemoryException if a graveyard allocation fails.
 void CopyScan::initialise(bool isExport/*=true*/)
 {
     // There must be no export spaces left over from an earlier copy.
     ASSERT(gMem.eSpaces.size() == 0);
     // Set the space sizes to a proportion of the space currently in use.
     // Computing these sizes is not obvious because CopyScan is used both
     // for export and for saved states.  For saved states in particular we
     // want to use a smaller size because they are retained after we save
     // the state and if we have many child saved states it's important not
     // to waste memory.
     if (hierarchy == 0)
     {
         graveYard = new GraveYard[gMem.pSpaces.size()];
         if (graveYard == 0)
         {
             if (debugOptions & DEBUG_SAVING)
                 Log("SAVE: Unable to allocate graveyard, size: %lu.\n", gMem.pSpaces.size());
             throw MemoryException();
         }
     }
 
     // Permanent spaces: a quarter of each included space is added to the
     // default size for its kind.
     for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
     {
         PermanentMemSpace *space = *i;
         if (space->hierarchy >= hierarchy) {
             // Include this if we're exporting (hierarchy=0) or if we're saving a state
             // and will include this in the new state.
             size_t size = (space->top-space->bottom)/4;
             if (space->noOverwrite)
                 defaultNoOverSize += size;
             else if (space->isMutable)
                 defaultMutSize += size;
             else if (space->isCode)
                 defaultCodeSize += size;
             else
                 defaultImmSize += size;
             if (space->hierarchy == 0 && ! space->isMutable)
             {
                 // We need a separate area for the tombstones because this is read-only
                 graveYard[tombs].graves = (PolyWord*)calloc(space->spaceSize(), sizeof(PolyWord));
                 if (graveYard[tombs].graves == 0)
                 {
                     if (debugOptions & DEBUG_SAVING)
                         Log("SAVE: Unable to allocate graveyard for permanent space, size: %lu.\n",
                             space->spaceSize() * sizeof(PolyWord));
                     throw MemoryException();
                 }
                 if (debugOptions & DEBUG_SAVING)
                     Log("SAVE: Allocated graveyard for permanent space, %p size: %lu.\n",
                         graveYard[tombs].graves, space->spaceSize() * sizeof(PolyWord));
                 // Record the address range this graveyard shadows so that
                 // ScanAddress can map an object address to its tombstone.
                 graveYard[tombs].startAddr = space->bottom;
                 graveYard[tombs].endAddr = space->top;
                 tombs++;
             }
         }
     }
     // Local spaces: mutable spaces are measured cell by cell, immutable
     // spaces contribute half of their allocated size.
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *space = *i;
         uintptr_t size = space->allocatedSpace();
         // It looks as though the mutable size generally gets
         // overestimated while the immutable size is correct.
         if (space->isMutable)
         {
             MutSizes sizeMut;
             sizeMut.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr);
             sizeMut.ScanAddressesInRegion(space->upperAllocPtr, space->top);
             defaultNoOverSize += sizeMut.noOverSize / 4;
             defaultMutSize += sizeMut.mutSize / 4;
         }
         else
             defaultImmSize += size/2;
     }
     // Code spaces: half of each space is added to the code default.
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
     {
         CodeSpace *space = *i;
         uintptr_t size = space->spaceSize();
         defaultCodeSize += size/2;
     }
     if (isExport)
     {
         // Minimum 1M words.
         if (defaultMutSize < 1024*1024) defaultMutSize = 1024*1024;
         if (defaultImmSize < 1024*1024) defaultImmSize = 1024*1024;
         if (defaultCodeSize < 1024*1024) defaultCodeSize = 1024*1024;
 #ifdef MACOSX
         // Limit the segment size for Mac OS X.  The linker has a limit of 2^24 relocations
         // in a segment so this is a crude way of ensuring the limit isn't exceeded.
         // It's unlikely to be exceeded by the code itself.
         // Actually, from trial-and-error, the limit seems to be around 6M.
         if (defaultMutSize > 6 * 1024 * 1024) defaultMutSize = 6 * 1024 * 1024;
         if (defaultImmSize > 6 * 1024 * 1024) defaultImmSize = 6 * 1024 * 1024;
 #endif
         if (defaultNoOverSize < 4096) defaultNoOverSize = 4096; // Except for the no-overwrite area
     }
     else
     {
         // Much smaller minimum sizes for saved states.
         if (defaultMutSize < 1024) defaultMutSize = 1024;
         if (defaultImmSize < 4096) defaultImmSize = 4096;
         if (defaultCodeSize < 4096) defaultCodeSize = 4096;
         if (defaultNoOverSize < 4096) defaultNoOverSize = 4096;
         // Set maximum sizes as well.  We may have insufficient contiguous space for
         // very large areas.
         if (defaultMutSize > 1024 * 1024) defaultMutSize = 1024 * 1024;
         if (defaultImmSize > 1024 * 1024) defaultImmSize = 1024 * 1024;
         if (defaultCodeSize > 1024 * 1024) defaultCodeSize = 1024 * 1024;
         if (defaultNoOverSize > 1024 * 1024) defaultNoOverSize = 1024 * 1024;
     }
     if (debugOptions & DEBUG_SAVING)
         Log("SAVE: Copyscan default sizes: Immutable: %" POLYUFMT ", Mutable: %" POLYUFMT ", Code: %" POLYUFMT ", No-overwrite %" POLYUFMT ".\n",
             defaultImmSize, defaultMutSize, defaultCodeSize, defaultNoOverSize);
 }
 
 // Discard the temporary export spaces and then the graveyard table.
 CopyScan::~CopyScan()
 {
     gMem.DeleteExportSpaces();
     // delete[] applied to a null pointer does nothing so no test is needed.
     delete[] graveYard;
 }
 
 
 // Process one word of an object that has already been copied to its new
 // location.  Tagged integers and zero words are skipped.  Anything else is
 // treated as an object address: it is passed to ScanAddress and the word is
 // overwritten with the address that comes back.  The result is the length
 // word if the target was copied by this call and so still has to be
 // scanned, otherwise zero.
 POLYUNSIGNED CopyScan::ScanAddressAt(PolyWord *pt)
 {
     PolyWord word = *pt;
     const bool isAddress = !IS_INT(word) && !(word == PolyWord::FromUnsigned(0));
     if (!isAddress)
         return 0;
 
     PolyObject *target = word.AsObjPtr();
     const POLYUNSIGNED lengthWord = ScanAddress(&target);
     *pt = target;
     return lengthWord;
 }
 
 // Make sure the object addressed by *pt has a copy in the export area and
 // update *pt to refer to that copy.
 //  - Addresses that are already in an export space, or that are in a
 //    permanent space below the hierarchy being saved, are left untouched.
 //  - If the object, or its entry in the graveyard, already holds a
 //    forwarding pointer then *pt is simply redirected.
 //  - Otherwise the object is copied into an export space with matching
 //    attributes (a new space is created if none has room), a forwarding
 //    pointer is left behind and the length word is returned so that the
 //    caller scans the new copy.
 // Returns zero whenever nothing further needs to be scanned.
 // Throws MemoryException if a new export space cannot be allocated.
 POLYUNSIGNED CopyScan::ScanAddress(PolyObject **pt)
 {
     PolyObject *obj = *pt;
-    MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1);
+    MemSpace *space = gMem.SpaceForObjectAddress(obj);
     ASSERT(space != 0);
     // We may sometimes get addresses that have already been updated
     // to point to the new area.  e.g. (only?) in the case of constants
     // that have been updated in ScanConstantsWithinCode.
     if (space->spaceType == ST_EXPORT)
         return 0;
 
     // If this is at a lower level than the hierarchy we are saving
     // then leave it untouched.
     if (space->spaceType == ST_PERMANENT)
     {
         PermanentMemSpace *pmSpace = (PermanentMemSpace*)space;
         if (pmSpace->hierarchy < hierarchy)
             return 0;
     }
 
     // Have we already scanned this?
     if (obj->ContainsForwardingPtr())
     {
         // Update the address to the new value.
 #ifdef POLYML32IN64
         PolyObject *newAddr;
         if (space->isCode)
             newAddr = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
         else newAddr = obj->GetForwardingPtr();
 #else
         PolyObject *newAddr = obj->GetForwardingPtr();
 #endif
         *pt = newAddr;
         return 0; // No need to scan it again.
     }
     else if (space->spaceType == ST_PERMANENT)
     {
         // See if we have this in the grave-yard.
         for (unsigned i = 0; i < tombs; i++)
         {
             GraveYard *g = &graveYard[i];
             if ((PolyWord*)obj >= g->startAddr && (PolyWord*)obj < g->endAddr)
             {
                 PolyWord *tombAddr = g->graves + ((PolyWord*)obj - g->startAddr);
                 PolyObject *tombObject = (PolyObject*)tombAddr;
                 if (tombObject->ContainsForwardingPtr())
                 {
 #ifdef POLYML32IN64
                     PolyObject *newAddr;
                     if (space->isCode)
                         newAddr = (PolyObject*)(globalCodeBase + ((tombObject->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
                     else newAddr = tombObject->GetForwardingPtr();
 #else
                     PolyObject *newAddr = tombObject->GetForwardingPtr();
 #endif
                     *pt = newAddr;
                     return 0;
                 }
                 break; // No need to look further
             }
         }
     }
 
     // No, we need to copy it.
     ASSERT(space->spaceType == ST_LOCAL || space->spaceType == ST_PERMANENT ||
         space->spaceType == ST_CODE);
     POLYUNSIGNED lengthWord = obj->LengthWord();
     POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord);
 
     PolyObject *newObj = 0;
     PolyObject* writeAble = 0;
     // Work out the attributes the destination space has to have.  The
     // byte and no-overwrite attributes are only considered for mutable
     // objects and the code attribute only for immutable ones.
     bool isMutableObj = obj->IsMutable();
     bool isNoOverwrite = false;
     bool isByteObj = false;
     bool isCodeObj = false;
     if (isMutableObj)
     {
         isNoOverwrite = obj->IsNoOverwriteObject();
         isByteObj = obj->IsByteObject();
     }
     else isCodeObj = obj->IsCodeObject();
     // Allocate a new address for the object.
     for (std::vector<PermanentMemSpace *>::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++)
     {
         PermanentMemSpace *space = *i;
         if (isMutableObj == space->isMutable &&
             isNoOverwrite == space->noOverwrite &&
             isByteObj == space->byteOnly &&
             isCodeObj == space->isCode)
         {
             ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom);
             size_t spaceLeft = space->top - space->topPointer;
             // Strictly greater because the length word needs a slot as well.
             if (spaceLeft > words)
             {
                 newObj = (PolyObject*)(space->topPointer + 1);
                 writeAble = space->writeAble(newObj);
                 space->topPointer += words + 1;
 #ifdef POLYML32IN64
                 // Maintain the odd-word alignment of topPointer
                 if ((words & 1) == 0 && space->topPointer < space->top)
                 {
                     *space->writeAble(space->topPointer) = PolyWord::FromUnsigned(0);
                     space->topPointer++;
                 }
 #endif
                 break;
             }
         }
     }
     if (newObj == 0)
     {
         // Didn't find room in the existing spaces.  Create a new space.
         uintptr_t spaceWords;
         if (isMutableObj)
         {
             if (isNoOverwrite) spaceWords = defaultNoOverSize;
             else spaceWords = defaultMutSize;
         }
         else
         {
             if (isCodeObj) spaceWords = defaultCodeSize;
             else spaceWords = defaultImmSize;
         }
         if (spaceWords <= words)
             spaceWords = words + 1; // Make sure there's space for this object.
         PermanentMemSpace *space = gMem.NewExportSpace(spaceWords, isMutableObj, isNoOverwrite, isCodeObj);
         // The result has to be checked before it is used in any way.
         if (space == 0)
         {
             if (debugOptions & DEBUG_SAVING)
                 Log("SAVE: Unable to allocate export space, size: %lu.\n", spaceWords);
             // Unable to allocate this.
             throw MemoryException();
         }
         if (isByteObj) space->byteOnly = true;
         newObj = (PolyObject*)(space->topPointer + 1);
         writeAble = space->writeAble(newObj);
         space->topPointer += words + 1;
 #ifdef POLYML32IN64
         // Maintain the odd-word alignment of topPointer
         if ((words & 1) == 0 && space->topPointer < space->top)
         {
             *space->writeAble(space->topPointer) = PolyWord::FromUnsigned(0);
             space->topPointer++;
         }
 #endif
         ASSERT(space->topPointer <= space->top && space->topPointer >= space->bottom);
     }
 
     writeAble->SetLengthWord(lengthWord); // copy length word
 
     if (hierarchy == 0 /* Exporting object module */ && isNoOverwrite && isMutableObj && !isByteObj)
     {
         // These are not exported. They are used for special values e.g. mutexes
         // that should be set to 0/nil/NONE at start-up.
         // Weak+No-overwrite byte objects are used for entry points and volatiles
         // in the foreign-function interface and have to be treated specially.
 
         // Note: this must not be done when exporting a saved state because the
         // copied version is used as the local data for the rest of the session.
         for (POLYUNSIGNED i = 0; i < words; i++)
             writeAble->Set(i, TAGGED(0));
     }
     else memcpy(writeAble, obj, words * sizeof(PolyWord));
 
     // Leave a forwarding pointer so that the object is not copied twice.
     if (space->spaceType == ST_PERMANENT && !space->isMutable && ((PermanentMemSpace*)space)->hierarchy == 0)
     {
         // The immutable permanent areas are read-only.
         unsigned m;
         for (m = 0; m < tombs; m++)
         {
             GraveYard *g = &graveYard[m];
             if ((PolyWord*)obj >= g->startAddr && (PolyWord*)obj < g->endAddr)
             {
                 PolyWord *tombAddr = g->graves + ((PolyWord*)obj - g->startAddr);
                 PolyObject *tombObject = (PolyObject*)tombAddr;
 #ifdef POLYML32IN64
                 if (isCodeObj)
                 {
                     POLYUNSIGNED ll = (POLYUNSIGNED)(((PolyWord*)newObj - globalCodeBase) >> 1 | _OBJ_TOMBSTONE_BIT);
                     tombObject->SetLengthWord(ll);
                 }
                 else tombObject->SetForwardingPtr(newObj);
 #else
                 tombObject->SetForwardingPtr(newObj);
 #endif
                 break; // No need to look further
             }
         }
         ASSERT(m < tombs); // Should be there.
     }
     else if (isCodeObj)
 #ifdef POLYML32IN64
     // If this is a code address we can't use the usual forwarding pointer format.
     // Instead we have to compute the offset relative to the base of the code.
     {
         POLYUNSIGNED ll = (POLYUNSIGNED)(((PolyWord*)newObj-globalCodeBase) >> 1 | _OBJ_TOMBSTONE_BIT);
-        gMem.SpaceForAddress(obj)->writeAble(obj)->SetLengthWord(ll);
+        gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetLengthWord(ll);
     }
 #else
         gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetForwardingPtr(newObj);
 #endif
     else obj->SetForwardingPtr(newObj); // Put forwarding pointer in old object.
 
     if (OBJ_IS_CODE_OBJECT(lengthWord))
     {
         // We don't need to worry about flushing the instruction cache
         // since we're not going to execute this code here.
         // We do have to update any relative addresses within the code
         // to take account of its new position.  We have to do that now
         // even though ScanAddressesInObject will do it again because this
         // is the only point where we have both the old and the new addresses.
         machineDependent->ScanConstantsWithinCode(newObj, obj, words, this);
     }
     *pt = newObj; // Update it to the newly copied object.
     return lengthWord;  // This new object needs to be scanned.
 }
 
 // Handle a reference to a code cell held as a plain object pointer.  The
 // cell is treated like any other heap cell: ScanAddress copies it if that
 // is needed and, when it reports a fresh copy, the copy is scanned straight
 // away.  Always returns zero because nothing is left for the caller to scan.
 POLYUNSIGNED CopyScan::ScanCodeAddressAt(PolyObject **pt)
 {
     const POLYUNSIGNED copiedLength = ScanAddress(pt);
     if (copiedLength != 0)
     {
         PolyObject *copy = *pt;
         ScanAddressesInObject(copy, copiedLength);
     }
     return 0;
 }
 
 // Copy the object at "base" if required, scan the copy when it is new and
 // return the address that should now be used in place of "base".
 PolyObject *CopyScan::ScanObjectAddress(PolyObject *base)
 {
     // Wrap the pointer in a word so that it can be scanned as an address.
     PolyWord rootWord = base;
     const POLYUNSIGNED copiedLength = CopyScan::ScanAddressAt(&rootWord);
     PolyObject *result = rootWord.AsObjPtr();
     if (copiedLength != 0)
         ScanAddressesInObject(result, copiedLength);
     return result;
 }
 
 #define MAX_EXTENSION   4 // The longest extension we may need to add is ".obj"
 
 // Return the length word of an object, following forwarding pointers, and
 // convert a forwarding pointer back into a length word where appropriate.
 
 // Generally if this object has a forwarding pointer that's
 // because we've moved it into the export region.  We can,
 // though, get multiple levels of forwarding if there is an object
 // that has been shifted up by a garbage collection, leaving a forwarding
 // pointer and then that object has been moved to the export region.
 // We mustn't turn locally forwarded values back into ordinary objects
 // because they could contain addresses that are no longer valid.
 // For that reason the length word is only written back when the immediate
 // target of the forwarding pointer is in an export space.
 static POLYUNSIGNED GetObjLength(PolyObject *obj)
 {
     if (obj->ContainsForwardingPtr())
     {
         PolyObject *forwardedTo;
 #ifdef POLYML32IN64
         {
             // Code cells hold an offset from globalCodeBase instead of an
             // ordinary forwarding pointer.
-            MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1);
+            MemSpace *space = gMem.SpaceForObjectAddress(obj);
             if (space->isCode)
                 forwardedTo = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
             else forwardedTo = obj->GetForwardingPtr();
         }
 #else
         forwardedTo = obj->GetForwardingPtr();
 #endif
         // Recurse so that a chain of forwarding pointers is followed to the end.
         POLYUNSIGNED length = GetObjLength(forwardedTo);
-        MemSpace *space = gMem.SpaceForAddress((PolyWord*)forwardedTo-1);
+        MemSpace *space = gMem.SpaceForObjectAddress(forwardedTo);
         if (space->spaceType == ST_EXPORT)
             gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetLengthWord(length);
         return length;
     }
     else {
         ASSERT(obj->ContainsNormalLengthWord());
         return obj->LengthWord();
     }
 }
 
 // Walk every cell in the region of "space" words that starts at "pt" and
 // let GetObjLength turn forwarding pointers back into length words.
 static void FixForwarding(PolyWord *pt, size_t space)
 {
     while (space != 0)
     {
         // Step over the length word so that pt addresses the cell itself.
         ++pt;
         PolyObject *cell = (PolyObject*)pt;
 #ifdef POLYML32IN64
         if (((uintptr_t)cell & 4) != 0)
         {
             // This was a filler word needed to align to an even word.  pt has
             // already been advanced so only the count has to be adjusted.
             --space;
             continue;
         }
 #endif
         const size_t cellWords = OBJ_OBJECT_LENGTH(GetObjLength(cell));
         ASSERT(space > cellWords);
         pt += cellWords;
         // Account for the data words and the length word.
         space -= cellWords + 1;
     }
 }
 
 // Request passed to the main thread asking it to run an export.
 class ExportRequest: public MainThreadRequest
 {
 public:
     ExportRequest(Handle root, Exporter *exp)
         : MainThreadRequest(MTP_EXPORTING), exportRoot(root), exporter(exp)
     {
     }
 
     // Run the export on the object that the root handle refers to.
     virtual void Perform()
     {
         exporter->RunExport(exportRoot->WordP());
     }
 
     Handle exportRoot;
     Exporter *exporter;
 };
 
 // Common driver for the export formats.  Builds the output file name,
 // appending "extension" unless the name already ends with it, opens the
 // file for binary writing, runs a full GC and then asks the main thread
 // to perform the export.  Failures are reported through raise_syscall
 // or raise_fail.
 static void exporter(TaskData *taskData, Handle fileName, Handle root, const TCHAR *extension, Exporter *exports)
 {
     const size_t extChars = _tcslen(extension);
     // extChars is passed as the second argument of the conversion;
     // presumably that reserves room for the extension appended below.
     TempString fileNameBuff(Poly_string_to_T_alloc(fileName->Word(), extChars));
     if (fileNameBuff == NULL)
         raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 
     // Does it already have the extension?  If not add it on.
     const size_t nameChars = _tcslen(fileNameBuff);
     bool hasExtension = false;
     if (nameChars >= extChars)
         hasExtension = _tcscmp(fileNameBuff + nameChars - extChars, extension) == 0;
     if (!hasExtension)
         _tcscat(fileNameBuff, extension);
 
 #if (defined(_WIN32) && defined(UNICODE))
     exports->exportFile = _wfopen(fileNameBuff, L"wb");
 #else
     exports->exportFile = fopen(fileNameBuff, "wb");
 #endif
     if (exports->exportFile == NULL)
         raise_syscall(taskData, "Cannot open export file", ERRORNUMBER);
 
     // Request a full GC to reduce the size of fix-ups.
     FullGC(taskData);
 
     // Request the main thread to do the export.
     ExportRequest exportReq(root, exports);
     processes->MakeRootRequest(taskData, &exportReq);
 
     // RunExport reports problems through errorMessage.
     if (exports->errorMessage)
         raise_fail(taskData, exports->errorMessage);
 }
 
 // This is called by the initial thread to actually do the export.
 // Steps:
 //  1. Copy rootFunction and everything reachable from it into export spaces.
 //  2. Restore the length words that the copy overwrote with forwarding
 //     pointers.  This is done whether or not the copy succeeded.
 //  3. Build the memory table describing the areas to be written.
 //  4. Call exportStore() to write the output.
 // Running out of memory at step 1 or step 4 is reported by setting
 // errorMessage; no exception leaves this function for that case.
 void Exporter::RunExport(PolyObject *rootFunction)
 {
     Exporter *exports = this;
 
     PolyObject *copiedRoot = 0;
     CopyScan copyScan(hierarchy);
 
     try {
         copyScan.initialise();
         // Copy the root and everything reachable from it into the temporary area.
         copiedRoot = copyScan.ScanObjectAddress(rootFunction);
     }
     catch (MemoryException &)
     {
         // If we ran out of memory.
         copiedRoot = 0;
     }
 
     // Fix the forwarding pointers.
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *space = *i;
         // Local areas hold objects in two regions: from the bottom up to
         // lowerAllocPtr and from upperAllocPtr up to the top.
         FixForwarding(space->bottom, space->lowerAllocPtr - space->bottom);
         FixForwarding(space->upperAllocPtr, space->top - space->upperAllocPtr);
     }
     for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
     {
         MemSpace *space = *i;
         // Permanent areas are filled with objects from the bottom.
         FixForwarding(space->bottom, space->top - space->bottom);
     }
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
     {
         MemSpace *space = *i;
         // Code areas are filled with objects from the bottom.
         FixForwarding(space->bottom, space->top - space->bottom);
     }
 
     // Report the failure now that the forwarding pointers have been cleaned up.
     if (copiedRoot == 0)
     {
         exports->errorMessage = "Insufficient Memory";
         return;
     }
 
     // Copy the areas into the export object.
     size_t tableEntries = gMem.eSpaces.size();
     unsigned memEntry = 0;
     // NOTE(review): this counts every permanent space but the loop below only
     // adds an entry for those with a lower hierarchy.  The ASSERT further down
     // relies on the two counts being equal - confirm that this always holds.
     if (hierarchy != 0) tableEntries += gMem.pSpaces.size();
     exports->memTable = new memoryTableEntry[tableEntries];
 
     // If we're constructing a module we need to include the global spaces.
     if (hierarchy != 0)
     {
         // Permanent spaces from the executable.
         for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
         {
             PermanentMemSpace *space = *i;
             if (space->hierarchy < hierarchy)
             {
                 memoryTableEntry *entry = &exports->memTable[memEntry++];
                 entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
                 entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
                 entry->mtIndex = space->index;
                 entry->mtFlags = 0;
                 if (space->isMutable) entry->mtFlags |= MTF_WRITEABLE;
                 if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE;
             }
         }
         // Record how many entries precede the export spaces in the table.
         newAreas = memEntry;
     }
 
     // One entry for each export space.  The length is in bytes and covers
     // only the part of the space that has been used.
     for (std::vector<PermanentMemSpace *>::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++)
     {
         memoryTableEntry *entry = &exports->memTable[memEntry++];
         PermanentMemSpace *space = *i;
         entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
         entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
         // For a plain export the index is just the position in the table.
         entry->mtIndex = hierarchy == 0 ? memEntry-1 : space->index;
         entry->mtFlags = 0;
         if (space->isMutable)
         {
             entry->mtFlags = MTF_WRITEABLE;
             if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE;
         }
         if (space->isCode) entry->mtFlags |= MTF_EXECUTABLE;
         if (space->byteOnly) entry->mtFlags |= MTF_BYTES;
     }
 
     ASSERT(memEntry == tableEntries);
     exports->memTableEntries = memEntry;
     exports->rootFunction = copiedRoot;
     try {
         // This can raise MemoryException at least in PExport::exportStore. 
         exports->exportStore();
     }
     catch (MemoryException &) {
         exports->errorMessage = "Insufficient Memory";
     }
 }
 
 // Functions called via the RTS call.
 
 // Export in the native object format selected when the RTS was built:
 // PE/COFF, ELF or Mach-O.  "args" is a pair; field 0 is passed to
 // exporter() as the file name and field 1 as the root.  If none of the
 // formats is available a Fail exception is raised instead.
 // Returns a handle to tagged zero.
 Handle exportNative(TaskData *taskData, Handle args)
 {
 #ifdef HAVE_PECOFF
     // Windows including Cygwin
 #if (defined(_WIN32))
     const TCHAR *extension = _T(".obj"); // Windows
 #else
     const char *extension = ".o"; // Cygwin
 #endif
     PECOFFExport exports;
     exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)),
         taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports);
 #elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H)
     // Most Unix including Linux, FreeBSD and Solaris.
     const char *extension = ".o";
     ELFExport exports;
     exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)),
         taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports);
 #elif defined(HAVE_MACH_O_RELOC_H)
     // Mac OS-X
     const char *extension = ".o";
     MachoExport exports;
     exporter(taskData, taskData->saveVec.push(args->WordP()->Get(0)),
         taskData->saveVec.push(args->WordP()->Get(1)), extension, &exports);
 #else
     raise_exception_string (taskData, EXC_Fail, "Native export not available for this platform");
 #endif
     return taskData->saveVec.push(TAGGED(0));
 }
 
 // Export in the portable format using the ".txt" extension.  "args" is a
 // pair; field 0 is passed to exporter() as the file name and field 1 as
 // the root.  Returns a handle to tagged zero.
 Handle exportPortable(TaskData *taskData, Handle args)
 {
     PExport portableExport;
     Handle nameHandle = taskData->saveVec.push(args->WordP()->Get(0));
     Handle rootHandle = taskData->saveVec.push(args->WordP()->Get(1));
     exporter(taskData, nameHandle, rootHandle, _T(".txt"), &portableExport);
     return taskData->saveVec.push(TAGGED(0));
 }
 
 // RTS entry point: export "root" to the file named by "fileName" in the
 // native object format selected when the RTS was built.  The arguments are
 // pushed onto the save vector for the duration of the call and the save
 // vector is reset before returning.  Anything thrown inside the try block
 // is caught and discarded here.  Always returns tagged zero (unit).
 POLYUNSIGNED PolyExport(FirstArgument threadId, PolyWord fileName, PolyWord root)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     // Remember the save vector position so that it can be restored below.
     Handle reset = taskData->saveVec.mark();
     Handle pushedName = taskData->saveVec.push(fileName);
     Handle pushedRoot = taskData->saveVec.push(root);
 
     try {
 #ifdef HAVE_PECOFF
         // Windows including Cygwin
 #if (defined(_WIN32))
         const TCHAR *extension = _T(".obj"); // Windows
 #else
         const char *extension = ".o"; // Cygwin
 #endif
         PECOFFExport exports;
         exporter(taskData, pushedName, pushedRoot, extension, &exports);
 #elif defined(HAVE_ELF_H) || defined(HAVE_ELF_ABI_H)
         // Most Unix including Linux, FreeBSD and Solaris.
         const char *extension = ".o";
         ELFExport exports;
         exporter(taskData, pushedName, pushedRoot, extension, &exports);
 #elif defined(HAVE_MACH_O_RELOC_H)
         // Mac OS-X
         const char *extension = ".o";
         MachoExport exports;
         exporter(taskData, pushedName, pushedRoot, extension, &exports);
 #else
         raise_exception_string (taskData, EXC_Fail, "Native export not available for this platform");
 #endif
     } catch (...) { } // If an ML exception is raised
 
     taskData->saveVec.reset(reset);
     taskData->PostRTSCall();
     return TAGGED(0).AsUnsigned(); // Returns unit
 }
 
 // RTS entry point: export "root" to the file named by "fileName" in the
 // portable format.  The arguments are held on the save vector while the
 // export runs and the save vector is reset afterwards.  Always returns
 // tagged zero (unit).
 POLYUNSIGNED PolyExportPortable(FirstArgument threadId, PolyWord fileName, PolyWord root)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
 
     Handle savedMark = taskData->saveVec.mark();
     Handle nameHandle = taskData->saveVec.push(fileName);
     Handle rootHandle = taskData->saveVec.push(root);
 
     try
     {
         PExport portableExport;
         exporter(taskData, nameHandle, rootHandle, _T(".txt"), &portableExport);
     }
     catch (...)
     {
         // If an ML exception is raised
     }
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     // Returns unit
     return TAGGED(0).AsUnsigned();
 }
 
 
 // Helper functions for exporting.  We need to produce relocation information
 // and this code is common to every method.
 // Construct an exporter for hierarchy level h with no output file, no
 // error and no memory table.  memTableEntries and rootFunction are set
 // here as well: RunExport only assigns them once the copy has succeeded
 // and findArea reads memTableEntries, so they must not be left
 // indeterminate.
 Exporter::Exporter(unsigned int h): exportFile(NULL), errorMessage(0), hierarchy(h), memTable(0), newAreas(0)
 {
     memTableEntries = 0;
     rootFunction = 0;
 }
 
 // Free the memory table and close the output file if one was opened.
 Exporter::~Exporter()
 {
     delete[] memTable;
     if (exportFile != NULL)
         fclose(exportFile);
 }
 
 // Create a relocation for the word at pt unless it is a tagged integer or
 // zero.  Does nothing at all when built for POLYML32IN64.
 void Exporter::relocateValue(PolyWord *pt)
 {
 #ifndef POLYML32IN64
     PolyWord word = *pt;
     const bool needsRelocation = !IS_INT(word) && !(word == PolyWord::FromUnsigned(0));
     if (needsRelocation)
         createRelocation(pt);
 #endif
 }
 
 // Replace the word at pt with the value returned by the two-argument
 // createRelocation overload.  The store is made through the owning
 // space's writeAble() mapping rather than through pt directly.
 void Exporter::createRelocation(PolyWord* pt)
 {
     *gMem.SpaceForAddress(pt)->writeAble(pt) = createRelocation(*pt, pt);
 }
 
 // Return the index of the memory table entry whose area contains p.  The
 // address must be in one of the areas; if it is not the assertion fires
 // and zero is returned.  Note that the test excludes the first address of
 // an area and includes the address one past its end.
 unsigned Exporter::findArea(void *p)
 {
     unsigned area = 0;
     while (area < memTableEntries)
     {
         void *areaStart = memTable[area].mtOriginalAddr;
         char *areaEnd = (char*)areaStart + memTable[area].mtLength;
         if (p > areaStart && p <= areaEnd)
             return area;
         area++;
     }
     ASSERT(0);
     return 0;
 }
 
 // Process the contents of one object for export.
 //  - Byte objects are left alone unless they are mutable weak references,
 //    in which case an external reference may be added and the first word
 //    is cleared.
 //  - Code objects have each word of their constant segment relocated.
 //  - Closure objects are not expected except in 32-in-64.
 //  - Every word of any other object is relocated.
 void Exporter::relocateObject(PolyObject *p)
 {
     if (p->IsByteObject())
     {
         // Weak mutable byte refs are used for external references and
         // also in the FFI for non-persistent values.  Nothing else needs work.
         if (!p->IsMutable() || !p->IsWeakRefObject())
             return;
         bool isFuncPtr = true;
         const char *entryName = getEntryPointName(p, &isFuncPtr);
         if (entryName != 0)
             addExternalReference(p, entryName, isFuncPtr);
         // Clear the first word of the data.
         ASSERT(p->Length() >= sizeof(uintptr_t)/sizeof(PolyWord));
         *(uintptr_t*)p = 0;
         return;
     }
 
     if (p->IsCodeObject())
     {
         ASSERT(! p->IsMutable() );
         PolyWord *constants;
         POLYUNSIGNED constCount;
         p->GetConstSegmentForCode(constants, constCount);
         // Now the constants.
         for (POLYUNSIGNED n = 0; n < constCount; n++)
             relocateValue(constants + n);
         return;
     }
 
     if (p->IsClosureObject())
     {
         // This should only be used in 32-in-64 where we don't use relocations.
 #ifndef POLYML32IN64
         ASSERT(0);
 #endif
         return;
     }
 
     // Ordinary objects, essentially tuples.
     const POLYUNSIGNED fieldCount = p->Length();
     for (POLYUNSIGNED n = 0; n < fieldCount; n++)
         relocateValue(p->Offset(n));
 }
 
 // Start with an empty table: no buffer, nothing used, nothing available.
 ExportStringTable::ExportStringTable()
 {
     strings = 0;
     stringSize = 0;
     stringAvailable = 0;
 }
 
 // The buffer is grown with realloc in makeEntry so it is released with free.
 ExportStringTable::~ExportStringTable()
 {
     ::free(this->strings);
 }
 
 // Append a string, including its terminating zero, to the table and return
 // the offset at which it was stored.  The buffer is grown when there is
 // not enough room.  Throws MemoryException if the buffer cannot be grown.
 unsigned long ExportStringTable::makeEntry(const char *str)
 {
     const unsigned strLength = (unsigned)strlen(str);
     const unsigned long offset = stringSize;
     const bool mustGrow = stringSize + strLength + 1 > stringAvailable;
     if (mustGrow)
     {
         // Try growing by half.  If that is still too small take exactly
         // what is needed plus some slack.
         stringAvailable += stringAvailable/2;
         if (stringAvailable < stringSize + strLength + 1)
             stringAvailable = stringSize + strLength + 1 + 500;
         char *grown = (char*)realloc(strings, stringAvailable);
         if (grown == 0)
         {
             if (debugOptions & DEBUG_SAVING)
                 Log("SAVE: Unable to realloc string table, size: %lu.\n", stringAvailable);
             throw MemoryException();
         }
         strings = grown;
     }
     strcpy(strings + stringSize, str);
     stringSize += strLength + 1;
     return offset;
 }
 
 // Table pairing the names of the entry points defined in this file with
 // their addresses.  The list is terminated by an entry of two NULLs.
 struct _entrypts exporterEPT[] =
 {
     { "PolyExport",                     (polyRTSFunction)&PolyExport},
     { "PolyExportPortable",             (polyRTSFunction)&PolyExportPortable},
 
     { NULL, NULL} // End of list.
 };
diff --git a/libpolyml/gc_mark_phase.cpp b/libpolyml/gc_mark_phase.cpp
index 87633796..b14ccc1a 100644
--- a/libpolyml/gc_mark_phase.cpp
+++ b/libpolyml/gc_mark_phase.cpp
@@ -1,888 +1,888 @@
 /*
     Title:      Multi-Threaded Garbage Collector - Mark phase
 
     Copyright (c) 2010-12, 2015-16, 2019 David C. J. Matthews
 
     Based on the original garbage collector code
         Copyright 2000-2008
         Cambridge University Technical Services Limited
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License version 2.1 as published by the Free Software Foundation.
     
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.
     
     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 /*
 This is the first, mark, phase of the garbage collector.  It detects all
 reachable cells in the area being collected.  At the end of the phase the
 bit-maps associated with the areas will have ones for words belonging to cells
 that must be retained and zeros for words that can be reused.
 
 This is now multi-threaded.  The mark phase involves setting a bit in the header
 of each live cell and then a pass over the memory building the bitmaps and clearing
 this bit.  It is unfortunate that we cannot use the GC-bit that is used in
 forwarding pointers but we may well have forwarded pointers left over from a
 partially completed minor GC.  Using a bit in the header avoids the need for
 locking since at worst it may involve two threads duplicating some marking.
 
 The code ensures that each reachable cell is marked at least once but with
 multiple threads a cell may be marked by more than one thread if the
 memory is not fully up to date.  Each thread has a stack on which it
 remembers cells that have been marked but not fully scanned.  If a
 thread runs out of cells of its own to scan it can pick a pointer off
 the stack of another thread and scan that.  The original thread will
 still scan it some time later but it should find that the addresses
 in it have all been marked and it can simply pop this off.  This is
 all done without locking.  Stacks are only modified by the owning
 thread and when they pop anything they write zero in its place.
 Other threads only need to search for a zero to find if they are
 at the top and if they get a pointer that has already been scanned
 then this is safe.  The only assumption made about the memory is
 that all the bits of a word are updated together so that a thread
 will always read a value that is a valid pointer.
 
 Many of the ideas are drawn from Flood, Detlefs, Shavit and Zhang 2001
 "Parallel Garbage Collection for Shared Memory Multiprocessors".
 */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #elif defined(_WIN32)
 #include "winconfig.h"
 #else
 #error "No configuration file"
 #endif
 
 #ifdef HAVE_ASSERT_H
 #include <assert.h>
 #define ASSERT(x)   assert(x)
 #else
 #define ASSERT(x)
 #endif
 
 #include "globals.h"
 #include "processes.h"
 #include "gc.h"
 #include "scanaddrs.h"
 #include "check_objects.h"
 #include "bitmap.h"
 #include "memmgr.h"
 #include "diagnostics.h"
 #include "gctaskfarm.h"
 #include "profiling.h"
 #include "heapsizing.h"
 
 #define MARK_STACK_SIZE 3000
 #define LARGECACHE_SIZE 20
 
 // Marker object for the mark phase.  InitStatics creates an array of these,
 // one for each GC thread, and each has its own mark stack and large-object
 // cache.  The static members are shared between all the markers.
 class MTGCProcessMarkPointers: public ScanAddress
 {
 public:
     MTGCProcessMarkPointers();
 
     virtual void ScanRuntimeAddress(PolyObject **pt, RtsStrength weak);
     virtual PolyObject *ScanObjectAddress(PolyObject *base);
 
     virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord);
     // Have to redefine this for some reason.
     // NOTE(review): probably because the two-argument override above hides
     // the inherited one-argument overload - confirm against ScanAddress.
     void ScanAddressesInObject(PolyObject *base)
         { ScanAddressesInObject(base, base->LengthWord()); }
 
     virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code);
     // ScanCodeAddressAt should never be called.
     POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { ASSERT(0); return 0; }
 
     // Entry point for a task run by the task farm.  arg1 is the marker to
     // use and arg2 is the object to start scanning from.
     static void MarkPointersTask(GCTaskId *, void *arg1, void *arg2);
 
     // Create one marker for each thread.
     // NOTE(review): the array is never deleted in this file so this is
     // presumably called only once - confirm.
     static void InitStatics(unsigned threads)
     {
         markStacks = new MTGCProcessMarkPointers[threads];
         nInUse = 0;
         nThreads = threads;
     }
 
     static void MarkRoots(void);
     static bool RescanForStackOverflow();
 
 private:
     bool TestForScan(PolyWord *pt);
     void MarkAndTestForScan(PolyWord *pt);
     void Reset();
 
     // Remember an object that still has addresses to be followed.  The work
     // is either handed to a new task, pushed on this marker's stack or, if
     // the stack is full, recorded as a range to be rescanned later.
     // If currentPtr is non-zero it is the place within the object at which
     // scanning should resume and it is saved in the large-object cache.
     void PushToStack(PolyObject *obj, PolyWord *currentPtr = 0)
     {
         // If we don't have all the threads running we start a new one but
         // only once we have several items on the stack.  Otherwise we
         // can end up creating a task that terminates almost immediately.
         if (nInUse >= nThreads || msp < 2 || ! ForkNew(obj))
         {
             if (msp < MARK_STACK_SIZE)
             {
                 markStack[msp++] = obj;
                 if (currentPtr != 0)
                 {
                     // The cache is used circularly: advance to the next
                     // entry, wrapping at the end, and overwrite it.
                     locPtr++;
                     if (locPtr == LARGECACHE_SIZE) locPtr = 0;
                     largeObjectCache[locPtr].base = obj;
                     largeObjectCache[locPtr].current = currentPtr;
                 }
             }
             else StackOverflow(obj);
         }
         // else the new task is processing it.
     }
 
     static void StackOverflow(PolyObject *obj);
     static bool ForkNew(PolyObject *obj);    
 
     // The mark stack.  msp is the index of the next free entry so the stack
     // is empty when msp is zero.  Entries above the top are kept as zero
     // so that other threads can find the top (see MarkPointersTask).
     PolyObject *markStack[MARK_STACK_SIZE];
     unsigned msp;
     // Set while this marker has been claimed by a task or by the main thread.
     bool active;
 
     // For the typical small cell it's easier just to rescan from the start
     // but that can be expensive for large cells.  This caches the offset for
     // large cells.
     static const POLYUNSIGNED largeObjectSize = 50;
     struct { PolyObject *base; PolyWord *current; } largeObjectCache[LARGECACHE_SIZE];
     unsigned locPtr; // Index of the most recently written cache entry.
 
     // The array of markers created by InitStatics.
     static MTGCProcessMarkPointers *markStacks;
 protected:
     // nThreads is the number of markers in markStacks; nInUse is the number
     // that are currently active.
     static unsigned nThreads, nInUse;
     static PLock stackLock;
 };
 
 // There is one mark-stack for each GC thread.  markStacks[0] is used by the
 // main thread when marking the roots and rescanning after mark-stack overflow.
 // Once that work is done markStacks[0] is released and is available for a
 // worker thread.
 MTGCProcessMarkPointers *MTGCProcessMarkPointers::markStacks;
 unsigned MTGCProcessMarkPointers::nThreads, MTGCProcessMarkPointers::nInUse;
 // Held while a marker is being claimed in ForkNew and while one is being
 // released at the end of a task or of the main thread's work.
 PLock MTGCProcessMarkPointers::stackLock("GC mark stack");
 
 // It is possible to have two levels of forwarding because
 // we could have a cell in the allocation area that has been moved
 // to the immutable area and then shared with another cell.
 // Follows the chain until it reaches a cell that does not contain a
 // forwarding pointer and returns that cell.  If obj is not forwarded
 // it is returned unchanged.
 inline PolyObject *FollowForwarding(PolyObject *obj)
 {
     while (obj->ContainsForwardingPtr())
         obj = obj->GetForwardingPtr();
     return obj;
 }
 
 // Construct an inactive marker with an empty stack and an empty cache.
 MTGCProcessMarkPointers::MTGCProcessMarkPointers(): msp(0), active(false), locPtr(0)
 {
     // Clear the mark stack
     // Other threads treat a zero entry as the top of the stack so every
     // unused entry must be zero.
     for (unsigned i = 0; i < MARK_STACK_SIZE; i++)
         markStack[i] = 0;
     // Clear the large object cache just to be sure.
     for (unsigned j = 0; j < LARGECACHE_SIZE; j++)
     {
         largeObjectCache[j].base = 0;
         largeObjectCache[j].current = 0;
     }
 }
 
 // Clear the state at the beginning of a new GC pass.
 // Only the large-object cache is reset.  The mark stack and msp are not
 // touched here.
 void MTGCProcessMarkPointers::Reset()
 {
     locPtr = 0;
     //largeObjectCache[locPtr].base = 0;
     // Clear the cache completely just to be safe
     for (unsigned j = 0; j < LARGECACHE_SIZE; j++)
     {
         largeObjectCache[j].base = 0;
         largeObjectCache[j].current = 0;
     }
 
 }
 
 // Called when the stack has overflowed.  We need to include this
 // in the range to be rescanned.
 // The object is not scanned now.  Instead the rescan range of the space
 // that contains it is widened to cover the whole cell, length word included.
 void MTGCProcessMarkPointers::StackOverflow(PolyObject *obj)
 {
-    MarkableSpace *space = (MarkableSpace*)gMem.SpaceForAddress((PolyWord*)obj-1);
+    MarkableSpace *space = (MarkableSpace*)gMem.SpaceForObjectAddress(obj);
     ASSERT(space != 0 && (space->spaceType == ST_LOCAL || space->spaceType == ST_CODE));
     // Several threads may overflow at once so the limits are updated under
     // the lock for the space.
     PLocker lock(&space->spaceLock);
     // Have to include this in the range to rescan.
     // The start is the address of the length word, one word before the object.
     if (space->fullGCRescanStart > ((PolyWord*)obj) - 1)
         space->fullGCRescanStart = ((PolyWord*)obj) - 1;
     POLYUNSIGNED n = obj->Length();
     // The end is the word after the last word of the object.
     if (space->fullGCRescanEnd < ((PolyWord*)obj) + n)
         space->fullGCRescanEnd = ((PolyWord*)obj) + n;
     ASSERT(obj->LengthWord() & _OBJ_GC_MARK); // Should have been marked.
     if (debugOptions & DEBUG_GC_ENHANCED)
         Log("GC: Mark: Stack overflow.  Rescan for %p\n", obj);
 }
 
 // Fork a new task.  Because we've checked nInUse without taking the lock
 // we may find that we can no longer create a new task.
 // Returns true if a task has been queued to scan obj and false if every
 // marker was already in use, in which case the caller must deal with obj.
 bool MTGCProcessMarkPointers::ForkNew(PolyObject *obj)
 {
     MTGCProcessMarkPointers *marker = 0;
     {
         // Claim a free marker.  The lock is released at the end of this
         // block, before the work is queued.
         PLocker lock(&stackLock);
         if (nInUse == nThreads)
             return false;
         for (unsigned i = 0; i < nThreads; i++)
         {
             if (! markStacks[i].active)
             {
                 marker = &markStacks[i];
                 break;
             }
         }
         // nInUse < nThreads so there should be an inactive marker.
         ASSERT(marker != 0);
         marker->active = true;
         nInUse++;
     }
     bool test = gpTaskFarm->AddWork(&MTGCProcessMarkPointers::MarkPointersTask, marker, obj);
     // N.B. The result is only checked by the assertion.  If ASSERT is
     // defined as empty a failure to queue the work is not detected.
     ASSERT(test);
     return true;
 }
 
 // Main marking task.  This is forked off initially to scan a specific object and
 // anything reachable from it but once that has finished it tries to find objects
 // on other stacks to scan.
 // arg1 is the marker claimed for this task by ForkNew and arg2 is the
 // object to scan first.
 void MTGCProcessMarkPointers::MarkPointersTask(GCTaskId *, void *arg1, void *arg2)
 {
     MTGCProcessMarkPointers *marker = (MTGCProcessMarkPointers*)arg1;
     marker->Reset();
 
     marker->ScanAddressesInObject((PolyObject*)arg2);
 
     while (true)
     {
         // Look for a stack that has at least one item on it.
         // A non-zero bottom entry means the stack is not empty.  The other
         // stacks are read here without taking any lock.
         MTGCProcessMarkPointers *steal = 0;
         for (unsigned i = 0; i < nThreads && steal == 0; i++)
         {
             if (markStacks[i].markStack[0] != 0)
                 steal = &markStacks[i];
         }
         // We're finished if they're all done.
         if (steal == 0)
             break;
         // Look for items on this stack
         for (unsigned j = 0; j < MARK_STACK_SIZE; j++)
         {
             // Pick the item off the stack.
             // N.B. The owning thread may update this to zero
             // at any time.
             PolyObject *toSteal = steal->markStack[j];
             if (toSteal == 0) break; // Nothing more on the stack
             // The idea here is that the original thread pushed this
             // because there were at least two addresses it needed to
             // process.  It started down one branch but left the other.
             // Since it will have marked cells in the branch it has
             // followed this thread will start on the unprocessed
             // address(es).
             marker->ScanAddressesInObject(toSteal);
         }
     }
 
     // Release the marker so that ForkNew can reuse it.
     PLocker lock(&stackLock);
     marker->active = false; // It's finished
     nInUse--;
     ASSERT(marker->markStack[0] == 0);
 }
 
 // Tests if this needs to be scanned.  It marks it if it has not been marked
 // unless it has to be scanned.
 // Returns true if the word refers to an unmarked cell in a local or code
 // space that may contain addresses.  In that case the cell has NOT been
 // marked and the caller must mark it.  Returns false for tagged values,
 // cells outside those spaces, cells that are already marked and byte
 // cells; an unmarked byte cell is marked here.
 // If the cell has been forwarded *pt is updated to the new address.
 bool MTGCProcessMarkPointers::TestForScan(PolyWord *pt)
 {
     if ((*pt).IsTagged())
         return false;
 
     // This could contain a forwarding pointer if it points into an
     // allocation area and has been moved by the minor GC.
     // We have to be a little careful.  Another thread could also
     // be following any forwarding pointers here.  However it's safe
     // because they will update it with the same value.
     PolyObject *obj = (*pt).AsObjPtr();
     if (obj->ContainsForwardingPtr())
     {
         obj = FollowForwarding(obj);
         *pt = obj;
     }
 
-    MemSpace *sp = gMem.SpaceForAddress((PolyWord*)obj-1);
+    MemSpace *sp = gMem.SpaceForObjectAddress(obj);
     if (sp == 0 || (sp->spaceType != ST_LOCAL && sp->spaceType != ST_CODE))
         return false; // Ignore it if it points to a permanent area
 
     POLYUNSIGNED L = obj->LengthWord();
     if (L & _OBJ_GC_MARK)
         return false; // Already marked
 
     if (debugOptions & DEBUG_GC_DETAIL)
         Log("GC: Mark: %p %" POLYUFMT " %u\n", obj, OBJ_OBJECT_LENGTH(L), GetTypeBits(L));
 
     // Byte cells contain no addresses so marking is all that is required.
     if (OBJ_IS_BYTE_OBJECT(L))
     {
         obj->SetLengthWord(L | _OBJ_GC_MARK); // Mark it
         return false; // We've done as much as we need
     }
     return true;
 }
 
 // Mark the cell referred to by *pt, if it needs it, without scanning its
 // contents.  Used for the words of weak-reference cells.
 // NOTE(review): the length word is written directly here whereas
 // ScanAddressesInObject goes through writeAble for code cells - confirm
 // that a weak cell can never refer directly to a code cell.
 void MTGCProcessMarkPointers::MarkAndTestForScan(PolyWord *pt)
 {
     if (TestForScan(pt))
     {
         // TestForScan has updated *pt if the cell had been forwarded.
         PolyObject *obj = (*pt).AsObjPtr();
         obj->SetLengthWord(obj->LengthWord() | _OBJ_GC_MARK);
     }
 }
 
 // The initial entry to process the roots.  These may be RTS addresses or addresses in
 // a thread stack.  Also called recursively to process the addresses of constants in
 // code segments.  This is used in situations where a scanner may return the
 // updated address of an object.
 // The result is the address the caller should store: the forwarded address
 // if the cell had been moved, otherwise the original address.
 PolyObject *MTGCProcessMarkPointers::ScanObjectAddress(PolyObject *obj)
 {
     // NOTE(review): unlike TestForScan the result is not tested for null
     // before it is used.  Presumably every address passed here is known
     // to be within some memory space - confirm.
     MemSpace *sp = gMem.SpaceForAddress((PolyWord*)obj-1);
 
     if (!(sp->spaceType == ST_LOCAL || sp->spaceType == ST_CODE))
         return obj; // Ignore it if it points to a permanent area
 
     // We may have a forwarding pointer if this has been moved by the
     // minor GC.
     if (obj->ContainsForwardingPtr())
     {
         obj = FollowForwarding(obj);
         // The cell may now be in a different space.
         sp = gMem.SpaceForAddress((PolyWord*)obj - 1);
     }
 
     ASSERT(obj->ContainsNormalLengthWord());
 
     POLYUNSIGNED L = obj->LengthWord();
     if (L & _OBJ_GC_MARK)
         return obj; // Already marked
     sp->writeAble(obj)->SetLengthWord(L | _OBJ_GC_MARK); // Mark it
 
     if (profileMode == kProfileLiveData || (profileMode == kProfileLiveMutables && obj->IsMutable()))
         AddObjectProfile(obj);
 
     POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L);
     if (debugOptions & DEBUG_GC_DETAIL)
         Log("GC: Mark: %p %" POLYUFMT " %u\n", obj, n, GetTypeBits(L));
 
     // Byte cells contain no addresses so there is nothing to scan.
     if (OBJ_IS_BYTE_OBJECT(L))
         return obj;
 
     // If we already have something on the stack we must be being called
     // recursively to process a constant in a code segment.  Just push
     // it on the stack and let the caller deal with it.
     if (msp != 0)
         PushToStack(obj); // Can't check this because it may have forwarding ptrs.
     else
     {
         // Normally a root but this can happen if we're following constants in code.
         // In that case we want to make sure that we don't recurse too deeply and
         // overflow the C stack.  Push the address to the stack before calling
         // ScanAddressesInObject so that if we come back here msp will be non-zero.
         // ScanAddressesInObject will empty the stack.
         PushToStack(obj);
         // L is the length word as read before the mark bit was set.
         MTGCProcessMarkPointers::ScanAddressesInObject(obj, L);
         // We can only check after we've processed it because if we
         // have addresses left over from an incomplete partial GC they
         // may need to be forwarded.
         CheckObject (obj);
     }
 
     return obj;
 }
 
 // These functions are only called with pointers held by the runtime system.
 // Weak references can occur in the runtime system, eg. streams and windows.
 // Weak references are not marked and so unreferenced streams and windows
 // can be detected and closed.
 // For a strong reference the cell is marked and scanned and *pt is updated
 // in case the cell had been forwarded.
 void MTGCProcessMarkPointers::ScanRuntimeAddress(PolyObject **pt, RtsStrength weak)
 {
     if (weak == STRENGTH_WEAK) return;
     *pt = ScanObjectAddress(*pt);
     CheckPointer (*pt); // Check it after any forwarding pointers have been followed.
 }
 
 // This is called via ScanAddressesInRegion to process the permanent mutables.  It is
 // also called from ScanObjectAddress to process root addresses.
 // It processes all the addresses reachable from the object.
 // This is almost the same as RecursiveScan::ScanAddressesInObject. 
 // The traversal is iterative.  Each pass of the outer loop deals with one
 // cell and then continues either with the first unmarked cell it refers to
 // or with a cell popped from the mark stack.  It returns when there is
 // nothing more to follow and the stack is empty.
 void MTGCProcessMarkPointers::ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord)
 {
     if (OBJ_IS_BYTE_OBJECT(lengthWord))
         return;
 
     while (true)
     {
         ASSERT (OBJ_IS_LENGTH(lengthWord));
 
         POLYUNSIGNED length = OBJ_OBJECT_LENGTH(lengthWord);
         // baseAddr is the next word to examine and endWord is one past the
         // last.  Setting endWord to baseAddr means no general scan is needed.
         PolyWord *baseAddr = (PolyWord*)obj;
         PolyWord *endWord = baseAddr + length;
 
         if (OBJ_IS_WEAKREF_OBJECT(lengthWord))
         {
             // Special case.  
             ASSERT(OBJ_IS_MUTABLE_OBJECT(lengthWord)); // Should be a mutable.
             ASSERT(OBJ_IS_WORD_OBJECT(lengthWord)); // Should be a plain object.
             // We need to mark the "SOME" values in this object but we don't mark
             // the references contained within the "SOME".
             // Mark every word but ignore the result.
             for (POLYUNSIGNED i = 0; i < length; i++)
                 (void)MarkAndTestForScan(baseAddr+i);
             // We've finished with this.
             endWord = baseAddr;
         }
 
         else if (OBJ_IS_CODE_OBJECT(lengthWord))
         {
             // Code addresses in the native code versions.
             // Closure cells are normal (word) objects and code addresses are normal addresses.
             // It's better to process the whole code object in one go.
             ScanAddress::ScanAddressesInObject(obj, lengthWord);
             endWord = baseAddr; // Finished
         }
 
         else if (OBJ_IS_CLOSURE_OBJECT(lengthWord))
         {
             // Closure cells in 32-in-64.
             // The first word is the absolute address of the code ...
             PolyObject *codeAddr = *(PolyObject**)obj;
             // except that it is possible we haven't yet set it.
             // An odd value is treated as not being an address.
             if (((uintptr_t)codeAddr & 1) == 0)
                 ScanObjectAddress(codeAddr);
             // The rest is a normal tuple.
             // Skip the words occupied by the absolute address.
             baseAddr += sizeof(PolyObject*) / sizeof(PolyWord);
         }
 
         // If there are only two addresses in this cell that need to be
         // followed we follow them immediately and treat this cell as done.
         // If there are more than two we push the address of this cell on
         // the stack, follow the first address and then rescan it.  That way
         // list cells are processed once only but we don't overflow the
         // stack by pushing all the addresses in a very large vector.
         PolyObject *firstWord = 0;
         PolyObject *secondWord = 0;
         PolyWord *restartAddr = 0;
 
         // If this cell is the most recent entry in the large-object cache
         // resume from the saved position rather than from the start and
         // step the cache index back to the previous entry.
         if (obj == largeObjectCache[locPtr].base)
         {
             baseAddr = largeObjectCache[locPtr].current;
             ASSERT(baseAddr > (PolyWord*)obj && baseAddr < endWord);
             if (locPtr == 0) locPtr = LARGECACHE_SIZE - 1; else locPtr--;
         }
 
         while (baseAddr != endWord)
         {
             PolyWord wordAt = *baseAddr;
 
             if (wordAt.IsDataPtr() && wordAt != PolyWord::FromUnsigned(0))
             {
                 // Normal address.  We can have words of all zeros at least in the
                 // situation where we have a partially constructed code segment where
                 // the constants at the end of the code have not yet been filled in.
                 if (TestForScan(baseAddr))
                 {
                     // N.B. TestForScan may have updated the word at baseAddr
                     // so the address is read again from there.
                     if (firstWord == 0)
                         firstWord = baseAddr->AsObjPtr();
                     else if (secondWord == 0)
                     {
                         // If we need to rescan because there are three or more words to do
                         // this is the place we need to restart (or the start of the cell if it's
                         // small).
                         restartAddr = baseAddr;
                         secondWord = baseAddr->AsObjPtr();
                     }
                     else break;  // More than two words.
                 }
             }
             baseAddr++;
         }
 
         // If the loop stopped early there are at least three cells to follow.
         if (baseAddr != endWord)
             // Put this back on the stack while we process the first word
             // Only cells of at least largeObjectSize words have the
             // restart position cached.
             PushToStack(obj, length < largeObjectSize ? 0 : restartAddr);
         else if (secondWord != 0)
         {
             // Mark it now because we will process it.
             // For a code cell the length word is written through the
             // address given by writeAble for the space containing it.
             PolyObject* writeAble = secondWord;
             if (secondWord->IsCodeObject())
                 writeAble = gMem.SpaceForObjectAddress(secondWord)->writeAble(secondWord);
             writeAble->SetLengthWord(secondWord->LengthWord() | _OBJ_GC_MARK);
             // Put this on the stack.  If this is a list node we will be
             // pushing the tail.
             PushToStack(secondWord);
         }
 
         if (firstWord != 0)
         {
             // Mark it and process it immediately.
             PolyObject* writeAble = firstWord;
             if (firstWord->IsCodeObject())
                 writeAble = gMem.SpaceForObjectAddress(firstWord)->writeAble(firstWord);
             writeAble->SetLengthWord(firstWord->LengthWord() | _OBJ_GC_MARK);
             obj = firstWord;
         }
         else if (msp == 0)
         {
             // Zero the bottom entry so other threads see this stack as empty.
             markStack[msp] = 0; // Really finished
             return;
         }
         else
         {
             // Clear the item above the top.  This really is finished.
             if (msp < MARK_STACK_SIZE) markStack[msp] = 0;
             // Pop the item from the stack but don't overwrite it yet.
             // This allows another thread to steal it if there really
             // is nothing else to do.  This is only really important
             // for large objects.
             obj = markStack[--msp]; // Pop something.
         }
 
         // Continue with the next cell.  Its length word now includes the mark bit.
         lengthWord = obj->LengthWord();
     }
 }
 
 // Process a constant within the code.  This is a direct copy of ScanAddress::ScanConstant
 // with the addition of the locking.
 // addressOfConstant is the location of the constant and "code" says how it
 // is encoded.  The lock is held only while the constant is read or written
 // and not while the cell it refers to is being scanned.
 void MTGCProcessMarkPointers::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code)
 {
     // If we have newly compiled code the constants may be in the
     // local heap.  MTGCProcessMarkPointers::ScanObjectAddress can
     // return an updated address for a local address if there is a
     // forwarding pointer.  
     // Constants can be aligned on any byte offset so another thread
     // scanning the same code could see an invalid address if it read
     // the constant while it was being updated.  We put a lock round
     // this just in case.
     MemSpace *space = gMem.SpaceForAddress(addressOfConstant);
     // The lock is only used if the constant is in a code space.
     PLock *lock = 0;
     if (space->spaceType == ST_CODE)
         lock = &((CodeSpace*)space)->spaceLock;
 
     if (lock != 0)
         lock->Lock();
     PolyObject *p = GetConstantValue(addressOfConstant, code);
     if (lock != 0)
         lock->Unlock();
 
     if (p != 0)
     {
         PolyObject *newVal = ScanObjectAddress(p);
         if (newVal != p) // Update it if it has changed.
         {
             if (lock != 0)
                 lock->Lock();
             SetConstantValue(addressOfConstant, newVal, code);
             if (lock != 0)
                 lock->Unlock();
         }
     }
 }
 
 // Mark all the roots.  This is run in the main thread and has the effect
 // of starting new tasks as the scanning runs.
 // The roots are the permanent mutable areas and the addresses held by the
 // RTS modules.  markStacks[0] is used as the marker.
 void MTGCProcessMarkPointers::MarkRoots(void)
 {
     ASSERT(nThreads >= 1);
     ASSERT(nInUse == 0);
     // No marker is in use at this point so the marker is claimed here
     // without taking stackLock.
     MTGCProcessMarkPointers *marker = &markStacks[0];
     marker->Reset();
     marker->active = true;
     nInUse = 1;
 
     // Scan the permanent mutable areas.
     // Areas that are flagged as containing only byte data are skipped.
     for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
     {
         PermanentMemSpace *space = *i;
         if (space->isMutable && ! space->byteOnly)
             marker->ScanAddressesInRegion(space->bottom, space->top);
     }
 
     // Scan the RTS roots.
     GCModules(marker);
 
     ASSERT(marker->markStack[0] == 0);
 
     // When this has finished there may well be other tasks running.
     // The lock is therefore needed when releasing the marker.
     PLocker lock(&stackLock);
     marker->active = false;
     nInUse--;
 }
 
 // This class just allows us to use ScanAddress::ScanAddressesInRegion to call
 // ScanAddressesInObject for each object in the region.
 // The actual scanning is passed on to the marker given to the constructor.
 class Rescanner: public ScanAddress
 {
 public:
     Rescanner(MTGCProcessMarkPointers *marker): m_marker(marker) {}
 
     // Cells that have not been marked are ignored.
     virtual void ScanAddressesInObject(PolyObject *obj, POLYUNSIGNED lengthWord)
     {
         // If it has previously been marked it is known to be reachable but
         // the contents may not have been scanned if the stack overflowed.
         if (lengthWord &_OBJ_GC_MARK)
             m_marker->ScanAddressesInObject(obj, lengthWord);
     }
 
     // Have to define this.
     // Neither of these is expected to be called.
     virtual PolyObject *ScanObjectAddress(PolyObject *base) { ASSERT(false); return 0; }
     virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { ASSERT(false); return 0; }
 
     // Rescan the pending range of a space.  Returns true if there was one.
     bool ScanSpace(MarkableSpace *space);
 private:
     MTGCProcessMarkPointers *m_marker; // The marker that does the scanning.
 };
 
 // Rescan any marked objects in the area between fullGCRescanStart and fullGCRescanEnd.
 // N.B.  We may have threads already processing other areas and they could overflow
 // their stacks and change fullGCRescanStart or fullGCRescanEnd.
 // Returns true if there was a range to scan, in which case the caller must
 // check again since the scan may itself have caused a further overflow.
 bool Rescanner::ScanSpace(MarkableSpace *space)
 {
     PolyWord *start, *end;
     {
         // Take a copy of the range and reset it to empty (start above end)
         // under the lock.  The scan itself is done without the lock.
         PLocker lock(&space->spaceLock);
         start = space->fullGCRescanStart;
         end = space->fullGCRescanEnd;
         space->fullGCRescanStart = space->top;
         space->fullGCRescanEnd = space->bottom;
     }
     if (start < end)
     {
         if (debugOptions & DEBUG_GC_ENHANCED)
             Log("GC: Mark: Rescanning from %p to %p\n", start, end);
         ScanAddressesInRegion(start, end);
         return true; // Require rescan
     }
     else return false;
 }
 
 // When the threads created by marking the roots have completed we need to check that
 // the mark stack has not overflowed.  If it has we need to rescan.  This rescanning
 // pass may result in a further overflow so if we find we have to rescan we repeat.
 // Returns true if any space had a range to rescan.  The caller repeats the
 // call until the result is false.
 bool MTGCProcessMarkPointers::RescanForStackOverflow()
 {
     ASSERT(nThreads >= 1);
     ASSERT(nInUse == 0);
     // As in MarkRoots, markStacks[0] is claimed without the lock because
     // no marker is in use at this point.
     MTGCProcessMarkPointers *marker = &markStacks[0];
     marker->Reset();
     marker->active = true;
     nInUse = 1;
     bool rescan = false;
     Rescanner rescanner(marker);
 
     // Check the local spaces and then the code spaces.
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         if (rescanner.ScanSpace(*i))
             rescan = true;
     }
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
     {
         if (rescanner.ScanSpace(*i))
             rescan = true;
     }
     {
         // Tasks forked during the rescan may still be running so the
         // marker is released under the lock.
         PLocker lock(&stackLock);
         nInUse--;
         marker->active = false;
     }
     return rescan;
 }
 
 // Walk the cells between pt and top, where pt points at a length word.
 // For each marked cell clear the mark bit, set the bits in the bitmap for
 // the space that correspond to the cell and its length word, and update
 // the counts and limits held in the space.
 static void SetBitmaps(LocalMemSpace *space, PolyWord *pt, PolyWord *top)
 {
     while (pt < top)
     {
 #ifdef POLYML32IN64
         // NOTE(review): this appears to skip a padding word so that the
         // length word is always at an address with bit 2 set - confirm.
         if ((((uintptr_t)pt) & 4) == 0)
         {
             pt++;
             continue;
         }
 #endif
         // Step over the length word: pt and obj now both refer to the
         // first word of the cell.
         PolyObject *obj = (PolyObject*)++pt;
         // If it has been copied by a minor collection skip it
         if (obj->ContainsForwardingPtr())
         {
             // The length has to be taken from the copy.
             obj = FollowForwarding(obj);
             ASSERT(obj->ContainsNormalLengthWord());
             pt += obj->Length();
         }
         else
         {
             POLYUNSIGNED L = obj->LengthWord();
             POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L);
             if (L & _OBJ_GC_MARK)
             {
                 obj->SetLengthWord(L & ~(_OBJ_GC_MARK));
                 // Set n+1 bits starting at the bit for the length word.
                 uintptr_t bitno = space->wordNo(pt);
                 space->bitmap.SetBits(bitno - 1, n + 1);
 
                 // The counts include the length word.
                 if (OBJ_IS_MUTABLE_OBJECT(L))
                     space->m_marked += n + 1;
                 else
                     space->i_marked += n + 1;
 
                 // Lower the limit, if necessary, to the length word of this cell.
                 if ((PolyWord*)obj <= space->fullGCLowerLimit)
                     space->fullGCLowerLimit = (PolyWord*)obj-1;
 
                 if (OBJ_IS_WEAKREF_OBJECT(L))
                 {
                     // Add this to the limits for the containing area.
                     PolyWord *baseAddr = (PolyWord*)obj;
                     PolyWord *startAddr = baseAddr-1; // Must point AT length word.
                     PolyWord *endObject = baseAddr + n;
                     if (startAddr < space->lowestWeak) space->lowestWeak = startAddr;
                     if (endObject > space->highestWeak) space->highestWeak = endObject;
                 }
             }
             // Move on to the length word of the next cell.
             pt += n;
         }
     }
 }
 
 // Task to build the bitmap for one local space.  arg1 is the space; arg2
 // is not used.  The whole bitmap is cleared first and then the bits for
 // the marked cells are set.
 static void CreateBitmapsTask(GCTaskId *, void *arg1, void *arg2)
 {
     LocalMemSpace *lSpace = (LocalMemSpace *)arg1;
     lSpace->bitmap.ClearBits(0, lSpace->spaceSize());
     SetBitmaps(lSpace, lSpace->bottom, lSpace->top);
 }
 
 // Parallel task to check the marks on cells in the code area and
 // turn them into byte areas if they are free.
 // arg1 is the code space; arg2 is not used.  Adjacent free cells are merged
 // and firstFree and largestFree in the space are recomputed.
 static void CheckMarksOnCodeTask(GCTaskId *, void *arg1, void *arg2)
 {
     CodeSpace *space = (CodeSpace*)arg1;
 #ifdef POLYML32IN64
     PolyWord *pt = space->bottom+1;
 #else
     PolyWord *pt = space->bottom;
 #endif
     // lastFree is the length word of the free area currently being built
     // up, or zero if there is none, and lastFreeSpace is its size in words
     // including the length word.
     PolyWord *lastFree = 0;
     POLYUNSIGNED lastFreeSpace = 0;
     space->largestFree = 0;
     space->firstFree = 0;
     while (pt < space->top)
     {
         // pt is at the length word so the cell starts one word later.
         PolyObject *obj = (PolyObject*)(pt+1);
         // There should not be forwarding pointers
         ASSERT(obj->ContainsNormalLengthWord());
         POLYUNSIGNED L = obj->LengthWord();
         POLYUNSIGNED length = OBJ_OBJECT_LENGTH(L);
         if (L & _OBJ_GC_MARK)
         {
             // It's marked - retain it.
             ASSERT(L & _OBJ_CODE_OBJ);
             space->writeAble(obj)->SetLengthWord(L & ~(_OBJ_GC_MARK)); // Clear the mark bit
             // A live cell ends any run of free cells.
             lastFree = 0;
             lastFreeSpace = 0;
         }
 #ifdef POLYML32IN64
         else if (length == 0)
         {
             // We may have zero filler words to set the correct alignment.
             // Merge them into a previously free area otherwise leave
             // them if they're after something allocated.
             if (lastFree + lastFreeSpace == pt)
             {
                 lastFreeSpace += length + 1;
                 PolyObject *freeSpace = (PolyObject*)(lastFree + 1);
                 space->writeAble(freeSpace)->SetLengthWord(lastFreeSpace - 1, F_BYTE_OBJ);
             }
         }
 #endif
         else { // Turn it into a byte area i.e. free.  It may already be free.
             if (space->firstFree == 0) space->firstFree = pt;
             space->headerMap.ClearBit(pt-space->bottom); // Remove the "header" bit
             if (lastFree + lastFreeSpace == pt)
                 // Merge free spaces.  Speeds up subsequent scans.
                 lastFreeSpace += length + 1;
             else
             {
                 lastFree = pt;
                 lastFreeSpace = length + 1;
             }
             // Rewrite the length word at the start of the free area so
             // that it is a byte cell covering the whole area.
             PolyObject *freeSpace = (PolyObject*)(lastFree+1);
             space->writeAble(freeSpace)->SetLengthWord(lastFreeSpace-1, F_BYTE_OBJ);
             if (lastFreeSpace > space->largestFree) space->largestFree = lastFreeSpace;
         }
         // Move on to the next length word.
         pt += length+1;
     }
 }
 
 // Run the whole of the mark phase: mark from the roots, rescan until there
 // is no outstanding mark-stack overflow, then turn the marks into bitmaps
 // for the local spaces and free the unmarked cells in the code spaces.
 void GCMarkPhase(void)
 {
     mainThreadPhase = MTP_GCPHASEMARK;
 
     // Clear the mark counters and set the rescan limits.
     // Setting the start to the top and the end to the bottom makes the
     // rescan range empty.
     for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *lSpace = *i;
         lSpace->i_marked = lSpace->m_marked = 0;
         lSpace->fullGCRescanStart = lSpace->top;
         lSpace->fullGCRescanEnd = lSpace->bottom;
     }
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
     {
         CodeSpace *space = *i;
         space->fullGCRescanStart = space->top;
         space->fullGCRescanEnd = space->bottom;
     }
     
     // Marking the roots may fork tasks so wait until they have all finished.
     MTGCProcessMarkPointers::MarkRoots();
     gpTaskFarm->WaitForCompletion();
 
     // Do we have to rescan because the mark stack overflowed?
     bool rescan;
     do {
         rescan = MTGCProcessMarkPointers::RescanForStackOverflow();
         gpTaskFarm->WaitForCompletion();
     } while(rescan);
 
     gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Mark");
 
     // Turn the marks into bitmap entries.
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
         gpTaskFarm->AddWorkOrRunNow(&CreateBitmapsTask, *i, 0);
 
     // Process the code areas.
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
         gpTaskFarm->AddWorkOrRunNow(&CheckMarksOnCodeTask, *i, 0);
 
     gpTaskFarm->WaitForCompletion(); // Wait for completion of the bitmaps
 
     gMem.RemoveEmptyCodeAreas();
 
     gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeIntermediate, "Bitmap");
 
     // Add up the live data for the diagnostics.  The counts are in words
     // and include the length words.
     uintptr_t totalLive = 0;
     for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *lSpace = *i;
         if (! lSpace->isMutable) ASSERT(lSpace->m_marked == 0);
         totalLive += lSpace->m_marked + lSpace->i_marked;
         if (debugOptions & DEBUG_GC_ENHANCED)
             Log("GC: Mark: %s space %p: %" POLYUFMT " immutable words marked, %" POLYUFMT " mutable words marked\n",
                                 lSpace->spaceTypeString(), lSpace,
                                 lSpace->i_marked, lSpace->m_marked);
     }
     if (debugOptions & DEBUG_GC)
         Log("GC: Mark: Total live data %" POLYUFMT " words\n", totalLive);
 }
 
 // Set up the stacks.
 // One marker is created for each thread in the task farm.  If the farm
 // reports no threads a single marker is still created for the main thread.
 void initialiseMarkerTables()
 {
     unsigned threads = gpTaskFarm->ThreadCount();
     if (threads == 0) threads = 1;
     MTGCProcessMarkPointers::InitStatics(threads);
 }
diff --git a/libpolyml/gc_update_phase.cpp b/libpolyml/gc_update_phase.cpp
index 5d2ab88e..82203170 100644
--- a/libpolyml/gc_update_phase.cpp
+++ b/libpolyml/gc_update_phase.cpp
@@ -1,282 +1,282 @@
 /*
     Title:      Multi-Threaded Garbage Collector - Update phase
 
     Copyright (c) 2010-12 David C. J. Matthews
 
     Based on the original garbage collector code
         Copyright 2000-2008
         Cambridge University Technical Services Limited
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License as published by the Free Software Foundation; either
     version 2.1 of the License, or (at your option) any later version.
     
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.
     
     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 /*
 This is the third, update, phase of the garbage collector.  The previous, copy,
 phase will have moved cells in memory.  The update phase goes through all cells
 that could contain an address of a cell that has been moved and looks for a
 tomb-stone that contains its new location. 
 */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #elif defined(_WIN32)
 #include "winconfig.h"
 #else
 #error "No configuration file"
 #endif
 
 #ifdef HAVE_ASSERT_H
 #include <assert.h>
 #define ASSERT(x)   assert(x)
 #else
 #define ASSERT(x)
 #endif
 
 #include "globals.h"
 #include "run_time.h"
 #include "processes.h"
 #include "gc.h"
 #include "scanaddrs.h"
 #include "check_objects.h"
 #include "bitmap.h"
 #include "memmgr.h"
 #include "gctaskfarm.h"
 #include "diagnostics.h"
 
 // Scanner used by the update phase.  It overrides the ScanAddress entry
 // points so that an address whose target holds a forwarding pointer is
 // replaced by the address the forwarding chain finally leads to.
 class MTGCProcessUpdate: public ScanAddress
 {
 public:
     // Update the address held in a single word.
     virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
     // Update an address held in a PolyObject* variable.
     virtual void ScanRuntimeAddress(PolyObject **pt, RtsStrength weak);
     // Return the (possibly updated) address for an object address.
     virtual PolyObject *ScanObjectAddress(PolyObject *base);
 
     // Walk one local area updating the addresses in the objects it contains.
     void UpdateObjectsInArea(LocalMemSpace *area);
 
 private:
     // Follow forwarding pointers from obj until an object that does not
     // contain one is reached.  obj is passed by reference and is left
     // holding that final address.
     static void UpdateAddress(PolyObject *&obj)
     {
         while (obj->ContainsForwardingPtr())
             obj = obj->GetForwardingPtr();
     }
 };
 
 /*********************************************************************/
 /* This function is called in the update phase to update pointers to */
 /* objects in the gc area that are in old mutable segments.          */
 /*                                                                   */
 /* If the lookup on gMem finds a local space for obj the forwarding  */
 /* chain is followed and the final address is returned.  Otherwise   */
 /* obj is returned unchanged.                                        */
 /*********************************************************************/
 PolyObject *MTGCProcessUpdate::ScanObjectAddress(PolyObject *obj)
 {
-    LocalMemSpace *space = gMem.LocalSpaceForAddress((PolyWord*)obj-1);
+    LocalMemSpace *space = gMem.LocalSpaceForObjectAddress(obj);
     if (space != 0)
     {
         UpdateAddress(obj);
         // Having followed the chain we expect to be at a real object.
         ASSERT(obj->ContainsNormalLengthWord());
     }
     return obj;
 }
 
 // Update an address held in *pt.  *pt is only written if the object it
 // refers to contains a forwarding pointer, in which case it is replaced by
 // the address at the end of the forwarding chain.
 void MTGCProcessUpdate::ScanRuntimeAddress(PolyObject **pt, RtsStrength/* weak*/)
 /* weak is not used, but needed so type of the function is correct */
 {
     PolyObject *obj = *pt;
     if (obj->ContainsForwardingPtr())
     {
         UpdateAddress(obj);
         *pt = obj;
     }
 }  
 
 // Update the address in the word at *pt.
 // Tagged values are left alone.  For anything else the word is treated as
 // an object address and rewritten only if that object has been forwarded.
 // Always returns zero.
 POLYUNSIGNED MTGCProcessUpdate::ScanAddressAt(PolyWord *pt)
 {
     PolyWord val = *pt;
 
     if (val.IsTagged())
         return 0;
 
     // It looked like it would be possible to simplify this code and
     // just call UpdateAddress on any address.  It seems to be
     // better to avoid unnecessary writes so we only store into
     // *pt if it has actually changed.
 
     PolyObject *obj = val.AsObjPtr();
     if (obj->ContainsForwardingPtr())
     {
         UpdateAddress(obj);
         *pt = obj;
     }
     return 0;
 }
 
 // Updates the addresses for objects in the area with the "allocated" bit set.
 // It processes the words from area->upperAllocPtr up to area->top, using
 // area->bitmap to find the length word of each object.  Words whose bit is
 // clear are overwritten with zero.  area->updated is increased by the size
 // (length plus one) of every object that is scanned in place.
 void MTGCProcessUpdate::UpdateObjectsInArea(LocalMemSpace *area)
 {
     PolyWord *pt      = area->upperAllocPtr;
     uintptr_t   bitno   = area->wordNo(pt);
     uintptr_t   highest = area->wordNo(area->top);
 
     // pt and bitno are advanced together throughout: bitno is always the
     // bitmap index for the word at pt, or one less while pt is on an object.
     for (;;)
     {
         ASSERT(bitno <= highest);
         /* Zero unused words.  This is necessary so that
            ScanAddressesInRegion can work.  It requires the allocated
            area of memory to contain either objects with a valid length
            word or forwarding pointer or zeros.  We should only be
            zeroing words that we couldn't fill with real data so it
            shouldn't be too much.  Profiling showed that using dummy
            byte objects here didn't make a measurable difference.
         */
         while (bitno < highest && !area->bitmap.TestBit(bitno))
         {
             *pt++ = PolyWord::FromUnsigned(0);
             bitno++;
         }
         
         if (bitno == highest) {
             // Have reached the top of the area
             ASSERT(pt == area->top);
             break;
         }
         
         /* first set bit corresponds to the length word */
         // Step over the length word so that pt is the object address.
         pt++;
         PolyObject *obj = (PolyObject*)pt;
         POLYUNSIGNED L = obj->LengthWord();
         bitno++;
         
         if (obj->ContainsForwardingPtr())
         {
             // Skip over moved objects.  We have to find the new location to find
             // its length.
             UpdateAddress(obj);            
             POLYUNSIGNED length = obj->Length();
             pt    += length;
             bitno += length;
         }
         else // Contains real object
         {
             
             if (OBJ_IS_WORD_OBJECT(L))
             {
                 // Word objects are scanned inline, one word at a time.
                 POLYUNSIGNED length = OBJ_OBJECT_LENGTH(L);
                 
                 area->updated += length+1;
                 
                 while (length--)
                 {
                     PolyWord val = *pt;
 
                     // Tagged values and zero words are not addresses.
                     if (! val.IsTagged() && val != PolyWord::FromUnsigned(0))
                     {
                         // N.B. This declaration shadows the outer "obj"
                         // for the extent of this inner block only.
                         PolyObject *obj = val.AsObjPtr();
                     
                         if (obj->ContainsForwardingPtr())
                         {
                             UpdateAddress(obj);
                             *pt = obj;
                         }
                     }
                     
                     pt++;
                     bitno++;
                 }
             }
             
             else /* !OBJ_IS_WORD_OBJECT(L) */
             {
                 // Anything else is handed to the general object scanner.
                 POLYUNSIGNED length = OBJ_OBJECT_LENGTH(L);
                 area->updated += length+1;
                 ScanAddressesInObject(obj, L);
                 pt    += length;
                 bitno += length;
             } /* !OBJ_IS_WORD_OBJECT(L) */
 
             // "obj" here is the outer variable: the object just scanned.
             CheckObject(obj); // Can check it after it's been updated
         }  /* !OBJ_IS_POINTER(L) */
     } /* for loop */
 }
 
 // Task to update addresses in a local area.
 // arg1 is the MTGCProcessUpdate to use and arg2 the LocalMemSpace to process.
 // The task id is not used.
 static void updateLocalArea(GCTaskId*, void *arg1, void *arg2)
 {
     MTGCProcessUpdate *processUpdate = (MTGCProcessUpdate *)arg1;
     LocalMemSpace *space = (LocalMemSpace *)arg2;
     if (debugOptions & DEBUG_GC_ENHANCED)
         Log("GC: Update local area %p\n", space);
     // Process the current generation for mutable or immutable areas.
     processUpdate->UpdateObjectsInArea(space);
     // NOTE(review): %lu assumes space->updated has the size of unsigned long;
     // its declaration is not in this file - confirm on LLP64 targets.
     if (debugOptions & DEBUG_GC_ENHANCED)
         Log("GC: Completed local update for %p. %lu words updated\n", space, space->updated);
 }
 
 // Task to update addresses in a non-local area.
 // arg1 is the MTGCProcessUpdate to use and arg2 the MemSpace to process.
 // The whole region from space->bottom to space->top is scanned.
 // The task id is not used.
 static void updateNonLocalMutableArea(GCTaskId*, void *arg1, void *arg2)
 {
     MTGCProcessUpdate *processUpdate = (MTGCProcessUpdate *)arg1;
     MemSpace *space = (MemSpace *)arg2;
     if (debugOptions & DEBUG_GC_ENHANCED)
         Log("GC: Update non-local mutable area %p\n", space);
     processUpdate->ScanAddressesInRegion(space->bottom, space->top);
     if (debugOptions & DEBUG_GC_ENHANCED)
         Log("GC: Completed non-local mutable update for %p\n", space);
 }
 
 // Task to update addresses maintained by the RTS itself.
 // arg1 is the MTGCProcessUpdate, which is passed to GCModules.  The task id
 // and the second argument are not used.
 static void updateGCProcAddresses(GCTaskId*, void *arg1, void *)
 {
     MTGCProcessUpdate *processUpdate = (MTGCProcessUpdate *)arg1;
     GCModules(processUpdate);
 }
 
 // Entry point for the update phase.  Resets the per-space "updated"
 // counters, then queues one task per local space, one per mutable
 // non-byte-only permanent space, one per code space and one for the RTS
 // modules, and waits for them all to finish.
 void GCUpdatePhase()
 {
     /* Update phase */
     mainThreadPhase = MTP_GCPHASEUPDATE;
     
     /* Invariant: at most the first (gen_top - bottom) bits of each bitmap can be dirty here. */
     for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
         (*i)->updated = 0;
 
     // We can do the updates in parallel since they don't interfere at all.
     // The same scanner object is handed to every task by address.  It is a
     // local, so WaitForCompletion below must be reached before we return.
     MTGCProcessUpdate processUpdate;
 
     // Process local areas.
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *space = *i;
         // As well as updating the addresses this also clears the bitmaps.
         // NOTE(review): UpdateObjectsInArea as it appears in this file only
         // tests bitmap bits - confirm where the bitmaps are actually cleared.
         gpTaskFarm->AddWorkOrRunNow(&updateLocalArea, &processUpdate, space);
     }
     // Scan the permanent mutable areas and the code areas.
     for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
     {
         PermanentMemSpace *space = *i;
         // Immutable and byte-only permanent spaces are skipped.
         if (space->isMutable && ! space->byteOnly)
             gpTaskFarm->AddWorkOrRunNow(&updateNonLocalMutableArea, &processUpdate, space);
     }
     // Every code space is scanned whether or not it is marked mutable.
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
     {
         CodeSpace *space = *i;
         gpTaskFarm->AddWorkOrRunNow(&updateNonLocalMutableArea, &processUpdate, space);
         // We could remove the mutable bit if there are no longer any mutable code objects
         // but it's easier to leave that to the minor GC.
     }
 
     // Update addresses in RTS modules.
     gpTaskFarm->AddWorkOrRunNow(&updateGCProcAddresses, &processUpdate, 0);
     // Wait for these to complete before proceeding.
     gpTaskFarm->WaitForCompletion();
 }
diff --git a/libpolyml/quick_gc.cpp b/libpolyml/quick_gc.cpp
index 181e0b7b..9ee28053 100644
--- a/libpolyml/quick_gc.cpp
+++ b/libpolyml/quick_gc.cpp
@@ -1,730 +1,730 @@
 /*
     Title:      Quick copying garbage collector
 
     Copyright (c) 2011-12, 2016-17, 2019 David C. J. Matthews
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License version 2.1 as published by the Free Software Foundation.
     
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.
     
     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 /*
 This is a quick copying garbage collector that moves all the data out of
 the allocation areas and into the mutable and immutable areas.  If either of
 these has filled up it fails and a full garbage collection must be done.
 */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #elif defined(_WIN32)
 #include "winconfig.h"
 #else
 #error "No configuration file"
 #endif
 
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 
 #ifdef HAVE_STRING_H 
 #include <string.h>
 #endif
 
 #ifdef HAVE_ASSERT_H
 #include <assert.h>
 #define ASSERT(x)   assert(x)
 #else
 #define ASSERT(x)
 #endif
 
 #include "globals.h"
 #include "processes.h"
 #include "gc.h"
 #include "scanaddrs.h"
 #include "check_objects.h"
 #include "bitmap.h"
 #include "memmgr.h"
 #include "diagnostics.h"
 #include "heapsizing.h"
 #include "gctaskfarm.h"
 #include "statistics.h"
 #include "gc_progress.h"
 
 // This protects access to the gMem.lSpaces table.
 static PLock localTableLock("Minor GC tables");
 
 // Overall result of the quick GC in progress.  It is set to true at the
 // start of RunQuickGC, set to false by QuickGCScanner::ScanAddressAt when an
 // object cannot be moved and polled by ThreadScanner::ScanOwnedAreas.
 // NOTE(review): this is a plain bool accessed from GC tasks without a lock
 // in this file - confirm that is intended.
 static bool succeeded = true;
 
 // Common base for the quick GC scanners.  It provides the code that copies
 // an object to a new address; derived classes supply FindSpace to choose
 // the space that receives the copy.
 class QuickGCScanner: public ScanAddress
 {
 public:
     // r is stored in rootScan.
     QuickGCScanner(bool r): rootScan(r) {}
     virtual ~QuickGCScanner() {}
 
     // Overrides for ScanAddress class
     virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
     virtual PolyObject *ScanObjectAddress(PolyObject *base);
 private:
     // Choose a new address for obj and copy it there unless it has already
     // been forwarded.  Returns zero if no space could be found.
     PolyObject *FindNewAddress(PolyObject *obj, POLYUNSIGNED L, LocalMemSpace *srcSpace);
     // Return a space with room for an object of "length" words plus its
     // length word, or zero if there is none.
     virtual LocalMemSpace *FindSpace(POLYUNSIGNED length, bool isMutable) = 0;
 protected:
     // Set by FindNewAddress on every path that returns a non-zero address:
     // true if that call made the copy, false if the object had already been
     // forwarded.  It is not initialised by the constructor.
     bool objectCopied;
     // True if ScanAddressAt should stop after the first object rather than
     // following pointers within an object it has just copied.
     bool rootScan;
 };
 
 // Scanner used for the roots.  It constructs its base with rootScan set to
 // true and remembers the last mutable and immutable space that FindSpace
 // selected from the space table.
 class RootScanner: public QuickGCScanner
 {
 public:
     RootScanner(): QuickGCScanner(true), mutableSpace(0), immutableSpace(0) {}
 private:
     virtual LocalMemSpace *FindSpace(POLYUNSIGNED length, bool isMutable);
     // Most recently chosen spaces; zero until FindSpace has selected one.
     LocalMemSpace *mutableSpace, *immutableSpace;
 };
 
 // Scanner used within a GC task.  It constructs its base with rootScan set
 // to false and keeps a table of the spaces it has taken ownership of.
 class ThreadScanner: public QuickGCScanner
 {
 public:
     // id is recorded as the owner of any space this scanner takes.
     ThreadScanner(GCTaskId* id): QuickGCScanner(false), taskID(id), mutableSpace(0), immutableSpace(0),
         spaceTable(0), nOwnedSpaces(0) {}
     // spaceTable is grown with realloc in TakeOwnership so is released with free.
     virtual ~ThreadScanner() { free(spaceTable); }
 
     // Scan the spaces in spaceTable until none has unscanned data.
     void ScanOwnedAreas(void);
 private:
     virtual LocalMemSpace *FindSpace(POLYUNSIGNED length, bool isMutable);
     // Add a space to spaceTable and mark it as owned by taskID.
     bool TakeOwnership(LocalMemSpace *space);
 
     GCTaskId *taskID;
     // Spaces remembered by FindSpace for subsequent allocations.
     LocalMemSpace *mutableSpace, *immutableSpace;
     // Table of owned spaces and the number of entries in it.
     LocalMemSpace **spaceTable;
     unsigned nOwnedSpaces;
 };
 
 // This is used when scanning code areas.  If there are no mutable cells left we can clear
 // the mutable bit and we don't have to scan it again.
 // The scanner does not change any address: it only records in foundMutable
 // whether any object passed to ScanAddressesInObject had a mutable length word.
 class CodeCheck: public ScanAddress
 {
 public:
     CodeCheck(): foundMutable(false) {}
     // Addresses are returned unchanged.
     virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
     // The contents of the object are not examined, only its length word.
     virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord)
         { if (OBJ_IS_MUTABLE_OBJECT(lengthWord)) foundMutable = true;  }
     // Initially false.  Once set it is never reset by this class.
     bool foundMutable;
 };
 
 // This uses the conditional exchange instruction to check and update
 // the forwarding pointer.  It uses a lock prefix so that if another
 // thread has updated it in the meantime it will not set it.
 // Using the assembly code provides a very small speed-up so may not
 // be worth-while. 
 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
 // In later versions of MS C we can use the intrinsic.
 // 1600 is Visual Studio 2010.  It may well work in older versions
 #   include <intrin.h>
 #   pragma intrinsic(_InterlockedCompareExchange)
 #   if (SIZEOF_VOIDP == 8)
 #       define InterlockedCompareExchange64 _InterlockedCompareExchange64
 #   else
 #       define InterlockedCompareExchange   _InterlockedCompareExchange
 #   endif
 #endif
 
 // Unsigned integer type used to address the word in front of an object when
 // setting a forwarding pointer: 32 bits if POLYML32IN64 is defined,
 // otherwise the size of a pointer.
 #ifdef POLYML32IN64
 typedef uint32_t ptrasint;
 #else
 typedef uintptr_t ptrasint;
 #endif
 
 
 // Replace the word immediately in front of pt (i.e. pt[-1]) with "update"
 // provided it still contains "testVal".  Returns true if the word was
 // replaced and false if it held some other value, in which case it is left
 // unchanged.  Which implementation is used depends on the compiler and
 // host architecture.  "space" is only used by the final fallback, which
 // takes space->spaceLock around an ordinary test and store.
 static bool atomiclySetForwarding(LocalMemSpace *space, ptrasint *pt, ptrasint testVal, ptrasint update)
 {
 #ifdef _MSC_VER
     // Microsoft compiler: use the interlocked compare-exchange functions.
 # if (SIZEOF_POLYWORD == 8)
     LONGLONG *address = (LONGLONG*)(pt-1);
     uintptr_t result = InterlockedCompareExchange64(address, update, testVal);
     return result == testVal;
 # else
     LONG *address = (LONG*)(pt-1);
     uintptr_t result = InterlockedCompareExchange(address, update, testVal);
     return result == testVal;
 # endif
 #elif((defined(HOSTARCHITECTURE_X86) || defined(HOSTARCHITECTURE_X32) || defined(POLYML32IN64)) && defined(__GNUC__))
     // 32-bit compare-exchange.  testVal is supplied in the same register
     // that receives the result (constraint "0" matching "=a").
     uintptr_t result;
     __asm__ __volatile__ (
         "lock; cmpxchgl %1,%2"
         :"=a"(result)
         :"r"(update),"m"(pt[-1]),"0"(testVal)
         :"memory", "cc"
     );
     return result == testVal;
 #elif(defined(HOSTARCHITECTURE_X86_64) && defined(__GNUC__))
     // 64-bit compare-exchange, otherwise as above.
     uintptr_t result;
     __asm__ __volatile__ (
         "lock; cmpxchgq %1,%2"
         :"=a"(result)
         :"r"(update),"m"(pt[-1]),"0"(testVal)
         :"memory", "cc"
     );
     return result == testVal;
 #else
     // Fallback on other targets.
     PLocker lock(&space->spaceLock);
     if (pt[-1] == testVal)
     {
         pt[-1] = update;
         return true;
     }
     return false;
 #endif
 }
 
 // Find the new address for an object, copying it if necessary.
 // obj is the object, L the length word the caller loaded from it and
 // srcSpace the space passed on to atomiclySetForwarding.
 // Returns zero if FindSpace could not provide a space.  Otherwise returns
 // the new address with objectCopied set to true if this call installed the
 // forwarding pointer and copied the object, or to false if the object was
 // found to have been forwarded already.
 PolyObject *QuickGCScanner::FindNewAddress(PolyObject *obj, POLYUNSIGNED L, LocalMemSpace *srcSpace)
 {
     bool isMutable = OBJ_IS_MUTABLE_OBJECT(L);
     POLYUNSIGNED n = OBJ_OBJECT_LENGTH(L);
     LocalMemSpace *lSpace = FindSpace(n, isMutable);
     if (lSpace == 0)
         return 0; // Unable to move it.
     // Provisional address: the word at lowerAllocPtr will be the length word.
     // lowerAllocPtr itself is not advanced until the forwarding pointer is set.
     PolyObject *newObject = (PolyObject*)(lSpace->lowerAllocPtr+1);
 
     // It's possible that another thread may have actually copied the 
     // object since we loaded the length word so we check it again.
     // If this is a mutable we must ensure that checking the forwarding
     // pointer here and updating it if necessary is atomic.  We don't need
     // to do that for immutable data so there is a small chance that an
     // object may be copied twice.  That's not a problem for immutable data.
     // Also lock this if it's code.  This may not be necessary but code objects
     // are rare. Updating the addresses in code objects is complicated and
     // it's possible that there are assumptions somewhere that there's only one
     // copy.
     // Avoiding locking for immutables provides only a small speed-up so may not
     // be worth-while.
     // NOTE(review): the two Log calls below print the POLYUNSIGNED value n
     // with %lu - confirm this matches the size of POLYUNSIGNED on all targets.
     if (isMutable || OBJ_IS_CODE_OBJECT(L))
     {
         if (! atomiclySetForwarding(srcSpace, (ptrasint*)obj, L, OBJ_SET_POINTER(newObject)))
         {
             // The length word was no longer L: use the address already there.
             newObject = obj->GetForwardingPtr();
             if (debugOptions & DEBUG_GC_DETAIL)
                 Log("GC: Quick: %p %lu %u has already moved to %p\n", obj, n, GetTypeBits(L), newObject);
             objectCopied = false;
             return newObject;
         }
     }
     else
     {
         if (obj->ContainsForwardingPtr())
         {
             newObject = obj->GetForwardingPtr();
             if (debugOptions & DEBUG_GC_DETAIL)
                 Log("GC: Quick: %p %lu %u has already moved to %p\n", obj, n, GetTypeBits(L), newObject);
             objectCopied = false;
             return newObject;
         }
         else obj->SetForwardingPtr(newObject);
     }
 
     // We are making the copy, so claim the space: n words plus the length word.
     lSpace->lowerAllocPtr += n+1;
 #ifdef POLYML32IN64
     // Maintain the odd-word alignment of lowerAllocPtr
     if ((n & 1) == 0 && lSpace->lowerAllocPtr < lSpace->upperAllocPtr)
     {
         *lSpace->lowerAllocPtr = PolyWord::FromUnsigned(0);
         lSpace->lowerAllocPtr++;
     }
 #endif
     CopyObjectToNewAddress(obj, newObject, L);
     objectCopied = true;
     return newObject;
 }
 
 // When scanning the roots we want to distribute the data among the immutable and mutable areas
 // so that the work is distributed for the scanning threads.
 // Returns a space with more than n free words.  The remembered space of the
 // requested kind is tried first, then the non-allocation space of that kind
 // with the most free space and finally gHeapSizeParameters.AddSpaceInMinorGC
 // is called, whose result is returned as it is.
 LocalMemSpace *RootScanner::FindSpace(POLYUNSIGNED n, bool isMutable)
 {
     LocalMemSpace *lSpace = isMutable ? mutableSpace : immutableSpace;
 
     if (lSpace != 0)
     {
         // See if there's space in the existing area.
         if (lSpace->freeSpace() > n /* At least n+1*/)
             return lSpace;
     }
 
     // Find the space with the largest free area.
     // If lSpace is already set (a remembered space that was too small) a
     // candidate only replaces it if it has strictly more free space.
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *sp = *i;
         if (sp->isMutable == isMutable && !sp->allocationSpace &&
                 (lSpace == 0 || sp->freeSpace() > lSpace->freeSpace()))
             lSpace = sp;
     }
 
     if (lSpace != 0 && lSpace->freeSpace() > n)
     {
         // Remember the choice for subsequent requests of the same kind.
         if (isMutable) mutableSpace = lSpace; else immutableSpace = lSpace;
         return lSpace;
     }
 
     // Nothing suitable: ask for a new space.  It is not remembered here.
     return gHeapSizeParameters.AddSpaceInMinorGC(n+1, isMutable);
 }
 
 // When scanning within a thread we don't want to be searching the space table.
 // Returns a space with more than n free words or zero if there is none.
 // The search order is: the remembered space of the requested kind, the
 // spaces this scanner already owns, an unowned space from gMem.lSpaces
 // (only if taskID is non-zero) and finally a newly added space.  The last
 // two are done with localTableLock held and involve taking ownership.
 LocalMemSpace *ThreadScanner::FindSpace(POLYUNSIGNED n, bool isMutable)
 {
     LocalMemSpace *lSpace = isMutable ? mutableSpace : immutableSpace;
 
     if (lSpace != 0)
     {
         // See if there's space in the existing area.
         if (lSpace->freeSpace() > n /* At least n+1*/)
             return lSpace;
     }
 
     // Look through the spaces we already own.
     for (unsigned i = 0; i < nOwnedSpaces; i++)
     {
         lSpace = spaceTable[i];
         if (lSpace->isMutable == isMutable &&
             ! lSpace->allocationSpace && lSpace->freeSpace() > n /* At least n+1*/)
         {
             if (n < 10)
             {
                 // We use this space for further allocations unless we are trying to
                 // allocate a "large" object.
                 if (isMutable) mutableSpace = lSpace; else immutableSpace = lSpace;
             }
             return lSpace;
         }
     }
 
     PLocker l(&localTableLock);
     // Another thread may allocate a new area, reallocating gMem.lSpaces so
     // we need a lock here.
     if (taskID != 0)
     {
         // See if we can take a space that is currently unused.
         for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
         {
             lSpace = *i;
             if (lSpace->spaceOwner == 0 && lSpace->isMutable == isMutable &&
                 ! lSpace->allocationSpace && lSpace->freeSpace() > n /* At least n+1*/)
             {
                 // The message is logged before ownership is actually taken.
                 if (debugOptions & DEBUG_GC_ENHANCED)
                     Log("GC: Quick: Thread %p is taking ownership of space %p\n", taskID, lSpace);
                 // Give up if the table of owned spaces could not be extended.
                 if (! TakeOwnership(lSpace))
                     return 0;
                 return lSpace;
             }
         }
     }
 
     // No existing space will do: try to add one and take ownership of it.
     lSpace = gHeapSizeParameters.AddSpaceInMinorGC(n+1, isMutable);
     if (lSpace != 0 && TakeOwnership(lSpace))
         return lSpace;
     return 0;
 }
 
 // Copy all the objects.
 // Processes the word at *pt.  If it is the address of an object in an
 // allocation space the object is copied, if it has not already been, and
 // the word is updated.  Unless this is a root scan, after copying an
 // immutable object with type bits of zero the loop goes on to process the
 // words of the new copy, working from the last word back to the first.
 // Always returns zero.  If an object cannot be moved "succeeded" is set to
 // false and the scan is abandoned.
 POLYUNSIGNED QuickGCScanner::ScanAddressAt(PolyWord *pt)
 {
     POLYUNSIGNED n = 1; // Set up the loop to process one word at *pt
     // The loop pre-decrements pt so start one word beyond.
     pt++;
     
     while (n-- != 0)
     {
         PolyWord val = *(--pt);
         if (! val.IsTagged())
         {
             LocalMemSpace *space = gMem.LocalSpaceForAddress(val.AsStackAddr()-1);
 
             // We only copy it if it is in a local allocation space and not in the
             // "overflow" area of data that could not be copied by the last full GC.
             if (space != 0 && space->allocationSpace && val.AsAddress() <= space->upperAllocPtr)
             {
                 // We shouldn't get code addresses since we handle code
                 // segments separately so if this isn't an integer it must be an object address.
                 ASSERT(OBJ_IS_DATAPTR(val));
 
                 PolyObject *obj = val.AsObjPtr();
                 // Load the length word without any interlock.  We can't assume that
                 // another thread won't also copy this at the same time.
                 POLYUNSIGNED L = obj->LengthWord();
 
                 // Has it been moved already? N.B.  Another thread may be in the process of
                 // moving it so the new object may not be fully copied.
                 if (OBJ_IS_POINTER(L))
                     *pt = OBJ_GET_POINTER(L);
                 else
                 {
                     // We need to copy this object.
                     PolyObject *newObject = FindNewAddress(obj, L, space); // New address of object.
 
                     if (newObject == 0) { // Couldn't copy it - not enough space.
                         succeeded = false;
 
                         if (debugOptions & DEBUG_GC_DETAIL)
                             Log("GC: Quick: Insufficient space to move %p %lu %u\n",
                                 obj, OBJ_OBJECT_LENGTH(L), GetTypeBits(L));
 
                         return 0;
                     }
 
                     *pt = newObject; // Update the pointer to the object
                     // N.B.  If another thread has just copied it "newObject" may actually
                     // be an address in another thread's space.  In that case "objectCopied"
                     // will be false.
 
                     if (debugOptions & DEBUG_GC_DETAIL)
                         Log("GC: Quick: %p %lu %u moved to %p\n", obj, OBJ_OBJECT_LENGTH(L), GetTypeBits(L), newObject);
 
                     // Stop now unless this is a simple word object we have been able to move.
                     // Also stop if we're just scanning the roots.
                     if (! rootScan && newObject != obj && ! OBJ_IS_MUTABLE_OBJECT(L) && 
                         GetTypeBits(L) == 0 && objectCopied)
                     {
                         // We can simply return zero in which case this performs a breadth-first scan.
                         // A breadth-first scan distributes the objects through the memory so
                         // to retain some degree of locality we try to copy some object pointed at
                         // by this one.  We work from the end back so that we follow the tail pointers
                         // for lists.
                         // Restart the loop over the n words of the new copy: pt is
                         // set one beyond its last word to suit the pre-decrement.
                         n = OBJ_OBJECT_LENGTH(L); // Object length
                         pt = (PolyWord*)newObject + n;
                     }
                 }
             }
         }
     }
     // We've reached the end without finding a pointer to follow
     return 0;
 }
 
 // The initial entry to process the roots.  Also used when processing the addresses
 // in objects that can't be handled by ScanAddressAt.
 // Returns the address to use for "base": the address of the copy if the
 // object was moved or had already been moved, otherwise "base" itself.
 PolyObject *QuickGCScanner::ScanObjectAddress(PolyObject *base)
 {
 #ifdef POLYML32IN64
     // If this is a code address we can't turn it into a PolyWord.
     // Check that it's a local address.
-    MemSpace *space = gMem.SpaceForAddress((PolyWord*)base - 1);
+    MemSpace *space = gMem.SpaceForObjectAddress(base);
     ASSERT(space != 0);
     // Anything outside the local spaces is returned unchanged.
     if (space->spaceType != ST_LOCAL)
         return base;
 #endif
     // Put the address in a temporary word so it can be scanned like any other.
     PolyWord val = base;
     // Scan this as an address.
     (void)QuickGCScanner::ScanAddressAt(&val);
     // Ignore the result of ScanAddressAt which is always zero and
     // just return the updated address.
     return val.AsObjPtr();
 }
 
 // Add this to the set of spaces we own.  Must be called with the
 // localTableLock held.
 // Returns false, leaving the space unowned and the table unchanged, if the
 // table could not be extended.  Otherwise records taskID as the owner of
 // the space, appends it to spaceTable and returns true.
 bool ThreadScanner::TakeOwnership(LocalMemSpace *space)
 {
     ASSERT(space->spaceOwner == 0);
     // Grow the table by one entry.  The old table is kept if this fails.
     LocalMemSpace **v = (LocalMemSpace**)realloc(spaceTable, (nOwnedSpaces+1)*sizeof(LocalMemSpace*));
     if (v == 0)
         return false;
     spaceTable = v;
     space->spaceOwner = taskID;
     spaceTable[nOwnedSpaces++] = space;
     return true;
 }
 
 // Thread function to scan an area.  It scans the addresses in the region
 // copying any objects from the allocation area into mutable or immutable
 // areas it owns.  It then processes all the areas it owns until there
 // are no further addresses to scan.
 // arg1 and arg2 are the start and end of the region as PolyWord pointers.
 // A new ThreadScanner is created for each call and destroyed on return.
 static void scanArea(GCTaskId *id, void *arg1, void *arg2)
 {
     ThreadScanner marker(id);
     marker.ScanAddressesInRegion((PolyWord*)arg1, (PolyWord*)arg2);
     marker.ScanOwnedAreas();
 }
 
 // Scan the objects that have been copied into the spaces this scanner owns.
 // In each space the objects between partialGCScan and lowerAllocPtr are
 // scanned, which may add further objects to this or another owned space, so
 // the process repeats until partialGCScan has caught up with lowerAllocPtr
 // in every owned space.  Ownership of the spaces is then given up.
 // If "succeeded" becomes false the function returns at once.
 // NOTE(review): that early return skips the loop that clears spaceOwner -
 // confirm the caller does not rely on the spaces being released then.
 void ThreadScanner::ScanOwnedAreas()
 {
     while (true)
     {
         bool allDone = true;
         // We're finished when there is no unscanned data in any space we own.
         for (unsigned k = 0; k < nOwnedSpaces && allDone; k++)
         {
             LocalMemSpace *space = spaceTable[k];
             allDone = space->partialGCScan == space->lowerAllocPtr;
         }
         if (allDone)
             break;
 
         // Scan each area that has had data added to it.
         for (unsigned l = 0; l < nOwnedSpaces; l++)
         {
             LocalMemSpace *space = spaceTable[l];
             // Scan the area.  This may well result in more data being added
             while (space->partialGCScan < space->lowerAllocPtr)
             {
                 // Is the queue draining?  If so it's probably worth creating
                 // some spare work.
                 if (gpTaskFarm->Draining() && gpTaskFarm->ThreadCount() > 1)
                 {
                     PolyWord *mid =
                         space->partialGCScan + (space->lowerAllocPtr - space->partialGCScan)/2;
                     // Split the space in two.
                     // Step through whole objects until p is at or beyond mid
                     // so that the split falls on an object boundary.
                     PolyWord *p = space->partialGCScan;
                     while (p < mid)
                     {
 #ifdef POLYML32IN64
                         if ((((uintptr_t)p) & 4) == 0)
                         {
                             p++; // Should be on an odd-word boundary
                             continue;
                         }
 #endif
                         PolyObject *o = (PolyObject*)(p+1);
                         ASSERT(o->ContainsNormalLengthWord());
                         p += o->Length()+1;
                     }
                     // Start a new task to scan the area up to the half-way point.
                     // Because we round up to the end of the next object we may
                     // include the whole area but that's probably better because
                     // we may have other areas to scan.
                     // If AddWork returns false nothing was handed over and we
                     // carry on scanning from partialGCScan ourselves.
                     if (gpTaskFarm->AddWork(scanArea, space->partialGCScan, p))
                     {
                         space->partialGCScan = p;
                         if (space->lowerAllocPtr == space->partialGCScan)
                             break;
                     }
                 }
                 PolyObject *obj = (PolyObject*)(space->partialGCScan+1);
 #ifdef POLYML32IN64
                 if ((((uintptr_t)obj) & 4) != 0)  // Should be on an even-word boundary
                 {
                     space->partialGCScan++;
                     continue;
                 }
 #endif
                 ASSERT(obj->ContainsNormalLengthWord());
                 POLYUNSIGNED length = obj->Length();
                 ASSERT(space->partialGCScan+length+1 <= space->lowerAllocPtr);
                 // Advance past the object before scanning it.
                 space->partialGCScan += length+1;
                 if (length != 0)
                     ScanAddressesInObject(obj);
                 // If any thread has run out of space we should stop.
                 if (! succeeded)
                     return;
             }
         }
     }
     // Release the spaces we're holding in case another thread wants to use them.
     for (unsigned m = 0; m < nOwnedSpaces; m++)
     {
         LocalMemSpace *space = spaceTable[m];
         space->spaceOwner = 0;
     }
     // The table storage is kept; only the count is reset.
     nOwnedSpaces = 0;
 }
 
 // Run a minor ("quick") garbage collection.
 // wordsRequiredToAllocate is passed to gMem.CheckForAllocation once the
 // collection has finished, to check the pending request can be satisfied.
 // Returns true if the collection completed and the heap-size parameters did not
 // ask for an immediate major GC.  Returns false if a full GC is required: either
 // the heap-size parameters demanded one (before or after the collection), the
 // copy phase failed, or the allocation check failed.
 // NOTE(review): "succeeded" is not declared in this function; it is set to true
 // here and presumably cleared by the scanning tasks when they run out of
 // space - confirm against scanArea.
 bool RunQuickGC(const POLYUNSIGNED wordsRequiredToAllocate)
 {
     // If the last minor GC took too long force a full GC.
     if (gHeapSizeParameters.RunMajorGCImmediately())
         return false;
 
     gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeStart);
     globalStats.incCount(PSC_GC_PARTIALGC);
     mainThreadPhase = MTP_GCQUICK;
     succeeded = true;
     gcProgressBeginMinorGC();
 
     if (debugOptions & DEBUG_GC)
         Log("GC: Beginning quick GC\n");
 
     if (debugOptions & DEBUG_HEAPSIZE)
         gMem.ReportHeapSizes("Minor GC (before)");
 
     // Words allocated in the non-allocation local spaces before the collection.
     uintptr_t spaceBeforeGC = 0;
 
     for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *lSpace = *i;
         ASSERT (lSpace->top >= lSpace->upperAllocPtr);
         ASSERT (lSpace->upperAllocPtr >= lSpace->lowerAllocPtr);
         ASSERT (lSpace->lowerAllocPtr >= lSpace->bottom);
         // Remember the top before we started this GC.  It's
         // only relevant for mutable areas.  It avoids us rescanning
         // objects that may have been added to the space as a result of
         // scanning another space.
         if (lSpace->isMutable)
             lSpace->partialGCTop = lSpace->upperAllocPtr;
         else lSpace->partialGCTop = lSpace->top;
         // If we're scanning a space this is where we start.
         // For immutable areas this only includes newly added
         // data but for mutable areas we have to scan data added
         // by previous partial GCs.
         if (lSpace->isMutable && ! lSpace->allocationSpace)
             lSpace->partialGCRootBase = lSpace->bottom;
         else lSpace->partialGCRootBase = lSpace->lowerAllocPtr;
         lSpace->spaceOwner = 0; // Not currently owned
         // Add up the space in the mutable and immutable areas
         if (! lSpace->allocationSpace)
             spaceBeforeGC += lSpace->allocatedSpace();
     }
 
     // First scan the roots, copying the data into the mutable and immutable areas.
     RootScanner rootScan;
     // Scan the permanent mutable areas.  This could be parallelised but it doesn't
     // appear to be worthwhile at the moment.
     for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
     {
         PermanentMemSpace *space = *i;
         // Byte-only areas are skipped here.
         if (space->isMutable && ! space->byteOnly)
             rootScan.ScanAddressesInRegion(space->bottom, space->top);
     }
     // Scan code spaces.
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
     {
         CodeSpace *space = *i;
         // Spaces are mutable if any object has been added to the area since the last GC.
         if (space->isMutable)
         {
             rootScan.ScanAddressesInRegion(space->bottom, space->top);
             // Check to see if any of the objects are still mutable.  If they are
             // we are still building the code and must rescan it on the next GC.
             // If there aren't we don't need to unless another code object is added.
             CodeCheck codeCheck;
             codeCheck.ScanAddressesInRegion(space->bottom, space->top);
             space->isMutable = codeCheck.foundMutable;
         }
     }
 
     // Scan RTS addresses.  This will include the thread stacks.
     GCModules(&rootScan);
 
     // At this point the immutable and mutable areas will have some root objects
     // in the space between partialGCRootBase (the old value of lowerAllocPtr) and
     // lowerAllocPtr.  These will contain the addresses of objects in the allocation
     // areas.  We need to scan these root objects and then any new objects we copy
     // until there are no objects left to scan.
     // We also need to scan local mutable areas since these are roots as well.
     // They have data between partialGCTop and top.  Parallelising this appears
     // to be a significant gain.
     // We have to be careful about the pointers here.  AddWorkOrRunNow begins
     // a thread immediately and so the scanning threads may be running while
     // we are still creating new tasks.  To avoid tripping up we use separate
     // pointers to the root objects rather than using lowerAllocPtr and
     // partialGCScan because these can be modified by the scanning tasks.
     // It's also possible for new spaces to be added to the table by the scanning
     // tasks while we are still adding tasks.  It is important that the values of
     // partialGCRootBase, partialGCRootTop and partialGCTop are properly initialised
     // for these new spaces.
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *space = *i;
         space->partialGCRootTop = space->lowerAllocPtr; // Top of the roots
         space->partialGCScan = space->lowerAllocPtr; // Start of scanning for new data.
     }
 
     // Now start creating tasks.  From this point only a thread that owns a space
     // may read or modify lowerAllocPtr or partialGCScan.
     {
         // Index rather than iterator: the vector may be reallocated while we loop.
         unsigned l = 0;
         while (true)
         {
             LocalMemSpace *space;
             {
                 // There is a chance that a thread that has already been forked may
                 // allocate a new space and realloc gMem.lSpaces.  We have to drop
                 // the lock before calling AddWorkOrRunNow in case we "run now".
                 PLocker lock(&localTableLock);
                 if (l >= gMem.lSpaces.size())
                     break;
                 space = gMem.lSpaces[l++];
             }
             // Only queue a task where the region is non-empty.
             if (space->partialGCRootBase != space->partialGCRootTop)
                 gpTaskFarm->AddWorkOrRunNow(scanArea, space->partialGCRootBase, space->partialGCRootTop);
             if (space->partialGCTop != space->top)
                 gpTaskFarm->AddWorkOrRunNow(scanArea, space->partialGCTop, space->top);
         }
     }
 
     gpTaskFarm->WaitForCompletion();
 
     // Words allocated in all local spaces after the collection.
     uintptr_t spaceAfterGC = 0;
 
     if (succeeded)
     {
         globalStats.setSize(PSS_AFTER_LAST_GC, 0);
         globalStats.setSize(PSS_ALLOCATION, 0);
         globalStats.setSize(PSS_ALLOCATION_FREE, 0);
         // If it succeeded the allocation areas are now empty.
         for(std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
         {
             LocalMemSpace *lSpace = *i;
             uintptr_t free;
             if (lSpace->allocationSpace)
             {
 #ifdef POLYML32IN64
                 // In 32-in-64 the pointer is reset to one word above the bottom
                 // and that first word is zeroed.
                 lSpace->lowerAllocPtr = lSpace->bottom + 1;
                 lSpace->lowerAllocPtr[-1] = PolyWord::FromUnsigned(0);
 #else
                 lSpace->lowerAllocPtr = lSpace->bottom;
 #endif
                 free = lSpace->freeSpace();
 #ifdef FILL_UNUSED_MEMORY
                 // This provides extra checking if we have dangling pointers
                 memset(lSpace->bottom, 0xaa, (char*)lSpace->upperAllocPtr - (char*)lSpace->bottom);
 #endif
                 globalStats.incSize(PSS_ALLOCATION, free*sizeof(PolyWord));
                 globalStats.incSize(PSS_ALLOCATION_FREE, free*sizeof(PolyWord));
             }
             else free = lSpace->freeSpace();
 
             if (debugOptions & DEBUG_GC_ENHANCED)
                 Log("GC: %s space %p %" PRI_SIZET " free in %" PRI_SIZET " words %2.1f%% full\n", lSpace->spaceTypeString(),
                     lSpace, lSpace->freeSpace(), lSpace->spaceSize(),
                     ((float)lSpace->allocatedSpace()) * 100 / (float)lSpace->spaceSize());
             globalStats.incSize(PSS_AFTER_LAST_GC, free*sizeof(PolyWord));
             spaceAfterGC += lSpace->allocatedSpace();
         }
 
         // The collection only counts as a success if the request can now be met.
         if (! gMem.CheckForAllocation(wordsRequiredToAllocate))
             succeeded = false;
     }
 
     if (succeeded)
     {
         gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeEnd);
 
         if (! gHeapSizeParameters.AdjustSizeAfterMinorGC(spaceAfterGC, spaceBeforeGC)) // Adjust the allocation size.
             return false; // If necessary trigger a full GC immediately
         gHeapSizeParameters.resetMinorTimingData();
         // Remove allocation spaces that are larger than the default
         // and any excess over the current size of the allocation area.
         gMem.RemoveExcessAllocation();
 
         if (debugOptions & DEBUG_HEAPSIZE)
             gMem.ReportHeapSizes("Minor GC (after)");
 
         if (debugOptions & DEBUG_GC)
             Log("GC: Completed successfully\n");
 
         CheckMemory();
     }
     else
     {
         // There was insufficient room to copy everything.  We will need to
         // run a full GC.
         gHeapSizeParameters.RecordGCTime(HeapSizeParameters::GCTimeEnd);
         if (debugOptions & DEBUG_GC)
             Log("GC: Quick GC failed\n");
     }
 
     return succeeded;
 }
diff --git a/libpolyml/savestate.cpp b/libpolyml/savestate.cpp
index 879e027e..00eb387c 100644
--- a/libpolyml/savestate.cpp
+++ b/libpolyml/savestate.cpp
@@ -1,2237 +1,2237 @@
 /*
     Title:  savestate.cpp - Save and Load state
 
     Copyright (c) 2007, 2015, 2017-19 David C.J. Matthews
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License version 2.1 as published by the Free Software Foundation.
     
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.
     
     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #elif defined(_WIN32)
 #include "winconfig.h"
 #else
 #error "No configuration file"
 #endif
 
 #ifdef HAVE_STDIO_H
 #include <stdio.h>
 #endif
 
 #ifdef HAVE_WINDOWS_H
 #include <windows.h> // For MAX_PATH
 #endif
 
 #ifdef HAVE_SYS_PARAM_H
 #include <sys/param.h> // For MAX_PATH
 #endif
 
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 
 #ifdef HAVE_TIME_H
 #include <time.h>
 #endif
 
 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
 #endif
 
 #ifdef HAVE_SYS_STAT_H
 #include <sys/stat.h>
 #endif
 
 #ifdef HAVE_UNISTD_H
 #include <unistd.h>
 #endif
 
 #ifdef HAVE_STRING_H
 #include <string.h>
 #endif
 
 #ifdef HAVE_ASSERT_H
 #include <assert.h>
 #define ASSERT(x)   assert(x)
 #else
 #define ASSERT(x)
 #endif
 
 #if (defined(_WIN32))
 #include <tchar.h>
 #define ERRORNUMBER _doserrno
 #define NOMEMORY ERROR_NOT_ENOUGH_MEMORY
 #else
 typedef char TCHAR;
 #define _T(x) x
 #define _tfopen fopen
 #define _tcscpy strcpy
 #define _tcsdup strdup
 #define _tcslen strlen
 #define _fputtc fputc
 #define _fputts fputs
 #ifndef lstrcmpi
 #define lstrcmpi strcasecmp
 #endif
 #define ERRORNUMBER errno
 #define NOMEMORY ENOMEM
 #endif
 
 
 #include "globals.h"
 #include "savestate.h"
 #include "processes.h"
 #include "run_time.h"
 #include "polystring.h"
 #include "scanaddrs.h"
 #include "arb.h"
 #include "memmgr.h"
 #include "mpoly.h" // For exportTimeStamp
 #include "exporter.h" // For CopyScan
 #include "machine_dep.h"
 #include "osmem.h"
 #include "gc.h" // For FullGC.
 #include "timing.h"
 #include "rtsentry.h"
 #include "check_objects.h"
 #include "rtsentry.h"
 
 #include "../polyexports.h" // For InitHeaderFromExport
 #include "version.h" // For InitHeaderFromExport
 
 #ifdef _MSC_VER
 // Don't tell me about ISO C++ changes.
 #pragma warning(disable:4996)
 #endif
 
 // Entry points for saved states and modules, declared with C linkage.
 // Each takes the calling thread as its first argument and returns a POLYUNSIGNED.
 extern "C" {
     POLYEXTERNALSYMBOL POLYUNSIGNED PolySaveState(FirstArgument threadId, PolyWord fileName, PolyWord depth);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadState(FirstArgument threadId, PolyWord arg);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowHierarchy(FirstArgument threadId);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyRenameParent(FirstArgument threadId, PolyWord childName, PolyWord parentName);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyShowParent(FirstArgument threadId, PolyWord arg);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyStoreModule(FirstArgument threadId, PolyWord name, PolyWord contents);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadModule(FirstArgument threadId, PolyWord arg);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyLoadHierarchy(FirstArgument threadId, PolyWord arg);
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyGetModuleDirectory(FirstArgument threadId);
 }
 
 // Scope guard for a stdio stream: whatever FILE* it holds when it goes
 // out of scope is passed to fclose.  A null pointer is left alone.
 class AutoClose {
 private:
     FILE *stream;
 
 public:
     AutoClose(FILE *f = 0): stream(f) {}
 
     ~AutoClose()
     {
         if (stream == 0)
             return;
         ::fclose(stream);
     }
 
     // Implicit conversion so the guard can be used wherever a FILE* is expected.
     operator FILE*() { return stream; }
 
     // Replace the held pointer.  The previous stream is NOT closed.
     FILE* operator = (FILE* p)
     {
         stream = p;
         return stream;
     }
 };
 
 // Scope guard for malloc-style memory: the held pointer is passed to
 // free() when the guard is destroyed.  BASE is the pointer type itself.
 // Generic enough that it could live in a shared header.
 template<typename BASE> class AutoFree
 {
 private:
     BASE held;
 
 public:
     AutoFree(BASE p = 0): held(p) {}
 
     ~AutoFree()
     {
         free(held);
     }
 
     // Implicit conversion back to the underlying pointer type.
     operator BASE() { return held; }
 
     // Replace the held pointer.  The previous value is NOT freed.
     BASE operator = (BASE p)
     {
         held = p;
         return held;
     }
 };
 
 #ifdef HAVE__FTELLI64
 // fseek and ftell are only 32-bits in Windows.
 #define off_t   __int64
 #define fseek _fseeki64
 #define ftell _ftelli64
 #endif
 
 /*
  *  Structure definitions for the saved state files.
  */
 
 #define SAVEDSTATESIGNATURE "POLYSAVE"
 #define SAVEDSTATEVERSION   2
 
 // File header for a saved state file.  This appears as the first entry
 // in the file.  The structure is written to disc as raw bytes so the
 // field order and types are part of the file format.
 typedef struct _savedStateHeader
 {
     // These entries are primarily to check that we have a valid
     // saved state file before we try to interpret anything else.
     char        headerSignature[8];     // Should contain SAVEDSTATESIGNATURE
     unsigned    headerVersion;          // Should contain SAVEDSTATEVERSION
     unsigned    headerLength;           // Number of bytes in the header
     unsigned    segmentDescrLength;     // Number of bytes in a descriptor
 
     // These entries contain the real data.
     off_t       segmentDescr;           // Position of segment descriptor table
     unsigned    segmentDescrCount;      // Number of segment descriptors in the table
     off_t       stringTable;            // Pointer to the string table (zero if none)
     size_t      stringTableSize;        // Size of string table
     unsigned    parentNameEntry;        // Position of parent name in string table (0 if top)
     time_t      timeStamp;              // The time stamp for this file.
     time_t      parentTimeStamp;        // The time stamp for the parent.
     void       *originalBaseAddr;       // Original base address (32-in-64 only)
 } SavedStateHeader;
 
 // Entry for segment table.  This describes the segments on the disc that
 // need to be loaded into memory.  An array of these is written to the file
 // as raw bytes, so the field order and types are part of the file format.
 typedef struct _savedStateSegmentDescr
 {
     off_t       segmentData;            // Position of the segment data
     size_t      segmentSize;            // Size of the segment data
     off_t       relocations;            // Position of the relocation table
     unsigned    relocationCount;        // Number of entries in relocation table
     unsigned    relocationSize;         // Size of a relocation entry
     unsigned    segmentFlags;           // Segment flags (see SSF_ values)
     unsigned    segmentIndex;           // The index of this segment or the segment it overwrites
     void        *originalAddress;       // The base address when the segment was written.
 } SavedStateSegmentDescr;
 
 #define SSF_WRITABLE    1               // The segment contains mutable data
 #define SSF_OVERWRITE   2               // The segment overwrites the data (mutable) in a parent.
 #define SSF_NOOVERWRITE 4               // The segment must not be further overwritten
 #define SSF_BYTES       8               // The segment contains only byte data
 #define SSF_CODE        16              // The segment contains only code
 
 // One relocation record.  These are written to the saved state file as raw
 // bytes, one per address that must be fixed up.
 typedef struct _relocationEntry
 {
     // Each entry indicates a location that has to be set to an address.
     // The location to be set is determined by adding "relocAddress" to the base address of
     // this segment (the one to which these relocations apply) and the value to store
     // by adding "targetAddress" to the base address of the segment indicated by "targetSegment".
     POLYUNSIGNED    relocAddress;       // The (byte) offset in this segment that we will set
     POLYUNSIGNED    targetAddress;      // The value to add to the base of the destination segment
     unsigned        targetSegment;      // The base segment.  0 is IO segment. 
     ScanRelocationKind relKind;         // The kind of relocation (processor dependent).
 } RelocationEntry;
 
 #define SAVE(x) taskData->saveVec.push(x)
 
 /*
  *  Hierarchy table: contains information about last loaded or saved state.
  */
 
 // Pointer to list of files loaded in last load.
 // There's no need for a lock since the update is only made when all
 // the ML threads have stopped.
 // Each entry records one file in the hierarchy together with its time stamp.
 class HierarchyTable
 {
 public:
     // Takes a private copy of "file" (via _tcsdup).  The copy is released
     // by AutoFree when the entry is deleted.
     HierarchyTable(const TCHAR *file, time_t time):
       fileName(_tcsdup(file)), timeStamp(time) { }
     AutoFree<TCHAR*> fileName;      // Owned copy of the file name.
     time_t          timeStamp;      // Time stamp associated with the file.
 };
 
 HierarchyTable **hierarchyTable;
 
 static unsigned hierarchyDepth;
 
 static bool AddHierarchyEntry(const TCHAR *fileName, time_t timeStamp)
 {
     // Add an entry to the hierarchy table for this file.
     HierarchyTable *newEntry = new HierarchyTable(fileName, timeStamp);
     if (newEntry == 0) return false;
     HierarchyTable **newTable =
         (HierarchyTable **)realloc(hierarchyTable, sizeof(HierarchyTable *)*(hierarchyDepth+1));
     if (newTable == 0) return false;
     hierarchyTable = newTable;
     hierarchyTable[hierarchyDepth++] = newEntry;
     return true;
 }
 
 // Report whether two path names refer to the same underlying file.
 // Used to detect an attempt to overwrite one of our own parents.
 // If either file cannot be opened/examined the answer is "false".
 #if (defined(_WIN32) || defined(__CYGWIN__))
 static bool sameFile(const TCHAR *x, const TCHAR *y)
 {
     bool identical = false;
 
     HANDLE firstHandle = CreateFile(x, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
     if (firstHandle != INVALID_HANDLE_VALUE)
     {
         HANDLE secondHandle = CreateFile(y, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
         if (secondHandle != INVALID_HANDLE_VALUE)
         {
             BY_HANDLE_FILE_INFORMATION firstInfo, secondInfo;
             // Same volume and same file index means the same file.
             if (GetFileInformationByHandle(firstHandle, &firstInfo) &&
                 GetFileInformationByHandle(secondHandle, &secondInfo))
             {
                 identical =
                     firstInfo.dwVolumeSerialNumber == secondInfo.dwVolumeSerialNumber &&
                     firstInfo.nFileIndexLow == secondInfo.nFileIndexLow &&
                     firstInfo.nFileIndexHigh == secondInfo.nFileIndexHigh;
             }
             CloseHandle(secondHandle);
         }
         CloseHandle(firstHandle);
     }
     return identical;
 }
 #else
 static bool sameFile(const char *x, const char *y)
 {
     struct stat firstInfo, secondInfo;
     // A file that does not exist cannot be the same as anything.
     if (stat(x, &firstInfo) != 0)
         return false;
     if (stat(y, &secondInfo) != 0)
         return false;
     // Same device and same inode means the same file.
     if (firstInfo.st_dev != secondInfo.st_dev)
         return false;
     return firstInfo.st_ino == secondInfo.st_ino;
 }
 #endif
 
 /*
  *  Saving state.
  */
 
 // This class is used to create the relocations.  It uses Exporter
 // for this but this may perhaps be too heavyweight.
 // Relocation entries are written directly to the export file as they are
 // generated and counted in relocationCount.
 class SaveStateExport: public Exporter, public ScanAddress
 {
 public:
     // h is passed through to the Exporter constructor.
     SaveStateExport(unsigned int h=0): Exporter(h), relocationCount(0) {}
 public:
     virtual void exportStore(void) {} // Not used.
 
 private:
     // ScanAddress overrides
     // Writes a relocation entry for a constant found at addrOfConst.
     virtual void ScanConstant(PolyObject *base, byte *addrOfConst, ScanRelocationKind code);
     // At the moment we should only get calls to ScanConstant.
     virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
 
 protected:
     // Stores in *reloc the byte offset of p from the start of its segment.
     void setRelocationAddress(void *p, POLYUNSIGNED *reloc);
     // Writes a direct relocation for the address p held at relocAddr.
     PolyWord createRelocation(PolyWord p, void *relocAddr);
     // Number of relocation entries written since this was last reset.
     unsigned relocationCount;
 
     friend class SaveRequest;
 };
 
 
 // Generate the address relative to the start of the segment.
 void SaveStateExport::setRelocationAddress(void *p, POLYUNSIGNED *reloc)
 {
     unsigned area = findArea(p);
     POLYUNSIGNED offset = (POLYUNSIGNED)((char*)p - (char*)memTable[area].mtOriginalAddr);
     *reloc = offset;
 }
 
 
 // Write a direct relocation entry to the export file for the address "p"
 // that is stored at "relocAddr".  The word itself is returned unchanged.
 PolyWord SaveStateExport::createRelocation(PolyWord p, void *relocAddr)
 {
     RelocationEntry entry;
 
     // Where the address lives: offset within the segment being scanned.
     setRelocationAddress(relocAddr, &entry.relocAddress);
 
     // What it refers to: target segment plus offset within that segment.
     void *target = p.AsAddress();
     const unsigned targetArea = findArea(target);
     const char *targetBase = (const char*)memTable[targetArea].mtOriginalAddr;
     entry.targetAddress = (POLYUNSIGNED)((const char*)target - targetBase);
     entry.targetSegment = (unsigned)memTable[targetArea].mtIndex;
     entry.relKind = PROCESS_RELOC_DIRECT;
 
     fwrite(&entry, sizeof(entry), 1, exportFile);
     relocationCount++;
 
     // The contents of the word are not altered.
     return p;
 }
 
 
 /* This is called for each constant within the code.
    If the constant is a non-null address a relocation entry describing it
    is written to the export file.  The code itself is not modified. */
 void SaveStateExport::ScanConstant(PolyObject *base, byte *addr, ScanRelocationKind code)
 {
     PolyObject *target = GetConstantValue(addr, code);
 
     if (target != 0)
     {
         void *targetAddr = target;
         const unsigned targetArea = findArea(targetAddr);
 
         // A relative reference within the same segment is already correct
         // wherever the segment is loaded so it needs no relocation.
         const bool withinSegment =
             code == PROCESS_RELOC_I386RELATIVE && targetArea == findArea(addr);
 
         if (! withinSegment)
         {
             // Record the location and the target as offsets from their segments.
             RelocationEntry entry;
             setRelocationAddress(addr, &entry.relocAddress);
             const char *targetBase = (const char*)memTable[targetArea].mtOriginalAddr;
             entry.targetAddress = (POLYUNSIGNED)((const char*)targetAddr - targetBase);
             entry.targetSegment = (unsigned)memTable[targetArea].mtIndex;
             entry.relKind = code;
             fwrite(&entry, sizeof(entry), 1, exportFile);
             relocationCount++;
         }
     }
 }
 
 // Request to the main thread to save data.
 // The result is reported through errorMessage/errCode which Perform sets
 // when something goes wrong.
 class SaveRequest: public MainThreadRequest
 {
 public:
     // name is not copied; only the pointer is stored.
     SaveRequest(const TCHAR *name, unsigned h): MainThreadRequest(MTP_SAVESTATE),
         fileName(name), newHierarchy(h),
         errorMessage(0), errCode(0) {}
 
     virtual void Perform();
     const TCHAR *fileName;      // File to write the state to.
     unsigned newHierarchy;      // Hierarchy level of the state being saved.
     const char *errorMessage;   // Zero initially; set to a message on failure.
     int errCode;                // Error number to go with errorMessage.
 };
 
 // This class is used to update references to objects that have moved.  If
 // we have copied an object into the area to be exported we may still have references
 // to it from the stack or from RTS data structures.  We have to ensure that these
 // are updated.
 // This is very similar to ProcessFixupAddress in sharedata.cpp
 class SaveFixupAddress: public ScanAddress
 {
 protected:
     // Updates the word at pt if it is the address of a moved object.
     virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
     // Updates a code address in place using ScanObjectAddress.
     virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt) { *pt = ScanObjectAddress(*pt); return 0; }
     // Returns the new address of base if it has moved, otherwise base.
     virtual PolyObject *ScanObjectAddress(PolyObject *base);
 
 public:
     // Fixes up the addresses in every cell of a code space.
     void ScanCodeSpace(CodeSpace *space);
 };
 
 
 // Fix up a single word.  If it holds a non-zero data pointer it is replaced
 // by the (possibly updated) address of the object.  Always returns zero.
 POLYUNSIGNED SaveFixupAddress::ScanAddressAt(PolyWord *pt)
 {
     PolyWord word = *pt;
     // Anything that isn't an address is left untouched.
     if (! word.IsDataPtr())
         return 0;
     // So is the zero word.
     if (word != PolyWord::FromUnsigned(0))
         *pt = ScanObjectAddress(word.AsObjPtr());
     return 0;
 }
 
 // Returns the new address if the argument is the address of an object that
 // has moved, otherwise returns the original.
 // Only a single forwarding step is followed; the result is asserted to
 // have a normal length word.
 PolyObject *SaveFixupAddress::ScanObjectAddress(PolyObject *obj)
 {
     if (obj->ContainsForwardingPtr()) // tombstone is a pointer to a moved object
     {
 #ifdef POLYML32IN64
         // In 32-in-64 the forwarding value for an object in a code space is
         // decoded relative to globalCodeBase rather than with GetForwardingPtr.
         MemSpace *space = gMem.SpaceForAddress((PolyWord*)obj - 1);
         PolyObject *newp;
         if (space->isCode)
             newp = (PolyObject*)(globalCodeBase + ((obj->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
         else newp = obj->GetForwardingPtr();
 #else
         PolyObject *newp = obj->GetForwardingPtr();
 #endif
         ASSERT (newp->ContainsNormalLengthWord());
         return newp;
     }
     
     ASSERT (obj->ContainsNormalLengthWord()); // object is not moved
     return obj;
 }
 
 // Fix up addresses in the code area.  Unlike ScanAddressesInRegion this updates
 // cells that have been moved.  We need to do that because we may still have
 // return addresses into those cells and we don't move return addresses.  We
 // do want the code to see updated constant addresses.
 void SaveFixupAddress::ScanCodeSpace(CodeSpace *space)
 {
     for (PolyWord *pt = space->bottom; pt < space->top; )
     {
         // Step over the length word so that pt addresses the object itself.
         pt++;
         PolyObject *obj = (PolyObject*)pt;
         // If the cell has been forwarded its own length word is overwritten, so
         // follow the chain to the final copy to find the real length word.
 #ifdef POLYML32IN64
         PolyObject *dest = obj;
         while (dest->ContainsForwardingPtr())
         {
             // Note: this local deliberately or otherwise hides the "space" parameter.
-            MemSpace *space = gMem.SpaceForAddress((PolyWord*)dest - 1);
+            MemSpace *space = gMem.SpaceForObjectAddress(dest);
             if (space->isCode)
                 dest = (PolyObject*)(globalCodeBase + ((dest->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
             else dest = dest->GetForwardingPtr();
         }
 #else
         PolyObject *dest = obj->FollowForwardingChain();
 #endif
         POLYUNSIGNED length = dest->Length();
         // Scan the original cell, but using the length word taken from the copy.
         if (length != 0)
             ScanAddressesInObject(obj, dest->LengthWord());
         pt += length;
     }
 }
 
 // Called by the root thread to actually save the state and write the file.
 void SaveRequest::Perform()
 {
     if (debugOptions & DEBUG_SAVING)
         Log("SAVE: Beginning saving state.\n");
     // Check that we aren't overwriting our own parent.
     for (unsigned q = 0; q < newHierarchy-1; q++) {
         if (sameFile(hierarchyTable[q]->fileName, fileName))
         {
             errorMessage = "File being saved is used as a parent of this file";
             errCode = 0;
             if (debugOptions & DEBUG_SAVING)
                 Log("SAVE: File being saved is used as a parent of this file.\n");
             return;
         }
     }
 
     SaveStateExport exports;
     // Open the file.  This could quite reasonably fail if the path is wrong.
     exports.exportFile = _tfopen(fileName, _T("wb"));
     if (exports.exportFile == NULL)
     {
         errorMessage = "Cannot open save file";
         errCode = ERRORNUMBER;
         if (debugOptions & DEBUG_SAVING)
             Log("SAVE: Cannot open save file.\n");
         return;
     }
 
     // Scan over the permanent mutable area copying all reachable data that is
     // not in a lower hierarchy into new permanent segments.
     CopyScan copyScan(newHierarchy);
     copyScan.initialise(false);
     bool success = true;
     try {
         for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
         {
             PermanentMemSpace *space = *i;
             if (space->isMutable && !space->noOverwrite && !space->byteOnly)
             {
                 if (debugOptions & DEBUG_SAVING)
                     Log("SAVE: Scanning permanent mutable area %p allocated at %p size %lu\n",
                         space, space->bottom, space->spaceSize());
                 copyScan.ScanAddressesInRegion(space->bottom, space->top);
             }
         }
     }
     catch (MemoryException &)
     {
         success = false;
         if (debugOptions & DEBUG_SAVING)
             Log("SAVE: Scan of permanent mutable area raised memory exception.\n");
     }
 
     // Copy the areas into the export object.  Make sufficient space for
     // the largest possible number of entries.
     exports.memTable = new memoryTableEntry[gMem.eSpaces.size()+gMem.pSpaces.size()+1];
     unsigned memTableCount = 0;
 
     // Permanent spaces at higher level.  These have to have entries although
     // only the mutable entries will be written.
     for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
     {
         PermanentMemSpace *space = *i;
         if (space->hierarchy < newHierarchy)
         {
             memoryTableEntry *entry = &exports.memTable[memTableCount++];
             entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
             entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
             entry->mtIndex = space->index;
             entry->mtFlags = 0;
             if (space->isMutable)
             {
                 entry->mtFlags |= MTF_WRITEABLE;
                 if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE;
                 if (space->byteOnly) entry->mtFlags |= MTF_BYTES;
             }
             if (space->isCode)
                 entry->mtFlags |= MTF_EXECUTABLE;
         }
     }
     unsigned permanentEntries = memTableCount; // Remember where new entries start.
 
     // Newly created spaces.
     for (std::vector<PermanentMemSpace *>::iterator i = gMem.eSpaces.begin(); i < gMem.eSpaces.end(); i++)
     {
         memoryTableEntry *entry = &exports.memTable[memTableCount++];
         PermanentMemSpace *space = *i;
         entry->mtOriginalAddr = entry->mtCurrentAddr = space->bottom;
         entry->mtLength = (space->topPointer-space->bottom)*sizeof(PolyWord);
         entry->mtIndex = space->index;
         entry->mtFlags = 0;
         if (space->isMutable)
         {
             entry->mtFlags |= MTF_WRITEABLE;
             if (space->noOverwrite) entry->mtFlags |= MTF_NO_OVERWRITE;
             if (space->byteOnly) entry->mtFlags |= MTF_BYTES;
         }
         if (space->isCode)
             entry->mtFlags |= MTF_EXECUTABLE;
     }
 
     exports.memTableEntries = memTableCount;
 
     if (debugOptions & DEBUG_SAVING)
         Log("SAVE: Updating references to moved objects.\n");
 
     // Update references to moved objects.
     SaveFixupAddress fixup;
     for (std::vector<LocalMemSpace*>::iterator i = gMem.lSpaces.begin(); i < gMem.lSpaces.end(); i++)
     {
         LocalMemSpace *space = *i;
         fixup.ScanAddressesInRegion(space->bottom, space->lowerAllocPtr);
         fixup.ScanAddressesInRegion(space->upperAllocPtr, space->top);
     }
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
         fixup.ScanCodeSpace(*i);
 
     GCModules(&fixup);
 
     // Restore the length words in the code areas.
     // Although we've updated any pointers to the start of the code we could have return addresses
     // pointing to the original code.  GCModules updates the stack but doesn't update return addresses.
     for (std::vector<CodeSpace *>::iterator i = gMem.cSpaces.begin(); i < gMem.cSpaces.end(); i++)
     {
         CodeSpace *space = *i;
         for (PolyWord *pt = space->bottom; pt < space->top; )
         {
             pt++;
             PolyObject *obj = (PolyObject*)pt;
             if (obj->ContainsForwardingPtr())
             {
 #ifdef POLYML32IN64
                 PolyObject *forwardedTo = obj;
                 while (forwardedTo->ContainsForwardingPtr())
                     forwardedTo = (PolyObject*)(globalCodeBase + ((forwardedTo->LengthWord() & ~_OBJ_TOMBSTONE_BIT) << 1));
 #else
                 PolyObject *forwardedTo = obj->FollowForwardingChain();
 #endif
                 POLYUNSIGNED lengthWord = forwardedTo->LengthWord();
                 space->writeAble(obj)->SetLengthWord(lengthWord);
             }
             pt += obj->Length();
         }
     }
 
     // Update the global memory space table.  Old segments at the same level
     // or lower are removed.  The new segments become permanent.
     // Try to promote the spaces even if we've had a failure because export
     // spaces are deleted in ~CopyScan and we may have already copied
     // some objects there.
     if (debugOptions & DEBUG_SAVING)
         Log("SAVE: Promoting export spaces to permanent spaces.\n");
     if (! gMem.PromoteExportSpaces(newHierarchy) || ! success)
     {
         errorMessage = "Out of Memory";
         errCode = NOMEMORY;
         if (debugOptions & DEBUG_SAVING)
             Log("SAVE: Unable to promote export spaces.\n");
         return;
     }
     // Remove any deeper entries from the hierarchy table.
     while (hierarchyDepth > newHierarchy-1)
     {
         hierarchyDepth--;
         delete(hierarchyTable[hierarchyDepth]);
         hierarchyTable[hierarchyDepth] = 0;
     }
 
     if (debugOptions & DEBUG_SAVING)
         Log("SAVE: Writing out data.\n");
 
     // Write out the file header.
     SavedStateHeader saveHeader;
     memset(&saveHeader, 0, sizeof(saveHeader));
     saveHeader.headerLength = sizeof(saveHeader);
     memcpy(saveHeader.headerSignature,
         SAVEDSTATESIGNATURE, sizeof(saveHeader.headerSignature));
     saveHeader.headerVersion = SAVEDSTATEVERSION;
     saveHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr);
     if (newHierarchy == 1)
         saveHeader.parentTimeStamp = exportTimeStamp;
     else
     {
         saveHeader.parentTimeStamp = hierarchyTable[newHierarchy-2]->timeStamp;
         saveHeader.parentNameEntry = sizeof(TCHAR); // Always the first entry.
     }
     saveHeader.timeStamp = getBuildTime();
     saveHeader.segmentDescrCount = exports.memTableEntries; // One segment for each space.
 #ifdef POLYML32IN64
     saveHeader.originalBaseAddr = globalHeapBase;
 #endif
     // Write out the header.
     fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile);
 
     // We need a segment header for each permanent area whether it is
     // actually in this file or not.
     SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [exports.memTableEntries];
 
     for (unsigned j = 0; j < exports.memTableEntries; j++)
     {
         memoryTableEntry *entry = &exports.memTable[j];
         memset(&descrs[j], 0, sizeof(SavedStateSegmentDescr));
         descrs[j].relocationSize = sizeof(RelocationEntry);
         descrs[j].segmentIndex = (unsigned)entry->mtIndex;
         descrs[j].segmentSize = entry->mtLength; // Set this even if we don't write it.
         descrs[j].originalAddress = entry->mtOriginalAddr;
         if (entry->mtFlags & MTF_WRITEABLE)
         {
             descrs[j].segmentFlags |= SSF_WRITABLE;
             if (entry->mtFlags & MTF_NO_OVERWRITE)
                 descrs[j].segmentFlags |= SSF_NOOVERWRITE;
             if (j < permanentEntries && (entry->mtFlags & MTF_NO_OVERWRITE) == 0)
                 descrs[j].segmentFlags |= SSF_OVERWRITE;
             if (entry->mtFlags & MTF_BYTES)
                 descrs[j].segmentFlags |= SSF_BYTES;
         }
         if (entry->mtFlags & MTF_EXECUTABLE)
             descrs[j].segmentFlags |= SSF_CODE;
     }
     // Write out temporarily. Will be overwritten at the end.
     saveHeader.segmentDescr = ftell(exports.exportFile);
     fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile);
 
     // Write out the relocations and the data.
     for (unsigned k = 1 /* Not IO area */; k < exports.memTableEntries; k++)
     {
         memoryTableEntry *entry = &exports.memTable[k];
         // Write out the contents if this is new or if it is a normal, overwritable
         // mutable area.
         if (k >= permanentEntries ||
             (entry->mtFlags & (MTF_WRITEABLE|MTF_NO_OVERWRITE)) == MTF_WRITEABLE)
         {
             descrs[k].relocations = ftell(exports.exportFile);
             // Have to write this out.
             exports.relocationCount = 0;
             // Create the relocation table.
             char *start = (char*)entry->mtOriginalAddr;
             char *end = start + entry->mtLength;
             for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; )
             {
                 p++;
                 PolyObject *obj = (PolyObject*)p;
                 POLYUNSIGNED length = obj->Length();
                 // Most relocations can be computed when the saved state is
                 // loaded so we only write out the difficult ones: those that
                 // occur within compiled code.
                 //  exports.relocateObject(obj);
                 if (length != 0 && obj->IsCodeObject())
                     machineDependent->ScanConstantsWithinCode(obj, &exports);
                 p += length;
             }
             descrs[k].relocationCount = exports.relocationCount;
             // Write out the data.
             descrs[k].segmentData = ftell(exports.exportFile);
             fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exports.exportFile);
        }
     }
 
     // If this is a child we need to write a string table containing the parent name.
     if (newHierarchy > 1)
     {
         saveHeader.stringTable = ftell(exports.exportFile);
         _fputtc(0, exports.exportFile); // First byte of string table is zero
         _fputts(hierarchyTable[newHierarchy-2]->fileName, exports.exportFile);
         _fputtc(0, exports.exportFile); // A terminating null.
         saveHeader.stringTableSize = (_tcslen(hierarchyTable[newHierarchy-2]->fileName) + 2)*sizeof(TCHAR);
     }
 
     // Rewrite the header and the segment tables now they're complete.
     fseek(exports.exportFile, 0, SEEK_SET);
     fwrite(&saveHeader, sizeof(saveHeader), 1, exports.exportFile);
     fwrite(descrs, sizeof(SavedStateSegmentDescr), exports.memTableEntries, exports.exportFile);
 
     if (debugOptions & DEBUG_SAVING)
         Log("SAVE: Writing complete.\n");
 
     // Add an entry to the hierarchy table for this file.
     (void)AddHierarchyEntry(fileName, saveHeader.timeStamp);
 
     delete[](descrs);
 
     CheckMemory();
 }
 
 // RTS entry point: write a saved state file.
 // fileName is the ML string naming the output file and depth is the ML-side
 // hierarchy level (zero for a top-level save).  Exceptions thrown while
 // preparing or performing the save are caught here; the result is always
 // tagged zero.
 POLYUNSIGNED PolySaveState(FirstArgument threadId, PolyWord fileName, PolyWord depth)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle savedMark = taskData->saveVec.mark();
 
     try {
         TempString targetName(Poly_string_to_T_alloc(fileName));
         // ML counts the top level as zero; the hierarchy table counts from one.
         unsigned requestedLevel = get_C_unsigned(taskData, depth) + 1;
 
         // A save may replace an existing level or add exactly one new level.
         if (requestedLevel > hierarchyDepth + 1)
             raise_fail(taskData, "Depth must be no more than the current hierarchy plus one");
 
         // Do a full GC before saving, mainly so that repeated saves do not
         // exhaust memory: old export spaces have become local spaces and the
         // GC will delete them if they are completely empty.
         FullGC(taskData);
 
         // The save itself is carried out as a root request.
         SaveRequest saveRequest(targetName, requestedLevel);
         processes->MakeRootRequest(taskData, &saveRequest);
         if (saveRequest.errorMessage)
             raise_syscall(taskData, saveRequest.errorMessage, saveRequest.errCode);
     }
     catch (...) {} // If an ML exception is raised
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     return TAGGED(0).AsUnsigned();
 }
 
 /*
  *  Loading saved state files.
  */
 
 // Main-thread request that loads a saved state, together with any parent
 // files it depends on.  The outcome is reported through errorResult and
 // errNumber rather than by throwing.
 class StateLoader: public MainThreadRequest
 {
 public:
     // isH selects hierarchy mode: "files" is then an ML list of file names,
     // otherwise it is a single ML string.
     StateLoader(bool isH, Handle files): MainThreadRequest(MTP_LOADSTATE),
         isHierarchy(isH), fileNameList(files), errorResult(0), errNumber(0) { }
 
     // Entry point for the request: loads the first file (and its parents).
     virtual void Perform(void);
     // Load the file currently named by fileName.  Returns false on failure
     // with errorResult (and possibly errNumber) set.
     bool LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail);
     bool isHierarchy;       // True if fileNameList is a list of names.
     Handle fileNameList;    // The ML argument: a list or a single name.
     const char *errorResult; // Null unless an error has been recorded.
     // The fileName here is the last file loaded.  It is used to open the
     // file and can also be printed at the end to identify the particular
     // file in the hierarchy that failed.
     AutoFree<TCHAR*> fileName;
     int errNumber;          // Error code to report, or zero if there is none.
 };
 
 // Called by the main thread once all the ML threads have stopped.
 void StateLoader::Perform(void)
 {
     // Copy the first file name into the buffer.
     if (isHierarchy)
     {
         if (ML_Cons_Cell::IsNull(fileNameList->Word()))
         {
             errorResult = "Hierarchy list is empty";
             return;
         }
         ML_Cons_Cell *p = DEREFLISTHANDLE(fileNameList);
         fileName = Poly_string_to_T_alloc(p->h);
         if (fileName == NULL)
         {
             errorResult = "Insufficient memory";
             errNumber = NOMEMORY;
             return;
         }
         (void)LoadFile(true, 0, p->t);
     }
     else
     {
         fileName = Poly_string_to_T_alloc(fileNameList->Word());
         if (fileName == NULL)
         {
             errorResult = "Insufficient memory";
             errNumber = NOMEMORY;
             return;
         }
         (void)LoadFile(true, 0, TAGGED(0));
     }
 }
 
 // Scanner applied to newly loaded "no overwrite" spaces.  It does not move
 // anything; the work is done per object in ScanAddressesInObject.
 class ClearVolatile: public ScanAddress
 {
 public:
     ClearVolatile() {}
     // Addresses are left exactly as they are.
     virtual PolyObject *ScanObjectAddress(PolyObject *base) { return base; }
     // Resets the contents of mutable, no-overwrite objects.
     virtual void ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord);
 };
 
 // Set the values of external references and clear the values of other weak byte refs.
 // Only objects that are both mutable and marked no-overwrite are touched.
 void ClearVolatile::ScanAddressesInObject(PolyObject *base, POLYUNSIGNED lengthWord)
 {
     if (! (OBJ_IS_MUTABLE_OBJECT(lengthWord) && OBJ_IS_NO_OVERWRITE(lengthWord)))
         return;
 
     if (! OBJ_IS_BYTE_OBJECT(lengthWord))
     {
         // Word object: clear volatile refs by overwriting every word with tagged zero.
         POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord);
         POLYUNSIGNED n = 0;
         while (n < words)
         {
             base->Set(n, TAGGED(0));
             n++;
         }
         return;
     }
 
     // Byte objects are only of interest if they are weak references.
     if (! OBJ_IS_WEAKREF_OBJECT(lengthWord))
         return;
 
     // Zero the leading uintptr_t, provided the object is big enough to hold
     // one, and then set up the entry point again.
     POLYUNSIGNED words = OBJ_OBJECT_LENGTH(lengthWord);
     if (words >= sizeof(uintptr_t) / sizeof(PolyWord))
         *((uintptr_t*)base) = 0;
     setEntryPoint(base);
 }
 
 // This is copied from the B-tree in MemMgr.  It probably should be
 // merged but will do for the moment.  It's intended to reduce the
 // cost of finding the segment for relocation.
 
 // Base class for nodes of the tree used to find the segment containing an
 // address.  An object of this class itself is a leaf; interior nodes are
 // SpaceBTreeTree objects and have isLeaf set to false.
 class SpaceBTree
 {
 public:
     SpaceBTree(bool is, unsigned i = 0) : isLeaf(is), index(i) { }
     // Virtual so that deleting through a SpaceBTree* destroys interior nodes properly.
     virtual ~SpaceBTree() {}
 
     bool isLeaf;    // True for a leaf, false for a SpaceBTreeTree.
     unsigned index; // The index if this is a leaf
 };
 
 // A non-leaf node in the B-tree.  Each level is indexed by one byte of the
 // address, hence the 256 entries.
 class SpaceBTreeTree : public SpaceBTree
 {
 public:
     SpaceBTreeTree();           // Creates a node with every entry empty.
     virtual ~SpaceBTreeTree();  // Deletes every sub-tree.
 
     SpaceBTree *tree[256];      // Sub-trees or leaves; zero if there is no entry.
 };
 
 // Construct an interior (non-leaf) node with no children.
 SpaceBTreeTree::SpaceBTreeTree() : SpaceBTree(false)
 {
     SpaceBTree **slot = tree;
     SpaceBTree **const limit = tree + 256;
     while (slot != limit)
     {
         *slot = 0;
         ++slot;
     }
 }
 
 // Destroy the node together with everything below it.  Empty slots hold
 // null pointers, for which delete does nothing.
 SpaceBTreeTree::~SpaceBTreeTree()
 {
     SpaceBTree **slot = tree;
     SpaceBTree **const limit = tree + 256;
     for (; slot != limit; ++slot)
         delete *slot;
 }
 
 
 // This class is used to relocate addresses in areas that have been loaded.
 // It owns the descriptor table, the table of target addresses and the
 // lookup tree, all of which are released by the destructor.
 class LoadRelocate: public ScanAddress
 {
 public:
     // pcc: if true RelocateObject also processes the constants embedded in code.
     // Every member is initialised here, including relativeOffset which was
     // previously left indeterminate although ScanConstant reads it.
     LoadRelocate(bool pcc = false): processCodeConstants(pcc), originalBaseAddr(0), descrs(0),
         targetAddresses(0), nDescrs(0), spaceTree(0), relativeOffset(0) {}
     ~LoadRelocate();
 
     // Relocate every address held in a single object.
     void RelocateObject(PolyObject *p);
     virtual PolyObject *ScanObjectAddress(PolyObject *base) { ASSERT(0); return base; } // Not used
     // Relocate one constant within code.
     virtual void ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code);
     // Relocate the word at pt if it holds an address rather than a tagged value.
     void RelocateAddressAt(PolyWord *pt);
     // Map an address in the original layout to its address in the loaded segments.
     PolyObject *RelocateAddress(PolyObject *obj);
     // Record in the tree that [startS, endS] belongs to descriptor "index".
     void AddTreeRange(SpaceBTree **t, unsigned index, uintptr_t startS, uintptr_t endS);
 
     bool processCodeConstants;      // See the constructor.
     PolyWord *originalBaseAddr;     // Base used when decoding addresses in the loaded data.
     SavedStateSegmentDescr *descrs; // Segment descriptors: an array of nDescrs entries.
     PolyWord **targetAddresses;     // New base address of each segment, indexed by segment index.
     unsigned nDescrs;
     SpaceBTree *spaceTree;          // Lookup tree from original address to descriptor number.
     intptr_t relativeOffset;        // Added, in words, to PROCESS_RELOC_I386RELATIVE constants.
 };
 
 // Release the tables and the lookup tree.  Deleting a null pointer is a
 // no-op so no explicit tests are needed.
 LoadRelocate::~LoadRelocate()
 {
     delete[] descrs;
     delete[] targetAddresses;
     delete spaceTree;
 }
 
 // Add an entry to the space B-tree.
 // tt is the slot holding the (sub-)tree to update and is filled in with a new
 // interior node if it is empty.  index is the value stored in the leaves.
 // startS and endS bound the range, already shifted left by eight bits for
 // each level descended so that the byte selecting the entry at this level is
 // always the high-order byte.  An endS of zero means "to the end of this node".
 void LoadRelocate::AddTreeRange(SpaceBTree **tt, unsigned index, uintptr_t startS, uintptr_t endS)
 {
     if (*tt == 0)
         *tt = new SpaceBTreeTree;
     ASSERT(!(*tt)->isLeaf);
     SpaceBTreeTree *t = (SpaceBTreeTree*)*tt;
 
     const unsigned shift = (sizeof(void*) - 1) * 8; // Takes the high-order byte
     uintptr_t r = startS >> shift; // First entry touched by the range.
     ASSERT(r < 256);
     // Entry containing the end; 256, i.e. one past the last entry, if endS is zero.
     const uintptr_t s = endS == 0 ? 256 : endS >> shift;
     ASSERT(s >= r && s <= 256);
 
     if (r == s) // Wholly within this entry
         AddTreeRange(&(t->tree[r]), index, startS << 8, endS << 8);
     else
     {
         // Deal with any remainder at the start.
         if ((r << shift) != startS)
         {
             // The range starts part-way through entry r: descend and fill
             // from there to the end of that sub-tree.
             AddTreeRange(&(t->tree[r]), index, startS << 8, 0 /*End of range*/);
             r++;
         }
         // Whole entries.
         while (r < s)
         {
             // Each of these is covered completely so a leaf is sufficient.
             ASSERT(t->tree[r] == 0);
             t->tree[r] = new SpaceBTree(true, index);
             r++;
         }
         // Remainder at the end.
         if ((s << shift) != endS)
             AddTreeRange(&(t->tree[r]), index, 0, endS << 8);
     }
 }
 
 
 // Relocate the word at pt in place.  Tagged values are not addresses and
 // are left alone.
 void LoadRelocate::RelocateAddressAt(PolyWord *pt)
 {
     PolyWord current = *pt;
     if (current.IsTagged())
         return;
 
     // Work out the new address, then store it through the writable view of
     // the space that contains pt.
     PolyObject *moved = RelocateAddress(current.AsObjPtr(originalBaseAddr));
     *gMem.SpaceForAddress(pt)->writeAble(pt) = moved;
 }
 
 // Translate an address in the original layout into the corresponding
 // address in the segments as loaded.  Returns zero, after asserting, if the
 // address is not in any known segment.
 PolyObject *LoadRelocate::RelocateAddress(PolyObject *obj)
 {
     // Which segment is this address in?
     // N.B. As with SpaceForAddress we need to subtract 1 to point to the length word.
     const uintptr_t key = (uintptr_t)((PolyWord*)obj - 1);
 
     // Descend from the root using successive bytes of the key, most
     // significant first, until a leaf or an empty slot is reached.
     unsigned bitsLeft = sizeof(void *) * 8;
     for (SpaceBTree *node = spaceTree; node != 0; )
     {
         if (! node->isLeaf)
         {
             bitsLeft -= 8;
             node = ((SpaceBTreeTree*)node)->tree[(key >> bitsLeft) & 0xff];
             continue;
         }
 
         // It's in this segment: keep the same offset from the start of the
         // segment but measure it from the segment's current position.
         SavedStateSegmentDescr *descr = &descrs[node->index];
         PolyWord *newAddress = targetAddresses[descr->segmentIndex];
         ASSERT((char*)obj > descr->originalAddress &&
             (char*)obj <= (char*)descr->originalAddress + descr->segmentSize);
         ASSERT(newAddress != 0);
         return (PolyObject*)((byte*)newAddress + ((char*)obj - (char*)descr->originalAddress));
     }
 
     // This should never happen.
     ASSERT(0);
     return 0;
 }
 
 // The counterpart of Exporter::relocateObject for loading: adjust all the
 // addresses in an object after it has been read in.
 void LoadRelocate::RelocateObject(PolyObject *p)
 {
     // Byte objects are left untouched.
     if (p->IsByteObject())
         return;
 
     if (p->IsCodeObject())
     {
         ASSERT(! p->IsMutable() );
         PolyWord *constants;
         POLYUNSIGNED nConstants;
         p->GetConstSegmentForCode(constants, nConstants);
         /* Now the constant area. */
         POLYUNSIGNED c = 0;
         while (c < nConstants)
         {
             RelocateAddressAt(&(constants[c]));
             c++;
         }
         // Saved states and modules have relocation entries for constants in the code.
         // We can't use them when loading object files in 32-in-64 so have to process the
         // constants here.
         if (processCodeConstants)
         {
             POLYUNSIGNED length = p->Length();
             machineDependent->ScanConstantsWithinCode(p, p, length, this);
         }
         return;
     }
 
     // Closures and ordinary objects (essentially tuples) differ only in how
     // the start of the object is treated.
     POLYUNSIGNED words = p->Length();
     POLYUNSIGNED first = 0;
     if (p->IsClosureObject())
     {
         // The first word is the address of the code.  It is held as a full
         // pointer so it is relocated directly and the word loop starts after it.
         *(PolyObject**)p = RelocateAddress(*(PolyObject**)p);
         first = sizeof(PolyObject*)/sizeof(PolyWord);
     }
     for (POLYUNSIGNED w = first; w < words; w++)
         RelocateAddressAt(p->Offset(w));
 }
 
 // Update an address held as a constant within the code.  A zero constant
 // is left unchanged.
 void LoadRelocate::ScanConstant(PolyObject *base, byte *addressOfConstant, ScanRelocationKind code)
 {
     PolyObject *target = GetConstantValue(addressOfConstant, code, originalBaseAddr);
     if (target == 0)
         return;
 
     // Relative addresses are computed by adding the CURRENT address.
     // We have to convert them into addresses in original space before we
     // can relocate them.
     if (code == PROCESS_RELOC_I386RELATIVE)
         target = (PolyObject*)((PolyWord*)target + relativeOffset);
 
     SetConstantValue(addressOfConstant, RelocateAddress(target), code);
 }
 
 // Load a saved state file.  Calls itself to handle parent files.
 bool StateLoader::LoadFile(bool isInitial, time_t requiredStamp, PolyWord tail)
 {
     LoadRelocate relocate;
     AutoFree<TCHAR*> thisFile(_tcsdup(fileName));
 
     AutoClose loadFile(_tfopen(fileName, _T("rb")));
     if ((FILE*)loadFile == NULL)
     {
         errorResult = "Cannot open load file";
         errNumber = ERRORNUMBER;
         return false;
     }
 
     SavedStateHeader header;
     // Read the header and check the signature.
     if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1)
     {
         errorResult = "Unable to load header";
         return false;
     }
     if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0)
     {
         errorResult = "File is not a saved state";
         return false;
     }
     if (header.headerVersion != SAVEDSTATEVERSION ||
         header.headerLength != sizeof(SavedStateHeader) ||
         header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
     {
         errorResult = "Unsupported version of saved state file";
         return false;
     }
 
     // Check that we have the required stamp before loading any children.
     // If a parent has been overwritten we could get a loop.
     if (! isInitial && header.timeStamp != requiredStamp)
     {
         // Time-stamps don't match.
         errorResult = "The parent for this saved state does not match or has been changed";
         return false;
     }
 
     // Have verified that this is a reasonable saved state file.  If it isn't a
     // top-level file we have to load the parents first.
     if (header.parentNameEntry != 0)
     {
         if (isHierarchy)
         {
             // Take the file name from the list
             if (ML_Cons_Cell::IsNull(tail))
             {
                 errorResult = "Missing parent name in argument list";
                 return false;
             }
             ML_Cons_Cell *p = (ML_Cons_Cell *)tail.AsObjPtr();
             fileName = Poly_string_to_T_alloc(p->h);
             if (fileName == NULL)
             {
                 errorResult = "Insufficient memory";
                 errNumber = NOMEMORY;
                 return false;
             }
             if (! LoadFile(false, header.parentTimeStamp, p->t))
                 return false;
         }
         else
         {
             size_t toRead = header.stringTableSize-header.parentNameEntry;
             size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR));
             // Always allow space for null terminator
             size_t roundedBytes = (elems + 1) * sizeof(TCHAR);
             TCHAR *newFileName = (TCHAR *)realloc(fileName, roundedBytes);
             if (newFileName == NULL)
             {
                 errorResult = "Insufficient memory";
                 errNumber = NOMEMORY;
                 return false;
             }
             fileName = newFileName;
 
             if (header.parentNameEntry >= header.stringTableSize /* Bad entry */ ||
                 fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 ||
                 fread(fileName, 1, toRead, loadFile) != toRead)
             {
                 errorResult = "Unable to read parent file name";
                 return false;
             }
             fileName[elems] = 0; // Should already be null-terminated, but just in case.
 
             if (! LoadFile(false, header.parentTimeStamp, TAGGED(0)))
                 return false;
         }
 
         ASSERT(hierarchyDepth > 0 && hierarchyTable[hierarchyDepth-1] != 0);
     }
     else // Top-level file
     {
         if (isHierarchy && ! ML_Cons_Cell::IsNull(tail))
         {
             // There should be no further file names if this is really the top.
             errorResult = "Too many file names in the list";
             return false;
         }
         if (header.parentTimeStamp != exportTimeStamp)
         {
             // Time-stamp does not match executable.
             errorResult = 
                     "Saved state was exported from a different executable or the executable has changed";
             return false;
         }
 
         // Any existing spaces at this level or greater must be turned
         // into local spaces.  We may have references from the stack to objects that
         // have previously been imported but otherwise these spaces are no longer
         // needed.
         gMem.DemoteImportSpaces();
         // Clean out the hierarchy table.
         for (unsigned h = 0; h < hierarchyDepth; h++)
         {
             delete(hierarchyTable[h]);
             hierarchyTable[h] = 0;
         }
         hierarchyDepth = 0;
     }
 
     // Now have a valid, matching saved state.
     // Load the segment descriptors.
     relocate.nDescrs = header.segmentDescrCount;
     relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs];
     relocate.originalBaseAddr = (PolyWord*)header.originalBaseAddr;
 
     if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 ||
         fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs)
     {
         errorResult = "Unable to read segment descriptors";
         return false;
     }
     {
         unsigned maxIndex = 0;
         for (unsigned i = 0; i < relocate.nDescrs; i++)
         {
             if (relocate.descrs[i].segmentIndex > maxIndex)
                 maxIndex = relocate.descrs[i].segmentIndex;
             relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress,
                 (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize-1));
         }
         relocate.targetAddresses = new PolyWord*[maxIndex+1];
         for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0;
     }
 
     // Read in and create the new segments first.  If we have problems,
     // in particular if we have run out of memory, then it's easier to recover.  
     for (unsigned i = 0; i < relocate.nDescrs; i++)
     {
         SavedStateSegmentDescr *descr = &relocate.descrs[i];
         MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
         if (space != NULL) relocate.targetAddresses[descr->segmentIndex] = space->bottom;
 
         if (descr->segmentData == 0)
         { // No data - just an entry in the index.
             if (space == NULL/* ||
                 descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/)
             {
                 errorResult = "Mismatch for existing memory space";
                 return false;
             }
         }
         else if ((descr->segmentFlags & SSF_OVERWRITE) == 0)
         { // New segment.
             if (space != NULL)
             {
                 errorResult = "Segment already exists";
                 return false;
             }
             // Allocate memory for the new segment.
             unsigned mFlags =
                 (descr->segmentFlags & SSF_WRITABLE ? MTF_WRITEABLE : 0) |
                 (descr->segmentFlags & SSF_NOOVERWRITE ? MTF_NO_OVERWRITE : 0) |
                 (descr->segmentFlags & SSF_BYTES ? MTF_BYTES : 0) |
                 (descr->segmentFlags & SSF_CODE ? MTF_EXECUTABLE : 0);
             PermanentMemSpace *newSpace =
                 gMem.AllocateNewPermanentSpace(descr->segmentSize, mFlags, descr->segmentIndex, hierarchyDepth + 1);
             if (newSpace == 0)
             {
                 errorResult = "Unable to allocate memory";
                 return false;
             }
 
             PolyWord *mem  = newSpace->bottom;
             PolyWord* writeAble = newSpace->writeAble(mem);
             if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 ||
                 fread(writeAble, descr->segmentSize, 1, loadFile) != 1)
             {
                 errorResult = "Unable to read segment";
                 return false;
             }
             // Fill unused space to the top of the area.
             gMem.FillUnusedSpace(writeAble +descr->segmentSize/sizeof(PolyWord),
                 newSpace->spaceSize() - descr->segmentSize/sizeof(PolyWord));
             // Leave it writable until we've done the relocations.
 
             relocate.targetAddresses[descr->segmentIndex] = mem;
             if (newSpace->noOverwrite)
             {
                 ClearVolatile cwbr;
                 cwbr.ScanAddressesInRegion(newSpace->bottom, newSpace->topPointer);
             }
         }
     }
 
     // Now read in the mutable overwrites and relocate.
 
     for (unsigned j = 0; j < relocate.nDescrs; j++)
     {
         SavedStateSegmentDescr *descr = &relocate.descrs[j];
         MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
         ASSERT(space != NULL); // We should have created it.
         if (descr->segmentFlags & SSF_OVERWRITE)
         {
             if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 ||
                 fread(space->bottom, descr->segmentSize, 1, loadFile) != 1)
             {
                 errorResult = "Unable to read segment";
                 return false;
             }
         }
 
         // Relocation.
         if (descr->segmentData != 0)
         {
             // Adjust the addresses in the loaded segment.
             for (PolyWord *p = space->bottom; p < space->top; )
             {
                 p++;
                 PolyObject *obj = (PolyObject*)p;
                 POLYUNSIGNED length = obj->Length();
                 relocate.RelocateObject(obj);
                 p += length;
             }
         }
 
         // Process explicit relocations.
         // If we get errors just skip the error and continue rather than leave
         // everything in an unstable state.
         if (descr->relocations)
         {
             if (fseek(loadFile, descr->relocations, SEEK_SET) != 0)
             {
                 errorResult = "Unable to read relocation segment";
                 return false;
             }
             for (unsigned k = 0; k < descr->relocationCount; k++)
             {
                 RelocationEntry reloc;
                 if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1)
                 {
                     errorResult = "Unable to read relocation segment";
                     return false;
                 }
                 MemSpace *toSpace = gMem.SpaceForIndex(reloc.targetSegment);
                 if (toSpace == NULL)
                 {
                     errorResult = "Unknown space reference in relocation";
                     continue;
                 }
                 byte *setAddress = (byte*)space->bottom + reloc.relocAddress;
                 byte *targetAddress = (byte*)toSpace->bottom + reloc.targetAddress;
                 if (setAddress >= (byte*)space->top || targetAddress >= (byte*)toSpace->top)
                 {
                     errorResult = "Bad relocation";
                     continue;
                 }
                 ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind);
             }
         }
     }
 
     // Set the final permissions.
     for (unsigned j = 0; j < relocate.nDescrs; j++)
     {
         SavedStateSegmentDescr *descr = &relocate.descrs[j];
         if (descr->segmentData != 0)
         {
             PermanentMemSpace* space = gMem.SpaceForIndex(descr->segmentIndex);
             gMem.CompletePermanentSpaceAllocation(space);
         }
     }
 
     // Add an entry to the hierarchy table for this file.
     if (! AddHierarchyEntry(thisFile, header.timeStamp))
         return false;
 
     return true; // Succeeded
 }
 
 static void LoadState(TaskData *taskData, bool isHierarchy, Handle hFileList)
 // Load a saved state or a hierarchy.  
 // hFileList is a list if this is a hierarchy and a single name if it is not.
 // If the load fails this raises an ML exception: a Fail if there is no error
 // number, otherwise a system call error that also names the file involved
 // whenever a file name is available.
 {
     StateLoader loader(isHierarchy, hFileList);
     // Request the main thread to do the load.  This may set the error string if it failed.
     processes->MakeRootRequest(taskData, &loader);
 
     if (loader.errorResult != 0)
     {
         if (loader.errNumber == 0)
             raise_fail(taskData, loader.errorResult);
         else if (loader.fileName == NULL)
         {
             // The loader can fail before it has a file name, in particular if
             // it was unable to allocate the name itself.  There is nothing to
             // append in that case and the name must not be passed to _tcslen.
             raise_syscall(taskData, loader.errorResult, loader.errNumber);
         }
         else
         {
             // Build "<message>: <file name>".  The size allows for the
             // message, the two-character separator, the name and the null.
             AutoFree<char*> buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1));
             if (buff == NULL)
                 raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 #if (defined(_WIN32) && defined(UNICODE))
             sprintf(buff, "%s: %S", loader.errorResult, (TCHAR *)loader.fileName);
 #else
             sprintf(buff, "%s: %s", loader.errorResult, (TCHAR *)loader.fileName);
 #endif
             raise_syscall(taskData, buff, loader.errNumber);
         }
     }
 }
 
 // RTS entry point: load a saved state file and any ancestors.
 // arg is the ML string naming the file.  Always returns tagged zero.
 POLYUNSIGNED PolyLoadState(FirstArgument threadId, PolyWord arg)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle savedMark = taskData->saveVec.mark();
     // The argument is pushed on the save vector and passed on as a handle.
     Handle fileNameHandle = taskData->saveVec.push(arg);
 
     try {
         LoadState(taskData, false, fileNameHandle);
     }
     catch (...) {} // If an ML exception is raised
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     return TAGGED(0).AsUnsigned();
 }
 
 // RTS entry point: load a hierarchy.  Here arg is an ML list giving the
 // complete chain of children and parents, rather than a single file name.
 // Always returns tagged zero.
 POLYUNSIGNED PolyLoadHierarchy(FirstArgument threadId, PolyWord arg)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle savedMark = taskData->saveVec.mark();
     // The argument is pushed on the save vector and passed on as a handle.
     Handle fileListHandle = taskData->saveVec.push(arg);
 
     try {
         LoadState(taskData, true, fileListHandle);
     }
     catch (...) {} // If an ML exception is raised
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     return TAGGED(0).AsUnsigned();
 }
 
 /*
  *  Additional functions to provide information or change saved-state files.
  */
 
 // These functions do not affect the global state so can be executed by
 // the ML threads directly.
 
 static Handle ShowHierarchy(TaskData *taskData)
 // Return the list of files in the hierarchy as an ML list of strings.
 {
     Handle base   = taskData->saveVec.mark();
     Handle result = SAVE(ListNull);
 
     // Work from the deepest entry back to the first.  Each name is consed on
     // to the front so the finished list is in hierarchy-table order.
     unsigned level = hierarchyDepth;
     while (level > 0)
     {
         level--;
         Handle name = SAVE(C_string_to_Poly(taskData, hierarchyTable[level]->fileName));
         Handle cell = alloc_and_save(taskData, sizeof(ML_Cons_Cell)/sizeof(PolyWord));
         DEREFLISTHANDLE(cell)->h = name->Word();
         DEREFLISTHANDLE(cell)->t = result->Word();
         // Drop the handles created so far and keep just the new list head,
         // so the save vector does not grow with the length of the list.
         taskData->saveVec.reset(base);
         result = SAVE(cell->Word());
     }
     return result;
 }
 
 // RTS entry point: show the hierarchy.  Returns the list of file names, or
 // tagged zero if no list was produced because an exception was raised.
 POLYUNSIGNED PolyShowHierarchy(FirstArgument threadId)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle savedMark = taskData->saveVec.mark();
     Handle hierarchyList = 0;
 
     try {
         hierarchyList = ShowHierarchy(taskData);
     }
     catch (...) {} // If an ML exception is raised
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     if (hierarchyList != 0)
         return hierarchyList->Word().AsUnsigned();
     return TAGGED(0).AsUnsigned();
 }
 
 static void RenameParent(TaskData *taskData, PolyWord childName, PolyWord parentName)
 // Change the name of the immediate parent stored in a child
 // childName is the saved state file to modify and parentName is the new
 // name to record in it.  Raises an ML exception if the file cannot be opened,
 // is not a compatible saved state, or has no parent.
 {
     // The name of the file to modify.
     AutoFree<TCHAR*> fileNameBuff(Poly_string_to_T_alloc(childName));
     if (fileNameBuff == NULL)
         raise_syscall(taskData, "Insufficient memory", NOMEMORY);
     // The new parent name to insert.
     AutoFree<TCHAR*> parentNameBuff(Poly_string_to_T_alloc(parentName));
     if (parentNameBuff == NULL)
         raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 
     AutoClose loadFile(_tfopen(fileNameBuff, _T("r+b"))); // Open for reading and writing
     if ((FILE*)loadFile == NULL)
     {
         // 23 is the length of the "Cannot open load file: " prefix.
         AutoFree<char*> buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1));
         // Check the allocation before formatting into it.
         if (buff == NULL)
             raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 #if (defined(_WIN32) && defined(UNICODE))
         sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff);
 #else
         sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff);
 #endif
         raise_syscall(taskData, buff, ERRORNUMBER);
     }
 
     SavedStateHeader header;
     // Read the header and check the signature.
     if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1)
         raise_fail(taskData, "Unable to load header");
 
     if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0)
         raise_fail(taskData, "File is not a saved state");
 
     if (header.headerVersion != SAVEDSTATEVERSION ||
         header.headerLength != sizeof(SavedStateHeader) ||
         header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
     {
         raise_fail(taskData, "Unsupported version of saved state file");
     }
 
     // Does this actually have a parent?
     if (header.parentNameEntry == 0)
         raise_fail(taskData, "File does not have a parent");
 
     // At the moment the only entry in the string table is the parent
     // name so we can simply write a new one on the end of the file.
     // This makes the file grow slightly each time but it shouldn't be
     // significant.
     // NOTE(review): the results of the seeks and writes below are not checked.
     fseek(loadFile, 0, SEEK_END);
     header.stringTable = ftell(loadFile); // Remember where this is
     _fputtc(0, loadFile); // First byte of string table is zero
     _fputts(parentNameBuff, loadFile);
     _fputtc(0, loadFile); // A terminating null.
     // The size counts the leading zero, the name and the terminator.
     header.stringTableSize = (_tcslen(parentNameBuff) + 2)*sizeof(TCHAR);
 
     // Now rewind and write the header with the revised string table.
     fseek(loadFile, 0, SEEK_SET);
     fwrite(&header, sizeof(header), 1, loadFile);
 }
 
 // RTS entry point: record parentName as the parent of the saved state childName.
 // The result is always tagged zero.
 POLYUNSIGNED PolyRenameParent(FirstArgument threadId, PolyWord childName, PolyWord parentName)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     // Remember the save vector position so it can be restored on the way out.
     Handle savedMark = taskData->saveVec.mark();
 
     try
     {
         RenameParent(taskData, childName, parentName);
     }
     catch (...)
     {
         // If an ML exception is raised
     }
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     return TAGGED(0).AsUnsigned();
 }
 
 static Handle ShowParent(TaskData *taskData, Handle hFileName)
 // Return the name of the immediate parent stored in a child.
 // hFileName holds the name of the saved-state file to inspect.  The result is
 // a one-word cell containing the parent name (a "Some" value) if the header has
 // a parent name entry, or NONE_VALUE otherwise.  Failures are reported through
 // raise_syscall or raise_fail.
 {
     AutoFree<TCHAR*> fileNameBuff(Poly_string_to_T_alloc(hFileName->Word()));
     if (fileNameBuff == NULL)
         raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 
     AutoClose loadFile(_tfopen(fileNameBuff, _T("rb")));
     if ((FILE*)loadFile == NULL)
     {
         // 23 is the length of the fixed part of the message below.
         AutoFree<char*> buff((char *)malloc(23 + _tcslen(fileNameBuff) * sizeof(TCHAR) + 1));
         if (buff == NULL)
             raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 #if (defined(_WIN32) && defined(UNICODE))
         sprintf(buff, "Cannot open load file: %S", (TCHAR *)fileNameBuff);
 #else
         sprintf(buff, "Cannot open load file: %s", (TCHAR *)fileNameBuff);
 #endif
         raise_syscall(taskData, buff, ERRORNUMBER);
     }
 
     SavedStateHeader header;
     // Read the header and check the signature.
     if (fread(&header, sizeof(SavedStateHeader), 1, loadFile) != 1)
         raise_fail(taskData, "Unable to load header");
 
     if (strncmp(header.headerSignature, SAVEDSTATESIGNATURE, sizeof(header.headerSignature)) != 0)
         raise_fail(taskData, "File is not a saved state");
 
     if (header.headerVersion != SAVEDSTATEVERSION ||
         header.headerLength != sizeof(SavedStateHeader) ||
         header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
     {
         raise_fail(taskData, "Unsupported version of saved state file");
     }
 
     // Does this have a parent?
     if (header.parentNameEntry != 0)
     {
         // Reject a bad entry before doing any arithmetic with it.  Otherwise the
         // subtraction below wraps round and we try to allocate a huge buffer.
         if (header.parentNameEntry >= header.stringTableSize)
             raise_fail(taskData, "Unable to read parent file name");
 
         size_t toRead = header.stringTableSize-header.parentNameEntry;
         size_t elems = ((toRead + sizeof(TCHAR) - 1) / sizeof(TCHAR));
         // Always allow space for null terminator.  The buffer is zeroed so that
         // a final, partially read, character does not contain rubbish.
         AutoFree<TCHAR*> parentFileName((TCHAR *)calloc(elems + 1, sizeof(TCHAR)));
         if (parentFileName == NULL)
             raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 
         if (fseek(loadFile, header.stringTable + header.parentNameEntry, SEEK_SET) != 0 ||
             fread(parentFileName, 1, toRead, loadFile) != toRead)
         {
             raise_fail(taskData, "Unable to read parent file name");
         }
         parentFileName[elems] = 0; // Should already be null-terminated, but just in case.
         // Convert the name into a Poly string and then build a "Some" value.
         // It's possible, although silly, to have the empty string as a parent name.
         Handle resVal = SAVE(C_string_to_Poly(taskData, parentFileName));
         Handle result = alloc_and_save(taskData, 1);
         DEREFHANDLE(result)->Set(0, resVal->Word());
         return result;
     }
     else return SAVE(NONE_VALUE);
 }
 
 // RTS entry point: return the name of the immediate parent stored in a child.
 // The result is whatever ShowParent produced, or tagged zero if it did not
 // return a value.
 POLYUNSIGNED PolyShowParent(FirstArgument threadId, PolyWord arg)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle savedMark = taskData->saveVec.mark();
     Handle fileNameHandle = taskData->saveVec.push(arg);
     Handle parent = 0;
 
     try
     {
         parent = ShowParent(taskData, fileNameHandle);
     }
     catch (...)
     {
         // If an ML exception is raised
     }
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     // Default to tagged zero when there is no result handle.
     POLYUNSIGNED answer = TAGGED(0).AsUnsigned();
     if (parent != 0)
         answer = parent->Word().AsUnsigned();
     return answer;
 }
 
 // Module system
 // Signature and version written at the start of every module file and
 // checked again when a module is loaded.
 #define MODULESIGNATURE "POLYMODU"
 #define MODULEVERSION   2
 
 // Header written at the start of a module file.
 typedef struct _moduleHeader
 {
     // These entries are primarily to check that we have a valid
     // module file before we try to interpret anything else.
     char        headerSignature[8];     // Should contain MODULESIGNATURE
     unsigned    headerVersion;          // Should contain MODULEVERSION
     unsigned    headerLength;           // Number of bytes in the header
     unsigned    segmentDescrLength;     // Number of bytes in a descriptor
 
     // These entries contain the real data.
     off_t       segmentDescr;           // Position of segment descriptor table
     unsigned    segmentDescrCount;      // Number of segment descriptors in the table
     time_t      timeStamp;              // The time stamp for this file.
     time_t      executableTimeStamp;    // The time stamp for the parent executable.
     // Root
     uintptr_t   rootSegment;            // Index of the segment that contains the root object
     POLYUNSIGNED     rootOffset;        // Byte offset of the root from the start of that segment
 } ModuleHeader;
 
 // Store a module
 // A main-thread request (MTP_STOREMODULE) that exports the data reachable
 // from "root" to the file "fileName".  The outcome is left in errorMessage
 // and errCode for the requesting thread to examine.
 class ModuleStorer: public MainThreadRequest
 {
 public:
     ModuleStorer(const TCHAR *file, Handle r):
         MainThreadRequest(MTP_STOREMODULE), fileName(file), root(r), errorMessage(0), errCode(0) {}
 
     // Carry out the export.
     virtual void Perform();
 
     const TCHAR *fileName;      // Name of the file to write.  Not owned by this object.
     Handle root;                // Handle to the root of the data to export.
     const char *errorMessage;   // Null unless an error has occurred.
     int errCode;                // Error number, set when the output file cannot be opened.
 };
 
 // Exporter that writes a module file.  It is a SaveStateExport constructed
 // with a hierarchy of 1 so that data in the executable is not copied.
 class ModuleExport: public SaveStateExport
 {
 public:
     ModuleExport(): SaveStateExport(1/* Everything EXCEPT the executable. */) {}
     virtual void exportStore(void); // Write the data out.
 };
 
 // Export the data reachable from the root to the module file.
 // On failure errorMessage is set, and errCode as well if the file could not
 // be opened.  errorMessage is left null if the export succeeded.
 void ModuleStorer::Perform()
 {
     // Check the root before touching the file.  Opening with "wb" creates or
     // truncates the file so an invalid root must not get that far, otherwise
     // a failed request would destroy an existing module.
     if (! root->Word().IsDataPtr())
     {
         // If we have a completely empty module the list may be null.
         // This needs to be dealt with at a higher level.
         errorMessage = "Module root is not an address";
         return;
     }
 
     ModuleExport exporter;
 #if (defined(_WIN32) && defined(UNICODE))
     exporter.exportFile = _wfopen(fileName, L"wb");
 #else
     exporter.exportFile = fopen(fileName, "wb");
 #endif
     if (exporter.exportFile == NULL)
     {
         errorMessage = "Cannot open export file";
         errCode = ERRORNUMBER;
         return;
     }
     // RunExport copies everything reachable from the root, except data from
     // the executable because we've set the hierarchy to 1, using CopyScan.
     // It builds the tables in the export data structure then calls exportStore
     // to actually write the data.
     exporter.RunExport(root->WordP());
     errorMessage = exporter.errorMessage; // This will be null unless there's been an error.
 }
 
 // Write the module file.  The layout is: the module header, the table of
 // segment descriptors and then, for each area with index >= newAreas, its
 // relocations followed by its data.  The header and descriptor table are
 // written twice: first as placeholders to reserve the space and again at the
 // end once the file offsets are known.  The export file is closed on exit.
 void ModuleExport::exportStore(void)
 {
     // What we need to do here is implement the export in a similar way to e.g. PECOFFExport::exportStore
     // This is copied from SaveRequest::Perform and should be common code.
     ModuleHeader modHeader;
     memset(&modHeader, 0, sizeof(modHeader));
     modHeader.headerLength = sizeof(modHeader);
     memcpy(modHeader.headerSignature,
         MODULESIGNATURE, sizeof(modHeader.headerSignature));
     modHeader.headerVersion = MODULEVERSION;
     modHeader.segmentDescrLength = sizeof(SavedStateSegmentDescr);
     modHeader.executableTimeStamp = exportTimeStamp;
     {
         // Record the root as a segment index and a byte offset within it.
         unsigned rootArea = findArea(this->rootFunction);
         struct _memTableEntry *mt = &memTable[rootArea];
         modHeader.rootSegment = mt->mtIndex;
         modHeader.rootOffset = (POLYUNSIGNED)((char*)this->rootFunction - (char*)mt->mtOriginalAddr);
     }
     modHeader.timeStamp = getBuildTime();
     modHeader.segmentDescrCount = this->memTableEntries; // One segment for each space.
     // Write out the header.
     fwrite(&modHeader, sizeof(modHeader), 1, this->exportFile);
 
     SavedStateSegmentDescr *descrs = new SavedStateSegmentDescr [this->memTableEntries];
     // We need an entry in the descriptor tables for each segment in the executable because
     // we may have relocations that refer to addresses in it.
     for (unsigned j = 0; j < this->memTableEntries; j++)
     {
         SavedStateSegmentDescr *thisDescr = &descrs[j];
         memoryTableEntry *entry = &this->memTable[j];
         memset(thisDescr, 0, sizeof(SavedStateSegmentDescr));
         thisDescr->relocationSize = sizeof(RelocationEntry);
         thisDescr->segmentIndex = (unsigned)entry->mtIndex;
         thisDescr->segmentSize = entry->mtLength; // Set this even if we don't write it.
         thisDescr->originalAddress = entry->mtOriginalAddr;
         // Translate the memory table flags into segment flags.  The overwrite
         // and byte flags are only set for writable areas.
         if (entry->mtFlags & MTF_WRITEABLE)
         {
             thisDescr->segmentFlags |= SSF_WRITABLE;
             if (entry->mtFlags & MTF_NO_OVERWRITE)
                 thisDescr->segmentFlags |= SSF_NOOVERWRITE;
             if ((entry->mtFlags & MTF_NO_OVERWRITE) == 0)
                 thisDescr->segmentFlags |= SSF_OVERWRITE;
             if (entry->mtFlags & MTF_BYTES)
                 thisDescr->segmentFlags |= SSF_BYTES;
         }
         if (entry->mtFlags & MTF_EXECUTABLE)
              thisDescr->segmentFlags |= SSF_CODE;
     }
     // Write out temporarily. Will be overwritten at the end.
     modHeader.segmentDescr = ftell(this->exportFile);
     fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, this->exportFile);
 
     // Write out the relocations and the data.
     for (unsigned k = 0; k < this->memTableEntries; k++)
     {
         SavedStateSegmentDescr *thisDescr = &descrs[k];
         memoryTableEntry *entry = &this->memTable[k];
         if (k >= newAreas) // Not permanent areas
         {
             // The relocations for this area start at the current position.
             thisDescr->relocations = ftell(this->exportFile);
             // Have to write this out.
             this->relocationCount = 0;
             // Create the relocation table.
             // NOTE(review): relocateObject and ScanConstantsWithinCode presumably
             // write the entries to the file and update relocationCount - confirm.
             char *start = (char*)entry->mtOriginalAddr;
             char *end = start + entry->mtLength;
             for (PolyWord *p = (PolyWord*)start; p < (PolyWord*)end; )
             {
                 // Step over the word before the object, then over the object itself.
                 p++;
                 PolyObject *obj = (PolyObject*)p;
                 POLYUNSIGNED length = obj->Length();
                 // For saved states we don't include explicit relocations except
                 // in code but it's easier if we do for modules.
                 if (length != 0 && obj->IsCodeObject())
                     machineDependent->ScanConstantsWithinCode(obj, this);
                 relocateObject(obj);
                 p += length;
             }
             thisDescr->relocationCount = this->relocationCount;
             // Write out the data.
             thisDescr->segmentData = ftell(exportFile);
             fwrite(entry->mtOriginalAddr, entry->mtLength, 1, exportFile);
         }
     }
 
     // Rewrite the header and the segment tables now they're complete.
     fseek(exportFile, 0, SEEK_SET);
     fwrite(&modHeader, sizeof(modHeader), 1, exportFile);
     fwrite(descrs, sizeof(SavedStateSegmentDescr), this->memTableEntries, exportFile);
     delete[](descrs);
 
     // Clear the pointer so that nothing else tries to use or close the file.
     fclose(exportFile); exportFile = NULL;
 }
 
 // RTS entry point: store "contents" as a module in the file called "name".
 // The export itself is run as a root request.  The result is always tagged zero.
 POLYUNSIGNED PolyStoreModule(FirstArgument threadId, PolyWord name, PolyWord contents)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle savedMark = taskData->saveVec.mark();
     Handle rootHandle = taskData->saveVec.push(contents);
 
     try
     {
         TempString moduleFile(name);
         ModuleStorer request(moduleFile, rootHandle);
         processes->MakeRootRequest(taskData, &request);
         // Turn an error recorded by the request into an ML exception.
         if (request.errorMessage)
             raise_syscall(taskData, request.errorMessage, request.errCode);
     }
     catch (...)
     {
         // If an ML exception is raised
     }
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     return TAGGED(0).AsUnsigned();
 }
 
 // Load a module.
 // A main-thread request (MTP_LOADMODULE) that reads the module file "fileName".
 // The outcome is left in errorResult, errNumber and rootHandle for the
 // requesting thread to examine.
 class ModuleLoader: public MainThreadRequest
 {
 public:
     ModuleLoader(TaskData *taskData, const TCHAR *file):
         MainThreadRequest(MTP_LOADMODULE), callerTaskData(taskData), fileName(file),
             errorResult(NULL), errNumber(0), rootHandle(0) {}
 
     // Carry out the load.
     virtual void Perform();
 
     TaskData *callerTaskData;   // The requesting task.  The root is pushed to its save vector.
     const TCHAR *fileName;      // Name of the file to read.  Not owned by this object.
     const char *errorResult;    // Null unless an error has occurred.
     int errNumber;              // Error number, set when the file cannot be opened.
     Handle rootHandle;          // Handle to the root of the loaded module, set on success.
 };
 
 void ModuleLoader::Perform()
 {
     AutoClose loadFile(_tfopen(fileName, _T("rb")));
     if ((FILE*)loadFile == NULL)
     {
         errorResult = "Cannot open load file";
         errNumber = ERRORNUMBER;
         return;
     }
 
     ModuleHeader header;
     // Read the header and check the signature.
     if (fread(&header, sizeof(ModuleHeader), 1, loadFile) != 1)
     {
         errorResult = "Unable to load header";
         return;
     }
     if (strncmp(header.headerSignature, MODULESIGNATURE, sizeof(header.headerSignature)) != 0)
     {
         errorResult = "File is not a Poly/ML module";
         return;
     }
     if (header.headerVersion != MODULEVERSION ||
         header.headerLength != sizeof(ModuleHeader) ||
         header.segmentDescrLength != sizeof(SavedStateSegmentDescr))
     {
         errorResult = "Unsupported version of module file";
         return;
     }
     if (header.executableTimeStamp != exportTimeStamp)
     {
         // Time-stamp does not match executable.
         errorResult = 
                 "Module was exported from a different executable or the executable has changed";
         return;
     }
     LoadRelocate relocate;
     relocate.nDescrs = header.segmentDescrCount;
     relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs];
 
     if (fseek(loadFile, header.segmentDescr, SEEK_SET) != 0 ||
         fread(relocate.descrs, sizeof(SavedStateSegmentDescr), relocate.nDescrs, loadFile) != relocate.nDescrs)
     {
         errorResult = "Unable to read segment descriptors";
         return;
     }
     {
         unsigned maxIndex = 0;
         for (unsigned i = 0; i < relocate.nDescrs; i++)
             if (relocate.descrs[i].segmentIndex > maxIndex)
                 maxIndex = relocate.descrs[i].segmentIndex;
         relocate.targetAddresses = new PolyWord*[maxIndex+1];
         for (unsigned i = 0; i <= maxIndex; i++) relocate.targetAddresses[i] = 0;
     }
 
     // Read in and create the new segments first.  If we have problems,
     // in particular if we have run out of memory, then it's easier to recover.  
     for (unsigned i = 0; i < relocate.nDescrs; i++)
     {
         SavedStateSegmentDescr *descr = &relocate.descrs[i];
         MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
 
         if (descr->segmentData == 0)
         { // No data - just an entry in the index.
             if (space == NULL/* ||
                 descr->segmentSize != (size_t)((char*)space->top - (char*)space->bottom)*/)
             {
                 errorResult = "Mismatch for existing memory space";
                 return;
             }
             else relocate.targetAddresses[descr->segmentIndex] = space->bottom;
         }
         else
         { // New segment.
             if (space != NULL)
             {
                 errorResult = "Segment already exists";
                 return;
             }
             // Allocate memory for the new segment.
             size_t actualSize = descr->segmentSize;
             MemSpace *space;
             if (descr->segmentFlags & SSF_CODE)
             {
                 CodeSpace *cSpace = gMem.NewCodeSpace(actualSize);
                 if (cSpace == 0)
                 {
                     errorResult = "Unable to allocate memory";
                     return;
                 }
                 space = cSpace;
                 cSpace->firstFree = (PolyWord*)((byte*)space->bottom + descr->segmentSize);
                 if (cSpace->firstFree != cSpace->top)
                     gMem.FillUnusedSpace(cSpace->firstFree, cSpace->top - cSpace->firstFree);
             }
             else
             {
                 LocalMemSpace *lSpace = gMem.NewLocalSpace(actualSize, descr->segmentFlags & SSF_WRITABLE);
                 if (lSpace == 0)
                 {
                     errorResult = "Unable to allocate memory";
                     return;
                 }
                 space = lSpace;
                 lSpace->lowerAllocPtr = (PolyWord*)((byte*)lSpace->bottom + descr->segmentSize);
             }
             if (fseek(loadFile, descr->segmentData, SEEK_SET) != 0 ||
                 fread(space->bottom, descr->segmentSize, 1, loadFile) != 1)
             {
                 errorResult = "Unable to read segment";
                 return;
             }
             relocate.targetAddresses[descr->segmentIndex] = space->bottom;
             if (space->isMutable && (descr->segmentFlags & SSF_BYTES) != 0)
             {
                 ClearVolatile cwbr;
                 cwbr.ScanAddressesInRegion(space->bottom, (PolyWord*)((byte*)space->bottom + descr->segmentSize));
             }
         }
     }
     // Now deal with relocation.
     for (unsigned j = 0; j < relocate.nDescrs; j++)
     {
         SavedStateSegmentDescr *descr = &relocate.descrs[j];
         PolyWord *baseAddr = relocate.targetAddresses[descr->segmentIndex];
         ASSERT(baseAddr != NULL); // We should have created it.
         // Process explicit relocations.
         // If we get errors just skip the error and continue rather than leave
         // everything in an unstable state.
         if (descr->relocations)
         {
             if (fseek(loadFile, descr->relocations, SEEK_SET) != 0)
                 errorResult = "Unable to read relocation segment";
             for (unsigned k = 0; k < descr->relocationCount; k++)
             {
                 RelocationEntry reloc;
                 if (fread(&reloc, sizeof(reloc), 1, loadFile) != 1)
                     errorResult = "Unable to read relocation segment";
                 byte *setAddress = (byte*)baseAddr + reloc.relocAddress;
                 byte *targetAddress = (byte*)relocate.targetAddresses[reloc.targetSegment] + reloc.targetAddress;
                 ScanAddress::SetConstantValue(setAddress, (PolyObject*)(targetAddress), reloc.relKind);
             }
         }
     }
 
     // Get the root address.  Push this to the caller's save vec.  If we put the
     // newly created areas into local memory we could get a GC as soon as we
     // complete this root request.
     {
         PolyWord *baseAddr = relocate.targetAddresses[header.rootSegment];
         rootHandle = callerTaskData->saveVec.push((PolyObject*)((byte*)baseAddr + header.rootOffset));
     }
 }
 
 // Load the module whose file name is given by args and return a handle to its
 // root.  The load is run as a root request.  An error recorded by the request
 // is raised as an ML exception: with raise_fail if there is no error number,
 // otherwise with raise_syscall and a message that includes the file name.
 static Handle LoadModule(TaskData *taskData, Handle args)
 {
     TempString fileName(args->Word());
     ModuleLoader loader(taskData, fileName);
     processes->MakeRootRequest(taskData, &loader);
 
     if (loader.errorResult != 0)
     {
         if (loader.errNumber == 0)
             raise_fail(taskData, loader.errorResult);
         else
         {
             // Space for the message, ": ", the file name and the terminator.
             AutoFree<char*> buff((char *)malloc(strlen(loader.errorResult) + 2 + _tcslen(loader.fileName) * sizeof(TCHAR) + 1));
             // Check the allocation before formatting into it.
             if (buff == NULL)
                 raise_syscall(taskData, "Insufficient memory", NOMEMORY);
 #if (defined(_WIN32) && defined(UNICODE))
             sprintf(buff, "%s: %S", loader.errorResult, loader.fileName);
 #else
             sprintf(buff, "%s: %s", loader.errorResult, loader.fileName);
 #endif
             raise_syscall(taskData, buff, loader.errNumber);
         }
     }
 
     return loader.rootHandle;
 }
 
 // RTS entry point: load the module from the file named by arg.
 // The result is the root returned by LoadModule, or tagged zero if it did
 // not return a value.
 POLYUNSIGNED PolyLoadModule(FirstArgument threadId, PolyWord arg)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle savedMark = taskData->saveVec.mark();
     Handle fileNameHandle = taskData->saveVec.push(arg);
     Handle moduleRoot = 0;
 
     try
     {
         moduleRoot = LoadModule(taskData, fileNameHandle);
     }
     catch (...)
     {
         // If an ML exception is raised
     }
 
     taskData->saveVec.reset(savedMark);
     taskData->PostRTSCall();
     // Default to tagged zero when there is no result handle.
     POLYUNSIGNED answer = TAGGED(0).AsUnsigned();
     if (moduleRoot != 0)
         answer = moduleRoot->Word().AsUnsigned();
     return answer;
 }
 
 
 // Set up the permanent memory spaces from the table in an export description
 // and return the root function.  The structure sizes and RTS version stored in
 // the description are checked first and Exit is called if they do not match
 // this library.  The time stamp of the export is saved in exportTimeStamp.
 // With POLYML32IN64 the data is copied into newly allocated spaces and
 // relocated; otherwise the spaces are created over the existing data.
 PolyObject *InitHeaderFromExport(struct _exportDescription *exports)
 {
     // Check the structure sizes stored in the export structure match the versions
     // used in this library.
     if (exports->structLength != sizeof(exportDescription) ||
         exports->memTableSize != sizeof(memoryTableEntry) ||
         exports->rtsVersion < FIRST_supported_version ||
         exports->rtsVersion > LAST_supported_version)
     {
 #if (FIRST_supported_version == LAST_supported_version)
         Exit("The exported object file has version %0.2f but this library supports %0.2f",
             ((float)exports->rtsVersion) / 100.0,
             ((float)FIRST_supported_version) / 100.0);
 #else
         Exit("The exported object file has version %0.2f but this library supports %0.2f-%0.2f",
             ((float)exports->rtsVersion) / 100.0,
             ((float)FIRST_supported_version) / 100.0,
             ((float)LAST_supported_version) / 100.0);
 #endif
     }
     // We could also check the RTS version and the architecture.
     exportTimeStamp = exports->timeStamp; // Needed for load and save.
 
     memoryTableEntry *memTable = exports->memTable;
 #ifdef POLYML32IN64
     // We need to copy this into the heap before beginning execution.
     // This is very like loading a saved state and the code should probably
     // be merged.
     LoadRelocate relocate(true);
     relocate.nDescrs = exports->memTableEntries;
     relocate.descrs = new SavedStateSegmentDescr[relocate.nDescrs];
     relocate.targetAddresses = new PolyWord*[exports->memTableEntries];
     relocate.originalBaseAddr = (PolyWord*)exports->originalBaseAddr;
 
     PolyObject *root = 0;
 
     for (unsigned i = 0; i < exports->memTableEntries; i++)
     {
         relocate.descrs[i].segmentIndex = memTable[i].mtIndex;
         relocate.descrs[i].originalAddress = memTable[i].mtOriginalAddr;
         relocate.descrs[i].segmentSize = memTable[i].mtLength;
         PermanentMemSpace *newSpace =
             gMem.AllocateNewPermanentSpace(memTable[i].mtLength, (unsigned)memTable[i].mtFlags, (unsigned)memTable[i].mtIndex);
         if (newSpace == 0)
             Exit("Unable to initialise a permanent memory space");
 
         // Copy the data into the new space and fill whatever is left over.
         PolyWord *mem = newSpace->bottom;
         memcpy(newSpace->writeAble(mem), memTable[i].mtCurrentAddr, memTable[i].mtLength);
         PolyWord* unused = mem + memTable[i].mtLength / sizeof(PolyWord);
         gMem.FillUnusedSpace(newSpace->writeAble(unused),
             newSpace->spaceSize() - memTable[i].mtLength / sizeof(PolyWord));
 
         relocate.targetAddresses[i] = mem;
         relocate.AddTreeRange(&relocate.spaceTree, i, (uintptr_t)relocate.descrs[i].originalAddress,
             (uintptr_t)((char*)relocate.descrs[i].originalAddress + relocate.descrs[i].segmentSize - 1));
 
         // Relocate the root function.
         if (exports->rootFunction >= memTable[i].mtCurrentAddr && exports->rootFunction < (char*)memTable[i].mtCurrentAddr + memTable[i].mtLength)
         {
             root = (PolyObject*)((char*)mem + ((char*)exports->rootFunction - (char*)memTable[i].mtCurrentAddr));
         }
     }
 
     // Now relocate the addresses
     for (unsigned j = 0; j < exports->memTableEntries; j++)
     {
         SavedStateSegmentDescr *descr = &relocate.descrs[j];
         MemSpace *space = gMem.SpaceForIndex(descr->segmentIndex);
         // Any relative addresses have to be corrected by adding this.
         relocate.relativeOffset = (PolyWord*)descr->originalAddress - space->bottom;
         for (PolyWord *p = space->bottom; p < space->top; )
         {
 #ifdef POLYML32IN64
             if ((((uintptr_t)p) & 4) == 0)
             {
                 // Skip any padding.  The length word should be on an odd-word boundary.
                 p++;
                 continue;
             }
 #endif
             // Step over the length word, then over the object itself.
             p++;
             PolyObject *obj = (PolyObject*)p;
             POLYUNSIGNED length = obj->Length();
             relocate.RelocateObject(obj);
             p += length;
         }
     }
 
     // Set the final permissions.
     for (unsigned j = 0; j < exports->memTableEntries; j++)
     {
         PermanentMemSpace *space = gMem.SpaceForIndex(memTable[j].mtIndex);
         gMem.CompletePermanentSpaceAllocation(space);
     }
 
     return root;
 
 #else
     for (unsigned i = 0; i < exports->memTableEntries; i++)
     {
         // Construct a new space for each of the entries.
         if (gMem.NewPermanentSpace(
             (PolyWord*)memTable[i].mtCurrentAddr,
             memTable[i].mtLength / sizeof(PolyWord), (unsigned)memTable[i].mtFlags,
             (unsigned)memTable[i].mtIndex) == 0)
             Exit("Unable to initialise a permanent memory space");
     }
     return (PolyObject *)exports->rootFunction;
 #endif
 }
 
 // Return the system directory for modules.  This is configured differently
 // in Unix and in Windows.
 // If MODULEDIR is defined when this is compiled the result is that string.
 // Otherwise in Windows the result is the installation path from the registry
 // with "Modules" added, or the empty string if the path cannot be found.
 // In all other cases the result is the empty string.
 POLYUNSIGNED PolyGetModuleDirectory(FirstArgument threadId)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle reset = taskData->saveVec.mark();
     Handle result = 0;
 
     try {
 #if (defined(MODULEDIR))
         result = SAVE(C_string_to_Poly(taskData, MODULEDIR));
 #elif (defined(_WIN32))
         {
             // This registry key is configured when Poly/ML is installed using the installer.
             // It gives the path to the Poly/ML installation directory.  We return the
             // Modules subdirectory.
             HKEY hk;
             if (RegOpenKeyEx(HKEY_LOCAL_MACHINE,
                 _T("SOFTWARE\\Microsoft\\Windows\\CurrentVersion\\App Paths\\PolyML.exe"), 0,
                 KEY_QUERY_VALUE, &hk) == ERROR_SUCCESS)
             {
                 DWORD valSize;
                 // The first call is just to find the size of the value.
                 if (RegQueryValueEx(hk, _T("Path"), 0, NULL, NULL, &valSize) == ERROR_SUCCESS)
                 {
 #define MODULEDIR _T("Modules")
                     size_t buffBytes = valSize + (_tcslen(MODULEDIR) + 1) * sizeof(TCHAR);
                     TempString buff((TCHAR*)malloc(buffBytes));
                     DWORD dwType;
                     if ((LPTSTR)buff != NULL)
                     {
                         // Zero the buffer first.  The registry value need not have been
                         // stored with a terminator and _tcscat requires one.
                         memset((LPTSTR)buff, 0, buffBytes);
                         if (RegQueryValueEx(hk, _T("Path"), 0, &dwType, (LPBYTE)(LPTSTR)buff, &valSize) == ERROR_SUCCESS)
                         {
                             // The registry entry should end with a backslash.
                             _tcscat(buff, MODULEDIR);
                             result = SAVE(C_string_to_Poly(taskData, buff));
                         }
                     }
                 }
                 RegCloseKey(hk);
             }
             // Only use the empty string if we have not found the directory.  This
             // used to be done unconditionally which discarded the registry value.
             if (result == 0)
                 result = SAVE(C_string_to_Poly(taskData, ""));
         }
 #else
         result = SAVE(C_string_to_Poly(taskData, ""));
 #endif
     }
     catch (...) {} // If an ML exception is raised
 
     taskData->saveVec.reset(reset);
     taskData->PostRTSCall();
     if (result == 0) return TAGGED(0).AsUnsigned();
     else return result->Word().AsUnsigned();
 }
 
 // Entry point table for the saved state and module functions.  Each entry
 // pairs the name of an entry point with the address of the function that
 // implements it.  The table is terminated by an entry containing nulls.
 struct _entrypts savestateEPT[] =
 {
     { "PolySaveState",                  (polyRTSFunction)&PolySaveState },
     { "PolyLoadState",                  (polyRTSFunction)&PolyLoadState },
     { "PolyShowHierarchy",              (polyRTSFunction)&PolyShowHierarchy },
     { "PolyRenameParent",               (polyRTSFunction)&PolyRenameParent },
     { "PolyShowParent",                 (polyRTSFunction)&PolyShowParent },
     { "PolyStoreModule",                (polyRTSFunction)&PolyStoreModule },
     { "PolyLoadModule",                 (polyRTSFunction)&PolyLoadModule },
     { "PolyLoadHierarchy",              (polyRTSFunction)&PolyLoadHierarchy },
     { "PolyGetModuleDirectory",         (polyRTSFunction)&PolyGetModuleDirectory },
 
     { NULL, NULL } // End of list.
 };
 
diff --git a/libpolyml/sharedata.cpp b/libpolyml/sharedata.cpp
index 2a3b6f9c..efa47528 100644
--- a/libpolyml/sharedata.cpp
+++ b/libpolyml/sharedata.cpp
@@ -1,1123 +1,1123 @@
 /*
     Title:      Share common immutable data
 
     Copyright (c) 2000
         Cambridge University Technical Services Limited
     and David C. J. Matthews 2006, 2010-13, 2016-17, 2019
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License version 2.1 as published by the Free Software Foundation.
 
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.
 
     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #elif defined(_WIN32)
 #include "winconfig.h"
 #else
 #error "No configuration file"
 #endif
 
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 
 #ifdef HAVE_ASSERT_H
 #include <assert.h>
 #define ASSERT(x)   assert(x)
 #else
 #define ASSERT(x)
 #endif
 
 #ifdef HAVE_STRING_H
 #include <string.h>
 #endif
 
 #include <vector>
 
 #include "globals.h"
 #include "save_vec.h"
 #include "machine_dep.h"
 #include "scanaddrs.h"
 #include "run_time.h"
 #include "sys.h"
 #include "gc.h"
 #include "rtsentry.h"
 #include "memmgr.h"
 #include "processes.h"
 #include "gctaskfarm.h"
 #include "diagnostics.h"
 #include "sharedata.h"
 #include "gc_progress.h"
 
 /*
 This code was largely written by Simon Finn as a database improver for the
 memory-mapped persistent store version.  The aim is that where two immutable
 objects (cells) contain the same data (i.e. where ML equality would say they
 were equal) they should be merged so that only a single object is retained.
 
 The basic algorithm works like this:
 1. From the root, recursively process all objects and calculate a "depth"
    for each object.  Mutable data and code segments have depth 0 and
    cannot be merged.  Byte segments (e.g. strings and long-format arbitrary
    precision values) have depth 1.  Other cells have depths of 1 or greater,
    the depth being the maximum recursion depth until a byte segment or an
    object with depth 0 is reached.  Cycles of immutable data don't arise
    normally in ML but could be produced as a result of locking mutable objects.
    To avoid infinite recursion cycles are broken by setting the depth of an
    object to zero before processing it.  The depth of each object is stored
    in the length word of the object.  This ensures each object is processed
    once only.
 2. Vectors are created containing objects of the same depth, from 1 to the
    maximum depth found.
 3. We begin a loop starting at depth 1.
 4. The length words are restored, replacing the depth count in the header.
 5. The objects are sorted by their contents so bringing together objects
    with the same contents.  The contents are considered simply as
    uninterpreted bits.
 6. The sorted vector is processed to find those objects that are actually
    bitwise equal.  One object is selected to be retained and other objects
    have their length words turned into tombstones pointing at the retained
    object.
 7. Objects at the next depth are first processed to find pointers to objects
    that moved in the previous step (or that step with a lower depth).  The
    addresses are updated to point to the retained object.  The effect of this
    step is to ensure that two objects that are equal in ML terms now have
    identical contents.
    e.g. If we have
       val a = ("abc", "def") and b = ("abc", "def")
    then we will have merged the two occurrences of "abc" and "def" in the
    previous pass of level 1 objects.  This step ensures that the two cells
    containing the pairs both hold pointers to the same objects and so are
    bitwise equal.
 8. Repeat with 4, 5 and 6 until all the levels have been processed.
 
 Each object is processed once and at the end most of the objects have been
 updated with the shared addresses.  We have to scan all the mutable and
 code objects to update the addresses but also have to scan the immutables
 because of the possibility of missing an update as a result of breaking a
 loop (see SPF's comment below).
 DCJM 3/8/06
 
 This has been substantially updated while retaining the basic algorithm.
 Sorting is now done in parallel by the GC task farm and the stack is
 now in dynamic memory.  That avoids a possible segfault if the normal
 C stack overflows.
 
 A further problem is that the vectors can get very large and this
 can cause problems if there is insufficient contiguous space.
 The code has been modified to reduce the size of the vectors
 at the cost of increasing the total memory requirement.
 */
 
 extern "C" {
     POLYEXTERNALSYMBOL POLYUNSIGNED PolyShareCommonData(FirstArgument threadId, PolyWord root);
 }
 
 // While sharing is in progress a "depth" is kept in the length word.  A length
 // word holds a depth, rather than a real length, when its Weak bit is set and
 // its Mutable bit is clear.  The tombstone bit is zero.
 // Depths used to be encoded with the tombstone bit set but that is not
 // possible in 32-in-64 because a forwarding pointer needs 31 bits.
 inline bool OBJ_IS_DEPTH(POLYUNSIGNED L)
 {
     const POLYUNSIGNED weakAndMutable = _OBJ_WEAK_BIT | _OBJ_MUTABLE_BIT;
     return (L & weakAndMutable) == _OBJ_WEAK_BIT;
 }
 
 // Extract the depth from a length word that encodes one.
 inline POLYUNSIGNED OBJ_GET_DEPTH(POLYUNSIGNED L)
 {
     return OBJ_OBJECT_LENGTH(L);
 }
 
 // Build the length-word encoding of depth n by adding the Weak bit.
 inline POLYUNSIGNED OBJ_SET_DEPTH(POLYUNSIGNED n)
 {
     return n | _OBJ_WEAK_BIT;
 }
 
 // The DepthVector type contains all the items of a particular depth.
 // This is the abstract class.  There are variants for the case where all
 // the cells have the same size and where they may vary.
 class DepthVector {
 public:
     // Starts empty: no items and no pointer vector allocated yet.
     DepthVector() : nitems(0), vsize(0), ptrVector(0) {}
 
     // Releases the pointer vector, which the subclasses grow with realloc.
     virtual ~DepthVector() { free(ptrVector);  }
     // Turns duplicate cells into forwarding pointers; returns how many were forwarded.
     virtual POLYUNSIGNED MergeSameItems(void);
     // Sorts ptrVector into CompareItems order.
     virtual void Sort(void);
     // Number of cells currently held in the vector.
     virtual POLYUNSIGNED ItemCount(void) { return nitems; }
 
     // Appends cell pt, whose original length word is L.
     virtual void AddToVector(POLYUNSIGNED L, PolyObject *pt) = 0;
 
     // Restores the length words and then scans every cell with "scan".
     void FixLengthAndAddresses(ScanAddress *scan);
 
     // Puts a real length word back on cells that currently hold a forwarding pointer.
     virtual void RestoreForwardingPointers() = 0;
 
 protected:
     POLYUNSIGNED    nitems;     // Number of entries in use.
     POLYUNSIGNED    vsize;      // Number of entries ptrVector has room for.
     PolyObject      **ptrVector; // The cells at this depth.
 
     // This must only be called BEFORE sorting.  The pointer vector will be
     // modified by sorting but the length vector is not.
     virtual void RestoreLengthWords(void) = 0;
 
     // Sorts the inclusive range first..last.
     static void SortRange(PolyObject * *first, PolyObject * *last);
 
     // Three-way comparison of the cells addressed by *a and *b.
     static int CompareItems(const PolyObject * const *a, const PolyObject * const *b);
 
     // Adapter giving CompareItems the signature that qsort expects.
     static int qsCompare(const void *a, const void *b)
     {
         return CompareItems((const PolyObject * const*)a, (const PolyObject *const *)b);
     }
 
     // Adapter giving SortRange the signature of a task-farm work item.
     static void sortTask(GCTaskId*, void *s, void *l)
     {
         SortRange((PolyObject **)s, (PolyObject **)l);
     }
 };
 
 // DepthVector where the size needs to be held for each item.
 // The original length word of every cell is saved in lengthVector, in
 // parallel with ptrVector, so that it can be put back later.
 class DepthVectorWithVariableLength: public DepthVector {
 public:
     // Starts with no length vector allocated.
     DepthVectorWithVariableLength() : lengthVector(0) {}
     // Releases the length vector; the base class releases ptrVector.
     virtual ~DepthVectorWithVariableLength() { free(lengthVector); }
 
     // Writes the saved length word back into every cell.
     virtual void RestoreLengthWords(void);
     // Appends cell pt and records its length word L.
     virtual void AddToVector(POLYUNSIGNED L, PolyObject *pt);
     // Replaces forwarding pointers with the length word of the cell they point at.
     virtual void RestoreForwardingPointers();
 
 protected:
     POLYUNSIGNED    *lengthVector; // Same size as the ptrVector
 };
 
 // DepthVector where every cell has the same length word, so only a single
 // copy of it needs to be kept.
 class DepthVectorWithFixedLength : public DepthVector {
 public:
     // l is the length word shared by every cell added to this vector.
     DepthVectorWithFixedLength(POLYUNSIGNED l) : length(l) {}
 
     // Writes "length" into the length word of every cell.
     virtual void RestoreLengthWords(void);
     // Appends cell pt; L is expected to equal "length" (checked by ASSERT).
     virtual void AddToVector(POLYUNSIGNED L, PolyObject *pt);
 
     // It's safe to run this again for the fixed length vectors.
     virtual void RestoreForwardingPointers() { RestoreLengthWords(); }
 
 protected:
     POLYUNSIGNED length; // The common length word.
 };
 
 
 // We have special vectors for the sizes from 1 to FIXEDLENGTHSIZE-1.
 // Zero-sized and large objects go in depthVectorArray[0].
 #define FIXEDLENGTHSIZE     10
 
 // Owns the depth vectors built while sharing data.  depthVectorArray is
 // indexed first by cell length (slot 0 for zero-length and large cells) and
 // then by depth.
 class ShareDataClass {
 public:
     ShareDataClass();
     // Destroys the permanent-space share bitmaps and deletes every depth vector.
     ~ShareDataClass();
 
     bool RunShareData(PolyObject *root);
     // Adds cell pt, with length word "length", to the vector for "depth".
     void AddToVector(POLYUNSIGNED depth, POLYUNSIGNED length, PolyObject *pt);
 
 private:
 	std::vector<DepthVector*> depthVectorArray[FIXEDLENGTHSIZE];
 
     // One more than the largest depth passed to AddToVector so far.
     POLYUNSIGNED maxVectorSize;
 };
 
 // No depth has been seen yet; the depth vector arrays start out empty.
 ShareDataClass::ShareDataClass() : maxVectorSize(0)
 {
 }
 
 ShareDataClass::~ShareDataClass()
 {
     // Release the sharing bitmap of every permanent space.
     std::vector<PermanentMemSpace*>::iterator space = gMem.pSpaces.begin();
     while (space != gMem.pSpaces.end())
     {
         (*space)->shareBitmap.Destroy();
         ++space;
     }
 
     // Delete every depth vector held in each of the per-length arrays.
     for (unsigned slot = 0; slot < FIXEDLENGTHSIZE; slot++)
     {
         std::vector<DepthVector*> &vectors = depthVectorArray[slot];
         for (std::vector<DepthVector*>::size_type k = 0; k < vectors.size(); k++)
             delete vectors[k];
     }
 }
 
 // Add cell "pt" to the depth vector for "depth", creating any missing depth
 // vectors up to and including that depth first.
 //   depth  - the depth computed for the cell.
 //   length - the cell's original length word; it selects which array is used.
 //   pt     - the cell itself.
 // Throws MemoryException if a depth vector cannot be allocated.
 void ShareDataClass::AddToVector(POLYUNSIGNED depth, POLYUNSIGNED length, PolyObject *pt)
 {
     // Select the appropriate vector.  Element zero is the variable length vector and is
     // also used for the, rare, zero length objects.
     const bool fixedSize = length != 0 && length < FIXEDLENGTHSIZE;
     std::vector<DepthVector*> *vectorToUse = &(depthVectorArray[fixedSize ? length : 0]);
 
     if (depth >= maxVectorSize) maxVectorSize = depth+1;
 
     while (vectorToUse->size() <= depth)
     {
         DepthVector *newVector = 0;
         try {
             if (fixedSize)
                 newVector = new DepthVectorWithFixedLength(length);
             else newVector = new DepthVectorWithVariableLength;
             vectorToUse->push_back(newVector);
         }
         catch (std::bad_alloc&) {
             // If push_back failed the new vector is not owned by the array
             // yet, so free it here rather than leak it.  If "new" itself
             // failed the pointer is still null and delete does nothing.
             delete newVector;
             throw MemoryException();
         }
     }
 
     (*vectorToUse)[depth]->AddToVector(length, pt);
 }
 
 // Add an object to a depth vector, recording its length word alongside it.
 //   L  - the cell's original length word, saved so it can be restored later.
 //   pt - the cell to add.
 // Throws MemoryException if either vector cannot be grown.  The object is
 // left in a consistent state in that case.
 void DepthVectorWithVariableLength::AddToVector(POLYUNSIGNED L, PolyObject *pt)
 {
     ASSERT (this->nitems <= this->vsize);
 
     if (this->nitems == this->vsize)
     {
         // The vector is full or has not yet been allocated.  Grow it by 50%.
         POLYUNSIGNED new_vsize = this->vsize + this->vsize / 2 + 1;
         if (new_vsize < 15)
             new_vsize = 15;
 
         // First the length vector.
         POLYUNSIGNED *newLength = (POLYUNSIGNED *)realloc(this->lengthVector, new_vsize * sizeof(POLYUNSIGNED));
         if (newLength == 0)
         {
             // The vectors can get large and we may not be able to grow them
             // particularly if the address space is limited in 32-bit mode.
             // Try again with just a small increase.
             new_vsize = this->vsize + 15;
             newLength = (POLYUNSIGNED *)realloc(this->lengthVector, new_vsize * sizeof(POLYUNSIGNED));
             // If that failed give up.
             if (newLength == 0)
                 throw MemoryException();
         }
         // Record the new block straight away.  A successful realloc may have
         // freed or moved the old block, so if growing the pointer vector
         // throws below the member must not still refer to the old address.
         // Having more room here than vsize says is harmless.
         this->lengthVector = newLength;
 
         PolyObject **newPtrVector = (PolyObject * *)realloc (this->ptrVector, new_vsize*sizeof(PolyObject *));
 
         if (newPtrVector == 0)
         {
             new_vsize = this->vsize + 15;
             newPtrVector = (PolyObject **)realloc (this->ptrVector, new_vsize*sizeof(PolyObject *));
             // If that failed give up.
             if (newPtrVector == 0)
                 throw MemoryException();
         }
 
         this->ptrVector = newPtrVector;
         this->vsize  = new_vsize;
     }
 
     ASSERT (this->nitems < this->vsize);
     this->lengthVector[this->nitems]  = L;
     this->ptrVector[this->nitems] = pt;
     this->nitems++;
     ASSERT (this->nitems <= this->vsize);
 }
 
 // Append a cell to a fixed-length depth vector.  Only the pointer is stored
 // because every cell in this vector has the same length word.
 // Throws MemoryException if the pointer vector cannot be grown.
 void DepthVectorWithFixedLength::AddToVector(POLYUNSIGNED L, PolyObject *pt)
 {
     ASSERT(nitems <= vsize);
     ASSERT(L == length);
 
     if (nitems == vsize)
     {
         // Full, or never allocated.  Aim for half as much again with a
         // minimum of fifteen entries.
         POLYUNSIGNED wanted = vsize + vsize / 2 + 1;
         if (wanted < 15)
             wanted = 15;
 
         PolyObject **grown = (PolyObject **)realloc(ptrVector, wanted * sizeof(PolyObject *));
         if (grown == 0)
         {
             // Could not get that much: settle for a small fixed increase.
             wanted = vsize + 15;
             grown = (PolyObject **)realloc(ptrVector, wanted * sizeof(PolyObject *));
             if (grown == 0)
                 throw MemoryException(); // Nothing more we can do.
         }
 
         ptrVector = grown;
         vsize = wanted;
     }
 
     ASSERT(nitems < vsize);
     ptrVector[nitems++] = pt;
     ASSERT(nitems <= vsize);
 }
 
 // Comparison function used for sorting and also to test whether
 // two cells can be merged.
 int DepthVector::CompareItems(const PolyObject *const *a, const PolyObject *const *b)
 {
     const PolyObject *x = *a;
     const PolyObject *y = *b;
     POLYUNSIGNED  lX = x->LengthWord();
     POLYUNSIGNED  lY = y->LengthWord();
 
 //    ASSERT (OBJ_IS_LENGTH(lX));
 //    ASSERT (OBJ_IS_LENGTH(lY));
 
     if (lX > lY) return  1; // These tests include the flag bits
     if (lX < lY) return -1;
 
     // Return simple bitwise equality.
     return memcmp(x, y, OBJ_OBJECT_LENGTH(lX)*sizeof(PolyWord));
 }
 
 // Merge cells with the same contents.
 // Walks the vector in runs of consecutive entries that CompareItems reports as
 // equal, so it relies on the vector having been sorted first.  In each run one
 // cell is chosen to be retained and every other cell has its length word
 // replaced by a forwarding pointer to it.
 // Returns the number of cells that were turned into forwarding pointers.
 POLYUNSIGNED DepthVector::MergeSameItems()
 {
     POLYUNSIGNED  N = this->nitems;
     POLYUNSIGNED  n = 0; // Count of cells forwarded so far.
     POLYUNSIGNED  i = 0; // Start of the current run.
 
     while (i < N)
     {
         PolyObject *bestShare = 0; // Candidate to share.
         MemSpace *bestSpace = 0;   // The space containing bestShare.
 
         POLYUNSIGNED j;
         for (j = i; j < N; j++)
         {
             ASSERT (OBJ_IS_LENGTH(ptrVector[i]->LengthWord()));
             // Search for identical objects.  Don't bother to compare it with itself.
             if (i != j && CompareItems (&ptrVector[i], &ptrVector[j]) != 0) break;
             // The order of sharing is significant.
             // Choose an object in the permanent memory if that is available.
             // This is necessary to retain the invariant that no object in
             // the permanent memory points to an object in the temporary heap.
             // (There may well be pointers to this object elsewhere in the permanent
             // heap).
             // Choose the lowest hierarchy value for preference since that
             // may reduce the size of saved state when resaving already saved
             // data.
             // If we can't find a permanent space choose a space that isn't
             // an allocation space.  Otherwise we could break the invariant
             // that immutable areas never point into the allocation area.
-            MemSpace *space = gMem.SpaceForAddress((PolyWord*)ptrVector[j]-1);
+            MemSpace *space = gMem.SpaceForObjectAddress(ptrVector[j]);
             if (bestSpace == 0)
             {
                 // First cell of the run: it is the candidate until a better one turns up.
                 bestShare = ptrVector[j];
                 bestSpace = space;
             }
             else if (bestSpace->spaceType == ST_PERMANENT)
             {
                 // Only update if the current space is also permanent and a lower hierarchy
                 if (space->spaceType == ST_PERMANENT &&
                         ((PermanentMemSpace *)space)->hierarchy < ((PermanentMemSpace *)bestSpace)->hierarchy)
                 {
                     bestShare = ptrVector[j];
                     bestSpace = space;
                 }
             }
             else if (bestSpace->spaceType == ST_LOCAL)
             {
                 // Update if the current space is not an allocation space
                 if (space->spaceType != ST_LOCAL || ! ((LocalMemSpace*)space)->allocationSpace)
                 {
                     bestShare = ptrVector[j];
                     bestSpace = space;
                 }
             }
         }
         POLYUNSIGNED k = j; // Remember the first object that didn't match.
         // For each identical object set all but the one we want to point to
         // the shared object.
         for (j = i; j < k; j++)
         {
             ASSERT (OBJ_IS_LENGTH(ptrVector[j]->LengthWord()));
             if (ptrVector[j] != bestShare)
             {
                 ptrVector[j]->SetForwardingPtr(bestShare); /* an indirection */
                 n++;
             }
         }
         i = k; // The next run starts at the first cell that differed.
     }
 
     return n;
 }
 
 // Sort this vector
 void DepthVector::Sort()
 {
     if (nitems > 1)
     {
         SortRange(ptrVector, ptrVector + (nitems - 1));
         gpTaskFarm->WaitForCompletion();
     }
 
     // Check
 //    for (POLYUNSIGNED i = 0; i < nitems-1; i++)
 //       ASSERT(CompareItems(vector+i, vector+i+1) <= 0);
 }
 
 // Exchange the two vector entries addressed by i and j.
 inline void swapItems(PolyObject * *i, PolyObject * *j)
 {
     PolyObject *const saved = *j;
     *j = *i;
     *i = saved;
 }
 
 // Simple parallel quick-sort.  "first" and "last" are the first
 // and last items (inclusive) in the vector.
 // Ranges of about a hundred entries or fewer are passed to qsort.  Larger
 // ranges are partitioned about a median-of-three pivot; one partition is
 // given to the task farm and the other is handled by going round the loop.
 void DepthVector::SortRange(PolyObject * *first, PolyObject * *last)
 {
     while (first < last)
     {
         if (last-first <= 100)
         {
             // Use the standard library function for small ranges.
             qsort(first, last-first+1, sizeof(PolyObject *), qsCompare);
             return;
         }
         // Select the best pivot from the first, last and middle item
         // by sorting these three items.  We use the middle item as
         // the pivot and since the first and last items are sorted
         // by this we can skip them when we start the partitioning.
         PolyObject * *middle = first + (last-first)/2;
         if (CompareItems(first, middle) > 0)
             swapItems(first, middle);
         if (CompareItems(middle, last) > 0)
         {
             swapItems(middle, last);
             if (CompareItems(first, middle) > 0)
                 swapItems(first, middle);
         }
 
         // Partition the data about the pivot.  This divides the
         // vector into two partitions with all items <= pivot to
         // the left and all items >= pivot to the right.
         // Note: items equal to the pivot could be in either partition.
         PolyObject * *f = first+1; // Scans upwards from the left.
         PolyObject * *l = last-1;  // Scans downwards from the right.
 
         do {
             // Find an item we have to move.  These loops will always
             // terminate because testing the middle with itself
             // will return == 0.
             while (CompareItems(f, middle/* pivot*/) < 0)
                 f++;
             while (CompareItems(middle/* pivot*/, l) < 0)
                 l--;
             // If we haven't finished we need to swap the items.
             if (f < l)
             {
                 swapItems(f, l);
                 // If one of these was the pivot item it will have moved to
                 // the other position.
                 if (middle == f)
                     middle = l;
                 else if (middle == l)
                     middle = f;
                 f++;
                 l--;
             }
             else if (f == l)
             {
                 // The scans met on a single item: step past it and stop.
                 f++;
                 l--;
                 break;
             }
         } while (f <= l);
 
         // Process the larger partition as a separate task or
         // by recursion and do the smaller partition by tail
         // recursion.
         if (l-first > last-f)
         {
             // Lower part is larger
             gpTaskFarm->AddWorkOrRunNow(sortTask, first, l);
             first = f; // Continue the loop on the upper part.
         }
         else
         {
             // Upper part is larger
             gpTaskFarm->AddWorkOrRunNow(sortTask, f, last);
             last = l; // Continue the loop on the lower part.
         }
     }
 }
 
 // Set the genuine length word.  This overwrites both depth words and forwarding pointers.
 void DepthVectorWithVariableLength::RestoreLengthWords()
 {
     for (POLYUNSIGNED i = 0; i < this->nitems; i++)
     {
         PolyObject* obj = ptrVector[i];
         obj = gMem.SpaceForObjectAddress(obj)->writeAble(obj); // This could be code.
         obj->SetLengthWord(lengthVector[i]); // restore genuine length word
     }
 }
 void DepthVectorWithFixedLength::RestoreLengthWords()
 {
     for (POLYUNSIGNED i = 0; i < this->nitems; i++)
         ptrVector[i]->SetLengthWord(length); // restore genuine length word
 }
 
 // Restore the real length words and then let "scan" visit the addresses in
 // every cell, so that addresses whose original destination has been shared
 // with something else are updated to their new location.
 void DepthVector::FixLengthAndAddresses(ScanAddress *scan)
 {
     RestoreLengthWords();
 
     POLYUNSIGNED index = 0;
     while (index < this->nitems)
     {
         scan->ScanAddressesInObject(ptrVector[index]);
         index++;
     }
 }
 
 // Restore the original length words on forwarding pointers.
 // After sorting the pointer vector and length vector are no longer
 // matched so we have to follow the pointers.
 void DepthVectorWithVariableLength::RestoreForwardingPointers()
 {
     for (POLYUNSIGNED i = 0; i < this->nitems; i++)
     {
         PolyObject *obj = ptrVector[i];
         if (obj->ContainsForwardingPtr())
             obj->SetLengthWord(obj->GetForwardingPtr()->LengthWord());
     }
 }
 
 // This class is used in two places and is called to ensure that all
 // object length words have been restored.
 // Before we actually try to share the immutable objects at a particular depth it
 // is called to update addresses in those objects to take account of
 // sharing at lower depths.
 // When all sharing is complete it is called to update the addresses in
 // level zero objects, i.e. mutables and code.
 class ProcessFixupAddress: public ScanAddress
 {
 protected:
     // Replaces the word at pt with its forwarded address, if it has one.
     virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt);
     // Does nothing: code is not moved.
     virtual POLYUNSIGNED ScanCodeAddressAt(PolyObject **pt);
     // Returns the forwarded address of base, or base itself if it has not moved.
     virtual PolyObject *ScanObjectAddress(PolyObject *base)
         { return GetNewAddress(base).AsObjPtr(); }
     // Maps an address to the retained object when the cell has been shared.
     PolyWord GetNewAddress(PolyWord old);
 };
 
 // Overwrite the word at pt with the address it should now refer to.
 // Always returns zero.
 POLYUNSIGNED ProcessFixupAddress::ScanAddressAt(PolyWord *pt)
 {
     const PolyWord updated = GetNewAddress(*pt);
     *pt = updated;
     return 0;
 }
 
 // Don't have to do anything for code since it isn't moved.
 // The pointer is left untouched and the result is always zero.
 POLYUNSIGNED ProcessFixupAddress::ScanCodeAddressAt(PolyObject **pt)
 {
     return 0;
 }
 
 // Returns the new address if the argument is the address of an object that
 // has been replaced by a forwarding pointer; otherwise returns the argument
 // unchanged.  Tagged values and the zero word are returned as they are.
 PolyWord ProcessFixupAddress::GetNewAddress(PolyWord old)
 {
     if (old.IsTagged() || old == PolyWord::FromUnsigned(0))
         return old; // Not an address.
 
     ASSERT(old.IsDataPtr());
 
     PolyObject *cell = old.AsObjPtr();
 
     // A tombstone points at the object that was retained when this was shared.
     if (cell->ContainsForwardingPtr())
         return cell->GetForwardingPtr();
 
     // Otherwise the address is kept.  Usually the target was processed at a
     // lower depth and has a normal length word.  It can still hold a depth if
     // there is a cycle and the rest of the structure was given a higher depth.
     // N.B. in that case the target could yet share with something else and
     // not be retained.
     ASSERT (OBJ_IS_DEPTH(cell->LengthWord()) || cell->ContainsNormalLengthWord());
     return old;
 }
 
 // This class is used to set up the depth vectors for sorting.  It subclasses ScanAddress
 // in order to be able to use that for code objects since they are complicated but it
 // handles all the other object types itself.  It scans them depth-first using an explicit stack.
 class ProcessAddToVector: public ScanAddress
 {
 public:
     // p receives the cells as their depths become known.  The stack starts unallocated.
     ProcessAddToVector(ShareDataClass *p): m_parent(p), addStack(0), stackSize(0), asp(0) {}
 
     // Clears any mark bits left on stacked cells and frees the stack.
     ~ProcessAddToVector();
 
     // These are used when scanning code areas.  They return either
     // a length or a possibly updated address.
     virtual POLYUNSIGNED ScanAddressAt(PolyWord *pt)
         { (void)AddPolyWordToDepthVectors(*pt); return 0; }
     virtual PolyObject *ScanObjectAddress(PolyObject *base)
         { (void)AddObjectToDepthVector(base); return base; }
 
     // Processes root and everything reachable from it.
     void ProcessRoot(PolyObject *root);
 
 protected:
     // Process an address and return the "depth".
     POLYUNSIGNED AddPolyWordToDepthVectors(PolyWord old);
     POLYUNSIGNED AddObjectToDepthVector(PolyObject *obj);
 
     // Pushes obj, growing the stack if it is full.  May throw MemoryException.
     void PushToStack(PolyObject *obj);
 
     ShareDataClass *m_parent; // Owner of the depth vectors.
     PolyObject **addStack;    // Explicit stack of cells still to be processed.
     unsigned stackSize;       // Number of entries addStack has room for.
     unsigned asp;             // Number of entries currently on the stack.
 };
 
 // Clear any mark bits still set on cells left on the stack, then free it.
 ProcessAddToVector::~ProcessAddToVector()
 {
     // Normally the stack will be empty.  However if we have run out of
     // memory and thrown an exception we may well have items left.
     // We have to remove the mark bits otherwise it will mess up any
     // subsequent GC.
     for (unsigned i = 0; i < asp; i++)
     {
         PolyObject *obj = addStack[i];
         const POLYUNSIGNED lengthWord = obj->LengthWord();
         if (lengthWord & _OBJ_GC_MARK)
         {
             // Marked code cells can be on the stack.  Everywhere else in this
             // file the length word of a cell that could be code is written
             // through the space's writeable address, so do the same here
             // rather than writing through the original pointer.
             gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetLengthWord(lengthWord & (~_OBJ_GC_MARK));
         }
     }
 
     free(addStack); // Now free the stack
 }
 
 // Process a word that may or may not be an address and return its depth.
 // A tagged integer or the zero word is simply a constant and has depth zero.
 POLYUNSIGNED ProcessAddToVector::AddPolyWordToDepthVectors(PolyWord old)
 {
     const bool isAddress = !old.IsTagged() && !(old == PolyWord::FromUnsigned(0));
     if (isAddress)
         return AddObjectToDepthVector(old.AsObjPtr());
     return 0;
 }
 
 // Either adds an object to the stack or, if its depth is known, adds it
 // to the depth vector and returns the depth.
 // We use _OBJ_GC_MARK to detect when we have visited a cell but not yet
 // computed the depth.  We have to be careful that this bit is removed
 // before we finish in the case that we run out of memory and throw an
 // exception.  PushToStack may throw the exception if the stack needs to
 // grow.
 // Returns zero in every case where the depth is not yet known, and for
 // addresses that are not in any known memory space.
 POLYUNSIGNED ProcessAddToVector::AddObjectToDepthVector(PolyObject *obj)
 {
-    MemSpace *space = gMem.SpaceForAddress(((PolyWord*)obj)-1);
+    MemSpace *space = gMem.SpaceForObjectAddress(obj);
     if (space == 0)
         return 0; // Not in any space we know about.
 
     POLYUNSIGNED L = obj->LengthWord();
 
     if (OBJ_IS_DEPTH(L)) // tombstone contains genuine depth or 0.
         return OBJ_GET_DEPTH(L);
 
     if (obj->LengthWord() & _OBJ_GC_MARK)
         return 0; // Marked but not yet scanned. Circular structure.
 
     ASSERT (OBJ_IS_LENGTH(L));
 
     if (obj->IsMutable())
     {
         // Mutable data in the local or permanent areas.  Ignore byte objects or
         // word objects containing only ints.
         if (obj->IsWordObject())
         {
             // Stop at the first word that is not a tagged value.
             bool containsAddress = false;
             for (POLYUNSIGNED j = 0; j < OBJ_OBJECT_LENGTH(L) && !containsAddress; j++)
                 containsAddress = ! obj->Get(j).IsTagged();
 
             if (containsAddress)
             {
                 // Add it to the vector so we will update any addresses it contains.
                 m_parent->AddToVector(0, L, obj);
                 // and follow any addresses to try to merge those.
                 PushToStack(obj);
                 obj->SetLengthWord(L | _OBJ_GC_MARK); // To prevent rescan
             }
             // If we don't add it to the vector we mustn't set _OBJ_GC_MARK.
         }
         return 0; // Level is zero
     }
 
     if (space->spaceType == ST_PERMANENT &&
              ((PermanentMemSpace*)space)->hierarchy == 0)
     {
         // Immutable data in the permanent area can't be merged
         // because it's read only.  We need to follow the addresses
         // because they may point to mutable areas containing data
         // that can be.  A typical case is the root function pointing
         // at the global name table containing new declarations.
         // The length word cannot be marked here, so the space's share bitmap
         // records which cells have already been visited.
         Bitmap *bm = &((PermanentMemSpace*)space)->shareBitmap;
         if (! bm->TestBit((PolyWord*)obj - space->bottom))
         {
             bm->SetBit((PolyWord*)obj - space->bottom);
             if (! obj->IsByteObject())
                 PushToStack(obj);
         }
         return 0;
     }
 
     /* There's a problem sharing code objects if they have relative calls/jumps
        in them to other code.  The code of two functions may be identical (e.g.
        they both call functions 100 bytes ahead) and so they will appear the
        same but if the functions they jump to are different they are actually
        different.  For that reason we don't share code segments.  DCJM 4/1/01 */
     if (obj->IsCodeObject())
     {
         // We want to update addresses in the code segment.
         m_parent->AddToVector(0, L, obj);
         PushToStack(obj);
         // The length word is written through the space's writeable address.
         gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetLengthWord(L | _OBJ_GC_MARK); // To prevent rescan
 
         return 0;
     }
 
     // Byte objects always have depth 1 and can't contain addresses.
     if (obj->IsByteObject())
     {
         m_parent->AddToVector (1, L, obj);// add to vector at correct depth
         obj->SetLengthWord(OBJ_SET_DEPTH(1));
         return 1;
     }
 
     ASSERT(OBJ_IS_WORD_OBJECT(L) || OBJ_IS_CLOSURE_OBJECT(L)); // That leaves immutable data objects.
     // The depth is not known until the contents have been processed, so stack
     // the cell and mark it as visited.
     PushToStack(obj);
     obj->SetLengthWord(L | _OBJ_GC_MARK); // To prevent rescan
 
     return 0;
 }
 
 // Push an object onto the explicit stack, making room for another hundred
 // entries whenever the stack is full.
 // Throws MemoryException if the stack cannot be allocated or grown.
 void ProcessAddToVector::PushToStack(PolyObject *obj)
 {
     if (asp == stackSize)
     {
         // realloc with a null pointer behaves as malloc, so a single call
         // covers both the first allocation and later growth.
         const unsigned grownSize = stackSize + 100;
         PolyObject **grown = (PolyObject**)realloc(addStack, sizeof(PolyObject*) * grownSize);
         if (grown == 0) throw MemoryException();
         addStack = grown;
         stackSize = grownSize;
     }
 
     ASSERT(asp < stackSize);
 
     addStack[asp] = obj;
     asp++;
 }
 
 // Processes the root and anything reachable from it.  Addresses are added to the
 // explicit stack if an object has not yet been processed.  Most of this function
 // is about processing the stack.
 //
 // The traversal is iterative: addStack[0..asp-1] holds the objects still being
 // worked on.  An entry normally stays on the stack until every word in it has been
 // handled.  If handling a word pushes a new entry, work on the current object stops
 // and it is rescanned from its first word the next time it is on top (pt and length
 // are re-initialised on every iteration of the outer loop).
 // NOTE(review): termination relies on AddObjectToDepthVector/AddPolyWordToDepthVectors
 // not pushing an address that has already been dealt with - confirm in those helpers.
 void ProcessAddToVector::ProcessRoot(PolyObject *root)
 {
     // Mark the initial object
     AddObjectToDepthVector(root);
 
     // Process the stack until it's empty.
     while (asp != 0)
     {
         // Look at the top of the stack.  It is only removed (asp--) once the
         // object has been processed completely.
         PolyObject *obj = addStack[asp-1];
 
         if (obj->IsCodeObject())
         {
             // Code cells are now only found in the code area.
             /* There's a problem sharing code objects if they have relative calls/jumps
                in them to other code.  The code of two functions may be identical (e.g.
                they both call functions 100 bytes ahead) and so they will appear the
                same but if the functions they jump to are different they are actually
                different.  For that reason we don't share code segments.  DCJM 4/1/01 */
             asp--; // Pop it because we'll process it completely
             ScanAddressesInObject(obj);
             // If it's local set the depth with the value zero.  It has already been
             // added to the zero depth vector.
             // The header is written through the space's writeAble() view of the object.
             if (obj->LengthWord() & _OBJ_GC_MARK)
                 gMem.SpaceForObjectAddress(obj)->writeAble(obj)->SetLengthWord(OBJ_SET_DEPTH(0)); // Now scanned
         }
 
         else
         {
             POLYUNSIGNED length = obj->Length();
             PolyWord *pt = (PolyWord*)obj;
             // Remember the stack height so we can tell whether anything gets pushed.
             unsigned osp = asp;
 
             if (obj->IsClosureObject())
             {
                 // The first word of a closure is a code pointer.  We don't share code but
                 // we do want to share anything reachable from the constants.
                 AddObjectToDepthVector(*(PolyObject**)pt);
                 // Step over the code pointer, which occupies a native pointer's worth
                 // of PolyWords.
                 pt += sizeof(PolyObject*) / sizeof(PolyWord);
                 length -= sizeof(PolyObject*) / sizeof(PolyWord);
             }
 
             if (((obj->LengthWord() & _OBJ_GC_MARK) && !obj->IsMutable()))
             {
                 // Immutable local objects.  These can be shared.  We need to compute the
                 // depth by computing the maximum of the depth of all the addresses in it.
                 POLYUNSIGNED depth = 0;
                 // Stop as soon as a word causes a push: the depth cannot be computed
                 // until the pushed object has been processed.
                 while (length != 0 && osp == asp)
                 {
                     POLYUNSIGNED d = AddPolyWordToDepthVectors(*pt);
                     if (d > depth) depth = d;
                     pt++;
                     length--;
                 }
 
                 if (osp == asp)
                 {
                     // We've finished it
                     asp--; // Pop this item.
                     depth++; // One more for this object
                     // Clear the mark bit, record the object in the vector for its depth
                     // (passing the unmarked length word), then overwrite the length word
                     // with the depth.
                     obj->SetLengthWord(obj->LengthWord() & (~_OBJ_GC_MARK));
                     m_parent->AddToVector(depth, obj->LengthWord() & (~_OBJ_GC_MARK), obj);
                     obj->SetLengthWord(OBJ_SET_DEPTH(depth));
                 }
                 // Otherwise something was pushed; this object stays on the stack beneath
                 // it and is rescanned from the start later.
             }
             else
             {
                 // Mutable or non-local objects.  These have depth zero.  Local objects have
                 // _OBJ_GC_MARK in their header.  Immutable permanent objects cannot be
                 // modified so we don't set the depth.  Mutable objects are added to the
                 // depth vectors even though they aren't shared so that they will be
                 // updated if they point to immutables that have been shared.
                 while (length != 0)
                 {
                     // Tagged (non-address) words are skipped even after a push.
                     if (!(*pt).IsTagged())
                     {
                         // If we've already pushed an address break now
                         if (osp != asp) break;
                         // Process the address and possibly push it
                         AddPolyWordToDepthVectors(*pt);
                     }
                     pt++;
                     length--;
                 }
 
                 if (length == 0)
                 {
                     // We've finished it
                     if (osp != asp)
                     {
                         // Exactly one entry was pushed above this object.  Move it down
                         // into this object's slot so that decrementing asp removes the
                         // finished object rather than the newly pushed one.
                         ASSERT(osp == asp - 1);
                         addStack[osp - 1] = addStack[osp];
                     }
                     asp--; // Pop this item.
                     if (obj->LengthWord() & _OBJ_GC_MARK)
                         obj->SetLengthWord(OBJ_SET_DEPTH(0));
                 }
                 // If length != 0 we broke out with unprocessed addresses remaining; the
                 // object stays on the stack and is rescanned later.
             }
         }
     }
 }
 
 // This is called by the root thread to do the work.
 //
 // Steps:
 //   1. Create the visited bitmap for every immutable permanent space at hierarchy 0.
 //   2. Walk everything reachable from root, assigning each object to a depth vector.
 //   3. For each depth >= 1 and each size bucket: fix up addresses, sort, and merge
 //      identical items.
 //   4. Fix up addresses in the depth 0 vectors (mutables and code).
 //   5. Restore the length words that were overwritten with forwarding pointers.
 //
 // Returns false if a bitmap could not be created or if the walk in step 2 ran out
 // of memory; in the latter case whatever was collected is still processed so that
 // the heap is left in a consistent state.
 bool ShareDataClass::RunShareData(PolyObject *root)
 {
     // We use a bitmap to indicate when we've visited an object to avoid
     // infinite recursion in cycles in the data.
     for (std::vector<PermanentMemSpace*>::iterator i = gMem.pSpaces.begin(); i < gMem.pSpaces.end(); i++)
     {
         PermanentMemSpace *space = *i;
         if (!space->isMutable && space->hierarchy == 0)
         {
             if (! space->shareBitmap.Create(space->spaceSize()))
                 return false;
         }
     }
 
     POLYUNSIGNED totalObjects = 0;
     POLYUNSIGNED totalShared  = 0;
 
     // Build the vectors from the immutable objects.
     bool success = true;
 
     try {
         ProcessAddToVector addToVector(this);
         addToVector.ProcessRoot(root);
     }
     catch (MemoryException &)
     {
         // If we ran out of memory we may still be able to process what we have.
         // That will also do any clean-up.
         success = false;
     }
 
     ProcessFixupAddress fixup;
 
     for (POLYUNSIGNED depth = 1; depth < maxVectorSize; depth++)
     {
         for (unsigned j = 0; j < FIXEDLENGTHSIZE; j++)
         {
             if (depth < depthVectorArray[j].size())
             {
                 DepthVector *vec = depthVectorArray[j][depth];
                 // Set the length word and update all addresses.
                 vec->FixLengthAndAddresses(&fixup);
 
                 vec->Sort();
 
                 POLYUNSIGNED n = vec->MergeSameItems();
 
                 // n > 0 implies the vector is non-empty so the division is safe.
                 if ((debugOptions & DEBUG_SHARING) && n > 0)
                     Log("Sharing: Level %4" POLYUFMT ", size %3u, Objects %6" POLYUFMT ", Shared %6" POLYUFMT " (%1.0f%%)\n",
                         depth, j, vec->ItemCount(), n, (float)n / (float)vec->ItemCount() * 100.0);
 
                 totalObjects += vec->ItemCount();
                 totalShared += n;
             }
         }
     }
     
     if (debugOptions & DEBUG_SHARING)
         Log("Sharing: Maximum level %4" POLYUFMT ",\n", maxVectorSize);
 
       /*
        At this stage, we have fixed up most but not all of the forwarding
        pointers. The ones that we haven't fixed up arise from situations
        such as the following:
 
                X -> Y <-> Z
 
        i.e. Y and Z form a loop, and X is isomorphic to Z. When we assigned
        the depths, we have to arbitrarily break the loop between Y and Z.
        Suppose Y is assigned to level 1, and Z is assigned to level 2.
        When we process level 1 and fixup Y, there's nothing to do, since
        Z is still an ordinary object. However when we process level 2,
        we find that X and Z are isomorphic so we arbitrarily choose one
        of them and turn it into a "tombstone" pointing at the other. If
        we change Z into the tombstone, then Y now contains a pointer
        that needs fixing up. That's why we need the second fixup pass.
 
        Note also that if we had broken the loop the other way, we would have
        assigned Z to level 1, Y to level 2 and X to level 3, so we would
        have missed the chance to share Z and X. Perhaps that's why running
        the program repeatedly sometimes finds extra things to share?
 
       SPF 26/1/95
     */
 
     /* We have updated the addresses in objects with non-zero level so they point to
        the single occurrence but we need to do the same with level 0 objects
        (mutables and code). */
     for (unsigned j = 0; j < FIXEDLENGTHSIZE; j++)
     {
         if (! depthVectorArray[j].empty())
         {
             DepthVector *v = depthVectorArray[j][0];
             // Log this because it could be very large.
             // The level is passed as a POLYUNSIGNED so that it matches POLYUFMT on
             // every platform; an unsigned long literal has a different width on some.
             if (debugOptions & DEBUG_SHARING)
                 Log("Sharing: Level %4" POLYUFMT ", size %3u, Objects %6" POLYUFMT "\n", (POLYUNSIGNED)0, j, v->ItemCount());
             v->FixLengthAndAddresses(&fixup);
         }
     }
     /* Previously we made a complete scan over the memory updating any addresses so
        that if we have shared two substructures within our root we would also
        share any external pointers.  This has been removed but we have to
        reinstate the length words we've overwritten with forwarding pointers because
        there may be references to unshared objects from outside. */
     for (POLYUNSIGNED d = 1; d < maxVectorSize; d++)
     {
         for (unsigned j = 0; j < FIXEDLENGTHSIZE; j++)
         {
             if (d < depthVectorArray[j].size())
             {
                 DepthVector *v = depthVectorArray[j][d];
                 v->RestoreForwardingPointers();
             }
         }
     }
 
     // Avoid 0/0 (which would print as NaN) when nothing was collected.
     if (debugOptions & DEBUG_SHARING)
         Log ("Sharing: Total Objects %6" POLYUFMT ", Total Shared %6" POLYUFMT " (%1.0f%%)\n",
             totalObjects, totalShared,
             totalObjects == 0 ? 0.0 : (float)totalShared / (float)totalObjects * 100.0);
 
     return success; // False if the walk ran out of memory.
 }
 
 class ShareRequest: public MainThreadRequest
 {
 public:
     ShareRequest(Handle root): MainThreadRequest(MTP_SHARING), shareRoot(root), result(false) {}
 
     virtual void Perform()
     {
         ShareDataClass s; 
         // Do a full GC.  If we have a large heap the allocation of the vectors
         // can cause paging.  Doing this now reduces the heap and discards the
         // allocation spaces.  It may be overkill if we are applying the sharing
         // to a small root but generally it seems to be applied to the whole heap.
         FullGCForShareCommonData();
 		gcProgressBeginOtherGC(); // Set the phase to "other" now the GC is complete.
         // Now do the sharing.
         result = s.RunShareData(shareRoot->WordP());
     }
     Handle shareRoot;
     bool result;
 };
 
 
 // ShareData.  This is the main entry point.
 // The work is handed to the main thread because it can recurse deeply, and
 // because it manipulates the heap in ways that could upset other threads,
 // which therefore have to be stopped first.
 // Raises the ML exception Fail "Insufficient memory" if the sharing pass
 // reports failure.
 void ShareData(TaskData *taskData, Handle root)
 {
     // Only data pointers are handled.  A code pointer could be handled too but
     // it shouldn't occur, so anything else is simply ignored.
     if (root->Word().IsDataPtr())
     {
         // Ask the main thread to do the sharing.
         ShareRequest shareReq(root);
         processes->MakeRootRequest(taskData, &shareReq);
 
         // Report a failure as an ML exception.
         if (! shareReq.result)
             raise_exception_string(taskData, EXC_Fail, "Insufficient memory");
     }
 }
 
 // RTS call entry.
 // threadId identifies the calling thread and root is the value whose reachable
 // data should be shared.  Always returns tagged zero; if the sharing pass fails
 // an ML exception (Fail "Insufficient memory") is raised via
 // raise_exception_string and the resulting C++ exception is absorbed here.
 POLYUNSIGNED PolyShareCommonData(FirstArgument threadId, PolyWord root)
 {
     TaskData *taskData = TaskData::FindTaskForId(threadId);
     ASSERT(taskData != 0);
     taskData->PreRTSCall();
     Handle reset = taskData->saveVec.mark();
 
     try {
         // A non-pointer root needs no work.  This must not return early: the
         // save vector reset and PostRTSCall below have to run on every path to
         // balance the PreRTSCall and mark above.
         if (root.IsDataPtr())
         {
             // Request the main thread to do the sharing.
             ShareRequest request(taskData->saveVec.push(root));
             processes->MakeRootRequest(taskData, &request);
 
             // Raise an exception if it failed.
             if (! request.result)
                 raise_exception_string(taskData, EXC_Fail, "Insufficient memory");
         }
     } catch (...) { } // If an ML exception is raised
 
     taskData->saveVec.reset(reset);
     taskData->PostRTSCall();
     return TAGGED(0).AsUnsigned();
 }
 
 // Table of the RTS entry points defined in this file: each entry pairs the
 // external name with the address of the function.  The table is terminated by
 // an entry with both fields NULL.
 struct _entrypts shareDataEPT[] =
 {
     { "PolyShareCommonData",            (polyRTSFunction)&PolyShareCommonData},
 
     { NULL, NULL} // End of list.
 };
 
diff --git a/libpolyml/x86_dep.cpp b/libpolyml/x86_dep.cpp
index 3034cc1e..1f7ddbb8 100644
--- a/libpolyml/x86_dep.cpp
+++ b/libpolyml/x86_dep.cpp
@@ -1,1296 +1,1297 @@
 /*
     Title:  Machine dependent code for i386 and X64 under Windows and Unix
 
     Copyright (c) 2000-7
         Cambridge University Technical Services Limited
 
     Further work copyright David C. J. Matthews 2011-20
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     License version 2.1 as published by the Free Software Foundation.
 
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public License for more details.
 
     You should have received a copy of the GNU Lesser General Public
     License along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #elif defined(_WIN32)
 #include "winconfig.h"
 #else
 #error "No configuration file"
 #endif
 
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 
 #include <stdio.h>
 
 #ifdef HAVE_SIGNAL_H
 #include <signal.h>
 #endif
 
 #ifdef HAVE_ASSERT_H 
 #include <assert.h>
 #define ASSERT(x)   assert(x)
 #else
 #define ASSERT(x)
 #endif
 
 #ifdef HAVE_STRING_H 
 #include <string.h>
 #endif
 
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 
 #if (defined(_WIN32))
 #include <windows.h>
 #include <excpt.h>
 #endif
 
 #include "globals.h"
 #include "run_time.h"
 #include "diagnostics.h"
 #include "processes.h"
 #include "profiling.h"
 #include "machine_dep.h"
 #include "scanaddrs.h"
 #include "memmgr.h"
 #include "rtsentry.h"
 
 #include "sys.h" // Temporary
 
 
 /**********************************************************************
  *
  * Register usage:
  *
  *  %Reax: First argument to function.  Result of function call.
  *  %Rebx: Second argument to function.
  *  %Recx: General register
  *  %Redx: Closure pointer in call.
  *  %Rebp: Points to memory used for extra registers
  *  %Resi: General register.
  *  %Redi: General register.
  *  %Resp: Stack pointer.
  *  The following apply only on the X64
  *  %R8:   Third argument to function
  *  %R9:   Fourth argument to function
  *  %R10:  Fifth argument to function
  *  %R11:  General register
  *  %R12:  General register
  *  %R13:  General register
  *  %R14:  General register
  *  %R15:  Memory allocation pointer
 
  *
  **********************************************************************/
 
 // Floating point save area.  It is embedded in AssemblyArgs as p_fp and is
 // zeroed by InitStackFrame when a thread's initial frame is set up.
 #ifdef HOSTARCHITECTURE_X86_64
 struct fpSaveArea {
     double fpregister[7]; // Save area for xmm0-6
 };
 #else
 // Structure of floating point save area.
 // This is dictated by the hardware.
 // Each floating point register image is 10 bytes.
 typedef byte fpregister[10];
 
 struct fpSaveArea {
     unsigned short cw;          // Control word (set explicitly in InitStackFrame)
     unsigned short _unused0;
     unsigned short sw;
     unsigned short _unused1;
     unsigned short tw;          // Tag word (set explicitly in InitStackFrame)
     unsigned short _unused2;
     unsigned fip;
     unsigned short fcs0;
     unsigned short _unused3;
     unsigned foo;
     unsigned short fcs1;
     unsigned short _unused4;
     fpregister registers[8];    // The eight register images
 };
 #endif
 
 /* the amount of ML stack space to reserve for registers,
    C exception handling etc. The compiler requires us to
    reserve 2 stack-frames worth (2 * 20 words). We actually reserve
    slightly more than this.
 */
 // The value is used in this file both as a count of stackItem slots (added to
 // the stack bottom to give the stack limit) and, scaled by
 // sizeof(uintptr_t)/sizeof(PolyWord), as a count of PolyWords.
 #if (!defined(_WIN32) && !defined(HAVE_SIGALTSTACK))
 // If we can't handle signals on a separate stack make sure there's space
 // on the Poly stack.
 #define OVERFLOW_STACK_SIZE (50+1024)
 #else
 #define OVERFLOW_STACK_SIZE 50
 #endif
 
 // One slot on the ML stack or one saved register.  The members are alternative
 // views of the same native-word-sized value: a PolyWord (via w()), a code
 // address, a pointer to another stack slot, or a plain integer.
 union stackItem
 {
 /*
 #ifndef POLYML32IN64
     stackItem(PolyWord v) { words[0] = v.AsUnsigned(); };
     stackItem() { words[0] = TAGGED(0).AsUnsigned(); }
     POLYUNSIGNED words[1];
 #else
     // In 32-in-64 we need to clear the second PolyWord.  This assumes little-endian.
     stackItem(PolyWord v) { words[0] = v.AsUnsigned(); words[1] = 0; };
     stackItem() { words[0]  = TAGGED(0).AsUnsigned(); words[1] = 0; }
     POLYUNSIGNED words[2];
 #endif
    */
     // Both constructors assign through argValue, i.e. the whole uintptr_t.
     // The default value is tagged zero.
     stackItem(PolyWord v) { argValue = v.AsUnsigned(); }
     stackItem() { argValue = TAGGED(0).AsUnsigned(); }
 
     // These return the low order word.
     // argValue is narrowed to POLYUNSIGNED before the PolyWord is built.
     PolyWord w()const { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); }
     operator PolyWord () { return PolyWord::FromUnsigned((POLYUNSIGNED)argValue); }
     POLYCODEPTR codeAddr; // Return addresses
     stackItem *stackAddr; // Stack addresses
     uintptr_t argValue; // Treat an address as an int
 };
 
 class X86TaskData;
 
 // This is passed as the argument vector to X86AsmSwitchToPoly.
 // The offsets are built into the assembly code and the code-generator.
 // localMpointer and stackPtr are updated before control returns to C.
 // Because of those fixed offsets, fields must not be added, removed, resized or
 // reordered here without making the matching change on the assembly side;
 // that is why the unused fields are retained.
 typedef struct _AssemblyArgs {
 public:
     PolyWord        *localMpointer;     // Allocation ptr + 1 word
     stackItem       *handlerRegister;   // Current exception handler
     PolyWord        *localMbottom;      // Base of memory + 1 word
     stackItem       *stackLimit;        // Lower limit of stack
     stackItem       exceptionPacket;    // Set if there is an exception
     byte            unusedRequestCode;  // No longer used.
     byte            unusedFlag;         // No longer used
     byte            returnReason;       // Reason for returning from ML.
     byte            unusedRestore;      // No longer used.
     uintptr_t       saveCStack;         // Saved C stack frame.
     PolyWord        threadId;           // My thread id.  Saves having to call into RTS for it.
     stackItem       *stackPtr;          // Current stack pointer
     byte            *noLongerUsed;      // Now removed
     byte            *heapOverFlowCall;  // These are filled in with the functions.
     byte            *stackOverFlowCall;
     byte            *stackOverFlowCallEx;
     // Saved registers, where applicable.
     stackItem       p_rax;
     stackItem       p_rbx;
     stackItem       p_rcx;
     stackItem       p_rdx;
     stackItem       p_rsi;
     stackItem       p_rdi;
 #ifdef HOSTARCHITECTURE_X86_64
     stackItem       p_r8;
     stackItem       p_r9;
     stackItem       p_r10;
     stackItem       p_r11;
     stackItem       p_r12;
     stackItem       p_r13;
     stackItem       p_r14;
 #endif
     struct fpSaveArea p_fp;             // Floating point save area
 } AssemblyArgs;
 
 // These next few are temporarily added for the interpreter
 // This duplicates some code in reals.cpp but is now updated.
 // Number of POLYUNSIGNED values needed to hold one double.
 #define DOUBLESIZE (sizeof(double)/sizeof(POLYUNSIGNED))
 
 // Overlays a double with an array of POLYUNSIGNED of the same total size.
 union realdb { double dble; POLYUNSIGNED puns[DOUBLESIZE]; };
 
 // Number of PolyWords that make up one native (uintptr_t) word.
 #define LGWORDSIZE (sizeof(uintptr_t) / sizeof(PolyWord))
 
 // Per-thread state for the x86 code-generator back end.  It extends TaskData
 // with the argument block shared with the assembly code (assemblyInterface),
 // the information needed to complete a heap-overflow trap, and accessors for
 // the saved ML registers.
 class X86TaskData: public TaskData {
 public:
     X86TaskData();
     unsigned allocReg; // The register to take the allocated space.
     POLYUNSIGNED allocWords; // The words to allocate.
     AssemblyArgs assemblyInterface;
     int saveRegisterMask; // Registers that need to be updated by a GC.
 
     // Scan the ML stack and the saved registers selected by saveRegisterMask.
     virtual void GarbageCollect(ScanAddress *process);
     // Scan a single stack slot or saved register.
     void ScanStackAddress(ScanAddress *process, stackItem &val, StackSpace *stack);
     virtual void EnterPolyCode(); // Start running ML
     virtual void InterruptCode();
     virtual bool AddTimeProfileCount(SIGNALCONTEXT *context);
     virtual void InitStackFrame(TaskData *parentTask, Handle proc, Handle arg);
     virtual void SetException(poly_exn *exc);
 
     // Release a mutex in exactly the same way as compiler code
     virtual Handle AtomicDecrement(Handle mutexp);
     virtual void AtomicReset(Handle mutexp);
 
     // Return the minimum space occupied by the stack.  Used when setting a limit.
     // N.B. This is PolyWords not native words.
     // It is the distance from the current stack pointer to the top of the stack
     // plus the overflow reserve converted to PolyWords.
     virtual uintptr_t currentStackSpace(void) const
         { return (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
             OVERFLOW_STACK_SIZE*sizeof(uintptr_t)/sizeof(PolyWord); }
 
     // Increment the profile count for an allocation.  Also now used for mutex contention.
     // The count is attributed to the code address held in the slot at the top of
     // the ML stack.
     virtual void addProfileCount(POLYUNSIGNED words)
     { addSynchronousCount(assemblyInterface.stackPtr[0].codeAddr, words); }
 
     // PreRTSCall: After calling from ML to the RTS we need to save the current heap pointer
     virtual void PreRTSCall(void) { TaskData::PreRTSCall();  SaveMemRegisters(); }
     // PostRTSCall: Before returning we need to restore the heap pointer.
     // If there has been a GC in the RTS call we need to create a new heap area.
     virtual void PostRTSCall(void) { SetMemRegisters(); TaskData::PostRTSCall();  }
 
     // Copy the live part of the stack to a new stack, relocating pointers into it.
     virtual void CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length);
 
     void HeapOverflowTrap(byte *pcPtr);
 
     void SetMemRegisters();
     void SaveMemRegisters();
     void SetRegisterMask();
 
     // Handle a trap from ML code; dispatches on assemblyInterface.returnReason.
     void HandleTrap();
 
     // Held while assemblyInterface.stackLimit is updated in HandleTrap.
     PLock interruptLock;
 
     // Address of the saved copy of register number n.
     stackItem *get_reg(int n);
 
     // Accessors for the saved stack pointer and registers in assemblyInterface.
     stackItem *&regSP() { return assemblyInterface.stackPtr; }
 
     stackItem &regAX() { return assemblyInterface.p_rax; }
     stackItem &regBX() { return assemblyInterface.p_rbx; }
     stackItem &regCX() { return assemblyInterface.p_rcx; }
     stackItem &regDX() { return assemblyInterface.p_rdx; }
     stackItem &regSI() { return assemblyInterface.p_rsi; }
     stackItem &regDI() { return assemblyInterface.p_rdi; }
 #ifdef HOSTARCHITECTURE_X86_64
     stackItem &reg8() { return assemblyInterface.p_r8; }
     stackItem &reg9() { return assemblyInterface.p_r9; }
     stackItem &reg10() { return assemblyInterface.p_r10; }
     stackItem &reg11() { return assemblyInterface.p_r11; }
     stackItem &reg12() { return assemblyInterface.p_r12; }
     stackItem &reg13() { return assemblyInterface.p_r13; }
     stackItem &reg14() { return assemblyInterface.p_r14; }
 #endif
 
     // Saved error code; initialised to zero by the constructor.  The type differs
     // between Windows and other platforms.
 #if (defined(_WIN32))
     DWORD savedErrno;
 #else
     int savedErrno;
 #endif
 };
 
 // Machine-dependent operations for the x86 family: 32-bit, 64-bit and 32-in-64.
 class X86Dependent: public MachineDependent {
 public:
     X86Dependent() {}
 
     // Create the x86-specific task data object for a thread.
     virtual TaskData *CreateTaskData(void)
     {
         return new X86TaskData();
     }
 
     // Initial size of an ML stack, in PolyWords.
     virtual unsigned InitialStackSize(void)
     {
         return (128+OVERFLOW_STACK_SIZE) * sizeof(uintptr_t) / sizeof(PolyWord);
     }
 
     virtual void ScanConstantsWithinCode(PolyObject *addr, PolyObject *oldAddr, POLYUNSIGNED length, ScanAddress *process);
 
     // The architecture this run-time system was built for.
     virtual Architectures MachineArchitecture(void)
     {
 #ifndef HOSTARCHITECTURE_X86_64
         return MA_I386;
 #elif defined(POLYML32IN64)
         return MA_X86_64_32;
 #else
         return MA_X86_64;
 #endif
     }
 };
 
 // Values for the returnReason byte
 // HandleTrap switches on these after the assembly code has trapped back to C++.
 enum RETURN_REASON {
     RETURN_HEAP_OVERFLOW = 1,       // The heap has overflowed
     RETURN_STACK_OVERFLOW = 2,      // Stack check failed; size is taken from the stack pointer
     RETURN_STACK_OVERFLOWEX = 3,    // Stack check failed; required limit is in the saved rdi/edi
 };
 
 // Interface between this file and the assembly code.  These need C linkage.
 extern "C" {
 
     // These are declared in the assembly code segment.
     // Enter ML code; the argument is the address of the thread's AssemblyArgs.
     void X86AsmSwitchToPoly(void *);
     // Only the addresses of these three are used here: the X86TaskData constructor
     // stores them in the AssemblyArgs call fields.
     extern int X86AsmCallExtraRETURN_HEAP_OVERFLOW(void);
     extern int X86AsmCallExtraRETURN_STACK_OVERFLOW(void);
     extern int X86AsmCallExtraRETURN_STACK_OVERFLOWEX(void);
 
     POLYUNSIGNED X86AsmAtomicDecrement(PolyObject*);
 
     // Defined in this file; called from the assembly code when ML code traps.
     void X86TrapHandler(PolyWord threadId);
 };
 
 // Set the scalar members to zero and record the addresses of the assembly
 // trap stubs in the argument block shared with the assembly code.
 X86TaskData::X86TaskData():
     allocReg(0), allocWords(0), saveRegisterMask(0), savedErrno(0)
 {
     AssemblyArgs &args = assemblyInterface;
     args.heapOverFlowCall = (byte*)X86AsmCallExtraRETURN_HEAP_OVERFLOW;
     args.stackOverFlowCall = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOW;
     args.stackOverFlowCallEx = (byte*)X86AsmCallExtraRETURN_STACK_OVERFLOWEX;
 }
 
 // Present every root held by this thread to "process": first whatever the base
 // class holds, then each slot of the ML stack that is in use, then each saved
 // register selected by saveRegisterMask.
 void X86TaskData::GarbageCollect(ScanAddress *process)
 {
     TaskData::GarbageCollect(process); // Process the parent first
     // Refresh the copy of the thread id kept for the assembly code.
     // NOTE(review): presumably threadObject may have been updated by the call above - confirm.
     assemblyInterface.threadId = threadObject;
 
     if (stack != 0)
     {
+        ASSERT(assemblyInterface.stackPtr >= (stackItem*)stack->bottom && assemblyInterface.stackPtr <= (stackItem*)stack->top);
         // Now the values on the stack.
         // Only the part in use, from the stack pointer up to the top, is scanned.
         for (stackItem *q = assemblyInterface.stackPtr; q < (stackItem*)stack->top; q++)
             ScanStackAddress(process, *q, stack);
     }
     // Register mask
     // Bit i set means the saved copy of register i holds a value to be scanned.
     for (int i = 0; i < 16; i++)
     {
         if (saveRegisterMask & (1 << i))
             ScanStackAddress(process, *get_reg(i), stack);
     }
 }
 
 // Process a value within the stack.
 // The slot may hold a tagged integer, a pointer to a heap object, a return
 // address into a code object or some other native address.  Object pointers are
 // passed to process->ScanObjectAddress and the slot is overwritten with the
 // result.  For a return address the enclosing code object is passed to
 // ScanObjectAddress but the slot itself is left unchanged.
 // Note that the parameter named stackItem hides the type of the same name within
 // this function.  The stack parameter is not used in the body.
 void X86TaskData::ScanStackAddress(ScanAddress *process, stackItem &stackItem, StackSpace *stack)
 {
     // We may have return addresses on the stack which could look like
     // tagged values.  Check whether the value is in the code area before
     // checking whether it is untagged.
 #ifdef POLYML32IN64
     // In 32-in-64 return addresses always have the top 32 bits non-zero. 
     if (stackItem.argValue < ((uintptr_t)1 << 32))
     {
         // It's either a tagged integer or an object pointer.
         if (stackItem.w().IsDataPtr())
         {
             PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
             stackItem = val;
         }
     }
     else
     {
         // Could be a code address or a stack address.
         // Anything that is not inside a code space is ignored.
         MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr - 1);
         if (space == 0 || space->spaceType != ST_CODE) return;
         PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
         ASSERT(obj != 0);
         // Process the address of the start.  Don't update anything.
         process->ScanObjectAddress(obj);
     }
 #else
     // The -1 here is because we may have a zero-sized cell in the last
     // word of a space.
     MemSpace *space = gMem.SpaceForAddress(stackItem.codeAddr-1);
     if (space == 0) return; // In particular we may have one of the assembly code addresses.
     if (space->spaceType == ST_CODE)
     {
         PolyObject *obj = gMem.FindCodeObject(stackItem.codeAddr);
         // If it is actually an integer it might be outside a valid code object.
         if (obj == 0)
         {
             ASSERT(stackItem.w().IsTagged()); // It must be an integer
         }
         else // Process the address of the start.  Don't update anything.
             process->ScanObjectAddress(obj);
     }
     else if (space->spaceType == ST_LOCAL && stackItem.w().IsDataPtr())
         // Local values must be word addresses.
     {
         PolyWord val = process->ScanObjectAddress(stackItem.w().AsObjPtr());
         stackItem = val;
     }
     // Values in any other kind of space are left alone.
 #endif
 }
 
 
 // Copy a stack
 // Copies the part of the old stack that is in use (from the current stack pointer
 // to the top) to the corresponding position below the top of the new stack.  The
 // stack pointer, the handler register, every copied slot and every saved register
 // selected by saveRegisterMask are adjusted if they point into the old stack.
 // NOTE(review): the lengths appear to be in PolyWords (they are halved for
 // 32-in-64 before being used as stackItem counts) - confirm against the caller.
 void X86TaskData::CopyStackFrame(StackObject *old_stack, uintptr_t old_length, StackObject *new_stack, uintptr_t new_length)
 {
     /* Moves a stack, updating all references within the stack */
 #ifdef POLYML32IN64
     old_length = old_length / 2;
     new_length = new_length / 2;
 #endif
 
     stackItem *old_base  = (stackItem *)old_stack;
     stackItem *new_base  = (stackItem*)new_stack;
     stackItem *old_top   = old_base + old_length;
 
     /* Calculate the offset of the new stack from the old. If the frame is
        being extended objects in the new frame will be further up the stack
        than in the old one. */
 
     // The offset is in units of stackItem.  It is held in an unsigned type so a
     // move to a lower address is represented by wrap-around, which still gives
     // the right result when added to a pointer.
     uintptr_t offset = new_base - old_base + new_length - old_length;
 
     stackItem *oldStackPtr = assemblyInterface.stackPtr;
 
     // Adjust the stack pointer and handler pointer since these point into the stack.
     assemblyInterface.stackPtr = assemblyInterface.stackPtr + offset;
     assemblyInterface.handlerRegister = assemblyInterface.handlerRegister + offset;
 
     // We need to adjust any values on the stack that are pointers within the stack.
     // Skip the unused part of the stack.
 
     size_t i = oldStackPtr - old_base;
 
     ASSERT (i <= old_length);
 
     // i becomes the number of slots in use, i.e. the number to copy.
     i = old_length - i;
 
     stackItem *old = oldStackPtr;
     stackItem *newp = assemblyInterface.stackPtr;
 
     while (i--)
     {
         stackItem old_word = *old++;
         // Case 1: the slot holds a native address that lies within the old stack.
         if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
             old_word.stackAddr = old_word.stackAddr + offset;
         // Case 2: the slot holds a PolyWord that decodes (AsStackAddr) to an address
         // within the old stack; relocate it and re-encode it.
         else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
         {
             stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
             if (addr >= old_base && addr <= old_top)
             {
                 addr += offset;
                 old_word = PolyWord::FromStackAddr((PolyWord*)addr);
             }
         }
         *newp++ = old_word;
     }
     // Both pointers must have finished exactly at the top of their stacks.
     ASSERT(old == ((stackItem*)old_stack)+old_length);
     ASSERT(newp == ((stackItem*)new_stack)+new_length);
     // And change any registers that pointed into the old stack
     // The same two cases as for stack slots are applied to each selected register.
     for (int j = 0; j < 16; j++)
     {
         if (saveRegisterMask & (1 << j))
         {
             stackItem *regAddr = get_reg(j);
             stackItem old_word = *regAddr;
             if (old_word.w().IsDataPtr() && old_word.stackAddr >= old_base && old_word.stackAddr <= old_top)
                 old_word.stackAddr = old_word.stackAddr + offset;
             else if (old_word.w().IsDataPtr() && IsHeapAddress(old_word.stackAddr))
             {
                 stackItem *addr = (stackItem*)old_word.w().AsStackAddr();
                 if (addr >= old_base && addr <= old_top)
                 {
                     addr += offset;
                     old_word = PolyWord::FromStackAddr((PolyWord*)addr);
                 }
             }
             *regAddr = old_word;
        }
     }
 }
 
 void X86TaskData::EnterPolyCode()
 /* Called from "main" to enter the code. */
 {
     SetMemRegisters();
     // Enter the ML code.
     X86AsmSwitchToPoly(&this->assemblyInterface);
     // This should never return
     ASSERT(0);
  }
 
 // Entry point used by the assembly code when ML code traps, i.e. when it needs
 // a GC or a larger stack.  Finds the task for the thread id and lets it deal
 // with the trap.
 void X86TrapHandler(PolyWord threadId)
 {
     TaskData *task = TaskData::FindTaskForId(threadId);
     static_cast<X86TaskData*>(task)->HandleTrap();
 }
 
 // Deal with a trap from ML code.  The reason is read from
 // assemblyInterface.returnReason: either the heap has overflowed or a stack
 // check has failed.  Any other reason is treated as fatal.
 // The memory registers are saved on entry and set again on exit.
 void X86TaskData::HandleTrap()
 {
     SaveMemRegisters(); // Update globals from the memory registers.
 
     switch (this->assemblyInterface.returnReason)
     {
 
     case RETURN_HEAP_OVERFLOW:
         // The heap has overflowed.
         SetRegisterMask();
         // The code address passed is the one in the slot at the top of the ML stack.
         this->HeapOverflowTrap(assemblyInterface.stackPtr[0].codeAddr); // Computes a value for allocWords only
         break;
 
     case RETURN_STACK_OVERFLOW:
     case RETURN_STACK_OVERFLOWEX:
     {
         SetRegisterMask();
         uintptr_t min_size; // Size in PolyWords
         if (assemblyInterface.returnReason == RETURN_STACK_OVERFLOW)
         {
             // Space in use measured from the current stack pointer, plus the reserve.
             min_size = (this->stack->top - (PolyWord*)assemblyInterface.stackPtr) +
                 OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
         }
         else
         {
             // Stack limit overflow.  If the required stack space is larger than
             // the fixed overflow size the code will calculate the limit in %EDI.
             stackItem* stackP = regDI().stackAddr;
             min_size = (this->stack->top - (PolyWord*)stackP) +
                 OVERFLOW_STACK_SIZE * sizeof(uintptr_t) / sizeof(PolyWord);
         }
         try {
             // The stack check has failed.  This may either be because we really have
             // overflowed the stack or because the stack limit value has been adjusted
             // to result in a call here.
             CheckAndGrowStack(this, min_size);
         }
         catch (IOException&) {
             // We may get an exception while handling this if we run out of store
             // Nothing more is done here; execution continues below.
         }
         {
             PLocker l(&interruptLock);
             // Set the stack limit.  This clears any interrupt and also sets the
             // correct value if we've grown the stack.
             this->assemblyInterface.stackLimit = (stackItem*)this->stack->bottom + OVERFLOW_STACK_SIZE;
         }
         // We're in a safe state to handle any interrupts.
         try {
             // Process any asynchronous events i.e. interrupts or kill
             processes->ProcessAsynchRequests(this);
             // Release and re-acquire use of the ML memory to allow another thread to GC.
             processes->ThreadReleaseMLMemory(this);
             processes->ThreadUseMLMemory(this);
         }
         catch (IOException&) {
             // If this resulted in an ML exception it will also raise a C++ exception.
         }
         catch (KillException&) {
             // The thread has been asked to terminate.
             processes->ThreadExit(this);
         }
         break;
     }
 
     default:
         Crash("Unknown return reason code %u", this->assemblyInterface.returnReason);
     }
     SetMemRegisters();
 }
 
 // Set up the state for a thread that has not yet run: an empty stack, a cleared
 // floating point save area, and the function closure and its argument in the
 // saved registers.
 //   parentTaskData - not used in this implementation.
 //   proc           - the closure to run.
 //   arg            - the argument, or 0 to pass tagged zero.
 void X86TaskData::InitStackFrame(TaskData *parentTaskData, Handle proc, Handle arg)
 /* Initialise stack frame. */
 {
     StackSpace *space = this->stack;
     StackObject * newStack = space->stack();
     // Size of the stack converted from PolyWords to stackItem slots.
     uintptr_t stack_size     = space->spaceSize() * sizeof(PolyWord) / sizeof(stackItem);
     // Set the top of the stack inside the stack rather than at the end.  This wastes
     // a word but if sp is actually at the end OpenBSD segfaults because it isn't in
     // a MAP_STACK area.
     // NOTE(review): topStack is an index in stackItem slots, so this steps back by
     // sizeof(uintptr_t) slots rather than the single word the comment mentions -
     // confirm whether the extra space is intended (e.g. for alignment).
     uintptr_t topStack = stack_size - sizeof(uintptr_t);
     stackItem *stackTop = (stackItem*)newStack + topStack;
     // The stack starts empty and the handler register points at its top.
     assemblyInterface.stackPtr = stackTop;
     assemblyInterface.stackLimit = (stackItem*)space->bottom + OVERFLOW_STACK_SIZE;
     assemblyInterface.handlerRegister = stackTop;
 
     // Floating point save area.
     memset(&assemblyInterface.p_fp, 0, sizeof(struct fpSaveArea));
 #ifndef HOSTARCHITECTURE_X86_64
     // Set the control word for 64-bit precision otherwise we get inconsistent results.
     assemblyInterface.p_fp.cw = 0x027f ; // Control word
     assemblyInterface.p_fp.tw = 0xffff; // Tag registers - all unused
 #endif
     // Store the argument and the closure.
     assemblyInterface.p_rdx = proc->Word(); // Closure
     assemblyInterface.p_rax = (arg == 0) ? TAGGED(0) : DEREFWORD(arg); // Argument
     // Have to set the register mask in case we get a GC before the thread starts.
     saveRegisterMask = (1 << 2) | 1; // Rdx and rax
 
 #ifdef POLYML32IN64
     // In 32-in-64 RBX always contains the heap base address.
     assemblyInterface.p_rbx.stackAddr = (stackItem*)globalHeapBase;
 #endif
 }
 
 // In Solaris-x86 the registers are named EIP and ESP.
 #if (!defined(REG_EIP) && defined(EIP))
 #define REG_EIP EIP
 #endif
 #if (!defined(REG_ESP) && defined(ESP))
 #define REG_ESP ESP
 #endif
 
 
// Get the PC and SP(stack) from a signal context.  This is needed for profiling.
// This version gets the actual sp and pc if we are in ML.
// N.B. This must not call malloc since we're in a signal handler.
//
// Three candidate code addresses are tried in turn:
//   1. the pc taken from the signal context;
//   2. the word at the context's sp, if that sp lies within this thread's stack;
//   3. the word at the saved assemblyInterface.stackPtr, if that lies within the stack.
// The first candidate that falls in a code or permanent memory space is passed to
// incrementCountAsynch and the function returns true.  If none qualifies (or
// context is null and the saved stack pointer does not help) it returns false.
// NOTE(review): this->stack is dereferenced here without the null check that
// InterruptCode performs - confirm the stack is always set when this can be called.
bool X86TaskData::AddTimeProfileCount(SIGNALCONTEXT *context)
{
    stackItem * sp = 0;
    POLYCODEPTR pc = 0;
    if (context != 0)
    {
        // The tests for HAVE_UCONTEXT_T, HAVE_STRUCT_SIGCONTEXT and HAVE_WINDOWS_H need
        // to follow the tests in processes.h.
#if defined(HAVE_WINDOWS_H)
#ifdef _WIN64
        sp = (stackItem *)context->Rsp;
        pc = (POLYCODEPTR)context->Rip;
#else
        // Windows 32 including cygwin.
        sp = (stackItem *)context->Esp;
        pc = (POLYCODEPTR)context->Eip;
#endif
#elif defined(HAVE_UCONTEXT_T)
#ifdef HAVE_MCONTEXT_T_GREGS
        // Linux
#ifndef HOSTARCHITECTURE_X86_64
        pc = (byte*)context->uc_mcontext.gregs[REG_EIP];
        sp = (stackItem*)context->uc_mcontext.gregs[REG_ESP];
#else /* HOSTARCHITECTURE_X86_64 */
        pc = (byte*)context->uc_mcontext.gregs[REG_RIP];
        sp = (stackItem*)context->uc_mcontext.gregs[REG_RSP];
#endif /* HOSTARCHITECTURE_X86_64 */
#elif defined(HAVE_MCONTEXT_T_MC_ESP)
        // FreeBSD
#ifndef HOSTARCHITECTURE_X86_64
        pc = (byte*)context->uc_mcontext.mc_eip;
        sp = (stackItem*)context->uc_mcontext.mc_esp;
#else /* HOSTARCHITECTURE_X86_64 */
        pc = (byte*)context->uc_mcontext.mc_rip;
        sp = (stackItem*)context->uc_mcontext.mc_rsp;
#endif /* HOSTARCHITECTURE_X86_64 */
#else
        // Mac OS X
#ifndef HOSTARCHITECTURE_X86_64
#if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT32_SS))
        pc = (byte*)context->uc_mcontext->ss.eip;
        sp = (stackItem*)context->uc_mcontext->ss.esp;
#elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT32___SS))
        pc = (byte*)context->uc_mcontext->__ss.__eip;
        sp = (stackItem*)context->uc_mcontext->__ss.__esp;
#endif
#else /* HOSTARCHITECTURE_X86_64 */
#if(defined(HAVE_STRUCT_MCONTEXT_SS)||defined(HAVE_STRUCT___DARWIN_MCONTEXT64_SS))
        pc = (byte*)context->uc_mcontext->ss.rip;
        sp = (stackItem*)context->uc_mcontext->ss.rsp;
#elif(defined(HAVE_STRUCT___DARWIN_MCONTEXT64___SS))
        pc = (byte*)context->uc_mcontext->__ss.__rip;
        sp = (stackItem*)context->uc_mcontext->__ss.__rsp;
#endif
#endif /* HOSTARCHITECTURE_X86_64 */
#endif
#elif defined(HAVE_STRUCT_SIGCONTEXT)
#if defined(HOSTARCHITECTURE_X86_64) && defined(__OpenBSD__)
        // CPP defines missing in amd64/signal.h in OpenBSD
        pc = (byte*)context->sc_rip;
        sp = (stackItem*)context->sc_rsp;
#else // !HOSTARCHITECTURE_X86_64 || !defined(__OpenBSD__)
        pc = (byte*)context->sc_pc;
        sp = (stackItem*)context->sc_sp;
#endif
#endif
        // If none of the configuration macros above is defined pc and sp stay zero.
    }
    if (pc != 0)
    {
        // See if the PC we've got is an ML code address.
        MemSpace *space = gMem.SpaceForAddress(pc);
        if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
        {
            incrementCountAsynch(pc);
            return true;
        }
    }
    // See if the sp value is in the current stack.
    if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
    {
        // We may be in the assembly code.  The top of the stack will be a return address.
        pc = sp[0].w().AsCodePtr();
        MemSpace *space = gMem.SpaceForAddress(pc);
        if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
        {
            incrementCountAsynch(pc);
            return true;
        }
    }
    // See if the value of regSP is a valid stack pointer.
    // This works if we happen to be in an RTS call using a "Full" call.
    // It doesn't work if we've used a "Fast" call because that doesn't save the SP.
    sp = assemblyInterface.stackPtr;
    if (sp >= (stackItem*)this->stack->bottom && sp < (stackItem*)this->stack->top)
    {
        // We may be in the run-time system.
        pc = sp[0].w().AsCodePtr();
        MemSpace *space = gMem.SpaceForAddress(pc);
        if (space != 0 && (space->spaceType == ST_CODE || space->spaceType == ST_PERMANENT))
        {
            incrementCountAsynch(pc);
            return true;
        }
    }
    // None of those worked
    return false;
}
 
 // This is called from a different thread so we have to be careful.
 void X86TaskData::InterruptCode()
 {
     PLocker l(&interruptLock);
     // Set the stack limit pointer to the top of the stack to cause
     // a trap when we next check for stack overflow.
     // We use a lock here to ensure that we always use the current value of the
     // stack.  The thread we're interrupting could be growing the stack at this point.
     if (this->stack != 0) 
         this->assemblyInterface.stackLimit = (stackItem*)(this->stack->top-1);
 }
 
 // This is called from SwitchToPoly before we enter the ML code.
 void X86TaskData::SetMemRegisters()
 {
     // Copy the current store limits into variables before we go into the assembly code.
 
     // If we haven't yet set the allocation area or we don't have enough we need
     // to create one (or a new one).
     if (this->allocPointer <= this->allocLimit + this->allocWords)
     {
         if (this->allocPointer < this->allocLimit)
             Crash ("Bad length in heap overflow trap");
 
         // Find some space to allocate in.  Updates taskData->allocPointer and
         // returns a pointer to the newly allocated space (if allocWords != 0)
         PolyWord *space =
             processes->FindAllocationSpace(this, this->allocWords, true);
         if (space == 0)
         {
             // We will now raise an exception instead of returning.
             // Set allocWords to zero so we don't set the allocation register
             // since that could be holding the exception packet.
             this->allocWords = 0;
         }
         // Undo the allocation just now.
         this->allocPointer += this->allocWords;
     }
 
     if (this->allocWords != 0)
     {
         // If we have had a heap trap we actually do the allocation here.
         // We will have already garbage collected and recovered sufficient space.
         // This also happens if we have just trapped because of store profiling.
         this->allocPointer -= this->allocWords; // Now allocate
         // Set the allocation register to this area. N.B.  This is an absolute address.
         if (this->allocReg < 15)
             get_reg(this->allocReg)[0].codeAddr = (POLYCODEPTR)(this->allocPointer + 1); /* remember: it's off-by-one */
         this->allocWords = 0;
     }
 
     // If we have run out of store, either just above or while allocating in the RTS,
     // allocPointer and allocLimit will have been set to zero as part of the GC.  We will
     // now be raising an exception which may free some store but we need to come back here
     // before we allocate anything.  The compiled code uses unsigned arithmetic to check for
     // heap overflow but only after subtracting the space required.  We need to make sure
     // that the values are still non-negative after substracting any object size.
     if (this->allocPointer == 0) this->allocPointer += MAX_OBJECT_SIZE;
     if (this->allocLimit == 0) this->allocLimit += MAX_OBJECT_SIZE;
 
     this->assemblyInterface.localMbottom = this->allocLimit + 1;
     this->assemblyInterface.localMpointer = this->allocPointer + 1;
     // If we are profiling store allocation we set mem_hl so that a trap
     // will be generated.
     if (profileMode == kProfileStoreAllocation)
         this->assemblyInterface.localMbottom = this->assemblyInterface.localMpointer;
 
     this->assemblyInterface.threadId = this->threadObject;
 }
 
 // This is called whenever we have returned from ML to C.
 void X86TaskData::SaveMemRegisters()
 {
     this->allocPointer = this->assemblyInterface.localMpointer - 1;
     this->allocWords = 0;
     this->assemblyInterface.exceptionPacket = TAGGED(0);
     this->saveRegisterMask = 0;
 }
 
 // Called on a GC or stack overflow trap.  The register mask
 // is in the bytes after the trap call.
 void X86TaskData::SetRegisterMask()
 {
     byte *pc = assemblyInterface.stackPtr[0].codeAddr;
     if (*pc == 0xcd) // CD - INT n is used for a single byte
     {
         pc++;
         saveRegisterMask = *pc++;
     }
     else if (*pc == 0xca) // CA - FAR RETURN is used for a two byte mask
     {
         pc++;
         saveRegisterMask = pc[0] | (pc[1] << 8);
         pc += 2;
     }
     assemblyInterface.stackPtr[0].codeAddr = pc;
 }
 
 stackItem *X86TaskData::get_reg(int n)
 /* Returns a pointer to the register given by n. */
 {
     switch (n) 
     {
     case 0: return &assemblyInterface.p_rax;
     case 1: return &assemblyInterface.p_rcx;
     case 2: return &assemblyInterface.p_rdx;
     case 3: return &assemblyInterface.p_rbx;
         // Should not have rsp or rbp.
     case 6: return &assemblyInterface.p_rsi;
     case 7: return &assemblyInterface.p_rdi;
 #ifdef HOSTARCHITECTURE_X86_64
     case 8: return &assemblyInterface.p_r8;
     case 9: return &assemblyInterface.p_r9;
     case 10: return &assemblyInterface.p_r10;
     case 11: return &assemblyInterface.p_r11;
     case 12: return &assemblyInterface.p_r12;
     case 13: return &assemblyInterface.p_r13;
     case 14: return &assemblyInterface.p_r14;
     // R15 is the heap pointer so shouldn't occur here.
 #endif /* HOSTARCHITECTURE_X86_64 */
     default: Crash("Unknown register %d\n", n);
     }
 }
 
 // Called as a result of a heap overflow trap
 void X86TaskData::HeapOverflowTrap(byte *pcPtr)
 {
     X86TaskData *mdTask = this;
     POLYUNSIGNED wordsNeeded = 0;
     // The next instruction, after any branches round forwarding pointers or pop
     // instructions, will be a store of register containing the adjusted heap pointer.
     // We need to find that register and the value in it in order to find out how big
     // the area we actually wanted is.  N.B.  The code-generator and assembly code
     // must generate the correct instruction sequence.
 //    byte *pcPtr = assemblyInterface.programCtr;
     while (true)
     {
         if (pcPtr[0] == 0xeb)
         {
             // Forwarding pointer
             if (pcPtr[1] >= 128) pcPtr += 256 - pcPtr[1] + 2;
             else pcPtr += pcPtr[1] + 2;
         }
         else if ((pcPtr[0] & 0xf8) == 0x58) // Pop instruction.
             pcPtr++;
         else if (pcPtr[0] == 0x41 && ((pcPtr[1] & 0xf8) == 0x58)) // Pop with Rex prefix
             pcPtr += 2;
         else break;
     }
 #ifndef HOSTARCHITECTURE_X86_64
     // This should be movl REG,0[%ebp].
     ASSERT(pcPtr[0] == 0x89);
     mdTask->allocReg = (pcPtr[1] >> 3) & 7; // Remember this until we allocate the memory
     stackItem *reg = get_reg(mdTask->allocReg);
     stackItem reg_val = *reg;
     // The space we need is the difference between this register
     // and the current value of newptr.
     // The +1 here is because assemblyInterface.localMpointer is A.M.pointer +1.  The reason
     // is that after the allocation we have the register pointing at the address we will
     // actually use.
     wordsNeeded = (this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1;
     *reg = TAGGED(0); // Clear this - it's not a valid address.
     /* length in words, including length word */
 
     ASSERT (wordsNeeded <= (1<<24)); /* Max object size including length/flag word is 2^24 words.  */
 #else /* HOSTARCHITECTURE_X86_64 */
     ASSERT(pcPtr[1] == 0x89 || pcPtr[1] == 0x8b);
     if (pcPtr[1] == 0x89)
     {
         // New (5.4) format.  This should be movq REG,%r15
         ASSERT(pcPtr[0] == 0x49 || pcPtr[0] == 0x4d);
         mdTask->allocReg = (pcPtr[2] >> 3) & 7; // Remember this until we allocate the memory
         if (pcPtr[0] & 0x4) mdTask->allocReg += 8;
     }
     else
     {
         // Alternative form of movq REG,%r15
         ASSERT(pcPtr[0] == 0x4c || pcPtr[0] == 0x4d);
         mdTask->allocReg = pcPtr[2] & 7; // Remember this until we allocate the memory
         if (pcPtr[0] & 0x1) mdTask->allocReg += 8;
     }
     stackItem *reg = get_reg(this->allocReg);
     stackItem reg_val = *reg;
     wordsNeeded = (POLYUNSIGNED)((this->allocPointer - (PolyWord*)reg_val.stackAddr) + 1);
     *reg = TAGGED(0); // Clear this - it's not a valid address.
  #endif /* HOSTARCHITECTURE_X86_64 */
     if (profileMode == kProfileStoreAllocation)
         addProfileCount(wordsNeeded);
 
     mdTask->allocWords = wordsNeeded; // The actual allocation is done in SetMemRegisters.
 }
 
 void X86TaskData::SetException(poly_exn *exc)
 // The RTS wants to raise an exception packet.  Normally this is as the
 // result of an RTS call in which case the caller will check this.  It can
 // also happen in a trap.
 {
     assemblyInterface.exceptionPacket = (PolyWord)exc; // Set for direct calls.
 }
 
 // Decode and process an effective address.  There may
 // be a constant address in here but in any case we need
 // to decode it to work out where the next instruction starts.
 // If this is an lea instruction any addresses are just constants
 // so must not be treated as addresses.
 static void skipea(PolyObject *base, byte **pt, ScanAddress *process, bool lea)
 {
     unsigned int modrm = *((*pt)++);
     unsigned int md = modrm >> 6;
     unsigned int rm = modrm & 7;
 
     if (md == 3) { } /* Register. */
     else if (rm == 4)
     {
         /* s-i-b present. */
         unsigned int sib = *((*pt)++);
 
         if (md == 0)
         {
             if ((sib & 7) == 5) 
             {
                 if (! lea) {
 #ifndef HOSTARCHITECTURE_X86_64
                     process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
 #endif /* HOSTARCHITECTURE_X86_64 */
                 }
                 (*pt) += 4;
             }
         }
         else if (md == 1) (*pt)++;
         else if (md == 2) (*pt) += 4;
     }
     else if (md == 0 && rm == 5)
     {
         if (!lea) {
 #ifndef HOSTARCHITECTURE_X86_64
             /* Absolute address. */
             process->ScanConstant(base, *pt, PROCESS_RELOC_DIRECT);
 #endif /* HOSTARCHITECTURE_X86_64 */
         }
         *pt += 4;
     }
     else
     {
         if (md == 1) *pt += 1;
         else if (md == 2) *pt += 4;
     }
 }
 
/* Added to deal with constants within the
   code rather than in the constant area.  The constant
   area is still needed for the function name.
   DCJM 2/1/2001

   Walks the machine code of the code object "addr" one instruction at a time.
   Each opcode case advances "pt" by the length of that instruction and, where the
   instruction embeds a constant that may be an address, passes the location of
   the constant to process->ScanConstant.  The walk stops at a 0x00 or 0xf4 (hlt)
   byte.  An opcode that is not in the tables is reported through Crash.

   addr    - the code object being scanned.
   old     - where the code was before it was copied; if it differs from addr the
             displacements of calls/jumps that leave the code are rewritten first.
   length  - used only to compute "end", the upper bound for "inside this code".
   process - receives each constant that is found.
*/
void X86Dependent::ScanConstantsWithinCode(PolyObject *addr, PolyObject *old, POLYUNSIGNED length, ScanAddress *process)
{
    byte *pt = (byte*)addr;
    PolyWord *end = addr->Offset(length - 1);
#ifdef POLYML32IN64
    // If this begins with enter-int it's interpreted code - ignore
    if (pt[0] == 0xff && pt[1] == 0x55 && pt[2] == 0x48) return;
#endif

    while (true)
    {
        // Escape prefixes come before any Rex byte
        // Only a single prefix byte is skipped here.
        if (*pt == 0xf2 || *pt == 0xf3 || *pt == 0x66)
            pt++;
#ifdef HOSTARCHITECTURE_X86_64
        // REX prefixes.  Set this first.
        // lastRex is zero if the instruction has no Rex prefix.
        byte lastRex;
        if (*pt >= 0x40 && *pt <= 0x4f)
            lastRex = *pt++;
        else
            lastRex = 0;

        //printf("pt=%p *pt=%x\n", pt, *pt);

#endif /* HOSTARCHITECTURE_X86_64 */
        switch (*pt)
        {
        case 0x00: return; // This is actually the first byte of the old "marker" word.
        case 0xf4: return; // Halt - now used as a marker.
        // Single byte instructions.
        case 0x50: case 0x51: case 0x52: case 0x53:
        case 0x54: case 0x55: case 0x56: case 0x57: /* Push */
        case 0x58: case 0x59: case 0x5a: case 0x5b:
        case 0x5c: case 0x5d: case 0x5e: case 0x5f: /* Pop */
        case 0x90: /* nop */ case 0xc3: /* ret */
        case 0xf9: /* stc */ case 0xce: /* into */
        case 0xf0: /* lock. */ case 0xf3: /* rep/repe */
        case 0xa4: case 0xa5: case 0xaa: case 0xab: /* movs/stos */
        case 0xa6: /* cmpsb */ case 0x9e: /* sahf */ case 0x99: /* cqo/cdq */
            pt++; break;

        // Opcode followed by a single byte operand.
        case 0x70: case 0x71: case 0x72: case 0x73: case 0x74: case 0x75: case 0x76: case 0x77:
        case 0x78: case 0x79: case 0x7a: case 0x7b: case 0x7c: case 0x7d: case 0x7e: case 0x7f:
        case 0xeb:
            /* short jumps. */
        case 0xcd: /* INT - now used for a register mask */
        case 0xa8: /* TEST_ACC8 */
        case 0x6a: /* PUSH_8 */
            pt += 2; break;

        // Opcode followed by a two byte operand.
        case 0xc2: /* RET_16 */
        case 0xca: /* FAR RET 16 - used for a register mask */
            pt += 3; break;

        case 0x8d: /* leal. */
            // The effective address of an lea is a constant, not an address to scan.
            pt++; skipea(addr, &pt, process, true); break;

        // Opcode followed only by an effective address.
        case 0x03: case 0x0b: case 0x13: case 0x1b:
        case 0x23: case 0x2b: case 0x33: case 0x3b: /* Add r,ea etc. */
        case 0x88: /* MOVB_R_A */ case 0x89: /* MOVL_R_A */
        case 0x8b: /* MOVL_A_R */
        case 0x62: /* BOUNDL */
        case 0xff: /* Group5 */
        case 0xd1: /* Group2_1_A */
        case 0x8f: /* POP_A */
        case 0xd3: /* Group2_CL_A */
        case 0x87: // XCHNG
        case 0x63: // MOVSXD
            pt++; skipea(addr, &pt, process, false); break;

        case 0xf6: /* Group3_a */
            {
                int isTest = 0;
                pt++;
                /* The test instruction has an immediate operand. */
                if ((*pt & 0x38) == 0) isTest = 1;
                skipea(addr, &pt, process, false);
                if (isTest) pt++; // One byte immediate.
                break;
            }

        case 0xf7: /* Group3_A */
            {
                int isTest = 0;
                pt++;
                /* The test instruction has an immediate operand. */
                if ((*pt & 0x38) == 0) isTest = 1;
                skipea(addr, &pt, process, false);
                if (isTest) pt += 4; // Four byte immediate.
                break;
            }

        // Effective address followed by a one byte immediate.
        case 0xc1: /* Group2_8_A */
        case 0xc6: /* MOVB_8_A */
        case 0x83: /* Group1_8_A */
        case 0x80: /* Group1_8_a */
        case 0x6b: // IMUL Ev,Ib
            pt++; skipea(addr, &pt, process, false); pt++; break;

        // Effective address followed by a four byte immediate.
        case 0x69: // IMUL Ev,Iv
            pt++; skipea(addr, &pt, process, false); pt += 4; break;

        case 0x81: /* Group1_32_A */
            {
                pt ++;
#ifndef HOSTARCHITECTURE_X86_64
                // Remember the modrm byte: its middle field selects the operation.
                unsigned opCode = *pt;
#endif
                skipea(addr, &pt, process, false);
                // Only check the 32 bit constant if this is a comparison.
                // For other operations this may be untagged and shouldn't be an address.
#ifndef HOSTARCHITECTURE_X86_64
                if ((opCode & 0x38) == 0x38)
                    process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
#endif
                pt += 4;
                break;
            }

        case 0xe8: case 0xe9:
            // Long jump and call.  These are used to call constant (known) functions
            // and also long jumps within the function.
            {
                pt++;
                // Assemble the little-endian 32-bit displacement, sign extended.
                POLYSIGNED disp = (pt[3] & 0x80) ? -1 : 0; // Set the sign just in case.
                for(unsigned i = 4; i > 0; i--)
                    disp = (disp << 8) | pt[i-1];
                byte *absAddr = pt + disp + 4; // The address is relative to AFTER the constant

                // If the new address is within the current piece of code we don't do anything
                if (absAddr >= (byte*)addr && absAddr < (byte*)end) {}
                else {
#ifdef HOSTARCHITECTURE_X86_64
                    ASSERT(sizeof(PolyWord) == 4); // Should only be used internally on x64
#endif /* HOSTARCHITECTURE_X86_64 */
                    if (addr != old)
                    {
                        // The old value of the displacement was relative to the old address before
                        // we copied this code segment.
                        // We have to correct it back to the original address.
                        absAddr = absAddr - (byte*)addr + (byte*)old;
                        // We have to correct the displacement for the new location and store
                        // that away before we call ScanConstant.
                        size_t newDisp = absAddr - pt - 4;
                        // The bytes are written through the writable view of the space containing pt.
                        byte* wr = gMem.SpaceForAddress(pt)->writeAble(pt);
                        for (unsigned i = 0; i < 4; i++)
                        {
                            wr[i] = (byte)(newDisp & 0xff);
                            newDisp >>= 8;
                        }
                    }
                    process->ScanConstant(addr, pt, PROCESS_RELOC_I386RELATIVE);
                }
                pt += 4;
                break;
            }

        case 0xc7:/* MOVL_32_A */
            {
                pt++;
                if ((*pt & 0xc0) == 0x40 /* Byte offset or sib present */ &&
                    ((*pt & 7) != 4) /* But not sib present */ && pt[1] == 256-sizeof(PolyWord))
                {
                    /* We may use a move instruction to set the length
                       word on a new segment.  We mustn't try to treat this as a constant.  */
                    pt += 6; /* Skip the modrm byte, the offset and the constant. */
                }
                else
                {
                    skipea(addr, &pt, process, false);
#ifndef HOSTARCHITECTURE_X86_64
                    // This isn't used for addresses even in 32-in-64
                    process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
#endif /* HOSTARCHITECTURE_X86_64 */
                    pt += 4;
                }
                break;
            }

        case 0xb8: case 0xb9: case 0xba: case 0xbb:
        case 0xbc: case 0xbd: case 0xbe: case 0xbf: /* MOVL_32_64_R */
            pt ++;
#ifdef HOSTARCHITECTURE_X86_64
            // Without Rex.W (bit 3 of the Rex byte) the immediate is only four bytes.
            if ((lastRex & 8) == 0)
                pt += 4; // 32-bit mode on 64-bits
            else
#endif /* HOSTARCHITECTURE_X86_64 */
            {
                // This is used in native 32-bit for constants and in
                // 32-in-64 for the special case of an absolute address.
                process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
                pt += sizeof(uintptr_t);
            }
            break;

        case 0x68: /* PUSH_32 */
            pt ++;
#if (!defined(HOSTARCHITECTURE_X86_64))
            process->ScanConstant(addr, pt, PROCESS_RELOC_DIRECT);
#endif
            pt += 4;
            break;

        case 0x0f: /* ESCAPE */
            {
                // Two byte opcodes: dispatch on the second byte.
                pt++;
                switch (*pt)
                {
                case 0xb6: /* movzl */
                case 0xb7: // movzw
                case 0xbe: // movsx
                case 0xbf: // movsx
                case 0xc1: /* xaddl */
                case 0xae: // ldmxcsr/stmxcsr
                case 0xaf: // imul
                case 0x40: case 0x41: case 0x42: case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
                case 0x48: case 0x49: case 0x4a: case 0x4b: case 0x4c: case 0x4d: case 0x4e: case 0x4f:
                    // cmov
                    pt++; skipea(addr, &pt, process, false); break;

                case 0x80: case 0x81: case 0x82: case 0x83:
                case 0x84: case 0x85: case 0x86: case 0x87:
                case 0x88: case 0x89: case 0x8a: case 0x8b:
                case 0x8c: case 0x8d: case 0x8e: case 0x8f:
                    /* Conditional branches with 32-bit displacement. */
                    pt += 5; break;

                case 0x90: case 0x91: case 0x92: case 0x93:
                case 0x94: case 0x95: case 0x96: case 0x97:
                case 0x98: case 0x99: case 0x9a: case 0x9b:
                case 0x9c: case 0x9d: case 0x9e: case 0x9f:
                    /* SetCC. */
                    pt++; skipea(addr, &pt, process, false); break;

                // These are SSE2 instructions
                case 0x10: case 0x11: case 0x58: case 0x5c: case 0x59: case 0x5e:
                case 0x2e: case 0x2a: case 0x54: case 0x57: case 0x5a: case 0x6e:
                case 0x7e: case 0x2c: case 0x2d:
                    pt++; skipea(addr, &pt, process, false); break;

                case 0x73: // PSRLDQ - EA,imm
                    pt++; skipea(addr, &pt, process, false); pt++;  break;

                default: Crash("Unknown opcode %d at %p\n", *pt, pt);
                }
                break;
            }

        case 0xd8: case 0xd9: case 0xda: case 0xdb:
        case 0xdc: case 0xdd: case 0xde: case 0xdf: // Floating point escape instructions
            {
                pt++;
                // If the top three bits of the next byte are all set it is a
                // single byte with no memory operand; otherwise it is a modrm byte.
                if ((*pt & 0xe0) == 0xe0) pt++;
                else skipea(addr, &pt, process, false);
                break;
            }

        default: Crash("Unknown opcode %d at %p\n", *pt, pt);
        }
    }
}
 
 // Increment the value contained in the first word of the mutex.
 Handle X86TaskData::AtomicDecrement(Handle mutexp)
 {
     PolyObject *p = DEREFHANDLE(mutexp);
     POLYUNSIGNED result = X86AsmAtomicDecrement(p);
     return this->saveVec.push(PolyWord::FromUnsigned(result));
 }
 
 // Release a mutex.  Because the atomic increment and decrement
 // use the hardware LOCK prefix we can simply set this to zero.
 void X86TaskData::AtomicReset(Handle mutexp)
 {
     DEREFHANDLE(mutexp)->Set(0, TAGGED(0));
 }
 
// The single X86Dependent object for this file.
static X86Dependent x86Dependent;

// Global pointer to the machine-dependent object, initialised to the X86 one above.
MachineDependent *machineDependent = &x86Dependent;

// Declared with C linkage; the definition follows below.
extern "C" {
    POLYEXTERNALSYMBOL void *PolyX86GetThreadData();
}
 
 // Return the address of assembly data for the current thread.  This is normally in
 // RBP except if we are in a callback.
 void *PolyX86GetThreadData()
 {
     // We should get the task data for the thread that is running this code.
     // If this thread has been created by the foreign code we will have to
     // create a new one here.
     TaskData* taskData = processes->GetTaskDataForThread();
     if (taskData == 0)
     {
         try {
             taskData = processes->CreateNewTaskData(0, 0, 0, TAGGED(0));
         }
         catch (std::bad_alloc&) {
             ::Exit("Unable to create thread data - insufficient memory");
         }
         catch (MemoryException&) {
             ::Exit("Unable to create thread data - insufficient memory");
         }
     }
     return &((X86TaskData*)taskData)->assemblyInterface;
 }
 
// Table of the entry points defined in this file: each entry pairs the name
// of a function with its address.  The table ends with an all-NULL entry.
struct _entrypts machineSpecificEPT[] =
{
    { "PolyX86GetThreadData",           (polyRTSFunction)& PolyX86GetThreadData },

    { NULL, NULL} // End of list.
};