diff --git a/mlsource/MLCompiler/BUILTINS.sml b/mlsource/MLCompiler/BUILTINS.sml index 0bbea279..82a612e0 100644 --- a/mlsource/MLCompiler/BUILTINS.sml +++ b/mlsource/MLCompiler/BUILTINS.sml @@ -1,118 +1,117 @@ (* Signature for built-in functions - Copyright David C. J. Matthews 2016, 2018-21 + Copyright David C. J. Matthews 2016, 2018-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature BUILTINS = sig datatype testConditions = TestEqual (* No TestNotEqual because that is always generated with "not" *) | TestLess | TestLessEqual | TestGreater | TestGreaterEqual | TestUnordered (* Reals only. *) datatype arithmeticOperations = ArithAdd | ArithSub | ArithMult | ArithQuot | ArithRem | ArithDiv | ArithMod datatype logicalOperations = LogicalAnd | LogicalOr | LogicalXor datatype shiftOperations = ShiftLeft | ShiftRightLogical (* Logical shift - zero added bits. *) | ShiftRightArithmetic (* Arithmetic shift - add the sign bit. *) datatype unaryOps = NotBoolean (* true => false; false => true - XOR *) | IsTaggedValue (* Test the tag bit. *) | MemoryCellLength (* Return the length of a memory cell (heap object) *) | MemoryCellFlags (* Return the flags byte of a memory cell (heap object) *) | ClearMutableFlag (* Remove the mutable flag from the flags byte *) | LongWordToTagged (* Convert a LargeWord.word to a Word.word or FixedInt.int. *) | SignedToLongWord (* Convert a tagged value to a LargeWord with sign extension. *) | UnsignedToLongWord (* Convert a tagged value to a LargeWord without sign extension. *) | RealAbs of precision (* Set the sign bit of a real to positive. *) | RealNeg of precision (* Invert the sign bit of a real. *) | RealFixedInt of precision (* Convert an integer value into a real value. *) | FloatToDouble (* Convert a single precision floating point value to double precision. *) | DoubleToFloat (* Convert a double precision floating point value to single precision using current rounding mode. *) | RealToInt of precision * IEEEReal.rounding_mode (* Convert a double or float to a fixed precision int. *) | TouchAddress (* Ensures that the cell is reachable. *) | AllocCStack (* Allocate space on the C stack. *) | LockMutex (* Try to lock a mutex, returning true if it succeeded. If it failed the thread must block. *) | TryLockMutex (* Try to lock a mutex but if it failed the thread will not block. *) | UnlockMutex (* Unlock a mutex. Returns false if there are blocked threads that must be woken. *) and precision = PrecSingle | PrecDouble (* Single or double precision floating pt. *) and binaryOps = (* Compare two words and return the result. This is used for both word values (isSigned=false) and fixed precision integer (isSigned=true). Values must be tagged and not pointers. *) WordComparison of { test: testConditions, isSigned: bool } (* Fixed precision int operations. These may raise Overflow. *) | FixedPrecisionArith of arithmeticOperations (* Arithmetic operations on word values. 
These do not raise Overflow. *) | WordArith of arithmeticOperations (* Load a word at a specific offset in a heap object. If this is immutable and the arguments are constants it can be folded at compile time since the result will never change. *) | WordLogical of logicalOperations (* Logical operations on words. *) | WordShift of shiftOperations (* Shift operations on words. *) (* Allocate a heap cell for byte data. The first argument is the number of words (not bytes) needed. The second argument is the "flags" byte which must include F_bytes and F_mutable. The new cell is not initialised. *) | AllocateByteMemory (* Operations on LargeWords. These are 32/64 bit values that are "boxed". *) | LargeWordComparison of testConditions | LargeWordArith of arithmeticOperations | LargeWordLogical of logicalOperations | LargeWordShift of shiftOperations | RealComparison of testConditions * precision | RealArith of arithmeticOperations * precision (* Equality of values which could be pointers or tagged values. At the lowest level this is the same as WordComparison but if we try to use an indexed case there must be a check that the values are tagged. *) | PointerEq | FreeCStack (* Free space on the C stack. *) and nullaryOps = (* Get the current thread id *) GetCurrentThreadId - (* Check whether the last RTS call set the exception status and raise it if it had. *) - | CheckRTSException + | CPUPause (* Pause a CPU while waiting for a spinlock. *) (* Allocate memory for a mutex *) | CreateMutex val unaryRepr: unaryOps -> string and binaryRepr: binaryOps -> string and testRepr: testConditions -> string and arithRepr: arithmeticOperations -> string and nullaryRepr: nullaryOps -> string end; diff --git a/mlsource/MLCompiler/CODETREE.sig b/mlsource/MLCompiler/CODETREE.sig index 5fc2a4c5..7e0ef890 100644 --- a/mlsource/MLCompiler/CODETREE.sig +++ b/mlsource/MLCompiler/CODETREE.sig @@ -1,164 +1,164 @@ (* - Copyright (c) 2012,13,15-21 David C.J. Matthews + Copyright (c) 2012,13,15-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature CODETREE = sig type machineWord type codetree type pretty type codeBinding type level datatype argumentType = GeneralType | DoubleFloatType | SingleFloatType | ContainerType of int and loadStoreKind = LoadStoreMLWord of {isImmutable: bool} | LoadStoreMLByte of {isImmutable: bool} | LoadStoreC8 | LoadStoreC16 | LoadStoreC32 | LoadStoreC64 | LoadStoreCFloat | LoadStoreCDouble | LoadStoreUntaggedUnsigned and blockOpKind = BlockOpMove of {isByteMove: bool} | BlockOpEqualByte | BlockOpCompareByte structure BuiltIns: BUILTINS datatype arbPrecisionOps = ArbCompare of BuiltIns.testConditions | ArbArith of BuiltIns.arithmeticOperations val CodeTrue: codetree (* code for "true" *) val CodeFalse: codetree (* code for "false" *) val CodeZero: codetree (* code for 0, nil etc. 
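(* The change above replaces the CheckRTSException nullary op with CPUPause,
   which emits a CPU pause/yield hint while a thread is spinning on a lock,
   and the unary ops now include LockMutex, TryLockMutex and UnlockMutex.
   A minimal sketch of the spin-then-block protocol these are intended to
   support; tryLock, pause and block are assumed stand-ins for the
   primitives, not the real Thread.Mutex implementation. *)
fun spinThenBlock (tryLock: unit -> bool, pause: unit -> unit, block: unit -> unit) =
let
    fun spin 0 = block ()                      (* stop spinning and sleep in the RTS *)
      | spin n =
        if tryLock () then ()                  (* acquired the mutex *)
        else (pause (); spin (n - 1))          (* CPUPause, then retry *)
in
    spin 100
end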
*) val mkFunction: { body: codetree, argTypes:argumentType list, resultType: argumentType, name: string, closure: codetree list, numLocals: int } -> codetree val mkInlineFunction: { body: codetree, argTypes:argumentType list, resultType: argumentType, name: string, closure: codetree list, numLocals: int } -> codetree val mkCall: codetree * (codetree * argumentType) list * argumentType -> codetree val mkLoadLocal: int -> codetree and mkLoadArgument: int -> codetree and mkLoadClosure: int -> codetree val mkConst: machineWord -> codetree val mkInd: int * codetree -> codetree val mkVarField: int * codetree -> codetree val mkProc: codetree * int * string * codetree list * int -> codetree val mkInlproc: codetree * int * string * codetree list * int -> codetree val mkMacroProc: codetree * int * string * codetree list * int -> codetree val mkIf: codetree * codetree * codetree -> codetree val mkWhile: codetree * codetree -> codetree val mkEnv: codeBinding list * codetree -> codetree val mkStr: string -> codetree val mkTuple: codetree list -> codetree val mkDatatype: codetree list -> codetree val mkRaise: codetree -> codetree val mkCor: codetree * codetree -> codetree val mkCand: codetree * codetree -> codetree val mkHandle: codetree * codetree * int -> codetree val mkEval: codetree * codetree list -> codetree val identityFunction: string -> codetree val mkSetContainer: codetree * codetree * int -> codetree val mkTupleFromContainer: int * int -> codetree val mkTagTest: codetree * word * word -> codetree val mkBeginLoop: codetree * (int * codetree) list -> codetree val mkLoop: codetree list -> codetree val mkDec: int * codetree -> codeBinding val mkMutualDecs: (int * codetree) list -> codeBinding val mkNullDec: codetree -> codeBinding val mkContainer: int * int * codetree -> codeBinding val mkNot: codetree -> codetree val mkIsShort: codetree -> codetree val mkEqualTaggedWord: codetree * codetree -> codetree val mkEqualPointerOrWord: codetree * codetree -> codetree val equalTaggedWordFn: codetree val equalPointerOrWordFn: codetree val decSequenceWithFinalExp: codeBinding list -> codetree val pretty: codetree -> pretty val evalue: codetree -> machineWord option val genCode: codetree * Universal.universal list * int -> (unit -> codetree) (* Helper functions to build closure. *) val mkLoad: int * level * level -> codetree and mkLoadParam: int * level * level -> codetree val baseLevel: level val newLevel: level -> level val getClosure: level -> codetree list val multipleUses: codetree * (unit -> int) * level -> {load: level -> codetree, dec: codeBinding list} val mkUnary: BuiltIns.unaryOps * codetree -> codetree and mkBinary: BuiltIns.binaryOps * codetree * codetree -> codetree val mkUnaryFn: BuiltIns.unaryOps -> codetree and mkBinaryFn: BuiltIns.binaryOps -> codetree and mkArbitraryFn: arbPrecisionOps -> codetree val getCurrentThreadId: codetree and getCurrentThreadIdFn: codetree - and checkRTSException: codetree + and cpuPauseFn: codetree and createMutexFn: codetree val mkAllocateWordMemory: codetree * codetree * codetree -> codetree and mkAllocateWordMemoryFn: codetree (* Load and store operations. At this level the first operand is the base address and the second is an index. 
*) val mkLoadOperation: loadStoreKind * codetree * codetree -> codetree val mkLoadOperationFn: loadStoreKind -> codetree val mkStoreOperation: loadStoreKind * codetree * codetree * codetree -> codetree val mkStoreOperationFn: loadStoreKind -> codetree val mkBlockOperation: {kind:blockOpKind, leftBase: codetree, rightBase: codetree, leftIndex: codetree, rightIndex: codetree, length: codetree} -> codetree val mkBlockOperationFn: blockOpKind -> codetree structure Foreign: FOREIGNCALL structure Sharing: sig type machineWord = machineWord type codetree = codetree type pretty = pretty type argumentType=argumentType type codeBinding = codeBinding type level = level end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig index b0d53275..220fe16c 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig @@ -1,441 +1,444 @@ (* Signature for the high-level ARM64 code Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature ARM64ICODE = sig type machineWord = Address.machineWord type address = Address.address type closureRef (* Registers. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word (* It is simpler to use a single type for all registers. *) datatype reg = GenReg of xReg | FPReg of vReg val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg val is32in64: bool and isBigEndian: bool (* Condition for conditional branches etc. *) datatype condition = CondEqual (* Z=1 *) | CondNotEqual (* Z=0 *) | CondCarrySet (* C=1 *) | CondCarryClear (* C=0 *) | CondNegative (* N=1 *) | CondPositive (* N=0 imcludes zero *) | CondOverflow (* V=1 *) | CondNoOverflow (* V=0 *) | CondUnsignedHigher (* C=1 && Z=0 *) | CondUnsignedLowOrEq (* ! (C=1 && Z=0) *) | CondSignedGreaterEq (* N=V *) | CondSignedLess (* N<>V *) | CondSignedGreater (* Z==0 && N=V *) | CondSignedLessEq (* !(Z==0 && N=V) *) (* The shift used in arithemtic operations. *) and shiftType = ShiftLSL of Word8.word | ShiftLSR of Word8.word | ShiftASR of Word8.word | ShiftNone datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than word if this is a container or a handler entry. 
*) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and callKind = Recursive | ConstantCode of machineWord | FullCall and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value.. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed sized piece of memory and puts the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. 
*) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd.*) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A.. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. 
The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, stackAdjust: int, currStackSize: int } (* Return from the function. resultRegs are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { results: ('genReg * xReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack.. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. 
This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to either 0 or 1 if it succeeds or fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. 
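(* Relating to the LoadAcquire/StoreRelease pair described above, an
   illustration of the guarantee rather than compiler code: the ref
   assignment becomes a StoreRelease and the dereference a LoadAcquire, so a
   reader that sees SOME v is also guaranteed to see the fully built list
   that v points to. *)
val shared : int list option ref = ref NONE
fun writer () = shared := SOME (List.tabulate (1000, fn i => i))     (* release store *)
fun reader () = case !shared of SOME v => List.length v | NONE => 0  (* acquire load *)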
*) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } + (* Yield control during a spin-lock. *) + | CPUYield + (* Debugging - fault if values don't match. *) | CacheCheck of { arg1: 'genReg, arg2: 'genReg } (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condional is false, falseJump if false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. If an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } (* Return the successor blocks from a control flow. *) val successorBlocks: controlFlow -> int list type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock val printICodeAbstract: basicBlockAbstract vector * (string -> unit) -> unit and printICodeConcrete: basicBlockConcrete vector * (string -> unit) -> unit (* Check whether this value is acceptable for LogicalImmediate. *) val isEncodableBitPattern: Word64.word * opSize -> bool (* This generates a BitField instruction with the appropriate values for immr and imms. *) val shiftConstant: { direction: shiftDirection, dest: preg, source: preg, shift: word, opSize: opSize } -> iCodeAbstract structure Sharing: sig type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig index 5992e264..0b643099 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig @@ -1,260 +1,261 @@ (* Copyright (c) 2021-2 David C. J. 
Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *) signature ARM64PREASSEMBLY = sig type closureRef type machineWord = Address.machineWord (* XZero and XSP are both encoded as 31 but the interpretation depends on the instruction The datatype definition is included here to allow for pattern matching on XSP and XZero. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val X_MLHeapLimit: xReg (* ML Heap limit pointer *) and X_MLAssemblyInt: xReg (* ML assembly interface pointer. *) and X_MLHeapAllocPtr: xReg (* ML Heap allocation pointer. *) and X_MLStackPtr: xReg (* ML Stack pointer. *) and X_LinkReg: xReg (* Link reg - return address *) and X_Base32in64: xReg (* X24 is used for the heap base in 32-in-64. *) val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg (* Condition for conditional branches etc. *) datatype condition = CondEqual (* Z=1 *) | CondNotEqual (* Z=0 *) | CondCarrySet (* C=1 *) | CondCarryClear (* C=0 *) | CondNegative (* N=1 *) | CondPositive (* N=0 imcludes zero *) | CondOverflow (* V=1 *) | CondNoOverflow (* V=0 *) | CondUnsignedHigher (* C=1 && Z=0 *) | CondUnsignedLowOrEq (* ! (C=1 && Z=0) *) | CondSignedGreaterEq (* N=V *) | CondSignedLess (* N<>V *) | CondSignedGreater (* Z==0 && N=V *) | CondSignedLessEq (* !(Z==0 && N=V) *) val invertTest: condition -> condition (* i.e. jump when the condition is not true. *) val condToString: condition -> string datatype shiftType = ShiftLSL of Word8.word | ShiftLSR of Word8.word | ShiftASR of Word8.word | ShiftNone datatype wordSize = WordSize32 | WordSize64 datatype 'a extend = ExtUXTB of 'a (* Unsigned extend byte *) | ExtUXTH of 'a (* Unsigned extend byte *) | ExtUXTW of 'a (* Unsigned extend byte *) | ExtUXTX of 'a (* Left shift *) | ExtSXTB of 'a (* Sign extend byte *) | ExtSXTH of 'a (* Sign extend halfword *) | ExtSXTW of 'a (* Sign extend word *) | ExtSXTX of 'a (* Left shift *) (* Load/store instructions have only a single bit for the shift. For byte operations this is one bit shift; for others it scales by the size of the operand if set. 
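(* A sketch of the scaling rule just described: the left shift applied to the
   index register for each load size when ScaleOrShift is selected (for byte
   operations the bit selects a shift of zero either way). This illustrates
   the encoding only; it is not code from the assembler. *)
fun indexShiftSketch (ScaleOrShift, Load64) = 3   (* index counts 8-byte units *)
  | indexShiftSketch (ScaleOrShift, Load32) = 2
  | indexShiftSketch (ScaleOrShift, Load16) = 1
  | indexShiftSketch (ScaleOrShift, Load8)  = 0
  | indexShiftSketch (NoScale, _) = 0             (* index is a plain byte offset *)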
*) datatype scale = ScaleOrShift | NoScale datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP and unscaledType = NoUpdate | PreIndex | PostIndex and condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate and bitfieldKind = BFUnsigned | BFSigned | BFInsert and brRegType = BRRBranch | BRRAndLink | BRRReturn type label and labelMaker val createLabelMaker: unit -> labelMaker and createLabel: labelMaker -> label datatype precode = (* Basic instructions *) AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind} | DivideRegs of {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize} | LogicalShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool} | LoadRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | LoadFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | StoreRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | StoreFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | LoadRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | LoadFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} (* LoadAcquire and StoreRelease are used for mutables. *) | LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType} | StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType} (* LoadAcquireExclusiveRegister and StoreReleaseExclusiveRegister are used for mutexes. 
*) | LoadAcquireExclusiveRegister of {regN: xReg, regT: xReg} | StoreReleaseExclusiveRegister of {regS: xReg, regT: xReg, regN: xReg} | MemBarrier | LoadRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | ConditionalSet of {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet, opSize: opSize} | BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind} | ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection} | BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp} (* Floating point *) | MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize} | MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize} | CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize} | CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize} | FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary} | FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize} | FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary} (* Branches and Labels. *) | SetLabel of label | ConditionalBranch of condition * label | UnconditionalBranch of label | BranchAndLink of label | BranchReg of {regD: xReg, brRegType: brRegType } | LoadLabelAddress of xReg * label | TestBitBranch of { test: xReg, bit: Word8.word, label: label, onZero: bool } | CompareBranch of { test: xReg, label: label, onZero: bool, opSize: opSize } (* Composite instructions *) | MoveXRegToXReg of {sReg: xReg, dReg: xReg} | LoadNonAddr of xReg * Word64.word | LoadAddr of xReg * machineWord | RTSTrap of { rtsEntry: int, work: xReg, save: xReg list } (* Allocate memory - bytes includes the length word and rounding. *) | AllocateMemoryFixedSize of { bytes: word, dest: xReg, save: xReg list, work: xReg } (* Allocate memory - sizeReg is number of ML words needed for cell. *) | AllocateMemoryVariableSize of { sizeReg: xReg, dest: xReg, save: xReg list, work: xReg } (* Branch table for indexed case. startLabel is the address of the first label in the list. The branch table is a sequence of unconditional branches. *) | BranchTable of { startLabel: label, brTable: label list } | LoadGlobalHeapBaseInCallback of xReg + | Yield (* Wrapper for BitField *) val shiftConstant: { direction: shiftDirection, regD: xReg, regN: xReg, shift: word, opSize: opSize } -> precode (* Convenient sequences. N.B. These are in reverse order. *) val boxDouble: {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list and boxFloat: {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list and boxSysWord: {source: xReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list (* Create the vector of code from the list of instructions and update the closure reference to point to it. 
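(* A hedged sketch of the spin-lock acquire sequence that the new Yield hint
   is intended for, written with the pre-assembly constructors above. The
   register assignments (X0 = address of the lock word, X1/X2 as work
   registers) and the encoding 0 = free, 1 = held are assumptions made for
   illustration; this is not the sequence the compiler actually generates. *)
fun spinLockSketch (lm: labelMaker) : precode list =
let
    val retry = createLabel lm and spin = createLabel lm and done = createLabel lm
in
    [ SetLabel retry,
      LoadAcquireExclusiveRegister {regN=X0, regT=X1},                  (* X1 := [X0], exclusive *)
      SubImmediate {regN=X1, regD=XZero, immed=0w0, shifted=false,
                    opSize=OpSize64, setFlags=true},                    (* compare with 0 (free) *)
      ConditionalBranch (CondNotEqual, spin),                           (* already held: spin *)
      LoadNonAddr (X1, 0w1),
      StoreReleaseExclusiveRegister {regS=X2, regT=X1, regN=X0},        (* try [X0] := 1 *)
      CompareBranch {test=X2, label=retry, onZero=false, opSize=OpSize32}, (* lost exclusivity: retry *)
      UnconditionalBranch done,
      SetLabel spin,
      Yield,                                                            (* pause, then try again *)
      UnconditionalBranch retry,
      SetLabel done ]
end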
*) val generateFinalCode: {instrs: precode list, name: string, parameters: Universal.universal list, resultClosure: closureRef, profileObject: machineWord, labelMaker: labelMaker} -> unit (* Offsets in the assembly code interface pointed at by X26 These are in units of 64-bits NOT bytes. *) val heapOverflowCallOffset: int and stackOverflowCallOffset: int and stackOverflowXCallOffset: int and exceptionHandlerOffset: int and stackLimitOffset: int and threadIdOffset: int and heapLimitPtrOffset: int and heapAllocPtrOffset: int and mlStackPtrOffset: int and exceptionPacketOffset: int val is32in64: bool and isBigEndian: bool val isEncodableBitPattern: Word64.word * wordSize -> bool structure Sharing: sig type closureRef = closureRef type loadType = loadType type opSize = opSize type logicalOp = logicalOp type floatSize = floatSize type shiftDirection = shiftDirection type multKind = multKind type fpUnary = fpUnary type fpBinary = fpBinary type unscaledType = unscaledType type condSet = condSet type bitfieldKind = bitfieldKind type brRegType = brRegType type precode = precode type xReg = xReg type vReg = vReg type label = label type labelMaker = labelMaker type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML index 1ada6681..ba45ae29 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML @@ -1,1297 +1,1299 @@ (* Copyright David C. J. Matthews 2016-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64AllocateRegisters( structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ALLOCATEREGISTERS = struct open Arm64ICode open Identify open IntSet exception InternalError = Misc.InternalError val checkCache = false (* Use the cache *) datatype allocateResult = AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list (* General registers. X24 is used as the global heap base in 32-in-64. X30 is the return address set by blr but is otherwise a general register. Put the argument registers at the end of the list so they'll only be used when hinted. *) val generalRegisters = map GenReg ([X9, X10, X11, X12, X13, X14, X15, X19, X20, X21, X22, X23, X0, X1, X2, X3, X4, X5, X6, X7, X8, X30] @ (if is32in64 then [] else [X24])) val floatingPtRegisters = map FPReg [V7, V6, V5, V4, V3, V2, V1] type conflictState = { conflicts: intSet, realConflicts: reg list } type triple = {instr: iCodeAbstract, current: intSet, active: intSet} exception InternalError = Misc.InternalError (* Get the conflict states, allocate registers and return the code with the allocated registers if it is successful. 
*) fun allocateRegisters{blocks, regProps, maxPRegs, ...} = let (* Other registers that conflict with this i.e. cannot share the same real register. *) val regConflicts = Array.array(maxPRegs, emptySet) (* Real registers that cannot be used for this because they are needed for an instruction. Only X30 in calls and RTS traps. *) and regRealConflicts = Array.array(maxPRegs, []: reg list) fun addConflictsTo(addTo, conflicts) = List.app(fn aReg => Array.update(regConflicts, aReg, union(Array.sub(regConflicts, aReg), conflicts))) addTo (* To reserve a register we need to add the real register to the real conflict sets of all the abstract conflicts. *) local fun isInset reg set = List.exists (fn r => r = reg) set in fun reserveRegister(reserveFor, reg) = let val absConflicts = Array.sub(regConflicts, reserveFor) fun addConflict i = if i = reserveFor then () else addRealConflict (i, reg) in List.app addConflict (setToList absConflicts) end and addRealConflict (i, reg) = let val currentConflicts = Array.sub(regRealConflicts, i) in if isInset reg currentConflicts then () else Array.update(regRealConflicts, i, reg :: currentConflicts) end end fun conflictsForInstr passThrough {instr, current, ...} = let val {sources, dests} = getInstructionRegisters instr fun regNo(PReg i) = i val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos val afterRemoveDests = minus(current, destSet) local (* In almost all circumstances the destination and sources don't conflict and the same register can be used as a destination and a source. BoxLarge can only store the value after the memory has been allocated. BitFieldInsert has to copy the "destAsSource" value into the destination so cannot use the same register for the "source". *) val postInstruction = case instr of BoxLarge _ => destRegNos @ sourceRegNos | BoxTagFloat _ => destRegNos @ sourceRegNos (* Not sure about this. *) | BitFieldInsert{source, ...} => regNo source :: destRegNos | _ => destRegNos in (* If there is more than one destination they conflict with each other. *) val () = addConflictsTo(postInstruction, listToSet postInstruction); (* Mark conflicts for the destinations, i.e. after the instruction. The destinations conflict with the registers that are used subsequently. *) val () = addConflictsTo(postInstruction, current); val () = addConflictsTo(postInstruction, passThrough); (* Mark conflicts for the sources i.e. before the instruction. *) (* Sources must be set up as conflicts with each other i.e. when we come to allocate registers we must choose different real registers for different abstract registers. *) val () = addConflictsTo(sourceRegNos, listToSet sourceRegNos) val () = addConflictsTo(sourceRegNos, afterRemoveDests); val () = addConflictsTo(sourceRegNos, passThrough) end (* I'm not sure if this is needed. There was a check in the old code to ensure that different registers were used for loop variables even if they were actually unused. This may happen anyway. Comment and code copied from X86 version. Retain it for the moment. *) val () = case instr of JumpLoop{regArgs, ...} => let val destRegs = List.foldl(fn ({dst=PReg loopReg, ...}, dests) => loopReg :: dests) [] regArgs in addConflictsTo(destRegs, listToSet destRegs); addConflictsTo(destRegs, current); addConflictsTo(destRegs, passThrough) end | _ => () (* Certain instructions are specific as to the real registers. *) val () = case instr of ReturnResultFromFunction{ returnReg=PReg retReg, ... 
} => (* We're going to put the return value in X0 so we can't use that for the return address. *) addRealConflict(retReg, GenReg X0) | RaiseExceptionPacket{ packetReg } => (* This wasn't needed previously because we always pushed the registers across an exception. *) reserveRegister(regNo packetReg, GenReg X0) | BeginHandler { packetReg, ...} => reserveRegister(regNo packetReg, GenReg X0) | FunctionCall { dests, regArgs, ...} => (* This is only needed if we are saving the registers rather than marking them as "must push". *) let val () = List.app(fn (PReg pr, r) => reserveRegister(pr, GenReg r)) dests (* The argument registers also conflict. In order to execute this call we need to load the arguments into specific registers so we can't use them for values that we want after the call. *) val toReserve = X30 :: List.map #2 regArgs in List.app(fn i => List.app(fn r => addRealConflict(i, GenReg r)) toReserve) (setToList passThrough @ setToList afterRemoveDests) end (* We can't use X30 as the result because it's needed for the return addr if we have to GC. *) | AllocateMemoryFixed{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | AllocateMemoryVariable{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxLarge{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxTagFloat{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) (* Could exclude floats on native addr. *) | _ => () in () end (* Process the block. *) fun conflictsForBlock(ExtendedBasicBlock{block, passThrough, exports, ...}) = let (* We need to establish conflicts between all the registers active at the end of the block since they may not be established elsewhere. This isn't necessary for an unconditional branch since the same registers will be included in the block that is the target of the branch, possibly along with others. However if this is a conditional or indexed branch we may have different sets at each of the targets and we have to ensure that all the registers differ. *) val united = union(exports, passThrough) val () = addConflictsTo(setToList united, united) val () = List.app (conflictsForInstr passThrough) block in () end val () = Vector.app conflictsForBlock blocks (* Hint values. The idea of hints is that by using a hinted register we may avoid an unnecessary move instruction. realHints is set when a pseudo-register is going to be loaded from a specific register e.g. a register argument, or moved into one e.g. X0 for the function result. friends is set to the other pReg that may be associated with the pReg. Typically this is where we have a merge register that we move some value into. *) val realHints = Array.array(maxPRegs, NONE: reg option) (* Sources and destinations. These indicate the registers that are the sources and destinations of the indexing register and are used as hints. If a register has been allocated for a source or destination we may be able to reuse it. *) val sourceRegs = Array.array(maxPRegs, []: int list) and destinationRegs = Array.array(maxPRegs, []: int list) local (* Real hints. If this is the source of a value e.g. a function argument in a register, we'll use it directly. If, though, this is the result of a function and we want the result to end up in a specific register we want to propagate it to any pReg that moves its value into this. *) fun addRealHint(r, reg) = case Array.sub(realHints, r) of SOME _ => () | NONE => ( (* Add to this pReg *) Array.update(realHints, r, SOME reg); (* and to any other pReg that moves here. 
*) List.app(fn r => addRealHint(r, reg)) (Array.sub(sourceRegs, r)) ) fun addSourceAndDestinationHint{src, dst} = let val conflicts = Array.sub(regConflicts, src) in (* If they conflict we can't add them. *) if member(dst, conflicts) then () else let val currentDests = Array.sub(destinationRegs, src) val currentSources = Array.sub(sourceRegs, dst) in (* Add the destination for this source i.e. the registers we move this source into. *) if List.exists(fn i => i=dst) currentDests then () else Array.update(destinationRegs, src, dst :: currentDests); (* Add the source to the list of sources for this destination. A merge register may have several sources, a different one for each path. If the destination has a real hint we want to propagate that back. That isn't needed for the destinations because we allocate the registers from the start forward. *) if List.exists(fn i => i=src) currentSources then () else let val sources = src :: currentSources val () = Array.update(sourceRegs, dst, sources) in case Array.sub(realHints, dst) of NONE => () | SOME real => List.app(fn r => addRealHint(r, real)) sources end end end (* Add the hints to steer the register allocation. The idea is to avoid moves between registers by getting values into the appropriate register in advance. We don't actually need to add real hints where the real register is providing the value, e.g. BeginFunction, because the allocation process will take care of that. *) fun addHints{instr=MoveRegister{source=PReg sreg, dest=PReg dreg, ...}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints{instr=BitFieldInsert{destAsSource=PReg dsReg, dest=PReg dReg, ...}, ...} = (* The "destAsSource" is the destination if some bits are retained. *) addSourceAndDestinationHint {src=dsReg, dst=dReg} | addHints{instr=ReturnResultFromFunction { results, ... }, ...} = List.app(fn(PReg pr, r) => addRealHint(pr, GenReg r)) results | addHints{instr=JumpLoop{regArgs, ...}, ...} = let fun addRegArg {src=ArgInReg(PReg argReg), dst=PReg resReg} = addSourceAndDestinationHint {dst=resReg, src=argReg} | addRegArg {src=ArgOnStack _, ...} = () in List.app addRegArg regArgs end | addHints{instr=BeginFunction{regArgs, ...}, ...} = List.app (fn (PReg pr, reg) => addRealHint(pr, GenReg reg)) regArgs | addHints{instr=TailRecursiveCall{regArgs, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app setHint regArgs end | addHints{instr=FunctionCall{regArgs, dests, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app(fn(PReg pr, r) => addRealHint(pr, GenReg r)) dests; List.app setHint regArgs end (* Exception packets are in X0 *) | addHints{instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints{instr=BeginHandler{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints _ = () in val () = Vector.app(fn ExtendedBasicBlock { block, ...} => List.app addHints block) blocks end val allocatedRegs = Array.array(maxPRegs, NONE: reg option) val failures = ref []: intSet list ref (* Find a real register for a preg. 1. If a register is already allocated use that. 2. Try the "preferred" register if one has been given. 3. Try the realHints value if there is one. 4. See if there is a "friend" that has an appropriate register 5. Look at all the registers and find one. 
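(* A much simplified model of the search order listed here, using plain
   lists where the real findRegister below uses arrays, conflict sets and
   the register cache; the record fields are invented for illustration only. *)
fun chooseRegisterSketch {allocated, preferred, hinted, friends, available, conflicting} =
let
    fun isFree r = not (List.exists (fn c => c = r) conflicting)
in
    case allocated of
        SOME reg => SOME reg                                      (* 1: already allocated *)
      | NONE =>
        (case List.find isFree (preferred @ hinted @ friends) of  (* 2-4: preference, hint, friends *)
             SOME reg => SOME reg
           | NONE => List.find isFree available)                  (* 5: any free register *)
end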
*) fun findRegister(r, pref, regSet, cache) = case Array.sub(allocatedRegs, r) of SOME reg => reg | NONE => let val conflicts = Array.sub(regConflicts, r) and realConflicts = Array.sub(regRealConflicts, r) (* Find the registers we've already allocated that may conflict. *) val conflictingRegs = List.mapPartial(fn i => Array.sub(allocatedRegs, i)) (setToList conflicts) @ realConflicts fun isFree aReg = not (List.exists(fn i => i=aReg) conflictingRegs) fun tryAReg NONE = NONE | tryAReg (somePref as SOME prefReg) = if isFree prefReg then (Array.update(allocatedRegs, r, somePref); somePref) else NONE (* Search the sources and destinations to see if a register has already been allocated or there is a hint. *) fun findAFriend([], [], _) = NONE | findAFriend(aDest :: otherDests, sources, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aDest) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aDest)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the destinations of the destinations to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(destinationRegs, aDest)) in findAFriend(otherDests @ fOfF, sources, aDest :: alreadySeen) end end | findAFriend([], aSrc :: otherSrcs, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aSrc) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aSrc)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the sources of the sources to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(sourceRegs, aSrc)) in findAFriend([], otherSrcs @ fOfF, aSrc :: alreadySeen) end end in case tryAReg pref of SOME r => r | NONE => ( case tryAReg (Array.sub(realHints, r)) of SOME r => r | NONE => ( case findAFriend(Array.sub(destinationRegs, r), Array.sub(sourceRegs, r), []) of SOME r => r (* Look through the registers to find one that's free. First try excluding the cache registers. *) | NONE => let (* First try filtering all the cache registers to see if we can find a register. If not see if it works by *) fun filterCache(filteredRegset, []) = List.find isFree filteredRegset | filterCache(filteredRegset, (cReg, _) :: cache) = ( case filterCache(List.filter(fn r => r <> cReg) filteredRegset, cache) of NONE => if isFree cReg then SOME cReg else NONE | result => result ) val pick = case filterCache(regSet, cache) of SOME reg => reg | NONE => ( (* This failed. We're going to have to spill something. *) failures := conflicts :: ! failures; hd regSet (* Return something to allow this pass to complete *) ) val () = Array.update(allocatedRegs, r, SOME pick) in pick end ) ) end (* Turn the abstract icode into a concrete version by allocating the registers. *) local fun asGenReg(GenReg reg) = reg | asGenReg _ = raise InternalError "asGenReg" and asFPReg(FPReg reg) = reg | asFPReg _ = raise InternalError "asFPReg" datatype cacheItem = CacheStack of stackLocn (* A value loaded from the stack. *) | CacheAbsAddress of preg (* 32-in-64: An absolute address from an object ID *) | CacheAbsAddrOnStack of stackLocn (* 32-in-64: An absolute address from an object loaded from the stack. *) (* Cache hints: Try to use the same register for values that can be cached. 
This increases the chances that we will be able to retain the cache when we merge different branches. *) val cacheHints = Array.array(maxPRegs, NONE: reg option) (* Remove any reference to newly allocated registers from the cache. Also used after block move and comparison that modify registers *) fun pruneCache(reg: reg, cache) = List.filter(fn (r, _) => r <> reg) cache (* Return the cache registers that contain valid addresses. *) fun cachedAddressRegs cache = List.map (asGenReg o #1) cache (* Merge the cache states *) fun mergeCacheStates ([]: (reg * cacheItem) list list) = []: (reg * cacheItem) list | mergeCacheStates [single] = single | mergeCacheStates (many as first :: rest) = let (* Generally we will either be unable to merge and have an empty cache or will have just one or two entries. *) (* Find the shortest. If it's empty we're done. *) fun findShortest(_, [], _) = [] | findShortest(_, shortest, []) = shortest | findShortest(len, shortest, hd::tl) = let val hdLen = List.length hd in if hdLen < len then findShortest(hdLen, hd, tl) else findShortest(len, shortest, tl) end val shortest = findShortest(List.length first, first, rest) (* Find the item we're caching for. If it is in a different register we can't use it. *) fun findItem search (hd::tl) = search = hd orelse findItem search tl | findItem _ [] = false (* It's present if it's in all the sources. *) fun present search = List.all(findItem search) many val filtered = List.foldl (fn (search, l) => if present search then search :: l else l) [] shortest in filtered end fun allocateNewDestination(PReg r, pref, regSet, cacheList) = case Array.sub(allocatedRegs, r) of SOME reg => ( case Vector.sub(regProps, r) of RegPropMultiple => (reg, pruneCache(reg, cacheList)) (* This is allowed for merge registers *) | _ => raise InternalError "Register defined at multiple points" ) | NONE => let val reg = findRegister(r, pref, regSet, cacheList) in (reg, pruneCache(reg, cacheList)) end fun allocateGenReg(r, hint, cache) = let val (reg, newCache) = allocateNewDestination(r, hint, generalRegisters, cache) in (asGenReg reg, newCache) end and allocateFPReg(r, hint, cache) = let val (reg, newCache) = allocateNewDestination(r, hint, floatingPtRegisters, cache) in (asFPReg reg, newCache) end and allocateGenRegOrZero(ZeroReg, _, cache) = (XZero, cache) | allocateGenRegOrZero(SomeReg reg, hint, cache) = allocateGenReg(reg, hint, cache) fun getAllocatedGenReg(PReg r) = case Array.sub(allocatedRegs, r) of SOME(GenReg reg) => reg | _ => raise InternalError "getAllocatedGenReg" and getAllocatedFPReg(PReg r) = case Array.sub(allocatedRegs, r) of SOME(FPReg reg) => reg | _ => raise InternalError "getAllocatedFPReg" fun getAllocatedGenRegOrZero ZeroReg = XZero | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg fun getAllocatedArg(ArgInReg reg) = ArgInReg(getAllocatedGenReg reg) | getAllocatedArg(ArgOnStack stackLoc) = ArgOnStack stackLoc val getSaveRegs = List.map getAllocatedGenReg (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) fun absToConcrete([], context, code) = (context, code) | absToConcrete({instr=MoveRegister{ source, dest}, ...} :: rest, cache, code) = let (* Try to use the register we've allocated for the source as the destination so that we can eliminate this instruction altogether. 
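The source register has already been allocated by this point, so it is passed as the preferred register for the destination; if the allocation succeeds sourceReg and destReg come out equal and the move is dropped, otherwise a concrete MoveRegister is still emitted.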
*) val sourceReg = getAllocatedGenReg source val (destReg, newCache) = allocateGenReg(dest, SOME(GenReg sourceReg), cache) in if sourceReg = destReg then absToConcrete(rest, newCache, code) else absToConcrete(rest, newCache, code <::> MoveRegister { source=sourceReg, dest=destReg}) end | absToConcrete({instr=LoadNonAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadNonAddressConstant { dest=destReg, source=source}) end | absToConcrete({instr=LoadAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAddressConstant { dest=destReg, source=source}) end | absToConcrete({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, loadType=loadType}) end | absToConcrete({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, floatSize=floatSize}) end | absToConcrete({instr=LoadWithIndexedOffset { base, dest, index, loadType, signExtendIndex}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, loadType=loadType, signExtendIndex=signExtendIndex}) end | absToConcrete({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize, signExtendIndex}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, floatSize=floatSize, signExtendIndex=signExtendIndex}) end | absToConcrete({instr=GetThreadId { dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> GetThreadId { dest=destReg}) end | absToConcrete({instr=ObjectIndexAddressToAbsolute { source as PReg srcNo, dest=destOiA as PReg doia}, kill, ...} :: rest, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this source entry we don't want it in the cache any longer. *) val killThis = member(srcNo, kill) val (newCode, destReg, newCache, next) = case List.find(fn (_, CacheAbsAddress c) => c=source | _ => false) cache of SOME (srcReg, _) => let (* Try to use the cache register as the destination if we can. *) val (destReg, newCache) = allocateNewDestination(destOiA, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if checkCache then (code <::> MoveRegister{source=sReg, dest=X17} <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=dReg} <::> CacheCheck{ arg1=dReg, arg2=X17 }, destReg, if killThis then pruneCache(srcReg, newCache) else newCache, rest) else if dReg = sReg then (code, destReg, newCache, rest) (* We will have pruned this since it's the destination. 
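allocateNewDestination always prunes the register it allocates from the cache, so the entry we found here has already been removed and no move is needed.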
*) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache, rest) end | NONE => (* If this is the last reference and the next instruction is loading with a zero offset we can use an indexed load and avoid converting to an absolute address. If this is not the last reference it's likely that we're loading another field so it's probably better to convert the object index and cache it. We might manage to use a load-pair instruction. *) ( case (killThis, rest) of (true, {instr=LoadWithConstantOffset{ byteOffset=0, loadType=Load32, base, dest=destLoad, ... }, kill=killLoad, ...} :: next) => if base = destOiA (* of objectindex *) andalso member(doia, killLoad) then let val (destReg, newCache) = allocateGenReg(destLoad, NONE, cache) in (code <::> LoadWithIndexedOffset{ base=X24(*X_Base32in64*), dest=destReg, index=getAllocatedGenReg source, loadType=Load32, signExtendIndex=false }, GenReg destReg, newCache, next) end else let val (destReg, newCache) = allocateGenReg(destOiA, Array.sub(cacheHints, srcNo), cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, GenReg destReg, newCache, rest) end | _ => let val (destReg, newCache) = allocateGenReg(destOiA, Array.sub(cacheHints, srcNo), cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, GenReg destReg, newCache, rest) end ) val () = if killThis then () else Array.update(cacheHints, srcNo, SOME destReg) in absToConcrete(next, if killThis then newCache else (destReg, CacheAbsAddress source) :: newCache, newCode) end | absToConcrete({instr=AbsoluteToObjectIndex { source, dest}, ...} :: rest, cache, code) = let (* Don't make an entry in the cache for this; it won't be used. 
*) val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AbsoluteToObjectIndex { source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=AllocateMemoryFixed { bytesRequired, dest, saveRegs }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> AllocateMemoryFixed { dest=destReg, bytesRequired=bytesRequired, saveRegs=saved}) end | absToConcrete({instr=AllocateMemoryVariable{size, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> AllocateMemoryVariable{size=getAllocatedGenReg size, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=InitialiseMem{size, addr, init}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> InitialiseMem{size=getAllocatedGenReg size, addr=getAllocatedGenReg addr, init=getAllocatedGenReg init}) | absToConcrete({instr=BeginLoop, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> BeginLoop) | absToConcrete({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...} :: rest, context, code) = let fun getStackArg{src, wordOffset, stackloc} = {src=getAllocatedArg src, wordOffset=wordOffset, stackloc=stackloc} and getRegArg{src, dst} = {src=getAllocatedArg src, dst=getAllocatedGenReg dst} in absToConcrete(rest, context, code <::> JumpLoop{ regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, checkInterrupt=Option.map getSaveRegs checkInterrupt}) end | absToConcrete({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, byteOffset=byteOffset, loadType=loadType}) | absToConcrete({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, byteOffset=byteOffset, floatSize=floatSize}) | absToConcrete({instr=StoreWithIndexedOffset { base, source, index, loadType, signExtendIndex}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, index=getAllocatedGenReg index, loadType=loadType, signExtendIndex=signExtendIndex}) | absToConcrete({instr=StoreFPWithIndexedOffset { base, source, index, floatSize, signExtendIndex}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, index=getAllocatedGenReg index, floatSize=floatSize, signExtendIndex=signExtendIndex}) | absToConcrete({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length}) end | absToConcrete({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, 
shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift}) end | absToConcrete({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LogicalImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, logOp=logOp, length=length}) end | absToConcrete({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LogicalRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, logOp=logOp, length=length, shift=shift}) end | absToConcrete({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ShiftRegister { source=getAllocatedGenReg source, shift=getAllocatedGenReg shift, dest=destReg, direction=direction, opSize=opSize}) end | absToConcrete({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> Multiplication { kind=kind, sourceA=getAllocatedGenRegOrZero sourceA, sourceM=getAllocatedGenReg sourceM, sourceN=getAllocatedGenReg sourceN, dest=destReg}) end | absToConcrete({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> Division { isSigned=isSigned, dividend=getAllocatedGenReg dividend, divisor=getAllocatedGenReg divisor, dest=destReg, opSize=opSize}) end | absToConcrete({instr=BeginFunction {regArgs, stackArgs}, ...} :: rest, _, code) = let (* Allocate the register arguments. At this point all the registers are free and the cache is empty. However we may have a "real conflict" that means that the allocated register is different. e.g. we need this argument some time after an arbitrary precision operation that may call a function. *) fun allocReg(src, dst) = let val (destReg, _) = allocateNewDestination(src, SOME(GenReg dst), generalRegisters, []) in (asGenReg destReg, dst) end in absToConcrete(rest, [], code <::> BeginFunction {regArgs=map allocReg regArgs, stackArgs=stackArgs}) end | absToConcrete({instr=FunctionCall{callKind, regArgs, stackArgs, dests, containers, saveRegs, ...}, ...} :: rest, _, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) fun getResult(preg, reg) = let (* We empty the cache at this point. *) val (newReg, _) = allocateGenReg(preg, NONE, []) in (newReg, reg) end in absToConcrete(rest, [] (* Empty after a function call. 
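The call may overwrite any register we were using to cache a value, so nothing in the cache can be relied on afterwards.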
*), code <::> FunctionCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getAllocatedArg stackArgs, dests=map getResult dests, saveRegs=getSaveRegs saveRegs, containers=containers}) end | absToConcrete({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, ...} :: rest, context, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) and getStackArg{src, stack} = {src=getAllocatedArg src, stack=stack} in absToConcrete(rest, context, code <::> TailRecursiveCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, stackAdjust=stackAdjust, currStackSize=currStackSize}) end | absToConcrete({instr=ReturnResultFromFunction{results, returnReg, numStackArgs}, ...} :: rest, context, code) = let fun getResult(preg, reg) = (getAllocatedGenReg preg, reg) in absToConcrete(rest, context, code <::> ReturnResultFromFunction{results=map getResult results, returnReg=getAllocatedGenReg returnReg, numStackArgs=numStackArgs}) end | absToConcrete({instr=RaiseExceptionPacket{packetReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> RaiseExceptionPacket{packetReg=getAllocatedGenReg packetReg}) | absToConcrete({instr=PushToStack{ source, container as StackLoc{size, rno}, copies }, ...} :: rest, cache, code) = let val srcReg = getAllocatedGenReg source val newCache = if size = 1 then (GenReg srcReg, CacheStack container) :: cache else cache val () = Array.update(cacheHints, rno, SOME(GenReg srcReg)) in absToConcrete(rest, newCache, code <::> PushToStack{source=srcReg, container=container, copies=copies}) end | absToConcrete({instr=LoadStack{ dest=destLoad, container as StackLoc{rno, ...} , field=0, wordOffset}, kill, ...} :: (restPlusOia as {instr=ObjectIndexAddressToAbsolute { source as PReg srcNo, dest=destOia}, kill=killOia, ...} :: rest), cache, code) = (* If a preg has been pushed to the stack every subsequent reference will be via the stack. If we want to be able to cache object index to absolute addresses for them we have to recognise this combination. *) (* They could be unrelated in which case process the LoadStack and then the ObjectIndex... It seems there are also rare circumstances(??) where the result of the load is not killed and so would have to be preserved. *) if destLoad = source andalso member(srcNo, killOia) then let val killThis = member(rno, kill) (* Is it the last reference to the stack entry? *) val (newCode, destReg, newCache) = case List.find(fn (_, CacheAbsAddrOnStack c) => c=container | _ => false) cache of SOME (srcReg, _) => let (* Try to use the cache register as the destination if we can. *) val (destReg, newCache) = allocateNewDestination(destOia, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if checkCache then (code <::> MoveRegister{source=sReg, dest=X17} <::> LoadStack{ dest=X16, container=container, field=0, wordOffset=wordOffset } <::> ObjectIndexAddressToAbsolute { source=X16, dest=dReg} <::> CacheCheck{ arg1=dReg, arg2=X17 }, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) else if dReg = sReg then (code, destReg, newCache) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) end | NONE => (* It's not in the cache - load it which could be cached. 
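processLoadStack will add the loaded value to the cache where possible, and the converted address is cached below as a CacheAbsAddrOnStack entry.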
*) let val (cachePostLoad, loadCode) = processLoadStack(destLoad, container, wordOffset, kill, cache, code) val (destReg, cachePlusOia) = allocateGenReg(destOia, Array.sub(cacheHints, rno), cachePostLoad) in (loadCode <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, GenReg destReg, cachePlusOia) end val () = if killThis then () else Array.update(cacheHints, rno, SOME destReg) in absToConcrete(rest, if killThis then newCache else (destReg, CacheAbsAddrOnStack container) :: newCache, newCode) end else (* Can't combine these. *) let val (newCache, newCode) = processLoadStack(destLoad, container, wordOffset, kill, cache, code) in absToConcrete(restPlusOia, newCache, newCode) end | absToConcrete({instr=LoadStack{ dest, container, wordOffset, field=0, ...}, kill, ...} :: rest, cache, code) = let val (newCache, newCode) = processLoadStack(dest, container, wordOffset, kill, cache, code) in absToConcrete(rest, newCache, newCode) end | absToConcrete({instr=LoadStack{ dest, container, field, wordOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadStack{ dest=destReg, container=container, field=field, wordOffset=wordOffset }) end | absToConcrete({instr=StoreToStack{source, container, field, stackOffset}, ...} :: rest, cache, code) = (* We may have cached the original push that cleared the container. We could cache this since it now contains the entry but it's probably better to deal with multiple results at a higher level. *) let val sReg = getAllocatedGenReg source val newCache = List.filter(fn (_, CacheStack c) => c <> container | (_, CacheAbsAddrOnStack c) => c <> container | _ => true) cache in absToConcrete(rest, newCache, code <::> StoreToStack{source=sReg, container=container, field=field, stackOffset=stackOffset}) end | absToConcrete({instr=ContainerAddress{ dest, container, stackOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ContainerAddress{ dest=destReg, container=container, stackOffset=stackOffset }) end | absToConcrete({instr=ResetStackPtr {numWords}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> ResetStackPtr {numWords=numWords}) | absToConcrete({instr=TagValue{source, dest, isSigned, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> TagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UntagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=BoxLarge{source, dest, saveRegs, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> BoxLarge{source=getAllocatedGenReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxLarge{source, dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UnboxLarge{source=getAllocatedGenReg 
source, dest=destReg}) end | absToConcrete({instr=BoxTagFloat{floatSize, source, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> BoxTagFloat{floatSize=floatSize, source=getAllocatedFPReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxTagFloat{floatSize, source, dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UnboxTagFloat{floatSize=floatSize, source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=LoadAcquire { base, dest, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAcquire { base=getAllocatedGenReg base, dest=destReg, loadType=loadType}) end | absToConcrete({instr=StoreRelease { base, source, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreRelease{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, loadType=loadType}) | absToConcrete({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> BitFieldShift { source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=BitFieldInsert{source, destAsSource, dest, length, immr, imms}, ...} :: rest, cache, code) = let val destAsSourceReg = getAllocatedGenReg destAsSource val (destReg, newCache) = allocateNewDestination(dest, SOME(GenReg destAsSourceReg), generalRegisters, cache) in absToConcrete(rest, newCache, code <::> BitFieldInsert { source=getAllocatedGenReg source, destAsSource=destAsSourceReg, dest=asGenReg destReg, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=IndexedCaseOperation{testReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> IndexedCaseOperation{testReg=getAllocatedGenReg testReg}) | absToConcrete({instr=PushExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PushExceptionHandler) | absToConcrete({instr=PopExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PopExceptionHandler) | absToConcrete({instr=BeginHandler{packetReg}, ...} :: rest, _, code) = let (* The cache is undefined at the start of a handler. 
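We can arrive here from a raise anywhere within the handled expression, so we simply begin again with an empty cache.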
*) val (destReg, newCache) = allocateGenReg(packetReg, NONE, []) in absToConcrete(rest, newCache, code <::> BeginHandler{packetReg=destReg}) end | absToConcrete({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val vec1Reg = getAllocatedGenReg vec1Addr and vec2Reg = getAllocatedGenReg vec2Addr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg vec1Reg, pruneCache(GenReg vec2Reg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> CompareByteVectors{vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lenReg, ccRef=ccRef}) end | absToConcrete({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val srcAReg = getAllocatedGenReg srcAddr and dstAReg = getAllocatedGenReg destAddr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg srcAReg, pruneCache(GenReg dstAReg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> BlockMove{srcAddr=srcAReg, destAddr=dstAReg, length=lenReg, isByteMove=isByteMove}) end | absToConcrete({instr=AddSubXSP{source, dest, isAdd}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubXSP { source=getAllocatedGenReg source, dest=destReg, isAdd=isAdd}) end | absToConcrete({instr=TouchValue{source}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> TouchValue { source=getAllocatedGenReg source}) | absToConcrete({instr=LoadAcquireExclusive{ base, dest }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAcquireExclusive { base=getAllocatedGenReg base, dest=destReg}) end | absToConcrete({instr=StoreReleaseExclusive{ base, source, result }, ...} :: rest, cache, code) = let val (resultReg, newCache) = allocateGenReg(result, NONE, cache) in absToConcrete(rest, newCache, code <::> StoreReleaseExclusive{ base=getAllocatedGenReg base, source=getAllocatedGenRegOrZero source, result=resultReg}) end | absToConcrete({instr=MemoryBarrier, ...} :: rest, cache, code) = absToConcrete(rest, cache, code <::> MemoryBarrier) | absToConcrete({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ConvertIntToFloat{ source=getAllocatedGenReg source, dest=destReg, srcSize=srcSize, destSize=destSize}) end | absToConcrete({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ConvertFloatToInt{ source=getAllocatedFPReg source, dest=destReg, srcSize=srcSize, destSize=destSize, rounding=rounding}) end | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UnaryFloatingPt{ source=getAllocatedFPReg source, dest=destReg, fpOp=fpOp}) end | absToConcrete({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> 
BinaryFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, dest=destReg, fpOp=fpOp, opSize=opSize}) end | absToConcrete({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> CompareFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, opSize=opSize, ccRef=ccRef}) + | absToConcrete({instr=CPUYield, ...} :: rest, cache, code) = absToConcrete(rest, cache, code <::> CPUYield) + | absToConcrete({instr=CacheCheck _, ...} :: _, _, _) = (* Concrete only. *) raise InternalError "absToConcrete: CheckCache" (* LoadStack. *) and processLoadStack(dest, container as StackLoc{rno, ...}, wordOffset, kill, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this stack entry we don't want it in the cache any longer. *) val killThis = member(rno, kill) val (newCode, destReg, newCache) = case List.find(fn (_, CacheStack c) => c=container | _ => false) cache of SOME (srcReg, _) => let val (destReg, newCache) = allocateNewDestination(dest, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if checkCache then (code <::> MoveRegister{source=sReg, dest=X17} <::> LoadStack{ dest=dReg, container=container, field=0, wordOffset=wordOffset } <::> CacheCheck{ arg1=dReg, arg2=X17 }, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) else if dReg = sReg andalso false then (code, destReg, newCache) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) end | NONE => let val (destReg, newCache) = allocateGenReg(dest, Array.sub(cacheHints, rno), cache) in (code <::> LoadStack{ dest=destReg, container=container, field=0, wordOffset=wordOffset }, GenReg destReg, newCache) end val () = if killThis then () else Array.update(cacheHints, rno, SOME destReg) in (if killThis then newCache else (destReg, CacheStack container) :: newCache, newCode) end in fun concreteBlock(ExtendedBasicBlock{ block, ...}, inputCache) = let val (cache, code) = absToConcrete(block, inputCache, []) in {cache=cache, code=List.rev code} end val mergeCacheStates = mergeCacheStates end val numBlocks = Vector.length blocks (* The results. The cache state is initialised to empty so that if we have a loop we will end up with an empty input cache. *) val resultArray = Array.array(numBlocks, {code=[], cache=[]}) (* Process the blocks in execution order so that normally we will be able to propagate the cache states. If we have a loop the input cache state will be empty because the output cache state for an unprocessed block is empty. *) (* Get the blocks that are inputs for each one. *) local val blockRefs = Array.array(numBlocks, []) (* The successors of this block but only including handlers in SetHandler. 
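An UnconditionalHandle therefore contributes no successors here and a ConditionalHandle contributes only its continuation; the handler block itself is only recorded as a successor of the block that sets the handler.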
*) fun directSuccessors ExitCode = [] | directSuccessors(IndexedBr cases) = cases | directSuccessors(Unconditional dest) = [dest] | directSuccessors(Conditional {trueJump, falseJump, ...}) = [falseJump, trueJump] | directSuccessors(SetHandler { handler, continue }) = [handler, continue] | directSuccessors(UnconditionalHandle _) = [] | directSuccessors(ConditionalHandle { continue, ...}) = [continue] fun setReferences fromBlock = let val ExtendedBasicBlock{ flow, ...} = Vector.sub(blocks, fromBlock) val refs = directSuccessors flow fun setRefs toBlock = let val oldRefs = Array.sub(blockRefs, toBlock) in Array.update(blockRefs, toBlock, fromBlock :: oldRefs); if null oldRefs then setReferences toBlock else () end in List.app setRefs refs end val () = setReferences 0 in val directSuccessors = directSuccessors val blockRefs = blockRefs end val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) fun processBlocks (toDo: int list) = case List.filter (fn n => not(haveProcessed n)) toDo of [] => () | stillToDo as head :: _ => let (* Try to find a block all of whose predecessors have been processed. That increases the chances that we will have cached items. *) fun available dest = List.all haveProcessed (Array.sub(blockRefs, dest)) val blockNo = case List.find available stillToDo of SOME c => c | NONE => head val thisBlock as ExtendedBasicBlock { flow, ...} = Vector.sub(blocks, blockNo) (* Get the input cache state. Take the list of output caches of everything that jumps here and produce the intersection. *) val inputCacheList = List.map (fn n => #cache(Array.sub(resultArray, n))) (Array.sub(blockRefs, blockNo)) val inputCache = mergeCacheStates inputCacheList val inputCache = [] (* Temporarily *) (* Process this block and add it to the results. *) val () = Array.update(processed, blockNo, true) val () = Array.update(resultArray, blockNo, concreteBlock(thisBlock, inputCache)) (* Add the successors but with handlers only included in SetHandler. *) val addSet = directSuccessors flow in processBlocks(addSet @ stillToDo) end in processBlocks [0]; (* If the failures list is empty we succeeded. *) case !failures of [] => (* Return the allocation vector. We may have unused registers, *) AllocateSuccess( Vector.mapi(fn (i, ExtendedBasicBlock{ flow, ...}) => BasicBlock{block= #code(Array.sub(resultArray, i)), flow=flow}) blocks ) (* Else we'll have to spill something. *) | l => AllocateFailure l end val nGenRegs = List.length generalRegisters structure Sharing = struct type intSet = intSet and extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg and xReg = xReg and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML index 13308b20..d3fc2713 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML @@ -1,3198 +1,3199 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64CodetreeToICode( structure BackendTree: BACKENDINTERMEDIATECODE structure Arm64ICode: ARM64ICODE structure Debug: DEBUG structure Arm64Foreign: FOREIGNCALL structure ICodeTransform: ARM64ICODETRANSFORM structure CodeArray: CODEARRAY structure Pretty:PRETTY sharing Arm64ICode.Sharing = ICodeTransform.Sharing = CodeArray.Sharing = BackendTree.Sharing ): GENCODE = struct open BackendTree open Address open Arm64ICode open CodeArray open BuiltIns (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock exception InternalError = Misc.InternalError fun taggedWord64 w: Word64.word = w * 0w2 + 0w1 and taggedWord w: word = w * 0w2 + 0w1 datatype blockStruct = BlockSimple of iCodeAbstract | BlockExit of iCodeAbstract | BlockLabel of int | BlockFlow of controlFlow | BlockBegin of { regArgs: (preg * xReg) list, stackArgs: stackLocn list } | BlockRaiseAndHandle of iCodeAbstract * int | BlockOptionalHandle of {call: iCodeAbstract, handler: int, label: int } val moveRegister = BlockSimple o MoveRegister and loadNonAddressConstant = BlockSimple o LoadNonAddressConstant and loadAddressConstant = BlockSimple o LoadAddressConstant and loadWithConstantOffset = BlockSimple o LoadWithConstantOffset and loadFPWithConstantOffset = BlockSimple o LoadFPWithConstantOffset and loadWithIndexedOffset = BlockSimple o LoadWithIndexedOffset and loadFPWithIndexedOffset = BlockSimple o LoadFPWithIndexedOffset and getThreadId = BlockSimple o GetThreadId and objectIndexAddressToAbsolute = BlockSimple o ObjectIndexAddressToAbsolute and absoluteToObjectIndex = BlockSimple o AbsoluteToObjectIndex and allocateMemoryFixed = BlockSimple o AllocateMemoryFixed and allocateMemoryVariable = BlockSimple o AllocateMemoryVariable and initialiseMem = BlockSimple o InitialiseMem and storeWithConstantOffset = BlockSimple o StoreWithConstantOffset and storeFPWithConstantOffset = BlockSimple o StoreFPWithConstantOffset and storeWithIndexedOffset = BlockSimple o StoreWithIndexedOffset and storeFPWithIndexedOffset = BlockSimple o StoreFPWithIndexedOffset and addSubImmediate = BlockSimple o AddSubImmediate and addSubRegister = BlockSimple o AddSubRegister and logicalImmediate = BlockSimple o LogicalImmediate and logicalRegister = BlockSimple o LogicalRegister and shiftRegister = BlockSimple o ShiftRegister and multiplication = BlockSimple o Multiplication and division = BlockSimple o Division and pushToStack = BlockSimple o PushToStack and loadStack = BlockSimple o LoadStack and storeToStack = BlockSimple o StoreToStack and containerAddress = BlockSimple o ContainerAddress and resetStackPtr = BlockSimple o ResetStackPtr and tagValue = BlockSimple o TagValue and untagValue = BlockSimple o UntagValue and boxLarge = BlockSimple o BoxLarge and unboxLarge = BlockSimple o UnboxLarge and boxTagFloat = BlockSimple o BoxTagFloat and unboxTagFloat = BlockSimple o UnboxTagFloat and loadAcquire = BlockSimple o LoadAcquire and storeRelease = BlockSimple o StoreRelease and bitFieldShift = BlockSimple o BitFieldShift and bitFieldInsert = BlockSimple o BitFieldInsert and compareByteVectors = 
BlockSimple o CompareByteVectors and blockMove = BlockSimple o BlockMove and addSubXSP = BlockSimple o AddSubXSP and touchValue = BlockSimple o TouchValue and loadAcquireExclusive = BlockSimple o LoadAcquireExclusive and storeReleaseExclusive = BlockSimple o StoreReleaseExclusive and memoryBarrier = BlockSimple MemoryBarrier and convertIntToFloat = BlockSimple o ConvertIntToFloat and convertFloatToInt = BlockSimple o ConvertFloatToInt and unaryFloatingPt = BlockSimple o UnaryFloatingPt and binaryFloatingPoint = BlockSimple o BinaryFloatingPoint and compareFloatingPoint = BlockSimple o CompareFloatingPoint + and cpuYield = BlockSimple CPUYield val shiftConstant = BlockSimple o shiftConstant (* Many operations use 32-bit arguments in 32-in-64 and 64-bit in native 64. *) val polyWordLoadSize = if is32in64 then Load32 else Load64 val polyWordOpSize = if is32in64 then OpSize32 else OpSize64 val tagBitMask64 = Word64.<<(Word64.fromInt ~1, 0w1) val tagBitMask32 = Word64.andb(tagBitMask64, 0wxffffffff) val polyWordTagBitMask = if is32in64 then tagBitMask32 else tagBitMask64 (* The flags byte is the high-order byte of length word. *) val flagsByteOffset = if isBigEndian then ~ (Word.toInt wordSize) else ~1 (* Size of operand in bytes and therefore the scale factor. *) fun opWordSize Load64 = 8 | opWordSize Load32 = 4 | opWordSize Load16 = 2 | opWordSize Load8 = 1 (* Shift for each size. i.e. log2 of opWordSize. *) fun loadShift Load64 = 0w3 | loadShift Load32 = 0w2 | loadShift Load16 = 0w1 | loadShift Load8 = 0w0 fun precisionToFpSize PrecSingle = Float32 | precisionToFpSize PrecDouble = Double64 fun codeFunctionToArm64({body, localCount, name, argTypes, closure, ...}:bicLambdaForm, debugSwitches, resultClosure) = let (* Pseudo-registers are allocated sequentially and the properties added to the list. *) val pregCounter = ref 0 val pregPropList = ref [] fun newPReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropGeneral :: !pregPropList in PReg regNo end and newUReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropUntagged :: !pregPropList in PReg regNo end and newStackLoc size = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropStack size :: !pregPropList in StackLoc{size=size, rno=regNo} end and newMergeReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropMultiple :: !pregPropList in PReg regNo end datatype locationValue = NoLocation | PregLocation of preg | StackContainer of { container: stackLocn, stackOffset: int } | RegisterContainer of preg list val locToPregArray = Array.array(localCount, NoLocation) val labelCounter = ref 1 (* Start at 1. Zero is used for the root. *) fun newLabel() = !labelCounter before labelCounter := !labelCounter + 1 val ccRefCounter = ref 0 fun newCCRef() = CcRef(!ccRefCounter) before ccRefCounter := !ccRefCounter + 1 (* The profile object is a single mutable with the F_bytes bit set. *) val profileObject = CodeArray.createProfileObject() (* Switch to indicate if we want to trace where live data has been allocated. *) (* TODO: This should be used in AllocateMemoryOperation and BoxValue and possibly AllocateMemoryVariable. *) val addAllocatingFunction = Debug.getParameter Debug.profileAllocationTag debugSwitches = 1 datatype destination = SpecificPReg of preg | NoResult | AnyReg (* Context type. 
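loopArgs describes the enclosing loop if we are inside one, stackPtr is the number of words currently pushed on the stack, currHandler is the label of the innermost enclosing handler if there is one, and overflowBlock is used by checkOverflow below so that a function can share a single block that raises Overflow.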
*) type context = { loopArgs: (preg list * int * int) option, stackPtr: int, currHandler: int option, overflowBlock: int option ref } datatype argLoc = ArgumentIsInReg of { realReg: xReg, argReg: preg } | ArgumentIsOnStack of { stackOffset: int, stackReg: stackLocn } | ArgumentIsRegContainer of preg list (* An address as either suitable for Load/StoreWithConstantOffset or else Load/StoreWithIndexedOffset. *) datatype addressKind = AddrOffset of {base: preg, offset: int} | AddrIndex of {base: preg, index: preg} (* Pseudo-regs for the result, the closure and the args that were passed in real regs. *) val resultTarget = newPReg() val closureRegAddr = newPReg() val returnAddrReg = newPReg() val generalArgRegs = [X0, X1, X2, X3, X4, X5, X6, X7] (* If a container is larger than this it is passed on the stack. *) val smallContainerSize = 4 (* Create a map for the arguments indicating their register or stack location. *) local val containerRegs = case List.filter(fn ContainerType _ => true | _ => false) argTypes of [] => NONE | [ContainerType s] => if s <= smallContainerSize then SOME(List.tabulate(s, fn _ => newMergeReg())) else SOME [] (* Larger containers return their result on the stack. *) | _ => raise InternalError "more than one container arg" (* Select the appropriate argument register depending on the argument type. *) fun argTypesToArgEntries([], _, _) = ([], [], [], []) | argTypesToArgEntries(ContainerType s :: tl, gRegs, n) = if s <= smallContainerSize then let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, n-1) val regs = valOf containerRegs in (ArgumentIsRegContainer regs :: argTypes, argCode, argRegs, stackArgs) end (* The address of a larger container is passed as an argument *) else argTypesToArgEntries(GeneralType :: tl, gRegs, n) | argTypesToArgEntries(_ :: tl, gReg :: gRegs, n) = (* This deals with general arguments but also with extra floating point arguments. They are boxed as usual. *) let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, n-1) val argReg=newPReg() in (ArgumentIsInReg{realReg=gReg, argReg=argReg} :: argTypes, argCode, (argReg, gReg) :: argRegs, stackArgs) end | argTypesToArgEntries(_ :: tl, [], n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, [], n-1) val stackLoc = newStackLoc 1 in (ArgumentIsOnStack {stackOffset=n, stackReg = stackLoc } :: argTypes, argCode, argRegs, stackLoc :: stackArgs) end val (argEntries, argCode, argRegs, stackArguments) = argTypesToArgEntries(argTypes, generalArgRegs, List.length argTypes) val clReg = case closure of [] => [] | _ => [(closureRegAddr, X8)] val retReg = [(returnAddrReg, X30)] in val argumentVector = Vector.fromList argEntries (* Start code for the function. *) val beginInstructions = argCode @ [BlockBegin{regArgs=retReg @ clReg @ argRegs, stackArgs=stackArguments }] (* The number of arguments on the stack. Needed in return instrs and tail calls. *) val currentStackArgs = List.length stackArguments val containerResults = Option.map(fn regs => ListPair.zip(regs, generalArgRegs)) containerRegs end (* TODO: Return the values of the container registers if we have multiple results. *) fun returnInstruction({stackPtr, ...}, resReg, tailCode) = let val results = getOpt(containerResults, [(resReg, X0)]) (* Return the result in X0 unless there's a container. 
*) in BlockExit(ReturnResultFromFunction{results=results, returnReg = returnAddrReg, numStackArgs=currentStackArgs}) :: (if stackPtr <> 0 then resetStackPtr{numWords=stackPtr} :: tailCode else tailCode) end fun asTarget(SpecificPReg preg) = preg | asTarget _ = newPReg() fun moveToResult(SpecificPReg tReg, code, sReg) = (moveRegister{source=sReg, dest=tReg} :: code, tReg, false) | moveToResult(AnyReg, code, sReg) = (code, sReg, false) | moveToResult(NoResult, code, sReg) = let val tReg = newPReg() in (moveRegister{source=sReg, dest=tReg} :: code, tReg, false) end (* Store a register at a given offset. This may have to use an index register if the offset is too large. *) fun storeAtWordOffset(toStore, offset, base, loadSize, tailCode) = let val wSize = opWordSize loadSize val byteOffset = offset*wSize in if offset < 4096 andalso byteOffset > ~256 then storeWithConstantOffset{base=base, source=toStore, byteOffset=byteOffset, loadType=loadSize} :: tailCode else let val indexReg = newUReg() in storeWithIndexedOffset{ base=base, source=toStore, index=indexReg, loadType=loadSize, signExtendIndex=false } :: loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=indexReg } :: tailCode end end (* Allocate a fixed size cell with a reference to the profile object if we want to trace the location of live data. Currently only used for tuples and closures. *) fun allocateWithProfileRev(n, flags, memAddr, tlCode) = let fun doAllocation(words, flags, tlCode) = let val wordsRequired = if is32in64 then (* Have to round this up to 8 bytes *) Word64.andb(Word64.fromInt(words+2), ~ 0w2) else Word64.fromInt(words+1) val bytesRequired = Word64.fromLarge(Word.toLarge wordSize) * wordsRequired val lengthWord = Word64.orb(Word64.fromInt words, Word64.<<(Word64.fromLarge(Word8.toLarge flags), if is32in64 then 0w24 else 0w56)) val lengthReg = newUReg() in storeWithConstantOffset{ source=lengthReg, base=memAddr, byteOffset= ~(Word.toInt wordSize), loadType=polyWordLoadSize } :: loadNonAddressConstant{ source=lengthWord, dest=lengthReg } :: allocateMemoryFixed{bytesRequired=bytesRequired, dest=memAddr, saveRegs=[]} :: tlCode end in if addAllocatingFunction then let val profReg = newPReg() in storeAtWordOffset(profReg, n, memAddr, polyWordLoadSize, loadAddressConstant{ source=profileObject, dest=profReg} :: doAllocation(n+1, Word8.orb(flags, Address.F_profile), tlCode)) end else doAllocation(n, flags, tlCode) end (* Return a unit result. *) fun returnUnit(target, code, exit) = let val tReg = asTarget target in (loadNonAddressConstant{source=taggedWord64 0w0, dest=tReg} :: code, tReg, exit) end (* Create a bool result from a test by returning true or false. *) fun makeBoolResultRev(condition, ccRef, target, testCode) = let val trueLab = newLabel() and falseLab = newLabel() and mergeLab = newLabel() val mergeReg = newMergeReg() in moveRegister{dest=target, source=mergeReg} :: BlockLabel mergeLab :: BlockFlow(Unconditional mergeLab) :: loadNonAddressConstant{dest=mergeReg, source=taggedWord64 0w0} :: BlockLabel falseLab :: BlockFlow(Unconditional mergeLab) :: loadNonAddressConstant{dest=mergeReg, source=taggedWord64 0w1} :: BlockLabel trueLab :: BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=trueLab, falseJump=falseLab }) :: testCode end (* Return an absolute address in both native addressing and 32-in-64. 
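In native addressing the register already holds an absolute address and is returned unchanged; in 32-in-64 an ObjectIndexAddressToAbsolute is inserted to convert the object index first.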
*) fun getAbsoluteAddress(code, baseReg) = if is32in64 then let val absReg = newUReg() in (objectIndexAddressToAbsolute{ source=baseReg, dest=absReg } :: code, absReg) end else (code, baseReg) (* Load a value aligned on a 64 or 32-bit boundary. offset is the number of units. Typically this will be a polyword. *) fun wordAddressOffset(destination, baseReg1, offset, loadOp, code) = let val dReg = asTarget destination val opWordSize = opWordSize loadOp val byteOffset = offset * opWordSize val (codeBase, baseReg) = getAbsoluteAddress(code, baseReg1) val code = if offset < 4096 andalso byteOffset > ~256 then loadWithConstantOffset{base=baseReg, dest=dReg, byteOffset=byteOffset, loadType=loadOp} :: codeBase else let val indexReg = newUReg() in loadWithIndexedOffset{ base=baseReg, dest=dReg, index=indexReg, loadType=loadOp, signExtendIndex=false } :: loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=indexReg } :: codeBase end in (code, dReg, false) end (* See if we have a container and return the entry if present. *) datatype containerType = NoContainer | ContainerOnStack of { container: stackLocn, stackOffset: int } | ContainerInRegs of preg list fun getContainerIfPresent(BICExtract(BICLoadLocal l)) = ( case Array.sub(locToPregArray, l) of StackContainer container => ContainerOnStack container | RegisterContainer rc => ContainerInRegs rc | _ => NoContainer ) | getContainerIfPresent(BICExtract(BICLoadArgument a)) = ( case Vector.sub(argumentVector, a) of ArgumentIsRegContainer rc => ContainerInRegs rc | _ => NoContainer ) | getContainerIfPresent _ = NoContainer (* General function for loads and stores. *) fun loadAndStoreWithAddress ({base=bReg1, index, offset}, loadSize, loadShift, isCAddress, loadStoreOffset, loadStoreIndex, code) = let val byteOffset = offset * loadSize (* Get the base register value *) val bCode = code val sCode = bCode (* Get any index register value. *) val (iCode, iReg1Opt) = case index of NONE => if offset < 4096 andalso byteOffset > ~256 then (sCode, NONE) (* We can use this offset. *) else let val iReg = newUReg() in (loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=iReg } :: sCode, SOME iReg) end | SOME iReg1 => let val iCode1 = sCode (* The index is a tagged integer containing the number of units (words, bytes etc). It has to be untagged. If this is a C address it may be negative. *) val iReg2 = newUReg() (* Logical shift if this is a Poly address, arithmetic shift if this is a C address. *) val iCode2 = untagValue{source=iReg1, dest=iReg2, opSize=polyWordOpSize, isSigned=isCAddress } :: iCode1 in if offset = 0 then (iCode2, SOME iReg2) else let (* If there's some constant offset add it to the index. Because it's a byte offset we need to divide it by the scale but it should always be a multiple. N.B. In 32-in-64 the index register contains a 32-bit value even when the offset is negative. *) val cReg = newUReg() and iReg3 = newUReg() val offsetAsWord = LargeWord.fromInt offset (* It could be negative if it's a C address. *) val shiftedOffset = (if isCAddress then LargeWord.~>> else LargeWord.>>) (offsetAsWord, loadShift) in (addSubRegister{ base=iReg2, shifted=cReg, dest=SomeReg iReg3, ccRef=NONE, isAdd=true, length=polyWordOpSize, shift=ShiftNone} :: loadNonAddressConstant{ source=shiftedOffset, dest=cReg } :: iCode2, SOME iReg3) end end (* If this is 32in64 get the absolute address. *) val (absBCode, absBReg) = getAbsoluteAddress(iCode, bReg1) (* If this is a C address the "base address" is actually a box containing the address. 
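The real 64-bit address therefore has to be loaded out of the box before the offset or index is applied.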
*) val (effBCode, effBReg) = if isCAddress then let val bReg = newUReg() in (loadWithConstantOffset{ base=absBReg, dest=bReg, byteOffset=0, loadType=Load64 } :: absBCode, bReg) end else (absBCode, absBReg) in case iReg1Opt of SOME iReg => loadStoreIndex(effBReg, iReg, effBCode) | NONE => loadStoreOffset(effBReg, offset, effBCode) end (* Some operations require a single absolute address. These are all ML addresses so the index/offset is always unsigned. *) fun loadAndStoreWithAbsolute (address, loadSize, loadShift, loadStore, code) = let (* Have to add the offset/index register. *) fun loadStoreOffset(bReg, 0, code) = loadStore(bReg, code) | loadStoreOffset(bReg, offset, code) = let val cReg = newUReg() and aReg = newUReg() in loadStore(aReg, addSubRegister{ base=bReg, shifted=cReg, dest=SomeReg aReg, ccRef=NONE, isAdd=true, length=OpSize64, shift=ShiftNone} :: loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=cReg } :: code) end and loadStoreIndex(bReg, iReg, code) = let val aReg = newUReg() (* The index register is a number of words/bytes etc so has to be multiplied when it's added in. *) val indexShift = if loadShift = 0w0 then ShiftNone else ShiftLSL(Word8.fromLarge(Word.toLarge loadShift)) in loadStore(aReg, addSubRegister{ base=bReg, shifted=iReg, dest=SomeReg aReg, ccRef=NONE, isAdd=true, length=OpSize64, shift=indexShift} :: code) end in loadAndStoreWithAddress (address, loadSize, loadShift, false, loadStoreOffset, loadStoreIndex, code) end (* Overflow check. This raises Overflow if the condition is satisfied. Normally this will be that the overflow bit is set but for multiplication it's more complicated. This generates a single block for the function unless there is a handler. As well as reducing the size of the code this also means that overflow checks are generally BO instructions to the end of the code. Since the default branch prediction is not to take forward jumps this should improve prefetching on the normal, non-overflow, path. *) fun checkOverflow (condition, {currHandler=NONE, overflowBlock=ref(SOME overFlowLab), ...}, ccRef) = (* It's already been set and there's no surrounding handler - use this. *) let val noOverflowLab = newLabel() in [ BlockLabel noOverflowLab, BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=overFlowLab, falseJump=noOverflowLab }) ] end | checkOverflow (condition, {currHandler=NONE, overflowBlock, ...}, ccRef) = let (* *) val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() val () = overflowBlock := SOME overFlowLab in [ BlockLabel noOverflowLab, BlockExit(RaiseExceptionPacket{packetReg=packetReg}), loadAddressConstant{source=toMachineWord(Overflow), dest=packetReg}, BlockLabel overFlowLab, BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=overFlowLab, falseJump=noOverflowLab }) ] end | checkOverflow (condition, {currHandler=SOME h, ...}, ccRef) = let val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() in [ BlockLabel noOverflowLab, BlockRaiseAndHandle(RaiseExceptionPacket{packetReg=packetReg}, h), loadAddressConstant{source=toMachineWord(Overflow), dest=packetReg}, BlockLabel overFlowLab, BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=overFlowLab, falseJump=noOverflowLab }) ] end fun codeToICodeRev(BICNewenv (bindings, exp), context: context as {stackPtr=initialSp, ...}, isTail, destination, tailCode) = let (* Process a list of bindings. 
We need to accumulate the space used by any containers and reset the stack pointer at the end if necessary. *) fun doBindings([], context, tailCode) = (tailCode, context) | doBindings(BICDeclar{value=BICExtract(BICLoadLocal l), addr, ...} :: decs, context, tailCode) = let (* Giving a new name to an existing entry. This should have been removed at a higher level but it doesn't always seem to be. In particular we must treat this specially if it's a container. *) val original = Array.sub(locToPregArray, l) val () = Array.update(locToPregArray, addr, original) in doBindings(decs, context, tailCode) end | doBindings(BICDeclar{value, addr, ...} :: decs, context, tailCode) = let val (code, dest, _) = codeToICodeRev(value, context, false, AnyReg, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs [{lambda, addr, ...}] :: decs, context, tailCode) = (* We shouldn't have single entries in RecDecs but it seems to occur at the moment. *) let val dest = newPReg() val (code, _, _) = codeToICodeRev(BICLambda lambda, context, false, SpecificPReg dest, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs recDecs :: decs, context, tailCode) = let val destRegs = map (fn _ => newPReg()) recDecs val flagsValue = if is32in64 then F_closure else 0w0 (* First build the closures as mutable cells containing zeros. Set the entry in the address table to the register containing the address. *) fun makeClosure({lambda={closure, ...}, addr, ...}, dest, tailCode) = let val () = Array.update(locToPregArray, addr, PregLocation dest) val wordsRequired = List.length closure + (if is32in64 then 2 else 1) val absAddr = if is32in64 then newUReg() else dest val zeroReg = newPReg() val allocAndSetZero = loadNonAddressConstant{ source=taggedWord64 0w0, dest=zeroReg} :: allocateWithProfileRev(wordsRequired, Word8.orb(F_mutable, flagsValue), absAddr, tailCode) val (_, clearCode) = List.foldl(fn (_, (n, l)) => (n+1, storeAtWordOffset(zeroReg, n, absAddr, polyWordLoadSize, l))) (0, allocAndSetZero) closure in if is32in64 then absoluteToObjectIndex{ source=absAddr, dest=dest } :: clearCode else clearCode end val allocClosures = ListPair.foldlEq makeClosure tailCode (recDecs, destRegs) fun setClosure({lambda, ...}, dest, l) = let val absAddr = if is32in64 then newUReg() else dest val flagsReg = newUReg() (* Lock the closure by storing the flags byte without the mutable flag. TODO: We could simply use XZ here. *) in storeWithConstantOffset{ base=absAddr, source=flagsReg, byteOffset=flagsByteOffset, loadType=Load8 } :: loadNonAddressConstant{ source=Word8.toLarge flagsValue, dest=flagsReg } :: storeIntoClosure(lambda, absAddr, context, if is32in64 then objectIndexAddressToAbsolute{ source=dest, dest=absAddr } :: l else l) end val setAndLockClosures = ListPair.foldlEq setClosure allocClosures (recDecs, destRegs) in doBindings(decs, context, setAndLockClosures) end | doBindings(BICNullBinding exp :: decs, context, tailCode) = let val (code, _, _) = codeToICodeRev(exp, context, false, NoResult, tailCode) (* And discard result. 
*) in doBindings(decs, context, code) end | doBindings(BICDecContainer{ addr, size } :: decs, context as {loopArgs, stackPtr, currHandler, overflowBlock}, tailCode) = if size <= smallContainerSize then let val regs = List.tabulate(size, fn _ => newMergeReg()) val () = Array.update(locToPregArray, addr, RegisterContainer regs) in doBindings(decs, context, tailCode) end else let (* Larger container - reserve a portion of stack and zero it. *) val containerLoc = newStackLoc size val () = Array.update(locToPregArray, addr, StackContainer{container=containerLoc, stackOffset=stackPtr+size}) val zeroReg = newPReg() in doBindings(decs, {loopArgs=loopArgs, stackPtr=stackPtr+size, currHandler=currHandler, overflowBlock=overflowBlock}, tailCode <::> loadNonAddressConstant{ source=taggedWord64 0w0, dest=zeroReg } <::> pushToStack{copies=size, container=containerLoc, source=zeroReg}) end val (codeBindings, resContext as {stackPtr=finalSp, ...}) = doBindings(bindings, context, tailCode) (* If we have had a container we'll need to reset the stack *) in if initialSp <> finalSp then let val _ = finalSp >= initialSp orelse raise InternalError "codeToICode - stack ptr" val bodyReg = newPReg() and resultReg = asTarget destination val (codeExp, result, haveExited) = codeToICodeRev(exp, resContext, isTail, SpecificPReg bodyReg, codeBindings) val afterAdjustSp = if haveExited then codeExp else moveRegister{source=result, dest=resultReg} :: resetStackPtr{numWords=finalSp-initialSp} :: codeExp in (afterAdjustSp, resultReg, haveExited) end else codeToICodeRev(exp, resContext, isTail, destination, codeBindings) end | codeToICodeRev(BICExtract(BICLoadLocal l), {stackPtr, ...}, _, destination, tailCode) = ( case Array.sub(locToPregArray, l) of NoLocation => raise InternalError "codeToICodeRev - local unset" | PregLocation preg => moveToResult(destination, tailCode, preg) | StackContainer{container, stackOffset} => let val target = asTarget destination in (containerAddress{dest=target, container=container, stackOffset=stackPtr-stackOffset} :: tailCode, target, false) end | RegisterContainer _ => raise InternalError "BICExtract local: reg container" ) | codeToICodeRev(BICExtract(BICLoadArgument a), {stackPtr, ...}, _, destination, tailCode) = ( case Vector.sub(argumentVector, a) of ArgumentIsInReg{argReg, ...} => (* It was originally in a register. It's now in a preg. *) moveToResult(destination, tailCode, argReg) | ArgumentIsOnStack{stackOffset, stackReg} => (* Pushed before call. *) let val target = asTarget destination in (loadStack{wordOffset=stackOffset+stackPtr, container=stackReg, field=0, dest=target} :: tailCode, target, false) end | ArgumentIsRegContainer _ => raise InternalError "BICExtract argument: reg container" ) | codeToICodeRev(BICExtract(BICLoadClosure c), _, _, destination, tailCode) = let (* Add the number of words for the code address. This is 1 in native but 2 in 32-in-64. *) val offset = if is32in64 then c+2 else c+1 in if c >= List.length closure then raise InternalError "BICExtract: closure" else (); wordAddressOffset(destination, closureRegAddr, offset, polyWordLoadSize, tailCode) end | codeToICodeRev(BICExtract BICLoadRecursive, _, _, destination, tailCode) = (* If the closure is empty we must use the constant. We can't guarantee that the caller will actually load the closure register if it knows the closure is empty. 
*) ( case closure of [] => let val dReg = asTarget destination in (loadAddressConstant{source=closureAsAddress resultClosure, dest=dReg} :: tailCode, dReg, false) end | _ => moveToResult(destination, tailCode, closureRegAddr) ) | codeToICodeRev(BICConstnt(w, _), _, _, destination, tailCode) = let val dReg = asTarget destination val instr = if isShort w then (* When converting to Word64 we do NOT want to use sign-extension. In 32-in-64 signed fixed-precision ints need to have zeros in the top 32 bits. *) loadNonAddressConstant{source=taggedWord64(Word64.fromLarge(Word.toLarge(toShort w))), dest=dReg} else loadAddressConstant{source=w, dest=dReg} in (instr :: tailCode, dReg, false) end | codeToICodeRev(BICField{base, offset}, context, _, destination, tailCode) = let val (codeBase, baseReg, _) = codeToICodeRev(base, context, false, AnyReg, tailCode) in wordAddressOffset(destination, baseReg, offset, polyWordLoadSize, codeBase) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, NoResult, tailCode) = let (* If we don't want the result but are only evaluating for side-effects we may be able to optimise special cases. This was easier in the forward case but for now we don't bother and leave it to the lower levels. *) val startElse = newLabel() and skipElse = newLabel() val codeTest = codeConditionRev(test, context, false, startElse, tailCode) val (codeThen, _, _) = codeToICodeRev(thenPt, context, isTail, NoResult, codeTest) val (codeElse, _, _) = codeToICodeRev(elsePt, context, isTail, NoResult, BlockLabel startElse :: BlockFlow(Unconditional skipElse) :: codeThen) in returnUnit(NoResult, BlockLabel skipElse :: codeElse, false(*??*)) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, destination, tailCode) = let (* Because we may push the result onto the stack we have to create a new preg to hold the result and then copy that to the final result. *) (* If this is a tail each arm will exit separately and neither will return a result. *) val target = asTarget destination val condResult = newMergeReg() val thenTarget = if isTail then newPReg() else condResult val startElse = newLabel() val testCode = codeConditionRev(test, context, false, startElse, tailCode) (* Put the result in the target register. *) val (thenCode, _, thenExited) = codeToICodeRev(thenPt, context, isTail, SpecificPReg thenTarget, testCode) (* Add a jump round the else-part except that if this is a tail we return. The then-part could have exited e.g. with a raise or a loop. *) val (exitThen, thenLabel, elseTarget) = if thenExited then (thenCode, [], target (* Can use original target. *)) else if isTail then (returnInstruction(context, thenTarget, thenCode), [], newPReg()) else let val skipElse = newLabel() in (BlockFlow(Unconditional skipElse) :: thenCode, [moveRegister{source=condResult, dest=target}, BlockLabel skipElse], condResult) end val (elseCode, _, elseExited) = codeToICodeRev(elsePt, context, isTail, SpecificPReg elseTarget, BlockLabel startElse :: exitThen) (* Add a return to the else-part if necessary so we will always exit on a tail. 
*) val exitElse = if isTail andalso not elseExited then returnInstruction(context, elseTarget, elseCode) else elseCode in (thenLabel @ exitElse, target, isTail orelse thenExited andalso elseExited) end | codeToICodeRev(BICUnary instr, context, isTail, destination, tailCode) = codeToICodeUnaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICBinary instr, context, isTail, destination, tailCode) = codeToICodeBinaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICTagTest{test, tag=tagValue, ...}, context, isTail, destination, tailCode) = (* Check the "tag" word of a union (datatype). N.B. Not the same as testing the tag bit of a word. Just generate it as a general word comparison. The optimiser will sort out whether the tag value can be an immediate. *) codeToICodeRev(BICBinary{oper=WordComparison{test=TestEqual, isSigned=false}, arg1=test, arg2=BICConstnt(toMachineWord tagValue, [])}, context, isTail, destination, tailCode) | codeToICodeRev(BICTuple fields, context, _, destination, tailCode) = let val target = asTarget destination (* The allocator sets the register to the absolute address. It has to be converted to an object pointer in 32-in-64. *) val absAddr = if is32in64 then newUReg() else target fun loadFields([], n, tlCode) = allocateWithProfileRev(n, 0w0, absAddr, tlCode) | loadFields((f as BICConstnt _) :: rest, n, tlCode) = let (* Unlike the X86 we still need to load a constant into a register in order to store it in the new tuple. However, it's better to leave that until after the allocation and move it then. That way we can use the same register for different constants if we have a very large tuple. *) val restAndAlloc = loadFields(rest, n+1, tlCode) val (code1, source, _) = codeToICodeRev(f, context, false, AnyReg, restAndAlloc) in storeAtWordOffset(source, n, absAddr, polyWordLoadSize, code1) end | loadFields(f :: rest, n, tlCode) = let val (code1, source, _) = codeToICodeRev(f, context, false, AnyReg, tlCode) val restAndAlloc = loadFields(rest, n+1, code1) in storeAtWordOffset(source, n, absAddr, polyWordLoadSize, restAndAlloc) end val allocAndStore = loadFields(fields, 0, tailCode) val code = if is32in64 then absoluteToObjectIndex{source=absAddr, dest=target} :: allocAndStore else allocAndStore in (code, target, false) end | codeToICodeRev(BICRaise exc, context as { currHandler, ...}, _, destination, tailCode) = let val (code, packetReg, _) = codeToICodeRev(exc, context, false, AnyReg, tailCode) val raiseCode = RaiseExceptionPacket{packetReg=packetReg} val block = case currHandler of NONE => BlockExit raiseCode | SOME h => BlockRaiseAndHandle(raiseCode, h) in returnUnit(destination, block :: code, true (* Always exits *)) end | codeToICodeRev(BICEval{function, argList, ...}, context as { currHandler, ...}, isTail, destination, tailCode) = let val target = asTarget destination (* Create pregs for the closure and each argument. *) val clPReg = newPReg() (* If we have a constant closure we can go directly to the entry point. If the closure is a single word we don't need to load the closure register. *) val (functionCode, closureEntry, callKind) = case function of BICConstnt(addr, _) => let val addrAsAddr = toAddress addr (* If this is a closure we're still compiling we can't get the code address. However if this is directly recursive we can use the recursive convention. 
*) in if wordEq(closureAsAddress resultClosure, addr) then (tailCode, [], Recursive) else if flags addrAsAddr <> Address.F_words andalso flags addrAsAddr <> Address.F_closure then (loadAddressConstant{source=addr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], FullCall) else if is32in64 then (* The code address is a 64-bit value so we have to load it at run-time. The X86 version passes the closure address here and generates a relative CALL/JMP. The actual offset is computed by the RTS. For the moment just use a full call. *) (loadAddressConstant{source=addr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], FullCall) else (* Native 64-bits. *) let val addrLength = length addrAsAddr val _ = addrLength >= 0w1 orelse raise InternalError "BICEval address" val codeAddr = loadWord(addrAsAddr, 0w0) val _ = isCode (toAddress codeAddr) orelse raise InternalError "BICEval address not code" in if addrLength = 0w1 then (tailCode, [], ConstantCode codeAddr) else (loadAddressConstant{source=addr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], ConstantCode codeAddr) end end | BICExtract BICLoadRecursive => ( (* If the closure is empty we don't need to load X8 *) case closure of [] => (tailCode, [], Recursive) | _ => (moveRegister {source=closureRegAddr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], Recursive) ) | function => (* General case. *) (#1 (codeToICodeRev(function, context, false, SpecificPReg clPReg, tailCode)), [(ArgInReg clPReg, X8)], FullCall) local (* Load the first arguments into registers and the rest to the stack. *) fun loadArgs ([], _, tailCode) = (tailCode, [], []) | loadArgs ((arg, _) :: args, gReg::gRegs, tailCode) = let (* General register argument. *) val (c, r, _) = codeToICodeRev(arg, context, false, AnyReg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, gRegs, c) in (code, (ArgInReg r, gReg) :: regArgs, stackArgs) end | loadArgs ((arg, _) :: args, [], tailCode) = let (* Stack argument. *) val (c, r, _) = codeToICodeRev(arg, context, false, AnyReg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, [], c) in (code, regArgs, ArgInReg r :: stackArgs) end fun isSmallContainer(ContainerType s) = s <= smallContainerSize | isSmallContainer _ = false in val (codeArgs, regArgs, stackArgs) = loadArgs(List.filter(not o isSmallContainer o #2) argList, generalArgRegs, functionCode) end (* If this is at the end of the function and the result types are the same we can use a tail-recursive call. *) val tailCall = isTail (*andalso resultType = fnResultType*) val callCode = if tailCall then let val {stackPtr, ...} = context (* The number of arguments currently on the stack. *) val currentStackArgCount = currentStackArgs val newStackArgCount = List.length stackArgs (* The offset of the first argument. Offsets can be negative. *) val stackOffset = stackPtr fun makeStackArgs([], _) = [] | makeStackArgs(arg::args, offset) = {src=arg, stack=offset} :: makeStackArgs(args, offset-1) val stackArgs = makeStackArgs(stackArgs, currentStackArgCount-1) (* The stack adjustment needed to compensate for any items that have been pushed and the differences in the number of arguments. May be positive or negative. *) val stackAdjust = currentStackArgCount - newStackArgCount (* Add an entry for the return address to the register arguments. 
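Since X30 is the AArch64 link register, handing it our own return address means that when the callee returns it goes straight back to our caller, which is exactly the behaviour a tail call needs.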
*) in BlockExit(TailRecursiveCall{regArgs=(ArgInReg returnAddrReg, X30) :: closureEntry @ regArgs, stackArgs=stackArgs, stackAdjust = stackAdjust, currStackSize=stackOffset, callKind=callKind}) :: codeArgs end else let (* See if there is a container argument. *) val containerArg = List.find(fn (_, ContainerType _) => true | _ => false) argList val containerValue = case containerArg of SOME(argVal, _) => getContainerIfPresent argVal | NONE => NoContainer (* When a container is passed as an argument we put the address into a register. Normally the container will be referenced after the call in order to extract the values but if it's discarded we need to make sure it will continue to be referenced at least as far as the call. This isn't a problem for the X86 code-generator since container addresses are a form of the "argument" datatype. *) val stackContainers = case containerValue of ContainerOnStack{container, ...} => [container] | _ => [] (* Get the results. If we're returning the result through a container the target isn't used so we return unit. *) val (results, setTarget) = case containerValue of ContainerInRegs regs => (ListPair.zip(regs, generalArgRegs), [loadNonAddressConstant{source=taggedWord64 0w0, dest=target}]) | ContainerOnStack _ => ([], [loadNonAddressConstant{source=taggedWord64 0w0, dest=target}]) | NoContainer => ([(target, X0)], []) val call = FunctionCall{regArgs=closureEntry @ regArgs, stackArgs=stackArgs, dests=results, callKind=callKind, saveRegs=[], containers=stackContainers} val callBlock = case currHandler of NONE => BlockSimple call :: codeArgs | SOME h => BlockOptionalHandle{call=call, handler=h, label=newLabel()} :: codeArgs in callBlock <@> setTarget end in (callCode, target, tailCall (* We've exited if this was a tail jump *)) end | codeToICodeRev(BICNullary{oper=BuiltIns.GetCurrentThreadId}, _, _, destination, tailCode) = (* Get the ID of the current thread. *) let val target = asTarget destination in (getThreadId{dest=target} :: tailCode, target, false) end - | codeToICodeRev(BICNullary{oper=BuiltIns.CheckRTSException}, _, _, destination, tailCode) = + | codeToICodeRev(BICNullary{oper=BuiltIns.CPUPause}, _, _, destination, tailCode) = (* This is now done in the RTS call code. *) - returnUnit(destination, tailCode, false) + returnUnit(destination, tailCode <::> cpuYield, false) | codeToICodeRev(BICNullary {oper=CreateMutex}, _, _, destination, tailCode) = let (* Allocate memory for a mutex. Use a native word as a mutable, weak, no-overwrite, byte cell which is the same as a volatileRef. This ensures that it will always be cleared when it is loaded even if it was locked when it was saved. *) val target = asTarget destination val flags = Word8.orb(F_mutable, Word8.orb(F_weak, Word8.orb(F_noOverwrite, F_bytes))) (* 0wx69 *) val absAddr = if is32in64 then newUReg() else target val zeroReg = newUReg() val allocAndStore = storeWithConstantOffset{ source=zeroReg, base=absAddr, byteOffset=0, loadType=Load64 } :: loadNonAddressConstant{source=0w0, dest=zeroReg} :: allocateWithProfileRev(if is32in64 then 2 else 1, flags, absAddr, tailCode) val code = if is32in64 then absoluteToObjectIndex{source=absAddr, dest=target} :: allocAndStore else allocAndStore in (code, target, false) end | codeToICodeRev(BICArbitrary { oper=ArithMult, longCall, ... }, context, isTail, destination, tailCode) = (* Just call the long function to do this. Overflow detection makes this too complicated. 
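For addition and subtraction, by contrast, the short path below can work on the tagged values directly: with n represented as 2*n+1, (2a+1) + ((2b+1) - 1) = 2(a+b)+1 and (2a+1) - ((2b+1) - 1) = 2(a-b)+1, so removing the tag from one operand and then doing a flag-setting add or subtract produces the tagged result, with the overflow flag routing out-of-range values to the long-precision call.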
*) codeToICodeRev(longCall, context, isTail, destination, tailCode) | codeToICodeRev(BICArbitrary { oper, shortCond, arg1, arg2, longCall }, context, _, destination, tailCode) = let val startLong = newLabel() and resultLabel = newLabel() val condResult = newMergeReg() (* Test to see if the arguments are short and go straight to the long case if not. *) val testCode = codeConditionRev(shortCond, context, false, startLong, tailCode) (* Do the short case *) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, testCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* We need to subtract the tag from one of the arguments and then do the addition. The optimiser will do the subtraction at compile time if we subtract from a constant so if this is an Add we try to put the constant in the second arg. *) val (firstReg, secondReg) = case (arg1, oper) of (BICConstnt _, ArithAdd) => (aReg2, aReg1) | _ => (aReg1, aReg2) (* Generate code for the short case. Put the result in the merge register. Jump to the result if there's no overflow and to the long case if there is. *) val codeShort = case oper of ArithAdd => let val uReg = newUReg() and chkOverflow = newCCRef() in BlockFlow(Conditional{ ccRef=chkOverflow, condition=CondOverflow, trueJump=startLong, falseJump=resultLabel }) :: addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg condResult, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=true, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code end | ArithSub => let val uReg = newUReg() and chkOverflow = newCCRef() in BlockFlow(Conditional{ ccRef=chkOverflow, condition=CondOverflow, trueJump=startLong, falseJump=resultLabel }) :: addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg condResult, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=false, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code end | _ => raise InternalError "BICArbitrary: unimplemented operation" (* Code for the long case. Put the result into the merge register. *) (* TODO: This could use a tail call if this is at the end of the function. *) val (codeLong, _, _) = codeToICodeRev(longCall, context, false, SpecificPReg condResult, BlockLabel startLong :: codeShort) val target = asTarget destination (* Copy the merge register into the result. *) val finalCode = moveRegister{source=condResult, dest=target} :: BlockLabel resultLabel :: codeLong in (finalCode, target, false) end | codeToICodeRev(BICLambda(lambda as { closure = [], ...}), _, _, destination, tailCode) = (* Empty closure - create a constant closure for any recursive calls. *) let val closure = makeConstantClosure() val () = codeFunctionToArm64(lambda, debugSwitches, closure) val dReg = asTarget destination (* Return the closure itself as the value. *) in (BlockSimple(LoadAddressConstant{source=closureAsAddress closure, dest=dReg}) :: tailCode, dReg, false) end | codeToICodeRev(BICLambda(lambda as { closure, ...}), context, _, destination, tailCode) = (* Non-empty closure. Ignore stack closure option at the moment. *) let val wordsRequired = List.length closure + (if is32in64 then 2 else 1) val target = asTarget destination val absAddr = if is32in64 then newUReg() else target (* The values we're storing are all either constants or local/closure variables so we can allocate the memory and then store into it.
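The cell built here is the code address followed by the values of the closure entries; in 32-in-64 the code address is a full 64-bit value and so occupies two of the 32-bit words, which is why wordsRequired above adds 2 rather than 1.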
*) val allocCode = allocateWithProfileRev(wordsRequired, if is32in64 then F_closure else 0w0, absAddr, tailCode) val storeCode = storeIntoClosure(lambda, absAddr, context, allocCode) val finalCode = if is32in64 then BlockSimple(AbsoluteToObjectIndex{source=absAddr, dest=target}) :: storeCode else storeCode in (finalCode, target, false) end | codeToICodeRev(BICCase { cases, test, default, isExhaustive, firstIndex}, context, isTail, destination, tailCode) = let (* We have to create a new preg for the result in case we need to push it to the stack. *) val targetReg = newMergeReg() local val (testCode, initialTestReg, _) = codeToICodeRev(test, context, false, AnyReg, tailCode) (* Subtract the minimum even if it is zero to remove the tag. This leaves us with a shifted but untagged value. Don't check for overflow. Instead allow large values to wrap around and check later. *) val cReg1 = newUReg() val subValue = taggedWord64(Word64.fromLarge(Word.toLargeX firstIndex)) in val testReg = newUReg() val testCode = addSubRegister{ base=initialTestReg, shifted=cReg1, dest=SomeReg testReg, ccRef=NONE, isAdd=false, length=polyWordOpSize, shift=ShiftNone} :: loadNonAddressConstant{ source=subValue, dest=cReg1 } :: testCode end val (rangeCheck, extraDefaults) = if isExhaustive then (testCode, []) else let (* Check the value is within the number of cases, *2 because this is shifted. *) val cReg2 = newUReg() and ccRef1 = newCCRef() val nCases = List.length cases val continueLab = newLabel() and defaultLab1 = newLabel() val rangeCheck = BlockLabel continueLab :: BlockFlow(Conditional{ccRef=ccRef1, condition=CondCarrySet, trueJump=defaultLab1, falseJump=continueLab}) :: addSubRegister{base=testReg, shifted=cReg2, dest=ZeroReg, ccRef=SOME ccRef1, isAdd=false, length=OpSize64, shift=ShiftNone} :: loadNonAddressConstant{ source=Word64.fromInt nCases * 0w2, dest=cReg2 } :: testCode in (rangeCheck, [defaultLab1]) end (* Make a label for each item in the list. *) val codeLabels = map (fn _ => newLabel()) cases (* Create an exit label in case it's needed. *) val labelForExit = newLabel() (* Generate the code for each of the cases and the default. We need to put an unconditional branch after each to skip the other cases. *) fun codeCases (SOME c :: otherCases, startLabel :: otherLabels, tailCode) = let val caseTarget = if isTail then newPReg() else targetReg (* Put in the case with a jump to the end of the sequence. *) val (codeThisCase, _, caseExited) = codeToICodeRev(c, context, isTail, SpecificPReg caseTarget, BlockLabel startLabel :: tailCode) val exitThisCase = if caseExited then codeThisCase else if isTail then returnInstruction(context, caseTarget, codeThisCase) else BlockFlow(Unconditional labelForExit) :: codeThisCase in codeCases(otherCases, otherLabels, exitThisCase) end | codeCases(NONE :: otherCases, _ :: otherLabels, tailCode) = codeCases(otherCases, otherLabels, tailCode) | codeCases ([], [], tailCode) = let (* We need to add labels for all the gaps we filled and also for a "default" label for the indexed-case instruction itself as well as any range checks. 
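The range check mentioned here works on that shifted, untagged value: with firstIndex = 3 and four cases, for example, a test value of 5 becomes (2*5+1) - (2*3+1) = 4 and passes the unsigned comparison against 2*4 = 8, a value of 9 becomes 12 and is sent to the default, and a value of 1 wraps round to a very large unsigned number and is likewise caught.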
*) fun addDefault (startLabel, NONE, l) = BlockLabel startLabel :: l | addDefault (_, SOME _, l) = l fun asForward l = BlockLabel l val dLabs = map asForward extraDefaults @ tailCode val defLabels = ListPair.foldlEq addDefault dLabs (codeLabels, cases) val defaultTarget = if isTail then newPReg() else targetReg val (defaultCode, _, defaultExited) = codeToICodeRev(default, context, isTail, SpecificPReg defaultTarget, defLabels) in (* Put in the default. Because this is the last we don't need to jump round it. However if this is a tail and we haven't exited we put in a return. That way the case will always have exited if this is a tail. *) if isTail andalso not defaultExited then returnInstruction(context, defaultTarget, defaultCode) else defaultCode end | codeCases _ = raise InternalError "codeCases: mismatch" val codedCases = codeCases(cases, codeLabels, BlockFlow(IndexedBr codeLabels) :: BlockSimple(IndexedCaseOperation{testReg=testReg}) :: rangeCheck) (* We can now copy to the target. If we need to push the result this load will be converted into a push. *) val target = asTarget destination val copyToTarget = if isTail then codedCases else moveRegister{source=targetReg, dest=target} :: BlockLabel labelForExit :: codedCases in (copyToTarget, target, isTail (* We have always exited on a tail. *)) end | codeToICodeRev(BICBeginLoop {loop, arguments}, context as { stackPtr, currHandler, overflowBlock, ...}, isTail, destination, tailCode) = let val target = asTarget destination fun codeArgs ([], tailCode) = ([], tailCode) | codeArgs (({value, addr}, _) :: rest, tailCode) = let val pr = newPReg() val () = Array.update(locToPregArray, addr, PregLocation pr) val (code, _, _) = codeToICodeRev(value, context, false, SpecificPReg pr, tailCode) val (pregs, othercode) = codeArgs(rest, code) in (pr::pregs, othercode) end val (loopRegs, argCode) = codeArgs(arguments, tailCode) val loopLabel = newLabel() val (loopBody, _, loopExited) = codeToICodeRev(loop, {loopArgs=SOME (loopRegs, loopLabel, stackPtr), stackPtr=stackPtr, currHandler=currHandler, overflowBlock=overflowBlock }, isTail, SpecificPReg target, BlockLabel loopLabel :: BlockSimple BeginLoop :: argCode) in (loopBody, target, loopExited) end | codeToICodeRev(BICLoop args, context as {loopArgs=SOME (loopRegs, loopLabel, loopSp), stackPtr, currHandler, ...}, _, destination, tailCode) = let val target = asTarget destination (* Registers to receive the evaluated arguments. We can't put the values into the loop variables yet because the values could depend on the current values of the loop variables. *) val argPRegs = map(fn _ => newPReg()) args val codeArgs = ListPair.foldlEq(fn ((arg, _), pr, l) => #1 (codeToICodeRev(arg, context, false, SpecificPReg pr, l))) tailCode (args, argPRegs) val jumpArgs = ListPair.mapEq(fn (s, l) => {src=ArgInReg s, dst=l}) (argPRegs, loopRegs) (* If we've allocated a container in the loop we have to remove it before jumping back. *) val stackReset = if loopSp = stackPtr then codeArgs else resetStackPtr{numWords=stackPtr-loopSp} :: codeArgs val jumpLoop = JumpLoop{regArgs=jumpArgs, stackArgs=[], checkInterrupt=SOME[]} (* "checkInterrupt" could result in a Interrupt exception so we treat this like a function call. 
*) val code = case currHandler of NONE => BlockFlow(Unconditional loopLabel) :: BlockSimple jumpLoop :: stackReset | SOME h => BlockOptionalHandle{call=jumpLoop, handler=h, label=loopLabel} :: stackReset in (code, target, true) end | codeToICodeRev(BICLoop _, {loopArgs=NONE, ...}, _, _, _) = raise InternalError "BICLoop without BICBeginLoop" (* Copy the source tuple into the container. There are important special cases for both the source tuple and the container. If the source tuple is a BICTuple we have the fields and can store them without creating a tuple on the heap. If the destination is a local container we can store directly into the stack. *) | codeToICodeRev(BICSetContainer{container, tuple, filter}, context as {stackPtr, ...}, _, destination, tailCode) = let local fun createStore containerReg (source, destWord, tail) = storeAtWordOffset(source, destWord, containerReg, Load64, tail) in val (codeContainer, storeInstr) = case getContainerIfPresent container of ContainerOnStack{container, stackOffset} => let fun store(source, destWord, tail) = storeToStack{source=source, container=container, field=destWord, stackOffset=stackPtr-stackOffset+destWord} :: tail in (tailCode, store) end | ContainerInRegs regs => let fun copy(source, destWord, tail) = tail <::> moveRegister{source=source, dest=List.nth(regs, destWord)} in (tailCode, copy) end | NoContainer => let val containerTarget = newPReg() val (codeContainer, _, _) = codeToICodeRev(container, context, false, SpecificPReg containerTarget, tailCode) in (codeContainer, createStore containerTarget) end end val filterLength = BoolVector.length filter val code = case tuple of BICTuple cl => let (* In theory it's possible that the tuple could contain fields that are not used but nevertheless need to be evaluated for their side-effects. Create all the fields and push to the stack. *) fun codeField(arg, (regs, tailCode)) = let val (c, r, _) = codeToICodeRev(arg, context, false, AnyReg, tailCode) in (r :: regs, c) end val (pregsRev, codeFields) = List.foldl codeField ([], codeContainer) cl val pregs = List.rev pregsRev fun copyField(srcReg, (sourceWord, destWord, tailCode)) = if sourceWord < filterLength andalso BoolVector.sub(filter, sourceWord) then (sourceWord+1, destWord+1, storeInstr(srcReg, destWord, tailCode)) else (sourceWord+1, destWord, tailCode) val (_, _, resultCode) = List.foldl copyField (0, 0, codeFields) pregs in resultCode end | tuple => let (* Copy a heap tuple. It is possible that this is another container in which case we must load the fields directly. We mustn't load its address and then copy because loading the address would be the last reference and might cause the container to be reused prematurely. ??? Is that an old comment ?? 
*) val (codeTuple, loadField) = case getContainerIfPresent tuple of ContainerOnStack {container, stackOffset} => let fun getAddr(destReg, sourceWord, tail) = loadStack{dest=destReg, wordOffset=stackPtr-stackOffset+sourceWord, container=container, field=sourceWord} :: tail in (codeContainer, getAddr) end | ContainerInRegs regs => let fun copyReg(destReg, sourceWord, tail) = tail <::> moveRegister{dest=destReg, source=List.nth(regs, sourceWord)} in (codeContainer, copyReg) end | NoContainer => let val (codeTuple, tupleTarget, _) = codeToICodeRev(tuple, context, false, AnyReg, codeContainer) fun loadField(destReg: preg, sourceWord: int, tail): blockStruct list = let val (code, _, _) = wordAddressOffset(SpecificPReg destReg, tupleTarget, sourceWord, polyWordLoadSize, tail) in code end in (codeTuple, loadField) end fun copyContainer(sourceWord, destWord, tailCode) = if sourceWord = filterLength then tailCode else if BoolVector.sub(filter, sourceWord) then let val loadReg = newPReg() val code = storeInstr(loadReg, destWord, loadField(loadReg, sourceWord, tailCode)) in copyContainer(sourceWord+1, destWord+1, code) end else copyContainer(sourceWord+1, destWord, tailCode) in copyContainer(0, 0, codeTuple) end in returnUnit(destination, code, false) end | codeToICodeRev(BICLoadContainer{base, offset}, context as {stackPtr, ...}, _, destination, tailCode) = ( case getContainerIfPresent base of ContainerOnStack {container, stackOffset} => let (* If this is a local container we extract the field. *) val target = asTarget destination val finalOffset = stackPtr-stackOffset+offset val _ = finalOffset >= 0 orelse raise InternalError "offset" in (BlockSimple(LoadStack{wordOffset=finalOffset, container=container, field=offset, dest=target}) :: tailCode, target, false) end | NoContainer => let val (codeBase, baseEntry, _) = codeToICodeRev(base, context, false, AnyReg, tailCode) in wordAddressOffset(destination, baseEntry, offset, Load64, codeBase) end | ContainerInRegs regs => let (* Always copy this into a new register because the source will be a merge reg. *) val target = asTarget destination in (moveRegister{source=List.nth(regs, offset), dest=target} :: tailCode, target, false) end ) | codeToICodeRev(BICLoadOperation{ kind, address}, context, _, destination, tailCode) = codeLoadOperation(kind, address, context, asTarget destination, tailCode) | codeToICodeRev(BICStoreOperation{ kind, address, value}, context, _, destination, tailCode) = codeStoreOperation(kind, address, value, context, destination, tailCode) | codeToICodeRev(BICBlockOperation{ kind=BlockOpMove{isByteMove}, sourceLeft, destRight, length }, context, _, destination, tailCode) = (* Assume these are copying immutable data i.e. vector to vector and string to string. The simplifier now assumes that when optimising short constant moves e.g. concatenating with a constant string. *) let (* Move bytes or words from the source to the destination. Need to get the start addresses and length into new registers because they will be modified. *) val (leftAddr, codeLft) = addressToPregAddress(sourceLeft, context, tailCode) val (rightAddr, codeRt) = addressToPregAddress(destRight, context, codeLft) val (codeLength, lengthReg, _) = codeToICodeRev(length, context, false, AnyReg, codeRt) val loadOp = if isByteMove then Load8 else if is32in64 then Load32 else Load64 (* This threads the calls through two calls to loadAndStoreWithAbsolute to compute the addresses. 
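Concretely, getDestAndMove below is the continuation passed to loadAndStoreWithAbsolute for the source (left) address, and doMove inside it is the continuation for the destination (right) address, so by the time blockMove is emitted both absolute addresses and the untagged length have been copied into fresh registers that the block move is free to overwrite.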
*) fun getDestAndMove(ltReg, tailCode) = let fun doMove (rtReg, code) = let val lengthReg2 = newUReg() and ltReg2 = newUReg() and rtReg2 = newUReg() in blockMove{ srcAddr=ltReg2, destAddr=rtReg2, length=lengthReg2, isByteMove=isByteMove } :: moveRegister{dest=rtReg2, source=rtReg} :: moveRegister{dest=ltReg2, source=ltReg} :: untagValue{dest=lengthReg2, source=lengthReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAbsolute (rightAddr, opWordSize loadOp, loadShift loadOp, doMove, tailCode) end in returnUnit(destination, loadAndStoreWithAbsolute (leftAddr, opWordSize loadOp, loadShift loadOp, getDestAndMove, codeLength), false) end | codeToICodeRev(BICBlockOperation{ kind=BlockOpEqualByte, sourceLeft, destRight, length }, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() (* Compare bytes for equality. Need to get the start addresses and length into new registers because they will be modified. *) val (leftAddr, codeLft) = addressToPregAddress(sourceLeft, context, tailCode) val (rightAddr, codeRt) = addressToPregAddress(destRight, context, codeLft) val (codeLength, lengthReg, _) = codeToICodeRev(length, context, false, AnyReg, codeRt) (* This threads the calls through two calls to loadAndStoreWithAbsolute to compute the addresses. *) fun getRightAndCompare(ltReg, tailCode) = let fun doComparison (rtReg, code) = let val lengthReg2 = newUReg() and ltReg2 = newUReg() and rtReg2 = newUReg() in compareByteVectors{ vec1Addr=ltReg2, vec2Addr=rtReg2, length=lengthReg2, ccRef=ccRef } :: moveRegister{dest=rtReg2, source=rtReg} :: moveRegister{dest=ltReg2, source=ltReg} :: untagValue{dest=lengthReg2, source=lengthReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAbsolute (rightAddr, opWordSize Load8, loadShift Load8, doComparison, tailCode) end val testCode = loadAndStoreWithAbsolute (leftAddr, opWordSize Load8, loadShift Load8, getRightAndCompare, codeLength) in (makeBoolResultRev(CondEqual, ccRef, target, testCode), target, false) end | codeToICodeRev(BICBlockOperation{ kind=BlockOpCompareByte, sourceLeft, destRight, length }, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() (* Similar to OpEqualByte except it returns -1, 0, +1 depending on the condition code. *) (* Compare bytes for equality. Need to get the start addresses and length into new registers because they will be modified. *) val (leftAddr, codeLft) = addressToPregAddress(sourceLeft, context, tailCode) val (rightAddr, codeRt) = addressToPregAddress(destRight, context, codeLft) val (codeLength, lengthReg, _) = codeToICodeRev(length, context, false, AnyReg, codeRt) (* This threads the calls through two calls to loadAndStoreWithAbsolute to compute the addresses. *) fun getRightAndCompare(ltReg, tailCode) = let fun doComparison (rtReg, code) = let val lengthReg2 = newUReg() and ltReg2 = newUReg() and rtReg2 = newUReg() val exitLab = newLabel() and labGreater = newLabel() and labNotGreater = newLabel() and labLess = newLabel() and labNotLess = newLabel() val mergeResult = newMergeReg() val taggedMinus1 = if is32in64 then 0wxffffffff else 0wxffffffffffffffff in (* Compare the words then a series of comparisons to set the result. TODO; The old code-generator makes the "equal" exit of compareByteVectors jump directly to code to set the result to zero. 
It then uses loadNonAddress(X0, Word64.fromInt(tag 1)) followed by conditionalSetInverted{regD=X0, regTrue=X0, regFalse=XZero, cond=CondUnsignedHigher} to set the result to one or minus one. N.B. This needs to use a 32-bit operation on 32-in-64. *) moveRegister{dest=target, source=mergeResult} :: BlockLabel exitLab :: loadNonAddressConstant{source=taggedWord64 0w1, dest=mergeResult} :: BlockLabel labGreater :: BlockFlow(Unconditional exitLab) :: loadNonAddressConstant{source=taggedMinus1, dest=mergeResult} :: BlockLabel labLess :: BlockFlow(Unconditional exitLab) :: loadNonAddressConstant{source=taggedWord64 0w0, dest=mergeResult} :: BlockLabel labNotGreater :: BlockFlow(Conditional{ ccRef=ccRef, condition=CondUnsignedHigher, trueJump=labGreater, falseJump=labNotGreater }) :: BlockLabel labNotLess :: BlockFlow(Conditional{ ccRef=ccRef, condition=CondCarryClear, trueJump=labLess, falseJump=labNotLess }) :: compareByteVectors{ vec1Addr=ltReg2, vec2Addr=rtReg2, length=lengthReg2, ccRef=ccRef } :: moveRegister{dest=rtReg2, source=rtReg} :: moveRegister{dest=ltReg2, source=ltReg} :: untagValue{dest=lengthReg2, source=lengthReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAbsolute (rightAddr, opWordSize Load8, loadShift Load8, doComparison, tailCode) end val testCode = loadAndStoreWithAbsolute (leftAddr, opWordSize Load8, loadShift Load8, getRightAndCompare, codeLength) in (testCode, target, false) end | codeToICodeRev(BICAllocateWordMemory {numWords, flags, initial }, context, _, destination, tailCode) = let (* Allocate a block of memory and initialise it. *) val target = asTarget destination val (codeSize, sizeReg, _) = codeToICodeRev(numWords, context, false, AnyReg, tailCode) val (codeFlags, flagsReg, _) = codeToICodeRev(flags, context, false, AnyReg, codeSize) val (codeInit, initReg, _) = codeToICodeRev(initial, context, false, AnyReg, codeFlags) val uSizeReg = newUReg() and shiftFReg = newUReg() and lengthWord = newUReg() val absAddr = if is32in64 then newUReg() else target val untagSize = untagValue{source=sizeReg, dest=uSizeReg, opSize=polyWordOpSize, isSigned=false} :: codeInit val allocateMem = allocateMemoryVariable{ size=uSizeReg, dest=absAddr, saveRegs=[]} :: untagSize (* Make the length word by first shifting the flags into the length word reg by 55 or 23 bits. This puts the tag bit in the top bit of the size. Then insert the size into this which will overwrite the flag's tag bit. *) val makeLengthWord = bitFieldInsert{ source=uSizeReg, destAsSource=shiftFReg, dest=lengthWord, length=polyWordOpSize, immr=0w0 (*bit 0*), imms=if is32in64 then 0w23 else 0w55 (*width-1*) } :: shiftConstant{direction=Arm64ICode.ShiftLeft, dest=shiftFReg, source=flagsReg, shift=if is32in64 then 0w23 else 0w55, opSize=polyWordOpSize } :: allocateMem val setLengthWordAndInit = initialiseMem{ size=uSizeReg, addr=absAddr, init=initReg} :: storeWithConstantOffset{ source=lengthWord, base=absAddr, byteOffset= ~(Word.toInt wordSize), loadType=polyWordLoadSize } :: makeLengthWord val finalCode = if is32in64 then absoluteToObjectIndex{ source=absAddr, dest=target } :: setLengthWordAndInit else setLengthWordAndInit in (finalCode, target, false) end | codeToICodeRev(BICHandle{exp, handler, exPacketAddr}, context as { stackPtr, loopArgs, overflowBlock, ... }, isTail, destination, tailCode) = let (* As with BICCond and BICCase we need to create a new register for the result in case we need to push it to the stack. 
*) val handleResult = newMergeReg() val handlerLab = newLabel() and startHandling = newLabel() val (bodyTarget, handlerTarget) = if isTail then (newPReg(), newPReg()) else (handleResult, handleResult) (* TODO: Even if we don't actually want a result we force one in here by using "asTarget". *) (* The expression cannot be treated as a tail because the handler has to be removed after. It may "exit" if it has raised an unconditional exception. If it has we mustn't generate a PopExceptionHandler because there won't be any result for resultReg. We need to add two words to the stack to account for the items pushed by PushExceptionHandler. We create an instruction to push the handler followed by a block fork to the start of the code and, potentially the handler, then a label to start the code that the handler is in effect for. *) val initialCode = BlockLabel startHandling :: BlockFlow(SetHandler{handler=handlerLab, continue=startHandling}) :: BlockSimple(PushExceptionHandler) :: tailCode val (expCode, _, expExit) = codeToICodeRev(exp, {stackPtr=stackPtr+2, loopArgs=loopArgs, currHandler=SOME handlerLab, overflowBlock=overflowBlock}, false (* Not tail *), SpecificPReg bodyTarget, initialCode) (* If this is the tail we can replace the jump at the end of the handled code with returns. If the handler has exited we don't need a return there. Otherwise we need to add an unconditional jump to skip the handler. *) val (atExpEnd, skipExpLabel) = case (isTail, expExit) of (true, true) => (* Tail and exited. *) (expCode, NONE) | (true, false) => (* Tail and not exited. *) (returnInstruction(context, bodyTarget, BlockSimple(PopExceptionHandler) :: expCode), NONE) | (false, true) => (* Not tail but exited. *) (expCode, NONE) | (false, false) => let val skipHandler = newLabel() in (BlockFlow(Unconditional skipHandler) :: BlockSimple(PopExceptionHandler) :: expCode, SOME skipHandler) end (* Make a register to hold the exception packet; BeginHandler will put the packet into it. *) val packetAddr = newPReg() val () = Array.update(locToPregArray, exPacketAddr, PregLocation packetAddr) val (handleCode, _, handleExit) = codeToICodeRev(handler, context, isTail, SpecificPReg handlerTarget, BlockSimple(BeginHandler{packetReg=packetAddr}) :: BlockLabel handlerLab :: atExpEnd) val target = asTarget destination val afterHandler = case (isTail, handleExit) of (true, true) => (* Tail and exited. *) handleCode | (true, false) => (* Tail and not exited. *) returnInstruction(context, handlerTarget, handleCode) | (false, _) => (* Not tail. *) handleCode val addLabel = case skipExpLabel of SOME lab => BlockLabel lab:: afterHandler | NONE => afterHandler in (moveRegister{source=handleResult, dest=target} :: addLabel, target, isTail) end and codeConditionRev(condition, context, jumpOn, jumpLabel, tailCode) = (* Jump optimisation is done later. Just generate the general case. Load the value into a register and compare it with 1 (true) *) let val ccRef = newCCRef() val (testCode, testReg, _) = codeToICodeRev(condition, context, false, AnyReg, tailCode) val noJumpLabel = newLabel() in BlockLabel noJumpLabel :: BlockFlow(Conditional{ccRef=ccRef, condition=if jumpOn then CondEqual else CondNotEqual, trueJump=jumpLabel, falseJump=noJumpLabel}) :: (* Compare: SUBS XZ,reg,3. Can use 32-bit comparison because it's either tagged 0 or tagged 1.
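With the 2n+1 tagging used here false is represented as 1 and true as 3, so the immediate taggedWord 0w1 below is 3 and the subtraction sets the Z flag exactly when the value is true; both encodings fit easily in the low 32 bits.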
*) addSubImmediate{source=testReg, immed=taggedWord 0w1, isAdd=false, dest=ZeroReg, length=OpSize32, ccRef=SOME ccRef} :: testCode end and codeToICodeUnaryRev({oper=NotBoolean, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (argCode, testDest, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in (* Test the argument and return a boolean result. If either the argument is a condition or the result is used in a test this will be better than using XOR. *) (makeBoolResultRev(CondNotEqual, ccRef, target, addSubImmediate{source=testDest, immed=taggedWord 0w1, isAdd=false, dest=ZeroReg, length=OpSize32 (* Always either tagged 0 or tagged 1 *), ccRef=SOME ccRef} :: argCode), target, false) end | codeToICodeUnaryRev({oper=IsTaggedValue, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (argCode, testDest, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in (* Test the argument and return a boolean result. This ought to be optimised at a lower level to use a test-and-branch. *) (makeBoolResultRev(CondNotEqual, ccRef, target, logicalImmediate{source=testDest, immed=0w1 (* The tag bit*), logOp=LogAnd, dest=ZeroReg, length=OpSize32 (* Always either tagged 0 or tagged 1 *), ccRef=SOME ccRef} :: argCode), target, false) end | codeToICodeUnaryRev({oper=MemoryCellLength, arg1}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) (* Load the word at -1 (words) into a ureg *) val (codeLoad, _, _) = wordAddressOffset(SpecificPReg ureg1, baseReg, ~1, polyWordLoadSize, codeBase) (* Select 56 or 24 bits and shift it left. This disassembles as UBFIZ..*) val lsb = 0w1 and width = if is32in64 then 0w24 else 0w56 (* Encoding for unsignedBitfieldInsertinZeros64/32 *) val immr = if is32in64 then Word.~ lsb mod 0w32 else Word.~ lsb mod 0w64 val imms = width-0w1 val maskAndShift = bitFieldShift{source=ureg1, dest=ureg2, isSigned=false, length=polyWordOpSize, immr=immr, imms=imms} :: codeLoad val target = asTarget destination val addTag = addSubImmediate{dest=SomeReg target, source=ureg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: maskAndShift in (addTag, target, false) end | codeToICodeUnaryRev({oper=MemoryCellFlags, arg1}, context, _, destination, tailCode) = let (* Load the flags byte and tag it. *) val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (codeRealBase, realBaseReg) = getAbsoluteAddress(codeBase, baseReg) val ureg = newUReg() val codeLoad = loadWithConstantOffset{ base=realBaseReg, dest=ureg, byteOffset=flagsByteOffset, loadType=Load8 } :: codeRealBase val target = asTarget destination val withTag = tagValue{ source=ureg, dest=target, isSigned=false, opSize=OpSize32 } :: codeLoad in (withTag, target, false) end | codeToICodeUnaryRev({oper=ClearMutableFlag, arg1}, context, _, destination, tailCode) = let val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (codeRealBase, realBaseReg) = getAbsoluteAddress(codeBase, baseReg) val ureg1 = newUReg() and ureg2 = newUReg() (* Load the flags, mask off the mutable bit and store it back. 
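For instance, a byte cell created with flags 0wx69 (mutable, weak, no-overwrite, bytes, as used for mutexes above) would end up as 0wx29: the mask Word64.xorb(0wxffffffff, 0wx40) is 0wxffffffbf, which clears only the F_mutable bit and leaves the other flag bits unchanged.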
*) val code = storeWithConstantOffset{ base=realBaseReg, source=ureg2, byteOffset=flagsByteOffset, loadType=Load8 } :: logicalImmediate{ source=ureg1, dest=SomeReg ureg2, ccRef=NONE, immed=Word64.xorb(0wxffffffff, 0wx40), logOp=LogAnd, length=OpSize32 } :: loadWithConstantOffset{ base=realBaseReg, dest=ureg1, byteOffset=flagsByteOffset, loadType=Load8 } :: codeRealBase in returnUnit(destination, code, false) end | codeToICodeUnaryRev({oper=LongWordToTagged, arg1}, context, _, destination, tailCode) = let val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg = newUReg() val target = asTarget destination val code = tagValue{ source=uReg, dest=target, isSigned=false, opSize=polyWordOpSize } :: unboxLarge{ source=baseReg, dest=uReg } :: codeBase in (code, target, false) end | codeToICodeUnaryRev({oper=SignedToLongWord, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg = newUReg() val target = asTarget destination (* We can use a single instruction here on both 32-in-64 and native 64-bits. On 64-bits this is equivalent to an arithmetic shift; on 32-bits it propagates the sign bit into the high-order part. *) val code = boxLarge{ source=uReg, dest=target, saveRegs=[] } :: bitFieldShift{ source=aReg1, dest=uReg, isSigned=true, length=OpSize64, immr=0w1, imms=if is32in64 then 0wx1f else 0wx3f } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=UnsignedToLongWord, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg = newUReg() val target = asTarget destination (* This amounts to a logical shift. Since the top half of the register is zero in 32-in-64 we don't have to select just the low word but there's no advantage in not. *) val code = boxLarge{ source=uReg, dest=target, saveRegs=[] } :: bitFieldShift{ source=aReg1, dest=uReg, isSigned=false, length=OpSize64, immr=0w1, imms=if is32in64 then 0wx1f else 0wx3f } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealAbs precision, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val fpSize = precisionToFpSize precision val fpOp = case precision of PrecSingle => AbsFloat | PrecDouble => AbsDouble val code = boxTagFloat{ floatSize=fpSize, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=fpOp } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealNeg precision, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val fpSize = precisionToFpSize precision val fpOp = case precision of PrecSingle => NegFloat | PrecDouble => NegDouble val code = boxTagFloat{ floatSize=fpSize, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=fpOp } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealFixedInt precision, arg1}, context, _, destination, tailCode) = let (* Convert a tagged integer (FixedInt.int) to float or double. 
*) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val fpSize = precisionToFpSize precision val code = boxTagFloat{ floatSize=fpSize, source=uReg2, dest=target, saveRegs=[] } :: convertIntToFloat{ source=uReg1, dest=uReg2, srcSize=polyWordOpSize, destSize=fpSize } :: untagValue{ source=aReg1, dest=uReg1, opSize=polyWordOpSize, isSigned=true } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=FloatToDouble, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val code = boxTagFloat{ floatSize=Double64, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=ConvFloatToDble } :: unboxTagFloat{ floatSize=Float32, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=DoubleToFloat, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val code = boxTagFloat{ floatSize=Float32, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=ConvDbleToFloat } :: unboxTagFloat{ floatSize=Double64, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealToInt(precision, rounding), arg1}, context, _, destination, tailCode) = let (* Convert a float or double to a tagged int. We could get an overflow in either the conversion to integer or in the conversion to a tagged value. Fortunately if the conversion detects an overflow it sets the result to a value that will cause an overflow in the addition. *) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val target = asTarget destination val chkOverflow = newCCRef() val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val fpSize = precisionToFpSize precision val code = (* Set the tag bit. *) addSubImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: checkOverflow(CondOverflow, context, chkOverflow) @ (* Add it to itself and set the condition code. *) addSubRegister{base=uReg2, shifted=uReg2, dest=SomeReg uReg3, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=true, shift=ShiftNone} :: convertFloatToInt{ source=uReg1, dest=uReg2, srcSize=fpSize, destSize=polyWordOpSize, rounding=rounding } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=TouchAddress, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in returnUnit(destination, touchValue{source=aReg1} :: arg1Code, false) end | codeToICodeUnaryRev({oper=AllocCStack, arg1}, context, _, destination, tailCode) = let (* Allocate space on the stack. The higher levels have already aligned the size to a multiple of 16. The number of bytes to allocate is a Word.word value. The result is a boxed large word. 
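The 16-byte alignment matters because AArch64 requires the stack pointer itself to remain 16-byte aligned; the sequence below untags the byte count, subtracts it from the stack pointer with addSubXSP and boxes the resulting address, which is the base of the newly reserved block.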
*) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val code = boxLarge{ source=uReg2, dest=target, saveRegs=[] } :: addSubXSP{ source=uReg1, dest=SomeReg uReg2, isAdd=false } :: untagValue{ source=aReg1, dest=uReg1, isSigned=false, opSize=polyWordOpSize } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=LockMutex, arg1}, context, _, destination, tailCode) = (* The earliest versions of the Arm8 do not have the LDADD instruction which will do this directly. To preserve compatibility we use LDAXR/STLXR which require a loop. *) let local val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in val (baseCode, baseReg) = getAbsoluteAddress(arg1Code, aReg1) end val loopLabel = newLabel() and noLoopLabel = newLabel() val target = asTarget destination val ccRef1 = newCCRef() and ccRef2 = newCCRef() val uRegNew = newUReg() and uRegTest = newUReg() and uRegOld = newUReg() (* N.B. in reverse order. *) val code = (* The result is true if the old value was zero. *) addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize64, ccRef=SOME ccRef2} :: memoryBarrier :: (* Put in the memory barrier. *) (* If the result is zero we've been successful otherwise we loop. *) BlockLabel noLoopLabel :: BlockFlow(Conditional{ ccRef=ccRef1, condition=CondNotEqual, trueJump=loopLabel, falseJump=noLoopLabel }) :: addSubImmediate{source=uRegTest, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize32, ccRef=SOME ccRef1} :: (* Add and try to store the result *) storeReleaseExclusive{ base=baseReg, source=SomeReg uRegNew, result=uRegTest } :: addSubImmediate{source=uRegOld, dest=SomeReg uRegNew, immed=0w1, isAdd=true, length=OpSize64, ccRef=NONE} :: loadAcquireExclusive{ base=baseReg, dest=uRegOld } :: BlockLabel loopLabel :: baseCode in (makeBoolResultRev(CondEqual, ccRef2, target, code), target, false) end | codeToICodeUnaryRev({oper=TryLockMutex, arg1}, context, _, destination, tailCode) = (* *) let (* Could use LDUMAXAL to set it the greater of the current value or 1. *) local val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in val (baseCode, baseReg) = getAbsoluteAddress(arg1Code, aReg1) end val loopLabel = newLabel() and noLoopLabel = newLabel() and okLabel = newLabel() val target = asTarget destination val ccRef0 = newCCRef() and ccRef1 = newCCRef() and ccRef2 = newCCRef() val uRegNew = newUReg() and uRegTest = newUReg() and uRegOld = newUReg() val code = (* The result is true if the old value was zero. *) addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize64, ccRef=SOME ccRef2} :: memoryBarrier :: (* Put in the memory barrier. *) (* If the result is zero we've been successful otherwise we loop. *) BlockLabel noLoopLabel :: BlockFlow(Conditional{ ccRef=ccRef1, condition=CondNotEqual, trueJump=loopLabel, falseJump=noLoopLabel }) :: addSubImmediate{source=uRegTest, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize32, ccRef=SOME ccRef1} :: (* If the lock wasn't taken set it to one to lock it. *) storeReleaseExclusive{ base=baseReg, source=SomeReg uRegNew, result=uRegTest } :: loadNonAddressConstant{source=0w1, dest=uRegNew } :: BlockLabel okLabel :: (* If it's not zero don't try to store anything back and exit the loop. 
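In outline the loop is roughly: load-acquire-exclusive the current value; if it is non-zero give up; otherwise attempt a store-release-exclusive of 1 and retry from the load if the store-exclusive reports failure; finally issue the barrier and return whether the value originally seen was zero, i.e. whether we actually took the lock.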
*) BlockFlow(Conditional{ ccRef=ccRef0, condition=CondNotEqual, trueJump=noLoopLabel, falseJump=okLabel }) :: addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize64, ccRef=SOME ccRef0} :: (* Get the old value and see if it's zero i.e. unlocked. *) loadAcquireExclusive{ base=baseReg, dest=uRegOld } :: BlockLabel loopLabel :: baseCode in (makeBoolResultRev(CondEqual, ccRef2, target, code), target, false) end | codeToICodeUnaryRev({oper=UnlockMutex, arg1}, context, _, destination, tailCode) = (* Get the previous value of the mutex to see if another thread had tried to lock it and set the result to zero. *) let (* Could use SWAPAL *) local val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in val (baseCode, baseReg) = getAbsoluteAddress(arg1Code, aReg1) end val loopLabel = newLabel() and noLoopLabel = newLabel() val target = asTarget destination val ccRef1 = newCCRef() and ccRef2 = newCCRef() val uRegTest = newUReg() and uRegOld = newUReg() val code = (* The result is true if the old value was one. i.e. we were the only thread that locked it. *) addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w1, isAdd=false, length=OpSize64, ccRef=SOME ccRef2} :: memoryBarrier :: (* Put in the memory barrier. *) (* If the result is zero we've been successful otherwise we loop. *) BlockLabel noLoopLabel :: BlockFlow(Conditional{ ccRef=ccRef1, condition=CondNotEqual, trueJump=loopLabel, falseJump=noLoopLabel }) :: addSubImmediate{source=uRegTest, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize32, ccRef=SOME ccRef1} :: (* Try to set this to zero *) storeReleaseExclusive{ base=baseReg, source=ZeroReg, result=uRegTest } :: loadAcquireExclusive{ base=baseReg, dest=uRegOld } :: BlockLabel loopLabel :: baseCode in (makeBoolResultRev(CondEqual, ccRef2, target, code), target, false) end and codeToICodeBinaryRev({oper=WordComparison{test, isSigned}, arg1, arg2}, context, _, destination, tailCode) = let (* Comparisons. This is now only used for tagged values, not for pointer equality. *) val ccRef = newCCRef() val (testCode1, testDest1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (testCode2, testDest2, _) = codeToICodeRev(arg2, context, false, AnyReg, testCode1) val comparison = addSubRegister{base=testDest1, shifted=testDest2, dest=ZeroReg, length=polyWordOpSize, ccRef=SOME ccRef, isAdd=false, shift=ShiftNone} :: testCode2 val target = asTarget destination open BuiltIns val cond = case (test, isSigned) of (TestEqual, _) => CondEqual | (TestLess, true) => CondSignedLess | (TestLessEqual, true) => CondSignedLessEq | (TestGreater, true) => CondSignedGreater | (TestGreaterEqual, true) => CondSignedGreaterEq | (TestLess, false) => CondCarryClear | (TestLessEqual, false) => CondUnsignedLowOrEq | (TestGreater, false) => CondUnsignedHigher | (TestGreaterEqual, false) => CondCarrySet | (TestUnordered, _) => raise InternalError "WordComparison: TestUnordered" in (makeBoolResultRev(cond, ccRef, target, comparison), target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* We need to subtract the tag from one of the arguments and then do the addition. 
The optimiser will do the subtraction at compile time if we subtract from a constant so try to put the constant in the second arg. *) val (firstReg, secondReg) = case arg1 of BICConstnt _ => (aReg2, aReg1) | _ => (aReg1, aReg2) val uReg = newUReg() val chkOverflow = newCCRef() val code = checkOverflow(CondOverflow, context, chkOverflow) @ addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=true, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg = newUReg() val chkOverflow = newCCRef() val code = checkOverflow(CondOverflow, context, chkOverflow) @ addSubRegister{base=aReg1, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=false, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() val chkOverflow = newCCRef() (* Untag one argument. subtract the tag from the second, multiply and add back the tag. *) val multiplyCode = addSubImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: multiplication{kind=if is32in64 then SignedMultAddLong else MultAdd64, dest=uReg3, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2} :: addSubImmediate{dest=SomeReg uReg2, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg1, dest=uReg1, isSigned=true, opSize=polyWordOpSize } :: arg2Code (* Overflow check: The condition for overflow is that the high order part (64-bits in native 64-bits, 32-bits in 32-in-64) must be zero if the result is positive and all ones if the result is negative. The high-order part is in uReg3 in 32-in-64 since we've already used SignedMultAddLong but in native 64-bits we need to use SignedMultHigh to get the high order part. In both cases we can use a comparison with ShiftASR to give a value containing just the sign of the result. 
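Shrinking the word size to eight bits purely for illustration: 6 * 7 = 42 has a high byte of 0 and 42 ASR 7 is also 0, so the two agree and there is no overflow; 16 * 16 = 256 has a high byte of 1 while the low byte is 0, whose sign bits are all zero, so the mismatch signals overflow.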
*) val checkOverflowCode = if is32in64 then addSubRegister{ base=uReg4, shifted=target, dest=ZeroReg, ccRef=SOME chkOverflow, isAdd=false, length=OpSize32, shift=ShiftASR 0w31 } :: shiftConstant{direction=Arm64ICode.ShiftRightArithmetic, source=uReg3, dest=uReg4, shift=0w32, opSize=OpSize64 (* Have to start with 64-bits *)} :: multiplyCode else addSubRegister{ base=uReg4, shifted=target, dest=ZeroReg, ccRef=SOME chkOverflow, isAdd=false, length=OpSize64, shift=ShiftASR 0w63 } :: multiplication{kind=SignedMultHigh, dest=uReg4, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2} :: multiplyCode val code = checkOverflow(CondNotEqual, context, chkOverflow) @ checkOverflowCode in (code, target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithQuot, arg1, arg2}, context, _, destination, tailCode) = let (* The word version avoids an extra shift. Don't do that here at least for the moment. Division by zero and overflow are checked for at the higher level. *) val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = tagValue { source=uReg3, dest=target, opSize=polyWordOpSize, isSigned=true } :: division{isSigned=true, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: untagValue{ source=aReg2, dest=uReg2, isSigned=true, opSize=polyWordOpSize } :: untagValue{ source=aReg1, dest=uReg1, isSigned=true, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithRem, arg1, arg2}, context, _, destination, tailCode) = let (* For the moment we remove the tags and then retag afterwards. The word version avoids this but at least for the moment we do it the longer way. *) (* There's no direct way to get the remainder - have to use divide and multiply. *) val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() val code = tagValue { source=uReg4, dest=target, opSize=polyWordOpSize, isSigned=true } :: multiplication{kind=if is32in64 then MultSub32 else MultSub64, dest=uReg4, sourceM=uReg3, sourceN=uReg2, sourceA=SomeReg uReg1} :: division{isSigned=true, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: untagValue{ source=aReg2, dest=uReg2, isSigned=true, opSize=polyWordOpSize } :: untagValue{ source=aReg1, dest=uReg1, isSigned=true, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithDiv, ...}, _, _, _, _) = raise InternalError "unimplemented operation: FixedPrecisionArith ArithDiv" | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithMod, ...}, _, _, _, _) = raise InternalError "unimplemented operation: FixedPrecisionArith ArithMod" | codeToICodeBinaryRev({oper=WordArith ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* We need to subtract the tag from one of the arguments and then do the addition. 
The optimiser will do the subtraction at compile time if we subtract from a constant so try to put the constant in the second arg. *) val (firstReg, secondReg) = case arg1 of BICConstnt _ => (aReg2, aReg1) | _ => (aReg1, aReg2) val uReg = newUReg() val code = addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, isAdd=true, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordArith ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg = newUReg() (* TODO: If the first argument is a constant we could add one to that rather than subtracting one from the second argument. We're not concerned with overflow. *) val code = addSubRegister{base=aReg1, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, isAdd=false, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordArith ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() (* Untag one argument. subtract the tag from the second, multiply and add back the tag. *) val code = addSubImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: multiplication{kind=if is32in64 then MultAdd32 else MultAdd64, dest=uReg3, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2} :: addSubImmediate{dest=SomeReg uReg2, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg1, dest=uReg1, isSigned=false, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordArith ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() (* Untag the divisor (into uReg2). subtract the tag from the dividend (into uReg1), divide and or in the tag. The tag may have been set already depending on the result of the division. *) val code = logicalImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, logOp=LogOr} :: division{isSigned=false, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: addSubImmediate{dest=SomeReg uReg1, source=aReg1, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg2, dest=uReg2, isSigned=false, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordArith ArithMod, arg1, arg2}, context, _, destination, tailCode) = let (* There's no direct way to get the remainder - have to use divide and multiply. 
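The identity used is rem = a - (a div b)*b, and it falls out neatly on the tagged representation: the dividend is used with its tag removed, i.e. as 2*a, so the division below produces the quotient of 2*a by b; clearing its bottom bit yields exactly 2*(a div b), and subtracting b times that from the tagged dividend 2*a+1 leaves 2*(a mod b)+1, the remainder already tagged.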
*) val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() (* Untag the divisor (into uReg2). subtract the tag from the dividend (into uReg1) Untag one argument. subtract the tag from the second, divide and or in the tag. The tag may have been set already depending on the result of the division. *) val tagBitMask = Word64.<<(Word64.fromInt ~1, 0w1) (* Requires a 64-bit AND. *) val code = (* Multiply the result of the division by the divisor and subtract this from the original, tagged dividend. This leaves us a tagged value so it can go straight into the result. *) multiplication{kind=if is32in64 then MultSub32 else MultSub64, dest=target, sourceM=uReg4, sourceN=uReg2, sourceA=SomeReg aReg1} :: (* Clear the bottom bit before the multiplication. *) logicalImmediate{dest=SomeReg uReg4, source=uReg3, immed=tagBitMask, length=OpSize64, ccRef=NONE, logOp=LogAnd} :: division{isSigned=false, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: addSubImmediate{dest=SomeReg uReg1, source=aReg1, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg2, dest=uReg2, isSigned=false, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordArith _, ...}, _, _, _, _) = raise InternalError "WordArith - unimplemented instruction" | codeToICodeBinaryRev({oper=WordLogical LogicalAnd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* Since both values are tagged the tag will be preserved. *) val code = logicalRegister{base=aReg1, shifted=aReg2, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, logOp=LogAnd, shift=ShiftNone} :: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordLogical LogicalOr, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* Since both values are tagged the tag will be preserved. *) val code = logicalRegister{base=aReg1, shifted=aReg2, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, logOp=LogOr, shift=ShiftNone} :: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordLogical LogicalXor, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* If we just XOR the values together the tag bit in the result will be zero. It's better to remove one of the tag bits beforehand. As with Add, we try to choose a constant. 
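The arithmetic mirrors the addition case: once one tag has been removed that operand has a zero in the tag position, so 2*a+1 XOR 2*b = 2*(a XOR b)+1 and the result comes out correctly tagged without any further correction.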
*) val (firstReg, secondReg) = case arg1 of BICConstnt _ => (aReg2, aReg1) | _ => (aReg1, aReg2) val uReg = newUReg() val code = logicalRegister{base=firstReg, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, logOp=LogXor, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordShift ShiftLeft, arg1, arg2}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() and ureg3 = newUReg() val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val code = (* Put back the tag. *) logicalImmediate{ source=ureg3, dest=SomeReg target, ccRef=NONE, immed=0w1, logOp=LogOr, length=polyWordOpSize } :: (* Do the shift *) shiftRegister{direction=Arm64ICode.ShiftLeft, dest=ureg3, source=ureg1, shift=ureg2, opSize=polyWordOpSize} :: (* Untag the shift amount. Since it's at most 64 we can use a 32-bit operation. *) untagValue{source=aReg2, dest=ureg2, opSize=OpSize32, isSigned=false} :: (* Remove tag bit from the value we're shifting. *) logicalImmediate{ source=aReg1, dest=SomeReg ureg1, ccRef=NONE, immed=polyWordTagBitMask, logOp=LogAnd, length=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordShift ShiftRightLogical, arg1, arg2}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val code = (* Put back the tag. *) logicalImmediate{ source=ureg2, dest=SomeReg target, ccRef=NONE, immed=0w1, logOp=LogOr, length=polyWordOpSize } :: (* Do the shift *) shiftRegister{direction=Arm64ICode.ShiftRightLogical, dest=ureg2, source=aReg1, shift=ureg1, opSize=polyWordOpSize} :: (* Untag the shift amount. Since it's at most 64 we can use a 32-bit operation. *) untagValue{source=aReg2, dest=ureg1, opSize=OpSize32, isSigned=false} :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordShift ShiftRightArithmetic, arg1, arg2}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val code = (* Put back the tag. *) logicalImmediate{ source=ureg2, dest=SomeReg target, ccRef=NONE, immed=0w1, logOp=LogOr, length=polyWordOpSize } :: (* Do the shift *) shiftRegister{direction=Arm64ICode.ShiftRightArithmetic, dest=ureg2, source=aReg1, shift=ureg1, opSize=polyWordOpSize} :: (* Untag the shift amount. Since it's at most 64 we can use a 32-bit operation. *) untagValue{source=aReg2, dest=ureg1, opSize=OpSize32, isSigned=false} :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=AllocateByteMemory, arg1, arg2}, context, _, destination, tailCode) = let (* Allocate a block of memory and without initialisation. If the flags include the "bytes" bit the GC won't look at it so it doesn't matter that it's not initialised. This is identical to AllocateWordMemory apart from the lack of initialisation. 
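The length word constructed below packs the flags byte into the top eight bits above the length, in effect flags << 56 orb size on native 64-bit and flags << 24 orb size in 32-in-64. As a purely illustrative example, whatever flag bits the run-time actually defines, a flags byte of 0wx41 and a size of three words would give the native 64-bit length word 0wx4100000000000003.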
*) val target = asTarget destination val (codeSize, sizeReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (codeFlags, flagsReg, _) = codeToICodeRev(arg2, context, false, AnyReg, codeSize) val uSizeReg = newUReg() and shiftFReg = newUReg() and lengthWord = newUReg() val absAddr = if is32in64 then newUReg() else target val untagSize = untagValue{source=sizeReg, dest=uSizeReg, opSize=polyWordOpSize, isSigned=false} :: codeFlags val allocateMem = allocateMemoryVariable{ size=uSizeReg, dest=absAddr, saveRegs=[]} :: untagSize (* Make the length word by first shifting the flags into the length word reg by 55 or 23 bits. This puts the tag bit in the top bit of the size. Then insert the size into this which will overwrite the flag's tag bit. *) val makeLengthWord = bitFieldInsert{ source=uSizeReg, destAsSource=shiftFReg, dest=lengthWord, length=polyWordOpSize, immr=0w0 (*bit 0*), imms=if is32in64 then 0w23 else 0w55 (*width-1*) } :: shiftConstant{direction=Arm64ICode.ShiftLeft, dest=shiftFReg, source=flagsReg, shift=if is32in64 then 0w23 else 0w55, opSize=polyWordOpSize } :: allocateMem val setLengthWordAndInit = storeWithConstantOffset{ source=lengthWord, base=absAddr, byteOffset= ~(Word.toInt wordSize), loadType=polyWordLoadSize } :: makeLengthWord val finalCode = if is32in64 then absoluteToObjectIndex{ source=absAddr, dest=target } :: setLengthWordAndInit else setLengthWordAndInit in (finalCode, target, false) end | codeToICodeBinaryRev({oper=LargeWordComparison test, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (testCode1, testDest1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (testCode2, testDest2, _) = codeToICodeRev(arg2, context, false, AnyReg, testCode1) val uReg1 = newUReg() and uReg2 = newUReg() val comparison = addSubRegister{base=uReg1, shifted=uReg2, dest=ZeroReg, length=OpSize64, ccRef=SOME ccRef, isAdd=false, shift=ShiftNone} :: unboxLarge{ source=testDest2, dest=uReg2 } :: unboxLarge{ source=testDest1, dest=uReg1 } :: testCode2 open BuiltIns val cond = case test of TestEqual => CondEqual | TestLess => CondCarryClear | TestLessEqual => CondUnsignedLowOrEq | TestGreater => CondUnsignedHigher | TestGreaterEqual => CondCarrySet | TestUnordered => raise InternalError "LargeWordComparison: TestUnordered" in (makeBoolResultRev(cond, ccRef, target, comparison), target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: addSubRegister{base=uReg1, shifted=uReg2, dest=SomeReg uReg3, length=OpSize64, ccRef=NONE, isAdd=true, shift=ShiftNone} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: 
addSubRegister{base=uReg1, shifted=uReg2, dest=SomeReg uReg3, length=OpSize64, ccRef=NONE, isAdd=false, shift=ShiftNone} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: multiplication{kind=MultAdd64, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2, dest=uReg3} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: division{isSigned=false, opSize=OpSize64, dividend=uReg1, divisor=uReg2, dest=uReg3} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithMod, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() val code = boxLarge{ source=uReg4, dest=target, saveRegs=[] } :: multiplication{kind=MultSub64, dest=uReg4, sourceM=uReg3, sourceN=uReg2, sourceA=SomeReg uReg1} :: division{isSigned=false, opSize=OpSize64, dividend=uReg1, divisor=uReg2, dest=uReg3} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith _, ...}, _, _, _, _) = raise InternalError "LargeWordArith - unimplemented instruction" | codeToICodeBinaryRev({oper=LargeWordLogical logop, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val logicalOp = case logop of LogicalAnd => LogAnd | LogicalOr => LogOr | LogicalXor => LogXor val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: logicalRegister{base=uReg1, shifted=uReg2, dest=SomeReg uReg3, length=OpSize64, ccRef=NONE, logOp=logicalOp, shift=ShiftNone} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordShift shiftKind, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = 
newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val shiftType = case shiftKind of ShiftLeft => Arm64ICode.ShiftLeft | ShiftRightLogical => Arm64ICode.ShiftRightLogical | ShiftRightArithmetic => Arm64ICode.ShiftRightArithmetic val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: shiftRegister{direction=shiftType, source=uReg1, shift=uReg2, dest=uReg3, opSize=OpSize64 } :: (* The shift amount is a word, not a large word. *) untagValue{ source=aReg2, dest=uReg2, opSize=OpSize32, isSigned=false } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=RealComparison(test, precision), arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val fpSize = precisionToFpSize precision val uReg1 = newUReg() and uReg2 = newUReg() (* Floating point comparisons. The fcmp instruction differs from integer comparison. If either argument is a NaN the overflow bit is set and the other bits are cleared. That means that in order to get a true result only if the values are not NaNs we have to test that at least one of C, N, or Z are set. We use unsigned tests for < and <= and signed tests for > and >=. *) val cond = case test of TestEqual => CondEqual | TestLess => CondCarryClear | TestLessEqual => CondUnsignedLowOrEq | TestGreater => CondSignedGreater | TestGreaterEqual => CondSignedGreaterEq | TestUnordered => CondOverflow val code = compareFloatingPoint{arg1=uReg1, arg2=uReg2, ccRef=ccRef, opSize=fpSize} :: unboxTagFloat{ floatSize=fpSize, source=aReg2, dest=uReg2 } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg2Code in (makeBoolResultRev(cond, ccRef, target, code), target, false) end | codeToICodeBinaryRev({oper=RealArith(oper, precision), arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val fpSize = precisionToFpSize precision val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val fpOp = case oper of ArithAdd => AddFP | ArithSub => SubtractFP | ArithMult => MultiplyFP | ArithDiv => DivideFP | _ => raise InternalError "RealArith - unimplemented instruction" val code = boxTagFloat{ floatSize=fpSize, source=uReg3, dest=target, saveRegs=[] } :: binaryFloatingPoint{arg1=uReg1, arg2=uReg2, dest=uReg3, fpOp=fpOp, opSize=fpSize } :: unboxTagFloat{ floatSize=fpSize, source=aReg2, dest=uReg2 } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=PointerEq, arg1, arg2}, context, _, destination, tailCode) = let (* Equality of general values which can include pointers. This can be treated exactly as a word equality. It has to be analysed differently for indexed cases. 
*) val ccRef = newCCRef() val (testCode1, testDest1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (testCode2, testDest2, _) = codeToICodeRev(arg2, context, false, AnyReg, testCode1) val comparison = addSubRegister{base=testDest1, shifted=testDest2, dest=ZeroReg, length=polyWordOpSize, ccRef=SOME ccRef, isAdd=false, shift=ShiftNone} :: testCode2 val target = asTarget destination in (makeBoolResultRev(CondEqual, ccRef, target, comparison), target, false) end | codeToICodeBinaryRev({oper=FreeCStack, arg1, arg2}, context, _, destination, tailCode) = let (* Free space on the C stack. This is a binary operation that takes the base address and the size. The base address isn't used in this version. *) val (arg1Code, _, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg = newUReg() val code = addSubXSP{ source=uReg, dest=ZeroReg, isAdd=true } :: untagValue{ source=aReg2, dest=uReg, isSigned=false, opSize=polyWordOpSize } :: arg2Code in returnUnit(destination, code, false) end (* Code-generate an address into one or two Pregs. At this point they are in a state where we can code-generate arbitrary code before the address is used *) and addressToPregAddress({base, index, offset}, context, code) = let val (bCode, bReg, _) = codeToICodeRev(base, context, false, AnyReg, code) in case index of NONE => ({base=bReg, index=NONE, offset=offset}, bCode) | SOME index => let val (iCode, iReg, _) = codeToICodeRev(index, context, false, AnyReg, bCode) in ({base=bReg, index=SOME iReg, offset=offset}, iCode) end end (* Store the code address and the closure items into a previously allocated closure on the heap. This is used both in the simple case and also with mutually recursive declarations. *) and storeIntoClosure(lambda as { closure, ...}, absClosureAddr, context, tailCode) = let val closureRef = makeConstantClosure() val () = codeFunctionToArm64(lambda, debugSwitches, closureRef) val codeAddrWords = if is32in64 then 2 else 1 fun storeAValue(f, (n, tlCode)) = let val (code, source, _) = codeToICodeRev(BICExtract f, context, false, AnyReg, tlCode) in (n+1, storeAtWordOffset(source, n, absClosureAddr, polyWordLoadSize, code)) end (* Store the code address in the first 64-bits. *) val storeCodeAddress = if is32in64 then let (* We can't use codeAddressFromClosure on 32-in-64 because it always returns a 64-bit value. Instead we have to get the code address at run-time. *) val clReg = newPReg() and absClReg = newUReg() and absCodeReg = newUReg() in storeAtWordOffset(absCodeReg, 0, absClosureAddr, Load64, loadWithConstantOffset{base=absClReg, dest=absCodeReg, byteOffset=0, loadType=Load64} :: objectIndexAddressToAbsolute{ source=clReg, dest=absClReg } :: loadAddressConstant{source=closureAsAddress closureRef, dest=clReg} :: tailCode) end else let val cReg = newPReg() in storeAtWordOffset(cReg, 0, absClosureAddr, Load64, loadAddressConstant{source=codeAddressFromClosure closureRef, dest=cReg} :: tailCode) end val (_, storeCode) = List.foldl storeAValue (codeAddrWords, storeCodeAddress) closure in storeCode end (* Load operations. 
*) and codeLoadOperation(kind, address, context, target, tailCode) = let val (regAddr, codeAddr) = addressToPregAddress(address, context, tailCode) val code = case kind of LoadStoreMLWord {isImmutable=false} => let fun loadOp(addrReg, code) = loadAcquire{base=addrReg, dest=target, loadType=polyWordLoadSize} :: code in loadAndStoreWithAbsolute (regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, loadOp, codeAddr) end | LoadStoreMLWord {isImmutable=true} => let fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=target, byteOffset=offset, loadType=polyWordLoadSize} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=target, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: code in loadAndStoreWithAddress (regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreMLByte {isImmutable=false} => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadOp(addrReg, code) = loadAcquire{base=addrReg, dest=destReg, loadType=Load8} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAbsolute (regAddr, opWordSize Load8, loadShift Load8, loadOp, codeAddr) end | LoadStoreMLByte {isImmutable=true} => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load8} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load8, signExtendIndex=false} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, false, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC8 => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load8} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load8, signExtendIndex=true} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC16 => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load16} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load16, signExtendIndex=true} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAddress(regAddr, opWordSize Load16, loadShift Load16, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC32 => let (* This is tagged in native 64-bits and boxed in 32-in-64. 
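Tagging a 32-bit value needs up to 33 bits, which fits comfortably in a native 64-bit tagged word but not in the 31 value bits available to a tagged word in the 32-in-64 representation, hence the boxing in that case.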
*) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load32} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load32, signExtendIndex=true} :: code in (if is32in64 then boxLarge{ source=destReg, dest=target, saveRegs=[] } else tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize64 (* It becomes 33 bits *)}) :: loadAndStoreWithAddress(regAddr, opWordSize Load32, loadShift Load32, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC64 => let (* This is always boxed. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load64} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load64, signExtendIndex=true} :: code in boxLarge{ source=destReg, dest=target, saveRegs=[]} :: loadAndStoreWithAddress(regAddr, opWordSize Load64, loadShift Load64, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreCFloat => let (* This always returns a double, not a 32-bit float. *) val destReg = newUReg() and convertReg = newUReg() fun loadConstOffset(base, offset, code) = loadFPWithConstantOffset{base=base, dest=destReg, byteOffset=offset, floatSize=Float32} :: code fun loadIndexed(base, index, code) = loadFPWithIndexedOffset{base=base, dest=destReg, index=index, floatSize=Float32, signExtendIndex=true} :: code in boxTagFloat{floatSize=Double64, source=convertReg, dest=target, saveRegs=[]} :: unaryFloatingPt{source=destReg, dest=convertReg, fpOp=ConvFloatToDble} :: loadAndStoreWithAddress(regAddr, 4, 0w2, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreCDouble => let (* This is always boxed. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadFPWithConstantOffset{base=base, dest=destReg, byteOffset=offset, floatSize=Double64} :: code fun loadIndexed(base, index, code) = loadFPWithIndexedOffset{base=base, dest=destReg, index=index, floatSize=Double64, signExtendIndex=true} :: code in boxTagFloat{floatSize=Double64, source=destReg, dest=target, saveRegs=[]} :: loadAndStoreWithAddress(regAddr, 8, 0w3, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreUntaggedUnsigned => let (* LoadStoreMLWord {isImmutable=true} except it has to be tagged. *) val ureg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=ureg, byteOffset=offset, loadType=polyWordLoadSize} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=ureg, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: code in tagValue{source=ureg, dest=target, isSigned=false, opSize=polyWordOpSize} :: loadAndStoreWithAddress(regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, codeAddr) end in (code, target, false) end (* Store operations. 
*) and codeStoreOperation(kind, address, value, context, destination, tailCode1) = let val (regAddr, codeAddr) = addressToPregAddress(address, context, tailCode1) val (sourceCode, sourceReg, _) = codeToICodeRev(value, context, false, AnyReg, codeAddr) val storeCode = case kind of LoadStoreMLWord {isImmutable=false} => let fun storeOp(addrReg, code) = storeRelease{base=addrReg, source=sourceReg, loadType=polyWordLoadSize} :: code in loadAndStoreWithAbsolute(regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, storeOp, sourceCode) end | LoadStoreMLWord {isImmutable=true} => let (* Used when initialising immutables that do not require store-release. *) fun loadConstOffset(base, offset, code) = storeWithConstantOffset{base=base, source=sourceReg, byteOffset=offset, loadType=polyWordLoadSize} :: code fun loadIndexed(base, index, code) = storeWithIndexedOffset{base=base, source=sourceReg, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: code in loadAndStoreWithAddress (regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreMLByte {isImmutable=false} => let fun storeOp(addrReg, code) = let val tReg = newUReg() in storeRelease{base=addrReg, source=tReg, loadType=Load8} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAbsolute(regAddr, opWordSize Load8, loadShift Load8, storeOp, sourceCode) end | LoadStoreMLByte {isImmutable=true} => let (* Used when initialising immutables that do not require store-release. *) fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load8} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load8, signExtendIndex=false} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, false, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC8 => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load8} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load8, signExtendIndex=true} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC16 => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load16} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load16, signExtendIndex=true} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load16, loadShift Load16, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC32 => let fun loadConstOffset(base, offset, code) = let val 
tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load32} :: (if is32in64 then unboxLarge{source=sourceReg, dest=tReg} else untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize64}) :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load32, signExtendIndex=true} :: (if is32in64 then unboxLarge{source=sourceReg, dest=tReg} else untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize64}) :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load32, loadShift Load32, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC64 => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load64} :: unboxLarge{source=sourceReg, dest=tReg} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load64, signExtendIndex=true} :: unboxLarge{source=sourceReg, dest=tReg} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load64, loadShift Load64, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreCFloat => let (* The "real" value is a double, not a 32-bit float *) fun loadConstOffset(base, offset, code) = let val tReg = newUReg() and cReg = newUReg() in storeFPWithConstantOffset{base=base, source=tReg, byteOffset=offset, floatSize=Float32} :: unaryFloatingPt{source=cReg, dest=tReg, fpOp=ConvDbleToFloat} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=cReg} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() and cReg = newUReg() in storeFPWithIndexedOffset{base=base, source=tReg, index=index, floatSize=Float32, signExtendIndex=true} :: unaryFloatingPt{source=cReg, dest=tReg, fpOp=ConvDbleToFloat} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=cReg} :: code end in loadAndStoreWithAddress(regAddr, 4, 0w2, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreCDouble => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeFPWithConstantOffset{base=base, source=tReg, byteOffset=offset, floatSize=Double64} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=tReg} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeFPWithIndexedOffset{base=base, source=tReg, index=index, floatSize=Double64, signExtendIndex=true} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=tReg} :: code end in loadAndStoreWithAddress(regAddr, 8, 0w3, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreUntaggedUnsigned => let (* Only used when initialising strings so this does not require store-release. 
*) fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=polyWordLoadSize} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=polyWordOpSize} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAddress(regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, sourceCode) end in returnUnit(destination, storeCode, false) end (*Turn the codetree structure into icode. *) val bodyContext = {loopArgs=NONE, stackPtr=0, currHandler=NONE, overflowBlock=ref NONE} val (bodyCode, _, bodyExited) = codeToICodeRev(body, bodyContext, true, SpecificPReg resultTarget, beginInstructions) val icode = if bodyExited then bodyCode else returnInstruction(bodyContext, resultTarget, bodyCode) (* Turn the icode list into basic blocks. The input list is in reverse so as part of this we reverse the list. *) local val resArray = Array.array(!labelCounter, BasicBlock{ block=[], flow=ExitCode }) fun createEntry (blockNo, block, flow) = Array.update(resArray, blockNo, BasicBlock{ block=block, flow=flow}) fun splitCode([], _, _) = (* End of code. We should have had a BeginFunction. *) raise InternalError "splitCode - no begin" | splitCode(BlockBegin args :: _, sinceLabel, flow) = (* Final instruction. Create the initial block and exit. *) createEntry(0, BeginFunction args ::sinceLabel, flow) | splitCode(BlockSimple instr :: rest, sinceLabel, flow) = splitCode(rest, instr :: sinceLabel, flow) | splitCode(BlockLabel label :: rest, sinceLabel, flow) = (* Label - finish this block and start another. *) ( createEntry(label, sinceLabel, flow); (* Default to a jump to this label. That is used if we have assumed a drop-through. *) splitCode(rest, [], Unconditional label) ) | splitCode(BlockExit instr :: rest, _, _) = splitCode(rest, [instr], ExitCode) | splitCode(BlockFlow flow :: rest, _, _) = splitCode(rest, [], flow) | splitCode(BlockRaiseAndHandle(instr, handler) :: rest, _, _) = splitCode(rest, [instr], UnconditionalHandle handler) | splitCode(BlockOptionalHandle{call, handler, label} :: rest, sinceLabel, flow) = let (* A function call within a handler. This could go to the handler but if there is no exception will go to the next instruction. Also includes JumpLoop since the stack check could result in an Interrupt exception. *) in createEntry(label, sinceLabel, flow); splitCode(rest, [call], ConditionalHandle{handler=handler, continue=label}) end in val () = splitCode(icode, [], ExitCode) val resultVector = Array.vector resArray end open ICodeTransform val pregProperties = Vector.fromList(List.rev(! pregPropList)) in codeICodeFunctionToArm64{blocks = resultVector, functionName = name, pregProps = pregProperties, ccCount= ! 
ccRefCounter, debugSwitches = debugSwitches, resultClosure = resultClosure, profileObject = profileObject} end val gencodeLambda = codeFunctionToArm64 structure Foreign = Arm64Foreign structure Sharing = struct type backendIC = backendIC and bicLoadForm = bicLoadForm and argumentType = argumentType and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML index a177429c..d16c624b 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML @@ -1,983 +1,988 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICode( structure Arm64Code: ARM64PREASSEMBLY ): ARM64ICODE = struct open Arm64Code open Address datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than word if this is a container or a handler entry. *) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype reg = GenReg of xReg | FPReg of vReg datatype callKind = Recursive | ConstantCode of machineWord | FullCall (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. 
*) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value.. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed sized piece of memory and puts the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. *) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd.*) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. 
*) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A.. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, stackAdjust: int, currStackSize: int } (* Return from the function. resultRegs are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { results: ('genReg * xReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack.. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. 
*) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. 
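Untagging is the inverse of TagValue: a tagged n is stored as 2*n+1, so an arithmetic right shift by one for signed values, or a logical right shift for unsigned ones, recovers n; if the value has only just been tagged the cached pre-tag register can be reused instead of shifting at all.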
*) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to 0 if it succeeds or 1 if it fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } + (* Yield control during a spin-lock. *) + | CPUYield + (* Debugging - fault if values don't match. *) | CacheCheck of { arg1: 'genReg, arg2: 'genReg } (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true, falseJump if false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels. *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. Used when an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock (* Return the list of blocks that are the immediate successors of this. *) fun successorBlocks(Unconditional l) = [l] | successorBlocks(Conditional{trueJump, falseJump, ...}) = [trueJump, falseJump] | successorBlocks ExitCode = [] | successorBlocks(IndexedBr cases) = cases | successorBlocks(SetHandler{handler, continue, ...}) = [handler, continue] (* We only need "handler" in SetHandler because we may have a handler that is never actually jumped to.
*) | successorBlocks(UnconditionalHandle handler) = [handler] | successorBlocks(ConditionalHandle{handler, continue, ...}) = [handler, continue] local fun printCC(CcRef ccRef, stream) = stream ("CC" ^ Int.toString ccRef) fun printStackLoc(StackLoc{size, rno}, stream) = (stream "S"; stream(Int.toString rno); stream "("; stream(Int.toString size); stream ")") fun regRepr(XReg w) = "X" ^ Int.toString(Word8.toInt w) | regRepr XZero = "XZ" | regRepr XSP = "SP" fun arithRepr OpSize64 = "64" | arithRepr OpSize32 = "32" fun printLoadType(Load64, stream) = stream "64" | printLoadType(Load32, stream) = stream "32" | printLoadType(Load16, stream) = stream "16" | printLoadType(Load8, stream) = stream "8" fun printSaves([], _, _) = () | printSaves([areg], _, printReg) = printReg areg | printSaves(areg::more, stream, printReg) = (printReg areg; stream ","; printSaves(more, stream, printReg)) fun printArg(ArgInReg reg, _, printReg) = printReg reg | printArg(ArgOnStack{wordOffset, container, field, ...}, stream, _) = ( printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")" ) fun printShift(ShiftLSL w, stream) = stream(" LSL " ^ Word8.toString w) | printShift(ShiftLSR w, stream) = stream(" LSR " ^ Word8.toString w) | printShift(ShiftASR w, stream) = stream(" ASR " ^ Word8.toString w) | printShift(ShiftNone, _) = () fun printFloatSize(Float32, stream) = stream "F" | printFloatSize(Double64, stream) = stream "D" fun printICode (stream, printGenReg:'a -> unit, _: 'b->unit, _: 'c->unit) (MoveRegister{ source, dest }: ('a, 'b, 'c) arm64ICode) = ( stream "\tMove\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadNonAddressConstant{ source, dest }) = ( stream "\tLoadNonAddress\t"; stream(Word64.toString source); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadAddressConstant{ source, dest }) = ( stream "\tLoadAddress\t"; stream(Address.stringOfWord source); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadWithConstantOffset{ base, dest, byteOffset, loadType }) = ( stream "\tLoadConstOffset"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (LoadFPWithConstantOffset{ base, dest, byteOffset, floatSize }) = ( stream "\tLoadConstOffset"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (LoadWithIndexedOffset{ base, dest, index, loadType, signExtendIndex }) = ( stream "\tLoadIndexed"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (LoadFPWithIndexedOffset{ base, dest, index, floatSize, signExtendIndex }) = ( stream "\tLoadIndexed"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (GetThreadId { dest}) = ( stream "\tGetThreadId\t"; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (ObjectIndexAddressToAbsolute{ source, dest }) = ( stream "\tObjectAddrToAbs\t"; printGenReg source; 
stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (AbsoluteToObjectIndex{ source, dest }) = ( stream "\tAbsToObjectAddr\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (AllocateMemoryFixed{bytesRequired, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream(Word64.fmt StringCvt.DEC bytesRequired); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (AllocateMemoryVariable{size, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream "s="; printGenReg(size); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (InitialiseMem{size, addr, init}) = ( stream "\tInitialiseMem\t"; stream "s="; printGenReg(size); stream ",i="; printGenReg(init); stream ",a="; printGenReg(addr) ) | printICode (stream, _, _, _) BeginLoop = stream "\tBeginLoop" | printICode (stream, printGenReg, _, _) (JumpLoop{regArgs, stackArgs, checkInterrupt, ... }) = ( stream "\tJumpLoop\t"; List.app(fn {src, dst} => (printGenReg(dst); stream "="; printArg(src, stream, printGenReg); stream " ")) regArgs; List.app( fn {src, wordOffset, stackloc} => (printStackLoc(stackloc, stream); stream("(sp" ^ Int.toString wordOffset); stream ")="; printArg(src, stream, printGenReg); stream " ") ) stackArgs; case checkInterrupt of NONE => () | SOME saveRegs => (stream " Check:save="; printSaves(saveRegs, stream, printGenReg)) ) | printICode (stream, printGenReg, _, _) (StoreWithConstantOffset{ base, source, byteOffset, loadType }) = ( stream "\tStoreConstOffset"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode (stream, printGenReg, _, printFPReg) (StoreFPWithConstantOffset{ base, source, byteOffset, floatSize }) = ( stream "\tStoreConstOffset"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode (stream, printGenReg, _, _) (StoreWithIndexedOffset{ base, source, index, loadType, signExtendIndex }) = ( stream "\tStoreIndexed"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode (stream, printGenReg, _, printFPReg) (StoreFPWithIndexedOffset{ base, source, index, floatSize, signExtendIndex }) = ( stream "\tStoreIndexed"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }) = ( stream (if isAdd then "\tAddImmediate" else "\tSubImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift }) = ( stream (if isAdd then "\tAddRegister" else "\tSubRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; 
printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (LogicalImmediate{ source, dest, ccRef, immed, logOp, length }) = ( stream (case logOp of LogAnd => "\tAndImmediate" | LogOr => "\tOrImmediate" | LogXor => "\tXorImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word64.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift }) = ( stream (case logOp of LogAnd => "\tAndRegister" | LogOr => "\tOrRegister" | LogXor => "\tXorRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, _, _) (ShiftRegister{ direction, dest, source, shift, opSize }) = ( stream ( case direction of ShiftLeft => "\tShiftLeft" | ShiftRightLogical => "\tShiftRightLog" | ShiftRightArithmetic => "\tShiftRightArith"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " by "; printGenReg(shift); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, printOptGenReg, _) (Multiplication{ kind, dest, sourceA, sourceM, sourceN }) = ( stream ( case kind of MultAdd32 => "\tMultAdd32\t" | MultSub32 => "\tMultSub32\t" | MultAdd64 => "\tMultAdd64\t" | MultSub64 => "\tMultSub64\t" | SignedMultAddLong => "\tSignedMultAddLong\t" | SignedMultHigh => "\tSignedMultHigh\t"); printGenReg(sourceM); stream " * "; printGenReg(sourceN); stream " +/- "; printOptGenReg sourceA; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (Division{ isSigned, dest, dividend, divisor, opSize }) = ( stream (if isSigned then "\tSignedDivide" else "\tUnsignedDivide"); stream(arithRepr opSize); stream "\t"; printGenReg(dividend); stream " by "; printGenReg(divisor); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (BeginFunction{ regArgs, stackArgs }) = ( stream "\tBeginFunction\t"; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printGenReg(arg); stream " ")) regArgs; List.app(fn s => printStackLoc(s, stream)) stackArgs ) | printICode (stream, printGenReg, _, _) (FunctionCall{callKind, regArgs, stackArgs, dests, saveRegs, containers}) = ( stream "\tFunctionCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); stream "("; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn arg => (stream "p="; printArg(arg, stream, printGenReg); stream " ")) stackArgs; stream ") "; List.app(fn (pr, r) => (stream(regRepr r); stream "=>"; printGenReg pr; stream " ")) dests; stream " save="; printSaves(saveRegs, stream, printGenReg); if null containers then () else (stream " containers="; List.app (fn c => (printStackLoc(c, stream); stream " ")) containers) ) | printICode (stream, printGenReg, _, _) (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize, ...}) = ( stream "\tTailCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, 
stream, printGenReg); stream " ")) regArgs; List.app(fn {src, stack} => (stream (Int.toString stack); stream "<="; printArg(src, stream, printGenReg); stream " ")) stackArgs; stream "adjust="; stream(Int.toString stackAdjust); stream " stackSize="; stream(Int.toString currStackSize) ) | printICode (stream, printGenReg, _, _) (ReturnResultFromFunction{ results, returnReg, numStackArgs }) = ( stream "\tReturnFromFunction\t"; printGenReg(returnReg); stream "with "; List.app(fn (reg, r) => (stream(regRepr r); stream "=>"; printGenReg reg; stream " ")) results; stream("," ^ Int.toString numStackArgs) ) | printICode (stream, printGenReg, _, _) (RaiseExceptionPacket{ packetReg }) = ( stream "\tRaiseException\t"; printGenReg(packetReg) ) | printICode (stream, printGenReg, _, _) (PushToStack{ source, copies, container }) = ( stream "\tPushToStack\t"; printGenReg source; if copies > 1 then (stream " * "; stream(Int.toString copies)) else (); stream " => "; printStackLoc(container, stream) ) | printICode (stream, printGenReg, _, _) (LoadStack{ dest, wordOffset, container, field }) = ( stream "\tLoadStack\t"; printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")"; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (StoreToStack{ source, container, field, stackOffset }) = ( stream "\tStoreToStack\t"; printGenReg source; stream " => "; printStackLoc(container, stream); stream "+"; stream (Int.toString field); stream "("; stream(Int.toString stackOffset); stream ")" ) | printICode (stream, printGenReg, _, _) (ContainerAddress{ dest, container, stackOffset }) = ( stream "\tContainerAddress\t"; stream "@"; printStackLoc(container, stream); stream " ("; stream(Int.toString stackOffset); stream ") => "; printGenReg dest ) | printICode (stream, _, _, _) (ResetStackPtr{ numWords }) = ( stream "\tResetStackPtr\t"; stream(Int.toString numWords) ) | printICode (stream, printGenReg, _, _) (TagValue{ source, dest, isSigned, opSize }) = ( stream "\tTag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (UntagValue{ source, dest, isSigned, opSize }) = ( stream "\tUntag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (BoxLarge{source, dest, saveRegs}) = ( stream "\tBoxLarge\t"; printGenReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (UnboxLarge{source, dest}) = ( stream "\tUnboxLarge\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (BoxTagFloat{floatSize, source, dest, saveRegs}) = ( stream "\tBoxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, printFPReg) (UnboxTagFloat{floatSize, source, dest}) = ( stream "\tUnboxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (LoadAcquire{ base, dest, loadType }) = ( stream "\tLoadAcquire"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "] => "; printGenReg dest ) | 
printICode (stream, printGenReg, _, _) (StoreRelease{ base, source, loadType }) = ( stream "\tStoreRelease"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "]" ) | printICode (stream, printGenReg, _, _) (BitFieldShift{ source, dest, isSigned, length, immr, imms }) = ( stream "\tBitShift"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr length); stream "\t"; printGenReg source; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode (stream, printGenReg, _, _) (BitFieldInsert{ source, dest, destAsSource, length, immr, imms }) = ( stream "\tBitInsert"; stream(arithRepr length); stream "\t"; printGenReg source; stream " with "; printGenReg destAsSource; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode (stream, printGenReg, _, _) (IndexedCaseOperation{testReg}) = ( stream "\tIndexedCase\t"; printGenReg testReg ) | printICode (stream, _, _, _) PushExceptionHandler = stream "\tPushExcHandler" | printICode (stream, _, _, _) PopExceptionHandler = stream "\tPopExcHandler" | printICode (stream, printGenReg, _, _) (BeginHandler{packetReg}) = ( stream "\tBeginHandler\t"; printGenReg packetReg ) | printICode (stream, printGenReg, _, _) (CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = ( stream "\tCompareByteVectors\t"; printGenReg(vec1Addr); stream ","; printGenReg(vec2Addr); stream ","; printGenReg(length); stream " => "; printCC(ccRef, stream) ) | printICode (stream, printGenReg, _, _) (BlockMove{srcAddr, destAddr, length, isByteMove}) = ( stream(if isByteMove then "\tBlockByteMove\t" else "\tBlockWordMove\t"); stream "src="; printGenReg(srcAddr); stream ",dest="; printGenReg(destAddr); stream ",len="; printGenReg(length) ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubXSP{ source, dest, isAdd }) = ( stream(if isAdd then "\tAdd\t" else "\tSubtract\t"); printGenReg source; stream " XSP => "; printOptGenReg dest ) | printICode (stream, printGenReg, _, _) (TouchValue{ source }) = ( stream "\tTouchValue\t"; printGenReg source ) | printICode (stream, printGenReg, _, _) (LoadAcquireExclusive{ base, dest }) = ( stream "\tLoadExclusive\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode (stream, printGenReg, printOptGenReg, _) (StoreReleaseExclusive{ base, source, result }) = ( stream "\tStoreExclusive\t"; printOptGenReg source; stream " => ["; printGenReg base; stream "] result => "; printGenReg result ) | printICode (stream, _, _, _) MemoryBarrier = stream "\tMemoryBarrier" | printICode (stream, printGenReg, _, printFPReg) (ConvertIntToFloat{ source, dest, srcSize, destSize}) = ( stream "\tConvert"; stream(arithRepr srcSize); stream "To"; printFloatSize(destSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, printFPReg) (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}) = let open IEEEReal in stream "\tConvert"; printFloatSize(srcSize, stream); stream "To"; stream(arithRepr destSize); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream( case rounding of TO_NEAREST => " rounding" | TO_NEGINF => " rounding down" | TO_POSINF => " rounding up" | TO_ZERO => " truncating" ) end | printICode (stream, _, _, printFPReg) (UnaryFloatingPt{ source, dest, fpOp}) = ( stream( case fpOp of NegFloat 
=> "\tNegFloat\t" | NegDouble => "\tNegDouble\t" | AbsFloat => "\tAbsFloat\t" | AbsDouble => "\tAbsDouble\t" | ConvFloatToDble => "\tFloatToDble\t" | ConvDbleToFloat => "\t\t" ); printFPReg source; stream " => "; printFPReg dest ) | printICode (stream, _, _, printFPReg) (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}) = ( stream( case fpOp of MultiplyFP => "\tMultiply" | DivideFP => "\tDivide" | AddFP => "\tAdd" | SubtractFP => "\tSubtract" ); printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream " => "; printFPReg dest ) | printICode (stream, _, _, printFPReg) (CompareFloatingPoint{ arg1, arg2, opSize, ccRef}) = ( stream "\tCompare"; printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream ", "; printCC(ccRef, stream) ) + | printICode (stream, _, _, _) CPUYield = stream "\tCpuYield" + | printICode (stream, printGenReg, _, _) (CacheCheck{ arg1, arg2}) = ( stream "\tCacheCheck\t"; printGenReg arg1; stream ", "; printGenReg arg2 ) and printCondition(cond, stream) = stream(condToString cond) (* Print a basic block. *) fun printBlock (stream, printGenReg, printOptGenReg, printFPReg) (blockNo, BasicBlock{block, flow, ...}) = ( (* Put a label on all but the first. *) if blockNo <> 0 then stream("L" ^ Int.toString blockNo ^ ":") else (); List.app (fn icode => (printICode (stream, printGenReg, printOptGenReg, printFPReg) (icode); stream "\n")) block; case flow of Unconditional l => stream("\tJump\tL" ^ Int.toString l ^ "\n") | Conditional {condition, trueJump, falseJump, ccRef, ...} => ( stream "\tJump"; printCondition(condition, stream); stream "\t"; printCC(ccRef, stream); stream " L"; stream (Int.toString trueJump); stream " else L"; stream (Int.toString falseJump); stream "\n" ) | ExitCode => () | IndexedBr _ => () | SetHandler{handler, continue} => stream(concat["\tSetHandler\tH", Int.toString handler, "\n", "\tJump\tL", Int.toString continue, "\n"]) | UnconditionalHandle handler => stream("\tJump\tH" ^ Int.toString handler ^ "\n") | ConditionalHandle{handler, continue} => stream(concat["\tJump\tL", Int.toString continue, " or H", Int.toString handler, "\n"]) ) in fun printPReg stream (PReg i) = stream("R" ^ Int.toString i) fun printOptPReg stream ZeroReg = stream "Zero" | printOptPReg stream (SomeReg reg) = printPReg stream reg fun printXReg stream (XReg w) = stream("X" ^ Int.toString(Word8.toInt w)) | printXReg stream XZero = stream "XZ" | printXReg stream XSP = stream "XSP" fun printVReg stream (VReg w) = stream("V" ^ Int.toString(Word8.toInt w)) fun printICodeAbstract(blockVec, stream) = Vector.appi(printBlock(stream, printPReg stream, printOptPReg stream, printPReg stream)) blockVec and printICodeConcrete(blockVec, stream) = Vector.appi(printBlock(stream, printXReg stream, printXReg stream, printVReg stream)) blockVec end (* Only certain bit patterns are allowed in a logical immediate instruction but the encoding is complex so it's easiest to inherit the test from the assembler layer. *) local fun optow OpSize32 = WordSize32 | optow OpSize64 = WordSize64 in fun isEncodableBitPattern(v, w) = Arm64Code.isEncodableBitPattern(v, optow w) end (* This generates a BitField instruction with the appropriate values for immr and imms. 
*) fun shiftConstant{ direction, dest, source, shift, opSize } = let val (isSigned, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (false, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (false, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (false, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (false, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (true, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (true, shift, 0wx1f) in BitFieldShift{ source=source, dest=dest, isSigned=isSigned, length=opSize, immr=immr, imms=imms } end structure Sharing = struct type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML index 99da294c..bb86fe02 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML @@ -1,1219 +1,1221 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeToArm64Code( structure Arm64PreAssembly: ARM64PREASSEMBLY structure Debug: DEBUG structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET structure Pretty: PRETTY structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end sharing Arm64PreAssembly.Sharing = Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ICODEGENERATE = struct open Identify open Arm64ICode open Arm64PreAssembly open Address exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) (* These aren't currently used for anything. *) val workReg1 = X16 and workReg2 = X17 fun icodeToArm64Code {blocks: basicBlockConcrete vector, functionName, stackRequired, debugSwitches, resultClosure, profileObject, ...} = let val numBlocks = Vector.length blocks (* Load from and store to stack. 
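The scaled forms only allow a 12-bit unsigned offset, so for example word offset 2 becomes a single LoadRegScaled/StoreRegScaled, while word offset 5000 first loads the offset with LoadNonAddr and then uses a register-indexed access.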
*) fun loadFromStack(destReg, wordOffset, code) = if wordOffset >= 4096 then (LoadRegIndexed{regT=destReg, regN=X_MLStackPtr, regM=destReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(destReg, Word64.fromInt wordOffset)] @ code else (LoadRegScaled{regT=destReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code and storeToStack(sourceReg, wordOffset, workReg, code) = if wordOffset >= 4096 then (StoreRegIndexed{regT=sourceReg, regN=X_MLStackPtr, regM=workReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(workReg, Word64.fromInt wordOffset)] @ code else (StoreRegScaled{regT=sourceReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code datatype srcAndDest = IsInReg of xReg | IsOnStack of int local (* The registers are numbered from 0. Choose values that don't conflict with the stack addresses. *) fun regNo(XReg r) = ~1 - Word8.toInt r | regNo _ = ~1 - 31 type node = {src: srcAndDest, dst: srcAndDest } fun nodeAddress({dst=IsInReg r, ...}: node) = regNo r | nodeAddress({dst=IsOnStack a, ...}) = a fun arcs({src=IsOnStack wordOffset, ...}: node) = [wordOffset] | arcs{src=IsInReg r, ...} = [regNo r] in val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } end (* This is a general function for moving values into registers or to the stack where it is possible that the source values might also be in use as destinations. The stack is used for destinations only for tail recursive calls. *) fun moveMultipleValues(moves, code) = let fun moveValues ([], code) = code (* We're done. *) | moveValues (arguments, code) = let (* stronglyConnectedComponents does two things. It detects loops where it's not possible to move items without breaking the loop but more importantly it orders the dependencies so that if there are no loops we can load the source and store it in the destination knowing that we won't overwrite anything we might later need. *) val ordered = stronglyConnected arguments fun loadIntoReg(IsInReg sReg, dReg, code) = if sReg = dReg then code else (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | loadIntoReg(IsOnStack wordOffset, dReg, code) = loadFromStack(dReg, wordOffset, code) fun moveEachValue ([], code) = code | moveEachValue ([{dst=IsInReg dReg, src}] :: rest, code) = moveEachValue(rest, loadIntoReg(src, dReg, code)) | moveEachValue ([{dst=IsOnStack wordOffset, src=IsInReg sReg}] :: rest, code) = (* Storing into the stack. *) moveEachValue(rest, storeToStack(sReg, wordOffset, workReg1, code)) | moveEachValue ([{dst=IsOnStack dstOffset, src=IsOnStack srcOffset}] :: rest, code) = (* Copy a stack location - needs a load and store unless the address is the same. *) if dstOffset = srcOffset then moveEachValue(rest, code) else moveEachValue(rest, storeToStack(workReg2, dstOffset, workReg1, loadFromStack(workReg2, srcOffset, code))) | moveEachValue((cycle as first :: _ :: _) :: rest, code) = (* We have a cycle. *) let (* We need to exchange some of the arguments. Doing an exchange here will set the destination with the correct source. However we have to process every subsequent entry with the swapped registers. That may well mean that one of those entries becomes trivial. We also need to rerun stronglyConnectedComponents on at least the rest of this cycle. It's easiest to flatten the rest and do everything. *) (* Exchange the source and destination. We don't have an exchange instruction and there's a further complication. 
We could be copying between stack locations and their offsets could be > 4096. Since we've only got two work registers we need to use the hardware stack as an extra location. Stack-stack exchange is very rare so the extra overhead to handle the general case is worth it. *) local fun storeToDest(sReg, IsInReg dReg, _, code) = (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | storeToDest(sReg, IsOnStack wordOffset, work, code) = storeToStack(sReg, wordOffset, work, code) in fun exchange(IsInReg arg1Reg, arg2, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg1Reg}) :: storeToDest(arg1Reg, arg2, workReg1, loadIntoReg(arg2, workReg2, code)) | exchange(arg1, IsInReg arg2Reg, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg2Reg}) :: storeToDest(arg2Reg, arg1, workReg1, loadIntoReg(arg1, workReg2, code)) | exchange(arg1, arg2, code) = (* The hardware stack must be 16-byte aligned. *) storeToDest(workReg2, arg2, workReg1, (LoadRegUnscaled{regT=workReg2, regN=XSP, byteOffset=16, loadType=Load64, unscaledType=PostIndex}) :: storeToDest(workReg2, arg1, workReg1, loadIntoReg(arg2, workReg2, (StoreRegUnscaled{regT=workReg2, regN=XSP, byteOffset= ~16, loadType=Load64, unscaledType=PreIndex}) :: loadIntoReg(arg1, workReg2, code)))) end (* Try to find either a register-register move or a register-stack move. If not use the first. If there's a stack-register move there will also be a register-stack so we don't need to look for both. *) val {dst=selectDst, src=selectSrc} = first (* This includes this entry but after the swap we'll eliminate it. *) val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) val destAsSource = selectDst fun match(s1: srcAndDest, s2) = s1 = s2 fun swapSources{src, dst} = if match(src, selectSrc) then {src=destAsSource, dst=dst} else if match(src, destAsSource) then {src=selectSrc, dst=dst} else {src=src, dst=dst} val exchangeCode = exchange(selectDst, selectSrc, code) in moveValues(List.map swapSources flattened, exchangeCode) end | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *) raise InternalError "moveEachValue - empty set" in moveEachValue(ordered, code) end in moveValues(moves, code) end (* Where we have multiple specific registers as either source or destination there is the potential that a destination register if currently in use as a source. *) fun moveMultipleRegisters(regPairList, code) = let val regPairsAsDests = List.map(fn {src, dst} => {src=IsInReg src, dst=IsInReg dst}) regPairList in moveMultipleValues(regPairsAsDests, code) end fun moveIfNecessary({src, dst}, code) = if src = dst then code else MoveXRegToXReg{sReg=src, dReg=dst} :: code (* Add a constant word to the source register and put the result in the destination. regW is used as a work register if necessary. This is used both for addition and subtraction. *) fun addConstantWord({regS, regD, value=0w0, ...}, code) = if regS = regD then code else MoveXRegToXReg{sReg=regS, dReg=regD} :: code | addConstantWord({regS, regD, regW, value}, code) = let (* If we have to load the constant it's better if the top 32-bits are zero if possible. *) val (isSub, unsigned) = if value > Word64.<<(0w1, 0w63) then (true, ~ value) else (false, value) in if unsigned < Word64.<<(0w1, 0w24) then (* We can put up to 24 in a shifted and an unshifted constant. 
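For example, adding 0x12345 is split into a shifted immediate of 0x12 (i.e. 0x12000) followed by an unshifted immediate of 0x345, giving two AddImmediate instructions; a value such as 0x345 needs only one.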
*) let val w = Word.fromLarge(Word64.toLarge unsigned) val high = Word.andb(Word.>>(w, 0w12), 0wxfff) val low = Word.andb(w, 0wxfff) val addSub = if isSub then SubImmediate else AddImmediate in if high <> 0w0 then ( (if low <> 0w0 then [addSub{regN=regD, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64}] else []) @ addSub{regN=regS, regD=regD, immed=high, shifted=true, setFlags=false, opSize=OpSize64} :: code ) else addSub{regN=regS, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64} :: code end else let (* To minimise the constant and increase the chances that it will fit in a single word look to see if we can shift it. *) fun getShift(value, shift) = if Word64.andb(value, 0w1) = 0w0 then getShift(Word64.>>(value, 0w1), shift+0w1) else (value, shift) val (shifted, shift) = getShift(unsigned, 0w0) in code <::> LoadNonAddr(regW, shifted) <::> (if isSub then SubShiftedReg else AddShiftedReg) {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift, setFlags=false, opSize=OpSize64} end end val labelMaker = createLabelMaker() val startOfFunctionLabel = createLabel labelMaker (* Used for recursive calls/jumps *) val blockToLabelMap = Vector.tabulate(numBlocks, fn _ => createLabel labelMaker) fun getBlockLabel blockNo = Vector.sub(blockToLabelMap, blockNo) fun codeExtended _ (MoveRegister{source, dest, ...}, code) = moveIfNecessary({src=source, dst=dest}, code) | codeExtended _ (LoadNonAddressConstant{source, dest, ...}, code) = code <::> LoadNonAddr(dest, source) | codeExtended _ (LoadAddressConstant{source, dest, ...}, code) = code <::> LoadAddr(dest, source) | codeExtended _ (LoadWithConstantOffset{dest, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then LoadRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate} :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in LoadRegScaled{regT=dest, regN=base, unitOffset=unitOffset, loadType=loadType} :: code end | codeExtended _ (LoadFPWithConstantOffset{dest, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (LoadFPRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (LoadFPRegScaled{regT=dest, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (LoadWithIndexedOffset{dest, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (LoadRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithIndexedOffset{dest, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. 
*) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (LoadFPRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (GetThreadId { dest}, code) = (* Load the thread id. This is always a 64-bit value. *) (LoadRegScaled{regT=dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code | codeExtended _ (ObjectIndexAddressToAbsolute{source, dest, ...}, code) = (AddShiftedReg{regM=source, regN=X_Base32in64, regD=dest, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code | codeExtended _ (AbsoluteToObjectIndex{source, dest, ...}, code) = let val destReg = dest in code <::> (SubShiftedReg{regM=X_Base32in64, regN=source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> (shiftConstant{shift=0w2, regN=destReg, regD=destReg, direction=ShiftRightLogical, opSize=OpSize64}) end | codeExtended _ (AllocateMemoryFixed{ bytesRequired, dest, saveRegs }, code) = code <::> AllocateMemoryFixedSize{ bytes=Word.fromLarge bytesRequired, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (AllocateMemoryVariable{ size, dest, saveRegs }, code) = code <::> AllocateMemoryVariableSize{ sizeReg=size, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (InitialiseMem{ size, addr, init}, code) = let val sizeReg = size and addrReg = addr and initReg = init val exitLabel = createLabel labelMaker and loopLabel = createLabel labelMaker (* This uses a loop to initialise. It's possible the size is zero so we have to check at the top of the loop. *) val (bShift, offset, loadType) = if is32in64 then (0w2, ~4, Load32) else (0w3, ~8, Load64) in code <::> (* Add the length in bytes so we point at the end. *) AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, shift=ShiftLSL bShift, setFlags=false, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Are we at the start? *) SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, exitLabel) <::> StoreRegUnscaled{regT=initReg, regN=workReg1, byteOffset=offset, loadType=loadType, unscaledType=PreIndex } <::> UnconditionalBranch loopLabel <::> SetLabel exitLabel end | codeExtended _ (BeginLoop, code) = code | codeExtended _ (JumpLoop{regArgs, stackArgs, checkInterrupt}, code) = let (* TODO: We could have a single list and use ArgOnStack and ArgInReg to distinguish. *) fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {wordOffset, src, ...} => {src=convertArg src, dst=IsOnStack wordOffset}) stackArgs val extRegArgs = map (fn {dst, src} => {src=convertArg src, dst=convertArg(ArgInReg dst)}) regArgs val code2 = moveMultipleValues(extStackArgs @ extRegArgs, code) in case checkInterrupt of NONE => code2 | SOME saveRegs => let val skipCheck = createLabel labelMaker in code2 <::> (* Put in stack-check code to allow this to be interrupted. 
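The test subtracts the limit from X_MLStackPtr and branches on carry-set (no borrow), so the RTS trap is skipped whenever the stack pointer is still at or above the limit.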
*) LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} <::> SubShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, skipCheck) <::> RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=saveRegs} <::> SetLabel skipCheck end end | codeExtended _ (StoreWithConstantOffset{source, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then (StoreRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in (StoreRegScaled{regT=source, regN=base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithConstantOffset{source, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (StoreFPRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (StoreFPRegScaled{regT=source, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (StoreWithIndexedOffset{source, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (StoreRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithIndexedOffset{source, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (StoreFPRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (AddSubImmediate{ source, dest, immed, isAdd, length, ccRef}, code) = let val destReg = dest in ((if isAdd then AddImmediate else SubImmediate) {regN=source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (AddSubRegister{ base, shifted, dest, isAdd, length, ccRef, shift}, code) = let val destReg = dest in ( (if isAdd then AddShiftedReg else SubShiftedReg) {regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalImmediate{ source, dest, immed, logOp, length, ccRef}, code) = let val destReg = dest in (BitwiseLogical{regN=source, regD=destReg, bits=immed, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalRegister{ base, shifted, dest, logOp, length, ccRef, shift}, code) = let (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. 
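(These are the BIC, ORN and EON encodings.)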
*) val destReg = dest in (LogicalShiftedReg{regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (ShiftRegister{ direction, dest, source, shift, opSize }, code) = (ShiftRegisterVariable{regN=source, regM=shift, regD=dest, shiftDirection=direction, opSize=opSize}) :: code | codeExtended _ (Multiplication{ kind, dest, sourceA, sourceM, sourceN }, code) = let val destReg = dest and srcAReg = sourceA and srcNReg = sourceN and srcMReg = sourceM in (MultiplyAndAddSub{regM=srcMReg, regN=srcNReg, regA=srcAReg, regD=destReg, multKind=kind}) :: code end | codeExtended _ (Division{ isSigned, dest, dividend, divisor, opSize }, code) = (DivideRegs{regN=dividend, regM=divisor, regD=dest, isSigned=isSigned, opSize=opSize}) :: code | codeExtended _ (BeginFunction{regArgs, ...}, code) = let val skipCheck = createLabel labelMaker val defaultWords = 10 (* This is wired into the RTS. *) val workRegister = workReg1 val debugTrapAlways = false (* Can be set to true for debugging *) (* Test with either the stack-pointer or a high-water value. The RTS assumes that X9 has been used as the high-water if it is called through stackOverflowXCallOffset rather than stackOverflowCallOffset *) val (testReg, entryPt, code1) = if stackRequired <= defaultWords then (X_MLStackPtr, stackOverflowCallOffset, code) else (X9, stackOverflowXCallOffset, addConstantWord({regS=X_MLStackPtr, regD=X9, regW=workRegister, value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt stackRequired}, code)) (* Skip the RTS call if there is enough stack. N.B. The RTS can modify the end-of-stack value to force a trap here even if there is really enough stack. *) val code2 = (if debugTrapAlways then [] else [ConditionalBranch(CondCarrySet, skipCheck), SubShiftedReg{regM=workRegister, regN=testReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}]) @ (* Load the end-of-stack value. *) LoadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} :: code1 val code3 = code2 <::> RTSTrap{rtsEntry=entryPt, work=workReg1, save=List.map #2 regArgs} <::> SetLabel skipCheck val usedRegs = regArgs fun mkPair(pr, rr) = {src=rr,dst=pr} val regPairs = List.map mkPair usedRegs in moveMultipleRegisters(regPairs, code3) end | codeExtended _ (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, code) = let fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {stack, src} => {dst=IsOnStack(stack+currStackSize), src=convertArg src}) stackArgs val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs (* Tail recursive calls are complicated because we generally have to overwrite the existing stack. That means storing the arguments in the right order to avoid overwriting a value that we are using for a different argument. *) fun codeTailCall(arguments, stackAdjust, code) = if stackAdjust < 0 then let (* If the function we're calling takes more arguments on the stack than the current function we will have to extend the stack. Do that by pushing the argument whose offset is at -1. Then adjust all the offsets and repeat. *) val {src=argM1, ...} = valOf(List.find(fn {dst=IsOnStack ~1, ...} => true | _ => false) arguments) fun renumberArgs [] = [] | renumberArgs ({dst=IsOnStack ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. 
*) | renumberArgs ({dst, src} :: args) = let val newDest = case dst of IsOnStack d => IsOnStack(d+1) | regDest => regDest val newSrc = case src of IsOnStack wordOffset => IsOnStack(wordOffset+1) | other => other in {dst=newDest, src=newSrc} :: renumberArgs args end val pushCode = case argM1 of IsOnStack wordOffset => (StoreRegUnscaled{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: loadFromStack(workReg2, wordOffset, code) | IsInReg reg => (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: code in codeTailCall(renumberArgs arguments, stackAdjust+1, pushCode) end else let val loadArgs = moveMultipleValues(arguments, code) in if stackAdjust = 0 then loadArgs else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt stackAdjust * Word.toLarge nativeWordSize}, loadArgs) end val setArgumentsCode = codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code) val jumpToFunctionCode = case callKind of Recursive => [(UnconditionalBranch startOfFunctionLabel)] | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else [(BranchReg{regD=workReg1, brRegType=BRRBranch}), (LoadAddr(workReg1, m))] | FullCall => if is32in64 then [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}, AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}] else [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64}] in jumpToFunctionCode @ setArgumentsCode end | codeExtended _ (FunctionCall{callKind, regArgs=regArgs, stackArgs=stackArgs, dests, saveRegs, ...}, code) = let local fun pushStackArgs ([], _, code) = code | pushStackArgs (ArgOnStack {wordOffset, ...} ::args, argNum, code) = let (* Have to adjust the offsets of stack arguments. *) val adjustedOffset = wordOffset+argNum in pushStackArgs(args, argNum+1, loadFromStack(workReg1, adjustedOffset, code) <::> StoreRegUnscaled{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) end | pushStackArgs (ArgInReg reg ::args, argNum, code) = pushStackArgs(args, argNum+1, code <::> (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *)) (* We have to adjust any stack offset to account for the arguments we've pushed. *) val numStackArgs = List.length stackArgs fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack(wordOffset+numStackArgs) | convertArg(ArgInReg reg) = IsInReg(reg) in val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val loadArgs = moveMultipleValues(extRegArgs, pushedArgs) end (* Push the registers before the call and pop them afterwards. 
*) fun makeSavesAndCall([], code) = ( case callKind of Recursive => code <::> (BranchAndLink startOfFunctionLabel) | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else code <::> (LoadAddr(workReg1, m)) <::> (BranchReg{regD=workReg1, brRegType=BRRAndLink}) | FullCall => if is32in64 then code <::> AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} else code <::> LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} ) | makeSavesAndCall(reg::regs, code) = let val areg = reg in makeSavesAndCall(regs, code <::> StoreRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) <::> LoadRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex} end (* Results. These go from the specific result register into the allocated register. *) val resultPairs = List.map(fn (pr, rr) => {src=rr,dst=pr}) dests in moveMultipleRegisters(resultPairs, makeSavesAndCall(saveRegs, loadArgs)) end | codeExtended _ (ReturnResultFromFunction { results, returnReg, numStackArgs }, code) = let fun resetStack(0, code) = code | resetStack(nItems, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) (* Return results. This goes from the allocated register into the specific register rr. *) val resultPairs = List.map(fn (pr, rr) => {src=pr,dst=rr}) results in BranchReg{regD=returnReg, brRegType=BRRReturn} :: resetStack(numStackArgs, moveMultipleRegisters(resultPairs, code)) end | codeExtended _ (RaiseExceptionPacket{ packetReg }, code) = (* We need a work register here. It can be any register other than X0 since we don't preserve registers across calls. *) (* Copy the handler "register" into the stack pointer. Then jump to the address in the first word. The second word is the next handler. This is set up in the handler. We have a lot more raises than handlers since most raises are exceptional conditions such as overflow so it makes sense to minimise the code in each raise. *) moveIfNecessary({src=packetReg, dst=X0}, code) <::> LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch } | codeExtended _ (PushToStack{ source, copies, ... }, code) = let val reg = source val _ = copies > 0 orelse raise InternalError "PushToStack: copies<1" fun pushn(0, c) = c | pushn(n, c) = pushn(n-1, (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: c) in pushn(copies, code) end | codeExtended _ (LoadStack{ dest, wordOffset, ... }, code) = loadFromStack(dest, wordOffset, code) | codeExtended _ (StoreToStack{ source, stackOffset, ... }, code) = (* Store into the stack to set a field of a container. Always 64-bits. *) storeToStack(source, stackOffset, workReg1, code) | codeExtended _ (ContainerAddress{ dest, stackOffset, ... }, code) = (* Set the register to an offset in the stack. 
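For example, with 8-byte words a container at stack offset 3 becomes a single AddImmediate of 24 bytes from X_MLStackPtr; a byte offset of 4096 or more is first loaded with LoadNonAddr and added as a register.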
*) let val _ = stackOffset >= 0 orelse raise InternalError "codeGenICode: ContainerAddress - negative offset" val byteOffset = stackOffset * Word.toInt nativeWordSize in if byteOffset >= 4096 then code <::> LoadNonAddr(dest, Word64.fromInt byteOffset) <::> AddShiftedReg{regN=X_MLStackPtr, regM=dest, regD=dest, shift=ShiftNone, setFlags=false, opSize=OpSize64} else code <::> AddImmediate{regN=X_MLStackPtr, regD=dest, immed=Word.fromInt byteOffset, shifted=false, setFlags=false, opSize=OpSize64} end | codeExtended _ (ResetStackPtr{ numWords, ... }, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt numWords * Word.toLarge nativeWordSize}, code) | codeExtended _ (TagValue{ source, dest, isSigned=_, opSize }, code) = (* Shift left by one bit and add one. *) code <::> shiftConstant{ direction=ShiftLeft, regD=dest, regN=source, shift=0w1, opSize=opSize } <::> BitwiseLogical{ bits=0w1, regN=dest, regD=dest, opSize=opSize, setFlags=false, logOp=LogOr} | codeExtended _ (UntagValue{ source, dest, isSigned, opSize }, code) = code <::> shiftConstant{ direction=if isSigned then ShiftRightArithmetic else ShiftRightLogical, regD=dest, regN=source, shift=0w1, opSize=opSize } | codeExtended _ (BoxLarge{ source, dest, saveRegs }, code) = boxSysWord({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (UnboxLarge{ source, dest }, code) = let (* Unbox a large word. The argument is a poly word. *) val destReg = dest and srcReg = source in if is32in64 then LoadRegScaled{regT=destReg, regN=destReg, unitOffset=0, loadType=Load64} :: AddShiftedReg{regM=srcReg, regN=X_Base32in64, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else LoadRegScaled{regT=destReg, regN=srcReg, unitOffset=0, loadType=Load64} :: code end | codeExtended _ (BoxTagFloat{ floatSize=Double64, source, dest, saveRegs }, code) = boxDouble({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (BoxTagFloat{ floatSize=Float32, source, dest, saveRegs }, code) = let val floatReg = source and fixedReg = dest in if is32in64 then boxFloat({source=floatReg, destination=fixedReg, workReg=workReg1, saveRegs=saveRegs}, code) else code <::> MoveFPToGeneral{regN=floatReg, regD=fixedReg, floatSize=Float32} <::> shiftConstant{ direction=ShiftLeft, shift=0w32, regN=fixedReg, regD=fixedReg, opSize=OpSize64} <::> BitwiseLogical{ bits=0w1, regN=fixedReg, regD=fixedReg, opSize=OpSize64, setFlags=false, logOp=LogOr} end | codeExtended _ (UnboxTagFloat { floatSize=Double64, source, dest }, code) = if is32in64 then code <::> AddShiftedReg{regM=source, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadFPRegScaled{regT=dest, regN=workReg1, unitOffset=0, floatSize=Double64} else code <::> LoadFPRegScaled{regT=dest, regN=source, unitOffset=0, floatSize=Double64} | codeExtended _ (UnboxTagFloat { floatSize=Float32, source, dest }, code) = (* This is tagged in native 64-bits. In 32-in-64 we're loading 32-bits so we can use an indexed load directly. 
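In the native case the 32 float bits are held in the top half of the tagged word (see BoxTagFloat above, which shifts left by 32 and sets the tag bit), so untagging is a logical right shift by 32 followed by a move into the FP register.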
*) if is32in64 then code <::> LoadFPRegIndexed{regN=X_Base32in64, regM=source, regT=dest, option=ExtUXTX ScaleOrShift, floatSize=Float32} else code <::> shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=source, regD=workReg1, opSize=OpSize64} <::> MoveGeneralToFP{regN=workReg1, regD=dest, floatSize=Float32} | codeExtended _ (LoadAcquire{dest, base, loadType, ...}, code) = LoadAcquireReg{regT=dest, regN=base, loadType=loadType} :: code | codeExtended _ (StoreRelease{source, base, loadType, ...}, code) = StoreReleaseReg{regT=source, regN=base, loadType=loadType} :: code | codeExtended _ (BitFieldShift{ source, dest, isSigned, length, immr, imms }, code) = BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=if isSigned then BFSigned else BFUnsigned, opSize=length} :: code | codeExtended _ (BitFieldInsert{ source, destAsSource, dest, length, immr, imms }, code) = let (* If we're using BitFieldMove we retain some of the bits of the destination. The higher levels require us to treat that as a source. *) val _ = source = dest andalso raise InternalError "codeExtended: bitfield: dest=source" in BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=BFInsert, opSize=length} :: moveIfNecessary({src=destAsSource, dst=dest}, code) end | codeExtended {flow} (IndexedCaseOperation{testReg}, code) = let (* testReg contains the original value after the lowest value has been subtracted. Since both the original value and the lowest value were tagged it contains a shifted but untagged value. *) (* This should only be within a block with an IndexedBr flow type. *) val cases = case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation" val caseLabels = map getBlockLabel cases val tableLabel = createLabel labelMaker in code <::> LoadLabelAddress(workReg1, tableLabel) <::> (* Add the value shifted by one since it's already shifted. *) AddShiftedReg{regN=workReg1, regD=workReg1, regM=testReg, shift=ShiftLSL 0w1, setFlags=false, opSize=OpSize64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch} <::> BranchTable{ startLabel=tableLabel, brTable=caseLabels } end | codeExtended {flow} (PushExceptionHandler, code) = let (* This should only be within a block with a SetHandler flow type. *) val handleLabel = case flow of SetHandler{ handler, ...} => handler | _ => raise InternalError "codeGenICode: PushExceptionHandler" val labelRef = getBlockLabel handleLabel in (* Push the old handler and the handler entry point and set the "current handler" to point to the stack after we've pushed these. *) code <::> LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadLabelAddress(workReg2, labelRef) <::> StoreRegPair{regT1=workReg2, regT2=workReg1, regN=X_MLStackPtr, unitOffset= ~2, unscaledType=PreIndex, loadType=Load64} <::> StoreRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} end | codeExtended _ (PopExceptionHandler, code) = (* Remove and discard the handler we've set up. Pop the previous handler and put into "current handler". 
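The pair load pops the handler entry address into XZero, i.e. discards it, and pops the saved previous handler into the work register; that is then stored back into the handler slot in the assembly-interface block.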
*) code <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} | codeExtended _ (BeginHandler{packetReg}, code) = let val beginHandleCode = code <::> (* The exception raise code resets the stack pointer to the value in the exception handler so this is probably redundant. Leave it for the moment, *) LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} in moveIfNecessary({src=X0, dst=packetReg }, beginHandleCode) end | codeExtended _ (CompareByteVectors{vec1Addr, vec2Addr, length, ...}, code) = let (* Construct a loop to compare two vectors of bytes. *) val vec1Reg = vec1Addr and vec2Reg = vec2Addr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker in code <::> (* Set the CC to Equal before we start in case length = 0 *) SubShiftedReg{regM=lenReg, regN=lenReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Start of loop *) CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Go to the end when len = zero *) (* Load the bytes for the comparison and increment each. *) LoadRegUnscaled{regT=workReg1, regN=vec1Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> LoadRegUnscaled{regT=workReg2, regN=vec2Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) (* Compare *) SubShiftedReg{regM=workReg2, regN=workReg1, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, loopLabel) <::> (* Loop if they're equal *) SetLabel exitLabel end | codeExtended _ (BlockMove{srcAddr, destAddr, length, isByteMove}, code) = let (* Construct a loop to move the data. *) val srcReg = srcAddr and destReg = destAddr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker val (offset, loadType) = if isByteMove then (1, Load8) else if is32in64 then (4, Load32) else (8, Load64) in code <::> SetLabel loopLabel (* Start of loop *) <::> CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Exit when length = 0 *) LoadRegUnscaled{regT=workReg1, regN=srcReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> StoreRegUnscaled{regT=workReg1, regN=destReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) UnconditionalBranch loopLabel <::> (* Back to the start *) SetLabel exitLabel end | codeExtended _ (AddSubXSP{ source, dest, isAdd }, code) = let val allocFreeCode = (if isAdd then AddExtendedReg else SubExtendedReg) {regM=source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64} :: code in case dest of XZero => allocFreeCode | destReg => (* We have to use add here to get the SP into the destination instead of the usual move. 
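The ordinary register move is an alias of ORR with XZR and cannot encode SP; a MOV to or from SP is an alias of ADD immediate with a zero immediate, which is what we generate here.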
*) AddImmediate{regN=XSP, regD=destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64} :: allocFreeCode end | codeExtended _ (TouchValue _, code) = code (* Don't need to do anything now. *) (* Used in mutex operations. *) | codeExtended _ (LoadAcquireExclusive{ base, dest }, code) = LoadAcquireExclusiveRegister{regN=base, regT=dest} :: code | codeExtended _ (StoreReleaseExclusive{ base, source, result }, code) = StoreReleaseExclusiveRegister{regS=result, regT=source, regN=base} :: code | codeExtended _ (MemoryBarrier, code) = code <::> MemBarrier | codeExtended _ (ConvertIntToFloat{ source, dest, srcSize, destSize}, code) = (CvtIntToFP{regN=source, regD=dest, floatSize=destSize, opSize=srcSize}) :: code | codeExtended _ (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, code) = (CvtFloatToInt{regN=source, regD=dest, round=rounding, floatSize=srcSize, opSize=destSize}) :: code | codeExtended _ (UnaryFloatingPt{ source, dest, fpOp}, code) = (FPUnaryOp{regN=source, regD=dest, fpOp=fpOp}) :: code | codeExtended _ (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, code) = (FPBinaryOp{regN=arg1, regM=arg2, regD=dest, floatSize=opSize, fpOp=fpOp}) :: code | codeExtended _ (CompareFloatingPoint{ arg1, arg2, opSize, ...}, code) = (FPComparison{regN=arg1, regM=arg2, floatSize=opSize}) :: code + | codeExtended _ (CPUYield, code) = code <::> Yield + | codeExtended _ (CacheCheck{ arg1, arg2 }, code) = let val okLabel = createLabel labelMaker in (code <::> SubShiftedReg {regM=arg1, regN=arg2, regD=XZero, shift=ShiftNone, opSize=OpSize64, setFlags=true} <::> ConditionalBranch(CondEqual, okLabel) <::> MoveXRegToXReg{sReg=XZero, dReg=X16} <::> LoadRegScaled{regT=X16, regN=X16, unitOffset=0, loadType=Load16} <::> SetLabel okLabel) end local (* processed - set to true when a block has been processed. *) val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) (* Find the blocks that reference this one. This isn't essential but allows us to try to generate blocks in the order of the control flow. This in turn may allow us to use short branches rather than long ones. *) val labelRefs = Array.array(numBlocks, []) datatype flowCode = FlowCodeSimple of int | FlowCodeCMove of {code: precode list, trueJump: int, falseJump: int} (* Process this recursively to set the references. If we have unreachable blocks, perhaps because they've been merged, we don't want to include them in the reference counting. This shouldn't happen now that IdentifyReferences removes unreferenced blocks. *) fun setReferences fromLabel toLabel = case Array.sub(labelRefs, toLabel) of [] => (* Not yet visited at all. *) let val BasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) val refs = case flow of ExitCode => [] | Unconditional lab => [lab] | Conditional{trueJump, falseJump, ... } => [trueJump, falseJump] | IndexedBr labs => labs | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val () = if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () in List.app (setReferences toLabel) refs end | refs => (* We've visited this at least once. Just add us to the list. *) Array.update(labelRefs, toLabel, fromLabel :: refs) val _ = setReferences 0 0 (* Process the blocks. We keep the "stack" explicit rather than using recursion because this allows us to select both arms of a conditional branch sooner.
*) fun genCode(toDo, lastFlow, code) = case List.filter (not o haveProcessed) toDo of [] => let (* There's nothing left to do. We may need to add a final branch to the end. *) val finalBranch = case lastFlow of ExitCode => [] | IndexedBr _ => [] | Unconditional dest => [(UnconditionalBranch(getBlockLabel dest))] | Conditional { condition, trueJump, falseJump, ...} => [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] | SetHandler { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] in finalBranch @ code (* Done. *) end | stillToDo as head :: _ => let local (* Check the references. If all the sources that lead up to this have already been we won't have any backward jumps. *) fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) val continuation = case lastFlow of ExitCode => NONE | IndexedBr _ => NONE (* We could put the last branch in here. *) | Unconditional dest => if not (haveProcessed dest) andalso available dest then SOME(FlowCodeSimple dest) else NONE | Conditional {trueJump, falseJump, condition, ...} => (* We can usually choose either destination and in nearly all cases it won't matter. The default branch is not to take forward jumps so if there is reason to believe that one branch is more likely we should follow that branch now and leave the other. If we have Cond(No)Overflow we assume that overflow is unusual. If one branch raises an exception we assume that that is unusual. *) let val (first, second) = case (condition, Vector.sub(blocks, falseJump)) of (CondNoOverflow, _) => (trueJump, falseJump) | (_, BasicBlock{ flow=ExitCode, block, ...}) => if List.exists(fn RaiseExceptionPacket _ => true | _ => false) block then (trueJump, falseJump) else (falseJump, trueJump) | _ => (falseJump, trueJump) in if not (haveProcessed first) andalso available first then SOME(FlowCodeSimple first) else if not (haveProcessed second) andalso available second then SOME(FlowCodeSimple second) else NONE end | SetHandler { continue, ... } => (* We want the continuation if possible. We'll need a branch round the handler so that won't help. *) if not (haveProcessed continue) andalso available continue then SOME(FlowCodeSimple continue) else NONE | UnconditionalHandle _ => NONE | ConditionalHandle _ => NONE in (* First choice - continue the existing block. Second choice - the first item whose sources have all been processed. Third choice - something from the list. *) val picked = case continuation of SOME c => c | NONE => case List.find available stillToDo of SOME c => FlowCodeSimple c | NONE => FlowCodeSimple head end in case picked of FlowCodeSimple picked => let val () = Array.update(processed, picked, true) (* Code to terminate the previous block. *) val startCode = case lastFlow of ExitCode => [] | IndexedBr _ => [] | UnconditionalHandle _ => [] | Unconditional dest => if dest = picked then [] else [(UnconditionalBranch(getBlockLabel dest))] | ConditionalHandle { continue, ...} => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | SetHandler { continue, ... } => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | Conditional { condition, trueJump, falseJump, ...} => if picked = falseJump (* Usual case. 
*) then [(ConditionalBranch(condition, getBlockLabel trueJump))] else if picked = trueJump then (* We have a jump to the true condition. Invert the jump. This is more than an optimisation. Because this immediately precedes the true block we're not going to generate a label. *) [(ConditionalBranch(invertTest condition, getBlockLabel falseJump))] else [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] (* Code-generate the body with the code we've done so far at the end. Add a label at the start if necessary. *) local (* If the previous block dropped through to this and this was the only reference then we don't need a label. *) fun onlyJumpingHere (lab: int) = if lab <> picked then false else case Array.sub(labelRefs, picked) of [singleton] => singleton = lab | _ => false val noLabel = case lastFlow of ExitCode => picked = 0 (* Unless this was the first block. *) | Unconditional dest => onlyJumpingHere dest | Conditional { trueJump, falseJump, ...} => onlyJumpingHere trueJump orelse onlyJumpingHere falseJump | IndexedBr _ => false | SetHandler _ => false | UnconditionalHandle _ => false | ConditionalHandle { continue, ...} => onlyJumpingHere continue in val startLabel = if noLabel then [] else [(SetLabel(getBlockLabel picked))] end val BasicBlock { flow, block, ...} = Vector.sub(blocks, picked) local fun genCodeBlock(instr, code) = codeExtended {flow=flow} (instr, code) in val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] in genCode(addSet @ stillToDo, flow, bodyCode) end | FlowCodeCMove{code, trueJump, falseJump} => let (* We've generated a conditional move and possibly a return. If the trueJump and falseJump are only ever referenced from this block they're done, otherwise we still need to do them. *) val _ = case Array.sub(labelRefs, trueJump) of [_] => Array.update(processed, trueJump, true) | _ => () val _ = case Array.sub(labelRefs, falseJump) of [_] => Array.update(processed, falseJump, true) | _ => () val BasicBlock { flow, ...} = Vector.sub(blocks, trueJump) val addSet = case flow of ExitCode => [] | Unconditional dest => [dest] | _ => raise InternalError "FlowCodeCMove" in genCode(addSet @ stillToDo, flow, code) end end in val ops = genCode([0], ExitCode, [(SetLabel startOfFunctionLabel)]) end in generateFinalCode{instrs=List.rev ops, name=functionName, resultClosure=resultClosure, parameters=debugSwitches, profileObject=profileObject, labelMaker= labelMaker} end structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and xReg = xReg and vReg = vReg and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML index 80f41e32..bc9a0686 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML @@ -1,883 +1,886 @@ (* Copyright (c) 2021-2 David C.J. 
Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64IdentifyReferences( structure Arm64ICode: ARM64ICODE structure Debug: DEBUG structure IntSet: INTSET ): ARM64IDENTIFYREFERENCES = struct open Arm64ICode open IntSet type regState = { active: int, refs: int, pushState: bool, prop: regProperty } (* CC states before and after. Currently no instruction uses the condition; conditional branches are handled at the block level. The result of executing the instruction may be to set the condition code to a defined state, an undefined state or leave it unchanged. *) datatype outCCState = CCSet of ccRef | CCIndeterminate | CCUnchanged and inCCState = CCNeeded of ccRef | CCUnused datatype extendedBasicBlock = ExtendedBasicBlock of { block: {instr: iCodeAbstract, current: intSet, active: intSet, kill: intSet } list, flow: controlFlow, locals: intSet, (* Defined and used entirely within the block. *) imports: intSet, (* Defined outside the block, used inside it, but not needed afterwards. *) exports: intSet, (* Defined within the block, possibly used inside, but used outside. *) passThrough: intSet, (* Active throughout the block. May be referred to by it but needed afterwards. *) loopRegs: intSet, (* Destination registers for a loop. They will be updated by this block. *) initialStacks: intSet, (* Stack items required at the start i.e. imports+passThrough for stack items. *) inCCState: ccRef option, (* The state this block assumes. If SOME _ all predecessors must set it. *) outCCState: ccRef option (* The condition code set by this block. SOME _ if at least one successor needs it. *) } exception InternalError = Misc.InternalError (* Return the list of blocks that are the immediate successor of this. 
*) fun blockSuccessors(BasicBlock{flow, ...}) = successorBlocks flow fun getOptReg(SomeReg reg) = [reg] | getOptReg ZeroReg = [] fun getInstructionState(MoveRegister { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadNonAddressConstant { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAddressConstant { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadWithConstantOffset { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadFPWithConstantOffset { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadWithIndexedOffset { base, dest, index, ...}) = { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadFPWithIndexedOffset { base, dest, index, ...}) = { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(GetThreadId { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ObjectIndexAddressToAbsolute { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AbsoluteToObjectIndex { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AllocateMemoryFixed { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(AllocateMemoryVariable{size, dest, ...}) = { sources=[size], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(InitialiseMem{size, addr, init}) = { sources=[size, addr, init], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginLoop) = (* This is just a marker. It doesn't actually generate any code. 
*) { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(JumpLoop{regArgs, stackArgs, ...}) = let fun getSourceFromRegs({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (regSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=regSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(StoreWithConstantOffset { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreFPWithConstantOffset { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreWithIndexedOffset { base, source, index, ...}) = { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreFPWithIndexedOffset { base, source, index, ...}) = { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AddSubImmediate{ source, dest, ccRef, ... }) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(AddSubRegister{ base, shifted, dest, ccRef, ... }) = { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(LogicalImmediate{ source, dest, ccRef, ... }) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(LogicalRegister{ base, shifted, dest, ccRef, ... }) = { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(ShiftRegister{ source, shift, dest, ... }) = { sources=[source, shift], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(Multiplication{ dest, sourceA, sourceM, sourceN, ... }) = { sources=getOptReg sourceA @ [sourceM, sourceN], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(Division{ dest, dividend, divisor, ... }) = { sources=[dividend, divisor], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginFunction {regArgs, stackArgs, ...}) = { sources=[], dests=map #1 regArgs, sStacks=[], dStacks=stackArgs, ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(FunctionCall{regArgs, stackArgs, dests, containers, ...}) = let (* Non-tail-recursive. Behaves as a normal reference to sources. 
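The register and stack arguments are sources, the result registers are destinations, and the containers passed to the call are also counted among the stack sources.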
*) fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack(ArgInReg reg, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack(ArgOnStack { container, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (argSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=argSources, dests=List.map #1 dests, sStacks=stackSources @ containers, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(TailRecursiveCall{regArgs, stackArgs, ...}) = let (* Tail recursive call. References the argument sources but exits. *) fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (argSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=argSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(ReturnResultFromFunction{results, returnReg, ...}) = { sources=returnReg :: List.map #1 results, dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(RaiseExceptionPacket{packetReg}) = { sources=[packetReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(PushToStack{ source, container, ... }) = { sources=[source], dests=[], sStacks=[], dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadStack{ dest, container, ... }) = { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreToStack{ source, container, ... }) = (* Although this stores into the container it must already exist. *) { sources=[source], dests=[], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ContainerAddress{ dest, container, ... 
}) = { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ResetStackPtr _) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(TagValue{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(UntagValue{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BoxLarge{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(UnboxLarge{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BoxTagFloat{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(UnboxTagFloat{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAcquire { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreRelease { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BitFieldShift{ source, dest, ... }) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BitFieldInsert{ source, destAsSource, dest, ... }) = { sources=[source, destAsSource], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(IndexedCaseOperation{ testReg }) = { sources=[testReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(PushExceptionHandler) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(PopExceptionHandler) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginHandler{packetReg}) = (* The packet register is a destination since this provides its definition. 
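Control arrives in a handler directly from a raise with the packet in X0, as in BeginHandler in the code-generator above, so nothing earlier in the block supplies a value for it.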
*) { sources=[], dests=[packetReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = { sources=[vec1Addr, vec2Addr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } | getInstructionState(BlockMove{srcAddr, destAddr, length, ...}) = { sources=[srcAddr, destAddr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(AddSubXSP{source, dest, ...}) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(TouchValue{source}) = { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAcquireExclusive{base, dest}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreReleaseExclusive{base, source, result}) = { sources=[base] @ getOptReg source, dests=[result], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(MemoryBarrier) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ConvertIntToFloat{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ConvertFloatToInt{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(UnaryFloatingPt{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BinaryFloatingPoint{ arg1, arg2, dest, ...}) = { sources=[arg1, arg2], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(CompareFloatingPoint{ arg1, arg2, ccRef, ...}) = { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } + | getInstructionState(CPUYield) = + { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } + | getInstructionState(CacheCheck{ arg1, arg2}) = { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } (* These instructions can be eliminated if their register sources are not used. There may be other cases. *) fun eliminateable(MoveRegister _) = true | eliminateable(LoadNonAddressConstant _) = true | eliminateable(LoadAddressConstant _) = true | eliminateable(LoadWithConstantOffset _) = true | eliminateable(LoadWithIndexedOffset _) = true | eliminateable(ObjectIndexAddressToAbsolute _) = true | eliminateable(TagValue _) = true | eliminateable(UntagValue _) = true | eliminateable(BoxLarge _) = true | eliminateable(UnboxLarge _) = true | eliminateable _ = false fun identifyRegs(blockVector, pregProps): extendedBasicBlock vector * regState vector = let val maxPRegs = Vector.length pregProps val vectorLength = Vector.length blockVector (* Initial arrays - declarationArray is the set of registers given values by the block, importArray is the set of registers referenced by the block and not declared locally. 
*) val declarationArray = Array.array(vectorLength, emptySet) and importArray = Array.array(vectorLength, emptySet) val stackDecArray = Array.array(vectorLength, emptySet) and stackImportArray = Array.array(vectorLength, emptySet) and localLoopRegArray = Array.array(vectorLength, emptySet) (* References - this is used locally to see if a register is ever actually used and also included in the result which uses it as part of the choice of which register to spill. *) val regRefs = Array.array(maxPRegs, 0) (* Registers that must be pushed because they are required after a function call. For cache registers this means "discard". *) and requirePushOrDiscard = Array.array(maxPRegs, false) fun incrRef r = Array.update(regRefs, r, Array.sub(regRefs, r)+1) (* Contains the, possibly filtered, code for each block. *) val resultCode = Array.array(vectorLength, NONE) val ccInStates = Array.array(vectorLength, CCUnused) and ccOutStates = Array.array(vectorLength, CCIndeterminate) (* First pass - for each block build up the sets of registers defined and used in the block. We do this depth-first so that we can use "refs" to see if a register is used. If this is an instruction that can be eliminated we don't need to generate it and can ignore any references it makes. *) local fun blockScan blockNo = if isSome(Array.sub(resultCode, blockNo)) then () else let val () = Array.update(resultCode, blockNo, SOME []) (* Prevent looping. *) val thisBlock as BasicBlock { block, flow, ...} = Vector.sub(blockVector, blockNo) val successors = blockSuccessors thisBlock (* Visit everything reachable first. *) val () = List.app blockScan successors fun scanCode(instr, original as { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... }) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ccIn, ccOut, ... } = getInstructionState instr fun regNo(PReg i) = i and stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs (* If this instruction requires a cc i.e. is SetToCondition or X87FPGetCondition we need to set this as a requirement earlier. If this sets the CC and it is the condition we've been expecting we've satisfied it and can set the previous condition to Unused. We could use this to decide if a comparison is no longer required. That can only happen in very specific circumstances e.g. some tests in Test176.ML so it's not worthwhile. *) val newInCC = case (ccIn, ccOut, occIn) of (cc as CCNeeded _, _, _) => cc (* This instr needs a particular cc. *) | (CCUnused, CCSet _, _) => CCUnused | (CCUnused, _, occIn) => occIn (* If this instruction modifies the CC check to see if it is setting an requirement. *) val _ = case (occIn, ccOut) of (CCNeeded ccRIn, CCSet ccRout) => if ccRIn = ccRout then () else raise InternalError "CCCheck failed" | (CCNeeded _, CCIndeterminate) => raise InternalError "CCCheck failed" | _ => () (* The output CC is the last CC set. Tail instructions that don't change the CC state are ignored until we reach an instruction that sets it. *) val newOutCC = case occOut of CCUnchanged => ccOut | _ => occOut val instrLoopRegs = case instr of JumpLoop{regArgs, ...} => listToSet (map (regNo o #dst) regArgs) | _ => emptySet in if eliminateable instr andalso List.all(fn dReg => Array.sub(regRefs, dReg) = 0) destRegNos then original (* Don't include this instruction. *) else let (* Only mark the sources as referred after we know we're going to need this. 
In that way we may eliminate the instruction that created this source. *) val () = List.app incrRef sourceRegNos in { code = instr :: code, decs = union(listToSet destRegNos, decs), refs = union(listToSet sourceRegNos, refs), sDecs = union(listToSet stackDestRegNos, sDecs), sRefs = union(listToSet stackSourceRegNos, sRefs), occIn = newInCC, occOut = newOutCC, loopRegs = union(loopRegs, instrLoopRegs)} end end (* If we have a conditional branch at the end we need the condition code. It should either be set here or in a preceding block. *) val inCC = case flow of Conditional { ccRef, ...} => CCNeeded ccRef | _ => CCUnused val { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... } = List.foldr scanCode {code=[], decs=emptySet, refs=emptySet, sDecs=emptySet, sRefs=emptySet, occIn=inCC, occOut=CCUnchanged, loopRegs=emptySet} block in Array.update(declarationArray, blockNo, decs); (* refs includes local declarations. Remove before adding to the result. *) Array.update(importArray, blockNo, minus(refs, decs)); Array.update(localLoopRegArray, blockNo, loopRegs); Array.update(stackDecArray, blockNo, sDecs); Array.update(stackImportArray, blockNo, minus(sRefs, sDecs)); Array.update(resultCode, blockNo, SOME code); Array.update(ccInStates, blockNo, occIn); Array.update(ccOutStates, blockNo, occOut) end in val () = blockScan 0 (* Start with the root block. *) end (* Second phase - Propagate reference information between the blocks. We need to consider loops here. Do a depth-first scan marking each block. If we find a loop we save the import information we've used. If when we come to process that block we find the import information is different we need to reprocess. *) (* Pass through array - values used in other blocks after this that are not declared in this block. *) val passThroughArray = Array.array(vectorLength, emptySet) val stackPassThroughArray = Array.array(vectorLength, emptySet) (* Exports - those of our declarations that are used in other blocks. *) val exportArray = Array.array(vectorLength, emptySet) val stackExportArray = Array.array(vectorLength, emptySet) (* Loop registers. This contains the registers that are not exported from or passed through this block but are used subsequently as loop registers. *) val loopRegArray = Array.array(vectorLength, emptySet) val () = Array.copy{src=localLoopRegArray, dst=loopRegArray, di=0} (* If any one of the successors requires the CC then this is set. Otherwise we leave it as Unused. *) val ccRequiredOut = Array.array(vectorLength, CCUnused) local datatype loopData = Unprocessed | Processing | Processed | Looped of { regSet: intSet, loopSet: intSet, stackSet: intSet, ccState: inCCState } fun reprocessLoop () = let val reprocess = ref false val loopArray = Array.array(vectorLength, Unprocessed) fun processBlocks blockNo = case Array.sub(loopArray, blockNo) of Processed => (* Already seen this by a different route. *) { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } | Looped s => s (* We've already seen this in a loop. *) | Processing => (* We have a loop. *) let (* Use the existing input array. 
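i.e. return the current estimate of this block's inputs. If completing the rest of the loop changes any of these sets the reprocess flag is set and the whole scan is repeated, so the computation iterates to a fixed point.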
*) val inputs = { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } val () = Array.update(loopArray, blockNo, Looped inputs) in inputs end | Unprocessed => (* Normal case - not visited yet. *) let val () = Array.update(loopArray, blockNo, Processing) val thisBlock = Vector.sub(blockVector, blockNo) val ourDeclarations = Array.sub(declarationArray, blockNo) and ourStackDeclarations = Array.sub(stackDecArray, blockNo) and ourLocalLoopRegs = Array.sub(localLoopRegArray, blockNo) val successors = blockSuccessors thisBlock fun addSuccessor b = let val {regSet=theirImports, stackSet=theirStackImports, ccState=theirInState, loopSet=theirLoops} = processBlocks b (* Remove loop regs from the imports if they are actually given new values by this block. We don't want to pass the old loop regs through here. *) val theirImports = minus(theirImports, ourLocalLoopRegs) (* Split the imports. If a register is a local declaration then it becomes an export. If it is not it becomes part of our passThrough. *) val (addToExp, addToImp) = IntSet.partition (fn i => member(i, ourDeclarations)) theirImports val (addToStackExp, addToStackImp) = IntSet.partition (fn i => member(i, ourStackDeclarations)) theirStackImports (* Merge the input states from each of the successors. *) val () = case (theirInState, Array.sub(ccRequiredOut, blockNo)) of (CCNeeded ts, CCNeeded req) => if ts = req then () else raise InternalError "Mismatched states" | (ts as CCNeeded _, _) => Array.update(ccRequiredOut, blockNo, ts) | _ => () (* Add loop registers to the set if they are not declared here. The only place they are declared is at the entry to the loop so that stops them being propagated further. *) val addToLoops = minus(theirLoops, ourDeclarations) in Array.update(exportArray, blockNo, union(Array.sub(exportArray, blockNo), addToExp)); Array.update(passThroughArray, blockNo, union(Array.sub(passThroughArray, blockNo), addToImp)); Array.update(stackExportArray, blockNo, union(Array.sub(stackExportArray, blockNo), addToStackExp)); Array.update(stackPassThroughArray, blockNo, union(Array.sub(stackPassThroughArray, blockNo), addToStackImp)); Array.update(loopRegArray, blockNo, union(Array.sub(loopRegArray, blockNo), addToLoops)) end val () = List.app addSuccessor successors val ourInputs = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)) val ourStackInputs = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)) in (* Check that we supply the required state. *) case (Array.sub(ccRequiredOut, blockNo), Array.sub(ccOutStates, blockNo)) of (CCNeeded ccReq, CCSet ccSet) => if ccReq = ccSet then () else raise InternalError "Mismatched cc states" | (CCNeeded _, CCIndeterminate) => raise InternalError "Mismatched cc states" | (cc as CCNeeded needOut, CCUnchanged) => ( (* We pass through the state. If we don't use the state then we need to set this as the input. If we do use the state it must be the same. *) case Array.sub(ccInStates, blockNo) of CCUnused => Array.update(ccInStates, blockNo, cc) | CCNeeded needIn => if needOut = needIn then () else raise InternalError "Mismatched cc states" ) | _ => (); (* Was this block used in a loop? If so we should not be requiring a CC. 
*) case Array.sub(loopArray, blockNo) of Looped {regSet, stackSet, ...} => ( case Array.sub(ccInStates, blockNo) of CCNeeded _ => raise InternalError "Looped state needs cc" | _ => (); if setToList regSet = setToList ourInputs andalso setToList stackSet = setToList ourStackInputs then () else reprocess := true ) | _ => (); Array.update(loopArray, blockNo, Processed); { regSet = ourInputs, stackSet = ourStackInputs, ccState = Array.sub(ccInStates, blockNo), loopSet=Array.sub(loopRegArray, blockNo)} end in reprocess := false; processBlocks 0; if !reprocess then reprocessLoop () else () end in val () = reprocessLoop () end (* Third pass - Build the result list with the active registers for each instruction. We don't include registers in the passThrough set since they are active throughout the block. *) local (* Number of instrs for which this is active. We use this to try to select a register to push to the stack if we have too many. Registers that have only a short lifetime are less likely to be pushed than those that are active longer. *) val regActive = Array.array(maxPRegs, 0) fun addActivity n r = Array.update(regActive, r, Array.sub(regActive, r)+n) fun createResultInstrs (passThrough, stackPassThrough) (instr, (tail, activeAfterThis, stackActiveAfterThis)) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ... } = getInstructionState instr in (* Eliminate instructions if their results are not required. The earlier check for this will remove most cases but if we have duplicated a block we may have a register that is required elsewhere but not in this particular branch. *) if not(List.exists(fn PReg d => member(d, activeAfterThis)) dests) andalso eliminateable instr then (tail, activeAfterThis, stackActiveAfterThis) else let fun regNo(PReg i) = i fun stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos (* Remove any sources that are present in passThrough since they are going to be active throughout the block. *) and sourceSet = minus(listToSet sourceRegNos, passThrough) val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs val stackDestSet = listToSet stackDestRegNos and stackSourceSet = minus(listToSet stackSourceRegNos, stackPassThrough) (* To compute the active set for the PREVIOUS instruction (we're processing from the end back to the start) we remove any registers that have been given values in this instruction and add anything that we are using in this instruction since they will now need to have values. *) val afterRemoveDests = minus(activeAfterThis, destSet) val stackAfterRemoveDests = minus(stackActiveAfterThis, stackDestSet) val activeForPrevious = union(sourceSet, afterRemoveDests) val stackActiveForPrevious = union(stackSourceSet, stackAfterRemoveDests) (* The "active" set is the set of registers that need to be active DURING the instruction. It includes destinations, which will usually be in "activeAfterThis", because there may be destinations that are not actually used subsequently but still need a register. *) val activeForInstr = case instr of FunctionCall _ => sourceSet (* Is this still needed? *) | TailRecursiveCall _ => (* Set the active set to the total set of registers we require including the work register. This ensures that we will spill as many registers as we require when we look at the size of the active set. *) union(sourceSet, destSet) | BoxLarge _ => (* We can only store the value in the box after the box is allocated. 
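That means the source must remain active at the same time as the destination, so both are added to the active set.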
*) union(activeAfterThis, union(sourceSet, destSet)) | BoxTagFloat _ => (* Since the source must be a V register and the destination an X register there isn't actually a problem here, but do this anyway. *) union(activeAfterThis, union(sourceSet, destSet)) | _ => union(activeAfterThis, destSet) val () = List.app(addActivity 1) (setToList activeForInstr) local (* If we are allocating memory we have to save the current registers if they could contain an address. We mustn't push untagged registers and we mustn't push the destination. *) fun getSaveSet includeReg = let val activeAfter = union(activeAfterThis, passThrough) (* Remove any registers marked - must-not-push. These are registers holding non-address values. They will actually be saved by the RTS across any GC but not checked or modified by the GC. Exclude the result register. *) fun getSave i = if includeReg i then case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" else NONE in List.mapPartial getSave (setToList activeAfter) end in (* Sometimes we need to modify the instruction e.g. to include the set of registers to save. *) val convertedInstr = case instr of AllocateMemoryFixed{bytesRequired, dest, saveRegs=_} => AllocateMemoryFixed{bytesRequired=bytesRequired, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | AllocateMemoryVariable{size, dest, saveRegs=_} => AllocateMemoryVariable{size=size, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | BoxLarge{source, dest, saveRegs=_} => BoxLarge{source=source, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | BoxTagFloat{source, dest, floatSize, saveRegs=_} => BoxTagFloat{source=source, dest=dest, floatSize=floatSize, saveRegs=getSaveSet(fn i => i <> regNo dest)} | JumpLoop{regArgs, stackArgs, checkInterrupt = SOME _, ...} => let (* If we have to check for interrupts we must preserve registers across the RTS call. *) fun getSave i = case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" val currentRegs = union(activeAfterThis, passThrough) (* Have to include the loop registers. These were previously included automatically because they were part of the import set. *) val check = List.mapPartial getSave (map (regNo o #dst) regArgs @ setToList currentRegs) in JumpLoop{regArgs=regArgs, stackArgs=stackArgs, checkInterrupt=SOME check} end | FunctionCall{regArgs, stackArgs=[], dests, callKind as ConstantCode m, saveRegs=_, containers} => (* If this is arbitrary precision push the registers rather than marking them as "save". stringOfWord returns 'CODE "PolyAddArbitrary"' etc. *) if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then let val destRegs = List.map (regNo o #1) dests fun includeInSave i = not(List.exists(fn r => r=i) destRegs) in FunctionCall{regArgs=regArgs, stackArgs=[], callKind=callKind, dests=dests, containers=containers, saveRegs=getSaveSet includeInSave} end else instr | _ => instr end (* FunctionCall must mark all registers as "push". 
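Registers are not preserved across ML function calls, so any register whose value is needed afterwards must be saved on the stack; for cache registers the push state simply means the cached value is discarded, as with requirePushOrDiscard above.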
*) local fun pushRegisters () = let val activeAfter = union(activeAfterThis, passThrough) fun pushAllButDests i = if List.exists(fn j => i=j) destRegNos then () else case Vector.sub(pregProps, i) of RegPropCacheTagged => raise InternalError "pushRegisters: cache reg" | RegPropCacheUntagged => raise InternalError "pushRegisters: cache reg" | _ => Array.update(requirePushOrDiscard, i, true) in (* We need to push everything active after this except the result register. *) List.app pushAllButDests (setToList activeAfter) end in val () = case instr of FunctionCall{ stackArgs=[], callKind=ConstantCode m, ...} => if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then () else pushRegisters () | FunctionCall _ => pushRegisters () (* It should no longer be necessary to push across a handler but there still seem to be cases that need it. *) (*| BeginHandler _ => pushRegisters ()*) | _ => () end (* Which entries are active in this instruction but not afterwards? *) val kill = union(minus(stackSourceSet, stackActiveAfterThis), minus(sourceSet, activeAfterThis)) in ({instr=convertedInstr, active=activeForInstr, current=activeAfterThis, kill=kill} :: tail, activeForPrevious, stackActiveForPrevious) end end fun createResult blockNo = let val BasicBlock{ flow, ...} = Vector.sub(blockVector, blockNo) val declSet = Array.sub(declarationArray, blockNo) and importSet = Array.sub(importArray, blockNo) and passSet = Array.sub(passThroughArray, blockNo) and loopSet = Array.sub(loopRegArray, blockNo) and exportSet = Array.sub(exportArray, blockNo) and stackPassSet = Array.sub(stackPassThroughArray, blockNo) and stackImportSet = Array.sub(stackImportArray, blockNo) and stackExportSet = Array.sub(stackExportArray, blockNo) val filteredCode = getOpt(Array.sub(resultCode, blockNo), []) (* At the end of the block we should have the exports active. *) val (resultInstrs, _, _) = List.foldr (createResultInstrs (passSet, stackPassSet)) ([], exportSet, stackExportSet) filteredCode (* Set the active count for the pass through. *) val instrCount = List.length filteredCode val () = List.app(addActivity instrCount) (setToList passSet) val inCCState = case Array.sub(ccInStates, blockNo) of CCNeeded s => SOME s | CCUnused => NONE val outCCState = case Array.sub(ccRequiredOut, blockNo) of CCNeeded s => SOME s | CCUnused => NONE in ExtendedBasicBlock { block = resultInstrs, flow=flow, locals = minus(declSet, exportSet), imports = importSet, exports = exportSet, passThrough = passSet, loopRegs = loopSet, initialStacks = union(stackPassSet, stackImportSet), inCCState = inCCState, outCCState = outCCState } end in val resultBlocks = Vector.tabulate(vectorLength, createResult) val regActive = regActive end val registerState: regState vector = Vector.tabulate(maxPRegs, fn i => { active = Array.sub(regActive, i), refs = Array.sub(regRefs, i), pushState = Array.sub(requirePushOrDiscard, i), prop = Vector.sub(pregProps, i) } ) in (resultBlocks, registerState) end (* Exported function. First filter out unreferenced blocks then process the registers themselves. *) fun identifyRegisters(blockVector, pregProps) = let val vectorLength = Vector.length blockVector val mapArray = Array.array(vectorLength, NONE) and resArray = Array.array(vectorLength, NONE) val count = ref 0 fun setReferences label = case Array.sub(mapArray, label) of NONE => (* Not yet visited *) let val BasicBlock{flow, block} = Vector.sub(blockVector, label) (* Create a new entry for it. *) val newLabel = ! count before count := !count + 1 (* Add it to the map. 
Any other references will use this without reprocessing. *) val () = Array.update(mapArray, label, SOME newLabel) val newFlow = case flow of Unconditional l => Unconditional(setReferences l) | Conditional{trueJump, falseJump, ccRef, condition} => Conditional{trueJump=setReferences trueJump, falseJump=setReferences falseJump, ccRef=ccRef, condition=condition} | ExitCode => ExitCode | IndexedBr list => IndexedBr(map setReferences list) | SetHandler{handler, continue} => SetHandler{handler=setReferences handler, continue=setReferences continue} | UnconditionalHandle l => UnconditionalHandle(setReferences l) | ConditionalHandle{handler, continue} => ConditionalHandle{handler=setReferences handler, continue=setReferences continue} val () = Array.update(resArray, newLabel, SOME(BasicBlock{flow=newFlow, block=block})) in newLabel end | SOME lab => lab val _ = setReferences 0 val newBlockVector = Vector.tabulate(!count, fn i => valOf(Array.sub(resArray, i))) in identifyRegs(newBlockVector, pregProps) end (* Exported for use in GetConflictSets *) fun getInstructionRegisters instr = let val {sources, dests, ...} = getInstructionState instr in {sources=sources, dests=dests} end (* Exported for use in ICodeOptimise *) val getInstructionCC = #ccOut o getInstructionState structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and intSet = intSet and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and extendedBasicBlock = extendedBasicBlock and controlFlow = controlFlow and regProperty = regProperty and ccRef = ccRef and outCCState = outCCState end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML index ae35a130..1b991cab 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML @@ -1,1073 +1,1077 @@ (* Copyright (c) 2021-2 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *) functor Arm64PreAssembly( structure Arm64Assembly: ARM64ASSEMBLY structure Debug: DEBUG structure Pretty: PRETTY ): ARM64PREASSEMBLY = struct open Arm64Assembly exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. 
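With these operators code <::> i1 <::> i2 produces i2 :: i1 :: code, so sequences read left to right in the source while the list itself accumulates in reverse order and is reversed when the final code is generated.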
*) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd (* Many of the datatypes are inherited from Arm64Assembly *) datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP and unscaledType = NoUpdate | PreIndex | PostIndex and condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate and bitfieldKind = BFUnsigned | BFSigned | BFInsert and brRegType = BRRBranch | BRRAndLink | BRRReturn datatype label = Label of int type labelMaker = int ref fun createLabelMaker() = ref 0 fun createLabel(r as ref n) = Label n before r := n+1 datatype precode = (* Basic instructions *) AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind} | DivideRegs of {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize} | LogicalShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool} | LoadRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | LoadFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | StoreRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | StoreFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | LoadRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | LoadFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} (* LoadAcquire and StoreRelease are used for mutables. *) | LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType} | StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType} (* LoadAcquireExclusiveRegister and StoreReleaseExclusiveRegister are used for mutexes. 
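These presumably correspond to the LDAXR and STLXR instructions: the regS of the store receives the status result, zero if the store succeeded and non-zero if the exclusive monitor was lost, which is why the ICode StoreReleaseExclusive has a result register.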
*) | LoadAcquireExclusiveRegister of {regN: xReg, regT: xReg} | StoreReleaseExclusiveRegister of {regS: xReg, regT: xReg, regN: xReg} | MemBarrier | LoadRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | ConditionalSet of {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet, opSize: opSize} | BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind} | ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection} | BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp} (* Floating point *) | MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize} | MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize} | CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize} | CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize} | FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary} | FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize} | FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary} (* Branches and Labels. *) | SetLabel of label | ConditionalBranch of condition * label | UnconditionalBranch of label | BranchAndLink of label | BranchReg of {regD: xReg, brRegType: brRegType } | LoadLabelAddress of xReg * label | TestBitBranch of { test: xReg, bit: Word8.word, label: label, onZero: bool } | CompareBranch of { test: xReg, label: label, onZero: bool, opSize: opSize } (* Composite instructions *) | MoveXRegToXReg of {sReg: xReg, dReg: xReg} | LoadNonAddr of xReg * Word64.word | LoadAddr of xReg * machineWord | RTSTrap of { rtsEntry: int, work: xReg, save: xReg list } | AllocateMemoryFixedSize of { bytes: word, dest: xReg, save: xReg list, work: xReg } | AllocateMemoryVariableSize of { sizeReg: xReg, dest: xReg, save: xReg list, work: xReg } (* Branch table for indexed case. startLabel is the address of the first label in the list. The branch table is a sequence of unconditional branches. *) | BranchTable of { startLabel: label, brTable: label list } | LoadGlobalHeapBaseInCallback of xReg + | Yield (* Optimise the pre-assembler code and then generate the final code. *) fun generateFinalCode {instrs, name, parameters, resultClosure, profileObject, labelMaker=ref labelCount} = let val labelTargets = Array.tabulate(labelCount, fn i => (Arm64Assembly.createLabel(), i) ) (* Follow the chain of forwarded labels. *) local fun forwardLab(labelNo, labels) = let val dest as (_, dNo) = Array.sub(labelTargets, labelNo) in if dNo = labelNo then dest (* This should not happen but just in case... 
*) else if List.exists(fn i => i = dNo) labels then raise InternalError "Infinite loop" else forwardLab(dNo, dNo::labels) end in fun getLabel labelNo = forwardLab(labelNo, [labelNo]) val getLabelTarget = #1 o getLabel end fun toAssembler([], code) = code | toAssembler(AddImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addImmediate | (OpSize32, false) => addImmediate32 | (OpSize64, true) => addSImmediate | (OpSize32, true) => addSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(SubImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subImmediate | (OpSize32, false) => subImmediate32 | (OpSize64, true) => subSImmediate | (OpSize32, true) => subSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(AddShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addShiftedReg | (OpSize32, false) => addShiftedReg32 | (OpSize64, true) => addSShiftedReg | (OpSize32, true) => addSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(SubShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subShiftedReg | (OpSize32, false) => subShiftedReg32 | (OpSize64, true) => subSShiftedReg | (OpSize32, true) => subSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(AddExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = (* Add/SubExtended are only used to access XSP. 
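In the shifted-register encodings register number 31 means XZR rather than the stack pointer, so the extended-register form has to be used whenever XSP is an operand or the destination.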
*) let val instr = case (opSize, setFlags) of (OpSize64, false) => addExtendedReg | (OpSize32, false) => raise InternalError "AddExtendedReg; 32" | (OpSize64, true) => addSExtendedReg | (OpSize32, true) => raise InternalError "AddExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(SubExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subExtendedReg | (OpSize32, false) => raise InternalError "AddExtendedReg; 32" | (OpSize64, true) => subSExtendedReg | (OpSize32, true) => raise InternalError "AddExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(MultiplyAndAddSub{regM, regN, regA, regD, multKind} :: rest, code) = let val instr = case multKind of MultAdd32 => multiplyAndAdd32{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub32 => multiplyAndSub32{regM=regM, regN=regN, regA=regA, regD=regD} | MultAdd64 => multiplyAndAdd{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub64 => multiplyAndSub{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultAddLong => signedMultiplyAndAddLong{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultHigh => signedMultiplyHigh{regM=regM, regN=regN, regD=regD} in toAssembler(rest, code <::> instr) end | toAssembler(DivideRegs{regM, regN, regD, isSigned, opSize} :: rest, code) = let val instr = case (isSigned, opSize) of (true, OpSize64) => signedDivide | (true, OpSize32) => signedDivide32 | (false, OpSize64) => unsignedDivide | (false, OpSize32) => unsignedDivide32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(LogicalShiftedReg{regM, regN, regD, shift, logOp, opSize, setFlags} :: rest, code) = let val instr = case (logOp, setFlags, opSize) of (LogAnd, false, OpSize64) => andShiftedReg | (LogAnd, true, OpSize64) => andsShiftedReg | (LogOr, false, OpSize64) => orrShiftedReg | (LogXor, false, OpSize64) => eorShiftedReg | (LogAnd, false, OpSize32) => andShiftedReg32 | (LogAnd, true, OpSize32) => andsShiftedReg32 | (LogOr, false, OpSize32) => orrShiftedReg32 | (LogXor, false, OpSize32) => eorShiftedReg32 | _ => raise InternalError "setFlags not valid with OR or XOR" (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. 
*) in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD, shift=shift}) end | toAssembler(LoadRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => loadRegScaled | Load32 => loadRegScaled32 | Load16 => loadRegScaled16 | Load8 => loadRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => storeRegScaled | Load32 => storeRegScaled32 | Load16 => storeRegScaled16 | Load8 => storeRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => loadRegScaledFloat | Double64 => loadRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => storeRegScaledFloat | Double64 => storeRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => loadRegUnscaled | (Load32, NoUpdate) => loadRegUnscaled32 | (Load16, NoUpdate) => loadRegUnscaled16 | (Load8, NoUpdate) => loadRegUnscaledByte | (Load64, PreIndex) => loadRegPreIndex | (Load32, PreIndex) => loadRegPreIndex32 | (Load16, PreIndex) => raise InternalError "loadRegPreIndex16" | (Load8, PreIndex) => loadRegPreIndexByte | (Load64, PostIndex) => loadRegPostIndex | (Load32, PostIndex) => loadRegPostIndex32 | (Load16, PostIndex) => raise InternalError "loadRegPostIndex16" | (Load8, PostIndex) => loadRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => loadRegUnscaledFloat | (Double64, NoUpdate) => loadRegUnscaledDouble | _ => raise InternalError "LoadFPRegUnscaled: pre/post indexed" in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => storeRegUnscaled | (Load32, NoUpdate) => storeRegUnscaled32 | (Load16, NoUpdate) => storeRegUnscaled16 | (Load8, NoUpdate) => storeRegUnscaledByte | (Load64, PreIndex) => storeRegPreIndex | (Load32, PreIndex) => storeRegPreIndex32 | (Load16, PreIndex) => raise InternalError "storeRegPreIndex16" | (Load8, PreIndex) => storeRegPreIndexByte | (Load64, PostIndex) => storeRegPostIndex | (Load32, PostIndex) => storeRegPostIndex32 | (Load16, PostIndex) => raise InternalError "storeRegPostIndex16" | (Load8, PostIndex) => storeRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => storeRegUnscaledFloat | (Double64, NoUpdate) => storeRegUnscaledDouble | _ => raise InternalError "StoreFPRegUnscaled: pre/post indexed" in toAssembler(rest, code 
<::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => loadRegIndexed | Load32 => loadRegIndexed32 | Load16 => loadRegIndexed16 | Load8 => loadRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => storeRegIndexed | Load32 => storeRegIndexed32 | Load16 => storeRegIndexed16 | Load8 => storeRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(LoadFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => loadRegIndexedFloat | Double64 => loadRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => storeRegIndexedFloat | Double64 => storeRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(LoadAcquireReg{regN, regT, loadType} :: rest, code) = let val loadInstr = case loadType of Load64 => loadAcquire | Load32 => loadAcquire32 | Load8 => loadAcquireByte | _ => raise InternalError "LoadAcquire: Unsupported size" (* Not used *) in toAssembler(rest, code <::> loadInstr{regT=regT, regN=regN}) end | toAssembler(StoreReleaseReg{regN, regT, loadType} :: rest, code) = let val storeInstr = case loadType of Load64 => storeRelease | Load32 => storeRelease32 | Load8 => storeReleaseByte | _ => raise InternalError "StoreRelease: Unsupported size" (* Not used *) in toAssembler(rest, code <::> storeInstr{regT=regT, regN=regN}) end | toAssembler(LoadAcquireExclusiveRegister{regN, regT} :: rest, code) = toAssembler(rest, code <::> loadAcquireExclusiveRegister{regN=regN, regT=regT}) | toAssembler(StoreReleaseExclusiveRegister{regN, regT, regS} :: rest, code) = toAssembler(rest, code <::> storeReleaseExclusiveRegister{regN=regN, regT=regT, regS=regS}) | toAssembler(MemBarrier :: rest, code) = toAssembler(rest, code <::> dmbIsh) | toAssembler(LoadRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) = let val _ = regT1 <> regT2 orelse raise InternalError "LoadRegPair: same register" val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => loadPairOffset | (Load64, PreIndex) => loadPairPreIndexed | (Load64, PostIndex) => loadPairPostIndexed | (Load32, NoUpdate) => loadPairOffset32 | (Load32, PreIndex) => loadPairPreIndexed32 | (Load32, PostIndex) => loadPairPostIndexed32 | _ => raise InternalError "LoadRegPair: unimplemented" in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => storePairOffset | (Load64, PreIndex) => storePairPreIndexed | (Load64, PostIndex) => storePairPostIndexed | (Load32, NoUpdate) => storePairOffset32 | (Load32, PreIndex) => storePairPreIndexed32 | (Load32, PostIndex) => storePairPostIndexed32 | _ => raise InternalError "StoreRegPair: unimplemented" in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadFPRegPair{ 
regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) = let val _ = regT1 <> regT2 orelse raise InternalError "LoadRegPair: same register" val instr = case (floatSize, unscaledType) of (Double64, NoUpdate) => loadPairOffsetDouble | (Double64, PreIndex) => loadPairPreIndexedDouble | (Double64, PostIndex) => loadPairPostIndexedDouble | (Float32, NoUpdate) => loadPairOffsetFloat | (Float32, PreIndex) => loadPairPreIndexedFloat | (Float32, PostIndex) => loadPairPostIndexedFloat in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreFPRegPair{ regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Double64, NoUpdate) => storePairOffsetDouble | (Double64, PreIndex) => storePairPreIndexedDouble | (Double64, PostIndex) => storePairPostIndexedDouble | (Float32, NoUpdate) => storePairOffsetFloat | (Float32, PreIndex) => storePairPreIndexedFloat | (Float32, PostIndex) => storePairPostIndexedFloat in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(ConditionalSet{regD, regTrue, regFalse, cond, condSet, opSize} :: rest, code) = let val instr = case (condSet, opSize) of (CondSet, OpSize64) => conditionalSet | (CondSetIncr, OpSize64) => conditionalSetIncrement | (CondSetInvert, OpSize64) => conditionalSetInverted | (CondSetNegate, OpSize64) => conditionalSetNegated | (CondSet, OpSize32) => conditionalSet32 | (CondSetIncr, OpSize32) => conditionalSetIncrement32 | (CondSetInvert, OpSize32) => conditionalSetInverted32 | (CondSetNegate, OpSize32) => conditionalSetNegated32 in toAssembler(rest, code <::> instr{regD=regD, regTrue=regTrue, regFalse=regFalse, cond=cond}) end | toAssembler(BitField{immr, imms, regN, regD, opSize, bitfieldKind} :: rest, code) = let val bfInstr = case (bitfieldKind, opSize) of (BFSigned, OpSize64) => signedBitfieldMove64 | (BFUnsigned, OpSize64) => unsignedBitfieldMove64 | (BFInsert, OpSize64) => bitfieldMove64 | (BFSigned, OpSize32) => signedBitfieldMove32 | (BFUnsigned, OpSize32) => unsignedBitfieldMove32 | (BFInsert, OpSize32) => bitfieldMove32 in toAssembler(rest, code <::> bfInstr{immr=immr, imms=imms, regN=regN, regD=regD}) end | toAssembler(ShiftRegisterVariable{regM, regN, regD, opSize, shiftDirection} :: rest, code) = let val instr = case (shiftDirection, opSize) of (ShiftLeft, OpSize64) => logicalShiftLeftVariable | (ShiftLeft, OpSize32) => logicalShiftLeftVariable32 | (ShiftRightLogical, OpSize64) => logicalShiftRightVariable | (ShiftRightLogical, OpSize32) => logicalShiftRightVariable32 | (ShiftRightArithmetic, OpSize64) => arithmeticShiftRightVariable | (ShiftRightArithmetic, OpSize32) => arithmeticShiftRightVariable32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(BitwiseLogical{ bits, regN, regD, opSize, setFlags, logOp} :: rest, code) = let val instr = case (logOp, setFlags, opSize) of (LogAnd, false, OpSize64) => bitwiseAndImmediate | (LogAnd, true, OpSize64) => bitwiseAndSImmediate | (LogOr, false, OpSize64) => bitwiseOrImmediate | (LogXor, false, OpSize64) => bitwiseXorImmediate | (LogAnd, false, OpSize32) => bitwiseAndImmediate32 | (LogAnd, true, OpSize32) => bitwiseAndSImmediate32 | (LogOr, false, OpSize32) => bitwiseOrImmediate32 | (LogXor, false, OpSize32) => bitwiseXorImmediate32 | _ => raise InternalError "flags not valid with OR or XOR" in toAssembler(rest, code <::> instr{regN=regN, regD=regD, 
bits=bits}) end | toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Float32} :: rest, code) = toAssembler(rest, code <::> moveGeneralToFloat{regN=regN, regD=regD}) | toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Double64} :: rest, code) = toAssembler(rest, code <::> moveGeneralToDouble{regN=regN, regD=regD}) | toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Float32} :: rest, code) = toAssembler(rest, code <::> moveFloatToGeneral{regN=regN, regD=regD}) | toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Double64} :: rest, code) = toAssembler(rest, code <::> moveDoubleToGeneral{regN=regN, regD=regD}) | toAssembler(CvtIntToFP{ regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (opSize, floatSize) of (OpSize32, Float32) => convertInt32ToFloat | (OpSize64, Float32) => convertIntToFloat | (OpSize32, Double64) => convertInt32ToDouble | (OpSize64, Double64) => convertIntToDouble in toAssembler(rest, code <::> instr{regN=regN, regD=regD}) end | toAssembler(CvtFloatToInt{ round, regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (floatSize, opSize) of (Float32, OpSize32) => convertFloatToInt32 | (Float32, OpSize64) => convertFloatToInt | (Double64, OpSize32) => convertDoubleToInt32 | (Double64, OpSize64) => convertDoubleToInt in toAssembler(rest, code <::> instr round {regN=regN, regD=regD}) end | toAssembler(FPBinaryOp{ regM, regN, regD, floatSize, fpOp} :: rest, code) = let val instr = case (fpOp, floatSize) of (MultiplyFP, Float32) => multiplyFloat | (DivideFP, Float32) => divideFloat | (AddFP, Float32) => addFloat | (SubtractFP, Float32) => subtractFloat | (MultiplyFP, Double64) => multiplyDouble | (DivideFP, Double64) => divideDouble | (AddFP, Double64) => addDouble | (SubtractFP, Double64) => subtractDouble in toAssembler(rest, code <::> instr {regN=regN, regM=regM, regD=regD}) end | toAssembler(FPComparison{ regM, regN, floatSize} :: rest, code) = toAssembler(rest, code <::> (case floatSize of Float32 => compareFloat | Double64 => compareDouble){regN=regN, regM=regM}) | toAssembler(FPUnaryOp{ regN, regD, fpOp} :: rest, code) = let val instr = case fpOp of NegFloat => negFloat | NegDouble => negDouble | AbsFloat => absFloat | AbsDouble => absDouble | ConvFloatToDble => convertFloatToDouble | ConvDbleToFloat => convertDoubleToFloat in toAssembler(rest, code <::> instr {regN=regN, regD=regD}) end | toAssembler(SetLabel(Label lab) :: rest, code) = toAssembler(rest, code <::> setLabel(getLabelTarget lab)) | toAssembler(ConditionalBranch(cond, Label lab) :: rest, code) = toAssembler(rest, code <::> conditionalBranch(cond, getLabelTarget lab)) | toAssembler(UnconditionalBranch(Label lab) :: rest, code) = toAssembler(rest, code <::> unconditionalBranch(getLabelTarget lab)) | toAssembler(BranchAndLink(Label lab) :: rest, code) = toAssembler(rest, code <::> branchAndLink(getLabelTarget lab)) | toAssembler(BranchReg{regD, brRegType=BRRBranch} :: rest, code) = toAssembler(rest, code <::> branchRegister regD) | toAssembler(BranchReg{regD, brRegType=BRRAndLink} :: rest, code) = toAssembler(rest, code <::> branchAndLinkReg regD) | toAssembler(BranchReg{regD, brRegType=BRRReturn} :: rest, code) = toAssembler(rest, code <::> returnRegister regD) | toAssembler(LoadLabelAddress(reg, Label lab) :: rest, code) = toAssembler(rest, code <::> loadLabelAddress(reg, getLabelTarget lab)) | toAssembler(TestBitBranch{ test, bit, label=Label lab, onZero } :: rest, code) = toAssembler(rest, code <::> (if onZero then testBitBranchZero else testBitBranchNonZero)(test, bit, 
getLabelTarget lab)) | toAssembler(CompareBranch{ test, label=Label lab, onZero, opSize } :: rest, code) = let val instr = case (onZero, opSize) of (true, OpSize64) => compareBranchZero | (false, OpSize64) => compareBranchNonZero | (true, OpSize32) => compareBranchZero32 | (false, OpSize32) => compareBranchNonZero32 in toAssembler(rest, code <::> instr(test, getLabelTarget lab)) end (* Register-register moves - special case for XSP. *) | toAssembler(MoveXRegToXReg{sReg=XSP, dReg} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=XSP, regD=dReg, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg=XSP} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=sReg, regD=XSP, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg} :: rest, code) = toAssembler(rest, code <::> orrShiftedReg{regN=XZero, regM=sReg, regD=dReg, shift=ShiftNone}) | toAssembler(LoadNonAddr(xReg, value) :: rest, code) = let (* Load a non-address constant. Tries to use movz/movn/movk if that can be done easily, othewise uses loadNonAddressConstant to load the value from the non-address constant area. *) fun extW (v, h) = Word.andb(Word.fromLarge(LargeWord.>>(Word64.toLarge v, h*0w16)), 0wxffff) val hw0 = extW(value, 0w3) and hw1 = extW(value, 0w2) and hw2 = extW(value, 0w1) and hw3 = extW(value, 0w0) val nextCode = if value < 0wx100000000 then let (* 32-bit constants can be loaded using at most a movz and movk but various cases can be reduced since all 32-bit operations set the top word to zero. *) val hi = hw2 and lo = hw3 in (* 32-bit constants can be loaded with at most a movz and a movk but it may be that there is something shorter. *) if hi = 0w0 then code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} else if hi = 0wxffff then code <::> moveNot32{regD=xReg, immediate=Word.xorb(0wxffff, lo), shift=0w0} else if lo = 0w0 then code <::> moveZero32{regD=xReg, immediate=hi, shift=0w16} else if isEncodableBitPattern(value, WordSize32) then code <::> bitwiseOrImmediate32{bits=value, regN=XZero, regD=xReg} else (* Have to use two instructions *) code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} <::> moveKeep{regD=xReg, immediate=hi, shift=0w16} end else if hw0 = 0wxffff andalso hw1 = 0wxffff andalso hw2 = 0wxffff then code <::> moveNot{regD=xReg, immediate=Word.xorb(0wxffff, hw3), shift=0w0} else if hw1 = 0w0 andalso hw2 = 0w0 then (* This is common for length words with a flags byte *) code <::> moveZero32{regD=xReg, immediate=hw3, shift=0w0} <::> moveKeep{regD=xReg, immediate=hw0, shift=0w48} else code <::> loadNonAddressConstant(xReg, value) in toAssembler(rest, nextCode) end | toAssembler(LoadAddr(dReg, source) :: rest, code) = toAssembler(rest, loadAddressConstant(dReg, source) :: code) | toAssembler(RTSTrap{ rtsEntry, work, save } :: rest, code) = let (* Because X30 is used in the branchAndLink it has to be pushed across any trap. 
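X30 is the link register, so branchAndLinkReg overwrites it with the return address of the RTS entry. If the caller's X30 is in the save set it is therefore pushed onto the ML stack with a pre-indexed store before the call and reloaded with a post-indexed load afterwards, which is what saveX30 controls below.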
*) val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save in toAssembler(rest, code <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=rtsEntry} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) ) end | toAssembler(AllocateMemoryFixedSize{ bytes, dest, save, work } :: rest, code) = let val label = Arm64Assembly.createLabel() val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save val allocCode = code <@> (* Subtract the number of bytes required from the heap pointer. *) (if bytes >= 0w4096 then [subShiftedReg{regM=work, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftNone}, loadNonAddressConstant(work, Word.toLarge bytes)] else [subImmediate{regN=X_MLHeapAllocPtr, regD=dest, immed=bytes, shifted=false}]) <::> (* Compare the result with the heap limit. *) subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarrySet, label) <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::> setLabel label <::> (* Update the heap pointer. *) orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone} in toAssembler(rest, allocCode) end | toAssembler(AllocateMemoryVariableSize{ sizeReg, dest, save, work } :: rest, code) = let val trapLabel = Arm64Assembly.createLabel() and noTrapLabel = Arm64Assembly.createLabel() val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save val allocCode = ( (* Subtract the size into the result register. Subtract a further word for the length word and round down in 32-in-64. *) if is32in64 then code <::> subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w2} <::> subImmediate{regN=dest, regD=dest, immed=0w4, shifted=false} <::> bitwiseAndImmediate{bits= ~ 0w8, regN=dest, regD=dest} else code <::> subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w3} <::> subImmediate{regN=dest, regD=dest, immed=0w8, shifted=false} ) <::> (* Check against the limit. If the size is large enough it is possible that this could wrap round. To check for that we trap if either the result is less than the limit or if it is now greater than the allocation pointer. *) subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarryClear, trapLabel) <::> subSShiftedReg{regM=X_MLHeapAllocPtr, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarryClear, noTrapLabel) <::> setLabel trapLabel <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::> setLabel noTrapLabel <::> (* Update the heap pointer. 
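An ORR with XZero as the first operand is the standard encoding of a register-to-register move, the same form used for MoveXRegToXReg above, so this simply copies dest into X_MLHeapAllocPtr.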
*) orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone} in toAssembler(rest, allocCode) end | toAssembler(BranchTable{ startLabel=Label lab, brTable } :: rest, code) = toAssembler(rest, List.foldl (fn (Label lab, code) => (unconditionalBranch(getLabelTarget lab)) :: code) (code <::> setLabel(getLabelTarget lab)) brTable) | toAssembler(LoadGlobalHeapBaseInCallback dest :: rest, code) = toAssembler(rest, code <@> List.rev(loadGlobalHeapBaseInCallback dest)) + | toAssembler(Yield :: rest, code) = + toAssembler(rest, code <::> yield) + (* Optimisation passes. *) fun isValidForPair(offset1, offset2) = let val v = Int.min(offset1, offset2) in v >= ~64 andalso v < 64 end fun forward([], list, rep) = reverse(list, [], rep) | forward(SetLabel(Label srcLab) :: (ubr as UnconditionalBranch(Label destLab)) :: tl, list, _) = if srcLab = destLab (* We should never get this because there should always be a stack-check to allow a loop to be broken. If that ever changes we need to retain the label. *) then raise InternalError "Infinite loop detected" else (* Mark this to forward to its destination. *) ( Array.update(labelTargets, srcLab, getLabel destLab); forward(ubr :: tl, list, true) ) | forward(SetLabel(Label jmpLab1) :: (tl as SetLabel(Label jmpLab2) :: _), list, _) = (* Eliminate adjacent labels. They complicate the other tests although they don't incur any run-time cost. *) ( (* Any reference to the first label is forwarded to the second. *) Array.update(labelTargets, jmpLab1, getLabel jmpLab2); forward(tl, list, true) ) | forward((ubr as UnconditionalBranch(Label ubrLab)) :: (tl as SetLabel(Label jumpLab) :: _), list, rep) = (* Eliminate unconditional jumps to the next instruction. *) if ubrLab = jumpLab then forward(tl, list, true) else forward(tl, ubr :: list, rep) | forward((cbr as ConditionalBranch(test, Label cbrLab)) :: (ubr as UnconditionalBranch(Label ubrLab)) :: (tl as SetLabel(Label jumpLab) :: _), list, rep) = if cbrLab = jumpLab then (* We have a conditional branch followed by an unconditional branch followed by the destination of the conditional branch. Eliminate the unconditional branch by reversing the test. This can often happen if one branch of an if-then-else has been reduced to zero because the same register has been chosen for the input and output. *) forward(tl (* Leave the label just in case it's used elsewhere*), ConditionalBranch(invertTest test, Label ubrLab) :: list, true) else forward(ubr :: tl, cbr :: list, rep) | forward((load as LoadRegScaled{regT=regT1, regN=regN1, unitOffset=offset1, loadType=lt1}) :: (tl1 as LoadRegScaled{regT=regT2, regN=regN2, unitOffset=offset2, loadType=lt2} ::tl2), list, rep) = (* Two adjacent loads - can this be converted to load-pair? N.B. We have to be careful about the sequence ldr x0,[x0]; ldr x1,[x0+8] which isn't the same at all. 
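For example
           ldr x1, [x4, #8]
           ldr x2, [x4, #16]
       can be merged into ldp x1, x2, [x4, #8], but only when both loads use the same base register, the base is not overwritten by the first load, the sizes match and the unit offsets are adjacent and within the signed seven-bit range checked by isValidForPair.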
*) if regN1 = regN2 andalso regN1 <> regT1 andalso lt1 = lt2 andalso (offset2 = offset1 + 1 orelse offset2 = offset1 - 1) andalso (case lt1 of Load64 => true | Load32 => true | _ => false) andalso isValidForPair(offset1, offset2) then let val (reg1, reg2, offset) = if offset1 < offset2 then (regT1, regT2, offset1) else (regT2, regT1, offset2) in forward(tl2, LoadRegPair{ regT1=reg1, regT2=reg2, regN=regN1, unitOffset=offset, loadType=lt1, unscaledType=NoUpdate} :: list, true) end else forward(tl1, load :: list, rep) | forward((store as StoreRegScaled{regT=regT1, regN=regN1, unitOffset=offset1, loadType=lt1}) :: (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=offset2, loadType=lt2} ::tl2), list, rep) = (* Two adjacent stores - can this be converted to store-pair? *) if regN1 = regN2 andalso lt1 = lt2 andalso (offset2 = offset1 + 1 orelse offset2 = offset1 - 1) andalso (case lt1 of Load64 => true | Load32 => true | _ => false) andalso isValidForPair(offset1, offset2) then let val (reg1, reg2, offset) = if offset1 < offset2 then (regT1, regT2, offset1) else (regT2, regT1, offset2) in forward(tl2, StoreRegPair{ regT1=reg1, regT2=reg2, regN=regN1, unitOffset=offset, loadType=lt1, unscaledType=NoUpdate} :: list, true) end else forward(tl1, store :: list, rep) | forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~8, loadType=Load64, unscaledType=NoUpdate}) :: (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=0, loadType=Load64} ::tl2), list, rep) = (* Common case - store length word and then the first word of the cell. *) if regN1 = regN2 then forward(tl2, StoreRegPair{ regT1=regT1, regT2=regT2, regN=regN1, unitOffset= ~1, loadType=Load64, unscaledType=NoUpdate} :: list, true) else forward(tl1, store :: list, rep) | forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~4, loadType=Load32, unscaledType=NoUpdate}) :: (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=0, loadType=Load32} ::tl2), list, rep) = (* Common case - store length word and then the first word of the cell. *) if regN1 = regN2 then forward(tl2, StoreRegPair{ regT1=regT1, regT2=regT2, regN=regN1, unitOffset= ~1, loadType=Load32, unscaledType=NoUpdate} :: list, true) else forward(tl1, store :: list, rep) | forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: (tl1 as StoreRegUnscaled{regT=regT2, regN=regN2, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex} :: tl2), list, rep) = (* Adjacent pushes T2 is in the lower address so the order is T2, T1. The stack is always 64-bit aligned so this works on both native addressing and 32-in-64. *) if regN1 = regN2 then forward(tl2, StoreRegPair{ regT1=regT2, regT2=regT1, regN=regN1, unitOffset= ~2, loadType=Load64, unscaledType=PreIndex} :: list, true) else forward(tl1, store :: list, rep) | forward((add1 as AddImmediate{regN=regN1, regD=regD1, immed=immed1, shifted=false, opSize=OpSize64, setFlags=false}) :: (tl1 as AddImmediate{regN=regN2, regD=regD2, immed=immed2, shifted=false, opSize=OpSize64, setFlags=false} ::tl2), list, rep) = (* Adjacent stack resets. This can apply more generally but only if the result registers are the same. If they're not we may need the intermediate result. We put the result back into the input stream in case it can be combined with another stack reset. 
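For example an addImmediate of 16 followed by an addImmediate of 32 to the same register becomes a single addImmediate of 48, provided the combined value still fits in the 12-bit immediate field, which is what the 0w4096 test checks.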
*) if regN2 = regD2 andalso regD1 = regD2 andalso immed2+immed1 < 0w4096 then forward(AddImmediate{regN=regN1, regD=regD2, immed=immed2+immed1, shifted=false, opSize=OpSize64, setFlags=false} :: tl2, list, true) else forward(tl1, add1 :: list, rep) | forward(BitwiseLogical{bits=0w1, regN, regD=XZero, logOp=LogAnd, opSize=_, setFlags=true} :: ConditionalBranch(CondEqual, label) :: tl2, list, _) = (* Test the tag bit: bit 0. This is very common to test for nil/not nil. We could include other values but they're far less likely. *) forward(TestBitBranch{test=regN, bit=0w0, label=label, onZero=true} :: tl2, list, true) | forward(BitwiseLogical{bits=0w1, regN, regD=XZero, logOp=LogAnd, opSize=_, setFlags=true} :: ConditionalBranch(CondNotEqual, label) :: tl2, list, _) = forward(TestBitBranch{test=regN, bit=0w0, label=label, onZero=false} :: tl2, list, true) | forward(hd :: tl, list, rep) = forward(tl, hd :: list, rep) and reverse([], list, rep) = (list, rep) | reverse((add as AddImmediate{regN=regN2, regD=regD2, immed, shifted=false, opSize=OpSize64, setFlags=false}) :: (tl1 as LoadRegScaled{regT=regT1, regN=regN1, unitOffset=0, loadType=Load64} ::tl2), list, rep) = (* A stack reset occurring after a load. This is usually the ML SP but can also occur with C memory ops. It might be possible to consider other cases. *) if regN1 = regD2 andalso regN2 = regD2 andalso regT1 <> regN1 andalso immed < 0w256 then reverse(tl2, LoadRegUnscaled{regT=regT1, regN=regN1, byteOffset=Word.toInt immed, loadType=Load64, unscaledType=PostIndex} :: list, true) else reverse(tl1, add :: list, rep) | reverse((add as AddImmediate{regN=regN2, regD=regD2, immed, shifted=false, opSize=OpSize64, setFlags=false}) :: (tl1 as LoadRegPair{regT1=regT1, regT2=regT2, regN=regN1, unitOffset=0, loadType=Load64, unscaledType=NoUpdate} ::tl2), list, rep) = (* A stack reset occurring after a load pair *) if regN1 = regD2 andalso regN2 = regD2 andalso regT1 <> regN1 andalso regT2 <> regN1 andalso immed < 0w64 * 0w8 then reverse(tl2, LoadRegPair{regT1=regT1, regT2=regT2, regN=regN1, unitOffset=Word.toInt(immed div 0w8), loadType=Load64, unscaledType=PostIndex} :: list, true) else reverse(tl1, add :: list, rep) | reverse(hd :: tl, list, rep) = reverse(tl, hd :: list, rep) (* Repeat scans through the code until there are no further changes. *) fun repeat ops = case forward(ops, [], false) of (list, false) => list | (list, true) => repeat list val optimised = repeat instrs in generateCode{instrs=List.rev(toAssembler(optimised, [])), name=name, parameters=parameters, resultClosure=resultClosure, profileObject=profileObject} end (* Constant shifts are encoded in the immr and imms fields of the bit-field instruction. *) fun shiftConstant{ direction, regD, regN, shift, opSize } = let val (bitfieldKind, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (BFUnsigned, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (BFUnsigned, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (BFUnsigned, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (BFUnsigned, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (BFSigned, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (BFSigned, shift, 0wx1f) in BitField{ regN=regN, regD=regD, opSize=opSize, immr=immr, imms=imms, bitfieldKind=bitfieldKind } end (* These sequences are used both in the ML code-generator and in the FFI code so it is convenient to have them here and share the code. 
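Each of boxDouble, boxSysWord and boxFloat below allocates a fixed-size byte cell, stores the length word, writes the value into the first word of the cell and, when running 32-in-64, converts the absolute address of the cell back into an object index.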
*) local fun allocateWords(fixedReg, workReg, words, bytes, regMask, code) = let val (lengthWord, setLength, flagShift) = if is32in64 then (~4, Load32, 0w24) else (~8, Load64, 0w56) in code <::> AllocateMemoryFixedSize{ bytes=bytes, dest=fixedReg, save=regMask, work=X16 } <::> LoadNonAddr(workReg, Word64.orb(words, Word64.<<(Word64.fromLarge(Word8.toLarge Address.F_bytes), flagShift))) <::> (* Store the length word. Have to use the unaligned version because offset is -ve. *) StoreRegUnscaled{regT=workReg, regN=fixedReg, byteOffset= lengthWord, loadType=setLength, unscaledType=NoUpdate} end fun absoluteAddressToIndex(reg, code) = if is32in64 then code <::> SubShiftedReg{regM=X_Base32in64, regN=reg, regD=reg, shift=ShiftNone, opSize=OpSize64, setFlags=false} <::> shiftConstant{direction=ShiftRightLogical, regN=reg, regD=reg, shift=0w2, opSize=OpSize64} else code in fun boxDouble({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Double64}) and boxSysWord({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreRegScaled{regT=source, regN=destination, unitOffset=0, loadType=Load64}) and boxFloat({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, 0w1, 0w8, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Float32}) end structure Sharing = struct type closureRef = closureRef type loadType = loadType type opSize = opSize type logicalOp = logicalOp type floatSize = floatSize type shiftDirection = shiftDirection type multKind = multKind type fpUnary = fpUnary type fpBinary = fpBinary type unscaledType = unscaledType type condSet = condSet type bitfieldKind = bitfieldKind type brRegType = brRegType type precode = precode type xReg = xReg type vReg = vReg type label = label type labelMaker = labelMaker type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale type instr = instr end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML index 3e35b063..b00be2fd 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML @@ -1,1163 +1,1165 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64PushRegisters( structure Arm64ICode: ARM64ICODE structure IntSet: INTSET structure Identify: ARM64IDENTIFYREFERENCES sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ) : ARM64PUSHREGISTERS = struct open Arm64ICode open IntSet open Identify type basicBlockAbstract = (preg, pregOrZero, preg) basicBlock (* Curried subscript functions *) fun asub a i = Array.sub(a, i) and vsub v i = Vector.sub(v, i) exception InternalError = Misc.InternalError (* Each preg in the input is mapped to either a new preg or the stack. *) datatype pregMapType = Unset | ToPReg of preg | ToStack of int * stackLocn (* The stack contains both entries in the input code and entries added here. It is really used to ensure that the stack at run time is the same size at the start of a block whichever block has jumped to it. *) datatype stackEntry = NewEntry of {pregNo: int} (* pregNo is the original preg that has been pushed here. *) | OriginalEntry of { stackLoc: stackLocn } | HandlerEntry fun addRegisterPushes{code: extendedBasicBlock vector, pushVec: bool vector, pregProps, firstPass=_} = let val maxPRegs = Vector.length pregProps val numberOfBlocks = Vector.length code (* Output registers and properties. *) val pregCounter = ref 0 val pregPropList = ref [] val pregMap = Array.array(maxPRegs, Unset) val maxStack = ref 0 (* The stack size we've assumed for the block. Also indicates if a block has already been processed. *) val inputStackSizes = Array.array(numberOfBlocks, NONE) (* The result of processing a block. *) val blockOutput = Array.array(numberOfBlocks, {code=[], stackCount=0}) (* Extra blocks to adjust the stack are added here. *) val extraBlocks: basicBlockAbstract list ref = ref [] val blockCounter = ref numberOfBlocks (* Get the blocks that are inputs for each one. *) local val blockRefs = Array.array(numberOfBlocks, []) fun setReferences fromBlock = let val ExtendedBasicBlock{ flow, ...} = vsub code fromBlock val refs = successorBlocks flow fun setRefs toBlock = let val oldRefs = asub blockRefs toBlock in Array.update(blockRefs, toBlock, fromBlock :: oldRefs); if null oldRefs then setReferences toBlock else () end in List.app setRefs refs end val () = setReferences 0 in val blockRefs = blockRefs end (* Recursive scan of the blocks. For each block we produce an input and output state. The input state is the output state of the predecessor i.e. some block that jumps to this, but with any entries removed that are not used in this block. It is then necessary to match the input state, if necessary by adding extra blocks that just do the matching. *) local val haveProcessed = isSome o asub inputStackSizes fun processBlocks toDo = case List.filter (fn (n, _) => not(haveProcessed n)) toDo of [] => () (* Nothing left to do *) | stillToDo as head :: _ => let (* Try to find a block all of whose predecessors have been processed. That increases the chances that we will have cached items. TODO: This is no longer necessary since we don't do any caching here now so could may be simplified. *) fun available(dest, _) = List.all haveProcessed (Array.sub(blockRefs, dest)) val (blockNo, lastOutputState) = case List.find available stillToDo of SOME c => c | NONE => head (* This is the first time we've come to this block. 
*) val ExtendedBasicBlock{ block, flow, imports, passThrough, loopRegs, initialStacks, ...} = vsub code blockNo (* Remove any items from the input state that are no longer needed for this block. They could be local to the previous block or needed by a different successor. Although the values in loopRegs are not required the stack space is so that they can be updated. *) fun removeItems(result as {stack=[], stackCount=0}) = result | removeItems{stack=[], ...} = raise InternalError "removeItems - stack size" | removeItems (thisStack as {stack=NewEntry{pregNo} :: rest, stackCount}) = if member(pregNo, imports) orelse member(pregNo, passThrough) orelse member(pregNo, loopRegs) then thisStack else removeItems{stack=rest, stackCount=stackCount-1} | removeItems (thisStack as {stack=OriginalEntry{stackLoc=StackLoc{rno, size}, ...} :: rest, stackCount}) = if member(rno, initialStacks) then thisStack else removeItems{stack=rest, stackCount=stackCount-size} | removeItems result = result val {stackCount=newSp, stack=newStack} = removeItems lastOutputState (* References to hold the current stack count (number of words on the stack) and the list of items on the stack. The list is not used directly to map stack addresses. Instead it is used to match the stack at the beginning and end of a block. *) val stackCount = ref newSp val stack = ref newStack (* Items from the stack that have been marked as deleted but not yet removed. We only remove items from the top of the stack to avoid quadratic behaviour with a very deep stack. *) val deletedItems = ref [] (* Save the stack size in case we come by a different route. *) val () = Array.update(inputStackSizes, blockNo, SOME newSp) fun pushItemToStack item = let val size = case item of NewEntry _ => 1 | OriginalEntry{stackLoc=StackLoc{size, ...}, ...} => size | HandlerEntry => 2 in stackCount := ! stackCount+size; stack := item :: ! stack; maxStack := Int.max(!maxStack, !stackCount) end fun newPReg propKind = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := propKind :: !pregPropList in PReg regNo end and newStackLoc size = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropStack size :: !pregPropList in StackLoc{size=size, rno=regNo} end (* Map a source register. This always loads the argument. *) fun mapSrcReg(PReg n) = case Array.sub(pregMap, n) of Unset => raise InternalError "mapSrcReg - unset" | ToPReg preg => (preg, []) | ToStack(stackLoc, container as StackLoc{size, ...}) => let (* Make a new untagged register. That will prevent us pushing it if we have to spill registers. *) val newReg = newPReg RegPropUntagged in (newReg, [LoadStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0, dest=newReg}]) end fun mapDestReg(PReg n) = let val currentLocation = Array.sub(pregMap, n) val kind = Vector.sub(pregProps, n) in if Vector.sub(pushVec, n) then let (* This should not have been seen before. *) val _ = case currentLocation of Unset => () | _ => raise InternalError "mapDestReg - already set" val newReg = newPReg kind val newContainer = newStackLoc 1 val () = Array.update(pregMap, n, ToStack (!stackCount, newContainer)) val () = pushItemToStack(NewEntry{pregNo=n}) in (newReg, [PushToStack{source= newReg, container=newContainer, copies=1}]) end else let (* See if we already have a number for it. We may encounter the same preg as a destination when returning the result from a conditional in which case we have to use the same number. 
We shouldn't have pushed it. *) val newReg = case (currentLocation, kind) of (Unset, _) => let val newReg = newPReg kind val () = Array.update(pregMap, n, ToPReg newReg) in newReg end | (ToPReg preg, RegPropMultiple) => preg | _ => raise InternalError "mapDestReg - multiply defined non-merge reg" in (newReg, []) end end (* Optional destination for arithmetic and logical ops. *) fun mapOptDest ZeroReg = (ZeroReg, []) | mapOptDest (SomeReg destReg) = let val (destVal, destCode) = mapDestReg destReg in (SomeReg destVal, destCode) end fun mapOptSrc ZeroReg = (ZeroReg, []) | mapOptSrc (SomeReg srcReg) = let val (srcVal, srcCode) = mapSrcReg srcReg in (SomeReg srcVal, srcCode) end (* Adjust a stack offset from the old state to the new state. *) fun mapContainerAndStack(StackLoc{rno, size}, field) = let val (newStackAddr, newContainer) = case Array.sub(pregMap, rno) of Unset => raise InternalError "mapContainer - unset" | ToPReg _ => raise InternalError "mapContainer - ToPReg" | ToStack stackContainer => stackContainer val newOffset = !stackCount-(newStackAddr+size) + field in (newOffset, newContainer) end (* Add an entry for an existing stack entry. *) fun mapDestContainer(StackLoc{rno, size}, locn) = ( case Array.sub(pregMap, rno) of Unset => let val newContainer = newStackLoc size val () = Array.update(pregMap, rno, ToStack(locn, newContainer)) in newContainer end | _ => raise InternalError "mapDestContainer: already set" ) (* Map a function argument which could be a register or a stack entry. A register entry could have been pushed. *) fun mapArgument(ArgInReg (PReg r)) = ( case Array.sub(pregMap, r) of Unset => raise InternalError "mapSource - unset" | ToPReg preg => ArgInReg preg | ToStack(stackLoc, container as StackLoc{size, ...}) => ArgOnStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0} ) | mapArgument(ArgOnStack{container, field, ...}) = let val (newOffset, newContainer) = mapContainerAndStack(container, field) in ArgOnStack{container=newContainer, wordOffset=newOffset, field=field} end (* Rewrite the code, replacing any registers that need to be pushed with references to the stack. The result is built up in reverse order and then reversed. *) fun pushRegisters({instr=MoveRegister{ source, dest as PReg dReg }, ...}, code) = if Vector.sub(pushVec, dReg) then (* We're going to push this. *) let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest (* TODO: Since we're pushing it we don't need to move it first. 
*) in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end else let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=LoadNonAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadNonAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ code end | pushRegisters({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ code end | pushRegisters({instr=LoadWithIndexedOffset { base, dest, index, loadType, signExtendIndex}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, loadType=loadType, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ code end | pushRegisters({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize, signExtendIndex}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, floatSize=floatSize, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ code end | pushRegisters({instr=GetThreadId { dest}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ GetThreadId { dest=destVal} :: code end | pushRegisters({instr=ObjectIndexAddressToAbsolute { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ObjectIndexAddressToAbsolute { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=AbsoluteToObjectIndex { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ AbsoluteToObjectIndex { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=AllocateMemoryFixed { bytesRequired, dest, ...}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryFixed { dest=destVal, bytesRequired=bytesRequired, saveRegs=[]} :: code end | pushRegisters({instr=AllocateMemoryVariable{size, dest, ...}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryVariable{size=sizeVal, dest=destVal, saveRegs=[]} :: sizeCode @ code end | pushRegisters({instr=InitialiseMem{size, addr, init}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (addrVal, addrCode) = mapSrcReg addr val (initVal, initCode) = mapSrcReg init in 
InitialiseMem{size=sizeVal, addr=addrVal, init=initVal} :: initCode @ addrCode @ sizeCode @ code end | pushRegisters({instr=BeginLoop, ...}, code) = BeginLoop :: code | pushRegisters({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}, code) = let (* Normally JumpLoop will be the last item in a block but it is possible that we've added a reset-stack after it. *) fun getValues [] = ([], []) | getValues ({src=source, dst=PReg n} :: rest) = let val (otherRegArgs, otherStackArgs) = getValues rest in case Array.sub(pregMap, n) of ToPReg lReg => ({src=mapArgument source, dst=lReg} :: otherRegArgs, otherStackArgs) | ToStack(stackloc, stackC as StackLoc{size, ...}) => let val sourceVal = mapArgument source val stackOff = !stackCount - stackloc - size in (otherRegArgs, {src=sourceVal, wordOffset=stackOff, stackloc=stackC} :: otherStackArgs) end | Unset => (* Drop it. It's never used. This can happen if we are folding a function over a list such that it always returns the last value and then discard the result of the fold. *) (otherRegArgs, otherStackArgs) end val (newRegArguments, newStackArgs) = getValues regArgs fun loadStackArg({src=source, stackloc=destC, ...}, otherArgs) = let val sourceVal = mapArgument source val (newOffset, newContainer) = mapContainerAndStack(destC, 0) in {src=sourceVal, wordOffset=newOffset, stackloc=newContainer} :: otherArgs end val oldStackArgs = List.foldr loadStackArg [] stackArgs val check = case checkInterrupt of NONE => NONE | SOME _ => SOME [] in JumpLoop{ regArgs=newRegArguments, stackArgs=oldStackArgs @ newStackArgs, checkInterrupt=check} :: code end | pushRegisters({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreFPWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreWithIndexedOffset { base, source, index, loadType, signExtendIndex}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, loadType=loadType, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithIndexedOffset { base, source, index, floatSize, signExtendIndex}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreFPWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, floatSize=floatSize, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ AddSubImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length} :: sourceCode @ code end | pushRegisters({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, 
...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ AddSubRegister { base=op1Val, shifted=op2Val, dest=destVal, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ LogicalImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, logOp=logOp, length=length} :: sourceCode @ code end | pushRegisters({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ LogicalRegister { base=op1Val, shifted=op2Val, dest=destVal, ccRef=ccRef, logOp=logOp, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...}, code) = let val (srcVal, op1Code) = mapSrcReg source val (shiftVal, op2Code) = mapSrcReg shift val (destVal, destCode) = mapDestReg dest in destCode @ ShiftRegister { source=srcVal, shift=shiftVal, dest=destVal, direction=direction, opSize=opSize} :: op2Code @ op1Code @ code end | pushRegisters({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...}, code) = let val (srcAVal, srcACode) = mapOptSrc sourceA val (srcMVal, srcMCode) = mapSrcReg sourceM val (srcNVal, srcNCode) = mapSrcReg sourceN val (destVal, destCode) = mapDestReg dest in destCode @ Multiplication { kind=kind, sourceA=srcAVal, sourceM=srcMVal, sourceN=srcNVal, dest=destVal} :: srcNCode @ srcMCode @ srcACode @ code end | pushRegisters({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...}, code) = let val (dividendVal, dividendCode) = mapSrcReg dividend val (divisorVal, divisorCode) = mapSrcReg divisor val (destVal, destCode) = mapDestReg dest in destCode @ Division { isSigned=isSigned, dividend=dividendVal, divisor=divisorVal, dest=destVal, opSize=opSize} :: divisorCode @ dividendCode @ code end | pushRegisters({instr=BeginFunction {regArgs, stackArgs}, ...}, code) = let (* Create a new container list. The offsets begin at -numArgs. *) fun newContainers(src :: srcs, offset) = let val newContainer = mapDestContainer(src, offset) in newContainer :: newContainers(srcs, offset+1) end | newContainers _ = [] val newStackArgs = newContainers(stackArgs, ~ (List.length stackArgs)) (* Push any registers that need to be pushed. *) fun pushReg((preg, rreg), (others, code)) = let val (newReg, newCode) = mapDestReg(preg) in ((newReg, rreg) :: others, newCode @ code) end val (newRegArgs, pushCode) = List.foldl pushReg ([], []) regArgs in pushCode @ BeginFunction {regArgs=newRegArgs, stackArgs=newStackArgs} :: code end | pushRegisters({instr=FunctionCall{callKind, regArgs, stackArgs, dests, containers, ...}, ...}, code) = let (* It's possible that this could lead to having to spill registers in order to load others. Leave that problem for the moment. *) fun loadStackArg (arg, otherArgs) = let val argVal = mapArgument arg in argVal :: otherArgs end val newStackArgs = List.foldr loadStackArg [] stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs (* Push any result registers that need to be pushed. 
*) fun pushResults((preg, rreg), (others, code)) = let val (newReg, newCode) = mapDestReg preg in ((newReg, rreg) :: others, newCode @ code) end val (destVals, destCode) = List.foldl pushResults ([], []) dests val newContainers = List.map(fn c => #2(mapContainerAndStack(c, 0))) containers in destCode @ FunctionCall{ callKind=callKind, regArgs=newRegArgs, stackArgs=newStackArgs, dests=destVals, saveRegs=[], containers=newContainers} :: code end | pushRegisters({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, ...}, ...}, code) = let val newStackOffset = !stackCount fun loadStackArg ({src, stack}, (otherLoads, otherArgs)) = let val (argVal, loadCode) = case mapArgument src of (source as ArgOnStack{wordOffset, container, field}) => (* If we're leaving it in its old location or we're pushing it above the current top we're ok. We're also ok if we're moving it from a somewhere above the last argument. Otherwise we have to load it. It goes into a normal tagged register which may mean that it could be pushed onto the stack in a subsequent pass. *) if wordOffset = stack+newStackOffset orelse stack+newStackOffset < 0 orelse newStackOffset-wordOffset > ~ stackAdjust then (source, []) else let val preg = newPReg RegPropGeneral in (ArgInReg preg, [LoadStack{wordOffset=wordOffset, container=container, field=field, dest=preg}]) end | argCode => (argCode, []) in (loadCode @ otherLoads, {src=argVal, stack=stack} :: otherArgs) end val (stackArgLoads, newStackArgs) = List.foldr loadStackArg ([], []) stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs in TailRecursiveCall{ callKind=callKind, regArgs=newRegArgs, stackArgs=newStackArgs, stackAdjust=stackAdjust, currStackSize=newStackOffset} :: stackArgLoads @ code end | pushRegisters({instr=ReturnResultFromFunction{results, returnReg, numStackArgs}, ...}, code) = let fun loadResults((preg, rreg), (others, code)) = let val (newReg, newCode) = mapSrcReg preg in ((newReg, rreg) :: others, newCode @ code) end val (resultValues, loadResults) = List.foldr loadResults ([], []) results val (returnValue, loadReturn) = mapSrcReg returnReg val resetCode = if !stackCount = 0 then [] else [ResetStackPtr{numWords= !stackCount}] in ReturnResultFromFunction{results=resultValues, returnReg=returnValue, numStackArgs=numStackArgs} :: resetCode @ loadReturn @ loadResults @ code end | pushRegisters({instr=RaiseExceptionPacket{packetReg}, ...}, code) = let val (packetVal, packetCode) = mapSrcReg packetReg in RaiseExceptionPacket{packetReg=packetVal} :: packetCode @ code end | pushRegisters({instr=PushToStack{ source, container, copies }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source (* This was a push from a previous pass. Treat as a container of size "copies". *) val newContainer = mapDestContainer(container, !stackCount) val () = pushItemToStack(OriginalEntry{stackLoc=container}) in PushToStack{source=sourceVal, container=newContainer, copies=copies} :: sourceCode @ code end | pushRegisters({instr=LoadStack{ dest, container, field, ... 
}, ...}, code) = let val (newOffset, newContainer) = mapContainerAndStack(container, field) val (destVal, destCode) = mapDestReg dest in destCode @ LoadStack{ dest=destVal, container=newContainer, field=field, wordOffset=newOffset } :: code end | pushRegisters({instr=StoreToStack{source, container, field, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (newOffset, newContainer) = mapContainerAndStack(container, field) in StoreToStack{source=sourceVal, container=newContainer, field=field, stackOffset=newOffset} :: sourceCode @ code end | pushRegisters({instr=ContainerAddress{ dest, container, ... }, ...}, code) = let val (newOffset, newContainer) = mapContainerAndStack(container, 0) val (destVal, destCode) = mapDestReg dest in destCode @ ContainerAddress{ dest=destVal, container=newContainer, stackOffset=newOffset } :: code end | pushRegisters({instr=ResetStackPtr _, ...}, code) = code (* Added in a previous pass - discard it. *) | pushRegisters({instr=TagValue{source, dest, isSigned, opSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ TagValue{source=sourceVal, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code end | pushRegisters({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...}, code) = let val (loadedSource, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UntagValue{source=loadedSource, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code end | pushRegisters({instr=BoxLarge{source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BoxLarge{source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code end | pushRegisters({instr=UnboxLarge{source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnboxLarge{source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=BoxTagFloat{floatSize, source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BoxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code end | pushRegisters({instr=UnboxTagFloat{floatSize, source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnboxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=LoadAcquire { base, dest, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadAcquire { base=baseVal, dest=destVal, loadType=loadType} :: baseCode @ code end | pushRegisters({instr=StoreRelease { base, source, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreRelease{ base=baseVal, source=sourceVal, loadType=loadType} :: baseCode @ sourceCode @ code end | pushRegisters({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BitFieldShift { source=sourceVal, dest=destVal, isSigned=isSigned, immr=immr, imms=imms, length=length} :: sourceCode @ code end | pushRegisters({instr=BitFieldInsert{source, destAsSource, dest, 
length, immr, imms}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destSrcVal, destSrcCode) = mapSrcReg destAsSource val (destVal, destCode) = mapDestReg dest in destCode @ BitFieldInsert { source=sourceVal, destAsSource=destSrcVal, dest=destVal, immr=immr, imms=imms, length=length} :: destSrcCode @ sourceCode @ code end | pushRegisters({instr=IndexedCaseOperation{testReg}, ...}, code) = let val (testVal, testCode) = mapSrcReg testReg in IndexedCaseOperation{testReg=testVal} :: testCode @ code end | pushRegisters({instr=PushExceptionHandler, ...}, code) = let (* Add a handler entry to the stack. *) val () = pushItemToStack HandlerEntry in PushExceptionHandler :: code end | pushRegisters({instr=PopExceptionHandler, ...}, code) = let (* Appears at the end of the block whose exceptions are being handled. Delete the handler and anything above it. *) (* Get the state after removing the handler. *) fun popContext ([], _) = raise InternalError "pushRegisters - pop handler" | popContext (HandlerEntry :: tl, new) = (tl, new-2) | popContext (OriginalEntry{stackLoc=StackLoc{size, ...}, ...} :: tl, new) = popContext(tl, new-size) | popContext (NewEntry _ :: tl, new) = popContext(tl, new-1) val (newStack, nnCount) = popContext(!stack, !stackCount) val () = stack := newStack val oldStackPtr = ! stackCount val () = stackCount := nnCount (* Reset the stack to just above the two words of the handler. *) val resetCode = if oldStackPtr <> nnCount+2 then [ResetStackPtr{numWords=oldStackPtr-nnCount-2}] else [] in PopExceptionHandler :: resetCode @ code end | pushRegisters({instr=BeginHandler{packetReg}, ...}, code) = let (* Start of a handler. The top active entry should be the handler. *) val () = case !stack of HandlerEntry :: tl => stack := tl | _ => raise InternalError "pushRegisters: BeginHandler" val () = stackCount := !stackCount - 2 val (packetVal, packetCode) = mapDestReg packetReg in packetCode @ BeginHandler{packetReg=packetVal} :: code end | pushRegisters({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...}, code) = let val (vec1Val, vec1Code) = mapSrcReg vec1Addr val (vec2Val, vec2Code) = mapSrcReg vec2Addr val (lenVal, lenCode) = mapSrcReg length in CompareByteVectors{vec1Addr=vec1Val, vec2Addr=vec2Val, length=lenVal, ccRef=ccRef} :: lenCode @ vec2Code @ vec1Code @ code end | pushRegisters({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...}, code) = let val (srcVal, srcCode) = mapSrcReg srcAddr val (destVal, destCode) = mapSrcReg destAddr val (lenVal, lenCode) = mapSrcReg length in BlockMove{srcAddr=srcVal, destAddr=destVal, length=lenVal, isByteMove=isByteMove} :: lenCode @ destCode @ srcCode @ code end | pushRegisters({instr=AddSubXSP{source, dest, isAdd}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ AddSubXSP { source=sourceVal, dest=destVal, isAdd=isAdd} :: sourceCode @ code end | pushRegisters({instr=TouchValue{source, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source in TouchValue { source=sourceVal} :: sourceCode @ code end | pushRegisters({instr=LoadAcquireExclusive{ base, dest }, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadAcquireExclusive { base=baseVal, dest=destVal} :: baseCode @ code end | pushRegisters({instr=StoreReleaseExclusive{ base, source, result }, ...}, code) = let val (sourceVal, sourceCode) = mapOptSrc source val (baseVal, baseCode) = mapSrcReg base val 
(resVal, resCode) = mapDestReg result in resCode @ StoreReleaseExclusive{ base=baseVal, source=sourceVal, result=resVal} :: baseCode @ sourceCode @ code end | pushRegisters({instr=MemoryBarrier, ...}, code) = MemoryBarrier :: code | pushRegisters({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ConvertIntToFloat{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize} :: sourceCode @ code end | pushRegisters({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ConvertFloatToInt{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize, rounding=rounding} :: sourceCode @ code end | pushRegisters({instr=UnaryFloatingPt{ source, dest, fpOp}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnaryFloatingPt{ source=sourceVal, dest=destVal, fpOp=fpOp} :: sourceCode @ code end | pushRegisters({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...}, code) = let val (arg1Val, arg1Code) = mapSrcReg arg1 val (arg2Val, arg2Code) = mapSrcReg arg2 val (destVal, destCode) = mapDestReg dest in destCode @ BinaryFloatingPoint{ arg1=arg1Val, arg2=arg2Val, dest=destVal, fpOp=fpOp, opSize=opSize} :: arg2Code @ arg1Code @ code end | pushRegisters({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...}, code) = let val (arg1Val, arg1Code) = mapSrcReg arg1 val (arg2Val, arg2Code) = mapSrcReg arg2 in CompareFloatingPoint{ arg1=arg1Val, arg2=arg2Val, opSize=opSize, ccRef=ccRef} :: arg2Code @ arg1Code @ code end + | pushRegisters({instr=CPUYield, ...}, code) = CPUYield :: code + | pushRegisters({instr=CacheCheck _, ...}, _) = raise InternalError "pushRegisters: CacheCheck" local fun doPush(instr as {kill, ...}, code) = let val newCode = pushRegisters(instr, code) (* Can we pop the stack? *) val stackReset = case setToList (minus(kill, loopRegs)) of [] => [] | killList => let (* See if any of the kill items are at the top of the stack. If they are we can pop them and perhaps items we've previously marked for deletion but not been able to pop. *) val oldStack = !stackCount fun checkAndAdd(r, output) = case Array.sub(pregMap, r) of ToStack(stackLoc, StackLoc{size, ...}) => if stackLoc < 0 then r :: output (* We can have arguments and return address. *) else if !stackCount = stackLoc+size then ( stack := tl (!stack); stackCount := stackLoc; output ) else r :: output | _ => r :: output val toAdd = List.foldl checkAndAdd [] killList fun reprocess list = let val prevStack = !stackCount val outlist = List.foldl checkAndAdd [] list in if !stackCount = prevStack then list else reprocess outlist end val () = if !stackCount = oldStack then deletedItems := toAdd @ !deletedItems else deletedItems := reprocess(toAdd @ !deletedItems) val _ = oldStack >= !stackCount orelse raise InternalError "negative stack offset" in if !stackCount = oldStack then [] else [ResetStackPtr{numWords=oldStack - !stackCount}] end in stackReset @ newCode end in val codeResult = List.foldl doPush [] block val outputCount = ! 
stackCount val results = {code=codeResult, stackCount= outputCount} val stateResult = { stackCount= outputCount, stack= !stack } val () = Array.update(blockOutput, blockNo, results) end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val addItems = List.map(fn m => (m, stateResult)) addSet in processBlocks(addItems @ stillToDo) end in val () = processBlocks([(0, {stack=[], stackCount=0})]) end (* Put together the result code and blocks. *) local fun createBlock blockNo = (* Skip unreferenced blocks apart from block 0. *) if blockNo <> 0 andalso null (asub blockRefs blockNo) then BasicBlock{block=[], flow=ExitCode} else let val ExtendedBasicBlock{ flow, ...} = vsub code blockNo val {code=codeResult, stackCount=outputCount, ...} = asub blockOutput blockNo (* Process the successor. If we need a stack adjustment this will require an adjustment block. TODO: We could put a pre-adjustment if we only have one branch to this block. *) fun matchStacks targetBlock = let (* Process the destination. If it hasn't been processed. *) val expectedInput = valOf (asub inputStackSizes targetBlock) in if expectedInput = outputCount then targetBlock else let val _ = outputCount > expectedInput orelse raise InternalError "adjustStack" val adjustCode = [ResetStackPtr{numWords=outputCount-expectedInput}] val newBlock = BasicBlock{block=adjustCode, flow=Unconditional targetBlock} val newBlockNo = !blockCounter before blockCounter := !blockCounter+1 val () = extraBlocks := newBlock :: !extraBlocks in newBlockNo end end val (finalCode, newFlow) = case flow of ExitCode => (codeResult, ExitCode) | Unconditional m => let (* Process the block. Since we're making an unconditional jump we can include any stack adjustment needed to match the destination in here. In particular this includes loops. *) val expectedInput = valOf (asub inputStackSizes m) val _ = outputCount >= expectedInput orelse raise InternalError "negative reset" val resultCode = if expectedInput = outputCount then codeResult else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult in (resultCode, Unconditional m) end (* For any of these, if we need to adjust the stack we have to add an adjustment block. *) | Conditional {trueJump, falseJump, ccRef, condition} => (codeResult, Conditional{trueJump=matchStacks trueJump, falseJump=matchStacks falseJump, ccRef=ccRef, condition=condition}) | SetHandler{ handler, continue } => (codeResult, SetHandler{ handler=matchStacks handler, continue=matchStacks continue}) | IndexedBr cases => (codeResult, IndexedBr(map matchStacks cases)) | u as UnconditionalHandle _ => (codeResult, u) | c as ConditionalHandle{ continue, ... } => let (* As for unconditional branch *) val expectedInput = valOf (asub inputStackSizes continue) val _ = outputCount >= expectedInput orelse raise InternalError "negative reset" val resultCode = if expectedInput = outputCount then codeResult else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult in (resultCode, c) end in BasicBlock{block=List.rev finalCode, flow=newFlow} end in val resultBlocks = Vector.tabulate(numberOfBlocks, createBlock) end (* Add any extra blocks to the result. 
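These extra blocks are the stack-adjustment stubs created by matchStacks: each one resets the stack pointer and then jumps unconditionally to the real target. They were numbered from blockCounter onwards, after all the original blocks, so appending them here leaves every existing block index valid.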
*) val finalResult = case !extraBlocks of [] => resultBlocks | blocks => Vector.concat[resultBlocks, Vector.fromList(List.rev blocks)] val pregProperties = Vector.fromList(List.rev(! pregPropList)) in {code=finalResult, pregProps=pregProperties, maxStack= !maxStack} end structure Sharing = struct type extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and preg = preg and pregOrZero = pregOrZero end end; diff --git a/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml b/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml index a5117b97..d7190762 100644 --- a/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml +++ b/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml @@ -1,739 +1,739 @@ (* - Copyright (c) 2012, 2016-21 David C.J. Matthews + Copyright (c) 2012, 2016-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Intermediate code tree for the back end of the compiler. *) structure BackendIntermediateCode: BACKENDINTERMEDIATECODE = struct open Address structure BuiltIns = struct datatype testConditions = TestEqual | TestLess | TestLessEqual | TestGreater | TestGreaterEqual | TestUnordered (* Reals only. 
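The unordered test is true when the operands cannot be ordered, i.e. when at least one of them is a NaN.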
*) datatype arithmeticOperations = ArithAdd | ArithSub | ArithMult | ArithQuot | ArithRem | ArithDiv | ArithMod datatype logicalOperations = LogicalAnd | LogicalOr | LogicalXor datatype shiftOperations = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic datatype unaryOps = NotBoolean | IsTaggedValue | MemoryCellLength | MemoryCellFlags | ClearMutableFlag | LongWordToTagged | SignedToLongWord | UnsignedToLongWord | RealAbs of precision | RealNeg of precision | RealFixedInt of precision | FloatToDouble | DoubleToFloat | RealToInt of precision * IEEEReal.rounding_mode | TouchAddress | AllocCStack | LockMutex | TryLockMutex | UnlockMutex and precision = PrecSingle | PrecDouble and binaryOps = WordComparison of { test: testConditions, isSigned: bool } | FixedPrecisionArith of arithmeticOperations | WordArith of arithmeticOperations | WordLogical of logicalOperations | WordShift of shiftOperations | AllocateByteMemory | LargeWordComparison of testConditions | LargeWordArith of arithmeticOperations | LargeWordLogical of logicalOperations | LargeWordShift of shiftOperations | RealComparison of testConditions * precision | RealArith of arithmeticOperations * precision | PointerEq | FreeCStack and nullaryOps = GetCurrentThreadId - | CheckRTSException + | CPUPause | CreateMutex fun unaryRepr NotBoolean = "NotBoolean" | unaryRepr IsTaggedValue = "IsTaggedValue" | unaryRepr MemoryCellLength = "MemoryCellLength" | unaryRepr MemoryCellFlags = "MemoryCellFlags" | unaryRepr ClearMutableFlag = "ClearMutableFlag" | unaryRepr LongWordToTagged = "LongWordToTagged" | unaryRepr SignedToLongWord = "SignedToLongWord" | unaryRepr UnsignedToLongWord = "UnsignedToLongWord" | unaryRepr (RealAbs prec) = "RealAbs" ^ precRepr prec | unaryRepr (RealNeg prec) = "RealNeg" ^ precRepr prec | unaryRepr (RealFixedInt prec) = "RealFixedInt" ^ precRepr prec | unaryRepr FloatToDouble = "FloatToDouble" | unaryRepr DoubleToFloat = "DoubleToFloat" | unaryRepr (RealToInt (prec, mode)) = "RealToInt" ^ precRepr prec ^ rndModeRepr mode | unaryRepr TouchAddress = "TouchAddress" | unaryRepr AllocCStack = "AllocCStack" | unaryRepr LockMutex = "LockMutex" | unaryRepr TryLockMutex = "TryLockMutex" | unaryRepr UnlockMutex = "UnlockMutex" and binaryRepr (WordComparison{test, isSigned}) = "Test" ^ (testRepr test) ^ (if isSigned then "Signed" else "Unsigned") | binaryRepr (FixedPrecisionArith arithOp) = (arithRepr arithOp) ^ "Fixed" | binaryRepr (WordArith arithOp) = (arithRepr arithOp) ^ "Word" | binaryRepr (WordLogical logOp) = (logicRepr logOp) ^ "Word" | binaryRepr (WordShift shiftOp) = (shiftRepr shiftOp) ^ "Word" | binaryRepr AllocateByteMemory = "AllocateByteMemory" | binaryRepr (LargeWordComparison test) = "Test" ^ (testRepr test) ^ "LargeWord" | binaryRepr (LargeWordArith arithOp) = (arithRepr arithOp) ^ "LargeWord" | binaryRepr (LargeWordLogical logOp) = (logicRepr logOp) ^ "LargeWord" | binaryRepr (LargeWordShift shiftOp) = (shiftRepr shiftOp) ^ "LargeWord" | binaryRepr (RealComparison (test, prec)) = "Test" ^ testRepr test ^ precRepr prec | binaryRepr (RealArith (arithOp, prec)) = arithRepr arithOp ^ precRepr prec | binaryRepr PointerEq = "PointerEq" | binaryRepr FreeCStack = "FreeCStack" and nullaryRepr GetCurrentThreadId = "GetCurrentThreadId" - | nullaryRepr CheckRTSException = "CheckRTSException" + | nullaryRepr CPUPause = "CPUPause" | nullaryRepr CreateMutex = "CreateMutex" and testRepr TestEqual = "Equal" | testRepr TestLess = "Less" | testRepr TestLessEqual = "LessEqual" | testRepr TestGreater = "Greater" | testRepr 
TestGreaterEqual = "GreaterEqual" | testRepr TestUnordered = "Unordered" and arithRepr ArithAdd = "Add" | arithRepr ArithSub = "Sub" | arithRepr ArithMult = "Mult" | arithRepr ArithQuot = "Quot" | arithRepr ArithRem = "Rem" | arithRepr ArithDiv = "Div" | arithRepr ArithMod = "Mod" and logicRepr LogicalAnd = "And" | logicRepr LogicalOr = "Or" | logicRepr LogicalXor = "Xor" and shiftRepr ShiftLeft = "Left" | shiftRepr ShiftRightLogical = "RightLogical" | shiftRepr ShiftRightArithmetic = "RightArithmetic" and precRepr PrecSingle = "Single" | precRepr PrecDouble = "Double" and rndModeRepr IEEEReal.TO_NEAREST = "Round" | rndModeRepr IEEEReal.TO_NEGINF = "Down" | rndModeRepr IEEEReal.TO_POSINF = "Up" | rndModeRepr IEEEReal.TO_ZERO = "Trunc" end datatype argumentType = GeneralType | DoubleFloatType | SingleFloatType | ContainerType of int datatype backendIC = BICNewenv of bicCodeBinding list * backendIC (* Set of bindings with an expression. *) | BICConstnt of machineWord * Universal.universal list (* Load a constant *) | BICExtract of bicLoadForm (* Get a local variable, an argument or a closure value *) | BICField of {base: backendIC, offset: int } (* Load a field from a tuple or record *) | BICEval of (* Evaluate a function with an argument list. *) { function: backendIC, argList: (backendIC * argumentType) list, resultType: argumentType } (* Built-in functions. *) | BICNullary of {oper: BuiltIns.nullaryOps} | BICUnary of {oper: BuiltIns.unaryOps, arg1: backendIC} | BICBinary of {oper: BuiltIns.binaryOps, arg1: backendIC, arg2: backendIC} | BICArbitrary of {oper: BuiltIns.arithmeticOperations, shortCond: backendIC, arg1: backendIC, arg2: backendIC, longCall: backendIC} | BICLambda of bicLambdaForm (* Lambda expressions. *) | BICCond of backendIC * backendIC * backendIC (* If-then-else expression *) | BICCase of (* Case expressions *) { cases : backendIC option list, (* NONE means "jump to the default". *) test : backendIC, default : backendIC, isExhaustive: bool, firstIndex: word } | BICBeginLoop of (* Start of tail-recursive inline function. *) { loop: backendIC, arguments: (bicSimpleBinding * argumentType) list } | BICLoop of (backendIC * argumentType) list (* Jump back to start of tail-recursive function. *) | BICRaise of backendIC (* Raise an exception *) | BICHandle of (* Exception handler. *) { exp: backendIC, handler: backendIC, exPacketAddr: int } | BICTuple of backendIC list (* Tuple *) | BICSetContainer of (* Copy a tuple to a container. *) { container: backendIC, tuple: backendIC, filter: BoolVector.vector } | BICLoadContainer of {base: backendIC, offset: int } | BICTagTest of { test: backendIC, tag: word, maxTag: word } | BICLoadOperation of { kind: loadStoreKind, address: bicAddress } | BICStoreOperation of { kind: loadStoreKind, address: bicAddress, value: backendIC } | BICBlockOperation of { kind: blockOpKind, sourceLeft: bicAddress, destRight: bicAddress, length: backendIC } | BICAllocateWordMemory of {numWords: backendIC, flags: backendIC, initial: backendIC} and bicCodeBinding = BICDeclar of bicSimpleBinding (* Make a local declaration or push an argument *) | BICRecDecs of { addr: int, lambda: bicLambdaForm } list (* Set of mutually recursive declarations. *) | BICNullBinding of backendIC (* Just evaluate the expression and discard the result. *) | BICDecContainer of { addr: int, size: int } (* Create a container for a tuple on the stack. *) and caseType = CaseWord (* Word or fixed-precision integer. 
*) | CaseTag of word and bicLoadForm = BICLoadLocal of int (* Local binding *) | BICLoadArgument of int (* Argument - 0 is first arg etc.*) | BICLoadClosure of int (* Closure - 0 is first closure item etc *) | BICLoadRecursive (* Recursive call *) and loadStoreKind = LoadStoreMLWord of {isImmutable: bool} (* Load/Store an ML word in an ML word cell. *) | LoadStoreMLByte of {isImmutable: bool} (* Load/Store a byte, tagging and untagging as appropriate, in an ML byte cell. *) | LoadStoreC8 (* Load/Store C values - The base address is a boxed SysWord.word value. *) | LoadStoreC16 | LoadStoreC32 | LoadStoreC64 | LoadStoreCFloat | LoadStoreCDouble | LoadStoreUntaggedUnsigned and blockOpKind = BlockOpMove of {isByteMove: bool} | BlockOpEqualByte | BlockOpCompareByte withtype bicSimpleBinding = { (* Declare a value or push an argument. *) value: backendIC, addr: int } and bicLambdaForm = { (* Lambda expressions. *) body : backendIC, name : string, closure : bicLoadForm list, argTypes : argumentType list, resultType : argumentType, localCount : int } and bicAddress = (* Address form used in loads, store and block operations. The base is an ML address if this is to/from ML memory or a (boxed) SysWord.word if it is to/from C memory. The index is a value in units of the size of the item being loaded/stored and the offset is always in bytes. *) {base: backendIC, index: backendIC option, offset: int} structure CodeTags = struct open Universal val tupleTag: universal list list tag = tag() fun splitProps _ [] = (NONE, []) | splitProps tag (hd::tl) = if Universal.tagIs tag hd then (SOME hd, tl) else let val (p, l) = splitProps tag tl in (p, hd :: l) end fun mergeTupleProps(p, []) = p | mergeTupleProps([], p) = p | mergeTupleProps(m, n) = ( case (splitProps tupleTag m, splitProps tupleTag n) of ((SOME mp, ml), (SOME np, nl)) => let val mpl = Universal.tagProject tupleTag mp and npl = Universal.tagProject tupleTag np val merge = ListPair.mapEq mergeTupleProps (mpl, npl) in Universal.tagInject tupleTag merge :: (ml @ nl) end | _ => m @ n ) end fun loadStoreKindRepr(LoadStoreMLWord {isImmutable=true}) = "MLWordImmutable" | loadStoreKindRepr(LoadStoreMLWord {isImmutable=false}) = "MLWord" | loadStoreKindRepr(LoadStoreMLByte {isImmutable=true}) = "MLByteImmutable" | loadStoreKindRepr(LoadStoreMLByte {isImmutable=false}) = "MLByte" | loadStoreKindRepr LoadStoreC8 = "C8Bit" | loadStoreKindRepr LoadStoreC16 = "C16Bit" | loadStoreKindRepr LoadStoreC32 = "C32Bit" | loadStoreKindRepr LoadStoreC64 = "C64Bit" | loadStoreKindRepr LoadStoreCFloat = "CFloat" | loadStoreKindRepr LoadStoreCDouble = "CDouble" | loadStoreKindRepr LoadStoreUntaggedUnsigned = "MLWordUntagged" fun blockOpKindRepr (BlockOpMove{isByteMove=false}) = "MoveWord" | blockOpKindRepr (BlockOpMove{isByteMove=true}) = "MoveByte" | blockOpKindRepr BlockOpEqualByte = "EqualByte" | blockOpKindRepr BlockOpCompareByte = "CompareByte" open Pretty fun pList ([]: 'b list, _: string, _: 'b->pretty) = [] | pList ([h], _, disp) = [disp h] | pList (h::t, sep, disp) = PrettyBlock (0, false, [], [ disp h, PrettyBreak (0, 0), PrettyString sep ] ) :: PrettyBreak (1, 0) :: pList (t, sep, disp) fun pretty (pt : backendIC) : pretty = let fun printList(start, lst, sep) : pretty = PrettyBlock (1, true, [], PrettyString (start ^ "(") :: pList(lst, sep, pretty) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) fun prettyArgType GeneralType = PrettyString "G" | prettyArgType DoubleFloatType = PrettyString "D" | prettyArgType SingleFloatType = PrettyString "F" | prettyArgType 
(ContainerType m) = PrettyString ("M"^Int.toString m) fun prettyArg (c, t) = PrettyBlock(1, false, [], [pretty c, PrettyBreak (1, 0), prettyArgType t]) fun prettyArgs(start, lst, sep) : pretty = PrettyBlock (1, true, [], PrettyString (start ^ "(") :: pList(lst, sep, prettyArg) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) fun prettyAddress({base, index, offset}: bicAddress): pretty = let in PrettyBlock (1, true, [], [ PrettyString "[", PrettyBreak (0, 3), pretty base, PrettyBreak (0, 0), PrettyString ",", PrettyBreak (1, 0), case index of NONE => PrettyString "-" | SOME i => pretty i, PrettyBreak (0, 0), PrettyString ",", PrettyBreak (1, 0), PrettyString(Int.toString offset), PrettyBreak (0, 0), PrettyString "]" ]) end in case pt of BICEval {function, argList, resultType} => let val prettyArgs = PrettyBlock (1, true, [], PrettyString ("$(") :: pList(argList, ",", prettyArg) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) in PrettyBlock (3, false, [], [ pretty function, PrettyBreak(1, 0), prettyArgType resultType, PrettyBreak(1, 0), prettyArgs ] ) end | BICUnary { oper, arg1 } => PrettyBlock (3, false, [], [ PrettyString(BuiltIns.unaryRepr oper), PrettyBreak(1, 0), printList("", [arg1], ",") ] ) | BICBinary { oper, arg1, arg2 } => PrettyBlock (3, false, [], [ PrettyString(BuiltIns.binaryRepr oper), PrettyBreak(1, 0), printList("", [arg1, arg2], ",") ] ) | BICNullary { oper } => PrettyString(BuiltIns.nullaryRepr oper) | BICArbitrary { oper, shortCond, arg1, arg2, longCall } => PrettyBlock (3, false, [], [ PrettyString(BuiltIns.arithRepr oper), PrettyBreak(1, 0), printList("", [shortCond, arg1, arg2, longCall], ",") ] ) | BICAllocateWordMemory { numWords, flags, initial } => PrettyBlock (3, false, [], [ PrettyString "AllocateWordMemory", PrettyBreak(1, 0), printList("", [numWords, flags, initial], ",") ] ) | BICExtract (BICLoadLocal addr) => let val str : string = concat ["LOCAL(", Int.toString addr, ")"] in PrettyString str end | BICExtract (BICLoadArgument addr) => let val str : string = concat ["PARAM(", Int.toString addr, ")"] in PrettyString str end | BICExtract (BICLoadClosure addr) => let val str : string = concat ["CLOS(", Int.toString addr, ")"] in PrettyString str end | BICExtract (BICLoadRecursive) => let val str : string = concat ["RECURSIVE(", ")"] in PrettyString str end | BICField {base, offset} => let val str = "INDIRECT(" ^ Int.toString offset ^ ", "; in PrettyBlock(0, false, [], [ PrettyString str, pretty base, PrettyString ")" ] ) end | BICLambda {body, name, closure, argTypes, resultType, localCount} => let fun prettyArgTypes [] = [] | prettyArgTypes [last] = [prettyArgType last] | prettyArgTypes (hd::tl) = prettyArgType hd :: PrettyBreak(1, 0) :: prettyArgTypes tl in PrettyBlock (1, true, [], [ PrettyString ("LAMBDA("), PrettyBreak (1, 0), PrettyString name, PrettyBreak (1, 0), PrettyString (" LOCALS=" ^ Int.toString localCount), PrettyBreak(1, 0), PrettyBlock (1, false, [], PrettyString "ARGS=" :: prettyArgTypes argTypes), PrettyBreak(1, 0), PrettyBlock (1, false, [], [PrettyString "RES=", prettyArgType resultType]), printList (" CLOS=", map BICExtract closure, ","), PrettyBreak (1, 0), pretty body, PrettyString "){LAMBDA}" ] ) end | BICConstnt (w, _) => PrettyString (stringOfWord w) | BICCond (f, s, t) => PrettyBlock (1, true, [], [ PrettyString "IF(", pretty f, PrettyString ", ", PrettyBreak (0, 0), pretty s, PrettyString ", ", PrettyBreak (0, 0), pretty t, PrettyBreak (0, 0), PrettyString (")") ] ) | BICNewenv(decs, final) => PrettyBlock (1, true, [], PrettyString 
("BLOCK" ^ "(") :: pList(decs, ";", prettyBinding) @ [ PrettyBreak (1, 0), pretty final, PrettyBreak (0, 0), PrettyString (")") ] ) | BICBeginLoop{loop=loopExp, arguments=args } => let fun prettyArg (c, t) = PrettyBlock(1, false, [], [prettySimpleBinding c, PrettyBreak (1, 0), prettyArgType t]) in PrettyBlock (3, false, [], [ PrettyBlock (1, true, [], PrettyString ("BEGINLOOP(") :: pList(args, ",", prettyArg) @ [ PrettyBreak (0, 0), PrettyString (")") ] ), PrettyBreak (0, 0), PrettyString "(", PrettyBreak (0, 0), pretty loopExp, PrettyBreak (0, 0), PrettyString ")" ] ) end | BICLoop ptl => prettyArgs("LOOP", ptl, ",") | BICRaise c => PrettyBlock (1, true, [], [ PrettyString "RAISE(", pretty c, PrettyBreak (0, 0), PrettyString (")") ] ) | BICHandle {exp, handler, exPacketAddr} => PrettyBlock (3, false, [], [ PrettyString "HANDLE(", pretty exp, PrettyString ("WITH exid=" ^ Int.toString exPacketAddr), PrettyBreak (1, 0), pretty handler, PrettyString ")" ] ) | BICCase {cases, test, default, isExhaustive, firstIndex, ...} => PrettyBlock (1, true, [], PrettyString "CASE (" :: pretty test :: PrettyBreak (1, 0) :: PrettyString ("( from " ^ Word.toString firstIndex ^ (if isExhaustive then " exhaustive" else "")) :: PrettyBreak (1, 0) :: pList(cases, ",", fn (SOME exp) => PrettyBlock (1, true, [], [ PrettyString "=>", PrettyBreak (1, 0), pretty exp ]) | NONE => PrettyString "=> default" ) @ [ PrettyBreak (1, 0), PrettyBlock (1, false, [], [ PrettyString "ELSE:", PrettyBreak (1, 0), pretty default ] ), PrettyBreak (1, 0), PrettyString (") {"^"CASE"^"}") ] ) | BICTuple ptl => printList("RECCONSTR", ptl, ",") | BICSetContainer{container, tuple, filter} => let val source = BoolVector.length filter val dest = BoolVector.foldl(fn (true, n) => n+1 | (false, n) => n) 0 filter in PrettyBlock (3, false, [], [ PrettyString (concat["SETCONTAINER(", Int.toString dest, "/", Int.toString source, ", "]), pretty container, PrettyBreak (0, 0), PrettyString ",", PrettyBreak (1, 0), pretty tuple, PrettyBreak (0, 0), PrettyString ")" ] ) end | BICLoadContainer {base, offset} => let val str = "INDIRECTCONTAINER(" ^ Int.toString offset ^ ", "; in PrettyBlock(0, false, [], [ PrettyString str, pretty base, PrettyString ")" ] ) end | BICTagTest { test, tag, maxTag } => PrettyBlock (3, false, [], [ PrettyString (concat["TAGTEST(", Word.toString tag, ", ", Word.toString maxTag, ","]), PrettyBreak (1, 0), pretty test, PrettyBreak (0, 0), PrettyString ")" ] ) | BICLoadOperation{ kind, address } => PrettyBlock (3, false, [], [ PrettyString("Load" ^ loadStoreKindRepr kind), PrettyBreak (1, 0), prettyAddress address ] ) | BICStoreOperation{ kind, address, value } => PrettyBlock (3, false, [], [ PrettyString("Store" ^ loadStoreKindRepr kind), PrettyBreak (1, 0), prettyAddress address, PrettyBreak (1, 0), PrettyString "<=", PrettyBreak (1, 0), pretty value ] ) | BICBlockOperation{ kind, sourceLeft, destRight, length } => PrettyBlock (3, false, [], [ PrettyString(blockOpKindRepr kind ^ "("), PrettyBreak (1, 0), prettyAddress sourceLeft, PrettyBreak (1, 0), PrettyString ",", prettyAddress destRight, PrettyBreak (1, 0), PrettyString ",", pretty length, PrettyBreak (1, 0), PrettyString ")" ] ) (* That list should be exhaustive! 
*) end (* pretty *) and prettyBinding(BICDeclar dec) = prettySimpleBinding dec | prettyBinding(BICRecDecs ptl) = let fun prettyRDec {lambda, addr} = PrettyBlock (1, false, [], [ PrettyString (concat ["DECL #", Int.toString addr, "="]), PrettyBreak (1, 0), pretty(BICLambda lambda) ] ) in PrettyBlock (1, true, [], PrettyString ("MUTUAL" ^ "(") :: pList(ptl, " AND ", prettyRDec) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) end | prettyBinding(BICNullBinding c) = pretty c | prettyBinding(BICDecContainer{addr, size}) = PrettyString (concat ["CONTAINER #", Int.toString addr, "=", Int.toString size]) and prettySimpleBinding{value, addr} = PrettyBlock (1, false, [], [ PrettyString (concat ["DECL #", Int.toString addr, "="]), PrettyBreak (1, 0), pretty value ] ) structure Sharing = struct type backendIC = backendIC and bicLoadForm = bicLoadForm and caseType = caseType and pretty = pretty and argumentType = argumentType and bicCodeBinding = bicCodeBinding and bicSimpleBinding = bicSimpleBinding and loadStoreKind = loadStoreKind and blockOpKind = blockOpKind and unaryOps = BuiltIns.unaryOps and binaryOps = BuiltIns.binaryOps and nullaryOps = BuiltIns.nullaryOps and testConditions = BuiltIns.testConditions and arithmeticOperations = BuiltIns.arithmeticOperations end end; diff --git a/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML b/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML index de758fb6..a0adb9fe 100644 --- a/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML +++ b/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML @@ -1,1237 +1,1237 @@ (* Copyright (c) 2000 Cambridge University Technical Services Limited - Further development copyright David C.J. Matthews 2016-18,2020-21 + Further development copyright David C.J. Matthews 2016-18,2020-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Title: Generate interpretable code for Poly system from the code tree. Author: Dave Matthews, Cambridge University Computer Laboratory Copyright Cambridge University 1985 *) (* This generates byte-code that is interpreted by the run-time system. It is now used as a fall-back to allow Poly/ML to run on non-X86 architectures. Early versions were used as a porting aid while a native code-generator was being developed and the "enter-int" instructions that were needed for that have been retained although they no longer actually generate code. *) functor IntGCode ( structure CodeCons : INTCODECONS structure BackendTree: BACKENDINTERMEDIATECODE structure CodeArray: CODEARRAY sharing CodeCons.Sharing = BackendTree.Sharing = CodeArray.Sharing ) : GENCODE = struct open CodeCons open Address open BackendTree open Misc open CodeArray val word0 = toMachineWord 0; val DummyValue : machineWord = word0; (* used as result of "raise e" etc. 
*) type caseForm = { cases : (backendIC * word) list, test : backendIC, caseType: caseType, default : backendIC } (* Where the result, if any, should go *) datatype whereto = NoResult (* discard result *) | ToStack (* Need a result but it can stay on the pseudo-stack *); (* Are we at the end of the function. *) datatype tail = EndOfProc | NotEnd (* Code generate a function or global declaration *) fun codegen (pt, cvec, resultClosure, numOfArgs, localCount, parameters) = let datatype decEntry = StackAddr of int | Empty val decVec = Array.array (localCount, Empty) (* Count of number of items on the stack. *) val realstackptr = ref 1 (* The closure ptr is already there *) (* Maximum size of the stack. *) val maxStack = ref 1 (* Push a value onto the stack. *) fun incsp () = ( realstackptr := !realstackptr + 1; if !realstackptr > !maxStack then maxStack := !realstackptr else () ) (* An entry has been removed from the stack. *) fun decsp () = realstackptr := !realstackptr - 1; fun pushLocalStackValue addr = ( genLocal(!realstackptr + addr, cvec); incsp() ) (* Loads a local, argument or closure value; translating local stack addresses to real stack offsets. *) fun locaddr(BICLoadArgument locn) = pushLocalStackValue (numOfArgs-locn) | locaddr(BICLoadLocal locn) = ( (* positive address - on the stack. *) case Array.sub (decVec, locn) of StackAddr n => pushLocalStackValue (~ n) | _ => (* Should be on the stack, not a function. *) raise InternalError "locaddr: bad stack address" ) | locaddr(BICLoadClosure locn) = (* closure-pointer relative *) ( genIndirectClosure{addr = !realstackptr-1, item=locn, code=cvec}; incsp() ) | locaddr BICLoadRecursive = pushLocalStackValue ~1 (* The closure itself - first value on the stack. *) (* generates code from the tree *) fun gencde (pt : backendIC, whereto : whereto, tailKind : tail, loopAddr) : unit = let (* Save the stack pointer value here. We may want to reset the stack. *) val oldsp = !realstackptr; (* Operations on ML memory always have the base as an ML address. Word operations are always word aligned. The higher level will have extracted any constant offset and scaled it if necessary. That's helpful for the X86 but not for the interpreter. We have to turn them back into indexes. *) fun genMLAddress({base, index, offset}, scale) = ( gencde (base, ToStack, NotEnd, loopAddr); offset mod scale = 0 orelse raise InternalError "genMLAddress"; case (index, offset div scale) of (NONE, soffset) => (pushConst (toMachineWord soffset, cvec); incsp()) | (SOME indexVal, 0) => gencde (indexVal, ToStack, NotEnd, loopAddr) | (SOME indexVal, soffset) => ( gencde (indexVal, ToStack, NotEnd, loopAddr); pushConst (toMachineWord soffset, cvec); genOpcode(opcode_wordAdd, cvec) ) ) (* Load the address, index value and offset for non-byte operations. Because the offset has already been scaled by the size of the operand we have to load the index and offset separately. *) fun genCAddress{base, index, offset} = ( gencde (base, ToStack, NotEnd, loopAddr); case index of NONE => (pushConst (toMachineWord 0, cvec); incsp()) | SOME indexVal => gencde (indexVal, ToStack, NotEnd, loopAddr); pushConst (toMachineWord offset, cvec); incsp() ) val () = case pt of BICEval evl => genEval (evl, tailKind) | BICExtract ext => (* This may just be being used to discard a value which isn't used on this branch. 
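For example, the first item of an expression sequence may be just a variable whose value is discarded; whereto is then NoResult and there is nothing to generate.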
*) if whereto = NoResult then () else locaddr ext | BICField {base, offset} => (gencde (base, ToStack, NotEnd, loopAddr); genIndirect (offset, cvec)) | BICLoadContainer {base, offset} => (gencde (base, ToStack, NotEnd, loopAddr); genIndirectContainer (offset, cvec)) | BICLambda lam => genProc (lam, false, fn () => ()) | BICConstnt(w, _) => let val () = pushConst (w, cvec); in incsp () end | BICCond (testPart, thenPart, elsePart) => genCond (testPart, thenPart, elsePart, whereto, tailKind, loopAddr) | BICNewenv(decls, exp) => let (* Processes a list of entries. *) (* Mutually recursive declarations. May be either lambdas or constants. Recurse down the list pushing the addresses of the closure vectors, then unwind the recursion and fill them in. *) fun genMutualDecs [] = () | genMutualDecs ({lambda, addr, ...} :: otherDecs) = genProc (lambda, true, fn() => ( Array.update (decVec, addr, StackAddr (! realstackptr)); genMutualDecs (otherDecs) )) fun codeDecls(BICRecDecs dl) = genMutualDecs dl | codeDecls(BICDecContainer{size, addr}) = ( (* If this is a container we have to process it here otherwise it will be removed in the stack adjustment code. *) genContainer(size, cvec); (* Push the address of this container. *) realstackptr := !realstackptr + size + 1; (* Pushes N words plus the address. *) Array.update (decVec, addr, StackAddr(!realstackptr)) ) | codeDecls(BICDeclar{value, addr, ...}) = ( gencde (value, ToStack, NotEnd, loopAddr); Array.update (decVec, addr, StackAddr(!realstackptr)) ) | codeDecls(BICNullBinding exp) = gencde (exp, NoResult, NotEnd, loopAddr) in List.app codeDecls decls; gencde (exp, whereto, tailKind, loopAddr) end | BICBeginLoop {loop=body, arguments} => (* Execute the body which will contain at least one Loop instruction. There will also be path(s) which don't contain Loops and these will drop through. *) let val args = List.map #1 arguments (* Evaluate each of the arguments, pushing the result onto the stack. *) fun genLoopArg ({addr, value, ...}) = ( gencde (value, ToStack, NotEnd, loopAddr); Array.update (decVec, addr, StackAddr (!realstackptr)); !realstackptr (* Return the posn on the stack. *) ) val argIndexList = map genLoopArg args; val startSp = ! realstackptr; (* Remember the current top of stack. *) val startLoop = createLabel () val () = setLabel(startLoop, cvec) (* Start of loop *) in (* Process the body, passing the jump-back address down for the Loop instruction(s). *) gencde (body, whereto, tailKind, SOME(startLoop, startSp, argIndexList)) (* Leave the arguments on the stack. They can be cleared later if needed. *) end | BICLoop argList => (* Jump back to the enclosing BeginLoop. *) let val (startLoop, startSp, argIndexList) = case loopAddr of SOME l => l | NONE => raise InternalError "No BeginLoop for Loop instr" (* Evaluate the arguments. First push them to the stack because evaluating an argument may depend on the current value of others. Only when we've evaluated all of them can we overwrite the original argument positions. *) fun loadArgs ([], []) = !realstackptr - startSp (* The offset of all the args. *) | loadArgs (arg:: argList, _ :: argIndexList) = let (* Evaluate all the arguments. *) val () = gencde (arg, ToStack, NotEnd, NONE); val argOffset = loadArgs(argList, argIndexList); in genSetStackVal(argOffset, cvec); (* Copy the arg over. *) decsp(); (* The argument has now been popped. 
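Copying over the old slots only after every new value has been pushed matters when the arguments depend on each other: a loop that simply swapped its two arguments would otherwise overwrite the first slot before the second value had been read.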
*) argOffset end | loadArgs _ = raise InternalError "loadArgs: Mismatched arguments"; val _: int = loadArgs(List.map #1 argList, argIndexList) in if !realstackptr <> startSp then resetStack (!realstackptr - startSp, false, cvec) (* Remove any local variables. *) else (); (* Jump back to the start of the loop. *) putBranchInstruction(JumpBack, startLoop, cvec) end | BICRaise exp => ( gencde (exp, ToStack, NotEnd, loopAddr); genRaiseEx cvec ) | BICHandle {exp, handler, exPacketAddr} => let (* Save old handler *) val () = genPushHandler cvec val () = incsp () val handlerLabel = createLabel() val () = putBranchInstruction (SetHandler, handlerLabel, cvec) val () = incsp() (* Code generate the body; "NotEnd" because we have to come back to remove the handler; "ToStack" because delHandler needs a result to carry down. *) val () = gencde (exp, ToStack, NotEnd, loopAddr) (* Now get out of the handler and restore the old one. *) val () = genOpcode(opcode_deleteHandler, cvec) val skipHandler = createLabel() val () = putBranchInstruction (Jump, skipHandler, cvec) (* Now process the handler itself. First we have to reset the stack. Note that we have to use "ToStack" again to be consistent with the stack-handling in the body-part. If we actually wanted "NoResult", the stack adjustment code at the end of gencde will take care of this. This means that I don't want to do any clever "end-of-function" optimisation either. SPF 6/1/97 *) val () = realstackptr := oldsp val () = setLabel (handlerLabel, cvec) (* If we were executing machine code we must re-enter the interpreter. *) val () = genEnterIntCatch cvec (* Push the exception packet and set the address. *) val () = genLdexc cvec val () = incsp () val () = Array.update (decVec, exPacketAddr, StackAddr(!realstackptr)) val () = gencde (handler, ToStack, NotEnd, loopAddr) (* Have to remove the exception packet. *) val () = resetStack(1, true, cvec) val () = decsp() (* Finally fix-up the jump around the handler *) val () = setLabel (skipHandler, cvec) in () end | BICCase ({cases, test, default, firstIndex, ...}) => let val () = gencde (test, ToStack, NotEnd, loopAddr) (* Label to jump to at the end of each case. *) val exitJump = createLabel() val () = if firstIndex = 0w0 then () else ( (* Subtract lower limit. Don't check for overflow. Instead allow large value to wrap around and check in "case" instruction. *) pushConst(toMachineWord firstIndex, cvec); genOpcode(opcode_wordSub, cvec) ) (* Generate the case instruction followed by the table of jumps. *) val nCases = List.length cases val caseLabels = genCase (nCases, cvec) val () = decsp () (* The default case, if any, follows the case statement. *) (* If we have a jump to the default set it to jump here. *) local fun fixDefault(NONE, defCase) = setLabel(defCase, cvec) | fixDefault(SOME _, _) = () in val () = ListPair.appEq fixDefault (cases, caseLabels) end val () = gencde (default, whereto, tailKind, loopAddr); fun genCases(SOME body, label) = ( (* First exit from the previous case or the default if this is the first. *) putBranchInstruction(Jump, exitJump, cvec); (* Remove the result - the last case will leave it. *) case whereto of ToStack => decsp () | NoResult => (); (* Fix up the jump to come here. *) setLabel(label, cvec); gencde (body, whereto, tailKind, loopAddr) ) | genCases(NONE, _) = () val () = ListPair.appEq genCases (cases, caseLabels) (* Finally set the exit jump to come here. 
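The layout produced for a case is therefore roughly:
      <test value>  [ subtract firstIndex ]
      case instruction and jump table L1 ... Ln (values with no entry fall through)
      <default code>    jump exit
  L1: <case 1>          jump exit
      ...
  Ln: <case n>
  exit:
where each jump to exit is emitted at the start of the following genCases call, so the last case simply falls through to the exit label. (Labels here are purely illustrative.)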
*) val () = setLabel (exitJump, cvec) in () end | BICTuple recList => let val size = List.length recList in (* Move the fields into the vector. *) List.app(fn v => gencde (v, ToStack, NotEnd, loopAddr)) recList; genTuple (size, cvec); realstackptr := !realstackptr - (size - 1) end | BICSetContainer{container, tuple, filter} => (* Copy the contents of a tuple into a container. If the tuple is a Tuple instruction we can avoid generating the tuple and then unpacking it and simply copy the fields that make up the tuple directly into the container. *) ( case tuple of BICTuple cl => (* Simply set the container from the values. *) let (* Load the address of the container. *) val _ = gencde (container, ToStack, NotEnd, loopAddr); fun setValues([], _, _) = () | setValues(v::tl, sourceOffset, destOffset) = if sourceOffset < BoolVector.length filter andalso BoolVector.sub(filter, sourceOffset) then ( gencde (v, ToStack, NotEnd, loopAddr); (* Move the entry into the container. This instruction pops the value to be moved but not the destination. *) genMoveToContainer(destOffset, cvec); decsp(); setValues(tl, sourceOffset+1, destOffset+1) ) else setValues(tl, sourceOffset+1, destOffset) in setValues(cl, 0, 0) (* The container address is still on the stack. *) end | _ => let (* General case. *) (* First the target tuple, then the container. *) val () = gencde (tuple, ToStack, NotEnd, loopAddr) val () = gencde (container, ToStack, NotEnd, loopAddr) val last = BoolVector.foldli(fn (i, true, _) => i | (_, false, n) => n) ~1 filter fun copy (sourceOffset, destOffset) = if BoolVector.sub(filter, sourceOffset) then ( (* Duplicate the tuple address . *) genLocal(1, cvec); genIndirect(sourceOffset, cvec); genMoveToContainer(destOffset, cvec); if sourceOffset = last then () else copy (sourceOffset+1, destOffset+1) ) else copy(sourceOffset+1, destOffset) in copy (0, 0) (* The container and tuple addresses are still on the stack. *) end ) | BICTagTest { test, tag, ... } => ( gencde (test, ToStack, NotEnd, loopAddr); genEqualWordConst(tag, cvec) ) | BICNullary {oper=BuiltIns.GetCurrentThreadId} => ( genOpcode(opcode_getThreadId, cvec); incsp() ) - | BICNullary {oper=BuiltIns.CheckRTSException} => - ( (* Do nothing. This is done in the RTS call. *) + | BICNullary {oper=BuiltIns.CPUPause} => + ( (* Do nothing. It's really only a hint. 
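A native code-generator is free to turn CPUPause into a spin-wait hint such as a pause or yield instruction; in this byte-code fall-back there is nothing useful to emit, so it is treated as a no-op.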
*) ) | BICNullary {oper=BuiltIns.CreateMutex} => ( genOpcode(opcode_createMutex, cvec); incsp() ) | BICUnary { oper, arg1 } => let open BuiltIns val () = gencde (arg1, ToStack, NotEnd, loopAddr) in case oper of NotBoolean => genOpcode(opcode_notBoolean, cvec) | IsTaggedValue => genIsTagged cvec | MemoryCellLength => genOpcode(opcode_cellLength, cvec) | MemoryCellFlags => genOpcode(opcode_cellFlags, cvec) | ClearMutableFlag => genOpcode(opcode_clearMutable, cvec) | LongWordToTagged => genOpcode(opcode_longWToTagged, cvec) | SignedToLongWord => genOpcode(opcode_signedToLongW, cvec) | UnsignedToLongWord => genOpcode(opcode_unsignedToLongW, cvec) | RealAbs PrecDouble => genOpcode(opcode_realAbs, cvec) | RealNeg PrecDouble => genOpcode(opcode_realNeg, cvec) | RealFixedInt PrecDouble => genOpcode(opcode_fixedIntToReal, cvec) | RealAbs PrecSingle => genOpcode(opcode_floatAbs, cvec) | RealNeg PrecSingle => genOpcode(opcode_floatNeg, cvec) | RealFixedInt PrecSingle => genOpcode(opcode_fixedIntToFloat, cvec) | FloatToDouble => genOpcode(opcode_floatToReal, cvec) | DoubleToFloat => genDoubleToFloat cvec | RealToInt (PrecDouble, rnding) => genRealToInt(rnding, cvec) | RealToInt (PrecSingle, rnding) => genFloatToInt(rnding, cvec) | TouchAddress => resetStack(1, false, cvec) (* Discard this *) | AllocCStack => genOpcode(opcode_allocCSpace, cvec) | LockMutex => genOpcode(opcode_lockMutex, cvec) | TryLockMutex => genOpcode(opcode_tryLockMutex, cvec) | UnlockMutex => genOpcode(opcode_atomicReset, cvec) end | BICBinary { oper=BuiltIns.WordComparison{test=BuiltIns.TestEqual, ...}, arg1, arg2=BICConstnt(w, _) } => let val () = gencde (arg1, ToStack, NotEnd, loopAddr) in genEqualWordConst(toShort w, cvec) end | BICBinary { oper=BuiltIns.WordComparison{test=BuiltIns.TestEqual, ...}, arg1=BICConstnt(w, _), arg2 } => let val () = gencde (arg2, ToStack, NotEnd, loopAddr) in genEqualWordConst(toShort w, cvec) end | BICBinary { oper, arg1, arg2 } => let open BuiltIns val () = gencde (arg1, ToStack, NotEnd, loopAddr) val () = gencde (arg2, ToStack, NotEnd, loopAddr) in case oper of WordComparison{test=TestEqual, ...} => genOpcode(opcode_equalWord, cvec) | WordComparison{test=TestLess, isSigned=true} => genOpcode(opcode_lessSigned, cvec) | WordComparison{test=TestLessEqual, isSigned=true} => genOpcode(opcode_lessEqSigned, cvec) | WordComparison{test=TestGreater, isSigned=true} => genOpcode(opcode_greaterSigned, cvec) | WordComparison{test=TestGreaterEqual, isSigned=true} => genOpcode(opcode_greaterEqSigned, cvec) | WordComparison{test=TestLess, isSigned=false} => genOpcode(opcode_lessUnsigned, cvec) | WordComparison{test=TestLessEqual, isSigned=false} => genOpcode(opcode_lessEqUnsigned, cvec) | WordComparison{test=TestGreater, isSigned=false} => genOpcode(opcode_greaterUnsigned, cvec) | WordComparison{test=TestGreaterEqual, isSigned=false} => genOpcode(opcode_greaterEqUnsigned, cvec) | WordComparison{test=TestUnordered, ...} => raise InternalError "WordComparison: TestUnordered" | PointerEq => genOpcode(opcode_equalWord, cvec) | FixedPrecisionArith ArithAdd => genOpcode(opcode_fixedAdd, cvec) | FixedPrecisionArith ArithSub => genOpcode(opcode_fixedSub, cvec) | FixedPrecisionArith ArithMult => genOpcode(opcode_fixedMult, cvec) | FixedPrecisionArith ArithQuot => genOpcode(opcode_fixedQuot, cvec) | FixedPrecisionArith ArithRem => genOpcode(opcode_fixedRem, cvec) | FixedPrecisionArith ArithDiv => raise InternalError "TODO: FixedPrecisionArith ArithDiv" | FixedPrecisionArith ArithMod => raise InternalError "TODO: 
FixedPrecisionArith ArithMod" | WordArith ArithAdd => genOpcode(opcode_wordAdd, cvec) | WordArith ArithSub => genOpcode(opcode_wordSub, cvec) | WordArith ArithMult => genOpcode(opcode_wordMult, cvec) | WordArith ArithDiv => genOpcode(opcode_wordDiv, cvec) | WordArith ArithMod => genOpcode(opcode_wordMod, cvec) | WordArith _ => raise InternalError "WordArith - unimplemented instruction" | WordLogical LogicalAnd => genOpcode(opcode_wordAnd, cvec) | WordLogical LogicalOr => genOpcode(opcode_wordOr, cvec) | WordLogical LogicalXor => genOpcode(opcode_wordXor, cvec) | WordShift ShiftLeft => genOpcode(opcode_wordShiftLeft, cvec) | WordShift ShiftRightLogical => genOpcode(opcode_wordShiftRLog, cvec) | WordShift ShiftRightArithmetic => genOpcode(opcode_wordShiftRArith, cvec) | AllocateByteMemory => genOpcode(opcode_allocByteMem, cvec) | LargeWordComparison TestEqual => genOpcode(opcode_lgWordEqual, cvec) | LargeWordComparison TestLess => genOpcode(opcode_lgWordLess, cvec) | LargeWordComparison TestLessEqual => genOpcode(opcode_lgWordLessEq, cvec) | LargeWordComparison TestGreater => genOpcode(opcode_lgWordGreater, cvec) | LargeWordComparison TestGreaterEqual => genOpcode(opcode_lgWordGreaterEq, cvec) | LargeWordComparison TestUnordered => raise InternalError "LargeWordComparison: TestUnordered" | LargeWordArith ArithAdd => genOpcode(opcode_lgWordAdd, cvec) | LargeWordArith ArithSub => genOpcode(opcode_lgWordSub, cvec) | LargeWordArith ArithMult => genOpcode(opcode_lgWordMult, cvec) | LargeWordArith ArithDiv => genOpcode(opcode_lgWordDiv, cvec) | LargeWordArith ArithMod => genOpcode(opcode_lgWordMod, cvec) | LargeWordArith _ => raise InternalError "LargeWordArith - unimplemented instruction" | LargeWordLogical LogicalAnd => genOpcode(opcode_lgWordAnd, cvec) | LargeWordLogical LogicalOr => genOpcode(opcode_lgWordOr, cvec) | LargeWordLogical LogicalXor => genOpcode(opcode_lgWordXor, cvec) | LargeWordShift ShiftLeft => genOpcode(opcode_lgWordShiftLeft, cvec) | LargeWordShift ShiftRightLogical => genOpcode(opcode_lgWordShiftRLog, cvec) | LargeWordShift ShiftRightArithmetic => genOpcode(opcode_lgWordShiftRArith, cvec) | RealComparison (TestEqual, PrecDouble) => genOpcode(opcode_realEqual, cvec) | RealComparison (TestLess, PrecDouble) => genOpcode(opcode_realLess, cvec) | RealComparison (TestLessEqual, PrecDouble) => genOpcode(opcode_realLessEq, cvec) | RealComparison (TestGreater, PrecDouble) => genOpcode(opcode_realGreater, cvec) | RealComparison (TestGreaterEqual, PrecDouble) => genOpcode(opcode_realGreaterEq, cvec) | RealComparison (TestUnordered, PrecDouble) => genOpcode(opcode_realUnordered, cvec) | RealComparison (TestEqual, PrecSingle) => genOpcode(opcode_floatEqual, cvec) | RealComparison (TestLess, PrecSingle) => genOpcode(opcode_floatLess, cvec) | RealComparison (TestLessEqual, PrecSingle) => genOpcode(opcode_floatLessEq, cvec) | RealComparison (TestGreater, PrecSingle) => genOpcode(opcode_floatGreater, cvec) | RealComparison (TestGreaterEqual, PrecSingle) => genOpcode(opcode_floatGreaterEq, cvec) | RealComparison (TestUnordered, PrecSingle) => genOpcode(opcode_floatUnordered, cvec) | RealArith (ArithAdd, PrecDouble) => genOpcode(opcode_realAdd, cvec) | RealArith (ArithSub, PrecDouble) => genOpcode(opcode_realSub, cvec) | RealArith (ArithMult, PrecDouble) => genOpcode(opcode_realMult, cvec) | RealArith (ArithDiv, PrecDouble) => genOpcode(opcode_realDiv, cvec) | RealArith (ArithAdd, PrecSingle) => genOpcode(opcode_floatAdd, cvec) | RealArith (ArithSub, PrecSingle) => genOpcode(opcode_floatSub, 
cvec) | RealArith (ArithMult, PrecSingle) => genOpcode(opcode_floatMult, cvec) | RealArith (ArithDiv, PrecSingle) => genOpcode(opcode_floatDiv, cvec) | RealArith _ => raise InternalError "RealArith - unimplemented instruction" | FreeCStack => genOpcode(opcode_freeCSpace, cvec) ; decsp() (* Removes one item from the stack. *) end | BICAllocateWordMemory {numWords as BICConstnt(length, _), flags as BICConstnt(flagByte, _), initial } => if isShort length andalso toShort length = 0w1 andalso isShort flagByte andalso toShort flagByte = 0wx40 then (* This is a very common case. *) ( gencde (initial, ToStack, NotEnd, loopAddr); genOpcode(opcode_alloc_ref, cvec) ) else let val () = gencde (numWords, ToStack, NotEnd, loopAddr) val () = gencde (flags, ToStack, NotEnd, loopAddr) val () = gencde (initial, ToStack, NotEnd, loopAddr) in genOpcode(opcode_allocWordMemory, cvec); decsp(); decsp() end | BICAllocateWordMemory { numWords, flags, initial } => let val () = gencde (numWords, ToStack, NotEnd, loopAddr) val () = gencde (flags, ToStack, NotEnd, loopAddr) val () = gencde (initial, ToStack, NotEnd, loopAddr) in genOpcode(opcode_allocWordMemory, cvec); decsp(); decsp() end | BICLoadOperation { kind=LoadStoreMLWord _, address={base, index=NONE, offset}} => ( (* If the index is a constant, frequently zero, we can use indirection. The offset is a byte count so has to be divided by the word size but it should always be an exact multiple. *) gencde (base, ToStack, NotEnd, loopAddr); offset mod Word.toInt wordSize = 0 orelse raise InternalError "gencde: BICLoadOperation - not word multiple"; genIndirect (offset div Word.toInt wordSize, cvec) ) | BICLoadOperation { kind=LoadStoreMLWord _, address} => ( genMLAddress(address, Word.toInt wordSize); genOpcode(opcode_loadMLWord, cvec); decsp() ) | BICLoadOperation { kind=LoadStoreMLByte _, address} => ( genMLAddress(address, 1); genOpcode(opcode_loadMLByte, cvec); decsp() ) | BICLoadOperation { kind=LoadStoreC8, address} => ( genCAddress address; genOpcode(opcode_loadC8, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreC16, address} => ( genCAddress address; genOpcode(opcode_loadC16, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreC32, address} => ( genCAddress address; genOpcode(opcode_loadC32, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreC64, address} => ( genCAddress address; genOpcode(opcode_loadC64, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreCFloat, address} => ( genCAddress address; genOpcode(opcode_loadCFloat, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreCDouble, address} => ( genCAddress address; genOpcode(opcode_loadCDouble, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreUntaggedUnsigned, address} => ( genMLAddress(address, Word.toInt wordSize); genOpcode(opcode_loadUntagged, cvec); decsp() ) | BICStoreOperation { kind=LoadStoreMLWord _, address, value } => ( genMLAddress(address, Word.toInt wordSize); gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeMLWord, cvec); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreMLByte _, address, value } => ( genMLAddress(address, 1); gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeMLByte, cvec); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC8, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC8, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC16, address, value} => ( genCAddress address; gencde (value, 
ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC16, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC32, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC32, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC64, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC64, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreCFloat, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeCFloat, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreCDouble, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeCDouble, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreUntaggedUnsigned, address, value} => ( genMLAddress(address, Word.toInt wordSize); gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeUntagged, cvec); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpMove{isByteMove=true}, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, 1); genMLAddress(destRight, 1); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockMoveByte, cvec); decsp(); decsp(); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpMove{isByteMove=false}, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, Word.toInt wordSize); genMLAddress(destRight, Word.toInt wordSize); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockMoveWord, cvec); decsp(); decsp(); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpEqualByte, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, 1); genMLAddress(destRight, 1); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockEqualByte, cvec); decsp(); decsp(); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpCompareByte, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, 1); genMLAddress(destRight, 1); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockCompareByte, cvec); decsp(); decsp(); decsp(); decsp() ) | BICArbitrary { oper, arg1, arg2, ... } => let open BuiltIns val () = gencde (arg1, ToStack, NotEnd, loopAddr) val () = gencde (arg2, ToStack, NotEnd, loopAddr) in case oper of ArithAdd => genOpcode(opcode_arbAdd, cvec) | ArithSub => genOpcode(opcode_arbSubtract, cvec) | ArithMult => genOpcode(opcode_arbMultiply, cvec) | _ => raise InternalError "Unknown arbitrary precision operation"; decsp() (* Removes one item from the stack. *) end in (* body of gencde *) (* This ensures that there is precisely one item on the stack if whereto = ToStack and no items if whereto = NoResult. There are two points to note carefully here: (1) Negative stack adjustments are legal if we have exited. This is because matchFailFn can cut the stack back too far for its immediately enclosing expression. This is harmless because the code actually exits that expression. (2) A stack adjustment of ~1 is legal if we're generating a declaration in "ToStack" mode, because not all declarations actually generate the dummy value that we expect. This used to be handled in resetStack itself, but it's more transparent to do it here. (In addition, there was a bug in resetStack - it accumulated the stack resets, but didn't correctly accumulate these "~1" dummy value pushes.) It's all much better now. 
SPF 9/1/97 *) case whereto of ToStack => let val newsp = oldsp + 1; val adjustment = !realstackptr - newsp val () = if adjustment = 0 then () else if adjustment < ~1 then raise InternalError ("gencde: bad adjustment " ^ Int.toString adjustment) (* Hack for declarations that should push values, but don't *) else if adjustment = ~1 then pushConst (DummyValue, cvec) else resetStack (adjustment, true, cvec) in realstackptr := newsp end | NoResult => let val adjustment = !realstackptr - oldsp val () = if adjustment = 0 then () else if adjustment < 0 then raise InternalError ("gencde: bad adjustment " ^ Int.toString adjustment) else resetStack (adjustment, false, cvec) in realstackptr := oldsp end end (* gencde *) (* doNext is only used for mutually recursive functions where a function may not be able to fill in its closure if it does not have all the remaining declarations. *) (* TODO: This always creates the closure on the heap even when makeClosure is false. *) and genProc ({ closure=[], localCount, body, argTypes, name, ...}: bicLambdaForm, mutualDecs, doNext: unit -> unit) : unit = let (* Create a one word item for the closure. This is returned for recursive references and filled in with the address of the code when we've finished. *) val closure = makeConstantClosure() val newCode : code = codeCreate(name, parameters); (* Code-gen function. No non-local references. *) val () = codegen (body, newCode, closure, List.length argTypes, localCount, parameters); val () = pushConst(closureAsAddress closure, cvec); val () = incsp(); in if mutualDecs then doNext () else () end | genProc ({ localCount, body, name, argTypes, closure, ...}, mutualDecs, doNext) = let (* Full closure required. *) val resClosure = makeConstantClosure() val newCode = codeCreate (name, parameters) (* Code-gen function. *) val () = codegen (body, newCode, resClosure, List.length argTypes, localCount, parameters) val closureVars = List.length closure (* Size excluding the code address *) in if mutualDecs then let (* Have to make the closure now and fill it in later. *) val () = pushConst(toMachineWord resClosure, cvec) val () = genAllocMutableClosure(closureVars, cvec) val () = incsp () val entryAddr : int = !realstackptr val () = doNext () (* Any mutually recursive functions. *) (* Push the address of the vector - If we have processed other closures the vector will no longer be on the top of the stack. *) val () = pushLocalStackValue (~ entryAddr) (* Load items for the closure. *) fun loadItems ([], _) = () | loadItems (v :: vs, addr : int) = let (* Generate an item and move it into the clsoure *) val () = gencde (BICExtract v, ToStack, NotEnd, NONE) (* The closure "address" excludes the code address. *) val () = genMoveToMutClosure(addr, cvec) val () = decsp () in loadItems (vs, addr + 1) end val () = loadItems (closure, 0) val () = genLock cvec (* Lock it. *) (* Remove the extra reference. *) val () = resetStack (1, false, cvec) in realstackptr := !realstackptr - 1 end else let (* Put it on the stack. *) val () = pushConst (toMachineWord resClosure, cvec) val () = incsp () val () = List.app (fn pt => gencde (BICExtract pt, ToStack, NotEnd, NONE)) closure val () = genClosure (closureVars, cvec) in realstackptr := !realstackptr - closureVars end end and genCond (testCode, thenCode, elseCode, whereto, tailKind, loopAddr) = let (* andalso and orelse are turned into conditionals with constants. Convert this into a series of tests. 
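For instance the front end represents x andalso y as if x then y else false and x orelse y as if x then true else y, the same shape that mkCand and mkCor in CodeTreeConstruction.ML build at the codetree level with CodeZero and CodeTrue as the constant arms:
           if x then y else false       corresponds to  x andalso y
           if x then true else y        corresponds to  x orelse y
       so genTest only ever has to recognise conditionals whose arms include constants.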
*) fun genTest(BICConstnt(w, _), jumpOn, targetLabel) = let val cVal = case toShort w of 0w0 => false | 0w1 => true | _ => raise InternalError "genTest" in if cVal = jumpOn then putBranchInstruction (Jump, targetLabel, cvec) else () end | genTest(BICUnary { oper=BuiltIns.NotBoolean, arg1 }, jumpOn, targetLabel) = genTest(arg1, not jumpOn, targetLabel) | genTest(BICCond (testPart, thenPart, elsePart), jumpOn, targetLabel) = let val toElse = createLabel() and exitJump = createLabel() in genTest(testPart, false, toElse); genTest(thenPart, jumpOn, targetLabel); putBranchInstruction (Jump, exitJump, cvec); setLabel (toElse, cvec); genTest(elsePart, jumpOn, targetLabel); setLabel (exitJump, cvec) end | genTest(testCode, jumpOn, targetLabel) = ( gencde (testCode, ToStack, NotEnd, loopAddr); putBranchInstruction(if jumpOn then JumpTrue else JumpFalse, targetLabel, cvec); decsp() (* conditional branch pops a value. *) ) val toElse = createLabel() and exitJump = createLabel() val () = genTest(testCode, false, toElse) val () = gencde (thenCode, whereto, tailKind, loopAddr) (* Get rid of the result from the stack. If there is a result then the ``else-part'' will push it. *) val () = case whereto of ToStack => decsp () | NoResult => () val () = putBranchInstruction (Jump, exitJump, cvec) (* start of "else part" *) val () = setLabel (toElse, cvec) val () = gencde (elseCode, whereto, tailKind, loopAddr) val () = setLabel (exitJump, cvec) in () end (* genCond *) and genEval (eval, tailKind : tail) : unit = let val argList : backendIC list = List.map #1 (#argList eval) val argsToPass : int = List.length argList; (* Load arguments *) fun loadArgs [] = () | loadArgs (v :: vs) = let (* Push each expression onto the stack. *) val () = gencde(v, ToStack, NotEnd, NONE) in loadArgs vs end; (* Called after the args and the closure to call have been pushed onto the stack. *) fun callClosure () : unit = case tailKind of NotEnd => (* Normal call. *) genCallClosure cvec | EndOfProc => (* Tail recursive call. *) let (* Get the return address onto the top of the stack. *) val () = pushLocalStackValue 0 (* Slide the return address, closure and args over the old closure, return address and args, and reset the stack. Then jump to the closure. *) val () = genTailCall(argsToPass + 2, !realstackptr - 1 + (numOfArgs - argsToPass), cvec); (* It's "-1" not "-2", because we didn't bump the realstackptr when we pushed the return address. SPF 3/1/97 *) in () end (* Have to guarantee that the expression to return the function is evaluated before the arguments. *) (* Returns true if evaluating it later is safe. *) fun safeToLeave (BICConstnt _) = true | safeToLeave (BICLambda _) = true | safeToLeave (BICExtract _) = true | safeToLeave (BICField {base, ...}) = safeToLeave base | safeToLeave (BICLoadContainer {base, ...}) = safeToLeave base | safeToLeave _ = false val () = if (case argList of [] => true | _ => safeToLeave (#function eval)) then let (* Can load the args first. *) val () = loadArgs argList in gencde (#function eval, ToStack, NotEnd, NONE) end else let (* The expression for the function is too complicated to risk leaving. It might have a side-effect and we must ensure that any side-effects it has are done before the arguments are loaded. *) val () = gencde(#function eval, ToStack, NotEnd, NONE); val () = loadArgs(argList); (* Load the function again. *) val () = genLocal(argsToPass, cvec); in incsp () end val () = callClosure () (* Call the function. 
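The call made here respects the usual ML order: the function expression has already been evaluated before its arguments unless safeToLeave showed that postponing it could do no harm. For example, given suitable f and x, in
           (print "left"; f) (print "right"; x)
       the string left must be printed before right, which is why a complicated function expression is pushed first and then re-fetched with genLocal after the arguments have been loaded.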
*) (* Make sure we interpret when we return from the call *) val () = genEnterIntCall (cvec, argsToPass) in (* body of genEval *) realstackptr := !realstackptr - argsToPass (* Args popped by caller. *) end (* Generate the function. *) (* Assume we always want a result. There is otherwise a problem if the called routine returns a result of type void (i.e. no result) but the caller wants a result (e.g. the identity function). *) val () = gencde (pt, ToStack, EndOfProc, NONE) val () = genReturn (numOfArgs, cvec); in (* body of codegen *) (* Having code-generated the body of the function, it is copied into a new data segment. *) copyCode{code = cvec, maxStack = !maxStack, resultClosure=resultClosure, numberOfArguments=numOfArgs} end (* codegen *); fun gencodeLambda({ name, body, argTypes, localCount, ...}:bicLambdaForm, parameters, closure) = let (* make the code buffer for the new function. *) val newCode : code = codeCreate (name, parameters) (* This function must have no non-local references. *) in codegen (body, newCode, closure, List.length argTypes, localCount, parameters) end local val makeEntryPoint: string -> machineWord = RunCall.rtsCallFull1 "PolyCreateEntryPointObject" fun rtsCall makeCall (entryName: string, numOfArgs, debugArgs: Universal.universal list): machineWord = let open Address val cvec = codeCreate (entryName, debugArgs) val entryPointAddr = makeEntryPoint entryName (* Each argument is at the same offset, essentially we're just shifting them *) fun genLocals 0 = () | genLocals n = (genLocal(numOfArgs +1, cvec); genLocals (n-1)) val () = genLocals numOfArgs val () = pushConst(entryPointAddr, cvec) val () = makeCall(numOfArgs, cvec) val () = genReturn (numOfArgs, cvec) val closure = makeConstantClosure() val () = copyCode{code=cvec, maxStack=numOfArgs+1, numberOfArguments=numOfArgs, resultClosure=closure} in closureAsAddress closure end in structure Foreign = struct val rtsCallFast = rtsCall genRTSCallFast fun rtsCallFastRealtoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastRealtoReal c) (entryName, 1, debugArgs) and rtsCallFastRealRealtoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastRealRealtoReal c) (entryName, 2, debugArgs) and rtsCallFastGeneraltoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastGeneraltoReal c) (entryName, 1, debugArgs) and rtsCallFastRealGeneraltoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastRealGeneraltoReal c) (entryName, 2, debugArgs) fun rtsCallFastFloattoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastFloattoFloat c) (entryName, 1, debugArgs) and rtsCallFastFloatFloattoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastFloatFloattoFloat c) (entryName, 2, debugArgs) and rtsCallFastGeneraltoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastGeneraltoFloat c) (entryName, 1, debugArgs) and rtsCallFastFloatGeneraltoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastFloatGeneraltoFloat c) (entryName, 2, debugArgs) type abi = int (* This must match the type in Foreign.LowLevel. Once this is bootstrapped we could use that type but note that this is the type we use within the compiler and we build Foreign.LowLevel AFTER compiling this. 
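As an illustration of the cType descriptions declared just below, a C struct of two ints could be written, on the assumption of a platform where int is four bytes with four-byte alignment:
           val cInt  = { typeForm = CTypeSignedInt, align = 0w4, size = 0w4 }
           val cPair = { typeForm = CTypeStruct [cInt, cInt], align = 0w4, size = 0w8 }
       and passed in the argument list given to createCIF. The sizes and alignments here are assumptions about the platform; the real descriptions are built by the Foreign structure in the basis library.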
*) datatype cTypeForm = CTypeFloatingPt | CTypePointer | CTypeSignedInt | CTypeUnsignedInt | CTypeStruct of cType list | CTypeVoid withtype cType = { typeForm: cTypeForm, align: word, size: word } val abiList: unit -> (string * abi) list = RunCall.rtsCallFull0 "PolyInterpretedGetAbiList" type cif = Foreign.Memory.voidStar val createCIF: abi * cType * cType list -> cif= RunCall.rtsCallFull3 "PolyInterpretedCreateCIF" val callCFunction: cif * LargeWord.word * LargeWord.word * LargeWord.word -> unit = RunCall.rtsCallFull4 "PolyInterpretedCallFunction" (* foreignCall returns a function that actually calls the foreign function. *) fun foreignCall(abi, argTypes, resultType) = let val memocif = Foreign.Memory.memoise (fn () => createCIF(abi, resultType, argTypes)) () val closure = makeConstantClosure() (* For compatibility with the native code version we have to construct a function that takes three arguments rather than a single triple. *) val bodyCode = BICEval{function=BICConstnt(toMachineWord callCFunction, []), argList=[ (BICTuple[ BICEval{ function=BICConstnt(toMachineWord memocif, []), argList=[(BICConstnt(toMachineWord 0, []), GeneralType)], (* Unit. *) resultType=GeneralType }, BICExtract(BICLoadArgument 0), BICExtract(BICLoadArgument 2), BICExtract(BICLoadArgument 1)], GeneralType) ], resultType=GeneralType} val lambdaCode = { body = bodyCode, name = "foreignCall", closure=[], argTypes=[GeneralType, GeneralType, GeneralType], resultType = GeneralType, localCount=0} val () = gencodeLambda(lambdaCode, [], closure) in closureAsAddress closure end fun buildCallBack((*abi*) _, (*argTypes*) _, (*resultType*)_) = let fun buildClosure ((*mlFun*)_: LargeWord.word*LargeWord.word -> unit) = (* The result is the SysWord.word holding the C function. *) raise Foreign.Foreign "foreignCall not implemented" in Address.toMachineWord buildClosure end end end structure Sharing = struct open BackendTree.Sharing type closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML b/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML index 815defde..d46502c3 100644 --- a/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML +++ b/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML @@ -1,610 +1,613 @@ (* - Copyright (c) 2012,13,15-21 David C.J. Matthews + Copyright (c) 2012,13,15-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor CodeTreeConstruction ( structure Debug: DEBUG structure Pretty : PRETTY structure BaseCodeTree: BASECODETREE structure CodetreeFunctions: CODETREEFUNCTIONS structure Backend: sig type codetree type machineWord = Address.machineWord val codeGenerate: codetree * int * Universal.universal list -> (unit -> machineWord) * Universal.universal list structure Foreign: FOREIGNCALL structure Sharing : sig type codetree = codetree end end structure Optimiser: sig type codetree and envSpecial and codeBinding val codetreeOptimiser: codetree * Universal.universal list * int -> { numLocals: int, general: codetree, bindings: codeBinding list, special: envSpecial } structure Sharing: sig type codetree = codetree and envSpecial = envSpecial and codeBinding = codeBinding end end sharing type Pretty.pretty = BaseCodeTree.pretty sharing BaseCodeTree.Sharing = CodetreeFunctions.Sharing = Backend.Sharing = Optimiser.Sharing ) : CODETREE = struct open Address open StretchArray open BaseCodeTree open Pretty open CodetreeFunctions exception InternalError = Misc.InternalError and Interrupt = Thread.Thread.Interrupt infix 9 sub; fun mkDec (laddr, res) = Declar{value = res, addr = laddr, use=[]} fun deExtract(Extract ext) = ext | deExtract _ = raise InternalError "deExtract" datatype level = Level of { lev: int, closure: createClosure, lookup: int * int * bool -> loadForm } local (* We can have locals at the outer level. *) fun bottomLevel(addr, 0, false) = if addr < 0 then raise InternalError "load: negative" else LoadLocal addr | bottomLevel _ = (* Either the level is wrong or it's a parameter. *) raise InternalError "bottom level" in val baseLevel = Level { lev = 0, closure = makeClosure(), lookup = bottomLevel } end fun newLevel (Level{ lev, lookup, ...}) = let val closureList = makeClosure() val makeClosure = addToClosure closureList fun check n = if n < 0 then raise InternalError "load: negative" else n fun thisLevel(addr, level, isParam) = if level < 0 then raise InternalError "mkLoad: level must be non-negative" else if level > 0 then makeClosure(lookup(addr, level-1, isParam)) else (* This level *) if isParam then LoadArgument(check addr) else LoadLocal(check addr) in Level { lev = lev+1, closure = closureList, lookup = thisLevel } end fun getClosure(Level{ closure, ...}) = List.map Extract (extractClosure closure) fun mkLoad (addr, Level { lev = newLevel, lookup, ... } , Level { lev = oldLevel, ... }) = Extract(lookup(addr, newLevel - oldLevel, false)) and mkLoadParam(addr, Level { lev = newLevel, lookup, ... } , Level { lev = oldLevel, ... }) = Extract(lookup(addr, newLevel - oldLevel, true)) (* Transform a function so that free variables are converted to closure form. Returns the maximum local address used. *) fun genCode(pt, debugSwitches, numLocals) = let val printCodeTree = Debug.getParameter Debug.codetreeTag debugSwitches and compilerOut = Pretty.getCompilerOutput debugSwitches (* val printCodeTree = true and compilerOut = PRETTY.prettyPrint(TextIO.print, 70) *) (* If required, print it first. This is the code that the front-end has produced. *) val () = if printCodeTree then compilerOut(pretty pt) else () (* This ensures that everything is printed just before it is code-generated. 
*) fun codeAndPrint(code, localCount) = let val () = if printCodeTree then compilerOut (BaseCodeTree.pretty code) else (); in Backend.codeGenerate(code, localCount, debugSwitches) end (* Optimise it. *) val { numLocals = localCount, general = gen, bindings = decs, special = spec } = Optimiser.codetreeOptimiser(pt, debugSwitches, numLocals) (* At this stage we have a "general" value and also, possibly a "special" value. We could simply create mkEnv(decs, gen) and run preCode and genCode on that. However, we would lose the ability to insert any inline functions from this code into subsequent top-level expressions. We can't simply retain the "special" entry either because that may refer to values that have to be created once when the code is run. Such values will be referenced by "load" entries which refer to entries in the "decs". We construct a tuple which will contain the actual values after the code is run. Then if we want the value at some time in the future when we use something from the "special" entry we can extract the corresponding value from this tuple. Previously, this code always generated a tuple containing every declaration. That led to some very long compilation times because the back-end has some code which is quadratic in the number of entries on the stack. We now try to prune bindings by only generating the tuple if we have an inline function somewhere and only generating bindings we actually need. *) fun simplifySpec (EnvSpecTuple(size, env)) = let (* Get all the field entries. *) fun simpPair (gen, spec) = (gen, simplifySpec spec) val fields = List.tabulate(size, simpPair o env) in if List.all(fn (_, EnvSpecNone) => true | _ => false) fields then EnvSpecNone else EnvSpecTuple(size, fn n => List.nth(fields, n)) end | simplifySpec s = s (* None or inline function. *) in case simplifySpec spec of EnvSpecNone => let val (code, props) = codeAndPrint (mkEnv(decs, gen), localCount) in fn () => Constnt(code(), props) end | simpleSpec => let (* The bindings are marked using a three-valued mark. A binding is needed if it is referenced in any way. During the scan to find the references we need to avoid processing an entry that has already been processed but it is possible that a binding may be referenced as a general value only (e.g. from a function closure) and separately as a special value. See Test148.ML *) datatype visit = UnVisited | VisitedGeneral | VisitedSpecial local val refArray = Array.array(localCount, UnVisited) fun findDecs EnvSpecNone = () | findDecs (EnvSpecTuple(size, env)) = let val fields = List.tabulate(size, env) in List.app processGenAndSpec fields end | findDecs (EnvSpecInlineFunction({closure, ...}, env)) = let val closureItems = List.tabulate(List.length closure, env) in List.app processGenAndSpec closureItems end | findDecs (EnvSpecUnary _) = () | findDecs (EnvSpecBinary _) = () and processGenAndSpec (gen, spec) = (* The spec part needs only to be processed if this entry has not yet been visited, *) case gen of EnvGenLoad(LoadLocal addr) => let val previous = Array.sub(refArray, addr) in case (previous, spec) of (VisitedSpecial, _) => () (* Fully done *) | (VisitedGeneral, EnvSpecNone) => () (* Nothing useful *) | (_, EnvSpecNone) => (* We need this entry but we don't have any special entry to process. We could find another reference with a special entry. *) Array.update(refArray, addr, VisitedGeneral) | (_, _) => ( (* This has a special entry. Mark it and process. 
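One way to read this case analysis is as taking an upper bound in the ordering UnVisited, then VisitedGeneral, then VisitedSpecial: a sighting with a special part always promotes the mark and forces its spec to be scanned, while a general-only sighting never demotes an existing mark. Purely as an illustration, not code from this file, the ordering amounts to:
           datatype visit = UnVisited | VisitedGeneral | VisitedSpecial
           fun promote (VisitedSpecial, _) = VisitedSpecial
             | promote (_, VisitedSpecial) = VisitedSpecial
             | promote (VisitedGeneral, _) = VisitedGeneral
             | promote (_, v) = v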
*) Array.update(refArray, addr, VisitedSpecial); findDecs spec ) end | EnvGenConst _ => () | _ => raise InternalError "doGeneral: not LoadLocal or Constant" val () = findDecs simpleSpec in (* Convert to an immutable data structure. This will continue to be referenced in any inline function after the code has run. *) val refVector = Array.vector refArray end val decArray = Array.array(localCount, CodeZero) fun addDec(addr, dec) = if Vector.sub(refVector, addr) <> UnVisited then Array.update(decArray, addr, dec) else () fun addDecs(Declar{addr, ...}) = addDec(addr, mkLoadLocal addr) | addDecs(RecDecs decs) = List.app(fn {addr, ...} => addDec(addr, mkLoadLocal addr)) decs | addDecs(NullBinding _) = () | addDecs(Container{addr, size, ...}) = addDec(addr, mkTupleFromContainer(addr, size)) val () = List.app addDecs decs (* Construct the tuple and add the "general" value at the start. *) val resultTuple = mkTuple(gen :: Array.foldr(op ::) nil decArray) (* Now generate the machine code and return it as a function that can be called. *) val (code, codeProps) = codeAndPrint (mkEnv (decs, resultTuple), localCount) in (* Return a function that executes the compiled code and then creates the final "global" value as the result. *) fn () => let local (* Execute the code. This will perform any side-effects the user has programmed and may raise an exception if that is required. *) val resVector = code () (* The result is a vector containing the "general" value as the first word and the evaluated bindings for any "special" entries in subsequent words. *) val decVals : address = if isShort resVector then raise InternalError "Result vector is not an address" else toAddress resVector in fun resultWordN n = loadWord (decVals, n) (* Get the general value, the zero'th entry in the vector. *) val generalVal = resultWordN 0w0 (* Get the properties for a field in the tuple. Because the result is a tuple all the properties should be contained in a tupleTag entry. *) val fieldProps = case Option.map (Universal.tagProject CodeTags.tupleTag) (List.find(Universal.tagIs CodeTags.tupleTag) codeProps) of NONE => (fn _ => []) | SOME p => (fn n => List.nth(p, n)) val generalProps = fieldProps 0 end (* Construct a new environment so that when an entry is looked up the corresponding constant is returned. *) fun newEnviron (oldEnv) args = let val (oldGeneral, oldSpecial) = oldEnv args val genPair = case oldGeneral of EnvGenLoad(LoadLocal addr) => ( (* For the moment retain this check. It's better to have an assertion failure than a segfault. *) Vector.sub(refVector, addr) <> UnVisited orelse raise InternalError "Reference to non-existent binding"; (resultWordN(Word.fromInt addr+0w1), fieldProps(addr+1)) ) | EnvGenConst c => c | _ => raise InternalError "codetree newEnviron: Not Extract or Constnt" val specVal = mapSpec oldSpecial in (EnvGenConst genPair, specVal) end and mapSpec EnvSpecNone = EnvSpecNone | mapSpec (EnvSpecTuple(size, env)) = EnvSpecTuple(size, newEnviron env) | mapSpec (EnvSpecInlineFunction(spec, env)) = EnvSpecInlineFunction(spec, (newEnviron env)) | mapSpec (EnvSpecUnary _) = EnvSpecNone | mapSpec (EnvSpecBinary _) = EnvSpecNone in (* and return the whole lot as a global value. *) Constnt(generalVal, setInline(mapSpec simpleSpec) generalProps) end end end (* genCode *) (* Constructor functions for the front-end of the compiler. 
*) local fun mkSimpleFunction inlineType (lval, args, name, closure, numLocals) = { body = lval, isInline = inlineType, name = if name = "" then "" else name, closure = map deExtract closure, argTypes = List.tabulate(args, fn _ => (GeneralType, [])), resultType = GeneralType, localCount = numLocals, recUse = [] } in val mkProc = Lambda o mkSimpleFunction DontInline (* Normal function *) and mkInlproc = Lambda o mkSimpleFunction InlineAlways (* Explicitly inlined by the front-end *) (* Unless Compiler.inlineFunctor is false functors are treated as macros and expanded when they are applied. Unlike core-language functions they are not first-class values so if they are inline the "value" returned in the initial binding can just be zero except if there is something in the closure. Almost always the closure will be empty since free variables will come from previous topdecs and will be constants, The exception is if a structure and a functor using the structure appear in the same topdec (no semicolon between them). In that case we can't leave it. We would have to update the closure even if we leave the body untouched but we could have closure entries that are constants. e.g. structure S = struct val x = 1 end functor F() = struct open S end *) fun mkMacroProc (args as (_, _, _, [], _)) = Constnt(toMachineWord 0, setInline ( EnvSpecInlineFunction(mkSimpleFunction InlineAlways args, fn _ => raise InternalError "mkMacroProc: closure")) []) | mkMacroProc args = Lambda(mkSimpleFunction InlineAlways args) end local fun mkFunWithTypes inlineType { body, argTypes=argsAndTypes, resultType, name, closure, numLocals } = Lambda { body = body, isInline = inlineType, name = if name = "" then "" else name, closure = map deExtract closure, argTypes = map (fn t => (t, [])) argsAndTypes, resultType = resultType, localCount = numLocals, recUse = [] } in val mkFunction = mkFunWithTypes DontInline and mkInlineFunction = mkFunWithTypes InlineAlways end fun mkEval (ct, clist) = Eval { function = ct, argList = List.map(fn c => (c, GeneralType)) clist, resultType=GeneralType } fun mkCall(func, argsAndTypes, resultType) = Eval { function = func, argList = argsAndTypes, resultType=resultType } (* Basic built-in operations. *) fun mkUnary (oper, arg1) = Unary { oper = oper, arg1 = arg1 } and mkBinary (oper, arg1, arg2) = Binary { oper = oper, arg1 = arg1, arg2 = arg2 } val getCurrentThreadId = Nullary{oper=BuiltIns.GetCurrentThreadId} val getCurrentThreadIdFn = mkInlproc(getCurrentThreadId, 1 (* Ignores argument *), "GetThreadId()", [], 0) - - val checkRTSException = Nullary{oper=BuiltIns.CheckRTSException} + + val cpuPause = Nullary{oper=BuiltIns.CPUPause} + val cpuPauseFn = + mkInlproc(cpuPause, 1 (* Ignores argument *), "CPUPause()", [], 0) + val createMutex = Nullary{oper=BuiltIns.CreateMutex} val createMutexFn = mkInlproc(createMutex, 1 (* Ignores argument *), "CreateMutex()", [], 0) fun mkAllocateWordMemory (numWords, flags, initial) = AllocateWordMemory { numWords = numWords, flags = flags, initial = initial } val mkAllocateWordMemoryFn = mkInlproc( mkAllocateWordMemory(mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0), mkInd(2, mkLoadArgument 0)), 1, "AllocateWordMemory()", [], 0) (* Builtins wrapped as functions. N.B. These all take a single argument which may be a tuple. 
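So, for example, the wrapper built by mkBinaryFn for a word addition behaves at the ML level like a one-argument function over a pair, in the spirit of
           val addPair = fn (p : word * word) => #1 p + #2 p
       rather than a curried two-argument function; addPair is only an illustrative stand-in, the real wrapper selects the two fields of its argument with mkInd and applies the underlying built-in.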
*) fun mkUnaryFn oper = mkInlproc(mkUnary(oper, mkLoadArgument 0), 1, BuiltIns.unaryRepr oper ^ "()", [], 0) and mkBinaryFn oper = mkInlproc(mkBinary(oper, mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0)), 1, BuiltIns.binaryRepr oper ^ "()", [], 0) local open BuiltIns (* Word equality. The value of isSigned doesn't matter. *) val eqWord = WordComparison{test=TestEqual, isSigned=false} in fun mkNot arg = Unary{oper=NotBoolean, arg1=arg} and mkIsShort arg = Unary{oper=IsTaggedValue, arg1=arg} and mkEqualTaggedWord (arg1, arg2) = Binary{oper=eqWord, arg1=arg1, arg2=arg2} and mkEqualPointerOrWord (arg1, arg2) = Binary{oper=PointerEq, arg1=arg1, arg2=arg2} val equalTaggedWordFn = (* This takes two words, not a tuple. *) mkInlproc(mkBinary(eqWord, mkLoadArgument 0, mkLoadArgument 1), 2, "EqualWord()", [], 0) and equalPointerOrWordFn = (* This takes two words, not a tuple. *) mkInlproc(mkBinary(PointerEq, mkLoadArgument 0, mkLoadArgument 1), 2, "EqualWord()", [], 0) end fun mkLoadOperation(oper, base, index) = LoadOperation{kind=oper, address={base=base, index=SOME index, offset=0}} fun mkLoadOperationFn oper = mkInlproc(mkLoadOperation(oper, mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0)), 1, "loadOperation()", [], 0) fun mkStoreOperation(oper, base, index, value) = StoreOperation{kind=oper, address={base=base, index=SOME index, offset=0}, value=value} fun mkStoreOperationFn oper = mkInlproc(mkStoreOperation(oper, mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0), mkInd(2, mkLoadArgument 0)), 1, "storeOperation()", [], 0) fun mkBlockOperation {kind, leftBase, leftIndex, rightBase, rightIndex, length } = BlockOperation { kind = kind, sourceLeft={base=leftBase, index=SOME leftIndex, offset=0}, destRight={base=rightBase, index=SOME rightIndex, offset=0}, length=length} (* Construct a function that takes five arguments. The order is left-base, right-base, left-index, right-index, length. *) fun mkBlockOperationFn kind = mkInlproc( mkBlockOperation{kind=kind, leftBase=mkInd(0, mkLoadArgument 0), rightBase=mkInd(1, mkLoadArgument 0), leftIndex=mkInd(2, mkLoadArgument 0), rightIndex=mkInd(3, mkLoadArgument 0), length=mkInd(4, mkLoadArgument 0)}, 1, "blockOperation()", [], 0) fun identityFunction (name : string) : codetree = mkInlproc (mkLoadArgument 0, 1, name, [], 0) (* Returns its argument. *); (* Test a tag value. *) fun mkTagTest(test: codetree, tagValue: word, maxTag: word) = TagTest {test=test, tag=tagValue, maxTag=maxTag } fun mkHandle (exp, handler, exId) = Handle {exp = exp, handler = handler, exPacketAddr = exId} fun mkStr (strbuff:string) = Constnt (toMachineWord strbuff, []) (* If we have multiple references to a piece of code we may have to save it in a temporary and then use it from there. If the code has side-effects we certainly must do that to ensure that the side-effects are done exactly once and in the correct order, however if the code is just a constant or a load we can reduce the amount of code we generate by simply returning the original code. *) fun multipleUses (code as Constnt _, _, _) = {load = (fn _ => code), dec = []} (* | multipleUses (code as Extract(LoadLegacy{addr, level=loadLevel, ...}), _, level) = let (* May have to adjust the level. *) fun loadFn lev = if lev = level then code else mkLoad (addr, loadLevel + lev, level)) in {load = loadFn, dec = []} end | multipleUses (code as Extract(LoadLocal addr), _, level) = let (* May have to adjust the level. 
*) fun loadFn lev = if lev = level then code else mkLoad (addr, lev - level) in {load = loadFn, dec = []} end | multipleUses (code as Extract(LoadArgument _), _, level) = let (* May have to adjust the level. *) fun loadFn lev = if lev = level then code else raise InternalError "multipleUses: different level" (*else mkLoad (addr, lev - level)*) in {load = loadFn, dec = []} end | multipleUses (Extract _, _, _) = raise InternalError "multipleUses: TODO" *) | multipleUses (code, nextAddress, level) = let val addr = nextAddress(); fun loadFn lev = mkLoad (addr, lev, level); in {load = loadFn, dec = [mkDec (addr, code)]} end (* multipleUses *); fun mkMutualDecs [] = raise InternalError "mkMutualDecs: empty declaration list" | mkMutualDecs l = let fun convertDec(a, Lambda lam) = {lambda = lam, addr = a, use=[]} | convertDec _ = raise InternalError "mkMutualDecs: Recursive declaration is not a function" in RecDecs(List.map convertDec l) end val mkNullDec = NullBinding fun mkContainer(addr, size, setter) = Container{addr=addr, size=size, use=[], setter=setter} val mkIf = Cond and mkRaise = Raise fun mkConst v = Constnt(v, []) (* For the moment limit these to general arguments. *) fun mkLoop args = Loop (List.map(fn c => (c, GeneralType)) args) and mkBeginLoop(exp, args) = BeginLoop{loop=exp, arguments=List.map(fn(i, v) => ({value=v, addr=i, use=[]}, GeneralType)) args} fun mkWhile(b, e) = (* Generated as if b then (e; ) else (). *) mkBeginLoop(mkIf(b, mkEnv([NullBinding e], mkLoop[]), CodeZero), []) (* We previously had conditional-or and conditional-and as separate instructions. I've taken them out since they can be implemented just as efficiently as a normal conditional. In addition they were interfering with the optimisation where the second expression contained the last reference to something. We needed to add a "kill entry" to the other branch but there wasn't another branch to add it to. DCJM 7/12/00. *) fun mkCor(xp1, xp2) = mkIf(xp1, CodeTrue, xp2); fun mkCand(xp1, xp2) = mkIf(xp1, xp2, CodeZero); val mkSetContainer = fn (container, tuple, size) => mkSetContainer(container, tuple, BoolVector.tabulate(size, fn _ => true)) (* We don't generate the +, -, < etc operations directly here. Instead we create functions that the basis library can use to create the final versions by applying these functions to the arguments and an RTS function. The inline expansion system takes care of all the optimisation. An arbitrary precision operation takes a tuple consisting of a pair of arguments and a function. The code that is constructed checks both arguments to see if they are short. If they are not or the short precision operation overflows the code to call the function is executed. *) local val argX = mkInd(0, mkLoadArgument 0) and argY = mkInd(1, mkLoadArgument 0) val testShort = mkCand(mkIsShort argX, mkIsShort argY) val longCall = mkEval(mkInd(2, mkLoadArgument 0), [mkTuple[argX, argY]]) in fun mkArbitraryFn (oper as ArbArith arith) = mkInlproc( Arbitrary{oper=oper, shortCond=testShort, arg1=argX, arg2=argY, longCall=longCall }, 1, "Arbitrary" ^ BuiltIns.arithRepr arith ^ "()", [], 0) | mkArbitraryFn (oper as ArbCompare test) = (* The long function here is PolyCompareArbitrary which returns -1,0,+1 so the result has to be compared with zero. 
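In effect the function built for, say, ArbCompare TestLess behaves like
           fn (x, y, longCompare) =>
               if bothShort (x, y) then shortLess (x, y) else longCompare (x, y) < 0
       where bothShort and shortLess are stand-ins for the tag test and the tagged fixed-precision comparison; only the slow path calls the RTS, and it is that call's result which is compared with zero.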
*) let val comparedResult = Binary{oper=BuiltIns.WordComparison{test=test, isSigned=true}, arg1=longCall, arg2=CodeZero} in mkInlproc( Arbitrary{oper=oper, shortCond=testShort, arg1=argX, arg2=argY, longCall=comparedResult }, 1, "Arbitrary" ^ BuiltIns.testRepr test ^ "()", [], 0) end end structure Foreign = Backend.Foreign structure Sharing = struct type machineWord = machineWord type codetree = codetree type pretty = pretty type argumentType=argumentType type codeBinding = codeBinding type level = level end end (* CODETREE functor body *); diff --git a/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML b/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML index 1ad5b27f..67bbe391 100644 --- a/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML +++ b/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML @@ -1,625 +1,625 @@ (* - Copyright (c) 2012,13,16,18-21 David C.J. Matthews + Copyright (c) 2012,13,16,18-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Miscellaneous construction and operation functions on the code-tree. *) functor CodetreeFunctions( structure BaseCodeTree: BASECODETREE structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end ) : CODETREEFUNCTIONS = struct open BaseCodeTree open Strongly open Address exception InternalError = Misc.InternalError fun mkEnv([], exp) = exp | mkEnv(decs, exp) = Newenv(decs, exp) val word0 = toMachineWord 0 and word1 = toMachineWord 1 val False = word0 and True = word1 val F_mutable_words : Word8.word = Word8.orb (F_words, F_mutable) val CodeFalse = Constnt(False, []) and CodeTrue = Constnt(True, []) and CodeZero = Constnt(word0, []) (* Properties of code. This indicates the extent to which the code has side-effects (i.e. where even if the result is unused the code still needs to be produced) or is applicative (i.e. where its value depends only arguments and can safely be reordered). *) (* The RTS has a table of properties for RTS functions. The 103 call returns these Or-ed into the register mask. *) val PROPWORD_NORAISE = 0wx40000000 and PROPWORD_NOUPDATE = 0wx20000000 and PROPWORD_NODEREF = 0wx10000000 (* Since RTS calls are being eliminated leave residual versions of these. *) fun earlyRtsCall _ = false and sideEffectFreeRTSCall _ = false local infix orb andb val op orb = Word.orb and op andb = Word.andb val noSideEffect = PROPWORD_NORAISE orb PROPWORD_NOUPDATE val applicative = noSideEffect orb PROPWORD_NODEREF in fun codeProps (Lambda _) = applicative | codeProps (Constnt _) = applicative | codeProps (Extract _) = applicative | codeProps (TagTest{ test, ... }) = codeProps test | codeProps (Cond(i, t, e)) = codeProps i andb codeProps t andb codeProps e | codeProps (Newenv(decs, exp)) = List.foldl (fn (d, r) => bindingProps d andb r) (codeProps exp) decs | codeProps (Handle { exp, handler, ... 
}) = (* A handler processes all the exceptions in the body *) (codeProps exp orb PROPWORD_NORAISE) andb codeProps handler | codeProps (Tuple { fields, ...}) = testList fields | codeProps (Indirect{base, ...}) = codeProps base (* A built-in function may be side-effect free. This can occur if we have, for example, "if exp1 orelse exp2" where exp2 can be reduced to "true", typically because it's inside an inline function and some of the arguments to the function are constants. This then gets converted to (exp1; true) and we can eliminate exp1 if it is simply a comparison. *) | codeProps (Unary{oper, arg1}) = let open BuiltIns val operProps = case oper of NotBoolean => applicative | IsTaggedValue => applicative | MemoryCellLength => applicative (* MemoryCellFlags could return a different result if a mutable cell was locked. *) | MemoryCellFlags => applicative | ClearMutableFlag => Word.orb(PROPWORD_NODEREF, PROPWORD_NORAISE) | LongWordToTagged => applicative | SignedToLongWord => applicative | UnsignedToLongWord => applicative | RealAbs _ => applicative (* Does not depend on rounding setting. *) | RealNeg _ => applicative (* Does not depend on rounding setting. *) (* If we float a 64-bit int to a 64-bit floating point value we may lose precision so this depends on the current rounding mode. *) | RealFixedInt _ => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) | FloatToDouble => applicative (* This also depends on the current rounding mode. *) | DoubleToFloat => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) (* May raise the overflow exception *) | RealToInt _ => PROPWORD_NOUPDATE orb PROPWORD_NODEREF | TouchAddress => PROPWORD_NORAISE (* Treat as updating a notional reference count. *) | AllocCStack => PROPWORD_NORAISE | LockMutex => PROPWORD_NORAISE | TryLockMutex => PROPWORD_NORAISE | UnlockMutex => PROPWORD_NORAISE in operProps andb codeProps arg1 end | codeProps (Binary{oper, arg1, arg2}) = let open BuiltIns val mayRaise = PROPWORD_NOUPDATE orb PROPWORD_NODEREF val operProps = case oper of WordComparison _ => applicative | FixedPrecisionArith _ => mayRaise | WordArith _ => applicative (* Quot and Rem don't raise exceptions - zero checking is done before. *) | WordLogical _ => applicative | WordShift _ => applicative | AllocateByteMemory => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) (* Allocation returns a different value on each call. *) | LargeWordComparison _ => applicative | LargeWordArith _ => applicative (* Quot and Rem don't raise exceptions - zero checking is done before. *) | LargeWordLogical _ => applicative | LargeWordShift _ => applicative | RealComparison _ => applicative (* Real arithmetic operations depend on the current rounding setting. *) | RealArith _ => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) | FreeCStack => PROPWORD_NORAISE orb PROPWORD_NODEREF | PointerEq => applicative in operProps andb codeProps arg1 andb codeProps arg2 end | codeProps (Nullary{oper=BuiltIns.GetCurrentThreadId}) = Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) - | codeProps (Nullary{oper=BuiltIns.CheckRTSException}) = PROPWORD_NOUPDATE + | codeProps (Nullary{oper=BuiltIns.CPUPause}) = PROPWORD_NORAISE | codeProps (Nullary{oper=BuiltIns.CreateMutex}) = Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) | codeProps (Arbitrary{shortCond, arg1, arg2, longCall, ...}) = (* Arbitrary precision operations are applicative but the longCall is a function call. It should never have a side-effect so it might be better to remove it. 
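Note, though, that because codeProps (Eval _) is 0w0 and the longCall always contains the call to the RTS function, combining it below with andb currently makes every Arbitrary node come out with no useful properties at all, so in practice these nodes are never treated as side-effect free.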
*) codeProps shortCond andb codeProps arg1 andb codeProps arg2 andb codeProps longCall | codeProps (AllocateWordMemory {numWords, flags, initial}) = let val operProps = Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) in operProps andb codeProps numWords andb codeProps flags andb codeProps initial end | codeProps (Eval _) = 0w0 | codeProps(Raise exp) = codeProps exp andb (Word.notb PROPWORD_NORAISE) (* Treat these as unsafe at least for the moment. *) | codeProps(BeginLoop _) = 0w0 | codeProps(Loop _) = 0w0 | codeProps (SetContainer _) = 0w0 | codeProps (LoadOperation {address, kind}) = let val operProps = case kind of LoadStoreMLWord {isImmutable=true} => applicative | LoadStoreMLByte {isImmutable=true} => applicative | _ => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) in operProps andb addressProps address end | codeProps (StoreOperation {address, value, ...}) = Word.orb(PROPWORD_NODEREF, PROPWORD_NORAISE) andb addressProps address andb codeProps value | codeProps (BlockOperation {kind, sourceLeft, destRight, length}) = let val operProps = case kind of BlockOpMove _ => PROPWORD_NORAISE | BlockOpEqualByte => applicative | BlockOpCompareByte => applicative in operProps andb addressProps sourceLeft andb addressProps destRight andb codeProps length end and testList t = List.foldl(fn (c, r) => codeProps c andb r) applicative t and bindingProps(Declar{value, ...}) = codeProps value | bindingProps(RecDecs _) = applicative (* These should all be lambdas *) | bindingProps(NullBinding c) = codeProps c | bindingProps(Container{setter, ...}) = codeProps setter and addressProps{base, index=NONE, ...} = codeProps base | addressProps{base, index=SOME index, ...} = codeProps base andb codeProps index (* sideEffectFree - does not raise an exception or make an assignment. *) fun sideEffectFree c = (codeProps c andb noSideEffect) = noSideEffect (* reorderable - does not raise an exception or access a reference. *) and reorderable c = codeProps c = applicative end (* Return the inline property if it is set. *) fun findInline [] = EnvSpecNone | findInline (h::t) = if Universal.tagIs CodeTags.inlineCodeTag h then Universal.tagProject CodeTags.inlineCodeTag h else findInline t (* Makes a constant value from an expression which is known to be constant but may involve inline functions, tuples etc. *) fun makeConstVal (cVal:codetree) = let fun makeVal (c as Constnt _) = c (* should just be a tuple *) (* Get a vector, copy the entries into it and return it as a constant. *) | makeVal (Tuple {fields= [], ...}) = CodeZero (* should have been optimised already! *) | makeVal (Tuple {fields, ...}) = let val tupleSize = List.length fields val vec : address = allocWordData(Word.fromInt tupleSize, F_mutable_words, word0) val fieldCode = map makeVal fields fun copyToVec ([], _) = [] | copyToVec (Constnt(w, prop) :: t, locn) = ( assignWord (vec, locn, w); prop :: copyToVec (t, locn + 0w1) ) | copyToVec _ = raise InternalError "not constant" val props = copyToVec(fieldCode, 0w0) (* If any of the constants have properties create a tuple property for the result. *) val tupleProps = if List.all null props then [] else let (* We also need to construct an EnvSpecTuple property because findInline does not look at tuple properties. 
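A concrete case: once its parts have been reduced to constants, a top-level pair such as
           val p = (fn x => x + 1, 2)
       has a first field carrying an inline-function property; without the EnvSpecTuple built here a later #1 p would still see the tuple property, but findInline would answer EnvSpecNone and the function could no longer be expanded inline. The binding p is purely illustrative, not taken from the source.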
*) val inlineProps = map findInline props val inlineProp = if List.all (fn EnvSpecNone => true | _ => false) inlineProps then [] else let fun tupleEntry n = (EnvGenConst(loadWord(vec, Word.fromInt n), List.nth(props, n)), List.nth(inlineProps, n)) in [Universal.tagInject CodeTags.inlineCodeTag (EnvSpecTuple(tupleSize, tupleEntry))] end in Universal.tagInject CodeTags.tupleTag props :: inlineProp end in lock vec; Constnt(toMachineWord vec, tupleProps) end | makeVal _ = raise InternalError "makeVal - not constant or tuple" in makeVal cVal end local fun allConsts [] = true | allConsts (Constnt _ :: t) = allConsts t | allConsts _ = false fun mkRecord isVar xp = let val tuple = Tuple{fields = xp, isVariant = isVar } in if allConsts xp then (* Make it now. *) makeConstVal tuple else tuple end; in val mkTuple = mkRecord false and mkDatatype = mkRecord true end (* Set the inline property. If the property is already present it is replaced. If the property we are setting is EnvSpecNone no property is set. *) fun setInline p (h::t) = if Universal.tagIs CodeTags.inlineCodeTag h then setInline p t else h :: setInline p t | setInline EnvSpecNone [] = [] | setInline p [] = [Universal.tagInject CodeTags.inlineCodeTag p] (* These are very frequently used and it might be worth making special bindings for values such as 0, 1, 2, 3 etc to reduce garbage. *) fun checkNonZero n = if n < 0 then raise InternalError "mkLoadxx: argument negative" else n val mkLoadLocal = Extract o LoadLocal o checkNonZero and mkLoadArgument = Extract o LoadArgument o checkNonZero and mkLoadClosure = Extract o LoadClosure o checkNonZero (* Set the container to the fields of the record. Try to push this down as far as possible. *) fun mkSetContainer(container, Cond(ifpt, thenpt, elsept), filter) = Cond(ifpt, mkSetContainer(container, thenpt, filter), mkSetContainer(container, elsept, filter)) | mkSetContainer(container, Newenv(decs, exp), filter) = Newenv(decs, mkSetContainer(container, exp, filter)) | mkSetContainer(_, r as Raise _, _) = r (* We may well have the situation where one branch of an "if" raises an exception. We can simply raise the exception on that branch. *) | mkSetContainer(container, Handle {exp, handler, exPacketAddr}, filter) = Handle{exp=mkSetContainer(container, exp, filter), handler=mkSetContainer(container, handler, filter), exPacketAddr = exPacketAddr} | mkSetContainer(container, tuple, filter) = SetContainer{container = container, tuple = tuple, filter = filter } local val except: exn = InternalError "Invalid load encountered in compiler" (* Exception value to use for invalid cases. We put this in the code but it should never actually be executed. *) val raiseError = Raise (Constnt (toMachineWord except, [])) in (* Look for an entry in a tuple. Used in both the optimiser and in mkInd. *) fun findEntryInBlock (Tuple { fields, isVariant, ...}, offset, isVar) = ( isVariant = isVar orelse raise InternalError "findEntryInBlock: tuple/datatype mismatch"; if offset < List.length fields then List.nth(fields, offset) (* This can arise if we're processing a branch of a case discriminating on a datatype which won't actually match at run-time. e.g. Tests/Succeed/Test030. *) else if isVar then raiseError else raise InternalError "findEntryInBlock: invalid address" ) | findEntryInBlock (Constnt (b, props), offset, isVar) = let (* Find the tuple property if it is present and extract the field props. 
*) val fieldProps = case List.find(Universal.tagIs CodeTags.tupleTag) props of NONE => [] | SOME p => List.nth(Universal.tagProject CodeTags.tupleTag p, offset) in case findInline props of EnvSpecTuple(_, env) => (* Do the selection now. This is especially useful if we have a global structure *) (* At the moment at least we assume that we can get all the properties from the tuple selection. *) ( case env offset of (EnvGenConst(w, p), inl) => Constnt(w, setInline inl p) (* The general value from selecting a field from a constant tuple must be a constant. *) | _ => raise InternalError "findEntryInBlock: not constant" ) | _ => (* The ML compiler may generate loads from invalid addresses as a result of a val binding to a constant which has the wrong shape. e.g. val a :: b = nil It will always result in a Bind exception being generated before the invalid load, but we have to be careful that the optimiser does not fall over. *) if isShort b orelse not (Address.isWords (toAddress b)) orelse Address.length (toAddress b) <= Word.fromInt offset then if isVar then raiseError else raise InternalError "findEntryInBlock: invalid address" else Constnt (loadWord (toAddress b, Word.fromInt offset), fieldProps) end | findEntryInBlock(base, offset, isVar) = Indirect {base = base, offset = offset, indKind = if isVar then IndVariant else IndTuple} (* anything else *) end (* Exported indirect load operation i.e. load a field from a tuple. We can't use findEntryInBlock in every case since that discards unused entries in a tuple and at this point we haven't checked that the unused entries don't have side-effects/raise exceptions e.g. #1 (1, raise Fail "bad") *) local fun mkIndirect isVar (addr, base as Constnt _) = findEntryInBlock(base, addr, isVar) | mkIndirect isVar (addr, base) = Indirect {base = base, offset = addr, indKind = if isVar then IndVariant else IndTuple} in val mkInd = mkIndirect false and mkVarField = mkIndirect true end fun mkIndContainer(addr, base) = Indirect{offset=addr, base=base, indKind=IndContainer} (* Create a tuple from a container. *) fun mkTupleFromContainer(addr, size) = Tuple{fields = List.tabulate(size, fn n => mkIndContainer(n, mkLoadLocal addr)), isVariant = false} (* Get the value from the code. *) fun evalue (Constnt(c, _)) = SOME c | evalue _ = NONE (* This is really to simplify the change from mkEnv taking a codetree list to taking a codeBinding list * code. This extracts the last entry which must be a NullBinding and packages the declarations with it. *) fun decSequenceWithFinalExp decs = let fun splitLast _ [] = raise InternalError "decSequenceWithFinalExp: empty" | splitLast decs [NullBinding exp] = (List.rev decs, exp) | splitLast _ [_] = raise InternalError "decSequenceWithFinalExp: last is not a NullDec" | splitLast decs (hd::tl) = splitLast (hd:: decs) tl in mkEnv(splitLast [] decs) end local type node = { addr: int, lambda: lambdaForm, use: codeUse list } fun nodeAddress({addr, ...}: node) = addr and arcs({lambda={closure, ...}, ...}: node) = List.foldl(fn (LoadLocal addr, l) => addr :: l | (_, l) => l) [] closure in val stronglyConnected = stronglyConnectedComponents{nodeAddress=nodeAddress, arcs=arcs} end (* In general any mutually recursive declaration can refer to any other. It's better to partition the recursive declarations into strongly connected components i.e. those that actually refer to each other. *) fun partitionMutualBindings(RecDecs rlist) = let val processed = stronglyConnected rlist (* Convert the result. 
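(* A rough illustration of this partitioning: for a recursive group such as
       fun f x = g x  and g x = f x  and h x = x + 1
   f and g refer to each other and so form one strongly connected component, which
   stays together as a RecDecs, while h refers to neither and becomes a plain Declar
   of its lambda.  Because the components come back with dependants first, the
   List.foldl used to rebuild the list also reverses it, so each component ends up
   after the components it depends on. *)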
Note that stronglyConnectedComponents returns the dependencies in the reverse order i.e. if X depends on Y but not the other way round then X will appear before Y in the list. We need to reverse it so that X goes after Y. *) fun rebuild ([{lambda, addr, use}], tl) = Declar{addr=addr, use=use, value=Lambda lambda} :: tl | rebuild (multiple, tl) = RecDecs multiple :: tl in List.foldl rebuild [] processed end (* This is only intended for RecDecs but it's simpler to handle all bindings. *) | partitionMutualBindings other = [other] (* Functions to help in building a closure. *) datatype createClosure = Closure of (loadForm * int) list ref fun makeClosure() = Closure(ref []) (* Function to build a closure. Items are added to the closure if they are not already there. *) fun addToClosure (Closure closureList) (ext: loadForm): loadForm = case (List.find (fn (l, _) => l = ext) (!closureList), ! closureList) of (SOME(_, n), _) => (* Already there *) LoadClosure n | (NONE, []) => (* Not there - first *) (closureList := [(ext, 0)]; LoadClosure 0) | (NONE, cl as (_, n) :: _) => (closureList := (ext, n+1) :: cl; LoadClosure(n+1)) fun extractClosure(Closure (ref closureList)) = List.foldl (fn ((ext, _), l) => ext :: l) [] closureList datatype inlineTest = TooBig | NonRecursive | TailRecursive of bool vector | NonTailRecursive of bool vector fun evaluateInlining(function, numArgs, maxInlineSize) = let (* This checks for the possibility of inlining a function. It sees if it is small enough according to some rough estimate of the cost and it also looks for recursive uses of the function. Typically if the function is small enough to inline there will be only one recursive use but we consider the possibility of more than one. If the only uses are tail recursive we can replace the recursive calls by a Loop with a BeginLoop outside it. If there are non-tail recursive calls we may be able to lift out arguments that are unchanged. For example for fun map f [] = [] | map f (a::b) = f a :: map f b it may be worth lifting out f and generating specific mapping functions for each application. *) val hasRecursiveCall = ref false (* Set to true if rec call *) val allTail = ref true (* Set to false if non recursive *) (* An element of this is set to false if the actual value if anything other than the original argument. At the end we are then left with the arguments that are unchanged. *) val argMod = Array.array(numArgs, true) infix 6 -- (* Subtract y from x but return 0 rather than a negative number. *) fun x -- y = if x >= y then x-y else 0 (* Check for the code size and also recursive references. N,B. We assume in hasLoop that tail recursion applies only with Cond, Newenv and Handler. *) fun checkUse _ (_, 0, _) = 0 (* The function is too big to inline. *) | checkUse isMain (Newenv(decs, exp), cl, isTail) = let fun checkBind (Declar{value, ...}, cl) = checkUse isMain(value, cl, false) | checkBind (RecDecs decs, cl) = List.foldl(fn ({lambda, ...}, n) => checkUse isMain (Lambda lambda, n, false)) cl decs | checkBind (NullBinding c, cl) = checkUse isMain (c, cl, false) | checkBind (Container{setter, ...}, cl) = checkUse isMain(setter, cl -- 1, false) in checkUse isMain (exp, List.foldl checkBind cl decs, isTail) end | checkUse _ (Constnt(w, _), cl, _) = if isShort w then cl else cl -- 1 (* A recursive reference in any context other than a call prevents any inlining. 
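(* A small usage sketch of the closure builder defined above (l1 and l2 stand for
   two distinct loadForm values):
       val cl = makeClosure ()
       val _ = addToClosure cl l1     (* LoadClosure 0 - new entry *)
       val _ = addToClosure cl l2     (* LoadClosure 1 *)
       val _ = addToClosure cl l1     (* LoadClosure 0 again - already present *)
       val items = extractClosure cl  (* [l1, l2], in slot order *)
   so repeated references to the same free variable share a single closure slot. *)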
*) | checkUse true (Extract LoadRecursive, _, _) = 0 | checkUse _ (Extract _, cl, _) = cl -- 1 | checkUse isMain (Indirect{base, ...}, cl, _) = checkUse isMain (base, cl -- 1, false) | checkUse _ (Lambda {body, argTypes, closure, ...}, cl, _) = (* For the moment, any recursive use in an inner function prevents inlining. *) if List.exists (fn LoadRecursive => true | _ => false) closure then 0 else checkUse false (body, cl -- (List.length argTypes + List.length closure), false) | checkUse true (Eval{function = Extract LoadRecursive, argList, ...}, cl, isTail) = let (* If the actual argument is anything but the original argument then the corresponding entry in the array is set to false. *) fun testArg((exp, _), n) = ( if (case exp of Extract(LoadArgument a) => n = a | _ => false) then () else Array.update(argMod, n, false); n+1 ) in List.foldl testArg 0 argList; hasRecursiveCall := true; if isTail then () else allTail := false; List.foldl(fn ((e, _), n) => checkUse true (e, n, false)) (cl--3) argList end | checkUse isMain (Eval{function, argList, ...}, cl, _) = checkUse isMain (function, List.foldl(fn ((e, _), n) => checkUse isMain (e, n, false)) (cl--2) argList, false) | checkUse _ (Nullary _, cl, _) = cl -- 1 | checkUse isMain (Unary{arg1, ...}, cl, _) = checkUse isMain (arg1, cl -- 1, false) | checkUse isMain (Binary{arg1, arg2, ...}, cl, _) = checkUseList isMain ([arg1, arg2], cl -- 1) | checkUse isMain (Arbitrary{arg1, arg2, ...}, cl, _) = checkUseList isMain ([arg1, arg2], cl -- 4) | checkUse isMain (AllocateWordMemory {numWords, flags, initial}, cl, _) = checkUseList isMain ([numWords, flags, initial], cl -- 1) | checkUse isMain (Cond(i, t, e), cl, isTail) = checkUse isMain (i, checkUse isMain (t, checkUse isMain (e, cl -- 2, isTail), isTail), false) | checkUse isMain (BeginLoop { loop, arguments, ...}, cl, _) = checkUse isMain (loop, List.foldl (fn (({value, ...}, _), n) => checkUse isMain (value, n, false)) cl arguments, false) | checkUse isMain (Loop args, cl, _) = List.foldl(fn ((e, _), n) => checkUse isMain (e, n, false)) cl args | checkUse isMain (Raise c, cl, _) = checkUse isMain (c, cl -- 1, false) | checkUse isMain (Handle {exp, handler, ...}, cl, isTail) = checkUse isMain (exp, checkUse isMain (handler, cl, isTail), false) | checkUse isMain (Tuple{ fields, ...}, cl, _) = checkUseList isMain (fields, cl) | checkUse isMain (SetContainer{container, tuple = Tuple { fields, ...}, ...}, cl, _) = (* This can be optimised *) checkUse isMain (container, checkUseList isMain (fields, cl), false) | checkUse isMain (SetContainer{container, tuple, filter}, cl, _) = checkUse isMain (container, checkUse isMain (tuple, cl -- (BoolVector.length filter), false), false) | checkUse isMain (TagTest{test, ...}, cl, _) = checkUse isMain (test, cl -- 1, false) | checkUse isMain (LoadOperation{address, ...}, cl, _) = checkUseAddress isMain (address, cl -- 1) | checkUse isMain (StoreOperation{address, value, ...}, cl, _) = checkUse isMain (value, checkUseAddress isMain (address, cl -- 1), false) | checkUse isMain (BlockOperation{sourceLeft, destRight, length, ...}, cl, _) = checkUse isMain (length, checkUseAddress isMain (destRight, checkUseAddress isMain (sourceLeft, cl -- 1)), false) and checkUseList isMain (elems, cl) = List.foldl(fn (e, n) => checkUse isMain (e, n, false)) cl elems and checkUseAddress isMain ({base, index=NONE, ...}, cl) = checkUse isMain (base, cl, false) | checkUseAddress isMain ({base, index=SOME index, ...}, cl) = checkUseList isMain ([base, index], cl) val costLeft = checkUse 
true (function, maxInlineSize, true) in if costLeft = 0 then TooBig else if not (! hasRecursiveCall) then NonRecursive else if ! allTail then TailRecursive(Array.vector argMod) else NonTailRecursive(Array.vector argMod) end structure Sharing = struct type codetree = codetree and codeBinding = codeBinding and loadForm = loadForm and createClosure = createClosure and envSpecial = envSpecial end end; diff --git a/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML b/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML index 815ea678..84942f0d 100644 --- a/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML +++ b/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML @@ -1,4089 +1,4090 @@ (* Copyright David C. J. Matthews 2016-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor X86CodetreeToICode( structure BackEndTree: BACKENDINTERMEDIATECODE structure X86ICode: X86ICODE structure Debug: DEBUG structure X86Foreign: FOREIGNCALL structure ICodeTransform: X86ICODETRANSFORM structure CodeArray: CODEARRAY sharing X86ICode.Sharing = ICodeTransform.Sharing = CodeArray.Sharing ): GENCODE = struct open BackEndTree open Address open X86ICode open CodeArray exception InternalError = Misc.InternalError local val regs = case targetArch of Native32Bit => [eax, ebx] | Native64Bit => [eax, ebx, r8, r9, r10] | ObjectId32Bit => [eax, esi, r8, r9, r10] val fpResult = case targetArch of Native32Bit => FPReg fp0 | _ => XMMReg xmm0 val fpArgRegs = case targetArch of Native32Bit => [] | _ => [xmm0, xmm1, xmm2] in val generalArgRegs = List.map GenReg regs val floatingPtArgRegs = List.map XMMReg fpArgRegs fun resultReg GeneralType = GenReg eax | resultReg DoubleFloatType = fpResult | resultReg SingleFloatType = fpResult | resultReg (ContainerType _) = GenReg eax (* Doesn't need a result. *) end (* tag a short constant *) fun tag c = 2 * c + 1 (* shift a short constant, but don't set tag bit *) fun semitag c = 2 * c (* Reverse a list and append the second. This is used a lot when converting between the reverse and forward list versions. e.g. codeToICode and codeToICodeRev *) fun revApp([], l) = l | revApp(hd :: tl, l) = revApp(tl, hd :: l) datatype blockStruct = BlockSimple of x86ICode | BlockExit of x86ICode | BlockLabel of int | BlockFlow of controlFlow | BlockBegin of { regArgs: (preg * reg) list, stackArgs: stackLocn list } | BlockRaiseAndHandle of x86ICode * int | BlockOptionalHandle of {call: x86ICode, handler: int, label: int } local open RunCall val F_mutable_bytes = Word.fromLargeWord(Word8.toLargeWord(Word8.orb (F_mutable, F_bytes))) fun makeRealConst l = let val r = allocateByteMemory(0wx8 div bytesPerWord, F_mutable_bytes) fun setBytes([], _) = () | setBytes(hd::tl, n) = (storeByte(r, n, hd); setBytes(tl, n+0wx1)) val () = setBytes(l, 0w0) val () = clearMutableBit r in r end in (* These are floating point constants used to change and mask the sign bit. 
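(* As an illustration (the use of these values is implied rather than shown here):
   on little-endian x86 the byte lists defined just below denote the IEEE sign-bit
   masks, the float versions keeping the mask in the low 32-bit word:
       realSignBit  = 0x8000000000000000    realAbsMask  = 0x7FFFFFFFFFFFFFFF
       floatSignBit = 0x0000000080000000    floatAbsMask = 0x000000007FFFFFFF
   so absolute value can be taken by AND-ing with the abs mask and negation by
   XOR-ing with the sign bit, without any floating point arithmetic. *)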
*) val realSignBit: machineWord = makeRealConst [0wx00, 0wx00, 0wx00, 0wx00, 0wx00, 0wx00, 0wx00, 0wx80] and realAbsMask: machineWord = makeRealConst [0wxff, 0wxff, 0wxff, 0wxff, 0wxff, 0wxff, 0wxff, 0wx7f] and floatSignBit: machineWord = makeRealConst [0wx00, 0wx00, 0wx00, 0wx80, 0wx00, 0wx00, 0wx00, 0wx00] and floatAbsMask: machineWord = makeRealConst [0wxff, 0wxff, 0wxff, 0wx7f, 0wx00, 0wx00, 0wx00, 0wx00] end datatype commutative = Commutative | NonCommutative (* Check that a large-word constant looks right and get the value as a large int*) fun largeWordConstant value = if isShort value then raise InternalError "largeWordConstant: invalid" else let val addr = toAddress value in if length addr <> nativeWordSize div wordSize orelse flags addr <> F_bytes then raise InternalError "largeWordConstant: invalid" else (); LargeWord.toLargeInt(RunCall.unsafeCast addr) end fun codeFunctionToX86({body, localCount, name, argTypes, resultType=fnResultType, closure, ...}:bicLambdaForm, debugSwitches, resultClosure) = let (* Pseudo-registers are allocated sequentially and the properties added to the list. *) val pregCounter = ref 0 val pregPropList = ref [] fun newPReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropGeneral :: !pregPropList in PReg regNo end and newUReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropUntagged :: !pregPropList in PReg regNo end and newStackLoc size = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropStack size :: !pregPropList in StackLoc{size=size, rno=regNo} end and newMergeReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropMultiple :: !pregPropList in PReg regNo end datatype locationValue = NoLocation | PregLocation of preg | ContainerLocation of { container: stackLocn, stackOffset: int } val locToPregArray = Array.array(localCount, NoLocation) val labelCounter = ref 1 (* Start at 1. Zero is used for the root. *) fun newLabel() = !labelCounter before labelCounter := !labelCounter + 1 val ccRefCounter = ref 0 fun newCCRef() = CcRef(!ccRefCounter) before ccRefCounter := !ccRefCounter + 1 (* The profile object is a single mutable with the F_bytes bit set. *) val profileObject = createProfileObject() (* Switch to indicate if we want to trace where live data has been allocated. *) (* TODO: This should be used in AllocateMemoryOperation and BoxValue and possibly AllocateMemoryVariable. *) val addAllocatingFunction = Debug.getParameter Debug.profileAllocationTag debugSwitches = 1 fun constantAsArgument value = if isShort value then IntegerConstant(tag(Word.toLargeIntX(toShort value))) else AddressConstant value (* Create the branch condition from the test, isSigned and jumpOn values. (In)equality tests are the same for signed and unsigned values. 
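(* As an illustration of the mapping defined just below (the values quoted are taken
   from it): testAsBranch picks the x86 condition from the ML test, the signedness
   and whether we jump when the test holds, e.g.
       testAsBranch(TestLess, true,  true)  = JL    (* signed   < *)
       testAsBranch(TestLess, false, true)  = JB    (* unsigned < *)
       testAsBranch(TestLess, true,  false) = JGE   (* jump when not < *)
   and leftRightTest swaps the direction when the operands are exchanged:
   rewriting  5 > x  as  x < 5  uses leftRightTest TestGreater = TestLess. *)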
*) local open BuiltIns in fun testAsBranch(TestEqual, _, true) = JE | testAsBranch(TestEqual, _, false) = JNE (* Signed tests *) | testAsBranch(TestLess, true, true) = JL | testAsBranch(TestLess, true, false) = JGE | testAsBranch(TestLessEqual, true, true) = JLE | testAsBranch(TestLessEqual, true, false) = JG | testAsBranch(TestGreater, true, true) = JG | testAsBranch(TestGreater, true, false) = JLE | testAsBranch(TestGreaterEqual, true, true) = JGE | testAsBranch(TestGreaterEqual, true, false) = JL (* Unsigned tests *) | testAsBranch(TestLess, false, true) = JB | testAsBranch(TestLess, false, false) = JNB | testAsBranch(TestLessEqual, false, true) = JNA | testAsBranch(TestLessEqual, false, false) = JA | testAsBranch(TestGreater, false, true) = JA | testAsBranch(TestGreater, false, false) = JNA | testAsBranch(TestGreaterEqual, false, true) = JNB | testAsBranch(TestGreaterEqual, false, false) = JB | testAsBranch(TestUnordered, _, _) = raise InternalError "TestUnordered" (* Switch the direction of a test if we turn c op x into x op c. *) fun leftRightTest TestEqual = TestEqual | leftRightTest TestLess = TestGreater | leftRightTest TestLessEqual = TestGreaterEqual | leftRightTest TestGreater = TestLess | leftRightTest TestGreaterEqual = TestLessEqual | leftRightTest TestUnordered = TestUnordered end (* Overflow check. This raises Overflow if the overflow bit is set in the cc. This generates a single block for the function unless there is a handler. As well as reducing the size of the code this also means that overflow checks are generally JO instructions to the end of the code. Since the default branch prediction is not to take forward jumps this should improve prefetching on the normal, non-overflow, path. *) fun checkOverflow ({currHandler=NONE, overflowBlock=ref(SOME overFlowLab), ...}) ccRef = (* It's already been set and there's no surrounding handler - use this. *) let val noOverflowLab = newLabel() in [ BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=overFlowLab, falseJump=noOverflowLab }), BlockLabel noOverflowLab ] end | checkOverflow ({currHandler=NONE, overflowBlock, ...}) ccRef = let (* *) val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() val () = overflowBlock := SOME overFlowLab in [ BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=overFlowLab, falseJump=noOverflowLab }), BlockLabel overFlowLab, BlockSimple(LoadArgument{source=AddressConstant(toMachineWord(Overflow)), dest=packetReg, kind=movePolyWord}), BlockExit(RaiseExceptionPacket{packetReg=packetReg}), BlockLabel noOverflowLab ] end | checkOverflow ({currHandler=SOME h, ...}) ccRef = let val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() in [ BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=overFlowLab, falseJump=noOverflowLab }), BlockLabel overFlowLab, BlockSimple(LoadArgument{source=AddressConstant(toMachineWord(Overflow)), dest=packetReg, kind=movePolyWord}), BlockRaiseAndHandle(RaiseExceptionPacket{packetReg=packetReg}, h), BlockLabel noOverflowLab ] end fun setAndRestoreRounding (rndMode, doWithRounding) = let open IEEEReal val savedRnd = newUReg() and setRnd = newUReg() in case fpMode of FPModeX87 => [BlockSimple(GetX87ControlReg{dest=savedRnd})] @ (* Set the appropriate bits in the control word. *) (case rndMode of TO_NEAREST => (* The bits need to be zero - just mask them. 
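(* For reference (an illustration; the bit positions are standard x86 and are not
   stated in this file): the rounding-control field is two bits - bits 10-11 of the
   x87 control word and bits 13-14 of the SSE2 MXCSR - encoded as 00 = to nearest,
   01 = toward -inf, 10 = toward +inf, 11 = toward zero.  Hence the constants used
   in this function:
       x87 :  AND 0xf3ff      then OR 0x400 / 0x800 / 0xc00
       SSE2:  AND 0xffff9fff  then OR 0x2000 / 0x4000 / 0x6000
   i.e. clear the field first, then set the required pattern. *)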
*) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0xf3ff, ccRef=newCCRef(), opSize=OpSize32})] | TO_NEGINF => let val wrk = newUReg() in (* Mask the bits and set to 01 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xf3ff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x400, ccRef=newCCRef(), opSize=OpSize32})] end | TO_POSINF => let val wrk = newUReg() in (* Mask the bits and set to 10 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xf3ff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x800, ccRef=newCCRef(), opSize=OpSize32})] end | TO_ZERO => (* The bits need to be one - just set them. *) [BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0xc00, ccRef=newCCRef(), opSize=OpSize32})]) @ [BlockSimple(SetX87ControlReg{source=setRnd})] @ doWithRounding() @ (* Restore the original rounding. *) [BlockSimple(SetX87ControlReg{source=savedRnd})] | FPModeSSE2 => [BlockSimple(GetSSE2ControlReg{dest=savedRnd})] @ (* Set the appropriate bits in the control word. *) (case rndMode of TO_NEAREST => (* The bits need to be zero - just mask them. *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0xffff9fff, ccRef=newCCRef(), opSize=OpSize32})] | TO_NEGINF => let val wrk = newUReg() in (* Mask the bits and set to 01 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xffff9fff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x2000, ccRef=newCCRef(), opSize=OpSize32})] end | TO_POSINF => let val wrk = newUReg() in (* Mask the bits and set to 10 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xffff9fff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x4000, ccRef=newCCRef(), opSize=OpSize32})] end | TO_ZERO => (* The bits need to be one - just set them. *) [BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x6000, ccRef=newCCRef(), opSize=OpSize32})]) @ [BlockSimple(SetSSE2ControlReg{source=setRnd})] @ doWithRounding() @ [BlockSimple(SetSSE2ControlReg{source=savedRnd})] end (* Put a floating point value into a box or tag it so the value can be held in a general register. *) fun boxOrTagReal(srcReg, destReg, precision) = if precision = BuiltIns.PrecDouble orelse wordSize <> 0w8 then let open BuiltIns val boxFloat = case (fpMode, precision) of (FPModeX87, PrecDouble) => BoxX87Double | (FPModeX87, PrecSingle) => BoxX87Float | (FPModeSSE2, PrecDouble) => BoxSSE2Double | (FPModeSSE2, PrecSingle) => BoxSSE2Float in [BlockSimple(BoxValue{boxKind=boxFloat, source=srcReg, dest=destReg, saveRegs=[]})] end else [BlockSimple(TagFloat{source=srcReg, dest=destReg})] (* Indicate that the base address is actually an object index where appropriate. *) val memIndexOrObject = case targetArch of ObjectId32Bit => ObjectIndex | _ => NoMemIndex (* Generally we have an offset in words and no index register. 
*) fun wordOffsetAddress(offset, baseReg: preg): argument = MemoryLocation{offset=offset*Word.toInt wordSize, base=baseReg, index=memIndexOrObject, cache=NONE} (* The large-word operations all work on the value within the box pointed at by the register. We generate all large-word operations using this even where the X86 instruction requires a register. This allows the next level to optimise cases of cascaded instructions and avoid creating boxes for intermediate values. *) fun wordAt reg = wordOffsetAddress(0, reg) val returnAddressEntry = newStackLoc 1 datatype argLoc = ArgInReg of { realReg: reg, argReg: preg } | ArgOnStack of { stackOffset: int, stackReg: stackLocn } (* Pseudo-regs for the result, the closure and the args that were passed in real regs. *) val resultTarget = newPReg() val closureRegAddr = newPReg() (* Create a map for the arguments indicating their register or stack location. *) local (* Select the appropriate argument register depending on the argument type. *) fun argTypesToArgEntries([], _, _, _) = ([], [], [], []) | argTypesToArgEntries(DoubleFloatType :: tl, gRegs, fpReg :: fpRegs, n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, fpRegs, n-1) val pRegArg = newPReg() and uRegArg = newUReg() in (ArgInReg{realReg=fpReg, argReg=pRegArg} :: argTypes, boxOrTagReal(uRegArg, pRegArg, BuiltIns.PrecDouble) @ argCode, (uRegArg, fpReg) :: argRegs, stackArgs) end | argTypesToArgEntries(SingleFloatType :: tl, gRegs, fpReg :: fpRegs, n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, fpRegs, n-1) val pRegArg = newPReg() and uRegArg = newUReg() in (ArgInReg{realReg=fpReg, argReg=pRegArg} :: argTypes, boxOrTagReal(uRegArg, pRegArg, BuiltIns.PrecSingle) @ argCode, (uRegArg, fpReg) :: argRegs, stackArgs) end | argTypesToArgEntries(_ :: tl, gReg :: gRegs, fpRegs, n) = (* This deals with general arguments but also with extra floating point arguments. They are boxed as usual. *) let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, fpRegs, n-1) val argReg=newPReg() in (ArgInReg{realReg=gReg, argReg=argReg} :: argTypes, argCode, (argReg, gReg) :: argRegs, stackArgs) end | argTypesToArgEntries(_ :: tl, [], fpRegs, n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, [], fpRegs, n-1) val stackLoc = newStackLoc 1 in (ArgOnStack {stackOffset=n, stackReg = stackLoc } :: argTypes, argCode, argRegs, stackLoc :: stackArgs) end val (argEntries, argCode, argRegs, stackArguments) = argTypesToArgEntries(argTypes, generalArgRegs, floatingPtArgRegs, List.length argTypes) val clReg = case closure of [] => [] | _ => [(closureRegAddr, GenReg edx)] in val argumentVector = Vector.fromList argEntries (* Start code for the function. *) val beginInstructions = argCode @ [BlockBegin{regArgs=clReg @ argRegs, stackArgs=stackArguments @ [returnAddressEntry]}] (* The number of arguments on the stack. Needed in return instrs and tail calls. *) val currentStackArgs = List.length stackArguments end (* The return instruction. This can be added on to various tails but there is always one at the end anyway. 
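(* As an illustration of the argument mapping set up above (the register lists are
   those given earlier in this file): on native 64-bit targets the first five
   general arguments arrive in eax, ebx, r8, r9 and r10 and any further ones on the
   stack; double- and single-precision arguments arrive in xmm0-xmm2 and are boxed
   (or, for singles on 64-bit, tagged) into ML values on entry; the closure, when
   there is one, arrives in edx, and the result is returned in eax (or xmm0/fp0 for
   an unboxed real result). *)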
*) fun returnInstruction({stackPtr, ...}, target, tailCode) = let val (returnCode, resReg) = case fnResultType of GeneralType => ([], target) | ContainerType _ => ([], target) | DoubleFloatType => let val resReg = newUReg() in ([BlockSimple(LoadArgument{source=wordAt target, dest=resReg, kind=MoveDouble})], resReg) end | SingleFloatType => let val resReg = newUReg() val unpack = if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument target, dest=resReg, cache=NONE}) else BlockSimple(LoadArgument{source=wordAt target, dest=resReg, kind=MoveFloat}) in ([unpack], resReg) end in BlockExit(ReturnResultFromFunction{resultReg=resReg, realReg=resultReg fnResultType, numStackArgs=currentStackArgs}) :: returnCode @ (if stackPtr <> 0 then BlockSimple(ResetStackPtr{numWords=stackPtr, preserveCC=false}) :: tailCode else tailCode) end (* This controls what codeAsArgument returns. Different instructions have different requirements. If an option is set to false the value is instead loaded into a new preg. "const32s" means that it will fit into 32-bits. Any constant satisfies that on X86/32 but on the X86/64 we don't allow addresses because we can't be sure whether they will fit or not. *) type allowedArgument = { anyConstant: bool, const32s: bool, memAddr: bool, existingPreg: bool } val allowInMemMove = (* We can move a 32-bit constant into memory but not a long constant. *) { anyConstant=false, const32s=true, memAddr=false, existingPreg=true } and allowInPReg = { anyConstant=false, const32s=false, memAddr=false, existingPreg=true } (* AllowDefer can be used to ensure that any side-effects are done before something else but otherwise we only evaluate afterwards. *) and allowDefer = { anyConstant=true, const32s=true, memAddr=true, existingPreg=true } datatype destination = SpecificPReg of preg | NoResult | Allowed of allowedArgument (* Context type. *) type context = { loopArgs: (preg list * int * int) option, stackPtr: int, currHandler: int option, overflowBlock: int option ref } (* If a preg has been provided, use that, otherwise generate a new one. *) fun asTarget(SpecificPReg preg) = preg | asTarget NoResult = newPReg() | asTarget(Allowed _) = newPReg() fun moveIfNotAllowed(NoResult, code, arg) = (code, arg, false) | moveIfNotAllowed(Allowed{anyConstant=true, ...}, code, arg as AddressConstant _) = (code, arg, false) | moveIfNotAllowed(Allowed{anyConstant=true, ...}, code, arg as IntegerConstant _) = (code, arg, false) | moveIfNotAllowed(dest as Allowed{const32s=true, ...}, code, arg as IntegerConstant value) = (* This is allowed if the value is within 32-bits *) if is32bit value then (code, arg, false) else moveToTarget(dest, code, arg) | moveIfNotAllowed(dest as Allowed{const32s=true, ...}, code, arg as AddressConstant _) = if targetArch = Native32Bit then (code, arg, false) (* We can store the address directly *) else moveToTarget(dest, code, arg) | moveIfNotAllowed(Allowed{existingPreg=true, ...}, code, arg as RegisterArgument(PReg _)) = (code, arg, false) | moveIfNotAllowed(Allowed{memAddr=true, ...}, code, arg as MemoryLocation _) = (code, arg, false) | moveIfNotAllowed(dest, code, arg) = moveToTarget(dest, code, arg) and moveToTarget(dest, code, arg) = let val target = asTarget dest val moveSize = case arg of AddressConstant _ => movePolyWord | MemoryLocation _ => movePolyWord | _ => moveNativeWord in (code @ [BlockSimple(LoadArgument{source=arg, dest=target, kind=moveSize})], RegisterArgument target, false) end (* Create a bool result from a test by returning true or false. 
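(* As an illustration of the function just below (which builds its result list in
   reverse): read forwards, the generated skeleton is roughly
       <test code, setting ccRef>
       conditional branch on ccRef: true -> trueLab, false -> falseLab
     trueLab:  mergeReg := tag 1;  goto mergeLab
     falseLab: mergeReg := tag 0;  goto mergeLab
     mergeLab: target := mergeReg
   i.e. the boolean result is materialised as the tagged constants for true and
   false and then merged into the target register. *)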
*) fun makeBoolResultRev(condition, ccRef, target, testCode) = let val trueLab = newLabel() and falseLab = newLabel() and mergeLab = newLabel() val mergeReg = newMergeReg() in BlockSimple(LoadArgument{dest=target, source=RegisterArgument mergeReg, kind=Move32Bit}) :: BlockLabel mergeLab :: BlockFlow(Unconditional mergeLab) :: BlockSimple(LoadArgument{dest=mergeReg, source=IntegerConstant(tag 0), kind=Move32Bit}) :: BlockLabel falseLab :: BlockFlow(Unconditional mergeLab) :: BlockSimple(LoadArgument{dest=mergeReg, source=IntegerConstant(tag 1), kind=Move32Bit}) :: BlockLabel trueLab :: BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=trueLab, falseJump=falseLab }) :: testCode end fun moveIfNotAllowedRev(NoResult, code, arg) = (code, arg, false) | moveIfNotAllowedRev(Allowed{anyConstant=true, ...}, code, arg as AddressConstant _) = (code, arg, false) | moveIfNotAllowedRev(Allowed{anyConstant=true, ...}, code, arg as IntegerConstant _) = (code, arg, false) | moveIfNotAllowedRev(dest as Allowed{const32s=true, ...}, code, arg as IntegerConstant value) = (* This is allowed if the value is within 32-bits *) if is32bit value then (code, arg, false) else moveToTargetRev(dest, code, arg) | moveIfNotAllowedRev(dest as Allowed{const32s=true, ...}, code, arg as AddressConstant _) = if targetArch = Native32Bit then (code, arg, false) else moveToTargetRev(dest, code, arg) | moveIfNotAllowedRev(Allowed{existingPreg=true, ...}, code, arg as RegisterArgument(PReg _)) = (code, arg, false) | moveIfNotAllowedRev(Allowed{memAddr=true, ...}, code, arg as MemoryLocation _) = (code, arg, false) | moveIfNotAllowedRev(dest, code, arg) = moveToTargetRev(dest, code, arg) and moveToTargetRev(dest, code, arg) = let val target = asTarget dest val moveSize = case arg of AddressConstant _ => movePolyWord | MemoryLocation _ => movePolyWord | _ => moveNativeWord in (BlockSimple(LoadArgument{source=arg, dest=target, kind=moveSize}) :: code, RegisterArgument target, false) end (* Allocate a fixed size cell with a reference to the profile object if we want to trace the location of live data. Currently only used for tuples and for closures in native 32/64 bit. *) and allocateWithProfileRev(n, flags, memAddr, tlCode) = if addAllocatingFunction then let val restAndAlloc = BlockSimple(AllocateMemoryOperation{size=n+1, flags=Word8.orb(flags, Address.F_profile), dest=memAddr, saveRegs=[]}) :: tlCode val (code2, source, _) = moveIfNotAllowedRev(Allowed allowInMemMove, restAndAlloc, AddressConstant profileObject) val storeValue = BlockSimple(StoreArgument{ source=source, offset=n*Word.toInt wordSize, base=memAddr, index=memIndexOrObject, kind=movePolyWord, isMutable=false}) in storeValue :: code2 end else BlockSimple(AllocateMemoryOperation{size=n, flags=flags, dest=memAddr, saveRegs=[]}) :: tlCode (* Use a move if there's no offset or index. We could use an add if there's no index. *) and loadAddress{base, offset=0, index=NoMemIndex, dest} = LoadArgument{source=RegisterArgument base, dest=dest, kind=movePolyWord} | loadAddress{base, offset, index, dest} = LoadEffectiveAddress{base=SOME base, offset=offset, dest=dest, index=index, opSize=nativeWordOpSize} and codeToICodeTarget(instr, context: context, isTail, target) = (* This is really for backwards compatibility. *) let val (code, _, _) = codeToICode(instr, context, isTail, SpecificPReg target) in code end and codeToPReg(instr, context) = let (* Many instructions require an argument in a register. If it's already in a register use that rather than creating a new one. 
*) val (code, result, _) = codeToICode(instr, context, false, Allowed allowInPReg) val preg = case result of RegisterArgument pr => pr | _ => raise InternalError "codeToPReg" in (code, preg) end and codeToPRegRev(instr, context, tailCode) = let (* Many instructions require an argument in a register. If it's already in a register use that rather than creating a new one. *) val (code, result, _) = codeToICodeRev(instr, context, false, Allowed allowInPReg, tailCode) val preg = case result of RegisterArgument pr => pr | _ => raise InternalError "codeToPRegRev" in (code, preg) end and codeToICode(instr, context, isTail, destination) = let val (code, dest, haveExited) = codeToICodeRev(instr, context, isTail, destination, []) in (List.rev code, dest, haveExited) end (* Main function to turn the codetree into ICode. Optimisation is generally left to later passes. This does detect tail recursion. This builds the result up in reverse order. There was an allocation hotspot in loadFields in the BICTuple case which was eliminated by building the list in reverse and then reversing the result. It seems better to build the list in reverse generally but for the moment there are too many special cases to do everything. *) and codeToICodeRev(BICNewenv (bindings, exp), context: context as {stackPtr=initialSp, ...} , isTail, destination, tailCode) = let (* Process a list of bindings. We need to accumulate the space used by any containers and reset the stack pointer at the end if necessary. *) fun doBindings([], context, tailCode) = (tailCode, context) | doBindings(BICDeclar{value=BICExtract(BICLoadLocal l), addr, ...} :: decs, context, tailCode) = let (* Giving a new name to an existing entry. This should have been removed at a higher level but it doesn't always seem to be. In particular we must treat this specially if it's a container. *) val original = Array.sub(locToPregArray, l) val () = Array.update(locToPregArray, addr, original) in doBindings(decs, context, tailCode) end | doBindings(BICDeclar{value, addr, ...} :: decs, context, tailCode) = let val (code, dest) = codeToPRegRev(value, context, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs [{lambda, addr, ...}] :: decs, context, tailCode) = (* We shouldn't have single entries in RecDecs but it seems to occur at the moment. *) let val dest = newPReg() val (code, _, _) = codeToICodeRev(BICLambda lambda, context, false, SpecificPReg dest, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs recDecs :: decs, context, tailCode) = let val destRegs = map (fn _ => newPReg()) recDecs (* First build the closures as mutable cells containing zeros. Set the entry in the address table to the register containing the address. 
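(* A rough illustration of the two-pass scheme described here: mutually recursive
   closures may each need the address of the others before their own fields can be
   written, so for e.g.
       fun even n = n = 0 orelse odd (n - 1)
       and odd  n = n <> 0 andalso even (n - 1)
   pass one allocates a mutable cell per function, filled with tag 0, and records
   its register; pass two stores the code address and the free variables (which can
   now include the other closures) and finally locks each cell so that it becomes
   immutable. *)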
*) fun makeClosure({lambda={closure, ...}, addr, ...}, dest, c) = let val () = Array.update(locToPregArray, addr, PregLocation dest) val sizeClosure = List.length closure + (if targetArch = ObjectId32Bit then 2 else 1) open Address fun clear n = if n = sizeClosure then [BlockSimple(AllocateMemoryOperation{size=sizeClosure, flags=if targetArch = ObjectId32Bit then Word8.orb(F_mutable, F_closure) else F_mutable, dest=dest, saveRegs=[]})] else (clear (n+1) @ [BlockSimple( StoreArgument{source=IntegerConstant(tag 0), base=dest, offset=n*Word.toInt wordSize, index=memIndexOrObject, kind=movePolyWord, isMutable=false})]) in c @ clear 0 @ [BlockSimple InitialisationComplete] end val allocClosures = ListPair.foldlEq makeClosure [] (recDecs, destRegs) fun setClosure({lambda as {closure, ...}, ...}, dest, l) = let val clResult = makeConstantClosure() val () = codeFunctionToX86(lambda, debugSwitches, clResult) (* Basically the same as tuple except we load the address of the closure we've made. *) fun loadFields([], _) = [] | loadFields(f :: rest, n) = let val (code, source, _) = codeToICode(BICExtract f, context, false, Allowed allowInMemMove) val storeValue = [BlockSimple(StoreArgument{ source=source, base=dest, offset=n*Word.toInt wordSize, index=memIndexOrObject, kind=movePolyWord, isMutable=false })] in code @ storeValue @ loadFields(rest, n+1) end val setCodeAddress = if targetArch = ObjectId32Bit then let (* We can't get the code address until run time. *) val codeReg = newUReg() val closureReg = newPReg() in map BlockSimple [ LoadArgument{ source=AddressConstant(toMachineWord clResult), dest=closureReg, kind=movePolyWord}, LoadArgument{ source=MemoryLocation{offset=0, base=closureReg, index=ObjectIndex, cache=NONE}, dest=codeReg, kind=Move64Bit}, StoreArgument{ source=RegisterArgument codeReg, offset=0, base=dest, index=ObjectIndex, kind=moveNativeWord, isMutable=false} ] end else let val codeAddr = codeAddressFromClosure clResult val (code, source, _) = moveIfNotAllowed(Allowed allowInMemMove, [], AddressConstant codeAddr) in code @ [BlockSimple( StoreArgument{ source=source, base=dest, offset=0, index=NoMemIndex, kind=movePolyWord, isMutable=false })] end val setFields = setCodeAddress @ loadFields(closure, if targetArch = ObjectId32Bit then 2 else 1) in l @ setFields @ [BlockSimple(LockMutable{addr=dest})] end val setClosures = ListPair.foldlEq setClosure [] (recDecs, destRegs) val code = List.rev(allocClosures @ setClosures) in doBindings(decs, context, code @ tailCode) end | doBindings(BICNullBinding exp :: decs, context, tailCode) = let val (code, _, _) = codeToICodeRev(exp, context, false, NoResult, tailCode) (* And discard result. 
*) in doBindings(decs, context, code) end | doBindings(BICDecContainer{ addr, size } :: decs, {loopArgs, stackPtr, currHandler, overflowBlock}, tailCode) = let val containerReg = newStackLoc size val () = Array.update(locToPregArray, addr, ContainerLocation{container=containerReg, stackOffset=stackPtr+size}) in doBindings(decs, {loopArgs=loopArgs, stackPtr=stackPtr+size, currHandler=currHandler, overflowBlock=overflowBlock}, BlockSimple(ReserveContainer{size=size, container=containerReg}) :: tailCode) end val (codeBindings, resContext as {stackPtr=finalSp, ...}) = doBindings(bindings, context, tailCode) (* If we have had a container we'll need to reset the stack *) in if initialSp <> finalSp then let val _ = finalSp >= initialSp orelse raise InternalError "codeToICode - stack ptr" val bodyReg = newPReg() and resultReg = asTarget destination val (codeExp, result, haveExited) = codeToICodeRev(exp, resContext, isTail, SpecificPReg bodyReg, codeBindings) val afterAdjustSp = if haveExited then codeExp else BlockSimple(LoadArgument{source=result, dest=resultReg, kind=movePolyWord}) :: BlockSimple(ResetStackPtr{numWords=finalSp-initialSp, preserveCC=false}) :: codeExp in (afterAdjustSp, RegisterArgument resultReg, haveExited) end else codeToICodeRev(exp, resContext, isTail, destination, codeBindings) end | codeToICodeRev(BICConstnt(value, _), _, _, destination, tailCode) = moveIfNotAllowedRev(destination, tailCode, constantAsArgument value) | codeToICodeRev(BICExtract(BICLoadLocal l), {stackPtr, ...}, _, destination, tailCode) = ( case Array.sub(locToPregArray, l) of NoLocation => raise InternalError "codeToICodeRev - local unset" | PregLocation preg => moveIfNotAllowedRev(destination, tailCode, RegisterArgument preg) | ContainerLocation{container, stackOffset} => (* This always returns a ContainerAddr whatever the "allowed". *) (tailCode, ContainerAddr{container=container, stackOffset=stackPtr-stackOffset}, false) ) | codeToICodeRev(BICExtract(BICLoadArgument a), {stackPtr, ...}, _, destination, tailCode) = ( case Vector.sub(argumentVector, a) of ArgInReg{argReg, ...} => (* It was originally in a register. It's now in a preg. *) moveIfNotAllowedRev(destination, tailCode, RegisterArgument argReg) | ArgOnStack{stackOffset, stackReg} => (* Pushed before call. *) let val target = asTarget destination in (BlockSimple(LoadArgument{ source=StackLocation{wordOffset=stackOffset+stackPtr, container=stackReg, field=0, cache=NONE}, dest=target, kind=moveNativeWord}) :: tailCode, RegisterArgument target, false) end ) | codeToICodeRev(BICExtract(BICLoadClosure c), _, _, destination, tailCode) = let (* Add the number of words for the code address. This is 1 in native but 2 in 32-in-64. *) val offset = case targetArch of ObjectId32Bit => c+2 | _ => c+1 in if c >= List.length closure then raise InternalError "BICExtract: closure" else (); (* N.B. We need to add one to the closure entry because zero is the code address. *) moveIfNotAllowedRev(destination, tailCode, wordOffsetAddress(offset, closureRegAddr)) end | codeToICodeRev(BICExtract BICLoadRecursive, _, _, destination, tailCode) = (* If the closure is empty we must use the constant. We can't guarantee that the caller will actually load the closure register if it knows the closure is empty. 
*) moveIfNotAllowedRev(destination, tailCode, case closure of [] => AddressConstant(closureAsAddress resultClosure) | _ => RegisterArgument closureRegAddr) | codeToICodeRev(BICField{base, offset}, context, _, destination, tailCode) = let val (codeBase, baseEntry, _) = codeToICodeRev(base, context, false, Allowed allowInPReg, tailCode) in (* This should not be used with a container. *) case baseEntry of RegisterArgument baseR => moveIfNotAllowedRev(destination, codeBase, wordOffsetAddress(offset, baseR)) | _ => raise InternalError "codeToICodeRev-BICField" end | codeToICodeRev(BICLoadContainer{base, offset}, context, _, destination, tailCode) = let val (codeBase, baseEntry, _) = codeToICodeRev(base, context, false, Allowed allowInPReg, tailCode) val multiplier = Word.toInt(nativeWordSize div wordSize) in (* If this is a local container we extract the field. *) case baseEntry of RegisterArgument baseR => moveIfNotAllowedRev(destination, codeBase, wordOffsetAddress(offset*multiplier, baseR)) | ContainerAddr{container, stackOffset} => let val target = asTarget destination val finalOffset = stackOffset+offset val _ = finalOffset >= 0 orelse raise InternalError "offset" in (BlockSimple(LoadArgument{ source=StackLocation{wordOffset=finalOffset, container=container, field=offset, cache=NONE}, dest=target, kind=moveNativeWord}) :: tailCode, RegisterArgument target, false) end | _ => raise InternalError "codeToICodeRev-BICField" end | codeToICodeRev(BICEval{function, argList, resultType, ...}, context as { currHandler, ...}, isTail, destination, tailCode) = let val target = asTarget destination (* Create pregs for the closure and each argument. *) val clPReg = newPReg() (* If we have a constant closure we can go directly to the entry point. If the closure is a single word we don't need to load the closure register. *) val (functionCode, closureEntry, callKind) = case function of BICConstnt(addr, _) => let val addrAsAddr = toAddress addr (* If this is a closure we're still compiling we can't get the code address. However if this is directly recursive we can use the recursive convention. *) in if wordEq(closureAsAddress resultClosure, addr) then (tailCode, [], Recursive) else if flags addrAsAddr <> Address.F_words andalso flags addrAsAddr <> Address.F_closure then (BlockSimple(LoadArgument{source=AddressConstant addr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], FullCall) else if targetArch = ObjectId32Bit then (* We can't actually load the code address here. *) let val addrLength = length addrAsAddr val _ = addrLength >= 0w1 orelse raise InternalError "BICEval address" val _ = flags addrAsAddr = Address.F_closure orelse raise InternalError "BICEval address not a closure" in if addrLength = 0w2 then (tailCode, [], ConstantCode addr) else (BlockSimple(LoadArgument{source=AddressConstant addr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], ConstantCode addr) end else (* Native 32 or 64-bits. 
*) let val addrLength = length addrAsAddr val _ = addrLength >= 0w1 orelse raise InternalError "BICEval address" val codeAddr = loadWord(addrAsAddr, 0w0) val _ = isCode (toAddress codeAddr) orelse raise InternalError "BICEval address not code" in if addrLength = 0w1 then (tailCode, [], ConstantCode codeAddr) else (BlockSimple(LoadArgument{source=AddressConstant addr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], ConstantCode codeAddr) end end | BICExtract BICLoadRecursive => ( (* If the closure is empty we don't need to load rdx *) case closure of [] => (tailCode, [], Recursive) | _ => (BlockSimple(LoadArgument {source=RegisterArgument closureRegAddr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], Recursive) ) | function => (* General case. *) (#1 (codeToICodeRev(function, context, false, SpecificPReg clPReg, tailCode)), [(RegisterArgument clPReg, GenReg edx)], FullCall) (* Optimise arguments. We have to be careful with tail-recursive functions because they need to save any stack arguments that could be overwritten. This is complicated because we overwrite the stack before loading the register arguments. In some circumstances it could be safe but for the moment leave it. This should be safe in the new code-transform but not the old codeICode. Currently we don't allow memory arguments at all. There's the potential for problems later. Memory arguments could possibly lead to aliasing of the stack if the memory actually refers to a container on the stack. That would mess up the code that ensures that stack arguments are stored in the right order. *) (* We don't allow long constants in stack arguments to a tail-recursive call because we may use a memory move to set them. We also don't allow them in 32-in-64 because we can't push an address constant. *) val allowInStackArg = Allowed {anyConstant=not isTail andalso targetArch <> ObjectId32Bit, const32s=true, memAddr=false, existingPreg=not isTail } and allowInRegArg = Allowed {anyConstant=true, const32s=true, memAddr=false, existingPreg=not isTail } (* Load the first arguments into registers and the rest to the stack. *) fun loadArgs ([], _, _, tailCode) = (tailCode, [], []) | loadArgs ((arg, DoubleFloatType) :: args, gRegs, fpReg :: fpRegs, tailCode) = let (* Floating point register argument. *) val (c, r) = codeToPRegRev(arg, context, tailCode) val r1 = newUReg() val c1 = BlockSimple(LoadArgument{source=wordAt r, dest=r1, kind=MoveDouble}) :: c val (code, regArgs, stackArgs) = loadArgs(args, gRegs, fpRegs, c1) in (code, (RegisterArgument r1, fpReg) :: regArgs, stackArgs) end | loadArgs ((arg, SingleFloatType) :: args, gRegs, fpReg :: fpRegs, tailCode) = let (* Floating point register argument. *) val (c, r) = codeToPRegRev(arg, context, tailCode) val r1 = newUReg() val c1 = if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument r, dest=r1, cache=NONE}) :: c else BlockSimple(LoadArgument{source=wordAt r, dest=r1, kind=MoveFloat}) :: c val (code, regArgs, stackArgs) = loadArgs(args, gRegs, fpRegs, c1) in (code, (RegisterArgument r1, fpReg) :: regArgs, stackArgs) end | loadArgs ((arg, _) :: args, gReg::gRegs, fpRegs, tailCode) = let (* General register argument. *) val (c, r, _) = codeToICodeRev(arg, context, false, allowInRegArg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, gRegs, fpRegs, c) in (code, (r, gReg) :: regArgs, stackArgs) end | loadArgs ((arg, _) :: args, [], fpRegs, tailCode) = let (* Stack argument. 
*) val (c, r, _) = codeToICodeRev(arg, context, false, allowInStackArg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, [], fpRegs, c) in (code, regArgs, r :: stackArgs) end val (codeArgs, regArgs, stackArgs) = loadArgs(argList, generalArgRegs, floatingPtArgRegs, functionCode) (* If this is at the end of the function and the result types are the same we can use a tail-recursive call. *) val tailCall = isTail andalso resultType = fnResultType val callCode = if tailCall then let val {stackPtr, ...} = context (* The number of arguments currently on the stack. *) val currentStackArgCount = currentStackArgs val newStackArgCount = List.length stackArgs (* The offset of the first argument or the return address if there are no stack arguments. N.B. We actually have currentStackArgCount+1 items on the stack including the return address. Offsets can be negative. *) val stackOffset = stackPtr val firstArgumentAddr = currentStackArgCount fun makeStackArgs([], _) = [] | makeStackArgs(arg::args, offset) = {src=arg, stack=offset} :: makeStackArgs(args, offset-1) val stackArgs = makeStackArgs(stackArgs, firstArgumentAddr) (* The stack adjustment needed to compensate for any items that have been pushed and the differences in the number of arguments. May be positive or negative. This is also the destination address of the return address so when we enter the new function the return address will be the first item on the stack. *) val stackAdjust = firstArgumentAddr - newStackArgCount (* Add an entry for the return address to the stack arguments. *) val returnEntry = {src=StackLocation{wordOffset=stackPtr, container=returnAddressEntry, field=0, cache=NONE}, stack=stackAdjust} (* Because we're storing into the stack we may be overwriting values we want. If the source of any value is a stack location below the current stack pointer we load it except in the special case where the destination is the same as the source (which is often the case with the return address). 
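(* A hypothetical example of the problem being handled here: suppose f(a, b) makes
   the tail call g(b, a) and both arguments are passed on the stack.
       incoming stack args:  a, b   (plus the return address)
       outgoing stack args:  b, a   (return address reused in place)
   Writing b into a's slot before a has been read would lose a, so any source that
   is still a live stack location is first loaded into a fresh preg, while a slot
   that is simply copied onto itself (typically the return address) is left where
   it is. *)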
*) local fun loadArgs [] = ([], []) | loadArgs (arg :: rest) = let val (loadCode, loadedArgs) = loadArgs rest in case arg of {src as StackLocation{wordOffset, ...}, stack} => if wordOffset = stack+stackOffset (* Same location *) orelse stack+stackOffset < 0 (* Storing above current top of stack *) orelse stackOffset+wordOffset > ~ stackAdjust (* Above the last argument *) then (loadCode, arg :: loadedArgs) else let val preg = newPReg() in (BlockSimple(LoadArgument{source=src, dest=preg, kind=moveNativeWord}) :: loadCode, {src=RegisterArgument preg, stack=stack} :: loadedArgs) end | _ => (loadCode, arg :: loadedArgs) end in val (loadStackArgs, loadedStackArgs) = loadArgs(returnEntry :: stackArgs) end in BlockExit(TailRecursiveCall{regArgs=closureEntry @ regArgs, stackArgs=loadedStackArgs, stackAdjust = stackAdjust, currStackSize=stackOffset, callKind=callKind, workReg=newPReg()}) :: loadStackArgs @ codeArgs end else let val (moveResult, resReg) = case resultType of GeneralType => ([], target) | ContainerType _ => ([], target) | DoubleFloatType => let val fpRegDest = newUReg() in (boxOrTagReal(fpRegDest, target, BuiltIns.PrecDouble), fpRegDest) end | SingleFloatType => let val fpRegDest = newUReg() in (boxOrTagReal(fpRegDest, target, BuiltIns.PrecSingle), fpRegDest) end val call = FunctionCall{regArgs=closureEntry @ regArgs, stackArgs=stackArgs, dest=resReg, realDest=resultReg resultType, callKind=callKind, saveRegs=[]} val callBlock = case currHandler of NONE => BlockSimple call :: codeArgs | SOME h => BlockOptionalHandle{call=call, handler=h, label=newLabel()} :: codeArgs in moveResult @ callBlock end in (callCode, RegisterArgument target, tailCall (* We've exited if this was a tail jump *)) end | codeToICodeRev(BICNullary{oper=BuiltIns.GetCurrentThreadId}, _, _, destination, tailCode) = (* Get the ID of the current thread. *) let val target = asTarget destination in (BlockSimple(LoadMemReg{offset=memRegThreadSelf, dest=target, kind=movePolyWord}) :: tailCode, RegisterArgument target, false) end - | codeToICodeRev(BICNullary{oper=BuiltIns.CheckRTSException}, _, _, destination, tailCode) = - moveIfNotAllowedRev(destination, tailCode, (* Unit result *) IntegerConstant(tag 0)) + | codeToICodeRev(BICNullary{oper=BuiltIns.CPUPause}, _, _, destination, tailCode) = + (* Pause during spinlock phase of mutex locking. *) + moveIfNotAllowedRev(destination, BlockSimple PauseCPU :: tailCode, (* Unit result *) IntegerConstant(tag 0)) | codeToICodeRev(BICNullary{oper=BuiltIns.CreateMutex}, _, _, destination, tailCode) = let (* Allocate memory for a mutex. Use a native word as a mutable, weak, no-overwrite, byte cell which is the same as a volatileRef. This ensures that it will always be cleared when it is loaded even if it was locked when it was saved. 
*) val target = asTarget destination val flags = Word8.orb(F_mutable, Word8.orb(F_weak, Word8.orb(F_noOverwrite, F_bytes))) (* 0wx69 *) in (BlockSimple InitialisationComplete :: BlockSimple(StoreArgument{source=IntegerConstant 0, base=target, offset=0, index=memIndexOrObject, kind=moveNativeWord, isMutable=false }) :: BlockSimple(AllocateMemoryOperation{size=Word.toInt(nativeWordSize div wordSize), flags=flags, dest=target, saveRegs=[]}) :: tailCode, RegisterArgument target, false) end | codeToICodeRev(BICUnary instr, context, isTail, destination, tailCode) = codeToICodeUnaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICBinary instr, context, isTail, destination, tailCode) = codeToICodeBinaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICArbitrary{oper, shortCond, arg1, arg2, longCall}, context, _, destination, tailCode) = let val startLong = newLabel() and resultLabel = newLabel() val target = asTarget destination val condResult = newMergeReg() (* Overflow check - if there's an overflow jump to the long precision case. *) fun jumpOnOverflow ccRef = let val noOverFlow = newLabel() in [BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=startLong, falseJump=noOverFlow }), BlockLabel noOverFlow] end val (longCode, _, _) = codeToICode(longCall, context, false, SpecificPReg condResult) (* We could use a tail jump here if this is a tail. *) val (code, dest, haveExited) = ( (* Test the tag bits and skip to the long case if either is clear. *) List.rev(codeConditionRev(shortCond, context, false, startLong, [])) @ (* Try evaluating as fixed precision and jump if we get an overflow. *) codeFixedPrecisionArith(oper, arg1, arg2, context, condResult, jumpOnOverflow) @ (* If we haven't had an overflow jump to the result. *) [BlockFlow(Unconditional resultLabel), (* If we need to use the full long-precision call we come here. *) BlockLabel startLong] @ longCode @ [BlockLabel resultLabel, BlockSimple(LoadArgument{source=RegisterArgument condResult, dest=target, kind=movePolyWord})], RegisterArgument target, false) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICAllocateWordMemory instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeAllocate(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICLambda(lambda as { closure = [], ...}), _, _, destination, tailCode) = (* Empty closure - create a constant closure for any recursive calls. *) let val closure = makeConstantClosure() val () = codeFunctionToX86(lambda, debugSwitches, closure) (* Return the closure itself as the value. *) in moveIfNotAllowedRev(destination, tailCode, AddressConstant(closureAsAddress closure)) end | codeToICodeRev(BICLambda(lambda as { closure, ...}), context, isTail, destination, tailCode) = (* Non-empty closure. Ignore stack closure option at the moment. *) let val closureRef = makeConstantClosure() val () = codeFunctionToX86(lambda, debugSwitches, closureRef) in if targetArch = ObjectId32Bit then let val target = asTarget destination val memAddr = newPReg() fun loadFields([], n, tlCode) = let val codeReg = newUReg() val closureReg = newPReg() in (* The code address occupies the first native word but we need to extract it at run-time. We don't currently have a way to have 64-bit constants. 
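(* A sketch of the control flow generated by the BICArbitrary case above (the labels
   are those used in that case):
       <shortCond: check that both arguments are short (tagged)>  -- if not -> startLong
       <fixed-precision arithmetic into condResult>               -- JO     -> startLong
       goto resultLabel
     startLong:   <full arbitrary-precision call into condResult>
     resultLabel: target := condResult
   so the common short-integer path avoids the long-precision call entirely. *)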
*) BlockSimple( StoreArgument{ source=RegisterArgument codeReg, offset=0, base=memAddr, index=ObjectIndex, kind=moveNativeWord, isMutable=false}) :: BlockSimple(LoadArgument{ source=MemoryLocation{offset=0, base=closureReg, index=ObjectIndex, cache=NONE}, dest=codeReg, kind=Move64Bit}) :: BlockSimple(LoadArgument{ source=AddressConstant(toMachineWord closureRef), dest=closureReg, kind=movePolyWord}) :: BlockSimple(AllocateMemoryOperation{size=n, flags=F_closure, dest=memAddr, saveRegs=[]}) :: tlCode end | loadFields(f :: rest, n, tlCode) = let (* Defer the evaluation if possible. We may have a constant that we can't move directly but it's better to load it after the allocation otherwise we will have to push the register if we need to GC. *) val (code1, source1, _) = codeToICodeRev(BICExtract f, context, false, Allowed allowDefer, tlCode) val restAndAlloc = loadFields(rest, n+1, code1) val (code2, source, _) = moveIfNotAllowedRev(Allowed allowInMemMove, restAndAlloc, source1) val storeValue = BlockSimple(StoreArgument{ source=source, offset=n*Word.toInt wordSize, base=memAddr, index=ObjectIndex, kind=movePolyWord, isMutable=false}) in storeValue :: code2 end val code = BlockSimple InitialisationComplete :: BlockSimple(LoadArgument{source=RegisterArgument memAddr, dest=target, kind=movePolyWord}) :: loadFields(closure, 2, tailCode) in (code, RegisterArgument target, false) end (* Treat it as a tuple with the code as the first field. *) else codeToICodeRev(BICTuple(BICConstnt(codeAddressFromClosure closureRef, []) :: map BICExtract closure), context, isTail, destination, tailCode) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, NoResult, tailCode) = let (* If we don't want the result but are only evaluating for side-effects we may be able to optimise special cases. This was easier in the forward case but for now we don't bother and leave it to the lower levels. *) val startElse = newLabel() and skipElse = newLabel() val codeTest = codeConditionRev(test, context, false, startElse, tailCode) val (codeThen, _, _) = codeToICodeRev(thenPt, context, isTail, NoResult, codeTest) val (codeElse, _, _) = codeToICodeRev(elsePt, context, isTail, NoResult, BlockLabel startElse :: BlockFlow(Unconditional skipElse) :: codeThen) in (BlockLabel skipElse :: codeElse, (* Unit result *) IntegerConstant(tag 0), false) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, destination, tailCode) = let (* Because we may push the result onto the stack we have to create a new preg to hold the result and then copy that to the final result. *) (* If this is a tail each arm will exit separately and neither will return a result. *) val target = asTarget destination val condResult = newMergeReg() val thenTarget = if isTail then newPReg() else condResult val startElse = newLabel() val testCode = codeConditionRev(test, context, false, startElse, tailCode) (* Put the result in the target register. *) val (thenCode, _, thenExited) = codeToICodeRev(thenPt, context, isTail, SpecificPReg thenTarget, testCode) (* Add a jump round the else-part except that if this is a tail we return. The then-part could have exited e.g. with a raise or a loop. *) val (exitThen, thenLabel, elseTarget) = if thenExited then (thenCode, [], target (* Can use original target. 
*)) else if isTail then (returnInstruction(context, thenTarget, thenCode), [], newPReg()) else let val skipElse = newLabel() in (BlockFlow(Unconditional skipElse) :: thenCode, [BlockSimple(LoadArgument{source=RegisterArgument condResult, dest=target, kind=movePolyWord}), BlockLabel skipElse], condResult) end val (elseCode, _, elseExited) = codeToICodeRev(elsePt, context, isTail, SpecificPReg elseTarget, BlockLabel startElse :: exitThen) (* Add a return to the else-part if necessary so we will always exit on a tail. *) val exitElse = if isTail andalso not elseExited then returnInstruction(context, elseTarget, elseCode) else elseCode in (thenLabel @ exitElse, RegisterArgument target, isTail orelse thenExited andalso elseExited) end | codeToICodeRev(BICCase { cases, test, default, isExhaustive, firstIndex}, context, isTail, destination, tailCode) = let (* We have to create a new preg for the result in case we need to push it to the stack. *) val targetReg = newMergeReg() local val initialTestReg = newPReg() val (testCode, _, _) = codeToICodeRev(test, context, false, SpecificPReg initialTestReg, tailCode) (* Subtract the minimum value so the value we're testing is always in the range of (tagged) 0 to the maximum. It is possible to adjust the value when computing the index but that can lead to overflows during compilation if the minimum is very large or small. We can ignore overflow and allow values to wrap round. *) in val (testCode, testReg) = if firstIndex = 0w0 then (testCode, initialTestReg) else let val newTestReg = newPReg() val subtract = BlockSimple(ArithmeticFunction{oper=SUB, resultReg=newTestReg, operand1=initialTestReg, operand2=IntegerConstant(semitag(Word.toLargeInt firstIndex)), ccRef=newCCRef(), opSize=polyWordOpSize}) in (subtract :: testCode, newTestReg) end end val workReg = newPReg() (* Unless this is exhaustive we need to add a range check. *) val (rangeCheck, extraDefaults) = if isExhaustive then (testCode, []) else let val defLab1 = newLabel() val tReg1 = newPReg() val ccRef1 = newCCRef() (* Since we've subtracted any minimum we only have to check whether the value is greater (unsigned) than the maximum. *) val numberOfCases = LargeInt.fromInt(List.length cases) val continueLab = newLabel() val testCode2 = BlockLabel continueLab :: BlockFlow(Conditional{ccRef=ccRef1, condition=JNB, trueJump=defLab1, falseJump=continueLab}) :: BlockSimple(WordComparison{arg1=tReg1, arg2=IntegerConstant(tag numberOfCases), ccRef=ccRef1, opSize=polyWordOpSize}) :: BlockSimple(LoadArgument {source=RegisterArgument testReg, dest=tReg1, kind=movePolyWord}) :: testCode in (testCode2, [defLab1]) end (* Make a label for each item in the list. *) val codeLabels = map (fn _ => newLabel()) cases (* Create an exit label in case it's needed. *) val labelForExit = if isTail then ~1 (* Illegal label. *) else newLabel() (* Generate the code for each of the cases and the default. We need to put an unconditional branch after each to skip the other cases. *) fun codeCases (SOME c :: otherCases, startLabel :: otherLabels, tailCode) = let val caseTarget = if isTail then newPReg() else targetReg (* Put in the case with a jump to the end of the sequence. 
*) val (codeThisCase, _, caseExited) = codeToICodeRev(c, context, isTail, SpecificPReg caseTarget, BlockLabel startLabel :: tailCode) val exitThisCase = if caseExited then codeThisCase else if isTail then returnInstruction(context, caseTarget, codeThisCase) else BlockFlow(Unconditional labelForExit) :: codeThisCase in codeCases(otherCases, otherLabels, exitThisCase) end | codeCases(NONE :: otherCases, _ :: otherLabels, tailCode) = codeCases(otherCases, otherLabels, tailCode) | codeCases ([], [], tailCode) = let (* We need to add labels for all the gaps we filled and also for a "default" label for the indexed-case instruction itself as well as any range checks. *) fun addDefault (startLabel, NONE, l) = BlockLabel startLabel :: l | addDefault (_, SOME _, l) = l fun asForward l = BlockLabel l val dLabs = map asForward extraDefaults @ tailCode val defLabels = ListPair.foldlEq addDefault dLabs (codeLabels, cases) val defaultTarget = if isTail then newPReg() else targetReg val (defaultCode, _, defaultExited) = codeToICodeRev(default, context, isTail, SpecificPReg defaultTarget, defLabels) in (* Put in the default. Because this is the last we don't need to jump round it. However if this is a tail and we haven't exited we put in a return. That way the case will always have exited if this is a tail. *) if isTail andalso not defaultExited then returnInstruction(context, defaultTarget, defaultCode) else defaultCode end | codeCases _ = raise InternalError "codeCases: mismatch" val codedCases = codeCases(cases, codeLabels, BlockFlow(IndexedBr codeLabels) :: BlockSimple(IndexedCaseOperation{testReg=testReg, workReg=workReg}) :: rangeCheck) (* We can now copy to the target. If we need to push the result this load will be converted into a push. *) val target = asTarget destination val copyToTarget = if isTail then codedCases else BlockSimple(LoadArgument{source=RegisterArgument targetReg, dest=target, kind=movePolyWord}) :: BlockLabel labelForExit :: codedCases in (copyToTarget, RegisterArgument target, isTail (* We have always exited on a tail. *)) end | codeToICodeRev(BICBeginLoop {loop, arguments}, context as { stackPtr, currHandler, overflowBlock, ...}, isTail, destination, tailCode) = let val target = asTarget destination fun codeArgs ([], tailCode) = ([], tailCode) | codeArgs (({value, addr}, _) :: rest, tailCode) = let val pr = newPReg() val () = Array.update(locToPregArray, addr, PregLocation pr) val (code, _, _) = codeToICodeRev(value, context, false, SpecificPReg pr, tailCode) val (pregs, othercode) = codeArgs(rest, code) in (pr::pregs, othercode) end val (loopRegs, argCode) = codeArgs(arguments, tailCode) val loopLabel = newLabel() val (loopBody, _, loopExited) = codeToICodeRev(loop, {loopArgs=SOME (loopRegs, loopLabel, stackPtr), stackPtr=stackPtr, currHandler=currHandler, overflowBlock=overflowBlock }, isTail, SpecificPReg target, BlockLabel loopLabel :: BlockSimple BeginLoop :: argCode) in (loopBody, RegisterArgument target, loopExited) end | codeToICodeRev(BICLoop args, context as {loopArgs=SOME (loopRegs, loopLabel, loopSp), stackPtr, currHandler, ...}, _, destination, tailCode) = let val target = asTarget destination (* Registers to receive the evaluated arguments. We can't put the values into the loop variables yet because the values could depend on the current values of the loop variables. 
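(* The range check for BICCase above uses a standard trick: after subtracting
   firstIndex, a single unsigned comparison against the number of cases rejects
   both values below the minimum (they wrap round to very large unsigned
   values) and values above the maximum.  A value-level sketch of the same
   test, with hypothetical names and ignoring tagging: *)
fun inCaseRange (firstIndex: word, numberOfCases: word) (testValue: word) : bool =
    (* Word subtraction wraps, so a testValue below firstIndex becomes huge. *)
    testValue - firstIndex < numberOfCases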
*) val argPRegs = map(fn _ => newPReg()) args val codeArgs = ListPair.foldlEq(fn ((arg, _), pr, l) => #1 (codeToICodeRev(arg, context, false, SpecificPReg pr, l))) tailCode (args, argPRegs) val jumpArgs = ListPair.mapEq(fn (s, l) => (RegisterArgument s, l)) (argPRegs, loopRegs) (* If we've allocated a container in the loop we have to remove it before jumping back. *) val stackReset = if loopSp = stackPtr then codeArgs else BlockSimple(ResetStackPtr{numWords=stackPtr-loopSp, preserveCC=false}) :: codeArgs val jumpLoop = JumpLoop{regArgs=jumpArgs, stackArgs=[], checkInterrupt=SOME[], workReg=NONE} (* "checkInterrupt" could result in a Interrupt exception so we treat this like a function call. *) val code = case currHandler of NONE => BlockFlow(Unconditional loopLabel) :: BlockSimple jumpLoop :: stackReset | SOME h => BlockOptionalHandle{call=jumpLoop, handler=h, label=loopLabel} :: stackReset in (code, RegisterArgument target, true) end | codeToICodeRev(BICLoop _, {loopArgs=NONE, ...}, _, _, _) = raise InternalError "BICLoop without BICBeginLoop" | codeToICodeRev(BICRaise exc, context as { currHandler, ...}, _, destination, tailCode) = let val packetReg = newPReg() val (code, _, _) = codeToICodeRev(exc, context, false, SpecificPReg packetReg, tailCode) val raiseCode = RaiseExceptionPacket{packetReg=packetReg} val block = case currHandler of NONE => BlockExit raiseCode | SOME h => BlockRaiseAndHandle(raiseCode, h) in (block :: code, RegisterArgument(asTarget destination), true (* Always exits *)) end | codeToICodeRev(BICHandle{exp, handler, exPacketAddr}, context as { stackPtr, loopArgs, overflowBlock, ... }, isTail, destination, tailCode) = let (* As with BICCond and BICCase we need to create a new register for the result in case we need to push it to the stack. *) val handleResult = newMergeReg() val handlerLab = newLabel() and startHandling = newLabel() val (bodyTarget, handlerTarget) = if isTail then (newPReg(), newPReg()) else (handleResult, handleResult) (* TODO: Even if we don't actually want a result we force one in here by using "asTarget". *) (* The expression cannot be treated as a tail because the handler has to be removed after. It may "exit" if it has raised an unconditional exception. If it has we mustn't generate a PopExceptionHandler because there won't be any result for resultReg. We need to add two words to the stack to account for the items pushed by PushExceptionHandler. We create an instruction to push the handler followed by a block fork to the start of the code and, potentially the handler, then a label to start the code that the handler is in effect for. *) val initialCode = BlockLabel startHandling :: BlockFlow(SetHandler{handler=handlerLab, continue=startHandling}) :: BlockSimple(PushExceptionHandler{workReg=newPReg()}) :: tailCode val (expCode, _, expExit) = codeToICodeRev(exp, {stackPtr=stackPtr+2, loopArgs=loopArgs, currHandler=SOME handlerLab, overflowBlock=overflowBlock}, false (* Not tail *), SpecificPReg bodyTarget, initialCode) (* If this is the tail we can replace the jump at the end of the handled code with returns. If the handler has exited we don't need a return there. Otherwise we need to add an unconditional jump to skip the handler. *) val (atExpEnd, skipExpLabel) = case (isTail, expExit) of (true, true) => (* Tail and exited. *) (expCode, NONE) | (true, false) => (* Tail and not exited. *) (returnInstruction(context, bodyTarget, BlockSimple(PopExceptionHandler{workReg=newPReg()}) :: expCode), NONE) | (false, true) => (* Not tail but exited. 
*) (expCode, NONE) | (false, false) => let val skipHandler = newLabel() in (BlockFlow(Unconditional skipHandler) :: BlockSimple(PopExceptionHandler{workReg=newPReg()}) :: expCode, SOME skipHandler) end (* Make a register to hold the exception packet and put eax into it. *) val packetAddr = newPReg() val () = Array.update(locToPregArray, exPacketAddr, PregLocation packetAddr) val (handleCode, _, handleExit) = codeToICodeRev(handler, context, isTail, SpecificPReg handlerTarget, BlockSimple(BeginHandler{workReg=newPReg(), packetReg=packetAddr}) :: BlockLabel handlerLab :: atExpEnd) val target = asTarget destination val afterHandler = case (isTail, handleExit) of (true, true) => (* Tail and exited. *) handleCode | (true, false) => (* Tail and not exited. *) returnInstruction(context, handlerTarget, handleCode) | (false, _) => (* Not tail. *) handleCode val addLabel = case skipExpLabel of SOME lab => BlockLabel lab:: afterHandler | NONE => afterHandler in (BlockSimple(LoadArgument{source=RegisterArgument handleResult, dest=target, kind=movePolyWord}) :: addLabel, RegisterArgument target, isTail) end | codeToICodeRev(BICTuple fields, context, _, destination, tailCode) = let (* TODO: This is a relic of the old fall-back code-generator. It required the result of a tuple to be at the top of the stack. It should be changed. *) val target = asTarget destination (* Actually we want this. *) val memAddr = newPReg() fun loadFields([], n, tlCode) = allocateWithProfileRev(n, 0w0, memAddr, tlCode) | loadFields(f :: rest, n, tlCode) = let (* Defer the evaluation if possible. We may have a constant that we can't move directly but it's better to load it after the allocation otherwise we will have to push the register if we need to GC. *) val (code1, source1, _) = codeToICodeRev(f, context, false, Allowed allowDefer, tlCode) val restAndAlloc = loadFields(rest, n+1, code1) val (code2, source, _) = moveIfNotAllowedRev(Allowed allowInMemMove, restAndAlloc, source1) val storeValue = BlockSimple(StoreArgument{ source=source, offset=n*Word.toInt wordSize, base=memAddr, index=memIndexOrObject, kind=movePolyWord, isMutable=false}) in storeValue :: code2 end val code = BlockSimple InitialisationComplete :: BlockSimple(LoadArgument{source=RegisterArgument memAddr, dest=target, kind=movePolyWord}) :: loadFields(fields, 0, tailCode) in (code, RegisterArgument target, false) end (* Copy the source tuple into the container. There are important special cases for both the source tuple and the container. If the source tuple is a BICTuple we have the fields and can store them without creating a tuple on the heap. If the destination is a local container we can store directly into the stack. 
*) | codeToICodeRev(BICSetContainer{container, tuple, filter}, context as {stackPtr, ...}, _, destination, tailCode) = let local fun createStore containerReg (source, destWord) = StoreArgument{source=source, offset=destWord*Word.toInt nativeWordSize, base=containerReg, index=NoMemIndex, kind=moveNativeWord, isMutable=false} in val findContainer = case container of BICExtract(BICLoadLocal l) => ( case Array.sub(locToPregArray, l) of ContainerLocation{container, stackOffset} => let fun storeToStack(source, destWord) = StoreToStack{source=source, container=container, field=destWord, stackOffset=stackPtr-stackOffset+destWord} in SOME storeToStack end | _ => NONE ) | _ => NONE val (codeContainer, storeInstr) = case findContainer of SOME storeToStack => (tailCode, storeToStack) | NONE => let val containerTarget = newPReg() val (codeContainer, _, _) = codeToICodeRev(container, context, false, SpecificPReg containerTarget, tailCode) in (codeContainer, createStore containerTarget) end end val filterLength = BoolVector.length filter val code = case tuple of BICTuple cl => let (* In theory it's possible that the tuple could contain fields that are not used but nevertheless need to be evaluated for their side-effects. Create all the fields and push to the stack. *) fun codeField(arg, (regs, tailCode)) = let val (c, r, _) = codeToICodeRev(arg, context, false, Allowed allowInMemMove, tailCode) in (r :: regs, c) end val (pregsRev, codeFields) = List.foldl codeField ([], codeContainer) cl val pregs = List.rev pregsRev fun copyField(srcReg, (sourceWord, destWord, tailCode)) = if sourceWord < filterLength andalso BoolVector.sub(filter, sourceWord) then (sourceWord+1, destWord+1, BlockSimple(storeInstr(srcReg, destWord)) :: tailCode) else (sourceWord+1, destWord, tailCode) val (_, _, resultCode) = List.foldl copyField (0, 0, codeFields) pregs in resultCode end | tuple => let (* Copy a heap tuple. It is possible that this is another container in which case we must load the fields directly. We mustn't load its address and then copy because loading the address would be the last reference and might cause the container to be reused prematurely. *) val findContainer = case tuple of BICExtract(BICLoadLocal l) => ( case Array.sub(locToPregArray, l) of ContainerLocation{container, stackOffset} => let fun getAddr sourceWord = StackLocation{wordOffset=stackPtr-stackOffset+sourceWord, container=container, field=sourceWord, cache=NONE} in SOME getAddr end | _ => NONE ) | _ => NONE val (codeTuple, loadField) = case findContainer of SOME getAddr => (codeContainer, getAddr) | NONE => let val tupleTarget = newPReg() val (codeTuple, _, _) = codeToICodeRev(tuple, context, false, SpecificPReg tupleTarget, codeContainer) fun loadField sourceWord = wordOffsetAddress(sourceWord, tupleTarget) in (codeTuple, loadField) end fun copyContainer(sourceWord, destWord, tailCode) = if sourceWord = filterLength then tailCode else if BoolVector.sub(filter, sourceWord) then let val loadReg = newPReg() val code = BlockSimple(storeInstr(RegisterArgument loadReg, destWord)) :: BlockSimple(LoadArgument{source=loadField sourceWord, dest=loadReg, kind=movePolyWord}) :: tailCode in copyContainer(sourceWord+1, destWord+1, code) end else copyContainer(sourceWord+1, destWord, tailCode) in copyContainer(0, 0, codeTuple) end in moveIfNotAllowedRev(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeRev(BICTagTest{test, tag=tagValue, ...}, context, _, destination, tailCode) = (* Check the "tag" word of a union (datatype). N.B. 
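(* The filter in BICSetContainer above selects which fields of the source
   tuple are stored; selected fields are packed into consecutive slots of the
   container.  A value-level sketch of that packing, using an ordinary list in
   place of the container (illustration only, not the generated code): *)
fun filterFields (filter: BoolVector.vector) (fields: 'a list) : 'a list =
    List.rev(#2(List.foldl
        (fn (f, (i, acc)) =>
            (i + 1,
             if i < BoolVector.length filter andalso BoolVector.sub(filter, i)
             then f :: acc else acc))
        (0, []) fields))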
Not the same as testing the tag bit of a word. *) let val ccRef = newCCRef() val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (testCode, tagArg, _) = codeToICodeRev(test, context, false, Allowed memOrReg, tailCode) val target = asTarget destination in (makeBoolResultRev(JE, ccRef, target, (* Use CompareLiteral because the tag must fit in 32-bits. *) BlockSimple(CompareLiteral{arg1=tagArg, arg2=tag(Word.toLargeInt tagValue), opSize=polyWordOpSize, ccRef=ccRef}) :: testCode), RegisterArgument target, false) end | codeToICodeRev(BICLoadOperation instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeLoad(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICStoreOperation instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeStore(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICBlockOperation ({kind=BlockOpEqualByte, sourceLeft, destRight, length}), context, _, destination, tailCode) = let val vec1Reg = newUReg() and vec2Reg = newUReg() val ccRef = newCCRef() val (leftCode, leftUntag, {base=leftBase, offset=leftOffset, index=leftIndex, ...}) = codeAddressRev(sourceLeft, true, context, tailCode) val (rightCode, rightUntag, {base=rightBase, offset=rightOffset, index=rightIndex, ...}) = codeAddressRev(destRight, true, context, leftCode) val (lengthCode, lengthUntag, lengthArg) = codeAsUntaggedToRegRev(length, false (* unsigned *), context, rightCode) val target = asTarget destination val code = makeBoolResultRev(JE, ccRef, target, BlockSimple(CompareByteVectors{ vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lengthArg, ccRef=ccRef }) :: lengthUntag @ BlockSimple(loadAddress{base=rightBase, offset=rightOffset, index=rightIndex, dest=vec2Reg}) :: rightUntag @ BlockSimple(loadAddress{base=leftBase, offset=leftOffset, index=leftIndex, dest=vec1Reg}) :: leftUntag @ lengthCode) in (code, RegisterArgument target, false) end | codeToICodeRev(BICBlockOperation instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeBlock(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end and codeToICodeUnaryRev({oper=BuiltIns.NotBoolean, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val allow = Allowed {anyConstant=false, const32s=false, memAddr=true, existingPreg=true} val (argCode, testDest, _) = codeToICodeRev(arg1, context, false, allow, tailCode) in (* Test the argument and return a boolean result. If either the argument is a condition or the result is used in a test this will be better than using XOR. *) (makeBoolResultRev(JNE, ccRef, target, BlockSimple(CompareLiteral{arg1=testDest, arg2=tag 1, opSize=polyWordOpSize, ccRef=ccRef}) :: argCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.IsTaggedValue, arg1}, context, _, destination, tailCode) = let val ccRef = newCCRef() val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (testCode, testResult, _) = codeToICodeRev(arg1, context, false, Allowed memOrReg, tailCode) (* Test the tag bit. This sets the zero bit if the value is untagged. 
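(* NotBoolean above is compiled as a comparison rather than an XOR: comparing
   the argument with tag 1 and selecting on "not equal" yields the negation,
   and the condition code can often be reused directly by an enclosing test.
   This presumes the usual tagged booleans, false = tag 0 and true = tag 1, as
   the comparison with tag 1 suggests.  On the tagged words themselves, with
   tag n = 2*n+1 (illustration only): *)
fun taggedNot (b: word) : word =
    if b <> 0w3 (* tag 1 = true *) then 0w3 else 0w1 (* tag 0 = false *)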
*) val target = asTarget destination in (makeBoolResultRev(JNE, ccRef, target, BlockSimple(TestTagBit{arg=testResult, ccRef=ccRef}) :: testCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.MemoryCellLength, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val argReg1 = newUReg() and argReg2 = newUReg() and argReg3 = newUReg() (* These are untagged until the tag is put in. *) and ccRef1 = newCCRef() and ccRef2 = newCCRef() and ccRef3 = newCCRef() (* Get the length of a memory cell (heap object). We need to mask out the top byte containing the flags and to tag the result. The mask is 56 bits on 64-bit which won't fit in an inline constant. Since we have to shift it anyway we might as well do this by shifts. *) val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=OR, resultReg=target, operand1=argReg3, operand2=IntegerConstant 1, ccRef=ccRef3, opSize=polyWordOpSize}) :: BlockSimple(ShiftOperation{shift=SHR, resultReg=argReg3, operand=argReg2, shiftAmount=IntegerConstant 7 (* 8-tagshift*), ccRef=ccRef2, opSize=polyWordOpSize }) :: BlockSimple(ShiftOperation{shift=SHL, resultReg=argReg2, operand=argReg1, shiftAmount=IntegerConstant 8, ccRef=ccRef1, opSize=polyWordOpSize }) :: BlockSimple(LoadArgument{source=wordOffsetAddress(~1, addrReg), dest=argReg1, kind=movePolyWord}) :: argCode, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.MemoryCellFlags, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val argReg1 = newUReg() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(TagValue{ source=argReg1, dest=target, isSigned=false, opSize=OpSize32 }) :: BlockSimple(LoadArgument{source=MemoryLocation{offset= ~1, base=addrReg, index=memIndexOrObject, cache=NONE}, dest=argReg1, kind=MoveByte}) :: argCode, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.ClearMutableFlag, arg1}, context, _, destination, tailCode) = let val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) in moveIfNotAllowedRev(destination, BlockSimple(LockMutable{addr=addrReg}) :: argCode, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeUnaryRev({oper=BuiltIns.LongWordToTagged, arg1}, context, _, destination, tailCode) = let (* This is exactly the same as StringLengthWord at the moment. TODO: introduce a new ICode entry so that the next stage can optimise longword operations. *) val target = asTarget destination val argReg1 = newUReg() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val code = BlockSimple(TagValue{ source=argReg1, dest=target, isSigned=false, opSize=polyWordOpSize }) :: (* Use movePolyWord even on 32-in-64 since we're producing a 32-bit value anyway. 
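(* The MemoryCellLength sequence above clears the top (flags) byte of the
   header word and tags the result in one pass: shifting left by 8 discards the
   flags byte, shifting right by only 7 leaves the length doubled, and OR-ing
   in 1 sets the tag bit.  The same arithmetic on a 64-bit header word,
   assuming the length occupies the low 56 bits (Word64 is used here purely for
   the illustration): *)
fun taggedLengthFromHeader (header: Word64.word) : Word64.word =
    Word64.orb(Word64.>>(Word64.<<(header, 0w8), 0w7), 0w1)
(* e.g. length 5 with flags byte 0wx40 in the top byte:
   taggedLengthFromHeader 0wx4000000000000005 = 0wxB, i.e. tag 5. *)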
*) BlockSimple(LoadArgument{source=wordAt addrReg, dest=argReg1, kind=movePolyWord}) :: argCode in (code, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.SignedToLongWord, arg1}, context, _, destination, tailCode) = let val addrReg = newPReg() and untagArg = newUReg() val (argCode, argReg1) = codeToPRegRev(arg1, context, tailCode) val (signExtend, sxReg) = case targetArch of ObjectId32Bit => let val sReg = newUReg() in ([BlockSimple(SignExtend32To64{source=RegisterArgument argReg1, dest=sReg})], sReg) end | _ => ([], argReg1) val code = BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untagArg, dest=addrReg, saveRegs=[]}) :: BlockSimple(UntagValue{source=sxReg, dest=untagArg, isSigned=true, cache=NONE, opSize=nativeWordOpSize}) :: signExtend @ argCode in moveIfNotAllowedRev(destination, code, RegisterArgument addrReg) end | codeToICodeUnaryRev({oper=BuiltIns.UnsignedToLongWord, arg1}, context, _, destination, tailCode) = let val addrReg = newPReg() and untagArg = newUReg() val (argCode, argReg1) = codeToPRegRev(arg1, context, tailCode) val code = BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untagArg, dest=addrReg, saveRegs=[]}) :: (* We can just use a polyWord operation to untag the unsigned value. *) BlockSimple(UntagValue{source=argReg1, dest=untagArg, isSigned=false, cache=NONE, opSize=polyWordOpSize}) :: argCode in moveIfNotAllowedRev(destination, code, RegisterArgument addrReg) end | codeToICodeUnaryRev({oper=BuiltIns.RealNeg precision, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val fpRegSrc = newUReg() and fpRegDest = newUReg() and sse2ConstReg = newUReg() (* The SSE2 code uses an SSE2 logical operation to flip the sign bit. This requires the values to be loaded into registers first because the logical operations require 128-bit operands. *) val (argCode, aReg1) = codeToPReg(arg1, context) (* Double precision values are always boxed and single precision values if they won't fit in a word. Otherwise we can using tagging. *) open BuiltIns val load = if precision = PrecDouble then BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveDouble}) else if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument aReg1, dest=fpRegSrc, cache=NONE}) else BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveFloat}) val code = case fpMode of FPModeX87 => [BlockSimple(X87FPUnaryOps{ fpOp=FCHS, dest=fpRegDest, source=fpRegSrc})] | FPModeSSE2 => let (* In single precision mode the sign bit is in the low 32-bits. There may be a better way to load it. 
*) val signBit = if precision = PrecDouble then realSignBit else floatSignBit in [BlockSimple(LoadArgument{source=AddressConstant signBit, dest=sse2ConstReg, kind=MoveDouble}), BlockSimple(SSE2FPBinary{opc=SSE2BXor, resultReg=fpRegDest, arg1=fpRegSrc, arg2=RegisterArgument sse2ConstReg})] end val result = boxOrTagReal(fpRegDest, target, precision) in (revApp(argCode @ load :: code @ result, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.RealAbs precision, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val fpRegSrc = newUReg() and fpRegDest = newUReg() and sse2ConstReg = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) open BuiltIns val load = if precision = PrecDouble then BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveDouble}) else if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument aReg1, dest=fpRegSrc, cache=NONE}) else BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveFloat}) val code = case fpMode of FPModeX87 => [BlockSimple(X87FPUnaryOps{ fpOp=FABS, dest=fpRegDest, source=fpRegSrc})] | FPModeSSE2 => let val mask = if precision = PrecDouble then realAbsMask else floatAbsMask in [BlockSimple(LoadArgument{source=AddressConstant mask, dest=sse2ConstReg, kind=MoveDouble}), BlockSimple(SSE2FPBinary{opc=SSE2BAnd, resultReg=fpRegDest, arg1=fpRegSrc, arg2=RegisterArgument sse2ConstReg})] end val result = boxOrTagReal(fpRegDest, target, precision) in (revApp(argCode @ load :: code @ result, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.RealFixedInt precision, arg1}, context, _, destination, tailCode) = let open BuiltIns val target = asTarget destination val untagReg = newUReg() and fpReg = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) val floatOp = case fpMode of FPModeX87 => X87Float{ dest=fpReg, source=RegisterArgument untagReg} | FPModeSSE2 => SSE2IntToReal{ dest=fpReg, source=RegisterArgument untagReg, isDouble=precision=PrecDouble} val code = argCode @ [BlockSimple(UntagValue{source=aReg1, dest=untagReg, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple floatOp] @ boxOrTagReal(fpReg, target, precision) in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.FloatToDouble, arg1}, context, _, destination, tailCode) = let (* Convert a single precision floating point value to double precision. *) val target = asTarget destination val fpReg = newUReg() and fpReg2 = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) (* MoveFloat always converts from single to double-precision. 
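(* RealNeg and RealAbs above operate on the bit pattern of the float: XOR with
   the sign bit flips the sign, AND with its complement clears it.  For IEEE
   754 doubles the standard patterns are shown below; the realSignBit and
   realAbsMask cells referred to in the code presumably hold constants of this
   shape, but that is an assumption here, not taken from this file. *)
val doubleSignBit : Word64.word = 0wx8000000000000000  (* sign bit of a double *)
val doubleAbsMask : Word64.word = 0wx7FFFFFFFFFFFFFFF  (* everything but the sign *)
(* negation: bits xorb doubleSignBit;  absolute value: bits andb doubleAbsMask *)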
*) val unboxOrUntag = case (fpMode, wordSize) of (FPModeX87, _) => [BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpReg2, kind=MoveFloat})] | (FPModeSSE2, 0w4) => [BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpReg, kind=MoveFloat}), BlockSimple(SSE2FPUnary{opc=SSE2UFloatToDouble, resultReg=fpReg2, source=RegisterArgument fpReg})] | (FPModeSSE2, _) => [BlockSimple(UntagFloat{source=RegisterArgument aReg1, dest=fpReg, cache=NONE}), BlockSimple(SSE2FPUnary{opc=SSE2UFloatToDouble, resultReg=fpReg2, source=RegisterArgument fpReg})] val boxFloat = case fpMode of FPModeX87 => BoxX87Double | FPModeSSE2 => BoxSSE2Double val code = argCode @ unboxOrUntag @ [BlockSimple(BoxValue{boxKind=boxFloat, source=fpReg2, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.DoubleToFloat, arg1}, context, _, destination, tailCode) = let (* Convert a double precision value to a single precision using the current rounding mode. This is simpler than setting the rounding mode and then restoring it. *) val target = asTarget destination val fpReg = newUReg() and fpReg2 = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) (* In 32-bit mode we need to box the float. In 64-bit mode we can tag it. *) val boxOrTag = case fpMode of FPModeX87 => [BlockSimple(BoxValue{boxKind=BoxX87Float, source=fpReg, dest=target, saveRegs=[]})] | FPModeSSE2 => BlockSimple(SSE2FPUnary{opc=SSE2UDoubleToFloat, resultReg=fpReg2, source=RegisterArgument fpReg}) :: boxOrTagReal(fpReg2, target, BuiltIns.PrecSingle) val code = argCode @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpReg, kind=MoveDouble})] @ boxOrTag in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.RealToInt(precision, rndMode), arg1}, context, _, destination, tailCode) = let val target = asTarget destination val chkOverflow = newCCRef() val convResult = newUReg() and wrkReg2 = newUReg() (* Convert a floating point value to an integer. We need to raise overflow if the result is out of range. We first convert the value to 32/64 bits then tag it. An overflow can happen either because the real number does not fit in 32/64 bits or if it is not a 31/63 bit value. Fortunately, if the first conversion fails the result is a value that causes an overflow when we try it shift it so the check for overflow only needs to happen there. There is an SSE2 instruction that implements truncation (round to zero) directly but in other cases we need to set the rounding mode. 
*) val doConvert = case (fpMode, precision) of (FPModeX87, _) => let val fpReg = newUReg() val (argCode, aReg) = codeToPReg(arg1, context) fun doConvert() = [BlockSimple(X87RealToInt{source=fpReg, dest=convResult })] in argCode @ [BlockSimple(LoadArgument{source=wordAt aReg, dest=fpReg, kind=MoveDouble})] @ setAndRestoreRounding(rndMode, doConvert) end | (FPModeSSE2, BuiltIns.PrecDouble) => let val (argCode, argReg) = codeToPReg(arg1, context) fun doConvert() = [BlockSimple( SSE2RealToInt{source=wordAt argReg, dest=convResult, isDouble=true, isTruncate = rndMode = IEEEReal.TO_ZERO }) ] in argCode @ ( case rndMode of IEEEReal.TO_ZERO => doConvert() | _ => setAndRestoreRounding(rndMode, doConvert)) end | (FPModeSSE2, BuiltIns.PrecSingle) => let val (argCode, aReg) = codeToPReg(arg1, context) val fpReg = newUReg() fun doConvert() = [BlockSimple( SSE2RealToInt{source=RegisterArgument fpReg, dest=convResult, isDouble=false, isTruncate = rndMode = IEEEReal.TO_ZERO })] in argCode @ [BlockSimple(UntagFloat{source=RegisterArgument aReg, dest=fpReg, cache=NONE})] @ ( case rndMode of IEEEReal.TO_ZERO => doConvert() | _ => setAndRestoreRounding(rndMode, doConvert) ) end val checkAndTag = BlockSimple(ShiftOperation{ shift=SHL, resultReg=wrkReg2, operand=convResult, shiftAmount=IntegerConstant 1, ccRef=chkOverflow, opSize=polyWordOpSize}) :: checkOverflow context chkOverflow @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=wrkReg2, operand2=IntegerConstant 1, ccRef = newCCRef(), opSize=polyWordOpSize})] in (revApp(doConvert @ checkAndTag, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.TouchAddress, arg1}, context, _, destination, tailCode) = let (* Put the value in a register. This is not entirely necessary but ensures that if the value is a constant the constant will be included in the code. *) val (argCode, aReg) = codeToPRegRev(arg1, context, tailCode) in moveIfNotAllowedRev(destination, BlockSimple(TouchArgument{source=aReg}) :: argCode, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeUnaryRev({oper=BuiltIns.AllocCStack, arg1}, context, _, destination, tailCode) = (* Allocate space on the C stack. Assumes that the argument has already been aligned. *) let val target = asTarget destination val (argCode, untaggedArg) = case arg1 of BICConstnt(value, _) => (tailCode, IntegerConstant(Word.toLargeInt(toShort value)) (* Leave untagged *)) | _ => let val (argCode, aReg) = codeToPRegRev(arg1, context, tailCode) val arg1Untagged = newUReg() in ( BlockSimple(UntagValue{source=aReg, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}) :: argCode, RegisterArgument arg1Untagged ) end val argReg1 = newUReg() and resReg1 = newUReg() val code = BlockSimple(BoxValue{boxKind=BoxLargeWord, source=resReg1, dest=target, saveRegs=[]}) :: BlockSimple(StoreMemReg{offset=memRegCStackPtr, source=resReg1, kind=moveNativeWord}) :: BlockSimple(ArithmeticFunction{oper=SUB, resultReg=resReg1, operand1=argReg1, operand2=untaggedArg, ccRef=newCCRef(), opSize=nativeWordOpSize}) :: BlockSimple(LoadMemReg{offset=memRegCStackPtr, dest=argReg1, kind=moveNativeWord}) :: argCode in (code, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.LockMutex, arg1}, context, _, destination, tailCode) = let (* Temporarily don't bother with the spin-lock. 
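(* The checkAndTag sequence above turns the untagged conversion result n into
   the tagged value 2*n+1 by shifting left one place and then adding one; the
   shift also sets the overflow flag when n does not fit in a tagged 31/63-bit
   value, which is what checkOverflow tests.  A value-level sketch of just the
   tagging step (the overflow test itself is a hardware flag and is only hinted
   at here): *)
fun tagFixedInt (n: int) : int =
    (* assumes n is in the tagged range; the generated code raises Overflow otherwise *)
    2 * n + 1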
*) val incrReg = newUReg() and resultReg = newUReg() and ccRef = newCCRef() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val target = asTarget destination val code = BlockSimple(CompareLiteral{arg1=RegisterArgument resultReg, arg2=0, opSize=nativeWordOpSize, ccRef=ccRef }) :: BlockSimple(AtomicExchangeAndAdd{base=addrReg, source=incrReg, resultReg=resultReg}) :: BlockSimple(LoadArgument{source=IntegerConstant 1, dest=incrReg, kind=moveNativeWord}) :: argCode in (makeBoolResultRev(JE, ccRef, target, code), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.TryLockMutex, arg1}, context, _, destination, tailCode) = let val toStoreReg = newUReg() and compareReg = newUReg() and resultReg = newUReg() and ccRef = newCCRef() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val target = asTarget destination (* If the current value is zero (unlocked) set it to one (locked) otherwise do nothing. *) val code = BlockSimple(AtomicCompareAndExchange{base=addrReg, compare=compareReg, toStore=toStoreReg, resultReg=resultReg, ccRef=ccRef}) :: BlockSimple(LoadArgument{source=IntegerConstant 0, dest=compareReg, kind=moveNativeWord}) :: BlockSimple(LoadArgument{source=IntegerConstant 1, dest=toStoreReg, kind=moveNativeWord}) :: argCode in (makeBoolResultRev(JE, ccRef, target, code), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.UnlockMutex, arg1}, context, _, destination, tailCode) = let (* Unlock a mutex - (atomically) exchange with 0 (unlock) and return true if the previous value was 1 i.e. only this thread has locked it. *) val newValReg = newUReg() and resultReg = newUReg() and ccRef = newCCRef() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val target = asTarget destination val code = BlockSimple(CompareLiteral{arg1=RegisterArgument resultReg, arg2=1, opSize=nativeWordOpSize, ccRef=ccRef }) :: BlockSimple(AtomicExchange{base=addrReg, source=newValReg, resultReg=resultReg}) :: BlockSimple(LoadArgument{source=IntegerConstant 0, dest=newValReg, kind=moveNativeWord}) :: argCode in (makeBoolResultRev(JE, ccRef, target, code), RegisterArgument target, false) end and codeToICodeBinaryRev({oper=BuiltIns.WordComparison{test, isSigned}, arg1, arg2=BICConstnt(arg2Value, _)}, context, _, destination, tailCode) = let (* Comparisons. Because this is also used for pointer equality and even for exception matching it is perfectly possible that the argument could be an address. The higher levels used to generate this for pointer equality. *) val ccRef = newCCRef() val comparison = (* If the argument is a tagged value that will fit in 32-bits we can use the literal version. Use toLargeIntX here because the value will be sign-extended even if we're actually doing an unsigned comparison. *) if isShort arg2Value andalso is32bit(tag(Word.toLargeIntX(toShort arg2Value))) then let val allow = Allowed {anyConstant=false, const32s=false, memAddr=true, existingPreg=true} in (* We're often comparing with a character or a string length field that has to be untagged. In that case we can avoid loading it into a register and untagging it by doing the comparison directly. 
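(* The three mutex operations above compile to single atomic instructions:
   LockMutex adds one with an atomic exchange-and-add and succeeds if the old
   value was zero; TryLockMutex does a compare-and-exchange of 0 with 1; and
   UnlockMutex atomically exchanges the cell with 0 and reports whether the old
   value was exactly 1, i.e. no other thread was waiting.  A sketch of how a
   caller might combine them with CPUPause for a bounded spin before blocking -
   the function names here are hypothetical, not the ML library API: *)
fun spinThenLock (tryLock: unit -> bool, pause: unit -> unit, block: unit -> unit) =
    let fun spin 0 = block ()                 (* give up spinning and block *)
          | spin n = if tryLock () then () else (pause (); spin (n - 1))
    in spin 100 end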
*) case arg1 of BICLoadOperation{kind=LoadStoreUntaggedUnsigned, address} => let val (codeBaseIndex, codeUntag, memLoc) = codeAddressRev(address, false, context, tailCode) val literal = Word.toLargeIntX(toShort arg2Value) in BlockSimple(CompareLiteral{arg1=MemoryLocation memLoc, arg2=literal, opSize=polyWordOpSize, ccRef=ccRef}) :: codeUntag @ codeBaseIndex end | BICLoadOperation{kind=LoadStoreMLByte _, address} => let val (codeBaseIndex, codeUntag, {base, index, offset, ...}) = codeAddressRev(address, true, context, tailCode) val _ = toShort arg2Value >= 0w0 andalso toShort arg2Value < 0w256 orelse raise InternalError "Compare byte not a byte" val literal = Word8.fromLargeWord(Word.toLargeWord(toShort arg2Value)) in BlockSimple(CompareByteMem{arg1={base=base, index=index, offset=offset}, arg2=literal, ccRef=ccRef}) :: codeUntag @ codeBaseIndex end | BICUnary({oper=BuiltIns.MemoryCellFlags, arg1}) => (* This occurs particularly in arbitrary precision comparisons. *) let val (baseCode, baseReg) = codeToPRegRev(arg1, context, tailCode) val _ = toShort arg2Value >= 0w0 andalso toShort arg2Value < 0w256 orelse raise InternalError "Compare memory cell not a byte" val literal = Word8.fromLargeWord(Word.toLargeWord(toShort arg2Value)) in BlockSimple(CompareByteMem{arg1={base=baseReg, index=memIndexOrObject, offset= ~1}, arg2=literal, ccRef=ccRef}) :: baseCode end | _ => let (* TODO: We could include rarer cases of tagging by looking at the code and seeing if it's a TagValue. *) val (testCode, testDest, _) = codeToICodeRev(arg1, context, false, allow, tailCode) val literal = tag(Word.toLargeIntX(toShort arg2Value)) in BlockSimple(CompareLiteral{arg1=testDest, arg2=literal, opSize=polyWordOpSize, ccRef=ccRef}) :: testCode end end else (* Addresses or larger values. We need to use a register comparison. *) let val (testCode, testReg) = codeToPRegRev(arg1, context, tailCode) val arg2Arg = constantAsArgument arg2Value in BlockSimple(WordComparison{arg1=testReg, arg2=arg2Arg, ccRef=ccRef, opSize=polyWordOpSize}) :: testCode end val target = asTarget destination in (makeBoolResultRev(testAsBranch(test, isSigned, true), ccRef, target, comparison), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordComparison{test, isSigned}, arg1=BICConstnt(arg1Value, _), arg2}, context, _, destination, tailCode) = let (* If we have the constant first we need to reverse the test so the first argument is a register. *) val ccRef = newCCRef() val comparison = if isShort arg1Value andalso is32bit(tag(Word.toLargeIntX(toShort arg1Value))) then let val allow = Allowed {anyConstant=false, const32s=false, memAddr=true, existingPreg=true} val (testCode, testDest, _) = codeToICodeRev(arg2, context, false, allow, tailCode) val literal = tag(Word.toLargeIntX(toShort arg1Value)) in BlockSimple(CompareLiteral{arg1=testDest, arg2=literal, opSize=polyWordOpSize, ccRef=ccRef}) :: testCode end else (* Addresses or larger values. We need to use a register comparison. 
*) let val (testCode, testReg) = codeToPRegRev(arg2, context, tailCode) val arg1Arg = constantAsArgument arg1Value in BlockSimple(WordComparison{arg1=testReg, arg2=arg1Arg, ccRef=ccRef, opSize=polyWordOpSize}) :: testCode end val target = asTarget destination in (makeBoolResultRev(testAsBranch(leftRightTest test, isSigned, true), ccRef, target, comparison), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordComparison {test, isSigned}, arg1, arg2}, context, _, destination, tailCode) = let val ccRef = newCCRef() val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (arg1Code, arg1Result, _) = codeToICodeRev(arg1, context, false, Allowed memOrReg, tailCode) val (arg2Code, arg2Result, _) = codeToICodeRev(arg2, context, false, Allowed memOrReg, arg1Code) val target = asTarget destination val code = case (arg1Result, arg2Result) of (RegisterArgument arg1Reg, arg2Result) => makeBoolResultRev(testAsBranch(test, isSigned, true), ccRef, target, BlockSimple(WordComparison{arg1=arg1Reg, arg2=arg2Result, ccRef=ccRef, opSize=polyWordOpSize}) :: arg2Code) | (arg1Result, RegisterArgument arg2Reg) => (* The second argument is in a register - switch the sense of the test. *) makeBoolResultRev(testAsBranch(leftRightTest test, isSigned, true), ccRef, target, BlockSimple(WordComparison{arg1=arg2Reg, arg2=arg1Result, ccRef=ccRef, opSize=polyWordOpSize}) :: arg2Code) | (arg1Result, arg2Result) => let (* Have to load an argument - pick the first. *) val arg1Reg = newPReg() in makeBoolResultRev(testAsBranch(test, isSigned, true), ccRef, target, BlockSimple(WordComparison{arg1=arg1Reg, arg2=arg2Result, ccRef=ccRef, opSize=polyWordOpSize}) :: BlockSimple(LoadArgument{source=arg1Result, dest=arg1Reg, kind=movePolyWord}) :: arg2Code) end in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.PointerEq, arg1, arg2}, context, isTail, destination, tailCode) = (* Equality of general values which can include pointers. This can be treated exactly as a word equality. It has to be analysed differently for indexed cases. *) codeToICodeBinaryRev({oper=BuiltIns.WordComparison{test=BuiltIns.TestEqual, isSigned=false}, arg1=arg1, arg2=arg2}, context, isTail, destination, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.FixedPrecisionArith oper, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val code = codeFixedPrecisionArith(oper, arg1, arg2, context, target, checkOverflow context) in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithAdd, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination (* If the argument is a constant we can subtract the tag beforehand. N.B. it is possible to have type-incorrect values in dead code. i.e. code that will never be executed because of a run-time check. 
*) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef = newCCRef(), opSize=polyWordOpSize}) :: arg1Code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithAdd, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg2Code, aReg2) = codeToPRegRev(arg2, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg2, operand2=IntegerConstant constVal, ccRef = newCCRef(), opSize=polyWordOpSize}) :: arg2Code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) (* Use LEA to do the addition since we're not concerned with overflow. This is shorter than subtracting the tag and adding the values and also moves the result into the appropriate register. *) val code = arg1Code @ arg2Code @ [BlockSimple(LoadEffectiveAddress{base=SOME aReg1, offset= ~1, index=MemIndex1 aReg2, dest=target, opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithSub, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=SUB, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef=newCCRef(), opSize=polyWordOpSize}) :: arg1Code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val aReg3 = newPReg() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val code = arg1Code @ arg2Code @ (* Do the subtraction and add in the tag bit. This could be reordered if we have cascaded operations since we don't need to check for overflow. 
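(* For tagged words tag(a) = 2*a+1, so the LEA form of addition above computes
   tag(a) + tag(b) - 1 = 2*(a+b) + 1 = tag(a+b) in a single instruction, and
   subtraction computes tag(a) - tag(b) + 1 = tag(a-b).  Spelled out on plain
   words (an illustration of the identities, not the generated code): *)
fun tagW (a: word) = 0w2 * a + 0w1
fun taggedAdd (ta: word, tb: word) = ta + tb - 0w1   (* = tagW(a+b) *)
fun taggedSub (ta: word, tb: word) = ta - tb + 0w1   (* = tagW(a-b) *)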
*) [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=aReg1, operand2=RegisterArgument aReg2, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg3, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMult, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = codeMultiplyConstantWordRev(arg1, context, destination, if isShort value then toShort value else 0w0, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMult, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = codeMultiplyConstantWordRev(arg2, context, destination, if isShort value then toShort value else 0w0, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val arg1Untagged = newUReg() and arg2Untagged = newUReg() and resUntagged = newUReg() val code = arg1Code @ arg2Code @ (* Shift one argument and subtract the tag from the other. It's possible this could be reordered if we have a value that is already untagged. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=arg2Untagged, operand1=aReg2, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=arg1Untagged, operand2=RegisterArgument arg2Untagged, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() val code = arg1Code @ arg2Code @ (* Shift both of the arguments to remove the tags. We don't test for zero here - that's done explicitly. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = false, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=quotient, dest=target, isSigned=false, opSize=polyWordOpSize })] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMod, arg1, arg2}, context, _, destination, tailCode) = let (* Identical to Quot except that the result is the remainder. 
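(* Multiplication above untags one operand fully (giving a), merely strips the
   tag bit from the other (giving 2*b), multiplies to get 2*a*b and adds the
   tag back, so only one shift is needed.  Division and Mod untag both
   operands, use the hardware divide and re-tag the quotient or remainder.
   The same identities on plain words (illustration only; the zero-divisor
   check is done separately in the compiled code): *)
fun taggedMul (ta: word, tb: word) =
    (ta div 0w2) * (tb - 0w1) + 0w1                 (* = tag(a*b) *)
fun taggedDiv (ta: word, tb: word) =
    0w2 * ((ta div 0w2) div (tb div 0w2)) + 0w1     (* = tag(a div b) *)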
*) val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() val code = arg1Code @ arg2Code @ (* Shift both of the arguments to remove the tags. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = false, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=remainder, dest=target, isSigned=false, opSize=polyWordOpSize })] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith _, ...}, _, _, _, _) = raise InternalError "codeToICodeNonRev: WordArith - unimplemented operation" | codeToICodeBinaryRev({oper=BuiltIns.WordLogical logOp, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) (* Use a semitagged value for XOR. This preserves the tag bit. Use toLargeIntX here because the operations will sign-extend 32-bit values. *) val constVal = if isShort value then (case logOp of BuiltIns.LogicalXor => semitag | _ => tag) (Word.toLargeIntX(toShort value)) else 0 val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. *) val opSize = if logOp = BuiltIns.LogicalAnd andalso constVal <= 0xffffffff andalso constVal >= 0 then OpSize32 else polyWordOpSize val code = arg1Code @ [BlockSimple(ArithmeticFunction{oper=oper, resultReg=target, operand1=arg1Reg, operand2=IntegerConstant constVal, ccRef=newCCRef(), opSize=opSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical logOp, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg2Code, arg2Reg) = codeToPReg(arg2, context) (* Use a semitagged value for XOR. This preserves the tag bit. *) val constVal = if isShort value then (case logOp of BuiltIns.LogicalXor => semitag | _ => tag) (Word.toLargeIntX(toShort value)) else 0 val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. *) val opSize = if logOp = BuiltIns.LogicalAnd andalso constVal <= 0xffffffff andalso constVal >= 0 then OpSize32 else polyWordOpSize val code = arg2Code @ [BlockSimple(ArithmeticFunction{oper=oper, resultReg=target, operand1=arg2Reg, operand2=IntegerConstant constVal, ccRef=newCCRef(), opSize=opSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical BuiltIns.LogicalOr, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val (arg2Code, arg2Reg) = codeToPReg(arg2, context) val code = arg1Code @ arg2Code @ (* Or-ing preserves the tag bit. 
*) [BlockSimple(ArithmeticFunction{oper=OR, resultReg=target, operand1=arg1Reg, operand2=RegisterArgument arg2Reg, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical BuiltIns.LogicalAnd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val (arg2Code, arg2Reg) = codeToPReg(arg2, context) val code = arg1Code @ arg2Code @ (* Since they're both tagged the result will be tagged. *) [BlockSimple(ArithmeticFunction{oper=AND, resultReg=target, operand1=arg1Reg, operand2=RegisterArgument arg2Reg, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical BuiltIns.LogicalXor, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val (arg2Code, arg2Reg) = codeToPReg(arg2, context) val aReg3 = newPReg() val code = arg1Code @ arg2Code @ (* We need to restore the tag bit after the operation. *) [BlockSimple(ArithmeticFunction{oper=XOR, resultReg=aReg3, operand1=arg1Reg, operand2=RegisterArgument arg2Reg, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=OR, resultReg=target, operand1=aReg3, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordShift BuiltIns.ShiftLeft, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = (* Use the general case multiplication code. This will use a shift except for small values. It does detect special cases such as multiplication by 4 and 8 which can be implemented with LEA. *) codeMultiplyConstantWordRev(arg1, context, destination, if isShort value then Word.<<(0w1, toShort value) else 0w1, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.WordShift shift, arg1, arg2}, context, _, destination, tailCode) = (* N.B. X86 shifts of greater than the word length mask the higher bits. That isn't what ML wants but that is dealt with at a higher level *) let open BuiltIns val target = asTarget destination (* Load the value into an untagged register. If this is a left shift we need to clear the tag bit. We don't need to do that for right shifts. *) val argRegUntagged = newUReg() val arg1Code = case arg1 of BICConstnt(value, _) => let (* Remove the tag bit. This isn't required for right shifts. *) val cnstntVal = if isShort value then semitag(Word.toLargeInt(toShort value)) else 1 in [BlockSimple(LoadArgument{source=IntegerConstant cnstntVal, dest=argRegUntagged, kind=movePolyWord})] end | _ => let val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val removeTag = case shift of ShiftLeft => ArithmeticFunction{oper=SUB, resultReg=argRegUntagged, operand1=arg1Reg, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize} | _ => LoadArgument{source=RegisterArgument arg1Reg, dest=argRegUntagged, kind=movePolyWord} in arg1Code @ [BlockSimple removeTag] end (* The shift amount can usefully be a constant. 
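(* On tagged values OR and AND both leave the tag bit set (1 or 1 = 1,
   1 and 1 = 1), but XOR clears it, which is why the XOR case above ORs the tag
   back in afterwards.  Left shifts likewise strip the tag first and OR it back
   in at the end, while right shifts can leave it and rely on the final OR.  On
   plain words (illustration of the identities, not the generated code): *)
fun taggedXor (ta: word, tb: word) = Word.orb(Word.xorb(ta, tb), 0w1)
fun taggedLsh (ta: word, by: Word.word) = Word.orb(Word.<<(ta - 0w1, by), 0w1)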
*) val (arg2Code, untag2Code, arg2Arg) = codeAsUntaggedByte(arg2, false, context) val resRegUntagged = newUReg() val shiftOp = case shift of ShiftLeft => SHL | ShiftRightLogical => SHR | ShiftRightArithmetic => SAR val code = arg1Code @ arg2Code @ untag2Code @ [BlockSimple(ShiftOperation{ shift=shiftOp, resultReg=resRegUntagged, operand=argRegUntagged, shiftAmount=arg2Arg, ccRef=newCCRef(), opSize=polyWordOpSize }), (* Set the tag by ORing it in. This will work whether or not a right shift has shifted a 1 into this position. *) BlockSimple( ArithmeticFunction{oper=OR, resultReg=target, operand1=resRegUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.AllocateByteMemory, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val sizeReg = newPReg() and baseReg = newPReg() val sizeCode = codeToICodeTarget(arg1, context, false, sizeReg) val (flagsCode, flagUntag, flagArg) = codeAsUntaggedByte(arg2, false, context) val code =sizeCode @ flagsCode @ [BlockSimple(AllocateMemoryVariable{size=sizeReg, dest=baseReg, saveRegs=[]})] @ flagUntag @ [BlockSimple(StoreArgument{ source=flagArg, base=baseReg, offset= ~1, index=memIndexOrObject, kind=MoveByte, isMutable=false}), BlockSimple InitialisationComplete, BlockSimple(LoadArgument{ source=RegisterArgument baseReg, dest=target, kind=movePolyWord})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordComparison test, arg1, arg2}, context, _, destination, tailCode) = let val ccRef = newCCRef() val (arg1Code, arg1Reg) = codeToPRegRev(arg1, context, tailCode) (* In X64 we can extract the word from a constant and do the comparison directly. That can't be done in X86/32 because the value isn't tagged and might look like an address. The RTS scans for comparisons with inline constant addresses. *) val (arg2Code, arg2Operand) = if targetArch <> Native32Bit then (* Native 64-bit or 32-in-64. 
*) ( case arg2 of BICConstnt(value, _) => (arg1Code, IntegerConstant(largeWordConstant value)) | _ => let val (code, reg) = codeToPRegRev(arg2, context, arg1Code) in (code, wordAt reg) end ) else let val (code, reg) = codeToPRegRev(arg2, context, arg1Code) in (code, wordAt reg) end val argReg = newUReg() val target = asTarget destination val code = makeBoolResultRev(testAsBranch(test, false, true), ccRef, target, BlockSimple(WordComparison{arg1=argReg, arg2=arg2Operand, ccRef=ccRef, opSize=nativeWordOpSize}) :: BlockSimple(LoadArgument{source=wordAt arg1Reg, dest=argReg, kind=moveNativeWord}) :: arg2Code) in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithAdd, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val code =arg1Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithAdd, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val code = arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg2, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=aReg3, operand1=argReg, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithSub, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val code = arg1Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument 
target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=argReg, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val resValue = newUReg() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val argReg1 = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg1, kind=moveNativeWord}), BlockSimple(Multiplication{resultReg=resValue, operand1=argReg1, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=resValue, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val quotient = newUReg() and remainder = newUReg() val dividendReg = newUReg() and divisorReg = newUReg() val code = arg1Code @ arg2Code @ (* We don't test for zero here - that's done explicitly. *) [BlockSimple(LoadArgument{source=wordAt aReg1, dest=dividendReg, kind=moveNativeWord}), BlockSimple(LoadArgument{source=wordAt aReg2, dest=divisorReg, kind=moveNativeWord}), BlockSimple(Division { isSigned = false, dividend=dividendReg, divisor=RegisterArgument divisorReg, quotient=quotient, remainder=remainder, opSize=nativeWordOpSize }), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=quotient, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithMod, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val quotient = newUReg() and remainder = newUReg() val dividendReg = newUReg() and divisorReg = newUReg() val code = arg1Code @ arg2Code @ (* We don't test for zero here - that's done explicitly. 
*) [BlockSimple(LoadArgument{source=wordAt aReg1, dest=dividendReg, kind=moveNativeWord}), BlockSimple(LoadArgument{source=wordAt aReg2, dest=divisorReg, kind=moveNativeWord}), BlockSimple(Division { isSigned = false, dividend=dividendReg, divisor=RegisterArgument divisorReg, quotient=quotient, remainder=remainder, opSize=nativeWordOpSize }), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=remainder, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith _, ...}, _, _, _, _) = raise InternalError "codeToICodeNonRev: LargeWordArith - unimplemented operation" | codeToICodeBinaryRev({oper=BuiltIns.LargeWordLogical logOp, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. *) val opSize = if logOp = BuiltIns.LogicalAnd andalso constantValue <= 0xffffffff andalso constantValue >= 0 then OpSize32 else nativeWordOpSize val code = arg1Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=oper, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=opSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordLogical logOp, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. 
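The constant has no bits set above bit 31 so neither can the result, and a 32-bit operation on X86/64 zero-extends its result into the full register, giving the same value with a shorter encoding.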
*) val opSize = if logOp = BuiltIns.LogicalAnd andalso constantValue <= 0xffffffff andalso constantValue >= 0 then OpSize32 else nativeWordOpSize val code = arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg2, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=oper, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=opSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordLogical logOp, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=oper, resultReg=aReg3, operand1=argReg, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordShift shift, arg1, arg2}, context, _, destination, tailCode) = (* The shift is always a Word.word value i.e. tagged. There is a check at the higher level that the shift does not exceed 32/64 bits. *) let open BuiltIns val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, untag2Code, arg2Arg) = codeAsUntaggedByte(arg2, false, context) val aReg3 = newUReg() val shiftOp = case shift of ShiftLeft => SHL | ShiftRightLogical => SHR | ShiftRightArithmetic => SAR val argReg = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord})] @ untag2Code @ [BlockSimple(ShiftOperation{ shift=shiftOp, resultReg=aReg3, operand=argReg, shiftAmount=arg2Arg, ccRef=newCCRef(), opSize=nativeWordOpSize }), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.RealArith(fpOpPrec as (fpOp, fpPrec)), arg1, arg2}, context, _, destination, tailCode) = let open BuiltIns val commutative = case fpOp of ArithSub => NonCommutative | ArithDiv => NonCommutative | ArithAdd => Commutative | ArithMult => Commutative | _ => raise InternalError "codeToICodeNonRev: RealArith - unimplemented operation" val (argCodeRev, fpRegSrc, arg2Value) = codeFPBinaryArgsRev(arg1, arg2, fpPrec, commutative, context, []) val argCode = List.rev argCodeRev val target = asTarget destination val fpRegDest = newUReg() val arith = case fpMode of FPModeX87 => let val fpOp = case fpOp of ArithAdd => FADD | ArithSub => FSUB | ArithMult => FMUL | ArithDiv => FDIV | _ => raise InternalError "codeToICodeNonRev: RealArith - unimplemented operation" val isDouble = case fpPrec of PrecSingle => false | PrecDouble => true in [BlockSimple(X87FPArith{ opc=fpOp, resultReg=fpRegDest, arg1=fpRegSrc, arg2=arg2Value, isDouble=isDouble})] end | FPModeSSE2 => let val fpOp = case fpOpPrec of (ArithAdd, PrecSingle) => SSE2BAddSingle | (ArithSub, PrecSingle) => SSE2BSubSingle | (ArithMult, PrecSingle) => SSE2BMulSingle | (ArithDiv, PrecSingle) => 
SSE2BDivSingle | (ArithAdd, PrecDouble) => SSE2BAddDouble | (ArithSub, PrecDouble) => SSE2BSubDouble | (ArithMult, PrecDouble) => SSE2BMulDouble | (ArithDiv, PrecDouble) => SSE2BDivDouble | _ => raise InternalError "codeToICodeNonRev: RealArith - unimplemented operation" in [BlockSimple(SSE2FPBinary{ opc=fpOp, resultReg=fpRegDest, arg1=fpRegSrc, arg2=arg2Value})] end (* Box or tag the result. *) val result = boxOrTagReal(fpRegDest, target, fpPrec) in (revApp(argCode @ arith @ result, tailCode), RegisterArgument target, false) end (* Floating point comparison. This is complicated because we have different instruction sequences for SSE2 and X87. We also have to get the handling of unordered (NaN) values right. All the tests are treated as false if either argument is a NaN. To combine that test with the other tests we sometimes have to reverse the comparison. *) | codeToICodeBinaryRev({oper=BuiltIns.RealComparison(BuiltIns.TestEqual, precision), arg1, arg2}, context, _, destination, tailCode) = let (* Get the arguments. It's commutative. *) val (arg2Code, fpReg, arg2Val) = codeFPBinaryArgsRev(arg1, arg2, precision, Commutative, context, tailCode) val ccRef1 = newCCRef() and ccRef2 = newCCRef() val testReg1 = newUReg() and testReg2 = newUReg() and testReg3 = newUReg() (* If this is X87 we get the condition into RAX and test it there. If it is SSE2 we have to treat the unordered result (parity set) specially. *) val isDouble = precision = BuiltIns.PrecDouble val target = asTarget destination val code = case fpMode of FPModeX87 => makeBoolResultRev(JE, ccRef2, target, BlockSimple(ArithmeticFunction{ oper=XOR, resultReg=testReg3, operand1=testReg2, operand2=IntegerConstant 0x4000, ccRef=ccRef2, opSize=OpSize32 }) :: BlockSimple(ArithmeticFunction{ oper=AND, resultReg=testReg2, operand1=testReg1, operand2=IntegerConstant 0x4400, ccRef=newCCRef(), opSize=OpSize32 }) :: BlockSimple(X87FPGetCondition { ccRef=ccRef1, dest=testReg1 }) :: BlockSimple(X87Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code) | FPModeSSE2 => let val noParityLabel = newLabel() val resultLabel = newLabel() val falseLabel = newLabel() val trueLabel = newLabel() val mergeReg = newMergeReg() in BlockSimple(LoadArgument{ source=RegisterArgument mergeReg, dest=target, kind=Move32Bit }) :: BlockLabel resultLabel :: BlockFlow(Unconditional resultLabel) :: (* Result is false if parity is set i.e. unordered or if unequal. *) BlockSimple(LoadArgument{ source=IntegerConstant(tag 0), dest=mergeReg, kind=Move32Bit }) :: BlockLabel falseLabel :: BlockFlow(Unconditional resultLabel) :: (* Result is true if it's ordered and equal. *) BlockSimple(LoadArgument{ source=IntegerConstant(tag 1), dest=mergeReg, kind=Move32Bit }) :: BlockLabel trueLabel :: (* Not unordered - test the equality *) BlockFlow(Conditional{ccRef=ccRef1, condition=JE, trueJump=trueLabel, falseJump=falseLabel}) :: BlockLabel noParityLabel :: (* Go to falseLabel if unordered and therefore not equal. *) BlockFlow(Conditional{ccRef=ccRef1, condition=JP, trueJump=falseLabel, falseJump=noParityLabel}) :: BlockSimple(SSE2Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code end in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.RealComparison(BuiltIns.TestUnordered, precision), arg1, arg2}, context, _, destination, tailCode) = let (* The unordered test is really included because it is easy to implement and is the simplest way of implementing isNan. *) (* Get the arguments. It's commutative. 
*) val (arg2Code, fpReg, arg2Val) = codeFPBinaryArgsRev(arg1, arg2, precision, Commutative, context, tailCode) val ccRef1 = newCCRef() and ccRef2 = newCCRef() val testReg1 = newUReg() and testReg2 = newUReg() and testReg3 = newUReg() (* If this is X87 we get the condition into RAX and test it there. If it is SSE2 we have to treat the unordered result (parity set) specially. *) val isDouble = precision = BuiltIns.PrecDouble val target = asTarget destination val code = case fpMode of FPModeX87 => (* And with 0x4500. We have to use XOR rather than CMP to avoid having an untagged constant comparison. *) makeBoolResultRev(JE, ccRef2, target, BlockSimple(ArithmeticFunction{ oper=XOR, resultReg=testReg3, operand1=testReg2, operand2=IntegerConstant 0x4500, ccRef=ccRef2, opSize=OpSize32 }) :: BlockSimple(ArithmeticFunction{ oper=AND, resultReg=testReg2, operand1=testReg1, operand2=IntegerConstant 0x4500, ccRef=newCCRef(), opSize=OpSize32 }) :: BlockSimple(X87FPGetCondition { ccRef=ccRef1, dest=testReg1 }) :: BlockSimple(X87Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code) | FPModeSSE2 => makeBoolResultRev(JP, ccRef1, target, BlockSimple(SSE2Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code) in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.RealComparison(comparison, precision), arg1, arg2}, context, _, destination, tailCode) = let (* Ordered comparisons are complicated because they are all defined to be false if either argument is a NaN. We have two different tests for a > b and a >= b and implement a < b and a <= b by changing the order of the arguments. *) val (arg1Code, arg1Value) = codeFPArgument(arg1, precision, context, tailCode) val (arg2Code, arg2Value) = codeFPArgument(arg2, precision, context, arg1Code) val (regArg, opArg, isGeq) = case comparison of BuiltIns.TestGreater => (arg1Value, arg2Value, false) | BuiltIns.TestLess => (arg2Value, arg1Value, false) (* Reversed: a<b is b>a. *) | BuiltIns.TestGreaterEqual => (arg1Value, arg2Value, true) | BuiltIns.TestLessEqual => (arg2Value, arg1Value, true) (* Reversed: a<=b is b>=a. *) | _ => raise InternalError "RealComparison: unimplemented operation" (* Load the first operand into a register.
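The comparison needs its first operand in a floating point register, so regArg is loaded into one here if necessary; opArg can be left as a register or memory location.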
*) val (fpReg, loadCode) = case regArg of RegisterArgument fpReg => (fpReg, arg2Code) | regArg => let val fpReg = newUReg() val moveOp = case precision of BuiltIns.PrecDouble => MoveDouble | BuiltIns.PrecSingle => MoveFloat in (fpReg, BlockSimple(LoadArgument{source=regArg, dest=fpReg, kind=moveOp}) :: arg2Code) end val isDouble = precision = BuiltIns.PrecDouble val target = asTarget destination val code = case fpMode of FPModeX87 => let val testReg1 = newUReg() and testReg2 = newUReg() val ccRef1 = newCCRef() and ccRef2 = newCCRef() val testBits = if isGeq then 0x500 else 0x4500 in makeBoolResultRev(JE, ccRef2, target, BlockSimple(ArithmeticFunction{ oper=AND, resultReg=testReg2, operand1=testReg1, operand2=IntegerConstant testBits, ccRef=ccRef2, opSize=OpSize32 }) :: BlockSimple(X87FPGetCondition { ccRef=ccRef1, dest=testReg1 }) :: BlockSimple(X87Compare{arg1=fpReg, arg2=opArg, ccRef=ccRef1, isDouble = isDouble}) :: loadCode) end | FPModeSSE2 => let val ccRef1 = newCCRef() val condition = if isGeq then JNB (* >=, <= *) else JA (* >, < *) in makeBoolResultRev(condition, ccRef1, target, BlockSimple(SSE2Compare{arg1=fpReg, arg2=opArg, ccRef=ccRef1, isDouble = isDouble}) :: loadCode) end in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.FreeCStack, arg1, arg2}, context, _, destination, tailCode) = (* Free space on the C stack by storing the address in the argument into the "memory register". This is a binary operation that takes the base address and the size. The base address isn't used in this version. *) let val (arg2Code, untaggedLength) = case arg2 of BICConstnt(value, _) => (tailCode, IntegerConstant(Word.toLargeInt(toShort value)) (* Leave untagged *)) | _ => let val (arg2Code, lengthReg) = codeToPRegRev(arg2, context, tailCode) val lengthUntagged = newUReg() in ( BlockSimple(UntagValue{source=lengthReg, dest=lengthUntagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}) :: arg2Code, RegisterArgument lengthUntagged ) end (* Evaluate the first argument for side-effects but discard it. *) val (arg1Code, _, _) = codeToICodeRev(arg1, context, false, Allowed allowDefer, arg2Code) val addrReg = newUReg() and resAddrReg = newUReg() val code = BlockSimple(StoreMemReg{offset=memRegCStackPtr, source=resAddrReg, kind=moveNativeWord}) :: BlockSimple(ArithmeticFunction{oper=ADD, resultReg=resAddrReg, operand1=addrReg, operand2=untaggedLength, ccRef=newCCRef(), opSize=nativeWordOpSize}) :: BlockSimple(LoadMemReg{offset=memRegCStackPtr, dest=addrReg, kind=moveNativeWord}) :: arg1Code in moveIfNotAllowedRev(destination, code, (* Unit result *) IntegerConstant(tag 0)) end (* Multiply tagged word by a constant. We're not concerned with overflow so it's possible to use various short cuts. 
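A tagged value t represents n with t = 2*n+1, so t*c = 2*n*c + c and subtracting c-1 gives 2*(n*c)+1, the correctly tagged product. The LEA cases build the same result directly, e.g. for c=3 the address t + 2*t - 2 = 6*n+1 is the tagged value of 3*n.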
*) and codeMultiplyConstantWordRev(arg, context, destination, multiplier, tailCode) = let val target = asTarget destination val (argCode, aReg) = codeToPReg(arg, context) val doMultiply = case multiplier of 0w0 => [BlockSimple(LoadArgument{source=IntegerConstant 1, dest=target, kind=movePolyWord})] | 0w1 => [BlockSimple(LoadArgument{source=RegisterArgument aReg, dest=target, kind=movePolyWord})] | 0w2 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~1, index=MemIndex1 aReg, dest=target, opSize=polyWordOpSize})] | 0w3 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~2, index=MemIndex2 aReg, dest=target, opSize=polyWordOpSize})] | 0w4 => [BlockSimple(LoadEffectiveAddress{base=NONE, offset= ~3, index=MemIndex4 aReg, dest=target, opSize=polyWordOpSize})] | 0w5 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~4, index=MemIndex4 aReg, dest=target, opSize=polyWordOpSize})] | 0w8 => [BlockSimple(LoadEffectiveAddress{base=NONE, offset= ~7, index=MemIndex8 aReg, dest=target, opSize=polyWordOpSize})] | 0w9 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~8, index=MemIndex8 aReg, dest=target, opSize=polyWordOpSize})] | _ => let val tReg = newUReg() val tagCorrection = Word.toLargeInt multiplier - 1 fun getPower2 n = let fun p2 (n, l) = if n = 0w1 then SOME l else if Word.andb(n, 0w1) = 0w1 then NONE else p2(Word.>>(n, 0w1), l+0w1) in if n = 0w0 then NONE else p2(n,0w0) end val multiply = case getPower2 multiplier of SOME power => (* Shift it including the tag. *) BlockSimple(ShiftOperation{ shift=SHL, resultReg=tReg, operand=aReg, shiftAmount=IntegerConstant(Word.toLargeInt power), ccRef=newCCRef(), opSize=polyWordOpSize }) | NONE => (* Multiply including the tag. *) BlockSimple(Multiplication{resultReg=tReg, operand1=aReg, operand2=IntegerConstant(Word.toLargeInt multiplier), ccRef=newCCRef(), opSize=polyWordOpSize}) in [multiply, BlockSimple(ArithmeticFunction{oper=SUB, resultReg=target, operand1=tReg, operand2=IntegerConstant tagCorrection, ccRef=newCCRef(), opSize=polyWordOpSize})] end in (revApp(argCode @ doMultiply, tailCode), RegisterArgument target, false) end and codeToICodeAllocate({numWords as BICConstnt(length, _), flags as BICConstnt(flagValue, _), initial}, context, _, destination) = (* Constant length and flags is used for ref. We could handle other cases. *) if isShort length andalso isShort flagValue andalso toShort length = 0w1 then let val target = asTarget destination (* Force a different register. *) val vecLength = Word.toInt(toShort length) val flagByte = Word8.fromLargeWord(Word.toLargeWord(toShort flagValue)) val memAddr = newPReg() and valueReg = newPReg() fun initialise n = BlockSimple(StoreArgument{ source=RegisterArgument valueReg, offset=n*Word.toInt wordSize, base=memAddr, index=memIndexOrObject, kind=movePolyWord, isMutable=false}) val code = codeToICodeTarget(initial, context, false, valueReg) @ [BlockSimple(AllocateMemoryOperation{size=vecLength, flags=flagByte, dest=memAddr, saveRegs=[]})] @ List.tabulate(vecLength, initialise) @ [BlockSimple InitialisationComplete, BlockSimple(LoadArgument{source=RegisterArgument memAddr, dest=target, kind=movePolyWord})] in (code, RegisterArgument target, false) end else (* If it's longer use the full run-time form. 
*) allocateMemoryVariable(numWords, flags, initial, context, destination) | codeToICodeAllocate({numWords, flags, initial}, context, _, destination) = allocateMemoryVariable(numWords, flags, initial, context, destination) and codeToICodeLoad({kind=LoadStoreMLWord _, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeAddress(address, false, context) in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument {source=MemoryLocation memLoc, dest=target, kind=movePolyWord})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreMLByte _, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeAddress(address, true, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveByte}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize32})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC8, address}, context, _, destination) = let (* Load a byte from C memory. This is almost exactly the same as LoadStoreMLByte except that the base address is a LargeWord.word value. *) val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w1, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveByte}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize32})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC16, address}, context, _, destination) = let (* Load a 16-bit value from C memory. *) val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w2, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=Move16Bit}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize32})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC32, address}, context, _, destination) = let (* Load a 32-bit value from C memory. If this is 64-bit mode we can tag it but if this is 32-bit mode we need to box it. *) val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w4, context) val untaggedResReg = newUReg() val boxTagCode = if targetArch = Native64Bit then BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize64 (* It becomes 33 bits *)}) else BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untaggedResReg, dest=target, saveRegs=[]}) in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=Move32Bit}), boxTagCode], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC64, address}, context, _, destination) = let (* Load a 64-bit value from C memory. This is only allowed in 64-bit mode. The result is a boxed value. 
*) val _ = targetArch <> Native32Bit orelse raise InternalError "codeToICodeNonRev: BICLoadOperation LoadStoreC64 in 32-bit" val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w8, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=Move64Bit}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untaggedResReg, dest=target, saveRegs=[]})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreCFloat, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w4, context) val untaggedResReg = newUReg() val boxFloat = case fpMode of FPModeX87 => BoxX87Double | FPModeSSE2 => BoxSSE2Double (* We need to convert the float into a double. *) val loadArg = case fpMode of FPModeX87 => BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveFloat}) | FPModeSSE2 => BlockSimple(SSE2FPUnary { source=MemoryLocation memLoc, resultReg=untaggedResReg, opc=SSE2UFloatToDouble}) in (codeBaseIndex @ codeUntag @ [loadArg, BlockSimple(BoxValue{boxKind=boxFloat, source=untaggedResReg, dest=target, saveRegs=[]})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreCDouble, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w8, context) val untaggedResReg = newUReg() val boxFloat = case fpMode of FPModeX87 => BoxX87Double | FPModeSSE2 => BoxSSE2Double in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveDouble}), BlockSimple(BoxValue{boxKind=boxFloat, source=untaggedResReg, dest=target, saveRegs=[]})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreUntaggedUnsigned, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeAddress(address, false, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=movePolyWord}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=polyWordOpSize})], RegisterArgument target, false) end and codeToICodeStore({kind=LoadStoreMLWord _, address, value}, context, _, destination) = let val (sourceCode, source, _) = codeToICode(value, context, false, Allowed allowInMemMove) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeAddress(address, false, context) val code = codeBaseIndex @ sourceCode @ codeUntag @ [BlockSimple(StoreArgument {source=source, base=base, offset=offset, index=index, kind=movePolyWord, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreMLByte _, address, value}, context, _, destination) = let val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeAddress(address, true, context) (* We have to untag the value to store. 
*) val (valueCode, untagValue, valueArg) = codeAsUntaggedByte(value, false, context) val code = codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=valueArg, base=base, offset=offset, index=index, kind=MoveByte, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC8, address, value}, context, _, destination) = let (* Store a byte to C memory. Almost exactly the same as LoadStoreMLByte. *) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w1, context) val (valueCode, untagValue, valueArg) = codeAsUntaggedByte(value, false, context) val code = codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=valueArg, base=base, offset=offset, index=index, kind=MoveByte, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC16, address, value}, context, _, destination) = let (* Store a 16-bit value to C memory. *) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w2, context) (* We don't currently implement 16-bit constant moves so this must always be in a reg. *) val (valueCode, untagValue, valueArg) = codeAsUntaggedToReg(value, false, context) val code = codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=RegisterArgument valueArg, base=base, offset=offset, index=index, kind=Move16Bit, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC32, address, value}, context, _, destination) = (* Store a 32-bit value. If this is 64-bit mode we untag it but if this is 32-bit mode we unbox it. *) let val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w4, context) val code = if targetArch = Native64Bit then let (* We don't currently implement 32-bit constant moves so this must always be in a reg. *) val (valueCode, untagValue, valueArg) = codeAsUntaggedToReg(value, false, context) in codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=RegisterArgument valueArg, base=base, offset=offset, index=index, kind=Move32Bit, isMutable=true})] end else let val (valueCode, valueReg) = codeToPReg(value, context) val valueReg1 = newUReg() in codeBaseIndex @ valueCode @ BlockSimple(LoadArgument{source=wordAt valueReg, dest=valueReg1, kind=Move32Bit}) :: codeUntag @ [BlockSimple(StoreArgument {source=RegisterArgument valueReg1, base=base, offset=offset, index=index, kind=Move32Bit, isMutable=true})] end in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC64, address, value}, context, _, destination) = let (* Store a 64-bit value. 
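As with the corresponding load this is only possible in 64-bit mode; the value is unboxed from its LargeWord cell into an untagged register before the store.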
*) val _ = targetArch <> Native32Bit orelse raise InternalError "codeToICodeNonRev: BICStoreOperation LoadStoreC64 in 32-bit" val (valueCode, valueReg) = codeToPReg(value, context) val valueReg1 = newUReg() val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w8, context) val code = codeBaseIndex @ valueCode @ codeUntag @ [BlockSimple(LoadArgument{source=wordAt valueReg, dest=valueReg1, kind=Move64Bit}), BlockSimple(StoreArgument {source=RegisterArgument valueReg1, base=base, offset=offset, index=index, kind=Move64Bit, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreCFloat, address, value}, context, _, destination) = let val floatReg = newUReg() and float2Reg = newUReg() val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w4, context) val (valueCode, valueReg) = codeToPReg(value, context) (* If we're using an SSE2 reg we have to convert it from double to single precision. *) val (storeReg, cvtCode) = case fpMode of FPModeSSE2 => (float2Reg, [BlockSimple(SSE2FPUnary{opc=SSE2UDoubleToFloat, resultReg=float2Reg, source=RegisterArgument floatReg})]) | FPModeX87 => (floatReg, []) val code = codeBaseIndex @ valueCode @ codeUntag @ BlockSimple(LoadArgument{source=wordAt valueReg, dest=floatReg, kind=MoveDouble}) :: cvtCode @ [BlockSimple(StoreArgument {source=RegisterArgument storeReg, base=base, offset=offset, index=index, kind=MoveFloat, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreCDouble, address, value}, context, _, destination) = let val floatReg = newUReg() val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w8, context) val (valueCode, valueReg) = codeToPReg(value, context) val code = codeBaseIndex @ valueCode @ codeUntag @ [BlockSimple(LoadArgument{source=wordAt valueReg, dest=floatReg, kind=MoveDouble}), BlockSimple(StoreArgument {source=RegisterArgument floatReg, base=base, offset=offset, index=index, kind=MoveDouble, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreUntaggedUnsigned, address, value}, context, _, destination) = let (* We have to untag the value to store. *) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeAddress(address, false, context) (* See if it's a constant. This is frequently used to set the last word of a string to zero. *) (* We have to be a bit more careful on the X86. We use moves to store constants that can include addresses. To avoid problems we only use a move if the value is zero or odd and so looks like a tagged value. 
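When the constant cannot be stored as an immediate the value is evaluated into a register and untagged with UntagValue before the store.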
*) val storeAble = case value of BICConstnt(value, _) => if not(isShort value) then NONE else let val ival = Word.toLargeIntX(toShort value) in if targetArch = Native64Bit then if is32bit ival then SOME ival else NONE else if ival = 0 orelse ival mod 2 = 1 then SOME ival else NONE end | _ => NONE val (storeVal, valCode) = case storeAble of SOME value => (IntegerConstant value (* Leave untagged *), []) | NONE => let val valueReg = newPReg() and valueReg1 = newUReg() in (RegisterArgument valueReg1, codeToICodeTarget(value, context, false, valueReg) @ [BlockSimple(UntagValue{dest=valueReg1, source=valueReg, isSigned=false, cache=NONE, opSize=polyWordOpSize})]) end val code = codeBaseIndex @ valCode @ codeUntag @ [BlockSimple(StoreArgument {source=storeVal, base=base, offset=offset, index=index, kind=movePolyWord, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end and codeToICodeBlock({kind=BlockOpCompareByte, sourceLeft, destRight, length}, context, _, destination) = let (* This is effectively a big-endian comparison since we compare the bytes until we find an inequality. *) val target = asTarget destination val mergeResult = newMergeReg() val vec1Reg = newUReg() and vec2Reg = newUReg() val (leftCode, leftUntag, {base=leftBase, offset=leftOffset, index=leftIndex, ...}) = codeAddress(sourceLeft, true, context) val (rightCode, rightUntag, {base=rightBase, offset=rightOffset, index=rightIndex, ...}) = codeAddress(destRight, true, context) val ccRef = newCCRef() val labLess = newLabel() and labGreater = newLabel() and exitLab = newLabel() val labNotLess = newLabel() and labNotGreater = newLabel() val (lengthCode, lengthUntag, lengthArg) = codeAsUntaggedToReg(length, false (* unsigned *), context) val code = leftCode @ rightCode @ lengthCode @ leftUntag @ [BlockSimple(loadAddress{base=leftBase, offset=leftOffset, index=leftIndex, dest=vec1Reg})] @ rightUntag @ [BlockSimple(loadAddress{base=rightBase, offset=rightOffset, index=rightIndex, dest=vec2Reg})] @ lengthUntag @ [BlockSimple(CompareByteVectors{ vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lengthArg, ccRef=ccRef }), (* N.B. These are unsigned comparisons. *) BlockFlow(Conditional{ ccRef=ccRef, condition=JB, trueJump=labLess, falseJump=labNotLess }), BlockLabel labNotLess, BlockFlow(Conditional{ ccRef=ccRef, condition=JA, trueJump=labGreater, falseJump=labNotGreater }), BlockLabel labNotGreater, BlockSimple(LoadArgument{ source=IntegerConstant(tag 0), dest=mergeResult, kind=movePolyWord }), BlockFlow(Unconditional exitLab), BlockLabel labLess, BlockSimple(LoadArgument{ source=IntegerConstant(tag ~1), dest=mergeResult, kind=movePolyWord }), BlockFlow(Unconditional exitLab), BlockLabel labGreater, BlockSimple(LoadArgument{ source=IntegerConstant(tag 1), dest=mergeResult, kind=movePolyWord }), BlockLabel exitLab, BlockSimple(LoadArgument{ source=RegisterArgument mergeResult, dest=target, kind=movePolyWord })] in (code, RegisterArgument target, false) end | codeToICodeBlock({kind=BlockOpMove {isByteMove}, sourceLeft, destRight, length}, context, _, destination) = let (* Moves of 4 or 8 bytes can be done as word moves provided the alignment is correct. Although this will work for strings it is really to handle moves between SysWord and volatileRef in Foreign.Memory. Moves of 1, 2 or 3 bytes or words are converted into a sequence of byte or word moves. 
*) val isWordMove = case (isByteMove, length) of (true, BICConstnt(l, _)) => if not (isShort l) orelse (toShort l <> 0w4 andalso toShort l <> nativeWordSize) then NONE else let val leng = Word.toInt(toShort l) val moveKind = if toShort l = nativeWordSize then moveNativeWord else Move32Bit val isLeftAligned = case sourceLeft of {index=NONE, offset:int, ...} => offset mod leng = 0 | _ => false val isRightAligned = case destRight of {index=NONE, offset: int, ...} => offset mod leng = 0 | _ => false in if isLeftAligned andalso isRightAligned then SOME moveKind else NONE end | _ => NONE in case isWordMove of SOME moveKind => let val (leftCode, leftUntag, leftMem) = codeAddress(sourceLeft, isByteMove, context) val (rightCode, rightUntag, {base, offset, index, ...}) = codeAddress(destRight, isByteMove, context) val untaggedResReg = newUReg() val code = leftCode @ rightCode @ leftUntag @ rightUntag @ [BlockSimple(LoadArgument { source=MemoryLocation leftMem, dest=untaggedResReg, kind=moveKind}), BlockSimple(StoreArgument {source=RegisterArgument untaggedResReg, base=base, offset=offset, index=index, kind=moveKind, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | _ => let val vec1Reg = newUReg() and vec2Reg = newUReg() val (leftCode, leftUntag, {base=leftBase, offset=leftOffset, index=leftIndex, ...}) = codeAddress(sourceLeft, isByteMove, context) val (rightCode, rightUntag, {base=rightBase, offset=rightOffset, index=rightIndex, ...}) = codeAddress(destRight, isByteMove, context) val (lengthCode, lengthUntag, lengthArg) = codeAsUntaggedToReg(length, false (* unsigned *), context) val code = leftCode @ rightCode @ lengthCode @ leftUntag @ [BlockSimple(loadAddress{base=leftBase, offset=leftOffset, index=leftIndex, dest=vec1Reg})] @ rightUntag @ [BlockSimple(loadAddress{base=rightBase, offset=rightOffset, index=rightIndex, dest=vec2Reg})] @ lengthUntag @ [BlockSimple(BlockMove{ srcAddr=vec1Reg, destAddr=vec2Reg, length=lengthArg, isByteMove=isByteMove })] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end end | codeToICodeBlock({kind=BlockOpEqualByte, ...}, _, _, _) = (* TODO: Move the code from codeToICodeRev. However, that is already reversed. *) raise InternalError "codeToICodeBlock - BlockOpEqualByte" (* Already done *) and codeConditionRev(condition, context, jumpOn, jumpLabel, tailCode) = (* General case. Load the value into a register and compare it with 1 (true) *) let val ccRef = newCCRef() val (testCode, testReg) = codeToPRegRev(condition, context, tailCode) val noJumpLabel = newLabel() in BlockLabel noJumpLabel :: BlockFlow(Conditional{ccRef=ccRef, condition=if jumpOn then JE else JNE, trueJump=jumpLabel, falseJump=noJumpLabel}) :: BlockSimple(CompareLiteral{arg1=RegisterArgument testReg, arg2=tag 1, opSize=OpSize32, ccRef=ccRef}) :: testCode end (* The fixed precision functions are also used for arbitrary precision but instead of raising Overflow we need to jump to the code that handles the long format. *) and codeFixedPrecisionArith(BuiltIns.ArithAdd, arg1, BICConstnt(value, _), context, target, onOverflow) = let val ccRef = newCCRef() (* If the argument is a constant we can subtract the tag beforehand. This should always be a tagged value if the type is correct. However it's possible for it not to be if we have an arbitrary precision value. There will be a run-time check that the value is short and so this code will never be executed. 
It will generally be edited out by the higher level but we can't rely on that. Because it's never executed we can just put in zero. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPReg(arg1, context) in arg1Code @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end | codeFixedPrecisionArith(BuiltIns.ArithAdd, BICConstnt(value, _), arg2, context, target, onOverflow) = let val ccRef = newCCRef() (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg2Code, aReg2) = codeToPReg(arg2, context) in arg2Code @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg2, operand2=IntegerConstant constVal, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end | codeFixedPrecisionArith(BuiltIns.ArithAdd, arg1, arg2, context, target, onOverflow) = let val aReg3 = newPReg() and ccRef = newCCRef() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) in arg1Code @ arg2Code @ (* Subtract the tag bit from the second argument, do the addition and check for overflow. *) (* TODO: We should really do the detagging in the transform phase. It can make a better choice of the argument if one of the arguments is already untagged or if we have a constant argument. *) [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=aReg1, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg3, operand2=RegisterArgument aReg2, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end (* Subtraction. We can handle the special case of the second argument being a constant but not the first. *) | codeFixedPrecisionArith(BuiltIns.ArithSub, arg1, BICConstnt(value, _), context, target, onOverflow) = let val ccRef = newCCRef() (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPReg(arg1, context) in arg1Code @ [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end | codeFixedPrecisionArith(BuiltIns.ArithSub, arg1, arg2, context, target, onOverflow) = let val aReg3 = newPReg() val ccRef = newCCRef() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) in arg1Code @ arg2Code @ (* Do the subtraction, test for overflow and afterwards add in the tag bit. *) [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=aReg1, operand2=RegisterArgument aReg2, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg3, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithMult, arg1, BICConstnt(value, _), context, target, onOverflow) = let val aReg = newPReg() and argUntagged = newUReg() and resUntagged = newUReg() val mulCC = newCCRef() (* Is it better to untag the constant or the register argument?
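Here the register argument has its tag subtracted, giving 2*n, and is multiplied by the untagged constant c to give 2*n*c; adding 1 afterwards restores the tag so the result is the tagged product.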
*) val constVal = if isShort value then Word.toLargeIntX(toShort value) else 0 in codeToICodeTarget(arg1, context, false, aReg) @ [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=argUntagged, operand1=aReg, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=argUntagged, operand2=IntegerConstant constVal, ccRef=mulCC, opSize=polyWordOpSize} )] @ onOverflow mulCC @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithMult, BICConstnt(value, _), arg2, context, target, onOverflow) = let val aReg = newPReg() and argUntagged = newUReg() and resUntagged = newUReg() val mulCC = newCCRef() (* Is it better to untag the constant or the register argument? *) val constVal = if isShort value then Word.toLargeIntX(toShort value) else 0 in codeToICodeTarget(arg2, context, false, aReg) @ [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=argUntagged, operand1=aReg, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=argUntagged, operand2=IntegerConstant constVal, ccRef=mulCC, opSize=polyWordOpSize} )] @ onOverflow mulCC @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithMult, arg1, arg2, context, target, onOverflow) = let val aReg1 = newPReg() and aReg2 = newPReg() and arg1Untagged = newUReg() and arg2Untagged = newUReg() and resUntagged = newUReg() val mulCC = newCCRef() (* This is almost the same as the word operation except we use a signed shift and check for overflow. *) in codeToICodeTarget(arg1, context, false, aReg1) @ codeToICodeTarget(arg2, context, false, aReg2) @ (* Shift one argument and subtract the tag from the other. It's possible this could be reordered if we have a value that is already untagged. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=true (* Signed shift here. *), cache=NONE, opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=arg2Untagged, operand1=aReg2, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=arg1Untagged, operand2=RegisterArgument arg2Untagged, ccRef=mulCC, opSize=polyWordOpSize} )] @ onOverflow mulCC @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithQuot, arg1, arg2, context, target, _) = let val aReg1 = newPReg() and aReg2 = newPReg() val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() in codeToICodeTarget(arg1, context, false, aReg1) @ codeToICodeTarget(arg2, context, false, aReg2) @ (* Shift both of the arguments to remove the tags. We don't test for zero here - that's done explicitly. 
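The untagging uses an arithmetic (signed) shift because the operands may be negative, and the quotient is retagged with TagValue afterwards.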
*) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = true, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=quotient, dest=target, isSigned=true, opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithRem, arg1, arg2, context, target, _) = let (* Identical to Quot except that the result is the remainder. *) val aReg1 = newPReg() and aReg2 = newPReg() val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() in codeToICodeTarget(arg1, context, false, aReg1) @ codeToICodeTarget(arg2, context, false, aReg2) @ (* Shift both of the arguments to remove the tags. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = true, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=remainder, dest=target, isSigned=true, opSize=polyWordOpSize})] end | codeFixedPrecisionArith(_, _, _, _, _, _) = raise InternalError "codeToICode: FixedPrecisionArith - unimplemented operation" (* Generate code for floating point arguments where one of the arguments must be in a register. If the first argument is in a register use that, if the second is in a register and it's commutative use that otherwise load the first argument into a register. *) and codeFPBinaryArgsRev(arg1, arg2, precision, commutative, context, tailCode) = let val (arg1Code, arg1Value) = codeFPArgument(arg1, precision, context, tailCode) val (arg2Code, arg2Value) = codeFPArgument(arg2, precision, context, arg1Code) in case (arg1Value, arg2Value, commutative) of (RegisterArgument fpReg, _, _) => (arg2Code, fpReg, arg2Value) | (_, RegisterArgument fpReg, Commutative) => (arg2Code, fpReg, arg1Value) | (arg1Val, _, _) => let val fpReg = newUReg() val moveOp = case precision of BuiltIns.PrecDouble => MoveDouble | BuiltIns.PrecSingle => MoveFloat in (BlockSimple(LoadArgument{source=arg1Val, dest=fpReg, kind=moveOp}) :: arg2Code, fpReg, arg2Value) end end (* Generate code to evaluate a floating point argument. The aim of this code is to avoid the overhead of untagging a short-precision floating point value in memory. *) and codeFPArgument(BICConstnt(value, _), _, _, tailCode) = let val argVal = (* Single precision constants in 64-bit mode are represented by the value shifted left 32 bits. A word is shifted left one bit so the result is 0w31. *) if isShort value then IntegerConstant(Word.toLargeInt(Word.>>(toShort value, 0w31))) else AddressConstant value in (tailCode, argVal) end | codeFPArgument(arg, precision, context, tailCode) = ( case (precision, wordSize) of (BuiltIns.PrecSingle, 0w8) => (* If this is a single precision value and the word size is 8 the values are tagged. If it is memory we can load the value directly from the high-order word. 
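On little-endian X86 the high-order 32 bits of the tagged word are at byte offset 4, hence the offset+4 in the memory case below.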
*) let val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (code, result, _) = codeToICodeRev(arg, context, false, Allowed memOrReg, tailCode) in case result of RegisterArgument argReg => let val fpReg = newUReg() in (BlockSimple(UntagFloat{source=RegisterArgument argReg, dest=fpReg, cache=NONE}) :: code, RegisterArgument fpReg) end | MemoryLocation{offset, base, index, ...} => (code, MemoryLocation{offset=offset+4, base=base, index=index, cache=NONE}) | _ => raise InternalError "codeFPArgument" end | _ => (* Otherwise the value is boxed. *) let val (argCode, argReg) = codeToPRegRev(arg, context, tailCode) in (argCode, wordAt argReg) end ) (* Code an address. The index is optional. *) and codeAddressRev({base, index=SOME index, offset}, true (* byte move *), context, tailCode) = let (* Byte address with index. The index needs to be untagged. *) val indexReg1 = newUReg() val (codeBase, baseReg) = codeToPRegRev(base, context, tailCode) val (codeIndex, indexReg) = codeToPRegRev(index, context, codeBase) val untagCode = [BlockSimple(UntagValue{dest=indexReg1, source=indexReg, isSigned=false, cache=NONE, opSize=polyWordOpSize})] val (codeLoadAddr, realBase) = if targetArch = ObjectId32Bit then let val addrReg = newUReg() in ([BlockSimple(LoadEffectiveAddress{ base=SOME baseReg, offset=0, index=ObjectIndex, dest=addrReg, opSize=nativeWordOpSize})], addrReg) end else ([], baseReg) val memResult = {base=realBase, offset=offset, index=MemIndex1 indexReg1, cache=NONE} in (codeLoadAddr @ codeIndex, untagCode, memResult) end | codeAddressRev({base, index=SOME index, offset}, false (* word move *), context, tailCode) = let (* Word address with index. We can avoid untagging the index by adjusting the multiplier and offset *) val (codeBase, baseReg) = codeToPRegRev(base, context, tailCode) val (codeIndex, indexReg) = codeToPRegRev(index, context, codeBase) val (codeLoadAddr, realBase) = if targetArch = ObjectId32Bit then let val addrReg = newUReg() in ([BlockSimple(LoadEffectiveAddress{ base=SOME baseReg, offset=0, index=ObjectIndex, dest=addrReg, opSize=nativeWordOpSize})], addrReg) end else ([], baseReg) val memResult = if wordSize = 0w8 then {base=realBase, offset=offset-4, index=MemIndex4 indexReg, cache=NONE} else {base=realBase, offset=offset-2, index=MemIndex2 indexReg, cache=NONE} in (codeLoadAddr @ codeIndex, [], memResult) end | codeAddressRev({base, index=NONE, offset}, _, context, tailCode) = let val (codeBase, baseReg) = codeToPRegRev(base, context, tailCode) val memResult = {offset=offset, base=baseReg, index=memIndexOrObject, cache=NONE} in (codeBase, [], memResult) end and codeAddress(addr, isByte, context) = let val (code, untag, res) = codeAddressRev(addr, isByte, context, []) in (List.rev code, untag, res) end (* C-memory operations are slightly different. The base address is a LargeWord.word value. The index is a byte index so may have to be untagged. *) and codeCAddress({base, index=SOME index, offset}, 0w1, context) = let (* Byte address with index. The index needs to be untagged. *) val untaggedBaseReg = newUReg() val (codeBase, baseReg) = codeToPReg(base, context) and (codeIndex, indexReg) = codeToPReg(index, context) (* The index needs to be untagged and, if necessary, sign-extended to the native word size.
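On 32-in-64 the tagged index is a 32-bit value so it is first sign-extended to 64 bits and then untagged with a signed shift; on the native targets a single signed untag over the native word is sufficient.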
*) val (untagCode, sxReg) = if targetArch = ObjectId32Bit then let val sReg1 = newUReg() and sReg2 = newUReg() in ([BlockSimple(SignExtend32To64{dest=sReg1, source=RegisterArgument indexReg}), BlockSimple(UntagValue{dest=sReg2, source=sReg1, isSigned=true, cache=NONE, opSize=nativeWordOpSize})], sReg2) end else let val sReg = newUReg() in ([BlockSimple(UntagValue{dest=sReg, source=indexReg, isSigned=true, cache=NONE, opSize=nativeWordOpSize})], sReg) end val loadCode = [BlockSimple(LoadArgument{source=wordAt baseReg, dest=untaggedBaseReg, kind=moveNativeWord})] val memResult = {base=untaggedBaseReg, offset=offset, index=MemIndex1 sxReg, cache=NONE} in (codeBase @ codeIndex, loadCode @ untagCode, memResult) end | codeCAddress({base, index=SOME index, offset}, size, context) = let (* Non-byte address with index. By using an appropriate multiplier we can avoid having to untag the index. *) val untaggedBaseReg = newUReg() val (codeBase, baseReg) = codeToPReg(base, context) and (codeIndex, indexReg) = codeToPReg(index, context) (* The index is signed i.e. negative index values are legal. We don't have to do anything special on the native code versions but on 32-in-64 we need to sign extend. *) val (untagCode, sxReg) = if targetArch = ObjectId32Bit then let val sReg = newUReg() in ([BlockSimple(SignExtend32To64{source=RegisterArgument indexReg, dest=sReg})], sReg) end else ([], indexReg) val loadCode = [BlockSimple(LoadArgument{source=wordAt baseReg, dest=untaggedBaseReg, kind=moveNativeWord})] val memResult = case size of 0w2 => {base=untaggedBaseReg, offset=offset-1, index=MemIndex1 sxReg, cache=NONE} | 0w4 => {base=untaggedBaseReg, offset=offset-2, index=MemIndex2 sxReg, cache=NONE} | 0w8 => {base=untaggedBaseReg, offset=offset-4, index=MemIndex4 sxReg, cache=NONE} | _ => raise InternalError "codeCAddress: unknown size" in (codeBase @ codeIndex, loadCode @ untagCode, memResult) end | codeCAddress({base, index=NONE, offset}, _, context) = let val untaggedBaseReg = newUReg() val (codeBase, baseReg) = codeToPReg(base, context) val untagCode = [BlockSimple(LoadArgument{source=wordAt baseReg, dest=untaggedBaseReg, kind=moveNativeWord})] val memResult = {offset=offset, base=untaggedBaseReg, index=NoMemIndex, cache=NONE} in (codeBase, untagCode, memResult) end (* Return an untagged value. If we have a constant just return it. Otherwise return the code to evaluate the argument, the code to untag it and the reference to the untagged register. *) and codeAsUntaggedToRegRev(BICConstnt(value, _), isSigned, _, tailCode) = let (* Should always be short except for unreachable code. *) val untagReg = newUReg() val cval = if isShort value then toShort value else 0w0 val cArg = IntegerConstant(if isSigned then Word.toLargeIntX cval else Word.toLargeInt cval) (* Don't tag *) val untag = [BlockSimple(LoadArgument{source=cArg, dest=untagReg, kind=movePolyWord})] in (tailCode, untag, untagReg) (* Don't tag. *) end | codeAsUntaggedToRegRev(arg, isSigned, context, tailCode) = let val untagReg = newUReg() val (code, srcReg) = codeToPRegRev(arg, context, tailCode) val untag = [BlockSimple(UntagValue{source=srcReg, dest=untagReg, isSigned=isSigned, cache=NONE, opSize=polyWordOpSize})] in (code, untag, untagReg) end and codeAsUntaggedToReg(arg, isSigned, context) = let val (code, untag, untagReg) = codeAsUntaggedToRegRev(arg, isSigned, context, []) in (List.rev code, untag, untagReg) end (* Return the argument as an untagged value. 
We separate evaluating the argument from untagging because we may have to evaluate other arguments and that could involve a function call and we can't save the value to the stack after we've untagged it. Currently this is only used for byte values but we may have to be careful if we use it for word values on the X86. Moving an untagged value into a register might look like loading a constant address. *) and codeAsUntaggedByte(BICConstnt(value, _), isSigned, _) = let val cval = if isShort value then toShort value else 0w0 val cArg = IntegerConstant(if isSigned then Word.toLargeIntX cval else Word.toLargeInt cval) (* Don't tag *) in ([], [], cArg) end | codeAsUntaggedByte(arg, isSigned, context) = let val untagReg = newUReg() val (code, argReg) = codeToPReg(arg, context) val untag = [BlockSimple(UntagValue{source=argReg, dest=untagReg, isSigned=isSigned, cache=NONE, opSize=OpSize32})] in (code, untag, RegisterArgument untagReg) end (* Allocate memory. This is used both for true variable length cells and also for longer constant length cells. *) and allocateMemoryVariable(numWords, flags, initial, context, destination) = let val target = asTarget destination (* With the exception of flagReg all these registers are modified by the code. So, we have to copy the size value into a new register. *) val sizeReg = newPReg() and initReg = newPReg() val sizeReg2 = newPReg() val untagSizeReg = newUReg() and initAddrReg = newPReg() and allocReg = newPReg() val sizeCode = codeToICodeTarget(numWords, context, false, sizeReg) and (flagsCode, flagUntag, flagArg) = codeAsUntaggedByte(flags, false, context) (* We're better off deferring the initialiser if possible. If the value is a constant we don't have to save it. *) val (initCode, initResult, _) = codeToICode(initial, context, false, Allowed allowDefer) in (sizeCode @ flagsCode @ initCode @ [(* We need to copy the size here because AllocateMemoryVariable modifies the size in order to store the length word. This is unfortunate especially as we're going to untag it anyway. *) BlockSimple(LoadArgument{source=RegisterArgument sizeReg, dest=sizeReg2, kind=movePolyWord}), BlockSimple(AllocateMemoryVariable{size=sizeReg, dest=allocReg, saveRegs=[]})] @ flagUntag @ [BlockSimple(StoreArgument{ source=flagArg, base=allocReg, offset= ~1, index=memIndexOrObject, kind=MoveByte, isMutable=false}), (* We need to copy the address here because InitialiseMem modifies all its arguments. *) BlockSimple( if targetArch = ObjectId32Bit then LoadEffectiveAddress{ base=SOME allocReg, offset=0, index=ObjectIndex, dest=initAddrReg, opSize=nativeWordOpSize} else LoadArgument{source=RegisterArgument allocReg, dest=initAddrReg, kind=movePolyWord}), BlockSimple(UntagValue{source=sizeReg2, dest=untagSizeReg, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(LoadArgument{source=initResult, dest=initReg, kind=movePolyWord}), BlockSimple(InitialiseMem{size=untagSizeReg, init=initReg, addr=initAddrReg}), BlockSimple InitialisationComplete, BlockSimple(LoadArgument{source=RegisterArgument allocReg, dest=target, kind=movePolyWord})], RegisterArgument target, false) end (*Turn the codetree structure into icode. *) val bodyContext = {loopArgs=NONE, stackPtr=0, currHandler=NONE, overflowBlock=ref NONE} val (bodyCode, _, bodyExited) = codeToICodeRev(body, bodyContext, true, SpecificPReg resultTarget, beginInstructions) val icode = if bodyExited then bodyCode else returnInstruction(bodyContext, resultTarget, bodyCode) (* Turn the icode list into basic blocks. 
The input list is in reverse so as part of this we reverse the list. *) local val resArray = Array.array(!labelCounter, BasicBlock{ block=[], flow=ExitCode }) fun createEntry (blockNo, block, flow) = Array.update(resArray, blockNo, BasicBlock{ block=block, flow=flow}) fun splitCode([], _, _) = (* End of code. We should have had a BeginFunction. *) raise InternalError "splitCode - no begin" | splitCode(BlockBegin args :: _, sinceLabel, flow) = (* Final instruction. Create the initial block and exit. *) createEntry(0, BeginFunction args ::sinceLabel, flow) | splitCode(BlockSimple instr :: rest, sinceLabel, flow) = splitCode(rest, instr :: sinceLabel, flow) | splitCode(BlockLabel label :: rest, sinceLabel, flow) = (* Label - finish this block and start another. *) ( createEntry(label, sinceLabel, flow); (* Default to a jump to this label. That is used if we have assumed a drop-through. *) splitCode(rest, [], Unconditional label) ) | splitCode(BlockExit instr :: rest, _, _) = splitCode(rest, [instr], ExitCode) | splitCode(BlockFlow flow :: rest, _, _) = splitCode(rest, [], flow) | splitCode(BlockRaiseAndHandle(instr, handler) :: rest, _, _) = splitCode(rest, [instr], UnconditionalHandle handler) | splitCode(BlockOptionalHandle{call, handler, label} :: rest, sinceLabel, flow) = let (* A function call within a handler. This could go to the handler but if there is no exception will go to the next instruction. Also includes JumpLoop since the stack check could result in an Interrupt exception. *) in createEntry(label, sinceLabel, flow); splitCode(rest, [call], ConditionalHandle{handler=handler, continue=label}) end in val () = splitCode(icode, [], ExitCode) val resultVector = Array.vector resArray end open ICodeTransform val pregProperties = Vector.fromList(List.rev(! pregPropList)) in codeICodeFunctionToX86{blocks = resultVector, functionName = name, pregProps = pregProperties, ccCount= ! ccRefCounter, debugSwitches = debugSwitches, resultClosure = resultClosure, profileObject = profileObject} end fun gencodeLambda(lambda, debugSwitches, closure) = let open Debug Universal (*val debugSwitches = [tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), tagInject assemblyCodeTag true] @ debugSwitches*) in codeFunctionToX86(lambda, debugSwitches, closure) end structure Foreign = X86Foreign structure Sharing = struct type backendIC = backendIC and bicLoadForm = bicLoadForm and argumentType = argumentType and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/INITIALISE_.ML b/mlsource/MLCompiler/INITIALISE_.ML index e9d87c18..f6c7ffac 100644 --- a/mlsource/MLCompiler/INITIALISE_.ML +++ b/mlsource/MLCompiler/INITIALISE_.ML @@ -1,2079 +1,2074 @@ (* Copyright (c) 2000 Cambridge University Technical Services Limited Updated David C.J. Matthews 2008-9, 2012, 2013, 2015-21 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Title: Initialise ML Global Declarations. 
Author: Dave Matthews,Cambridge University Computer Laboratory Copyright Cambridge University 1985 *) functor INITIALISE_ ( structure LEX: LEXSIG structure TYPETREE : TYPETREESIG structure STRUCTVALS : STRUCTVALSIG structure VALUEOPS : VALUEOPSSIG structure CODETREE : CODETREE structure EXPORTTREE: EXPORTTREESIG structure DATATYPEREP: DATATYPEREPSIG structure TYPEIDCODE: TYPEIDCODESIG structure MAKE: MAKESIG structure ADDRESS : AddressSig structure DEBUG: DEBUG structure DEBUGGER : DEBUGGER structure PRETTY : PRETTY structure PRINTTABLE: PRINTTABLESIG structure MISC : sig val unescapeString : string -> string exception Conversion of string; (* string to int conversion failure *) end structure VERSION: sig val compilerVersion: string val versionNumber: int end structure UNIVERSALTABLE: sig type universal = Universal.universal type univTable type 'a tag = 'a Universal.tag val univLookup: univTable * 'a tag * string -> 'a option val fold: (string * universal * 'a -> 'a) -> 'a -> univTable -> 'a end sharing STRUCTVALS.Sharing = VALUEOPS.Sharing = TYPETREE.Sharing = EXPORTTREE.Sharing = PRETTY.Sharing = CODETREE.Sharing = MAKE.Sharing = ADDRESS = DATATYPEREP.Sharing = TYPEIDCODE.Sharing = DEBUGGER.Sharing = LEX.Sharing = PRINTTABLE.Sharing sharing STRUCTVALS.Sharing = VALUEOPS.Sharing = TYPETREE.Sharing = EXPORTTREE.Sharing = PRETTY.Sharing = CODETREE.Sharing = MAKE.Sharing = ADDRESS = DATATYPEREP.Sharing = TYPEIDCODE.Sharing = DEBUGGER.Sharing = LEX.Sharing = PRINTTABLE.Sharing = UNIVERSALTABLE ) : sig type gEnv val initGlobalEnv : {globalTable : gEnv, intIsArbitraryPrecision: bool } -> unit end = struct open STRUCTVALS; open TYPETREE open VALUEOPS; open CODETREE; open ADDRESS; open MAKE; open MISC; open EXPORTTREE open DATATYPEREP val intInfType = mkTypeConstruction ("int", intInfConstr, [], []) and realType = mkTypeConstruction ("real", realConstr, [], []) and charType = mkTypeConstruction ("char", charConstr, [], []) and wordType = mkTypeConstruction ("word", wordConstr, [], []) val declInBasis = [DeclaredAt inBasis] fun applyList _ [] = () | applyList f (h :: t) = (f h : unit; applyList f t); fun initGlobalEnv{globalTable : gEnv, intIsArbitraryPrecision: bool } = let val Env globalEnv = MAKE.gEnvAsEnv globalTable val enterGlobalValue = #enterVal globalEnv; val enterGlobalType = #enterType globalEnv; (* Some routines to help make the types. *) local (* careful - STRUCTVALS.intType differs from TYPETREE.intType *) open TYPETREE; in (* Make some type variables *) fun makeEqTV () = mkTypeVar (generalisable, true, false, false) fun makeTV () = mkTypeVar (generalisable, false, false, false) fun makePrintTV() = mkTypeVar (generalisable, false, false, true) fun makeTypeVariable() = makeTv {value=emptyType, level=generalisable, equality=false, nonunifiable=false, printable=false} (* Make some functions *) infixr 5 ->> fun a ->> b = mkFunctionType (a, b); infix 7 **; fun a ** b = mkProductType [a, b]; (* Type identifiers for the types of the declarations. 
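These are built with the ** and ->> combinators defined just above. A small stand-alone model of how they read, using an illustrative datatype rather than the compiler's own types: ** (infix 7) binds tighter than the right-associative ->> (infixr 5), so a ** b ->> c denotes a function from a pair:

           datatype ty = Base of string | Fun of ty * ty | Prod of ty * ty
           infixr 5 ->>
           infix 7 **
           fun a ->> b = Fun (a, b)
           fun a ** b = Prod (a, b)
           val example = Base "int" ** Base "int" ->> Base "bool"
           (* example = Fun (Prod (Base "int", Base "int"), Base "bool"), i.e. int * int -> bool *)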
*) val Int = if intIsArbitraryPrecision then intInfType else fixedIntType val String = stringType; val Bool = boolType; val Unit = unitType; val Char = charType; val Word = wordType; val Real = realType val Exn = exnType val mkTypeConstruction = mkTypeConstruction; val () = setPreferredInt(if intIsArbitraryPrecision then intInfConstr else fixedIntConstr) end; fun makePolymorphic(tvs, c) = let open TYPEIDCODE val tvs = List.filter(fn TypeVar tv => not justForEqualityTypes orelse tvEquality tv | _ => false) tvs in if null tvs then c else mkInlproc(c, List.length tvs, "", [], 0) end (* Function to make a type identifier with a pretty printer that just prints "?". None of the types are equality types so the equality function is empty. *) local fun monotypePrinter _ = PRETTY.PrettyString "?" in fun defaultEqAndPrintCode () = let open TypeValue val code = createTypeValue{ eqCode = CodeZero, printCode = mkConst (toMachineWord (ref monotypePrinter)), boxedCode = boxedNever, sizeCode = singleWord } in Global (genCode(code, [], 0) ()) end end fun makeTypeAbbreviation(name, fullName, typeVars, typeResult, locations) = makeTypeConstructor( name, typeVars, makeTypeFunction(basisDescription fullName, (typeVars, typeResult)), locations) (* Make an opaque type and add it to an environment. *) fun makeAndDeclareOpaqueType(typeName, fullName, env) = let val typeconstr = makeTypeConstructor(typeName, [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription fullName), declInBasis); in #enterType env (typeName, TypeConstrSet(typeconstr, [])); mkTypeConstruction (typeName, typeconstr, [], declInBasis) end; (* List of something *) fun List (base : types) : types = mkTypeConstruction ("list", tsConstr listConstr, [base], declInBasis); (* ref something *) fun Ref (base : types) : types = mkTypeConstruction ("ref", refConstr, [base], declInBasis); fun Option (base : types) : types = mkTypeConstruction ("option", tsConstr optionConstr, [base], declInBasis); (* Type-dependent functions. *) fun mkSpecialFun (name:string, typeof:types, opn: typeDependent) : values = makeOverloaded (name, typeof, opn); (* Overloaded functions. *) fun mkOverloaded (name:string) (typeof: types) : values = mkSpecialFun(name, typeof, TypeDep) (* Make a structure. Returns the table as an environment so that entries can be added to the structure. 
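In other words the caller gets back a record of enter functions closed over the structure's table. A minimal stand-alone sketch of that pattern (the type and names are illustrative, not the compiler's):

           type 'a env = { enter: string * 'a -> unit, lookup: string -> 'a option }
           fun newEnv () : 'a env =
               let
                   val table = ref ([]: (string * 'a) list)
               in
                   { enter = fn (name, v) => table := (name, v) :: !table,
                     lookup = fn name =>
                         Option.map #2 (List.find (fn (n, _) => n = name) (!table)) }
               end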
*) fun makeStructure(parentEnv, name) = let val str as Struct{signat=Signatures{tab, ...}, ...} = makeEmptyGlobal name val () = #enterStruct parentEnv (name, str) val Env env = makeEnv tab in env end val () = enterGlobalType ("unit", TypeConstrSet(unitConstr, [])); local val falseCons = mkGconstr ("false", Bool, createNullaryConstructor(EnumForm{tag=0w0, maxTag=0w1}, [], "false"), true, 2, declInBasis) val trueCons = mkGconstr ("true", Bool, createNullaryConstructor(EnumForm{tag=0w1, maxTag=0w1}, [], "true"), true, 2, declInBasis) val boolEnv = makeStructure(globalEnv, "Bool") (* Bool structure *) val notFn = mkGvar("not", Bool ->> Bool, mkUnaryFn BuiltIns.NotBoolean, declInBasis) in val () = #enterType boolEnv ("bool", TypeConstrSet(boolConstr, [trueCons, falseCons])) val () = #enterVal boolEnv ("true", trueCons) val () = #enterVal boolEnv ("false", falseCons) val () = #enterVal boolEnv ("not", notFn) end; val () = enterGlobalType ("int", TypeConstrSet(if intIsArbitraryPrecision then intInfConstr else fixedIntConstr, [])) val () = enterGlobalType ("char", TypeConstrSet(charConstr, [])) val () = enterGlobalType ("string", TypeConstrSet(stringConstr, [])) val () = enterGlobalType ("real", TypeConstrSet(realConstr, [])) val () = (* Enter :: and nil. *) List.app(fn(tv as Value{name, ...}) => enterGlobalValue(name, tv)) (tsConstructors listConstr) val () = enterGlobalType ("list", listConstr); val () = (* Enter NONE and SOME. *) List.app(fn(tv as Value{name, ...}) => enterGlobalValue(name, tv)) (tsConstructors optionConstr) val () = enterGlobalType ("option", optionConstr); local val refCons = let val a = mkTypeVar(generalisable, false, false, false) in mkGconstr ("ref", a ->> Ref a, createUnaryConstructor(RefForm, [a], "ref"), false, 1, declInBasis) end in val () = enterGlobalType ("ref", TypeConstrSet(refConstr, [refCons])); val () = enterGlobalValue ("ref", refCons); end local open BuiltIns fun monoTypePrinter _ = PRETTY.PrettyString "?" val idCode = let open TypeValue val equalLongWordFn = mkInlproc( mkBinary(LargeWordComparison TestEqual, mkLoadArgument 0, mkLoadArgument 1), 2, "EqualLargeWord()", [], 0) val code = createTypeValue{ eqCode=equalLongWordFn, printCode=mkConst (toMachineWord (ref monoTypePrinter)), boxedCode = boxedNever, sizeCode = singleWord } in Global (genCode(code, [], 0) ()) end in val largeWordType = makeTypeConstructor("word", [], makeFreeId(0, idCode, true, basisDescription "LargeWord.word"), declInBasis) val LargeWord = mkTypeConstruction ("LargeWord.word", largeWordType, [], declInBasis) end val () = enterGlobalType ("exn", TypeConstrSet(exnConstr, [])); val () = enterGlobalType ("word", TypeConstrSet(wordConstr, [])); val runCallEnv = makeStructure(globalEnv, "RunCall") fun enterRunCall (name : string, entry : codetree, typ : types) : unit = let val value = mkGvar (name, typ, entry, declInBasis); in #enterVal runCallEnv (name, value) end local (* unsafeCast. Can be used to convert any value to any type. *) val a = makeTV () val b = makeTV () val unsafeCastEntry = mkInlproc (mkLoadArgument 0 (* just the parameter *), 1, "unsafeCast(1)", [], 0) in val () = enterRunCall ("unsafeCast", makePolymorphic([a, b], unsafeCastEntry), a ->> b) end local val a = makeTV() and b = makeTV() open BuiltIns in (* isShort - test if a value is tagged rather than being an address. 
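A usage sketch; the expected results rest on the usual representation assumption that small integers are tagged while heap cells such as multi-character strings are not:

           val shortExample = RunCall.isShort 42        (* expected: true *)
           val boxedExample = RunCall.isShort "a box"   (* expected: false *)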
*) val () = enterRunCall ("isShort", makePolymorphic([a], mkUnaryFn IsTaggedValue), a ->> Bool) (* pointer equality *) val () = enterRunCall ("pointerEq", makePolymorphic([a], mkBinaryFn PointerEq), a ** a ->> Bool) (* load a word. The index is in words and is always zero or positive. *) val () = enterRunCall ("loadWord", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLWord{isImmutable=false})), a ** Word ->> b) (* Load a word from an immutable. *) val () = enterRunCall ("loadWordFromImmutable", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLWord{isImmutable=true})), a ** Word ->> b) (* load a byte. The index is in bytes and is always zero or positive. Probably the result should be a Word8.word value or a char. *) val () = enterRunCall ("loadByte", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLByte{isImmutable=false})), a ** Word ->> b) (* Load a byte from an immutable. *) val () = enterRunCall ("loadByteFromImmutable", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLByte{isImmutable=true})), a ** Word ->> b) (* Get the length of a heap cell. *) val () = enterRunCall ("memoryCellLength", makePolymorphic([a], mkUnaryFn MemoryCellLength), a ->> Word) (* Return the flags. Perhaps this could return a Word8.word value instead of a word. *) val () = enterRunCall ("memoryCellFlags", makePolymorphic([a], mkUnaryFn MemoryCellFlags), a ->> Word) (* Return the number of bytes per word. This is a constant since we have separate pre-built compilers for 32-bit and 64-bit. N.B. The byte order is not a constant since we only have a single pre-built compiler for little-endian and big-endian interpreted code. *) val () = enterRunCall ("bytesPerWord", mkConst(toMachineWord wordSize), Word) (* Store a word *) val () = enterRunCall ("storeWord", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLWord{isImmutable=false})), mkProductType[a, Word, b] ->> Unit) (* Store a byte *) val () = enterRunCall ("storeByte", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLByte{isImmutable=false})), mkProductType[a, Word, b] ->> Unit) (* Store a word without release semantics. *) val () = enterRunCall ("storeWordInitialise", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLWord{isImmutable=true})), mkProductType[a, Word, b] ->> Unit) (* Store a byte without release semantics *) val () = enterRunCall ("storeByteInitialise", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLByte{isImmutable=true})), mkProductType[a, Word, b] ->> Unit) (* Lock a mutable cell. *) val () = enterRunCall ("clearMutableBit", makePolymorphic([a], mkUnaryFn ClearMutableFlag), a ->> Unit) (* Allocate a byte cell. The second argument is the flags byte. It might be better if this were a Word8.word value. *) val () = enterRunCall ("allocateByteMemory", makePolymorphic([a], mkBinaryFn AllocateByteMemory), Word ** Word ->> a) (* Allocate a word cell. *) val () = enterRunCall ("allocateWordMemory", makePolymorphic([a, b], mkAllocateWordMemoryFn), mkProductType[Word, Word, a] ->> b) (* Byte vector operations. *) val () = enterRunCall ("byteVectorEqual", makePolymorphic([a], mkBlockOperationFn BlockOpEqualByte), mkProductType[a, a, Word, Word, Word] ->> Bool) val () = enterRunCall ("byteVectorCompare", makePolymorphic([a], mkBlockOperationFn BlockOpCompareByte), mkProductType[a, a, Word, Word, Word] ->> Int) (* Block moves. 
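i.e. bulk byte or word copies between heap cells. A plain-ML model of the byte case, with an assumed argument order of (source, destination, source index, destination index, length), written against Word8Array purely for illustration:

           fun moveBytesModel (src: Word8Array.array, dst: Word8Array.array,
                               srcIndex: int, dstIndex: int, length: int) =
               Word8ArraySlice.copy
                   { src = Word8ArraySlice.slice (src, srcIndex, SOME length),
                     dst = dst, di = dstIndex }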
*) val () = enterRunCall ("moveBytes", makePolymorphic([a], mkBlockOperationFn (BlockOpMove{isByteMove=true})), mkProductType[a, a, Word, Word, Word] ->> Unit) val () = enterRunCall ("moveWords", makePolymorphic([a], mkBlockOperationFn (BlockOpMove{isByteMove=false})), mkProductType[a, a, Word, Word, Word] ->> Unit) (* Untagged loads and stores. *) val () = enterRunCall ("loadUntagged", mkLoadOperationFn LoadStoreUntaggedUnsigned, String ** Word ->> Word) val () = enterRunCall ("storeUntagged", mkStoreOperationFn LoadStoreUntaggedUnsigned, mkProductType[String, Word, Word] ->> Unit) val () = enterRunCall ("touch", makePolymorphic([a], mkUnaryFn TouchAddress), a ->> Unit) end local val debugOpts = [] (* Place to add debugging if necessary. *) (* [tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), tagInject assemblyCodeTag true] *) fun makeCall rtsCall n entryName = rtsCall (entryName, n, debugOpts) val makeFastCall = makeCall CODETREE.Foreign.rtsCallFast (* We need to wrap this so that the arguments are passed in registers. *) fun makeRunCallTupled (argTypes, resultType, callN) : codetree = let val width = List.length argTypes val name = "rtsCall" ^ Int.toString width; local val f = mkLoadClosure 0 (* first item from enclosing scope *) val tuple = mkLoadArgument 0 (* the inner parameter *) val args = case argTypes of [singleType] => [(tuple, singleType)] | argTypes => let val argVals = List.tabulate(width, fn n => mkInd (n, tuple)) in ListPair.zipEq(argVals, argTypes) end in val innerBody = mkCall (f, args, resultType) end local (* The closure contains the address of the RTS call. *) val f = mkEval(mkConst callN, [mkLoadArgument 0]) val innerLambda = mkInlproc (innerBody, 1, name ^ "(1)", [mkLoadLocal 0], 0) in val outerBody = mkEnv([mkDec (0, f)], innerLambda) end val outerLambda = mkInlproc (outerBody, 1, name, [], 1) in outerLambda end (* "Full" calls pass the thread Id as the first parameter. *) fun makeRunCallTupledFull (argTypes, resultType) = let val width = List.length argTypes val callN = toMachineWord(makeFastCall(width + 1)) val name = "rtsCall" ^ Int.toString width; local val f = mkLoadClosure 0 (* first item from enclosing scope *) val tuple = mkLoadArgument 0 (* the inner parameter *) val args = case argTypes of [singleType] => [(tuple, singleType)] | argTypes => let val argVals = List.tabulate(width, fn n => mkInd (n, tuple)) in ListPair.zipEq(argVals, argTypes) end in - val innerBody = - mkEnv( - [ - mkDec(0, mkCall (f, (getCurrentThreadId, GeneralType) :: args, resultType)), - mkNullDec checkRTSException - ], mkLoadLocal 0) + val innerBody = mkCall (f, (getCurrentThreadId, GeneralType) :: args, resultType) end local (* The closure contains the address of the RTS call. *) val f = mkEval(mkConst callN, [mkLoadArgument 0]) (* This creates the actual call. 
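At the ML level the effect of the "full" wrapper is simply to prepend the current thread id to the tuple of arguments. A stand-alone model of that convention (all names illustrative):

           fun fullCall2Model (entry: 'thread * 'a * 'b -> 'c) (currentThread: unit -> 'thread) =
               fn (x, y) => entry (currentThread (), x, y)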
*) - val innerLambda = mkInlproc (innerBody, 1, name ^ "(1)", [mkLoadLocal 0], 1) + val innerLambda = mkInlproc (innerBody, 1, name ^ "(1)", [mkLoadLocal 0], 0) in val outerBody = mkEnv([mkDec (0, f)], innerLambda) end val outerLambda = mkInlproc (outerBody, 1, name, [], 1) in outerLambda end local val a = makeTV () and b = makeTV () fun makeInlCode(makeCall, name) = let val call1 = toMachineWord(makeCall 1) val body = mkEval(mkConst call1, [mkLoadArgument 0]) val proc = mkInlproc (body, 1, name, [], 0) in makePolymorphic([a, b], proc) end in val rtsCallFast1Entry = makeInlCode(makeFastCall, "rtsCallFast1") end local val a = makeTV () and b = makeTV () and c = makeTV () and d = makeTV () and e = makeTV () and f = makeTV () fun makeRtsCall(n, makeCall) = makeRunCallTupled(List.tabulate(n, fn _ => GeneralType), GeneralType, toMachineWord(makeCall n)) fun makeFullRtsCall n = makeRunCallTupledFull(List.tabulate(n, fn _ => GeneralType), GeneralType) in val rtsCallFull0Entry = makePolymorphic([a], makeFullRtsCall 0) and rtsCallFast0Entry = makePolymorphic([a], makeRtsCall(0, makeFastCall)) val rtsCall0Type = String ->> Unit ->> a val rtsCall1Type = String ->> a ->> b val rtsCallFull1Entry = makePolymorphic([a, b], makeFullRtsCall 1) val rtsCallFull2Entry = makePolymorphic([a, b, c], makeFullRtsCall 2) and rtsCallFast2Entry = makePolymorphic([a, b, c], makeRtsCall(2, makeFastCall)) val rtsCall2Type = String ->> TYPETREE.mkProductType [a,b] ->> c val rtsCallFull3Entry = makePolymorphic([a, b, c, d], makeFullRtsCall 3) val rtsCallFast3Entry = makePolymorphic([a, b, c, d], makeRtsCall(3, makeFastCall)) val rtsCall3Type = String ->> TYPETREE.mkProductType [a,b,c] ->> d val rtsCallFull4Entry = makePolymorphic([a, b, c, d, e], makeFullRtsCall 4) val rtsCallFast4Entry = makePolymorphic([a, b, c, d, e], makeRtsCall(4, makeFastCall)) val rtsCall4Type = String ->> TYPETREE.mkProductType [a,b,c,d] ->> e val rtsCallFull5Entry = makePolymorphic([a, b, c, d, e, f], makeFullRtsCall 5) val rtsCall5Type = String ->> TYPETREE.mkProductType [a,b,c,d,e] ->> f end in val () = enterRunCall ("rtsCallFull0", rtsCallFull0Entry, rtsCall0Type) val () = enterRunCall ("rtsCallFast0", rtsCallFast0Entry, rtsCall0Type) val () = enterRunCall ("rtsCallFull1", rtsCallFull1Entry, rtsCall1Type) val () = enterRunCall ("rtsCallFast1", rtsCallFast1Entry, rtsCall1Type) val () = enterRunCall ("rtsCallFull2", rtsCallFull2Entry, rtsCall2Type) val () = enterRunCall ("rtsCallFast2", rtsCallFast2Entry, rtsCall2Type) val () = enterRunCall ("rtsCallFull3", rtsCallFull3Entry, rtsCall3Type) val () = enterRunCall ("rtsCallFast3", rtsCallFast3Entry, rtsCall3Type) val () = enterRunCall ("rtsCallFast4", rtsCallFast4Entry, rtsCall4Type) val () = enterRunCall ("rtsCallFull4", rtsCallFull4Entry, rtsCall4Type) val () = enterRunCall ("rtsCallFull5", rtsCallFull5Entry, rtsCall5Type) val makeRunCallTupled = makeRunCallTupled (* Needed for reals. *) end local (* Create nullary exception. *) fun makeException0(name, id) = let val exc = Value{ name = name, typeOf = TYPETREE.exnType, access = Global(mkConst(toMachineWord id)), class = Exception, locations = declInBasis, references = NONE, instanceTypes=NONE } in #enterVal runCallEnv (name, exc) end (* Create exception with parameter. 
*) and makeException1(name, id, exType) = let val exc = Value{ name = name, typeOf = exType ->> TYPETREE.exnType, access = Global(mkConst(toMachineWord id)), class = Exception, locations = declInBasis, references = NONE, instanceTypes=NONE } in #enterVal runCallEnv (name, exc) end (* Exception numbers. Most of these are hard-coded in the RTS. *) val EXC_interrupt = 1 val EXC_syserr = 2 val EXC_size = 4 val EXC_overflow = 5 val EXC_divide = 7 val EXC_conversion = 8 val EXC_XWindows = 10 val EXC_subscript = 11 val EXC_thread = 12 val EXC_Bind = 100 (* In Match compiler. *) val EXC_Match = 101 val EXC_Fail = 103 in val () = List.app makeException0 [ ("Interrupt", EXC_interrupt), ("Size", EXC_size), ("Bind", EXC_Bind), ("Div", EXC_divide), ("Match", EXC_Match), ("Overflow", EXC_overflow), ("Subscript", EXC_subscript) ] val () = List.app makeException1 [ ("Fail", EXC_Fail, String), ("Conversion", EXC_conversion, String), ("XWindows", EXC_XWindows, String), ("Thread", EXC_thread, String), ("SysErr", EXC_syserr, String ** Option LargeWord) ] end (* Standard Basis structures for basic types. These contain the definitions of the basic types and operations on them. The structures are extended in the basis library and overloaded functions are extracted from them. *) local val largeIntEnv = makeStructure(globalEnv, "LargeInt") (* The comparison operations take two arbitrary precision ints and a general "compare" function that returns a fixed precision int. *) val compareType = mkProductType[intInfType, intInfType, intInfType ** intInfType ->> fixedIntType] ->> Bool val arithType = mkProductType[intInfType, intInfType, intInfType ** intInfType ->> intInfType] ->> intInfType fun enterArbitrary(name, oper, typ) = let val value = mkGvar (name, typ, mkArbitraryFn oper, declInBasis) in #enterVal largeIntEnv (name, value) end in val () = #enterType largeIntEnv ("int", TypeConstrSet(intInfConstr, [])) (* These functions are used internally. 
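A sketch of the calling convention these appear to follow; the contract of the third, fall-back argument is an assumption here (a conventional negative/zero/positive comparison, consulted only when the short-value fast path does not apply):

           fun lessModel (x: IntInf.int, y: IntInf.int,
                          longCompare: IntInf.int * IntInf.int -> int) : bool =
               if RunCall.isShort x andalso RunCall.isShort y
               then IntInf.< (x, y)              (* cheap comparison on tagged values *)
               else longCompare (x, y) < 0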
*) val () = enterArbitrary("less", ArbCompare BuiltIns.TestLess, compareType) val () = enterArbitrary("greater", ArbCompare BuiltIns.TestGreater, compareType) val () = enterArbitrary("lessEq", ArbCompare BuiltIns.TestLessEqual, compareType) val () = enterArbitrary("greaterEq", ArbCompare BuiltIns.TestGreaterEqual, compareType) val () = enterArbitrary("add", ArbArith BuiltIns.ArithAdd, arithType) val () = enterArbitrary("subtract", ArbArith BuiltIns.ArithSub, arithType) val () = enterArbitrary("multiply", ArbArith BuiltIns.ArithMult, arithType) end local val fixedIntEnv = makeStructure(globalEnv, "FixedInt") open BuiltIns fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal fixedIntEnv (name, value) end val compareType = fixedIntType ** fixedIntType ->> Bool and binaryType = fixedIntType ** fixedIntType ->> fixedIntType fun enterComparison(name, test) = enterBinary(name, WordComparison{test=test, isSigned=true}, compareType) and enterBinaryOp(name, oper) = enterBinary(name, FixedPrecisionArith oper, binaryType) in val () = #enterType fixedIntEnv ("int", TypeConstrSet(fixedIntConstr, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("quot", ArithQuot) val () = enterBinaryOp("rem", ArithRem) end local open BuiltIns val largeWordEnv = makeStructure(globalEnv, "LargeWord") fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal largeWordEnv (name, value) end val compareType = LargeWord ** LargeWord ->> Bool and binaryType = LargeWord ** LargeWord ->> LargeWord and shiftType = LargeWord ** Word ->> LargeWord (* The shift amount is a Word. 
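Concretely, whatever the width of the word being shifted, the amount is a plain Word.word, matching the Basis WORD signature:

           val shifted: LargeWord.word = LargeWord.<< (0w1, 0w12)   (* 0wx1000 *)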
*) fun enterComparison(name, test) = enterBinary(name, LargeWordComparison test, compareType) and enterBinaryOp(name, oper) = enterBinary(name, LargeWordArith oper, binaryType) and enterBinaryLogical(name, oper) = enterBinary(name, LargeWordLogical oper, binaryType) and enterBinaryShift(name, oper) = enterBinary(name, LargeWordShift oper, shiftType) in val () = #enterType largeWordEnv ("word", TypeConstrSet(largeWordType, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("div", ArithDiv) val () = enterBinaryOp("mod", ArithMod) val () = enterBinaryLogical("orb", LogicalOr) val () = enterBinaryLogical("andb", LogicalAnd) val () = enterBinaryLogical("xorb", LogicalXor) val () = enterBinaryShift("<<", ShiftLeft) val () = enterBinaryShift(">>", ShiftRightLogical) val () = enterBinaryShift("~>>", ShiftRightArithmetic) val LargeWord = LargeWord end local val wordStructEnv = makeStructure(globalEnv, "Word") open BuiltIns fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal wordStructEnv (name, value) end val compareType = Word ** Word ->> Bool and binaryType = Word ** Word ->> Word fun enterComparison(name, test) = enterBinary(name, WordComparison{test=test, isSigned=false}, compareType) and enterBinaryOp(name, oper) = enterBinary(name, WordArith oper, binaryType) and enterBinaryLogical(name, oper) = enterBinary(name, WordLogical oper, binaryType) and enterBinaryShift(name, oper) = enterBinary(name, WordShift oper, binaryType) val toLargeWordFn = mkGvar ("toLargeWord", Word ->> LargeWord, mkUnaryFn UnsignedToLongWord, declInBasis) and toLargeWordXFn = mkGvar ("toLargeWordX", Word ->> LargeWord, mkUnaryFn SignedToLongWord, declInBasis) and fromLargeWordFn = mkGvar ("fromLargeWord", LargeWord ->> Word, mkUnaryFn LongWordToTagged, declInBasis) in val () = #enterType wordStructEnv ("word", TypeConstrSet(wordConstr, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("div", ArithDiv) val () = enterBinaryOp("mod", ArithMod) val () = enterBinaryLogical("orb", LogicalOr) val () = enterBinaryLogical("andb", LogicalAnd) val () = enterBinaryLogical("xorb", LogicalXor) val () = enterBinaryShift("<<", ShiftLeft) val () = enterBinaryShift(">>", ShiftRightLogical) val () = enterBinaryShift("~>>", ShiftRightArithmetic) val () = #enterVal wordStructEnv ("toLargeWord", toLargeWordFn) val () = #enterVal wordStructEnv ("toLargeWordX", toLargeWordXFn) val () = #enterVal wordStructEnv ("fromLargeWord", fromLargeWordFn) end local val charEnv = makeStructure(globalEnv, "Char") open BuiltIns (* Comparison functions are the same as Word. 
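so they coincide with comparing the character codes; a quick illustrative check:

           val true = (#"a" < #"b") = (ord #"a" < ord #"b")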
*) fun enterComparison(name, test) = let val typ = Char ** Char ->> Bool val entry = mkBinaryFn(WordComparison{test=test, isSigned=false}) val value = mkGvar (name, typ, entry, declInBasis) in #enterVal charEnv (name, value) end in val () = #enterType charEnv ("char", TypeConstrSet(charConstr, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) end local val stringEnv = makeStructure(globalEnv, "String") in val () = #enterType stringEnv ("string", TypeConstrSet(stringConstr, [])) end local val realEnv = makeStructure(globalEnv, "Real") (* These are only used in Real so are included here rather than in RunCall. rtsCallFastRealtoReal is used for functions such as sqrt. rtsCallFastGeneraltoReal is used for Real.fromLargeInt. *) val debugOpts = [] (* Place to add debugging if necessary. *) fun makeFastRealRealCall entryName = CODETREE.Foreign.rtsCallFastRealtoReal (entryName, debugOpts) and makeFastRealRealRealCall entryName = CODETREE.Foreign.rtsCallFastRealRealtoReal (entryName, debugOpts) and makeFastIntInfRealCall entryName = CODETREE.Foreign.rtsCallFastGeneraltoReal (entryName, debugOpts) and makeFastRealGeneralRealCall entryName = CODETREE.Foreign.rtsCallFastRealGeneraltoReal (entryName, debugOpts) val rtsCallFastR_REntry = makeRunCallTupled([DoubleFloatType], DoubleFloatType, toMachineWord makeFastRealRealCall) (* This needs to be tupled. *) val rtsCallFastRR_REntry = makeRunCallTupled([DoubleFloatType, DoubleFloatType], DoubleFloatType, toMachineWord makeFastRealRealRealCall) and rtsCallFastRI_REntry = makeRunCallTupled([DoubleFloatType, GeneralType], DoubleFloatType, toMachineWord makeFastRealGeneralRealCall) val rtsCallFastI_REntry = makeRunCallTupled([GeneralType], DoubleFloatType, toMachineWord makeFastIntInfRealCall) val rtsCallFastF_F = mkGvar ("rtsCallFastR_R", String ->> Real ->> Real, rtsCallFastR_REntry, declInBasis) val rtsCallFastFF_F = mkGvar ("rtsCallFastRR_R", String ->> Real ** Real ->> Real, rtsCallFastRR_REntry, declInBasis) val rtsCallFastFG_F = mkGvar ("rtsCallFastRI_R", String ->> Real ** Int ->> Real, rtsCallFastRI_REntry, declInBasis) val rtsCallFastG_F = mkGvar ("rtsCallFastI_R", String ->> intInfType ->> Real, rtsCallFastI_REntry, declInBasis) fun enterUnary(name, oper, typ) = let val value = mkGvar (name, typ, mkUnaryFn oper, declInBasis) in #enterVal realEnv (name, value) end fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal realEnv (name, value) end val compareType = Real ** Real ->> Bool and binaryType = Real ** Real ->> Real and unaryType = Real ->> Real and realToFixType = Real ->> fixedIntType open BuiltIns IEEEReal fun enterComparison(name, test) = enterBinary(name, RealComparison(test, PrecDouble), compareType) and enterBinaryOp(name, oper) = enterBinary(name, RealArith(oper, PrecDouble), binaryType) in val () = #enterType realEnv ("real", TypeConstrSet(realConstr, [])) val () = #enterVal realEnv ("rtsCallFastR_R", rtsCallFastF_F) val () = #enterVal realEnv ("rtsCallFastRR_R", rtsCallFastFF_F) val () = #enterVal realEnv ("rtsCallFastRI_R", rtsCallFastFG_F) val () = #enterVal realEnv ("rtsCallFastI_R", rtsCallFastG_F) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterComparison("==", TestEqual) (* real is not an eqtype. 
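which is why a distinct IEEE equality is provided rather than relying on structural equality. For instance a NaN never compares equal to itself while the two zeros do:

           val nan = 0.0 / 0.0
           val notReflexive = Real.== (nan, nan)     (* false *)
           val sameZeros    = Real.== (0.0, ~0.0)    (* true *)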
*) (* Included unordered mainly because it's easy to implement isNan. *) val () = enterComparison("unordered", TestUnordered) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("/", ArithDiv) val () = enterUnary("~", RealNeg PrecDouble, unaryType) val () = enterUnary("abs", RealAbs PrecDouble, unaryType) val () = enterUnary("fromFixedInt", RealFixedInt PrecDouble, fixedIntType ->> Real) val () = enterUnary("truncFix", RealToInt(PrecDouble, TO_ZERO), realToFixType) val () = enterUnary("roundFix", RealToInt(PrecDouble, TO_NEAREST), realToFixType) val () = enterUnary("ceilFix", RealToInt(PrecDouble, TO_POSINF), realToFixType) val () = enterUnary("floorFix", RealToInt(PrecDouble, TO_NEGINF), realToFixType) end local val real32Env = makeStructure(globalEnv, "Real32") val floatType = mkTypeConstruction ("real", floatConstr, [], []) val Float = floatType val debugOpts = [] (* Place to add debugging if necessary. *) fun makeFastFloatFloatCall entryName = CODETREE.Foreign.rtsCallFastFloattoFloat (entryName, debugOpts) and makeFastFloatFloatFloatCall entryName = CODETREE.Foreign.rtsCallFastFloatFloattoFloat (entryName, debugOpts) and makeFastIntInfFloatCall entryName = CODETREE.Foreign.rtsCallFastGeneraltoFloat (entryName, debugOpts) and makeFastFloatGeneralFloatCall entryName = CODETREE.Foreign.rtsCallFastFloatGeneraltoFloat (entryName, debugOpts) val rtsCallFastR_REntry = makeRunCallTupled([SingleFloatType], SingleFloatType, toMachineWord makeFastFloatFloatCall) (* This needs to be tupled. *) val rtsCallFastRR_REntry = makeRunCallTupled([SingleFloatType, SingleFloatType], SingleFloatType, toMachineWord makeFastFloatFloatFloatCall) and rtsCallFastRI_REntry = makeRunCallTupled([SingleFloatType, GeneralType], SingleFloatType, toMachineWord makeFastFloatGeneralFloatCall) val rtsCallFastI_REntry = makeRunCallTupled([GeneralType], SingleFloatType, toMachineWord makeFastIntInfFloatCall) val rtsCallFastF_F = mkGvar ("rtsCallFastF_F", String ->> Float ->> Float, rtsCallFastR_REntry, declInBasis) val rtsCallFastFF_F = mkGvar ("rtsCallFastFF_F", String ->> Float ** Float ->> Float, rtsCallFastRR_REntry, declInBasis) val rtsCallFastFG_F = mkGvar ("rtsCallFastFI_F", String ->> Float ** Int ->> Float, rtsCallFastRI_REntry, declInBasis) val rtsCallFastG_F = mkGvar ("rtsCallFastI_F", String ->> intInfType ->> Float, rtsCallFastI_REntry, declInBasis) fun enterUnary(name, oper, typ) = let val value = mkGvar (name, typ, mkUnaryFn oper, declInBasis) in #enterVal real32Env (name, value) end fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal real32Env (name, value) end val compareType = Float ** Float ->> Bool and binaryType = Float ** Float ->> Float and unaryType = Float ->> Float and floatToFixType = Float ->> fixedIntType open BuiltIns IEEEReal fun enterComparison(name, test) = enterBinary(name, RealComparison(test, PrecSingle), compareType) and enterBinaryOp(name, oper) = enterBinary(name, RealArith(oper, PrecSingle), binaryType) in val () = #enterType real32Env ("real", TypeConstrSet(floatConstr, [])) val () = enterUnary("toLarge", BuiltIns.FloatToDouble, floatType ->> Real) (* Conversion with the current rounding mode. 
*) and () = enterUnary("fromReal", BuiltIns.DoubleToFloat, Real ->> floatType) val () = #enterVal real32Env ("rtsCallFastR_R", rtsCallFastF_F) val () = #enterVal real32Env ("rtsCallFastRR_R", rtsCallFastFF_F) val () = #enterVal real32Env ("rtsCallFastRI_R", rtsCallFastFG_F) val () = #enterVal real32Env ("rtsCallFastI_R", rtsCallFastG_F) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterComparison("==", TestEqual) (* Real32.real is not an eqtype. *) val () = enterComparison("unordered", TestUnordered) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("/", ArithDiv) val () = enterUnary("~", RealNeg PrecSingle, unaryType) val () = enterUnary("abs", RealAbs PrecSingle, unaryType) val () = enterUnary("fromFixedInt", RealFixedInt PrecSingle, fixedIntType ->> floatType) val () = enterUnary("truncFix", RealToInt(PrecSingle, TO_ZERO), floatToFixType) val () = enterUnary("roundFix", RealToInt(PrecSingle, TO_NEAREST), floatToFixType) val () = enterUnary("ceilFix", RealToInt(PrecSingle, TO_POSINF), floatToFixType) val () = enterUnary("floorFix", RealToInt(PrecSingle, TO_NEGINF), floatToFixType) end val bootstrapEnv = makeStructure(globalEnv, "Bootstrap") fun enterBootstrap (name : string, entry : codetree, typ : types) : unit = let val value = mkGvar (name, typ, entry, declInBasis) in #enterVal bootstrapEnv (name, value) end local val threadEnv = makeStructure(globalEnv, "Thread") open TypeValue fun monoTypePrinter _ = PRETTY.PrettyString "?" val code = createTypeValue{ eqCode=equalPointerOrWordFn, printCode=mkConst (toMachineWord (ref monoTypePrinter)), boxedCode=boxedAlways, sizeCode=singleWord } (* Thread.thread type. This is an equality type with pointer equality. *) val threadConstr= makeTypeConstructor ( "thread", [], makeFreeId(0, Global (genCode(code, [], 0) ()), true, basisDescription "thread"), [DeclaredAt inBasis]) val threadType = mkTypeConstruction ("thread", threadConstr, [], []); val selfFunction = mkGvar ("self", Unit ->> threadType, getCurrentThreadIdFn, declInBasis) val createMutexFunction = mkGvar("createMutex", Unit ->> Ref Word, createMutexFn, declInBasis) and lockMutexFunction = mkGvar("lockMutex", Ref Word ->> Bool, mkUnaryFn BuiltIns.LockMutex, declInBasis) and tryLockMutexFunction = mkGvar("tryLockMutex", Ref Word ->> Bool, mkUnaryFn BuiltIns.TryLockMutex, declInBasis) and unlockMutexFunction = mkGvar("unlockMutex", Ref Word ->> Bool, mkUnaryFn BuiltIns.UnlockMutex, declInBasis) in val () = #enterType threadEnv ("thread", TypeConstrSet(threadConstr, [])) val () = #enterVal threadEnv ("self", selfFunction) val () = #enterVal threadEnv ("createMutex", createMutexFunction) val () = #enterVal threadEnv ("lockMutex", lockMutexFunction) val () = #enterVal threadEnv ("tryLockMutex", tryLockMutexFunction) val () = #enterVal threadEnv ("unlockMutex", unlockMutexFunction) end local val fmemEnv = makeStructure(globalEnv, "ForeignMemory") val a = makeTV() (* We don't have Word8.word or Word32.word at this point so the easiest way to deal with this is to make them polymorphic. 
*) val get8Function = mkGvar("get8", LargeWord ** Word ->> a, makePolymorphic([a], mkLoadOperationFn LoadStoreC8), declInBasis) val get16Function = mkGvar("get16", LargeWord ** Word ->> Word, mkLoadOperationFn LoadStoreC16, declInBasis) val get32Function = mkGvar("get32", LargeWord ** Word ->> a, makePolymorphic([a], mkLoadOperationFn LoadStoreC32), declInBasis) val get64Function = mkGvar("get64", LargeWord ** Word ->> LargeWord, mkLoadOperationFn LoadStoreC64, declInBasis) val getFloatFunction = mkGvar("getFloat", LargeWord ** Word ->> Real, mkLoadOperationFn LoadStoreCFloat, declInBasis) val getDoubleFunction = mkGvar("getDouble", LargeWord ** Word ->> Real, mkLoadOperationFn LoadStoreCDouble, declInBasis) val set8Function = mkGvar("set8", mkProductType[LargeWord, Word, a] ->> Unit, makePolymorphic([a], mkStoreOperationFn LoadStoreC8), declInBasis) val set16Function = mkGvar("set16", mkProductType[LargeWord, Word, Word] ->> Unit, mkStoreOperationFn LoadStoreC16, declInBasis) val set32Function = mkGvar("set32", mkProductType[LargeWord, Word, a] ->> Unit, makePolymorphic([a], mkStoreOperationFn LoadStoreC32), declInBasis) val set64Function = mkGvar("set64", mkProductType[LargeWord, Word, LargeWord] ->> Unit, mkStoreOperationFn LoadStoreC64, declInBasis) val setFloatFunction = mkGvar("setFloat", mkProductType[LargeWord, Word, Real] ->> Unit, mkStoreOperationFn LoadStoreCFloat, declInBasis) val setDoubleFunction = mkGvar("setDouble", mkProductType[LargeWord, Word, Real] ->> Unit, mkStoreOperationFn LoadStoreCDouble, declInBasis) val allocCStackFn = mkGvar("allocCStack", Word ->> LargeWord, mkUnaryFn BuiltIns.AllocCStack, declInBasis) val freeCStackFn = mkGvar("freeCStack", LargeWord ** Word ->> Unit, mkBinaryFn BuiltIns.FreeCStack, declInBasis) in val () = #enterVal fmemEnv ("get8", get8Function) val () = #enterVal fmemEnv ("get16", get16Function) val () = #enterVal fmemEnv ("get32", get32Function) val () = #enterVal fmemEnv ("get64", get64Function) val () = #enterVal fmemEnv ("getFloat", getFloatFunction) val () = #enterVal fmemEnv ("getDouble", getDoubleFunction) val () = #enterVal fmemEnv ("set8", set8Function) val () = #enterVal fmemEnv ("set16", set16Function) val () = #enterVal fmemEnv ("set32", set32Function) val () = #enterVal fmemEnv ("set64", set64Function) val () = #enterVal fmemEnv ("setFloat", setFloatFunction) val () = #enterVal fmemEnv ("setDouble", setDoubleFunction) val () = #enterVal fmemEnv ("allocCStack", allocCStackFn) (* Free is a binary operation that takes both the allocated address and the size. The size is used by the compiled code where this is implemented using the C-stack. The allocated address is intended for possible use by the interpreter where so that it can be implemented as malloc/free. 
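A usage sketch implied by the two types declared here: the caller keeps the size and hands back both the address and that same size when freeing:

           val addr = ForeignMemory.allocCStack 0w16
           val () = ForeignMemory.freeCStack (addr, 0w16)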
*) val () = #enterVal fmemEnv ("freeCStack", freeCStackFn) end local val foreignEnv = makeStructure(globalEnv, "Foreign") local val EXC_foreign = 23 val foreignException = Value{ name = "Foreign", typeOf = String ->> TYPETREE.exnType, access = Global(mkConst(toMachineWord EXC_foreign)), class = Exception, locations = declInBasis, references = NONE, instanceTypes=NONE } in val () = #enterVal foreignEnv ("Foreign", foreignException) end val arg0 = mkLoadArgument 0 val arg1 = mkLoadArgument 1 local val callForeignCall = mkEval(mkConst (toMachineWord CODETREE.Foreign.foreignCall), [arg0]) val innerBody = mkEval(mkLoadClosure 0, [mkInd(0, arg0), mkInd(1, arg0), mkInd(2, arg0)]) val outerBody = mkEnv([mkDec(0, callForeignCall)], mkInlproc(innerBody, 1, "foreignCall(1)(1)", [mkLoadLocal 0], 0)) in val foreignCallEntry = mkInlproc(outerBody, 1, "foreignCall(1)", [], 1) end local (* Build a callback. First apply the compiler to the abi/argtype/restype values. Then apply the result to a function to generate the final C callback code. The C callback code calls the function with two arguments. Here we have to pass it a function that expects a tuple and unwrap it. *) val innerMost = mkInlproc(mkEval(mkLoadClosure 0, [mkTuple[arg0, arg1]]), 2, "buildCallBack(1)(1)2", [mkLoadArgument 0], 0) val resultFn = mkInlproc(mkEval(mkLoadClosure 0, [innerMost]), 1, "buildCallBack(1)(1)", [mkLoadLocal 0], 0) val firstBuild = mkEval(mkConst (toMachineWord CODETREE.Foreign.buildCallBack), [arg0]) val outerBody = mkEnv([mkDec(0, firstBuild)], resultFn) in val buildCallBackEntry = mkInlproc(outerBody, 1, "buildCallBack(1)", [], 1) end (* Abi - an eqtype. An enumerated type or short int. *) local open TypeValue fun monotypePrinter _ = PRETTY.PrettyString "?" val code = createTypeValue{ eqCode = equalTaggedWordFn, printCode = mkConst (toMachineWord (ref monotypePrinter)), boxedCode = boxedNever, sizeCode = singleWord } val abiEqAndPrint = Global (genCode(code, [], 0) ()) in val abiConstr = makeTypeConstructor("abi", [], makeFreeId(0, abiEqAndPrint, true, basisDescription "Foreign.LowLevel.abi"), declInBasis) end val () = #enterType foreignEnv ("abi", TypeConstrSet(abiConstr, [])) val abiType = mkTypeConstruction ("abi", abiConstr, [], declInBasis) (* It would be possible to put the definition of cType in here but it's complicated. It's easier to use an opaque type and put in a cast later. *) val ctypeConstr = makeTypeConstructor("ctype", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "Foreign.LowLevel.ctype"), declInBasis) val () = #enterType foreignEnv ("ctype", TypeConstrSet(ctypeConstr, [])) val ffiType = mkTypeConstruction ("ctype", ctypeConstr, [], declInBasis) val foreignCallType = mkProductType[abiType, List ffiType, ffiType] ->> mkProductType[LargeWord, LargeWord, LargeWord] ->> Unit val buildCallBackType = mkProductType[abiType, List ffiType, ffiType] ->> (mkProductType[LargeWord, LargeWord] ->> Unit) ->> LargeWord fun enterForeign (name, entry, typ) = #enterVal foreignEnv (name, mkGvar (name, typ, entry, declInBasis)) in val () = enterForeign("foreignCall", foreignCallEntry, foreignCallType) val () = enterForeign("buildCallBack", buildCallBackEntry, buildCallBackType) (* Apply the abiList function here. The ABIs depend on the platform in the interpreted version. 
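Since the function is applied once here, at ML level abiList is simply a list of (name, abi) pairs for the current platform; for example:

           val abiNames: string list = map #1 Foreign.abiList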
*) val () = enterForeign("abiList", mkConst(toMachineWord(CODETREE.Foreign.abiList())), List (String ** abiType)) end local fun addVal (name : string, value : 'a, typ : types) : unit = enterBootstrap (name, mkConst (toMachineWord value), typ) (* These are only used during the bootstrap phase. Replacements are installed once the appropriate modules of the basis library are compiled. *) fun intOfString s = let val radix = if String.size s >= 3 andalso String.substring(s, 0, 2) = "0x" orelse String.size s >= 4 andalso String.substring(s, 0, 3) = "~0x" then StringCvt.HEX else StringCvt.DEC in case StringCvt.scanString (Int.scan radix) s of NONE => raise Conversion "Invalid integer constant" | SOME res => res end fun wordOfString s = let val radix = if String.size s > 2 andalso String.sub(s, 2) = #"x" then StringCvt.HEX else StringCvt.DEC in case StringCvt.scanString (Word.scan radix) s of NONE => raise Conversion "Invalid word constant" | SOME res => res end fun unescapeChar (s: string) : char = let fun rdr i = if i = size s then NONE else SOME(String.sub(s, i), i+1) in case Char.scan rdr 0 of NONE => (* Bad conversion *) raise Conversion "Invalid string constant" | SOME(res, _) => res end open PRINTTABLE val convstringCode = genCode(mkConst(toMachineWord unescapeString), [], 0) () val convintCode = genCode(mkConst(toMachineWord intOfString), [], 0) () val convwordCode = genCode(mkConst(toMachineWord wordOfString), [], 0) () val convcharCode = genCode(mkConst(toMachineWord unescapeChar), [], 0) () in (* We need this for compatibility with the 5.8.2 bootstrap. *) val () = addVal ("convString", unescapeString: string -> string, String ->> String) (* Flag to indicate which version of Int to compile *) val () = addVal ("intIsArbitraryPrecision", intIsArbitraryPrecision, Bool) (* Install the overloads now. *) val () = addOverload("convString", stringConstr, convstringCode) val () = addOverload("convInt", fixedIntConstr, convintCode) val () = addOverload("convInt", intInfConstr, convintCode) val () = addOverload("convWord", wordConstr, convwordCode) val () = addOverload("convChar", charConstr, convcharCode) end (* The only reason we have vector here is to get equality right. We need vector to be an equality type and to have a specific equality function. *) local fun polyTypePrinter _ _ = PRETTY.PrettyString "?" (* The equality function takes the base equality type as an argument. The inner function takes two arguments which are the two vectors to compare, checks the lengths and if they're equal applies the base equality to each field. *) val eqCode = mkInlproc( mkProc( mkEnv([ (* Length of the items. *) mkDec(0, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 0)), mkDec(1, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 1)), mkMutualDecs[(2, (* Loop function. *) mkProc( mkIf( (* Finished? *) mkEqualTaggedWord(mkLoadClosure 0, mkLoadArgument 0), CodeTrue, (* Yes, all equal. *) mkIf( mkEval( TypeValue.extractEquality(mkLoadClosure 2), (* Base equality fn *) [ mkLoadOperation(LoadStoreMLWord{isImmutable=true}, mkLoadClosure 3, mkLoadArgument 0), mkLoadOperation(LoadStoreMLWord{isImmutable=true}, mkLoadClosure 4, mkLoadArgument 0) ]), mkEval(mkLoadClosure 1, (* Recursive call with index+1. 
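The loop being assembled here corresponds to this plain-ML model of polymorphic vector equality (illustrative only): compare the lengths first, then apply the element equality field by field:

           fun vectorEqModel (elemEq: 'a * 'a -> bool) (v1: 'a vector, v2: 'a vector) =
               let
                   val len = Vector.length v1
                   fun loop i =
                       i = len orelse
                       (elemEq (Vector.sub (v1, i), Vector.sub (v2, i)) andalso loop (i + 1))
               in
                   len = Vector.length v2 andalso loop 0
               end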
*) [ mkBinary(BuiltIns.WordArith BuiltIns.ArithAdd, mkLoadArgument 0, mkConst(toMachineWord 1)) ]), CodeFalse (* Not equal elements - result false *) ) ), 1, "vector-loop", [mkLoadLocal 0 (* Length *), mkLoadLocal 2 (* Loop function *), mkLoadClosure 0 (* Base equality function *), mkLoadArgument 0 (* Vector 0 *), mkLoadArgument 1 (* Vector 1 *)], 0))] ], mkIf( (* Test the lengths. *) mkEqualTaggedWord(mkLoadLocal 0, mkLoadLocal 1), (* Equal - test the contents. *) mkEval(mkLoadLocal 2, [CodeZero]), CodeFalse (* Not same length- result false *) ) ), 2, "vector-eq", [mkLoadArgument 0], 3), 1, "vector-eq()", [], 0) val idCode = (* Polytype *) let open TypeValue val code = createTypeValue{ eqCode=eqCode, printCode=mkConst (toMachineWord (ref polyTypePrinter)), boxedCode=mkInlproc(boxedAlways, 1, "boxed-vector", [], 0), sizeCode=mkInlproc(singleWord, 1, "size-vector", [], 0)} in Global (genCode(code, [], 0) ()) end in val vectorType = makeTypeConstructor("vector", [makeTypeVariable()], makeFreeId(1, idCode, true, basisDescription "vector"), declInBasis) val () = enterGlobalType ("vector", TypeConstrSet(vectorType, [])) end (* We also need a type with byte-wise equality. *) local fun monoTypePrinter _ = PRETTY.PrettyString "?" (* This is a monotype equality function that takes two byte vectors and compares them byte-by-byte for equality. Because they are vectors of bytes it's unsafe to load the whole words which could look like addresses if the bottom bit happens to be zero. *) val eqCode = mkProc( mkEnv([ (* Length of the items. *) mkDec(0, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 0)), mkDec(1, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 1)) ], mkIf( (* Test the lengths. *) mkEqualTaggedWord(mkLoadLocal 0, mkLoadLocal 1), (* Equal - test the contents. *) mkEnv([ (* ByteVecEqual takes a byte length so we have to multiply by the number of bytes per word. *) mkDec(2, mkBinary(BuiltIns.WordArith BuiltIns.ArithMult, mkConst(toMachineWord RunCall.bytesPerWord), mkLoadLocal 0)) ], mkBlockOperation{kind=BlockOpEqualByte, leftBase=mkLoadArgument 0, rightBase=mkLoadArgument 1, leftIndex=CodeZero, rightIndex=CodeZero, length=mkLoadLocal 2}), CodeFalse (* Not same length- result false *) ) ), 2, "byteVector-eq", [], 3) val idCode = (* Polytype *) let open TypeValue val code = createTypeValue{ eqCode=eqCode, printCode=mkConst (toMachineWord (ref monoTypePrinter)), boxedCode=boxedAlways, sizeCode=singleWord} in Global (genCode(code, [], 0) ()) end in val byteVectorType = makeTypeConstructor("byteVector", [], makeFreeId(0, idCode, true, basisDescription "byteVector"), declInBasis) val () = #enterType bootstrapEnv ("byteVector", TypeConstrSet(byteVectorType, [])) end (* We also need array and Array2.array to be passed through here so that they have the special property of being eqtypes even if their argument is not. "array" is defined to be in the global environment. *) val () = enterGlobalType ("array", TypeConstrSet(arrayConstr, [])) val () = #enterType bootstrapEnv ("array", TypeConstrSet(array2Constr, [])) val () = #enterType bootstrapEnv ("byteArray", TypeConstrSet(byteArrayConstr, [])) (* "=', '<>', PolyML.print etc are type-specific function which appear to be polymorphic. The compiler recognises these and treats them specially. For (in)equality that means generating type-specific versions of the equality operations; for print etc that means printing in a type-specific way. They can become true polymorphic functions and lose their type-specificity. 
For (in)equality that means defaulting to structure equality which is normal and expected behaviour. For print etc that means losing the ability to print and just printing "?" so it's important to avoid that happening. "open" treats type-specific functions specially and retains the type-specificity. That's important to allow the prelude code to expand the PolyML structure. *) local val eqType = let val a = makeEqTV () in a ** a ->> Bool end val eqVal = mkSpecialFun("=", eqType, Equal) in val () = enterGlobalValue ("=", eqVal) end local val neqType = let val a = makeEqTV () in a ** a ->> Bool end val neqVal = mkSpecialFun("<>", neqType, NotEqual) in val () = enterGlobalValue ("<>", neqVal) end val polyMLEnv = makeStructure(globalEnv, "PolyML") val enterPolyMLVal = #enterVal polyMLEnv local (* This version of the environment must match that used in the NameSpace structure. *) open TYPETREE (* Create a new structure for them. *) val nameSpaceEnv = makeStructure(polyMLEnv, "NameSpace") (* Substructures. *) val valuesEnv = makeStructure(nameSpaceEnv, "Values") and typesEnv = makeStructure(nameSpaceEnv, "TypeConstrs") and fixesEnv = makeStructure(nameSpaceEnv, "Infixes") and structsEnv = makeStructure(nameSpaceEnv, "Structures") and sigsEnv = makeStructure(nameSpaceEnv, "Signatures") and functsEnv = makeStructure(nameSpaceEnv, "Functors") (* Types for the basic values. These are opaque. *) val valueVal = makeAndDeclareOpaqueType("value", "PolyML.NameSpace.Values.value", valuesEnv) (* Representation of the type of a value. *) val Types = makeAndDeclareOpaqueType("typeExpression", "PolyML.NameSpace.Values.typeExpression", valuesEnv) val typeVal = makeAndDeclareOpaqueType("typeConstr", "PolyML.NameSpace.TypeConstrs.typeConstr", typesEnv) val fixityVal = makeAndDeclareOpaqueType("fixity", "PolyML.NameSpace.Infixes.fixity", fixesEnv) val signatureVal = makeAndDeclareOpaqueType("signatureVal", "PolyML.NameSpace.Signatures.signatureVal", sigsEnv) val structureVal = makeAndDeclareOpaqueType("structureVal", "PolyML.NameSpace.Structures.structureVal", structsEnv) val functorVal = makeAndDeclareOpaqueType("functorVal", "PolyML.NameSpace.Functors.functorVal", functsEnv) (* nameSpace type. Labelled record. *) fun createFields(name, vType): { name: string, typeof: types} list = let val enterFun = String ** vType ->> Unit val lookupFun = String ->> Option vType val allFun = Unit ->> List (String ** vType) in [mkLabelEntry("enter" ^ name, enterFun), mkLabelEntry("lookup" ^ name, lookupFun), mkLabelEntry("all" ^ name, allFun)] end (* We have to use the same names as we use in the env type because we're passing "env" values through the bootstrap. *) val valTypes = [("Val", valueVal), ("Type", typeVal), ("Fix", fixityVal), ("Struct", structureVal), ("Sig", signatureVal), ("Funct", functorVal)] val fields = List.foldl (fn (p,l) => createFields p @ l) [] valTypes val recordType = makeTypeAbbreviation("nameSpace", "PolyML.NameSpace.nameSpace", [], mkLabelled(sortLabels fields, true), declInBasis); val () = #enterType nameSpaceEnv ("nameSpace", TypeConstrSet(recordType, [])); (* The result type of the compiler includes valueVal etc. 
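(* As a rough sketch, assuming the form this takes in the final basis library:
   the labelled record built by createFields above is seen from ML as
   PolyML.NameSpace.nameSpace, with an enter/lookup/all triple for each class
   of name.

       type nameSpace =
           { enterVal:   string * Values.value -> unit,
             lookupVal:  string -> Values.value option,
             allVal:     unit -> (string * Values.value) list,
             enterType:  string * TypeConstrs.typeConstr -> unit,
             lookupType: string -> TypeConstrs.typeConstr option,
             allType:    unit -> (string * TypeConstrs.typeConstr) list,
             (* ... and likewise for Fix, Struct, Sig and Funct *) ... }
*)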
*) val resultFields = List.map TYPETREE.mkLabelEntry [("values", List(String ** valueVal)), ("fixes", List(String ** fixityVal)), ("types", List(String ** typeVal)), ("structures", List(String ** structureVal)), ("signatures", List(String ** signatureVal)), ("functors", List(String ** functorVal))] in val nameSpaceType = mkTypeConstruction ("nameSpace", recordType, [], declInBasis) val execResult = mkLabelled(sortLabels resultFields, true) type execResult = { fixes: (string * fixStatus) list, values: (string * values) list, structures: (string * structVals) list, signatures: (string * signatures) list, functors: (string * functors) list, types: (string * typeConstrSet) list } val valueVal = valueVal val typeVal = typeVal val fixityVal = fixityVal val signatureVal = signatureVal val structureVal = structureVal val functorVal = functorVal val Types = Types val valuesEnv = valuesEnv and typesEnv = typesEnv and fixesEnv = fixesEnv and structsEnv = structsEnv and sigsEnv = sigsEnv and functsEnv = functsEnv end local val typeconstr = locationConstr val () = #enterType polyMLEnv ("location", typeconstr); in val Location = mkTypeConstruction ("location", tsConstr typeconstr, [], declInBasis) end (* Interface to the debugger. *) local open TYPETREE val debuggerEnv = makeStructure(polyMLEnv, "DebuggerInterface") (* Make these opaque at this level. *) val locationPropList = makeAndDeclareOpaqueType("locationPropList", "PolyML.DebuggerInterface.locationPropList", debuggerEnv) val typeId = makeAndDeclareOpaqueType("typeId", "PolyML.DebuggerInterface.typeId", debuggerEnv) val machineWordType = makeAndDeclareOpaqueType("machineWord", "PolyML.DebuggerInterface.machineWord", debuggerEnv) (* For long term security keep these as different from global types and sigs. Values in the static environment need to be copied before they are global. *) val localType = makeAndDeclareOpaqueType("localType", "PolyML.DebuggerInterface.localType", debuggerEnv) val localTypeConstr = makeAndDeclareOpaqueType("localTypeConstr", "PolyML.DebuggerInterface.localTypeConstr", debuggerEnv) val localSig = makeAndDeclareOpaqueType("localSig", "PolyML.DebuggerInterface.localSig", debuggerEnv) open DEBUGGER (* Entries in the static list. This type is only used within the implementation of DebuggerInterface in the basis library and does not appear in the final signature. *) val environEntryConstr = makeTypeConstructor("environEntry", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "PolyML.DebuggerInterface.environEntry"), declInBasis) val environEntryType = mkTypeConstruction ("environEntry", environEntryConstr, [], declInBasis) val constrs = (* Order is significant. *) [ ("EnvEndFunction", mkProductType[String, Location, localType]), ("EnvException", mkProductType[String, localType, locationPropList]), ("EnvStartFunction", mkProductType[String, Location, localType]), ("EnvStructure", mkProductType[String, localSig, locationPropList]), ("EnvTConstr", String ** localTypeConstr), ("EnvTypeid", typeId ** typeId), ("EnvVConstr", mkProductType[String, localType, Bool, Int, locationPropList]), ("EnvValue", mkProductType[String, localType, locationPropList]) ] (* This representation must match the representation defined in DEBUGGER_.sml. 
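(* A sketch of the datatype these constructors model (the real declaration is
   in DEBUGGER_.sml; the names and argument types are taken from the list
   above, and the alphabetical ordering matters because chooseConstrRepr
   assigns representations positionally):

       datatype environEntry =
           EnvEndFunction   of string * location * localType
         | EnvException     of string * localType * locationProp list
         | EnvStartFunction of string * location * localType
         | EnvStructure     of string * localSig * locationProp list
         | EnvTConstr       of string * localTypeConstr
         | EnvTypeid        of typeId * typeId
         | EnvVConstr       of string * localType * bool * int * locationProp list
         | EnvValue         of string * localType * locationProp list
*)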
*) val numConstrs = List.length constrs val {constrs=constrReps, ...} = chooseConstrRepr(constrs, []) val constructors = ListPair.map (fn ((s,t), code) => mkGconstr(s, t ->> environEntryType, code, false, numConstrs, declInBasis)) (constrs, constrReps) val () = List.app (fn c => #enterVal debuggerEnv(valName c, c)) constructors (* Put these constructors onto the type. *) val () = #enterType debuggerEnv ("environEntry", TypeConstrSet(environEntryConstr, constructors)) (* Debug state type. *) val debugStateConstr = makeTypeAbbreviation("debugState", "PolyML.DebuggerInterface.debugState", [], mkProductType[List environEntryType, List machineWordType, Location], declInBasis) val () = #enterType debuggerEnv ("debugState", TypeConstrSet(debugStateConstr, [])) val debugStateType = mkTypeConstruction ("debugState", debugStateConstr, [], declInBasis) in val () = applyList (fn (name, v, t) => #enterVal debuggerEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("makeValue", toMachineWord(makeValue: debugState -> string * types * locationProp list * machineWord -> values), debugStateType ->> mkProductType[String, localType, locationPropList, machineWordType] ->> valueVal), ("makeException", toMachineWord(makeException: debugState -> string * types * locationProp list * machineWord -> values), debugStateType ->> mkProductType[String, localType, locationPropList, machineWordType] ->> valueVal), ("makeConstructor", toMachineWord(makeConstructor: debugState -> string * types * bool * int * locationProp list * machineWord -> values), debugStateType ->> mkProductType[String, localType, Bool, Int, locationPropList, machineWordType] ->> valueVal), ("makeAnonymousValue", toMachineWord(makeAnonymousValue: debugState -> types * machineWord -> values), debugStateType ->> mkProductType[localType, machineWordType] ->> valueVal), ("makeStructure", toMachineWord(makeStructure: debugState -> string * signatures * locationProp list * machineWord -> structVals), debugStateType ->> mkProductType[String, localSig, locationPropList, machineWordType] ->> structureVal), ("makeTypeConstr", toMachineWord(makeTypeConstr: debugState -> typeConstrSet -> typeConstrSet), debugStateType ->> localTypeConstr ->> typeVal), ("unitValue", toMachineWord(mkGvar("", unitType, CodeZero, []): values), valueVal), (* Used as a default *) ("setOnEntry", toMachineWord(setOnEntry: (string * PolyML.location -> unit) option -> unit), Option (String ** Location ->> Unit) ->> Unit), ("setOnExit", toMachineWord(setOnExit: (string * PolyML.location -> unit) option -> unit), Option (String ** Location ->> Unit) ->> Unit), ("setOnExitException", toMachineWord(setOnExitException: (string * PolyML.location -> exn -> unit) option -> unit), Option (String ** Location ->> Exn ->> Unit) ->> Unit), ("setOnBreakPoint", toMachineWord(setOnBreakPoint: (PolyML.location * bool ref -> unit) option -> unit), Option (Location ** Ref Bool ->> Unit) ->> Unit) ] end local val typeconstr = contextConstr in val () = #enterType polyMLEnv ("context", typeconstr); val () = List.app(fn(tv as Value{name, ...}) => #enterVal polyMLEnv(name, tv)) (tsConstructors typeconstr) end local val typeconstr = prettyConstr in val () = #enterType polyMLEnv ("pretty", typeconstr); val () = List.app(fn(tv as Value{name, ...}) => #enterVal polyMLEnv(name, tv)) (tsConstructors typeconstr) val PrettyType = mkTypeConstruction ("pretty", tsConstr typeconstr, [], declInBasis) end local val printType = let val a = makePrintTV () in a ->> a end; val printVal = mkSpecialFun("print", printType, 
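(* Usage sketch, hypothetical client code rather than part of this module: the
   DebuggerInterface hooks registered above are what the basis-library
   debugger uses to be told about function entry, exit and breakpoints.  With
   the types given above:

       PolyML.DebuggerInterface.setOnEntry
           (SOME (fn (name, _) => TextIO.print ("enter " ^ name ^ "\n")));
       PolyML.DebuggerInterface.setOnBreakPoint
           (SOME (fn (_, breakFlag) => breakFlag := true));

   Passing NONE uninstalls a hook again. *)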
Print); in val () = enterPolyMLVal ("print", printVal); end; local val makeStringType = let val a = makePrintTV () in a ->> String end; val makeStringVal = mkSpecialFun("makestring", makeStringType, MakeString); in val () = enterPolyMLVal ("makestring", makeStringVal); end; local val prettyType = let val a = makePrintTV () in a ** fixedIntType ->> PrettyType end; val prettyVal = mkSpecialFun("prettyRepresentation", prettyType, GetPretty); in val () = enterPolyMLVal ("prettyRepresentation", prettyVal); end; local (* addPrettyPrinter is the new function to install a pretty printer. *) val a = makeTV () val b = makeTV () val addPrettyType = (TYPETREE.fixedIntType ->> b ->> a ->> PrettyType) ->> Unit; val addPrettyVal = mkSpecialFun("addPrettyPrinter", addPrettyType, AddPretty); in val () = enterPolyMLVal ("addPrettyPrinter", addPrettyVal); end; (* This goes in RunCall since it's only for the basis library. *) local val addOverloadType = let val a = makeTV () and b = makeTV () in (a ->> b) ->> String ->> Unit end; val addOverloadVal = mkSpecialFun("addOverload", addOverloadType, AddOverload); in val () = #enterVal runCallEnv ("addOverload", addOverloadVal); end local (* Add a function to switch the default integer type. *) fun setType isArbitrary = setPreferredInt(if isArbitrary then intInfConstr else fixedIntConstr) in val () = #enterVal runCallEnv ("setDefaultIntTypeArbitrary", mkGvar ("setDefaultIntTypeArbitrary", Bool ->> Unit, mkConst (toMachineWord setType), declInBasis)) end local val sourceLocVal = mkSpecialFun("sourceLocation", Unit ->> Location, GetLocation); in val () = enterPolyMLVal ("sourceLocation", sourceLocVal); end; local (* This is used as one of the arguments to the compiler function. *) open TYPETREE val uniStructEnv = makeStructure(bootstrapEnv, "Universal") fun enterUniversal (name : string, entry : codetree, typ : types) : unit = let val value = mkGvar (name, typ, entry, declInBasis); in #enterVal uniStructEnv (name, value) end; local fun polyTypePrinter _ _ = PRETTY.PrettyString "?" open TypeValue val idCode = let val code = createTypeValue{ eqCode=CodeZero, (* Not an equality type *) printCode=mkConst (toMachineWord (ref polyTypePrinter)), boxedCode=mkInlproc(boxedEither(* Assume worst case *), 1, "boxed-tag", [], 0), sizeCode=mkInlproc(singleWord, 1, "size-tag", [], 0)} in Global (genCode(code, [], 0) ()) end in (* type 'a tag *) val tagConstr = makeTypeConstructor("tag", [makeTypeVariable()], makeFreeId(1, idCode, false, basisDescription "tag"), declInBasis); val () = #enterType uniStructEnv ("tag", TypeConstrSet(tagConstr, [])) end (* type universal *) val univConstr = makeTypeConstructor("universal", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "universal"), declInBasis); val () = #enterType uniStructEnv ("universal", TypeConstrSet(univConstr, [])); fun Tag base = mkTypeConstruction ("tag", tagConstr, [base], declInBasis) val Universal = mkTypeConstruction ("universal", univConstr, [], declInBasis) val a = makeTV() (* val tagInject : 'a tag -> 'a -> universal *) val injectType = Tag a ->> a ->> Universal val () = enterUniversal ("tagInject", makePolymorphic([a], mkConst (toMachineWord (Universal.tagInject: 'a Universal.tag -> 'a -> Universal.universal))), injectType) (* We don't actually need tagIs and tagProject since this is only used for the compiler. Universal is redefined in the basis library. 
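(* Illustrative use of the operations passed through here, assuming the
   Universal structure of the compiler's own library (Universal.tag creates a
   fresh tag):

       val intTag : int Universal.tag = Universal.tag ()
       val u  = Universal.tagInject intTag 42     (* : Universal.universal *)
       val ok = Universal.tagIs intTag u          (* true *)
       val n  = Universal.tagProject intTag u     (* 42; only valid when tagIs is true *)
*)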
*) val projectType = Tag a ->> Universal ->> a val () = enterUniversal ("tagProject", makePolymorphic([a], mkConst (toMachineWord(Universal.tagProject: 'a Universal.tag -> Universal.universal -> 'a))), projectType) val testType = Tag a ->> Universal ->> Bool val () = enterUniversal ("tagIs", makePolymorphic([a], mkConst (toMachineWord(Universal.tagIs: 'a Universal.tag -> Universal.universal -> bool))), testType) in val Tag = Tag and Universal = Universal end local open TYPETREE (* Parsetree properties datatype. *) val propConstr = makeTypeConstructor("ptProperties", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "PolyML.ptProperties"), declInBasis); val PtProperties = mkTypeConstruction ("ptProperties", propConstr, [], declInBasis) (* Parsetree type. *) val parseTreeConstr = makeTypeAbbreviation("parseTree", "PolyML.parseTree", [], Location ** List PtProperties, declInBasis); val ParseTree = mkTypeConstruction ("parseTree", parseTreeConstr, [], declInBasis) val () = #enterType polyMLEnv ("parseTree", TypeConstrSet(parseTreeConstr, [])); val constrs = (* Order is significant. *) [ ("PTbreakPoint", Ref Bool), ("PTcompletions", List String), ("PTdeclaredAt", Location), ("PTdefId", fixedIntType), ("PTfirstChild", Unit ->> ParseTree), ("PTnextSibling", Unit ->> ParseTree), ("PTopenedAt", Location), ("PTparent", Unit ->> ParseTree), ("PTpreviousSibling", Unit ->> ParseTree), ("PTprint", fixedIntType ->> PrettyType), ("PTreferences", Bool ** List Location), ("PTrefId", fixedIntType), ("PTstructureAt", Location), ("PTtype", Types) ]; (* This representation must match the representation defined in ExportTree.sml. *) val numConstrs = List.length constrs val {constrs=constrReps, ...} = chooseConstrRepr(constrs, []) val constructors = ListPair.map (fn ((s,t), code) => mkGconstr(s, t ->> PtProperties, code, false, numConstrs, declInBasis)) (constrs, constrReps) val () = List.app (fn c => #enterVal polyMLEnv(valName c, c)) constructors (* Put these constructors onto the type. *) val () = #enterType polyMLEnv ("ptProperties", TypeConstrSet(propConstr, constructors)); in val ParseTree = ParseTree and PtProperties = PtProperties end local open TYPETREE val compilerType : types = mkProductType[nameSpaceType, Unit ->> Option Char, List Universal] ->> mkProductType[Option ParseTree, Option (Unit ->> execResult)] type compilerType = nameSpace * (unit -> char option) * Universal.universal list -> exportTree option * (unit->execResult) option in val () = enterBootstrap ("use", mkConst (toMachineWord ((useIntoEnv globalTable []): string -> unit)), String ->> Unit) val () = enterBootstrap ("useWithParms", mkConst (toMachineWord ((useIntoEnv globalTable): Universal.universal list -> string -> unit)), List Universal ->> String ->> Unit) val () = enterPolyMLVal("compiler", mkGvar ("compiler", compilerType, mkConst (toMachineWord (compiler: compilerType)), declInBasis)); val () = enterBootstrap("globalSpace", mkConst (toMachineWord(gEnvAsNameSpace globalTable: nameSpace)), nameSpaceType) (* Add a print function so we can print a message at the start of a bootstrap phase. 
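(* Sketch of how the compiler value exported just above is driven; this is a
   hypothetical caller written against the type given here.  The reader
   supplies one character at a time and the result pairs an optional parse
   tree with an optional executor.

       fun compileString (space: nameSpace) (s: string) =
       let
           val pos = ref 0
           fun reader () =
               if !pos >= String.size s then NONE
               else SOME (String.sub (s, !pos)) before pos := !pos + 1
       in
           case compiler (space, reader, []) of
               (_, SOME run) => ignore (run ())    (* execute the compiled code *)
             | (_, NONE)     => ()                 (* nothing to execute *)
       end
*)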
*) val () = enterBootstrap("print", mkConst (toMachineWord TextIO.print), String ->> Unit) end; local val ty = TYPETREE.mkOverloadSet[] val addType = ty ** ty ->> ty; val negType = ty ->> ty; val cmpType = ty ** ty ->> Bool; in val () = enterGlobalValue ("+", mkOverloaded "+" addType); val () = enterGlobalValue ("-", mkOverloaded "-" addType); val () = enterGlobalValue ("*", mkOverloaded "*" addType); val () = enterGlobalValue ("~", mkOverloaded "~" negType); val () = enterGlobalValue ("abs", mkOverloaded "abs" negType); val () = enterGlobalValue (">=", mkOverloaded ">=" cmpType); val () = enterGlobalValue ("<=", mkOverloaded "<=" cmpType); val () = enterGlobalValue (">", mkOverloaded ">" cmpType); val () = enterGlobalValue ("<", mkOverloaded "<" cmpType); (* The following overloads are added in ML97 *) val () = enterGlobalValue ("div", mkOverloaded "div" addType); val () = enterGlobalValue ("mod", mkOverloaded "mod" addType); val () = enterGlobalValue ("/", mkOverloaded "/" addType); end; local open DEBUG; local open TYPETREE val fields = [ mkLabelEntry("location", Location), mkLabelEntry("hard", Bool), mkLabelEntry("message", PrettyType), mkLabelEntry("context", Option PrettyType) ] in val errorMessageProcType = mkLabelled(sortLabels fields, true) ->> Unit type errorMessageProcType = { location: location, hard: bool, message: pretty, context: pretty option } -> unit end local open TYPETREE val optNav = Option(Unit->>ParseTree) val fields = [ mkLabelEntry("parent", optNav), mkLabelEntry("next", optNav), mkLabelEntry("previous", optNav) ] in val navigationType = mkLabelled(sortLabels fields, true) type navigationType = { parent: (unit->exportTree) option, next: (unit->exportTree) option, previous: (unit->exportTree) option } end type 'a tag = 'a Universal.tag in val () = applyList (fn (name, v, t) => enterBootstrap(name, mkConst v, t)) [ ("compilerVersion", toMachineWord (VERSION.compilerVersion: string), String), ("compilerVersionNumber", toMachineWord (VERSION.versionNumber: int), Int), ("lineNumberTag", toMachineWord (lineNumberTag : (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("offsetTag", toMachineWord (offsetTag: (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("fileNameTag", toMachineWord (fileNameTag: string tag), Tag String), ("bindingCounterTag", toMachineWord (bindingCounterTag: (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("maxInlineSizeTag", toMachineWord (maxInlineSizeTag: FixedInt.int tag), Tag fixedIntType), ("assemblyCodeTag", toMachineWord (assemblyCodeTag: bool tag), Tag Bool), ("parsetreeTag", toMachineWord (parsetreeTag: bool tag), Tag Bool), ("codetreeTag", toMachineWord (codetreeTag: bool tag), Tag Bool), ("icodeTag", toMachineWord (icodeTag: bool tag), Tag Bool), ("lowlevelOptimiseTag", toMachineWord (lowlevelOptimiseTag: bool tag), Tag Bool), ("codetreeAfterOptTag", toMachineWord (codetreeAfterOptTag: bool tag), Tag Bool), ("inlineFunctorsTag", toMachineWord (inlineFunctorsTag: bool tag), Tag Bool), ("compilerDebugTag", toMachineWord (compilerDebugTag: int tag), Tag Int), ("debugTag", toMachineWord (debugTag: bool tag), Tag Bool), ("printDepthFunTag", toMachineWord (DEBUG.printDepthFunTag: (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("errorDepthTag", toMachineWord (DEBUG.errorDepthTag: FixedInt.int tag), Tag fixedIntType), ("lineLengthTag", toMachineWord (DEBUG.lineLengthTag: FixedInt.int tag), Tag fixedIntType), ("profileAllocationTag", toMachineWord (DEBUG.profileAllocationTag: FixedInt.int tag), Tag fixedIntType), 
("printOutputTag", toMachineWord (PRETTY.printOutputTag: (pretty->unit) tag), Tag (PrettyType->>Unit)) , ("compilerOutputTag", toMachineWord (PRETTY.compilerOutputTag: (pretty->unit) tag), Tag (PrettyType->>Unit)), ("errorMessageProcTag", toMachineWord (LEX.errorMessageProcTag: errorMessageProcType tag), Tag errorMessageProcType), ("rootTreeTag", toMachineWord (EXPORTTREE.rootTreeTag: navigation tag), Tag navigationType), ("reportUnreferencedIdsTag", toMachineWord (reportUnreferencedIdsTag: bool tag), Tag Bool), ("reportExhaustiveHandlersTag", toMachineWord (reportExhaustiveHandlersTag: bool tag), Tag Bool), ("narrowOverloadFlexRecordTag", toMachineWord (narrowOverloadFlexRecordTag: bool tag), Tag Bool), ("createPrintFunctionsTag", toMachineWord (createPrintFunctionsTag: bool tag), Tag Bool), ("reportDiscardedValuesTag", toMachineWord (reportDiscardedValuesTag: FixedInt.int tag), Tag fixedIntType) ] end; (* PolyML.CodeTree structure. This exports the CodeTree structure into the ML space. *) local open CODETREE val codetreeEnv = makeStructure(polyMLEnv, "CodeTree") fun createType typeName = makeAndDeclareOpaqueType(typeName, "PolyML.CodeTree." ^ typeName, codetreeEnv) val CodeTree = createType "codetree" and MachineWord = createType "machineWord" and CodeBinding = createType "codeBinding" (* For the moment export these only for the general argument and result types. *) fun simpleFn (code, nArgs, name, closure, nLocals) = mkFunction{body=code, argTypes=List.tabulate(nArgs, fn _ => GeneralType), resultType=GeneralType, name=name, closure=closure, numLocals=nLocals} and simpleInlineFn (code, nArgs, name, closure, nLocals) = mkInlineFunction{body=code, argTypes=List.tabulate(nArgs, fn _ => GeneralType), resultType=GeneralType, name=name, closure=closure, numLocals=nLocals} and simpleCall(func, args) = mkCall(func, List.map (fn c => (c, GeneralType)) args, GeneralType) in val CodeTree = CodeTree val () = applyList (fn (name, v, t) => #enterVal codetreeEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("pretty", toMachineWord (CODETREE.pretty: codetree -> pretty), CodeTree ->> PrettyType), ("mkConstant", toMachineWord(mkConst: machineWord -> codetree), MachineWord ->> CodeTree), ("genCode", toMachineWord (genCode: codetree * Universal.universal list * int -> (unit->codetree)), mkProductType[CodeTree, List Universal, Int] ->> (Unit ->> CodeTree)), ("evalue", toMachineWord (evalue: codetree -> machineWord option), CodeTree ->> Option MachineWord), ("mkFunction", toMachineWord (simpleFn: codetree * int * string * codetree list * int -> codetree), mkProductType[CodeTree, Int, String, List CodeTree, Int] ->> CodeTree), ("mkInlineFunction", toMachineWord (simpleInlineFn: codetree * int * string * codetree list * int -> codetree), mkProductType[CodeTree, Int, String, List CodeTree, Int] ->> CodeTree), ("mkCall", toMachineWord (simpleCall: codetree * codetree list -> codetree), CodeTree ** List CodeTree ->> CodeTree), ("mkLoadLocal", toMachineWord (mkLoadLocal: int -> codetree), Int ->> CodeTree), ("mkLoadArgument", toMachineWord (mkLoadArgument: int -> codetree), Int ->> CodeTree), ("mkLoadClosure", toMachineWord (mkLoadClosure: int -> codetree), Int ->> CodeTree), ("mkDec", toMachineWord (mkDec: int * codetree -> codeBinding), Int ** CodeTree ->> CodeBinding), ("mkInd", toMachineWord (mkInd: int * codetree -> codetree), Int ** CodeTree ->> CodeTree), ("mkIf", toMachineWord (mkIf: codetree * codetree * codetree -> codetree), mkProductType[CodeTree, CodeTree, CodeTree] ->> CodeTree), ("mkWhile", 
toMachineWord (mkWhile: codetree * codetree -> codetree), CodeTree ** CodeTree ->> CodeTree), ("mkLoop", toMachineWord (mkLoop: codetree list -> codetree), List CodeTree ->> CodeTree), ("mkBeginLoop", toMachineWord (mkBeginLoop: codetree * (int * codetree) list -> codetree), CodeTree ** List(Int ** CodeTree) ->> CodeTree), ("mkEnv", toMachineWord (mkEnv: codeBinding list * codetree -> codetree), List CodeBinding ** CodeTree ->> CodeTree), ("mkMutualDecs", toMachineWord (mkMutualDecs: (int * codetree) list -> codeBinding), List(Int ** CodeTree) ->> CodeBinding), ("mkTuple", toMachineWord (mkTuple: codetree list -> codetree), List CodeTree ->> CodeTree), ("mkRaise", toMachineWord (mkRaise: codetree -> codetree), CodeTree ->> CodeTree), ("mkHandle", toMachineWord (mkHandle: codetree * codetree * int -> codetree), mkProductType[CodeTree, CodeTree, Int] ->> CodeTree), ("mkNullDec", toMachineWord (mkNullDec: codetree -> codeBinding), CodeTree ->> CodeBinding) ] end local (* Finish off the NameSpace structure now we have types such as pretty. *) open TYPETREE (* The exported versions expect full name spaces as arguments. Because we convert the exported versions to machineWord and give them types as data structures the compiler can't actually check that the type we give matched the internal type. *) fun makeTypeEnv NONE = { lookupType = fn _ => NONE, lookupStruct = fn _ => NONE } | makeTypeEnv(SOME(nameSpace: nameSpace)): printTypeEnv = { lookupType = fn s => case #lookupType nameSpace s of NONE => NONE | SOME t => SOME(t, NONE), lookupStruct = fn s => case #lookupStruct nameSpace s of NONE => NONE | SOME t => SOME(t, NONE) } local (* Values substructure. This also has operations related to type expressions. *) fun codeForValue (Value{access = Global code, class = ValBound, ...}) = code | codeForValue _ = raise Fail "Not a global value" and exportedDisplayTypeExp(ty, depth, nameSpace: nameSpace option) = TYPETREE.display(ty, depth, makeTypeEnv nameSpace) and exportedDisplayValues(valu, depth, nameSpace: nameSpace option) = displayValues(valu, depth, makeTypeEnv nameSpace) and propsForValue (Value {locations, typeOf, ...}) = PTtype typeOf :: mapLocationProps locations fun isConstructor (Value{class = Exception, ...}) = true | isConstructor (Value{class = Constructor _, ...}) = true | isConstructor _ = false fun isException (Value{class = Exception, ...}) = true | isException _ = false in val () = applyList (fn (name, v, t) => #enterVal valuesEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord (valName: values -> string), valueVal ->> String), ("print", toMachineWord (printValues: values * FixedInt.int -> pretty), mkProductType[valueVal, fixedIntType] ->> PrettyType), ("printWithType", toMachineWord (exportedDisplayValues: values * FixedInt.int * nameSpace option -> pretty), mkProductType[valueVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("printType", toMachineWord(exportedDisplayTypeExp: types * FixedInt.int * nameSpace option -> pretty), mkProductType[Types, fixedIntType, Option nameSpaceType] ->> PrettyType), ("typeof", toMachineWord (valTypeOf: values -> types), valueVal ->> Types), ("code", toMachineWord (codeForValue: values -> codetree), valueVal ->> CodeTree), ("properties", toMachineWord (propsForValue: values ->ptProperties list), valueVal ->> List PtProperties), ("isConstructor", toMachineWord(isConstructor: values -> bool), valueVal ->> Bool), ("isException", toMachineWord(isException: values -> bool), valueVal ->> Bool) ] end local (* TypeConstrs 
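(* Example of how the Values operations entered just above are used from ML;
   hypothetical, via the global name space that "globalSpace" exposes:

       case #lookupVal PolyML.globalNameSpace "rev" of
           NONE   => PolyML.PrettyString "not found"
         | SOME v =>
               PolyML.NameSpace.Values.printWithType (v, 50, SOME PolyML.globalNameSpace)
*)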
substructure. *) fun exportedDisplayTypeConstr(tyCons, depth, nameSpace: nameSpace option) = TYPETREE.displayTypeConstrs(tyCons, depth, makeTypeEnv nameSpace) and propsForTypeConstr (TypeConstrSet(TypeConstrs {locations, ...}, _)) = mapLocationProps locations and nameForType (TypeConstrSet(TypeConstrs{name, ...}, _)) = name in val () = applyList (fn (name, v, t) => #enterVal typesEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForType: typeConstrSet -> string), typeVal ->> String), ("print", toMachineWord (exportedDisplayTypeConstr: typeConstrSet * FixedInt.int * nameSpace option -> pretty), mkProductType[typeVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("properties", toMachineWord (propsForTypeConstr: typeConstrSet ->ptProperties list), typeVal ->> List PtProperties) ] end local (* Structures substructure *) fun exportedDisplayStructs(str, depth, nameSpace: nameSpace option) = displayStructures(str, depth, makeTypeEnv nameSpace) and codeForStruct (Struct{access = Global code, ...}) = code | codeForStruct _ = raise Fail "Not a global structure" and propsForStruct (Struct {locations, ...}) = mapLocationProps locations and nameForStruct (Struct{name, ...}) = name fun nameSpaceForStruct(baseStruct as Struct{signat=Signatures { tab, ...}, ...}): nameSpace = let open UNIVERSALTABLE fun lookupVal s = case univLookup (tab, valueVar, s) of NONE => NONE | SOME v => SOME(makeSelectedValue(v, baseStruct)) and lookupType s = case univLookup (tab, typeConstrVar, s) of NONE => NONE | SOME t => SOME(makeSelectedType(t, baseStruct)) and lookupStruct s = case univLookup (tab, structVar, s) of NONE => NONE | SOME s => SOME(makeSelectedStructure(s, baseStruct)) local fun extractItems t tab = UNIVERSALTABLE.fold (fn (s, u, l) => if Universal.tagIs t u then (s, Universal.tagProject t u) :: l else l ) [] tab in fun allValues() = map(fn (s, v) => (s, makeSelectedValue(v, baseStruct))) (extractItems valueVar tab) and allTypes() = map(fn (s, t) => (s, makeSelectedType(t, baseStruct))) (extractItems typeConstrVar tab) and allStructs() = map(fn (s, v) => (s, makeSelectedStructure(v, baseStruct))) (extractItems structVar tab) end fun enterFunction _ = raise Fail "updating a structure is not possible." (* Raise an exception for any attempt to enter a new value. Return empty for the classes that can't exist in a structure. 
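(* For example, hypothetically, the "contents" entry built from
   nameSpaceForStruct lets ML code enumerate the values declared in a compiled
   structure:

       case #lookupStruct PolyML.globalNameSpace "List" of
           NONE   => []
         | SOME s => map #1 (#allVal (PolyML.NameSpace.Structures.contents s) ())
*)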
*) in { lookupVal = lookupVal, lookupType = lookupType, lookupStruct = lookupStruct, lookupFix = fn _ => NONE, lookupSig = fn _ => NONE, lookupFunct = fn _ => NONE, enterVal = enterFunction, enterType = enterFunction, enterFix = enterFunction, enterStruct = enterFunction, enterSig = enterFunction, enterFunct = enterFunction, allVal = allValues, allType = allTypes, allStruct = allStructs, allFix = fn () => [], allSig = fn () => [], allFunct = fn () => [] } end in val () = applyList (fn (name, v, t) => #enterVal structsEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForStruct: structVals -> string), structureVal ->> String), ("print", toMachineWord (exportedDisplayStructs: structVals * FixedInt.int * nameSpace option -> pretty), mkProductType[structureVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("code", toMachineWord (codeForStruct: structVals -> codetree), structureVal ->> CodeTree), ("properties", toMachineWord (propsForStruct: structVals ->ptProperties list), structureVal ->> List PtProperties), ("contents", toMachineWord(nameSpaceForStruct: structVals -> nameSpace), structureVal ->> nameSpaceType) ] end local (* Signatures substructure *) fun exportedDisplaySigs(sign, depth, nameSpace: nameSpace option) = displaySignatures(sign, depth, makeTypeEnv nameSpace) and propsForSig (Signatures {locations, ...}) = mapLocationProps locations and nameForSig (Signatures{name, ...}) = name in val () = applyList (fn (name, v, t) => #enterVal sigsEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForSig: signatures -> string), signatureVal ->> String), ("print", toMachineWord (exportedDisplaySigs: signatures * FixedInt.int * nameSpace option -> pretty), mkProductType[signatureVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("properties", toMachineWord (propsForSig: signatures ->ptProperties list), signatureVal ->> List PtProperties) ] end local (* Functors substructure *) fun exportedDisplayFunctors(funct, depth, nameSpace: nameSpace option) = displayFunctors(funct, depth, makeTypeEnv nameSpace) and codeForFunct (Functor{access = Global code, ...}) = code | codeForFunct _ = raise Fail "Not a global functor" and propsForFunctor (Functor {locations, ...}) = mapLocationProps locations and nameForFunctor (Functor{name, ...}) = name in val () = applyList (fn (name, v, t) => #enterVal functsEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForFunctor: functors -> string), functorVal ->> String), ("print", toMachineWord (exportedDisplayFunctors: functors * FixedInt.int * nameSpace option -> pretty), mkProductType[functorVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("code", toMachineWord (codeForFunct: functors -> codetree), functorVal ->> CodeTree), ("properties", toMachineWord (propsForFunctor: functors ->ptProperties list), functorVal ->> List PtProperties) ] end local (* Infixes substructure *) fun nameForFix(FixStatus(s, _)) = s in val () = applyList (fn (name, v, t) => #enterVal fixesEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForFix: fixStatus -> string), fixityVal ->> String), ("print", toMachineWord (displayFixStatus: fixStatus -> pretty), fixityVal ->> PrettyType) ] end in end in () end (* initGlobalEnv *); end;