diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sml
index b8f412d8..e39a4625 100644
--- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sml
+++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sml
@@ -1,2558 +1,2558 @@
 (*
     Copyright (c) 2021 David C. J. Matthews
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     Licence version 2.1 as published by the Free Software Foundation.
     
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public Licence for more details.
     
     You should have received a copy of the GNU Lesser General Public
     Licence along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *)
 
 functor Arm64Assembly (
     structure Debug: DEBUG
     and       Pretty: PRETTY
     and       CodeArray: CODEARRAY
 ) : ARM64ASSEMBLY =
 
 struct
     open CodeArray Address
     
     (* True when Address.wordSize is 0w4.  This is the "32-in-64" configuration
        referred to elsewhere in this file (see X_Base32in64). *)
     val is32in64 = Address.wordSize = 0w4
     
     (* The number of ML words (Address.wordSize) that make up one native
        word (Address.nativeWordSize). *)
     val wordsPerNativeWord: word = Address.nativeWordSize div Address.wordSize
     
     local
         (* Almost every ARM64 platform is little-endian but it is possible to
            run it in big-endian mode.  Instructions are always little-endian.
            The value of isBigEndian will be determined when the structure is
            constructed.  That's not a problem since it will be built on the
            target machine. *)
         (* The run-time system entry point that reports the byte order. *)
         val isBigEndian: unit -> bool = RunCall.rtsCallFast1 "PolyIsBigEndian"
     in
         (* Call the RTS once and keep the answer.  This boolean shadows the
            function of the same name declared in the local part. *)
         val isBigEndian = isBigEndian()
     end
     
     (* Local alias so that InternalError can be raised without qualification. *)
     exception InternalError = Misc.InternalError
 
     infix 5 << <<+ <<- >> >>+ >>- ~>> ~>>+ ~>>- (* Shift operators *)
     infix 3 andb orb xorb andbL orbL xorbL andb8 orb8 xorb8
     
     (* The unqualified shift and bitwise operators bound here are the Word32
        versions.  Word32.word is the type used for an encoded instruction
        (see SimpleInstr).  Note that the shift amount is a Word.word. *)
     val op << = Word32.<< and op >> = Word32.>> and op ~>> = Word32.~>>
     and op andb = Word32.andb and op orb = Word32.orb
 
     (* Conversions between the word types.  Each one goes through LargeWord:
        toLargeWord zero-extends and fromLargeWord keeps the low-order bits. *)
     val word32ToWord8 = Word8.fromLargeWord o Word32.toLargeWord
     and word8ToWord32 = Word32.fromLargeWord o Word8.toLargeWord
     and word32ToWord = Word.fromLargeWord o Word32.toLargeWord
     and wordToWord32 = Word32.fromLargeWord o Word.toLargeWord
     and word8ToWord = Word.fromLargeWord o Word8.toLargeWord
 
     (* maxSigned bits: the largest positive value that a two's complement
        field of width "bits" can hold, i.e. 2^(bits-1) - 1, as a word. *)
     fun maxSigned bits =
     let
         (* The bit that would be the sign bit of the field. *)
         val signBit = Word.<<(0w1, bits - 0w1)
     in
         signBit - 0w1
     end
 
     (* willFitInRange(offset, bits): true if the integer offset can be held
        in a signed field of width "bits", i.e. it lies between
        -2^(bits-1) and 2^(bits-1)-1 inclusive. *)
     fun willFitInRange(offset, bits) =
     let
         val upper = Word.toInt(maxSigned bits)
         val lower = ~ upper - 1
     in
         lower <= offset andalso offset <= upper
     end
    
     (* XReg is used for fixed point registers since X0 and W0 are
        the same register.  The argument is the register number.
        XZero (the zero register) and XSP (the stack pointer) are separate
        constructors because both are encoded as 31 and which of them is
        meant depends on the instruction: see xRegOrXZ and xRegOrXSP. *)
     datatype xReg = XReg of Word8.word | XZero | XSP
     (* VReg is used for the floating point registers since V0, D0 and
        S0 are the same register.  The argument is the register number. *)
     and vReg = VReg of Word8.word
 
     (* A Label is a ref that is later set to the location.
        Several labels can be linked together so that they are only set
        at a single point.
        Only forward jumps are linked so when we come to finally set the
        label we will have the full list.
        In the type below the outer ref holds the list of linked labels and
        each inner ref holds a location as a Word.word. *)
     type labels = Word.word ref list ref
 
     (* Condition codes. *)
 
     (* N.B. On subtraction and comparison the ARM uses an inverted carry
        flag for borrow.  The C flag is set if there is NO borrow.
        This is the reverse of the X86. *)
     (* The comment on each constructor gives the flag state in which the
        condition holds.  The constructors are listed in the same order as
        their binary encodings in cCode. *)
     datatype condition =
         CondEqual            (* Z=1 *)
     |   CondNotEqual         (* Z=0 *)
     |   CondCarrySet         (* C=1 *)
     |   CondCarryClear       (* C=0 *)
     |   CondNegative         (* N=1 *)
     |   CondPositive         (* N=0 includes zero *)
     |   CondOverflow         (* V=1 *)
     |   CondNoOverflow       (* V=0 *)
     |   CondUnsignedHigher   (* C=1 && Z=0 *)
     |   CondUnsignedLowOrEq  (* ! (C=1 && Z=0) *)
     |   CondSignedGreaterEq  (* N=V *)
     |   CondSignedLess       (* N<>V *)
     |   CondSignedGreater    (* Z==0 && N=V *)
     |   CondSignedLessEq     (* !(Z==0 && N=V) *)
 
     (* Return the condition that holds exactly when the argument does not.
        Each test is paired with its opposite; in the encoding produced by
        cCode the two members of a pair differ only in the bottom bit. *)
     fun invertTest test =
         case test of
             CondEqual            => CondNotEqual
         |   CondNotEqual         => CondEqual
         |   CondCarrySet         => CondCarryClear
         |   CondCarryClear       => CondCarrySet
         |   CondNegative         => CondPositive
         |   CondPositive         => CondNegative
         |   CondOverflow         => CondNoOverflow
         |   CondNoOverflow       => CondOverflow
         |   CondUnsignedHigher   => CondUnsignedLowOrEq
         |   CondUnsignedLowOrEq  => CondUnsignedHigher
         |   CondSignedGreaterEq  => CondSignedLess
         |   CondSignedLess       => CondSignedGreaterEq
         |   CondSignedGreater    => CondSignedLessEq
         |   CondSignedLessEq     => CondSignedGreater
 
     (* The two-letter assembler mnemonic for a condition, for printing. *)
     fun condToString test =
         case test of
             CondEqual            => "EQ"
         |   CondNotEqual         => "NE"
         |   CondCarrySet         => "CS"
         |   CondCarryClear       => "CC"
         |   CondNegative         => "MI"
         |   CondPositive         => "PL"
         |   CondOverflow         => "VS"
         |   CondNoOverflow       => "VC"
         |   CondUnsignedHigher   => "HI"
         |   CondUnsignedLowOrEq  => "LS"
         |   CondSignedGreaterEq  => "GE"
         |   CondSignedLess       => "LT"
         |   CondSignedGreater    => "GT"
         |   CondSignedLessEq     => "LE"
 
 
     (* The four-bit binary encoding of a condition as a Word32.word so that
        it can be shifted directly into an instruction.  The comments repeat
        the flag state being tested. *)
     fun cCode test : Word32.word =
         case test of
             CondEqual           => 0wx0 (* Z=1 *)
         |   CondNotEqual        => 0wx1 (* Z=0 *)
         |   CondCarrySet        => 0wx2 (* C=1 *)
         |   CondCarryClear      => 0wx3 (* C=0 *)
         |   CondNegative        => 0wx4 (* N=1 *)
         |   CondPositive        => 0wx5 (* N=0 includes zero *)
         |   CondOverflow        => 0wx6 (* V=1 *)
         |   CondNoOverflow      => 0wx7 (* V=0 *)
         |   CondUnsignedHigher  => 0wx8 (* C=1 && Z=0 *)
         |   CondUnsignedLowOrEq => 0wx9 (* ! (C=1 && Z=0) *)
         |   CondSignedGreaterEq => 0wxa (* N=V *)
         |   CondSignedLess      => 0wxb (* N<>V *)
         |   CondSignedGreater   => 0wxc (* Z==0 && N=V *)
         |   CondSignedLessEq    => 0wxd (* !(Z==0 && N=V) *)
 
 
     (* Offsets in the assembly code interface pointed at by X26
        These are in units of 64-bits NOT bytes. *)
     (* NOTE(review): these values presumably have to agree with the layout
        of the interface area defined by the run-time system - confirm
        against the RTS before changing any of them. *)
     val heapOverflowCallOffset  = 1
     and stackOverflowCallOffset = 2
     and stackOverflowXCallOffset= 3
     and exceptionHandlerOffset  = 5
     and stackLimitOffset        = 6
     and exceptionPacketOffset   = 7
     and threadIdOffset          = 8
     and heapLimitPtrOffset      = 42
     and heapAllocPtrOffset      = 43
     and mlStackPtrOffset        = 44
 
     (* A value of 31 in a register field means either the zero register or
        the hardware stack pointer; the instruction decides which.  Each of
        these functions returns the number to put in the field and raises
        InternalError if the register is not acceptable in that position. *)
 
     (* For fields where 31 means the zero register. *)
     fun xRegOrXZ reg =
         case reg of
             XReg num => num
         |   XZero => 0w31
         |   XSP => raise InternalError "XSP not valid here"
 
     (* For fields where 31 means the stack pointer. *)
     fun xRegOrXSP reg =
         case reg of
             XReg num => num
         |   XZero => raise InternalError "XZero not valid here"
         |   XSP => 0w31
 
     (* For fields where it isn't clear: only a numbered register is accepted. *)
     fun xRegOnly reg =
         case reg of
             XReg num => num
         |   XZero => raise InternalError "XZero not valid here"
         |   XSP => raise InternalError "XSP not valid here"
 
     (* Names for the numbered general registers X0 to X30.  Register number 31
        has no name here: it is represented by XZero or XSP. *)
     val X0  = XReg 0w0  and X1  = XReg 0w1  and X2 = XReg 0w2   and X3  = XReg 0w3
     and X4  = XReg 0w4  and X5  = XReg 0w5  and X6 = XReg 0w6   and X7  = XReg 0w7
     and X8  = XReg 0w8  and X9  = XReg 0w9  and X10= XReg 0w10  and X11 = XReg 0w11
     and X12 = XReg 0w12 and X13 = XReg 0w13 and X14= XReg 0w14  and X15 = XReg 0w15
     and X16 = XReg 0w16 and X17 = XReg 0w17 and X18= XReg 0w18  and X19 = XReg 0w19
     and X20 = XReg 0w20 and X21 = XReg 0w21 and X22= XReg 0w22  and X23 = XReg 0w23
     and X24 = XReg 0w24 and X25 = XReg 0w25 and X26= XReg 0w26  and X27 = XReg 0w27
     and X28 = XReg 0w28 and X29 = XReg 0w29 and X30= XReg 0w30
     
     (* Symbolic names for the registers that have a fixed role in ML code. *)
     val X_MLHeapLimit       = X25 (* ML Heap limit pointer *)
     and X_MLAssemblyInt     = X26 (* ML assembly interface pointer. *)
     and X_MLHeapAllocPtr    = X27 (* ML Heap allocation pointer. *)
     and X_MLStackPtr        = X28 (* ML Stack pointer. *)
     and X_LinkReg           = X30 (* Link reg - return address *)
     and X_Base32in64        = X24 (* X24 is used for the heap base in 32-in-64. *)
     
     (* Extract the register number from a floating point register.  It plays
        the same role for V registers as xRegOrXZ etc. do for X registers. *)
     fun vReg(VReg v) = v
     (* Only the first eight registers are currently used by ML. *)
     val V0  = VReg 0w0  and V1  = VReg 0w1 and V2 = VReg 0w2   and V3  = VReg 0w3
     and V4  = VReg 0w4  and V5  = VReg 0w5 and V6 = VReg 0w6   and V7  = VReg 0w7
 
     (* Some data instructions include a possible shift. *)
     (* The argument is the shift amount; shiftEncode rejects amounts above 63. *)
     datatype shiftType =
         ShiftLSL of Word8.word (* Logical shift left *)
     |   ShiftLSR of Word8.word (* Logical shift right *)
     |   ShiftASR of Word8.word (* Arithmetic shift right *)
     |   ShiftNone              (* No shift: encoded like a left shift by zero. *)
 
     local
         (* The shift amount has to fit in a six-bit field. *)
         fun checkImm6 amount =
             if amount <= 0w63 then amount else raise InternalError "shift > 63"
     in
         (* Turn a shiftType into the pair (shift kind, shift amount) to be put
            into the instruction.  Raises InternalError if the amount is over 63.
            No shift is encoded as a left shift by zero. *)
         fun shiftEncode shift =
             case shift of
                 ShiftLSL amount => (0w0, checkImm6 amount)
             |   ShiftLSR amount => (0w1, checkImm6 amount)
             |   ShiftASR amount => (0w2, checkImm6 amount)
             |   ShiftNone       => (0w0, 0w0)
     end
 
     (* Other instructions include an extension i.e. a sign- or zero-extended
        value from one of the argument registers.  When an extension is encoded
        there can also be a left shift which applies after the extension.
        I don't understand what difference, if any, there is between UXTX
        and SXTX.
        There's no ExtNone because we need to use either UXTW or UXTX depending
        on the length *)
     (* The type parameter is the extra information carried with the extension:
        a shift amount for arithmetic instructions (see extendArithEncode) or
        a scale for loads and stores (see extendLSEncode). *)
     datatype 'a extend =
         ExtUXTB of 'a (* Unsigned extend byte *)
     |   ExtUXTH of 'a (* Unsigned extend halfword *)
     |   ExtUXTW of 'a (* Unsigned extend word *)
     |   ExtUXTX of 'a (* Left shift *)
     |   ExtSXTB of 'a (* Sign extend byte *)
     |   ExtSXTH of 'a (* Sign extend halfword *)
     |   ExtSXTW of 'a (* Sign extend word *)
     |   ExtSXTX of 'a (* Left shift *)
 
     (* Load/store instructions have only a single bit for the shift.  For byte
        operations this is one bit shift; for others it scales by the size of
        the operand if set. *)
     datatype scale =
         ScaleOrShift   (* The bit is set. *)
     |   NoScale        (* The bit is clear. *)
 
     local
         (* Although the field has three bits the shift seems to be limited to
            the range 0 to 4. *)
         fun checkImm3 amount =
             if amount <= 0w4 then amount else raise InternalError "extend shift > 4"
     in
         (* Encode an extension for an arithmetic instruction.  The result is
            the option code together with the shift amount, which is checked. *)
         fun extendArithEncode ext =
             case ext of
                 ExtUXTB amount => (0w0, checkImm3 amount)
             |   ExtUXTH amount => (0w1, checkImm3 amount)
             |   ExtUXTW amount => (0w2, checkImm3 amount)
             |   ExtUXTX amount => (0w3, checkImm3 amount)
             |   ExtSXTB amount => (0w4, checkImm3 amount)
             |   ExtSXTH amount => (0w5, checkImm3 amount)
             |   ExtSXTW amount => (0w6, checkImm3 amount)
             |   ExtSXTX amount => (0w7, checkImm3 amount)
 
         (* Encode an extension for a load or store.  The option codes are the
            same but the argument is passed through unchanged. *)
         fun extendLSEncode ext =
             case ext of
                 ExtUXTB arg => (0w0, arg)
             |   ExtUXTH arg => (0w1, arg)
             |   ExtUXTW arg => (0w2, arg)
             |   ExtUXTX arg => (0w3, arg)
             |   ExtSXTB arg => (0w4, arg)
             |   ExtSXTH arg => (0w5, arg)
             |   ExtSXTW arg => (0w6, arg)
             |   ExtSXTX arg => (0w7, arg)
     end
 
     (* Operand size: 32 bits or 64 bits. *)
     datatype wordSize = WordSize32 | WordSize64
 
     (* Bit patterns on the ARM64 are encoded using a complicated scheme and
        only certain values can be encoded.  An element can be 2, 4, 8, 16, 32 or
        64 bits and must be a sequence of at least one zero bits followed by at
        least one one bit.  This sequence can then be rotated within the element.
        Finally the element is replicated within the register up to 32 or
        64 bits.  All this information is encoded in 13 bits.
        N.B. Bit patterns of all zeros or all ones cannot be encoded. *)
 
     (* Encode the value if it is possible. *)
     (* The value is a Word64.word and sf selects a 32-bit or 64-bit register.
        The result is NONE if the value cannot be encoded, otherwise
        SOME{n, imms, immr} with each field a Word.word. *)
     fun encodeBitPattern(value, sf (* size flag *)) =
     (* Can't encode 0 or all ones. *)
     if value = 0w0 orelse value = Word64.notb 0w0
     then NONE
     (* If this is 32-bits we can't encode all ones in the
        low-order 32-bits or any value that won't fit in 32-bits, *)
     else if sf = WordSize32 andalso value >= 0wxffffffff
     then NONE
     else
     let
         val regSize = case sf of WordSize32 => 0w32 | WordSize64 => 0w64
         (* Get the element size.  Look for the repeat of the
            pattern. *)
         (* Compares the two halves of the low-order "size" bits.  If they differ
            the element is "size" bits; otherwise try again with half the size.
            The smallest element size returned is 2. *)
         fun getElemSize size =
         let
             val ns = size div 0w2
             val mask = Word64.<<(0w1, ns)  - 0w1
         in
             if Word64.andb(value, mask) <> Word64.andb(Word64.>>(value, ns), mask)
             then size
             else if ns <= 0w2
             then ns
             else getElemSize ns
         end
         val elemSize = getElemSize regSize
         (* Only applied to the element size, which is a power of two. *)
         fun log2 0w1 = 0w0 | log2 n = 0w1 + log2(Word.>>(n, 0w1))
         val elemBits = log2 elemSize
 
         (* Find the rotation that puts as many of the zero bits in the
            element at the top. *)
         (* Mask for the low-order elemSize bits. *)
         val elemMask = Word64.>>(Word64.notb 0w0, 0w64-elemSize)
         (* Rotate right and rotate left by one bit within the element. *)
         fun ror elt =
             Word64.orb((Word64.<<(Word64.andb(elt, 0w1), elemSize-0w1),
                 Word64.>>(elt, 0w1)))
         and rol elt =
             Word64.orb(Word64.andb(elemMask, Word64.<<(elt, 0w1)),
                 Word64.>>(elt, elemSize-0w1))
 
         (* Keep rotating in whichever direction makes the value smaller.
            n is adjusted at each step so that it is always the right-rotation
            that takes the current value back to the original element:
            it is decremented, modulo elemSize, for a right rotation and
            incremented for a left rotation. *)
         fun findRotation(v, n) =
             if ror v < v then findRotation(ror v, (n-0w1) mod elemSize)
             else if rol v < v then findRotation(rol v, n+0w1)
             else (v, n)
 
         val (rotated, rotation) = findRotation(Word64.andb(value, elemMask), 0w0)
 
         (* Count out the low order ones.  If the result is zero
            then we've got a valid sequence of zeros followed by ones
            but if we discover a zero bit and the result isn't zero
            then we can't encode this. *)
         fun countLowOrderOnes(v, n) =
             if v = 0w0
             then SOME n
             else if Word64.andb(v, 0w1) = 0w1
             then countLowOrderOnes(Word64.>>(v, 0w1), n+0w1)
             else NONE
      in
         case countLowOrderOnes(rotated, 0w0) of
             NONE => NONE
         |   SOME lowOrderOnes =>
             let
                 (* Encode the element size. *)
                 (* elemSizeEnc is zero for a 64-bit element and otherwise has
                    bit 6 set together with a marker for the size in the bits
                    below.  n is one only for a 64-bit element.  imms is the
                    low six bits of the marker combined with the number of
                    one bits less one. *)
                 val elemSizeEnc = 0wx7f - (Word.<<(0w1, elemBits+0w1) - 0w1)
                 val n = if Word.andb(elemSizeEnc, 0wx40) = 0w0 then 0w1 else 0w0
                 val imms = Word.andb(Word.orb(elemSizeEnc, lowOrderOnes-0w1), 0wx3f)
             in
                 SOME{n=n, imms=imms, immr=rotation}
             end
     end;
 
     (* Decode a pattern for printing. *)
     (* This is the reverse of encodeBitPattern.  n, immr and imms are the
        fields of the instruction as Word32 values; sf is zero for a 32-bit
        register and anything else for a 64-bit one.  The result is the
        Word64.word bit pattern.  Raises InternalError if the fields are not
        a valid encoding. *)
     fun decodeBitPattern{sf, n, immr, imms} =
     let
         (* Find the highest bit set in N:NOT(imms) *)
         (* highestBitSet returns the number of significant bits so one is
            subtracted to give the bit position.  len is log2 of the element size. *)
         fun highestBitSet 0w0 = 0
         |   highestBitSet n = 1+highestBitSet(Word32.>>(n, 0w1))
         val len = highestBitSet(Word32.orb(Word32.<<(n, 0w6), Word32.xorb(imms, 0wx3f))) - 1
         val _ = if len < 0 then raise InternalError "decodeBitPattern: invalid" else ()
         (* The element size in bits. *)
         val size = Word32.<<(0w1, Word.fromInt len)
         (* r is the rotation and s is one less than the number of one bits. *)
         val r = Word32.andb(immr, size-0w1)
         and s = Word32.andb(imms, size-0w1)
         (* An element that is all ones is not allowed. *)
         val _ = if s = size-0w1 then raise InternalError "decodeBitPattern: invalid" else ()
         (* The unrotated element: s+1 low-order one bits. *)
         val pattern = Word64.<<(0w1, word32ToWord(s+0w1)) - 0w1
         (* Rotate right by one within the element: shift right and put the
            low-order bit into the top bit of the element. *)
         fun ror elt =
             Word64.orb((Word64.<<(Word64.andb(elt, 0w1), word32ToWord(size-0w1)),
                 Word64.>>(elt, 0w1)))
 
         (* Apply ror the given number of times. *)
         fun rotateBits(value, 0w0) = value
         |   rotateBits(value, n) = rotateBits(ror value, n-0w1)
 
         val rotated = rotateBits(pattern, r)
 
         val regSize = if sf = 0w0 then 0w32 else 0w64
 
         (* Replicate the rotated pattern to fill the register. *)
         (* Each step doubles the width covered by the pattern. *)
         fun replicate(pattern, size) =
             if size >= regSize
             then pattern
             else replicate(Word64.orb(pattern, Word64.<<(pattern, word32ToWord size)), size * 0w2)
     in
         replicate(rotated, size)
     end
 
     (* True if encodeBitPattern can encode this (value, size) pair. *)
     fun isEncodableBitPattern valueAndSize = isSome(encodeBitPattern valueAndSize)
 
 
     (* The instructions as they are generated.  SimpleInstr holds a complete
        32-bit encoded instruction and is what all the encoding functions
        below return.  The other constructors carry a literal or a label.
        NOTE(review): these are presumably turned into machine code, with the
        "length" refs filled in, by code later in this file that is not
        visible here - confirm there before relying on the details. *)
     datatype instr =
         SimpleInstr of Word32.word
     |   LoadAddressLiteral of {reg: xReg, value: machineWord, length: brLength ref}
     |   LoadNonAddressLiteral of {reg: xReg, value: Word64.word, length: brLength ref}
     |   Label of labels
     |   UnconditionalBranch of {label: labels, andLink: bool}
     |   ConditionalBranch of { label: labels, jumpCondition: condition, length: brLength ref }
     |   LoadLabelAddress of { label: labels, reg: xReg, length: brLength ref }
     |   TestBitBranch of { label: labels, bitNo: Word8.word, brNonZero: bool, reg: xReg, length: brLength ref }
     |   CompareBranch of { label: labels, brNonZero: bool, size: wordSize, reg: xReg, length: brLength ref }
     
     (* Whether the short or the extended form is to be used. *)
     and brLength = BrShort | BrExtended
 
     (* Encodings of two fixed instructions. *)
     val nopCode  = 0wxD503201F (* No-operation. *)
     and undefCode = 0wx00000000 (* Permanently undefined instruction. *)
 
     (* Add or subtract a 12-bit immediate (a constant), optionally shifted,
        to or from a register.  The constant is zero-extended.  The versions
        that do not set the flags can have XSP as the destination; the versions
        that set the flags can have XZero as the destination, in which case the
        result is discarded and the instruction acts as a comparison. *)
     local
         (* sf: 1 for 64-bit, 0 for 32-bit; oper: 0 to add, 1 to subtract;
            s: 1 to set the flags; xdOp: how register 31 is to be treated
            as a destination.  Raises InternalError if the immediate
            does not fit in 12 bits. *)
         fun addSubRegImmediate(sf, oper, s, xdOp) ({regN, regD, immed, shifted}) =
             if immed >= 0wx1000
             then raise InternalError "addSubRegImmediate: immed > 12 bits"
             else
             let
                 val opBits = 0wx11000000 orb (sf << 0w31) orb (oper << 0w30) orb (s << 0w29)
                 (* Bit 22 is set when the "shifted" form is requested. *)
                 val shiftBit: Word32.word = if shifted then 0wx400000 else 0w0
                 val immField = wordToWord32 immed << 0w10
                 val rnField = word8ToWord32(xRegOrXSP regN) << 0w5
                 val rdField = word8ToWord32(xdOp regD)
             in
                 SimpleInstr(opBits orb shiftBit orb immField orb rnField orb rdField)
             end
     in
         (* 64-bit versions. *)
         val addImmediate    = addSubRegImmediate(0w1, 0w0, 0w0, xRegOrXSP)
         val addSImmediate   = addSubRegImmediate(0w1, 0w0, 0w1, xRegOrXZ)
         val subImmediate    = addSubRegImmediate(0w1, 0w1, 0w0, xRegOrXSP)
         val subSImmediate   = addSubRegImmediate(0w1, 0w1, 0w1, xRegOrXZ)
         (* 32-bit versions. *)
         val addImmediate32  = addSubRegImmediate(0w0, 0w0, 0w0, xRegOrXSP)
         val addSImmediate32 = addSubRegImmediate(0w0, 0w0, 0w1, xRegOrXZ)
         val subImmediate32  = addSubRegImmediate(0w0, 0w1, 0w0, xRegOrXSP)
         val subSImmediate32 = addSubRegImmediate(0w0, 0w1, 0w1, xRegOrXZ)
     end
 
     (* Add or subtract a register that may be shifted, with or without
        setting the flags. *)
     local
         (* Register 31 means XZero for regN and regD in this form, unlike the
            extended register version.
            sf: 1 for 64-bit, 0 for 32-bit; oper: 0 to add, 1 to subtract;
            s: 1 to set the flags. *)
         fun addSubtractShiftedReg (sf, oper, s) ({regM, regN, regD, shift}) =
         let
             val (shiftKind, amount) = shiftEncode shift
             val opBits = 0wx0b000000 orb (sf << 0w31) orb (oper << 0w30) orb (s << 0w29)
             val rmField = word8ToWord32(xRegOnly regM) << 0w16
             val rnField = word8ToWord32(xRegOrXZ regN) << 0w5
             val rdField = word8ToWord32(xRegOrXZ regD)
         in
             SimpleInstr(opBits orb (shiftKind << 0w22) orb rmField orb
                 (word8ToWord32 amount << 0w10) orb rnField orb rdField)
         end
     in
         (* 64-bit versions. *)
         val addShiftedReg    = addSubtractShiftedReg(0w1, 0w0, 0w0)
         val addSShiftedReg   = addSubtractShiftedReg(0w1, 0w0, 0w1)
         val subShiftedReg    = addSubtractShiftedReg(0w1, 0w1, 0w0)
         val subSShiftedReg   = addSubtractShiftedReg(0w1, 0w1, 0w1)
         (* 32-bit versions. *)
         val addShiftedReg32  = addSubtractShiftedReg(0w0, 0w0, 0w0)
         val addSShiftedReg32 = addSubtractShiftedReg(0w0, 0w0, 0w1)
         val subShiftedReg32  = addSubtractShiftedReg(0w0, 0w1, 0w0)
         val subSShiftedReg32 = addSubtractShiftedReg(0w0, 0w1, 0w1)
     end
 
     (* Add or subtract a register that is first sign- or zero-extended, with
        or without setting the flags. *)
     local
         (* Here regN may be SP, and so may regD in the versions that do not
            set the flags: xD says how register 31 is treated as a destination.
            sf: 1 for 64-bit; oper: 0 to add, 1 to subtract; s: 1 to set the
            flags; opt: the value for the two-bit field at bit 22. *)
         fun addSubtractExtendedReg (sf, oper, s, opt, xD) ({regM, regN, regD, extend}) =
         let
             val (extCode, amount) = extendArithEncode extend
             val opBits =
                 0wx0b200000 orb (sf << 0w31) orb (oper << 0w30) orb (s << 0w29) orb (opt << 0w22)
             val rmField = word8ToWord32(xRegOnly regM) << 0w16
             val rnField = word8ToWord32(xRegOrXSP regN) << 0w5
             val rdField = word8ToWord32(xD regD)
         in
             SimpleInstr(opBits orb rmField orb (extCode << 0w13) orb
                 (word8ToWord32 amount << 0w10) orb rnField orb rdField)
         end
     in
         val addExtendedReg  = addSubtractExtendedReg(0w1, 0w0, 0w0, 0w0, xRegOrXSP)
         val addSExtendedReg = addSubtractExtendedReg(0w1, 0w0, 0w1, 0w0, xRegOrXZ)
         val subExtendedReg  = addSubtractExtendedReg(0w1, 0w1, 0w0, 0w0, xRegOrXSP)
         val subSExtendedReg = addSubtractExtendedReg(0w1, 0w1, 0w1, 0w0, xRegOrXZ)
     end
 
     (* Bitwise logical operations where the second argument is a register
        that may be shifted. *)
     local
         (* sf: 1 for 64-bit, 0 for 32-bit; oper: selects the operation;
            n: the bit at position 21.  All three registers treat 31 as XZero. *)
         fun logicalShiftedReg (sf, oper, n) ({regM, regN, regD, shift}) =
         let
             val (shiftKind, amount) = shiftEncode shift
             val opBits = 0wx0a000000 orb (sf << 0w31) orb (oper << 0w29)
             val rmField = word8ToWord32(xRegOrXZ regM) << 0w16
             val rnField = word8ToWord32(xRegOrXZ regN) << 0w5
             val rdField = word8ToWord32(xRegOrXZ regD)
         in
             SimpleInstr(opBits orb (shiftKind << 0w22) orb (n << 0w21) orb rmField orb
                 (word8ToWord32 amount << 0w10) orb rnField orb rdField)
         end
     in
         (* 64-bit versions. *)
         val andShiftedReg    = logicalShiftedReg(0w1, 0w0, 0w0)
         val orrShiftedReg    = logicalShiftedReg(0w1, 0w1, 0w0)
         val eorShiftedReg    = logicalShiftedReg(0w1, 0w2, 0w0)
         val andsShiftedReg   = logicalShiftedReg(0w1, 0w3, 0w0)
         (* 32-bit versions. *)
         val andShiftedReg32  = logicalShiftedReg(0w0, 0w0, 0w0)
         val orrShiftedReg32  = logicalShiftedReg(0w0, 0w1, 0w0)
         val eorShiftedReg32  = logicalShiftedReg(0w0, 0w2, 0w0)
         val andsShiftedReg32 = logicalShiftedReg(0w0, 0w3, 0w0)
         (* The instruction set also has versions that use the inverse of the
            argument; they are not defined here. *)
     end
 
     (* Operations with two source registers: division and variable shifts. *)
     local
         (* sf: 1 for 64-bit, 0 for 32-bit; s: the bit at position 29;
            opcode: selects the operation.  Neither XZero nor XSP is
            accepted for any of the registers. *)
         fun twoSourceInstr (sf, s, opcode) ({regM, regN, regD}) =
         let
             val opBits = 0wx1ac00000 orb (sf << 0w31) orb (s << 0w29)
             val rmField = word8ToWord32(xRegOnly regM) << 0w16
             val rnField = word8ToWord32(xRegOnly regN) << 0w5
             val rdField = word8ToWord32(xRegOnly regD)
         in
             SimpleInstr(opBits orb rmField orb (opcode << 0w10) orb rnField orb rdField)
         end
     in
         (* Unsigned and signed division. *)
         val unsignedDivide   = twoSourceInstr(0w1, 0w0, 0wx2)
         val signedDivide     = twoSourceInstr(0w1, 0w0, 0wx3)
         val unsignedDivide32 = twoSourceInstr(0w0, 0w0, 0wx2)
         val signedDivide32   = twoSourceInstr(0w0, 0w0, 0wx3)
         (* Logical shift left: Rd = Rn << (Rm mod 0w64) *)
         val logicalShiftLeftVariable = twoSourceInstr(0w1, 0w0, 0wx8)
         (* Logical shift right: Rd = Rn >> (Rm mod 0w64) *)
         val logicalShiftRightVariable = twoSourceInstr(0w1, 0w0, 0wx9)
         (* Arithmetic shift right: Rd = Rn ~>> (Rm mod 0w64) *)
         val arithmeticShiftRightVariable = twoSourceInstr(0w1, 0w0, 0wxa)
         (* The 32-bit versions of the three shifts. *)
         val logicalShiftLeftVariable32 = twoSourceInstr(0w0, 0w0, 0wx8)
         val logicalShiftRightVariable32 = twoSourceInstr(0w0, 0w0, 0wx9)
         val arithmeticShiftRightVariable32 = twoSourceInstr(0w0, 0w0, 0wxa)
     end
 
     (* Operations with three source registers.  All of them are some form
        of multiplication. *)
     local
         (* sf: 1 for 64-bit, 0 for 32-bit; op54, op31 and o0 select the
            operation.  regA may be XZero; the other registers must be
            numbered registers. *)
         fun threeSourceInstr (sf, op54, op31, o0) ({regM, regA, regN, regD}) =
         let
             val opBits = 0wx1b000000 orb (sf << 0w31) orb (op54 << 0w29) orb (op31 << 0w21)
             val rmField = word8ToWord32(xRegOnly regM) << 0w16
             val raField = word8ToWord32(xRegOrXZ regA) << 0w10
             val rnField = word8ToWord32(xRegOnly regN) << 0w5
             val rdField = word8ToWord32(xRegOnly regD)
         in
             SimpleInstr(opBits orb rmField orb (o0 << 0w15) orb raField orb rnField orb rdField)
         end
     in
         (* regD = regA + regN * regM *)
         val multiplyAndAdd = threeSourceInstr(0w1, 0w0, 0w0, 0w0)
         (* regD = regA - regN * regM *)
         val multiplyAndSub = threeSourceInstr(0w1, 0w0, 0w0, 0w1)
         (* The 32-bit versions of these. *)
         val multiplyAndAdd32 = threeSourceInstr(0w0, 0w0, 0w0, 0w0)
         val multiplyAndSub32 = threeSourceInstr(0w0, 0w0, 0w0, 0w1)
         (* Multiply two 32-bit values and add or subtract a 64-bit value. *)
         val signedMultiplyAndAddLong = threeSourceInstr(0w1, 0w0, 0w1, 0w0)
         val signedMultiplyAndSubLong = threeSourceInstr(0w1, 0w0, 0w1, 0w1)
         (* The high-order part of a signed multiplication.  There is no
            accumulator so XZero is passed as regA. *)
         fun signedMultiplyHigh({regM, regN, regD}) =
             threeSourceInstr(0w1, 0w0, 0w2, 0w0) { regM=regM, regN=regN, regD=regD, regA=XZero}
     end
 
     (* Loads and stores.  The ARM has two forms: one with a signed 9-bit
        byte offset and one with an unsigned 12-bit offset in units of the
        operand size.  These are the second form. *)
     
     local
         (* size: the operand size code; v: 1 for the floating point registers;
            opc: 1 for a load and 0 for a store; xD: converts regT to its number.
            Raises InternalError unless 0 <= unitOffset < 0x1000.  The base
            register regN may be XSP. *)
         fun loadStoreRegScaled (size, v, opc, xD) ({regT, regN, unitOffset}) =
             if unitOffset < 0 orelse unitOffset >= 0x1000
             then raise InternalError "loadStoreRegScaled: value out of range"
             else
             let
                 val opBits = 0wx39000000 orb (size << 0w30) orb (opc << 0w22) orb (v << 0w26)
                 val immField = Word32.fromInt unitOffset << 0w10
                 val rnField = word8ToWord32(xRegOrXSP regN) << 0w5
                 val rtField = word8ToWord32(xD regT)
             in
                 SimpleInstr(opBits orb immField orb rnField orb rtField)
             end
     in
         (* 64-bit values. *)
         val loadRegScaled = loadStoreRegScaled(0w3, 0w0, 0w1, xRegOrXZ)
         val storeRegScaled = loadStoreRegScaled(0w3, 0w0, 0w0, xRegOrXZ)
         (* Unsigned bytes.  The instruction set also has signed versions. *)
         val loadRegScaledByte = loadStoreRegScaled (0w0, 0w0, 0w1, xRegOrXZ)
         val storeRegScaledByte = loadStoreRegScaled (0w0, 0w0, 0w0, xRegOrXZ)
         (* 16-bit and 32-bit values. *)
         val loadRegScaled16 = loadStoreRegScaled (0w1, 0w0, 0w1, xRegOrXZ)
         val storeRegScaled16 = loadStoreRegScaled (0w1, 0w0, 0w0, xRegOrXZ)
         val loadRegScaled32 = loadStoreRegScaled (0w2, 0w0, 0w1, xRegOrXZ)
         val storeRegScaled32 = loadStoreRegScaled (0w2, 0w0, 0w0, xRegOrXZ)
         (* Floating point registers. *)
         val loadRegScaledDouble = loadStoreRegScaled(0w3, 0w1, 0w1, vReg)
         val storeRegScaledDouble = loadStoreRegScaled(0w3, 0w1, 0w0, vReg)
         val loadRegScaledFloat = loadStoreRegScaled(0w2, 0w1, 0w1, vReg)
         val storeRegScaledFloat = loadStoreRegScaled(0w2, 0w1, 0w0, vReg)
     end    
 
     local
         (* Loads and stores with a signed byte offset.  This includes simple
            unscaled addresses, pre-indexing and post-indexing.
            op4: the addressing form - 0 for unscaled, 1 for post-index and
            3 for pre-index; xD: converts regT to its register number;
            size: the operand size code; v: 1 for the floating point registers;
            opc: 0 for a store, 1 for a load and 2 or 3 for the sign-extending
            loads.  The base register regN may be XSP.
            Raises InternalError unless ~256 <= byteOffset < 256.  The message
            names this function because it is shared by all three forms. *)
         fun loadStoreByteAddress (op4, xD) (size, v, opc) ({regT, regN, byteOffset}) =
         let
             val () =
                 if byteOffset >= ~256 andalso byteOffset < 256
                 then ()
                 else raise InternalError "loadStoreByteAddress: value out of range"
             (* A negative offset is held in two's complement in nine bits. *)
             val imm9 = Word32.fromInt byteOffset andb 0wx1ff
         in
             SimpleInstr(0wx38000000 orb (size << 0w30) orb (opc << 0w22) orb
                 (v << 0w26) orb (imm9 << 0w12) orb (op4 << 0w10) orb
                 (word8ToWord32(xRegOrXSP regN) << 0w5) orb word8ToWord32(xD regT))
         end
         
         (* The addressing forms.  Only the unscaled form is provided for the
            floating point registers. *)
         val loadStoreUnscaled = loadStoreByteAddress (0w0, xRegOrXZ)
         and loadStoreUnscaledSIMD = loadStoreByteAddress (0w0, vReg)
         and loadStorePostIndex = loadStoreByteAddress (0w1, xRegOrXZ)
         and loadStorePreIndex = loadStoreByteAddress (0w3, xRegOrXZ)
     in
         (* Unscaled: the address is regN plus the byte offset. *)
         val loadRegUnscaled = loadStoreUnscaled (0w3, 0w0, 0w1)
         and storeRegUnscaled = loadStoreUnscaled (0w3, 0w0, 0w0)
         (* (Unsigned) byte operations.  There are also signed versions. *)
         and loadRegUnscaledByte = loadStoreUnscaled (0w0, 0w0, 0w1)
         and loadRegUnscaledSignedByteTo64 = loadStoreUnscaled (0w0, 0w0, 0w2)
         and loadRegUnscaledSignedByteTo32 = loadStoreUnscaled (0w0, 0w0, 0w3)
         and storeRegUnscaledByte = loadStoreUnscaled (0w0, 0w0, 0w0)
         and loadRegUnscaled16 = loadStoreUnscaled (0w1, 0w0, 0w1)
         and loadRegUnscaledSigned16To64 = loadStoreUnscaled (0w1, 0w0, 0w2)
         and loadRegUnscaledSigned16To32 = loadStoreUnscaled (0w1, 0w0, 0w3)
         and storeRegUnscaled16 = loadStoreUnscaled (0w1, 0w0, 0w0)
         and loadRegUnscaled32 = loadStoreUnscaled (0w2, 0w0, 0w1)
         and loadRegUnscaledSigned32To64 = loadStoreUnscaled (0w2, 0w0, 0w2)
         and storeRegUnscaled32 = loadStoreUnscaled (0w2, 0w0, 0w0)
         and loadRegUnscaledFloat = loadStoreUnscaledSIMD (0w2, 0w1, 0w1)
         and storeRegUnscaledFloat = loadStoreUnscaledSIMD (0w2, 0w1, 0w0)
         and loadRegUnscaledDouble = loadStoreUnscaledSIMD (0w3, 0w1, 0w1)
         and storeRegUnscaledDouble = loadStoreUnscaledSIMD (0w3, 0w1, 0w0)
 
         (* Post-indexed forms for 64-bit, 32-bit and byte operands. *)
         val loadRegPostIndex = loadStorePostIndex (0w3, 0w0, 0w1)
         and storeRegPostIndex = loadStorePostIndex (0w3, 0w0, 0w0)
         and loadRegPostIndex32 = loadStorePostIndex (0w2, 0w0, 0w1)
         and storeRegPostIndex32 = loadStorePostIndex (0w2, 0w0, 0w0)
         and loadRegPostIndexByte = loadStorePostIndex (0w0, 0w0, 0w1)
         and storeRegPostIndexByte = loadStorePostIndex (0w0, 0w0, 0w0)
 
         (* Pre-indexed forms for 64-bit, 32-bit and byte operands. *)
         val loadRegPreIndex = loadStorePreIndex (0w3, 0w0, 0w1)
         and storeRegPreIndex = loadStorePreIndex (0w3, 0w0, 0w0)
         and loadRegPreIndex32 = loadStorePreIndex (0w2, 0w0, 0w1)
         and storeRegPreIndex32 = loadStorePreIndex (0w2, 0w0, 0w0)
         and loadRegPreIndexByte = loadStorePreIndex (0w0, 0w0, 0w1)
         and storeRegPreIndexByte = loadStorePreIndex (0w0, 0w0, 0w0)
     end
 
     (* Loads and stores where the offset is in a register, i.e. the address
        is a base register plus an index register. *)
     local
         (* size: the operand size code; v: 1 for the floating point registers;
            opc: 1 for a load and 0 for a store; xD: converts regT to its number.
            "option" says how the index register regM is extended and whether
            it is scaled.  The base register regN may be XSP. *)
         fun loadStoreRegRegisterOffset (size, v, opc, xD) ({regT, regN, regM, option}) =
         let
             val (extCode, scaling) = extendLSEncode option
             (* The single scale/shift bit. *)
             val scaleBit: Word32.word =
                 case scaling of ScaleOrShift => 0w1 | NoScale => 0w0
             val opBits = 0wx38200800 orb (size << 0w30) orb (v << 0w26) orb (opc << 0w22)
             val rmField = word8ToWord32(xRegOnly regM) << 0w16
             val rnField = word8ToWord32(xRegOrXSP regN) << 0w5
             val rtField = word8ToWord32(xD regT)
         in
             SimpleInstr(opBits orb rmField orb (extCode << 0w13) orb (scaleBit << 0w12) orb
                 rnField orb rtField)
         end
     in
         (* 64-bit values. *)
         val loadRegIndexed = loadStoreRegRegisterOffset(0w3, 0w0, 0w1, xRegOrXZ)
         val storeRegIndexed = loadStoreRegRegisterOffset(0w3, 0w0, 0w0, xRegOrXZ)
         (* Bytes, 16-bit and 32-bit values. *)
         val loadRegIndexedByte = loadStoreRegRegisterOffset(0w0, 0w0, 0w1, xRegOrXZ)
         val storeRegIndexedByte = loadStoreRegRegisterOffset(0w0, 0w0, 0w0, xRegOrXZ)
         val loadRegIndexed16 = loadStoreRegRegisterOffset(0w1, 0w0, 0w1, xRegOrXZ)
         val storeRegIndexed16 = loadStoreRegRegisterOffset(0w1, 0w0, 0w0, xRegOrXZ)
         val loadRegIndexed32 = loadStoreRegRegisterOffset(0w2, 0w0, 0w1, xRegOrXZ)
         val storeRegIndexed32 = loadStoreRegRegisterOffset(0w2, 0w0, 0w0, xRegOrXZ)
         (* Floating point registers. *)
         val loadRegIndexedFloat = loadStoreRegRegisterOffset(0w2, 0w1, 0w1, vReg)
         val storeRegIndexedFloat = loadStoreRegRegisterOffset(0w2, 0w1, 0w0, vReg)
         val loadRegIndexedDouble = loadStoreRegRegisterOffset(0w3, 0w1, 0w1, vReg)
         val storeRegIndexedDouble = loadStoreRegRegisterOffset(0w3, 0w1, 0w0, vReg)
     end
 
     local
         (* Loads and stores with special ordering.
            size: the operand size code; o2, l, o1 and o0 select the operation.
            regS and regT2 are encoded with 31 meaning XZero; the base
            register regN may be XSP. *)
         fun loadStoreExclusive(size, o2, l, o1, o0) {regS, regT2, regN, regT} =
         let
             val opBits =
                 0wx08000000 orb (size << 0w30) orb (o2 << 0w23) orb (l << 0w22) orb (o1 << 0w21)
             val rsField = word8ToWord32(xRegOrXZ regS) << 0w16
             val rt2Field = word8ToWord32(xRegOrXZ regT2) << 0w10
             val rnField = word8ToWord32(xRegOrXSP regN) << 0w5
             val rtField = word8ToWord32(xRegOrXZ regT)
         in
             SimpleInstr(opBits orb rsField orb (o0 << 0w15) orb rt2Field orb rnField orb rtField)
         end
 
         (* The forms that use neither regS nor regT2: XZero is supplied for both. *)
         fun withoutStatus opFields {regN, regT} =
             loadStoreExclusive opFields {regS=XZero, regT2=XZero, regN=regN, regT=regT}
     in
         (* Load-acquire and store-release for 64-bit, 32-bit and byte values. *)
         val loadAcquire = withoutStatus(0w3, 0w1, 0w1, 0w0, 0w1)
         val storeRelease = withoutStatus(0w3, 0w1, 0w0, 0w0, 0w1)
         val loadAcquire32 = withoutStatus(0w2, 0w1, 0w1, 0w0, 0w1)
         val storeRelease32 = withoutStatus(0w2, 0w1, 0w0, 0w0, 0w1)
         val loadAcquireByte = withoutStatus(0w0, 0w1, 0w1, 0w0, 0w1)
         val storeReleaseByte = withoutStatus(0w0, 0w1, 0w0, 0w0, 0w1)
 
         (* Acquire exclusive access to a memory location and load its current value. *)
         val loadAcquireExclusiveRegister = withoutStatus(0w3, 0w0, 0w1, 0w0, 0w1)
 
         (* Release exclusive access and test whether it succeeded.  regS is set
            to 0 if it did and to 1 if not, in which case the operation has to
            be repeated. *)
         fun storeReleaseExclusiveRegister{regN, regS, regT} =
             loadStoreExclusive(0w3, 0w0, 0w0, 0w0, 0w1) {regS=regS, regT2=XZero, regN=regN, regT=regT}
     end
 
     local
         (* Load and store pairs.  The offsets are signed scaled values and
            must lie in the range -64..63 so that they fit in the 7-bit field;
            anything else raises InternalError.
            op2 selects the addressing form (2 = offset, 1 = post-indexed,
            3 = pre-indexed), "l" is 1 for a load and 0 for a store, and
            opc together with v selects the register size and bank.
            rT encodes each of the two transfer registers. *)
         fun loadStorePair op2 (opc, v, l, rT) {regT1, regT2, regN, unitOffset} =
         let
             val _ = (unitOffset >= ~64 andalso unitOffset < 64)
                 orelse raise InternalError "loadStorePair: value out of range"
             (* Two's complement truncated to the 7-bit immediate field. *)
             val imm7 = Word32.fromInt unitOffset andb 0wx7f
         in
            SimpleInstr(0wx28000000 orb (opc << 0w30) orb (v << 0w26) orb (op2 << 0w23) orb
             (l << 0w22) orb (imm7 << 0w15) orb (word8ToWord32(rT regT2) << 0w10) orb
             (word8ToWord32(xRegOrXSP regN) << 0w5) orb word8ToWord32(rT regT1))
         end
         
         fun loadStorePairOffset args = loadStorePair 0w2 args
         and loadStorePairPostIndexed args = loadStorePair 0w1 args
         and loadStorePairPreIndexed args = loadStorePair 0w3 args
     in
         (* 64-bit general registers. *)
         val storePairOffset = loadStorePairOffset(0w2, 0w0, 0w0, xRegOrXZ)
         and loadPairOffset =  loadStorePairOffset(0w2, 0w0, 0w1, xRegOrXZ)
         and storePairPostIndexed = loadStorePairPostIndexed(0w2, 0w0, 0w0, xRegOrXZ)
         and loadPairPostIndexed =  loadStorePairPostIndexed(0w2, 0w0, 0w1, xRegOrXZ)
         and storePairPreIndexed = loadStorePairPreIndexed(0w2, 0w0, 0w0, xRegOrXZ)
         and loadPairPreIndexed =  loadStorePairPreIndexed(0w2, 0w0, 0w1, xRegOrXZ)
         
         (* 32-bit general registers. *)
         and storePairOffset32 = loadStorePairOffset(0w0, 0w0, 0w0, xRegOrXZ)
         and loadPairOffset32 =  loadStorePairOffset(0w0, 0w0, 0w1, xRegOrXZ)
         and storePairPostIndexed32 = loadStorePairPostIndexed(0w0, 0w0, 0w0, xRegOrXZ)
         and loadPairPostIndexed32 =  loadStorePairPostIndexed(0w0, 0w0, 0w1, xRegOrXZ)
         and storePairPreIndexed32 = loadStorePairPreIndexed(0w0, 0w0, 0w0, xRegOrXZ)
         and loadPairPreIndexed32 =  loadStorePairPreIndexed(0w0, 0w0, 0w1, xRegOrXZ)
 
         (* Single precision floating point: v=1, opc=0. *)
         and storePairOffsetFloat = loadStorePairOffset(0w0, 0w1, 0w0, vReg)
         and loadPairOffsetFloat = loadStorePairOffset(0w0, 0w1, 0w1, vReg)
         and storePairPostIndexedFloat = loadStorePairPostIndexed(0w0, 0w1, 0w0, vReg)
         and loadPairPostIndexedFloat = loadStorePairPostIndexed(0w0, 0w1, 0w1, vReg)
         and storePairPreIndexedFloat = loadStorePairPreIndexed(0w0, 0w1, 0w0, vReg)
         and loadPairPreIndexedFloat = loadStorePairPreIndexed(0w0, 0w1, 0w1, vReg)
 
         (* Double precision floating point: v=1, opc=1 in every addressing
            form.  Using opc=0 here would emit the single precision encoding
            and transfer only 32 bits per register. *)
         and storePairOffsetDouble = loadStorePairOffset(0w1, 0w1, 0w0, vReg)
         and loadPairOffsetDouble = loadStorePairOffset(0w1, 0w1, 0w1, vReg)
         and storePairPostIndexedDouble = loadStorePairPostIndexed(0w1, 0w1, 0w0, vReg)
         and loadPairPostIndexedDouble = loadStorePairPostIndexed(0w1, 0w1, 0w1, vReg)
         and storePairPreIndexedDouble = loadStorePairPreIndexed(0w1, 0w1, 0w0, vReg)
         and loadPairPreIndexedDouble = loadStorePairPreIndexed(0w1, 0w1, 0w1, vReg)
     end
 
     (* Load an address constant into a register.  Addresses must be placed in
        the constant area at the end of the code where the GC can find them.
        The instruction starts in its extended form. *)
     fun loadAddressConstant(destReg, constant) =
         LoadAddressLiteral{length=ref BrExtended, value=constant, reg=destReg}
 
     (* Load a non-address constant into a register.  The value may or may
        not be a tagged value.  The instruction starts in its extended form. *)
     fun loadNonAddressConstant(destReg, constant) =
         LoadNonAddressLiteral{length=ref BrExtended, value=constant, reg=destReg}
 
     local
         (* Move a 16-bit immediate into a register, optionally shifted.
            sf is 1 for the 64-bit forms and 0 for the 32-bit forms; opc
            selects move-not (0), move-zero (2) or move-keep (3).
            The shift must be a multiple of 16: 0 or 16 for either size and
            additionally 32 or 48 for the 64-bit forms.  Any other shift, or
            an immediate that does not fit in 16 bits, raises InternalError. *)
         fun moveWideImmediate(sf, opc) {regD, immediate, shift} =
         let
             (* hw is the shift amount divided by 16. *)
             val hw =
                 case (shift, sf) of
                     (0w0, _) => 0w0
                 |   (0w16, _) => 0w1
                 |   (0w32, 0w1) => 0w2
                 |   (0w48, 0w1) => 0w3
                 |   _ => raise InternalError "moveWideImmediate: invalid shift"
             val _ =
                 immediate <= 0wxffff orelse raise InternalError "moveWideImmediate: immediate too large"
         in
             SimpleInstr(0wx12800000 orb (sf << 0w31) orb (opc << 0w29) orb
                 (hw << 0w21) orb (wordToWord32 immediate << 0w5) orb word8ToWord32(xRegOnly regD))
         end
     in
         val moveNot32 = moveWideImmediate(0w0, 0w0)
         and moveZero32 = moveWideImmediate(0w0, 0w2)
         and moveKeep32 = moveWideImmediate(0w0, 0w3)
         and moveNot = moveWideImmediate(0w1, 0w0)
         and moveZero = moveWideImmediate(0w1, 0w2)
         and moveKeep = moveWideImmediate(0w1, 0w3)
     end
 
     (* Instructions involved in thread synchronisation. *)
     (* Yield inside a spin-lock. *)
     val yield = SimpleInstr 0wxD503203F
     (* Memory barrier. *)
     val dmbIsh = SimpleInstr 0wxD5033BBF
     
     (* Call through a register: jump to the address it holds, leaving the
        address of the following instruction in X30. *)
     fun branchAndLinkReg target =
     let
         val targetField = word8ToWord32(xRegOnly target) << 0w5
     in
         SimpleInstr(targetField orb 0wxD63F0000)
     end
 
     (* Plain indirect jump to the address held in the register. *)
     fun branchRegister target =
     let
         val targetField = word8ToWord32(xRegOnly target) << 0w5
     in
         SimpleInstr(targetField orb 0wxD61F0000)
     end
 
     (* Indirect jump to the address held in the register, with a hint that
        this is a return. *)
     fun returnRegister target =
     let
         val targetField = word8ToWord32(xRegOnly target) << 0w5
     in
         SimpleInstr(targetField orb 0wxD65F0000)
     end
 
     (* Place a label at the current point in the instruction stream. *)
     fun setLabel lab = Label lab
 
     local
         (* Shared constructors for the two families that differ only in
            whether the branch is taken on zero or on non-zero.  All of them
            start in the extended form. *)
         fun bitBranch nonZero (reg, bit, label) =
             TestBitBranch{label=label, bitNo=bit, brNonZero=nonZero, reg=reg, length=ref BrExtended}

         fun zeroBranch (width, nonZero) (reg, label) =
             CompareBranch{label=label, brNonZero=nonZero, size=width, reg=reg, length=ref BrExtended}
     in
         (* Create a fresh label: a reference to a list holding one address cell. *)
         fun createLabel () = ref [ref 0w0]

         (* A conditional branch, initially in its extended form. *)
         fun conditionalBranch(cond, label) =
             ConditionalBranch{label=label, jumpCondition=cond, length=ref BrExtended }

         (* Unconditional branches, without and with link. *)
         fun unconditionalBranch label = UnconditionalBranch{label=label, andLink=false}
         fun branchAndLink label = UnconditionalBranch{label=label, andLink=true}

         (* Put the address of a label into a register - used for handlers and cases. *)
         fun loadLabelAddress(reg, label) =
             LoadLabelAddress{label=label, reg=reg, length=ref BrExtended}

         (* Test a bit in a register and branch if zero/nonzero *)
         fun testBitBranchZero args = bitBranch false args
         fun testBitBranchNonZero args = bitBranch true args

         (* Compare a register with zero and branch if zero/nonzero *)
         fun compareBranchZero args = zeroBranch (WordSize64, false) args
         fun compareBranchNonZero args = zeroBranch (WordSize64, true) args
         fun compareBranchZero32 args = zeroBranch (WordSize32, false) args
         fun compareBranchNonZero32 args = zeroBranch (WordSize32, true) args
     end
     
 
     (* Set the destination register to the value of the first reg if the
        condition is true otherwise to a, possibly modified, version of
        the second argument.  There are variants that set it unmodified,
        incremented, inverted and negated. *)
     local
         (* Common encoder.  regTrue supplies the result when the condition
            holds; regFalse supplies the value that opc/op2 may modify when it
            does not. *)
         fun condSelect (sf, opc, op2) {regD, regFalse, regTrue, cond} =
         let
             val falseField = word8ToWord32(xRegOrXZ regFalse) << 0w16
             val condField = cCode cond << 0w12
             val trueField = word8ToWord32(xRegOrXZ regTrue) << 0w5
             val destField = word8ToWord32(xRegOrXZ regD)
         in
             SimpleInstr(0wx1A800000 orb (sf << 0w31) orb (opc << 0w30) orb
                 falseField orb condField orb (op2 << 0w10) orb trueField orb destField)
         end
     in
         (* 64-bit forms. *)
         val conditionalSet = condSelect(0w1, 0w0, 0w0)
         val conditionalSetIncrement = condSelect(0w1, 0w0, 0w1)
         val conditionalSetInverted = condSelect(0w1, 0w1, 0w0)
         val conditionalSetNegated = condSelect(0w1, 0w1, 0w1)
         (* 32-bit forms. *)
         val conditionalSet32 = condSelect(0w0, 0w0, 0w0)
         val conditionalSetIncrement32 = condSelect(0w0, 0w0, 0w1)
         val conditionalSetInverted32 = condSelect(0w0, 0w1, 0w0)
         val conditionalSetNegated32 = condSelect(0w0, 0w1, 0w1)
     end
 
     (* This combines the effect of a left and right shift.  There are various
        derived forms of this depending on the relative values of immr and imms.
        if imms >= immr copies imms-immr+1 bits from bit position immr to the lsb
        bits of the destination.
        if imms < immr copies imms+1 bits from the lsb bit to bit position
        regsize-immr.
        How the remaining bits are affected depends on the instruction.
        BitField instructions do not affect other bits.
        UnsignedBitField instructions zero other bits.
        SignedBitField instructions set the high order bits to a copy of
        the high order bit copied and zero the low order bits. *)
     local
         (* Common encoder for the bitfield move group.  opc selects the signed
            (0), plain (1) or unsigned (2) variant. *)
         fun bitfield (sf, opc, n) {immr, imms, regN, regD} =
         let
             val immrField = wordToWord32 immr << 0w16
             val immsField = wordToWord32 imms << 0w10
             val sourceField = word8ToWord32(xRegOrXZ regN) << 0w5
             val destField = word8ToWord32(xRegOrXZ regD)
         in
             SimpleInstr(0wx13000000 orb (sf << 0w31) orb (opc << 0w29) orb (n << 0w22) orb
                 immrField orb immsField orb sourceField orb destField)
         end

         (* The negation of a bit position modulo the register width. *)
         fun negateMod(amount, regBits) = Word.~ amount mod regBits
     in
         val signedBitfieldMove32 = bitfield(0w0, 0w0, 0w0)
         val bitfieldMove32 = bitfield(0w0, 0w1, 0w0)
         val unsignedBitfieldMove32 = bitfield(0w0, 0w2, 0w0)
         val signedBitfieldMove64 = bitfield(0w1, 0w0, 0w1)
         val bitfieldMove64 = bitfield(0w1, 0w1, 0w1)
         val unsignedBitfieldMove64 = bitfield(0w1, 0w2, 0w1)

         (* Derived forms: each is one of the moves above with immr and imms
            computed from a shift amount or from lsb and width. *)

         (* Left shifts. *)
         fun logicalShiftLeft{shift, regN, regD} =
             unsignedBitfieldMove64{regD=regD, regN=regN,
                 immr=negateMod(shift, 0w64), imms=0w64-0w1-shift}
         fun logicalShiftLeft32{shift, regN, regD} =
             unsignedBitfieldMove32{regD=regD, regN=regN,
                 immr=negateMod(shift, 0w32), imms=0w32-0w1-shift}

         (* Logical (zero-filling) right shifts. *)
         fun logicalShiftRight{shift, regN, regD} =
             unsignedBitfieldMove64{regD=regD, regN=regN, immr=shift, imms=0wx3f}
         fun logicalShiftRight32{shift, regN, regD} =
             unsignedBitfieldMove32{regD=regD, regN=regN, immr=shift, imms=0wx1f}

         (* Place the low "width" bits of the source at "lsb", zeroing the rest. *)
         fun unsignedBitfieldInsertinZeros{lsb, width, regN, regD} =
             unsignedBitfieldMove64{regD=regD, regN=regN,
                 immr=negateMod(lsb, 0w64), imms=width-0w1}
         fun unsignedBitfieldInsertinZeros32{lsb, width, regN, regD} =
             unsignedBitfieldMove32{regD=regD, regN=regN,
                 immr=negateMod(lsb, 0w32), imms=width-0w1}

         (* Arithmetic (sign-propagating) right shifts. *)
         fun arithmeticShiftRight{shift, regN, regD} =
             signedBitfieldMove64{regD=regD, regN=regN, immr=shift, imms=0wx3f}
         fun arithmeticShiftRight32{shift, regN, regD} =
             signedBitfieldMove32{regD=regD, regN=regN, immr=shift, imms=0wx1f}

         (* Extract "width" bits starting at "lsb" with sign extension. *)
         fun signedBitfieldExtract{lsb, width, regN, regD} =
             signedBitfieldMove64{regD=regD, regN=regN, immr=lsb, imms=lsb+width-0w1}

         (* Insert the low "width" bits of the source at "lsb", leaving the
            other destination bits unchanged. *)
         fun bitfieldInsert{lsb, width, regN, regD} =
             bitfieldMove64{regD=regD, regN=regN, immr=negateMod(lsb, 0w64), imms=width-0w1}
         fun bitfieldInsert32{lsb, width, regN, regD} =
             bitfieldMove32{regD=regD, regN=regN, immr=negateMod(lsb, 0w32), imms=width-0w1}
     end
 
     local
         (* Logical operations with an immediate: AND, OR, XOR and ANDS.  The
            immediate is expected to have been validated already; if it cannot
            be encoded InternalError is raised.  The forms that do not set the
            flags may have SP as the destination, so the destination encoder
            is passed in. *)
         fun logicalImmediate (s, opc, encodeD) {bits, regN, regD} =
         let
             val width = if s = 0w0 then WordSize32 else WordSize64
             val {n, imms, immr} =
                 case encodeBitPattern(bits, width) of
                     SOME fields => fields
                 |   NONE => raise InternalError "testBitPattern: unable to encode bit pattern"
             val sourceField = word8ToWord32(xRegOrXZ regN) << 0w5
             val destField = word8ToWord32(encodeD regD)
         in
             SimpleInstr(0wx12000000 orb (opc << 0w29) orb (s << 0w31) orb (n << 0w22) orb
                 (wordToWord32 immr << 0w16) orb (wordToWord32 imms << 0w10) orb
                 sourceField orb destField)
         end
     in
         (* 64-bit forms. *)
         val bitwiseAndImmediate = logicalImmediate (0w1, 0w0, xRegOrXSP)
         val bitwiseOrImmediate = logicalImmediate (0w1, 0w1, xRegOrXSP)
         val bitwiseXorImmediate = logicalImmediate (0w1, 0w2, xRegOrXSP)
         val bitwiseAndSImmediate = logicalImmediate (0w1, 0w3, xRegOrXZ)
         (* 32-bit forms. *)
         val bitwiseAndImmediate32 = logicalImmediate (0w0, 0w0, xRegOrXSP)
         val bitwiseOrImmediate32 = logicalImmediate (0w0, 0w1, xRegOrXSP)
         val bitwiseXorImmediate32 = logicalImmediate (0w0, 0w2, xRegOrXSP)
         val bitwiseAndSImmediate32 = logicalImmediate (0w0, 0w3, xRegOrXZ)
     end
 
     local
         (* Floating point operations with two source registers.  pt selects
            float (0) or double (1); opc selects the operation. *)
         fun fpBinary (pt, opc) {regM, regN, regD} =
         let
             val secondField = word8ToWord32(vReg regM) << 0w16
             val firstField = word8ToWord32(vReg regN) << 0w5
             val destField = word8ToWord32(vReg regD)
         in
             SimpleInstr(0wx1E200800 orb (pt << 0w22) orb secondField orb
                 (opc << 0w12) orb firstField orb destField)
         end
     in
         (* Single precision. *)
         val multiplyFloat = fpBinary(0w0, 0wx0)
         val divideFloat = fpBinary(0w0, 0wx1)
         val addFloat = fpBinary(0w0, 0wx2)
         val subtractFloat = fpBinary(0w0, 0wx3)
         (* Double precision. *)
         val multiplyDouble = fpBinary(0w1, 0wx0)
         val divideDouble = fpBinary(0w1, 0wx1)
         val addDouble = fpBinary(0w1, 0wx2)
         val subtractDouble = fpBinary(0w1, 0wx3)
     end
 
     local
         open IEEEReal

         (* Move between a floating point and a general register with or without
            conversion.  rN and rD encode the source and destination registers. *)
         fun fmoveGeneral (sf, s, ptype, mode, opcode, rN, rD) {regN, regD} =
         let
             val sourceField = word8ToWord32(rN regN) << 0w5
             val destField = word8ToWord32(rD regD)
         in
             SimpleInstr(0wx1E200000 orb (sf << 0w31) orb (s << 0w29) orb (ptype << 0w22) orb
                 (mode << 0w19) orb (opcode << 0w16) orb sourceField orb destField)
         end

         (* Floating point to signed integer.  The IEEE rounding mode picks the
            (mode, opcode) pair; sf is the integer width and ptype the
            floating point precision. *)
         fun fpToInt (sf, ptype) rounding =
         let
             val (mode, opcode) =
                 case rounding of
                     TO_NEAREST => (0w0, 0w4) (* fcvtas *)
                 |   TO_NEGINF => (0w2, 0w0) (* fcvtms *)
                 |   TO_POSINF => (0w1, 0w0) (* fcvtps *)
                 |   TO_ZERO => (0w3, 0w0) (* fcvtzs *)
         in
             fmoveGeneral(sf, 0w0, ptype, mode, opcode, vReg, xRegOnly)
         end
     in
         (* Moves without conversion *)
         val moveGeneralToFloat = fmoveGeneral(0w0, 0w0, 0w0, 0w0, 0w7, xRegOrXZ, vReg)
         val moveFloatToGeneral = fmoveGeneral(0w0, 0w0, 0w0, 0w0, 0w6, vReg, xRegOnly)
         val moveGeneralToDouble = fmoveGeneral(0w1, 0w0, 0w1, 0w0, 0w7, xRegOrXZ, vReg)
         val moveDoubleToGeneral = fmoveGeneral(0w1, 0w0, 0w1, 0w0, 0w6, vReg, xRegOnly)

         (* Moves with conversion - signed.  The argument is a 64-bit value. *)
         val convertIntToFloat = fmoveGeneral(0w1, 0w0, 0w0, 0w0, 0w2, xRegOrXZ, vReg)
         val convertIntToDouble = fmoveGeneral(0w1, 0w0, 0w1, 0w0, 0w2, xRegOrXZ, vReg)
         (* The same with a 32-bit argument. *)
         val convertInt32ToFloat = fmoveGeneral(0w0, 0w0, 0w0, 0w0, 0w2, xRegOrXZ, vReg)
         val convertInt32ToDouble = fmoveGeneral(0w0, 0w0, 0w1, 0w0, 0w2, xRegOrXZ, vReg)

         (* Conversions to a 64-bit integer using the given rounding mode. *)
         val convertFloatToInt = fpToInt(0w1, 0w0)
         val convertDoubleToInt = fpToInt(0w1, 0w1)
         (* Conversions to a 32-bit integer using the given rounding mode. *)
         val convertFloatToInt32 = fpToInt(0w0, 0w0)
         val convertDoubleToInt32 = fpToInt(0w0, 0w1)
     end
 
     local
         (* Floating point comparison of two registers.  ptype selects float
            (0) or double (1). *)
         fun fpCompare(ptype, opc) {regM, regN} =
         let
             val secondField = word8ToWord32(vReg regM) << 0w16
             val firstField = word8ToWord32(vReg regN) << 0w5
         in
             SimpleInstr(0wx1E202000 orb (ptype << 0w22) orb secondField orb firstField orb
                 (opc << 0w3))
         end
     in
         val compareFloat = fpCompare(0w0, 0w0) (* fcmp *)
         val compareDouble = fpCompare(0w1, 0w0)
         (* It is also possible to compare a single register with zero using opc=1/3 *)
     end
 
     local
         (* Floating point operations with a single source register.  ptype is
            the precision of the source: float (0) or double (1). *)
         fun fpUnary (ptype, opc) {regN, regD} =
         let
             val sourceField = word8ToWord32(vReg regN) << 0w5
             val destField = word8ToWord32(vReg regD)
         in
             SimpleInstr(0wx1E204000 orb (ptype << 0w22) orb (opc << 0w15) orb
                 sourceField orb destField)
         end
     in
         (* Single precision source. *)
         val moveFloatToFloat = fpUnary(0w0, 0wx0)
         val absFloat = fpUnary(0w0, 0wx1)
         val negFloat = fpUnary(0w0, 0wx2)
         val convertFloatToDouble = fpUnary(0w0, 0wx5)
         (* Double precision source. *)
         val moveDoubleToDouble = fpUnary(0w1, 0wx0)
         val absDouble = fpUnary(0w1, 0wx1)
         val negDouble = fpUnary(0w1, 0wx2)
         val convertDoubleToFloat = fpUnary(0w1, 0wx4)
     end
 
     (* Build the word that follows a call to the RTS trap-handler.  Every
        register is saved and restored across that call; the mask has one bit
        per register that may hold an address and therefore has to be scanned,
        and possibly updated, if there is a GC.  Only registers numbered 0 to
        24 are accepted; anything else raises InternalError. *)
     fun registerMask(regs) =
     let
         fun includeReg(r, soFar) =
         let
             val rno = word8ToWord(xRegOnly r)
         in
             (* In particular this can't be X30. *)
             if rno <= 0w24
             then soFar orb (0w1 << rno)
             else raise InternalError ("registerMask: invalid register "^Word.toString rno)
         end
     in
         (* The mask is combined with a value from the reserved instruction range. *)
         SimpleInstr(0wx02000000 orb List.foldl includeReg 0w0 regs)
     end
     
     (* This is a bit of a hack but is the only way to get round the problem that when
        a callback (FFI closure) is called the code has none of the global registers.
        This isn't a problem in the native addressing version because we have
        absolute addresses but in 32-in-64 we need at least one absolute address to
        begin.  This embeds the global heap base pointer as a constant in the
        non-address constant area.  It requires the RTS to be able to find it and
        update it when the code is loaded.  We insert a nop followed by the
        pc-relative load.  This MUST be the first instruction in the code. *)
     local
         val getHeapBase: unit -> LargeWord.word = RunCall.rtsCallFull0 "PolyGetHeapBase"
     in
         (* Returns a nop followed by a load of the current heap base as a
            non-address constant.  Only meaningful in 32-in-64; with native
            addressing it raises InternalError. *)
         fun loadGlobalHeapBaseInCallback reg =
             if not is32in64
             then raise InternalError "loadGlobalHeapBaseInCallback called with native addressing"
             else
             let
                 val nop = SimpleInstr nopCode
                 val loadBase = loadNonAddressConstant(reg, getHeapBase())
             in
                 [nop, loadBase]
             end
     end
 
     (* Size of each instruction as a number of 32-bit words, given its
        current length setting. *)
     fun codeSize instr =
     let
         (* Variable-length instructions take one word when short and two
            when extended. *)
         fun bySetting(ref BrShort) = 1
         |   bySetting(ref BrExtended) = 2
     in
         case instr of
             SimpleInstr _ => 1
         |   Label _ => 0 (* Labels occupy no space. *)
         |   UnconditionalBranch _ => 1
         |   LoadAddressLiteral{length, ...} => bySetting length
         |   LoadNonAddressLiteral{length, ...} => bySetting length
         |   LoadLabelAddress{length, ...} => bySetting length
         |   ConditionalBranch{length, ...} => bySetting length
         |   TestBitBranch{length, ...} => bySetting length
         |   CompareBranch{length, ...} => bySetting length
     end
 
     (* Store a 32-bit value in the code at the given 32-bit word index.
        The bytes are always written least significant first. *)
     fun writeInstr(value, wordAddr, seg) =
     let
         (* Byte address of the first byte of the word. *)
         val base = Word.<<(wordAddr, 0w2)

         fun emit(remaining, n) =
             if n = 0w4 then ()
             else
             (
                 byteVecSet(seg, base+n, word32ToWord8(remaining andb 0wxff));
                 emit(remaining >> 0w8, n+0w1)
             )
     in
         emit(value, 0w0)
     end
     
     (* Store a 64-bit constant in the code area at the given 64-bit word
        index.  Unlike instructions, the byte order follows isBigEndian. *)
     fun write64Bit(value, word64Addr, seg) =
     let
         (* Byte address of the first byte of the 64-bit word. *)
         val base = Word.<<(word64Addr, 0w3)

         (* Where the n-th least significant byte goes. *)
         fun byteAddress n =
             if isBigEndian then base+0w8-n-0w1 else base+n

         fun emit(remaining, n) =
             if n = 0w8 then ()
             else
             (
                 byteVecSet(seg, byteAddress n, Word8.fromLarge(Word64.toLarge remaining));
                 emit(Word64.>>(remaining, 0w8), n+0w1)
             )
     in
         emit(value, 0w0)
     end
 
     (* Set the sizes of branches depending on the distance to the destination.
        "ops" is the instruction list.  "maxConstantSize" is added to the code
        size when estimating the distance from an instruction to a literal.
        Variable-length instructions that are still BrExtended (two words) are
        switched to BrShort (one word) when their offset passes willFitInRange.
        The process repeats until the total size stops shrinking.  The result
        is the final code size in the units used by codeSize, i.e. 32-bit words.
        Raises InternalError if a pass makes the code larger. *)
     fun setLabelsAndSizes(ops, maxConstantSize) =
     let
         (* Set the labels and get the current size of the code.  Every address
            cell attached to a label is set to the current instruction count. *)
         fun setLabels(Label(ref labs) :: ops, ic) = (List.app(fn d => d := ic) labs; setLabels(ops, ic))
         |   setLabels(oper :: ops, ic) = setLabels(ops, ic + Word.fromInt(codeSize oper))
         |   setLabels([], ic) = ic
 
         (* Set the labels and adjust the sizes, repeating until it never gets smaller.
            "lastSize" is the code size found by the previous setLabels pass. *)
         fun setLabAndSize(ops, lastSize) =
         let
             (* See if we can shorten any branches.  The "addr" is the original address since
                that's what we've used to set the labels.  Only instructions that are
                still BrExtended are examined; everything else falls through to the
                final clause which just advances the address. *)
             fun adjust([], _) = ()
 
             |   adjust(ConditionalBranch { length as ref BrExtended, label=ref labs, ...} :: instrs, addr) =
                 let
                     (* setLabels gives every cell of a label the same value so the first is used. *)
                     val dest = !(hd labs)
                     val offset = Word.toInt dest - Word.toInt addr
                 in
                     if willFitInRange(offset, 0w19) then length := BrShort else ();
                     adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *)
                 end
             
             |   adjust(TestBitBranch { length as ref BrExtended, label=ref labs, ...} :: instrs, addr) =
                 let
                     val dest = !(hd labs)
                     val offset = Word.toInt dest - Word.toInt addr
                 in
                     (* Test-bit branches are checked against a narrower range than the others. *)
                     if willFitInRange(offset, 0w14) then length := BrShort else ();
                     adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *)
                 end
 
             |   adjust(CompareBranch { length as ref BrExtended, label=ref labs, ...} :: instrs, addr) =
                 let
                     val dest = !(hd labs)
                     val offset = Word.toInt dest - Word.toInt addr
                 in
                     if willFitInRange(offset, 0w19) then length := BrShort else ();
                     adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *)
                 end
 
             |   adjust(LoadAddressLiteral { length as ref BrExtended, ...} :: instrs, addr) =
                 let
                     (* The distance is measured to the end of the code plus the whole
                        constant allowance rather than to the individual constant. *)
                     val offset = Word.toInt (lastSize + maxConstantSize) - Word.toInt addr
                 in
                     (* We can only shorten these in 32-in-64.  In native 64-bits we may need to move
                        the constant area *)
                     if is32in64 andalso willFitInRange(offset, 0w19) then length := BrShort else ();
                     adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *)
                 end
 
             |   adjust(LoadNonAddressLiteral { length as ref BrExtended, ...} :: instrs, addr) =
                 let
                     (* Measured in the same way as for address literals. *)
                     val offset = Word.toInt (lastSize + maxConstantSize) - Word.toInt addr
                 in
                     if willFitInRange(offset, 0w19) then length := BrShort else ();
                     adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *)
                 end
 
             |   adjust(LoadLabelAddress { length as ref BrExtended, label=ref labs, ...} :: instrs, addr) =
                 let
                     val dest = !(hd labs)
                     val offset = Word.toInt dest - Word.toInt addr
                 in
                     if willFitInRange(offset, 0w19) then length := BrShort else ();
                     adjust(instrs, addr + 0w2) (* N.B. Size BEFORE any adjustment *)
                 end
 
             (* Anything else, including instructions already shortened. *)
             |   adjust(instr :: instrs, addr) = adjust(instrs, addr + Word.fromInt(codeSize instr))
 
             val () = adjust(ops, 0w0)
 
             (* Recompute the label addresses using the sizes as they are now. *)
             val nextSize = setLabels(ops, 0w0)
         in
             (* Shortening only ever replaces two words by one so the size must not grow. *)
             if nextSize < lastSize then setLabAndSize(ops, nextSize)
             else if nextSize = lastSize then lastSize
             else raise InternalError "setLabAndSize - size increased"
         end
     in
         (* The first setLabels pass gives the starting size. *)
         setLabAndSize(ops, setLabels(ops, 0w0))
     end
 
     fun genCode(ops, addressConsts, nonAddressConsts) =
     let
         val numNonAddrConsts = Word.fromInt(List.length nonAddressConsts)
         and numAddrConsts = Word.fromInt(List.length addressConsts) (* 32-bit words. *)
         val constSizePlusExtras = (* Number of extra (poly)words needed. *)
             numNonAddrConsts * wordsPerNativeWord + numAddrConsts + 0w4 (* 4 extra words *)
 
         val codeSize (* Number of 32-bit instructions *) =
             setLabelsAndSizes(ops, constSizePlusExtras  * (Address.wordSize div 0w4) + 0w2 (*allow 2 UDFs*))
 
         val wordsOfCode = (codeSize + 0w2) div 0w2 (* Round up to 64-bits with the UDF marker(s) added. *)
         (* Put one or two UDF instructions at the end as markers. *)
         val endOfCodeWords =
             if Word.andb(codeSize, 0w1) = 0w0 then [SimpleInstr undefCode, SimpleInstr undefCode] else [SimpleInstr undefCode]
  
         (* Segment size in Poly words. *)
         val segSize = wordsOfCode*wordsPerNativeWord + constSizePlusExtras
         val codeVec = byteVecMake segSize
         
         fun testBit(bitNo, brNonZero, offset, reg) =
             0wx36000000 orb (if bitNo >= 0w32 then 0wx80000000 else 0w0) orb
                 (if brNonZero then 0wx01000000 else 0w0) orb
                 (word8ToWord32(Word8.andb(bitNo, 0wx3f)) << 0w19) orb
                 ((offset andb 0wx3fff) << 0w5) orb word8ToWord32(xRegOnly reg)
         and compareBranch(size, brNonZero, offset, reg) =
             0wx34000000 orb (case size of WordSize64 => 0wx80000000 | WordSize32 => 0w0) orb
                 (if brNonZero then 0wx01000000 else 0w0) orb
                 ((offset andb 0wx7ffff) << 0w5) orb word8ToWord32(xRegOnly reg)
 
         fun genCodeWords([], _ , _, _) = ()
 
         |   genCodeWords(SimpleInstr code :: tail, wordNo, aConstNum, nonAConstNum) =
             (
                 writeInstr(code, wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum)
             )
 
         |   genCodeWords(LoadAddressLiteral{reg, length=ref BrExtended, ...} :: tail, wordNo, aConstNum, nonAConstNum) =
             let
                 val code1 = 0wx90000000 orb word8ToWord32(xRegOnly reg)
                 val code2 =
                     (if is32in64 then loadRegScaled32 else loadRegScaled) {regT=reg, regN=reg, unitOffset=0}
             in
                 writeInstr(code1, wordNo, codeVec);
                 genCodeWords(code2 :: tail, wordNo+0w1, aConstNum+0w1, nonAConstNum)
             end
 
         |   genCodeWords(LoadAddressLiteral{reg, length=ref BrShort, ...} :: tail, wordNo, aConstNum, nonAConstNum) =
             (* Address literals can be shortened in 32-in-64 but are always 2 instrs in 64-bit.
                That allows for the constant area to be pulled out if necessary to make the
                code position-independent. *)
             let
                 (* The offset is in 32-bit words.  The first of the constants is
                    at offset wordsOfCode+3.  Non-address constants are always 8 bytes but
                    address constants are 4 bytes in 32-in-64. *)
                 val s = if is32in64 then 0w0 else 0w1 (* Load 64-bit word in 64-bit mode and 32-bits in 32-in-64. *)
                 val offsetOfConstant =
                     (wordsOfCode+numNonAddrConsts)*0w2 + (0w3+aConstNum)*(Address.wordSize div 0w4) - wordNo
                 val _ = willFitInRange(Word.toInt offsetOfConstant, 0w19) orelse raise InternalError "Offset to constant is too large"
                 val code =
                     0wx18000000 orb (s << 0w30) orb (wordToWord32 offsetOfConstant << 0w5) orb word8ToWord32(xRegOnly reg)
             in
                 writeInstr(code, wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum+0w1, nonAConstNum)
             end
 
         |   genCodeWords(LoadNonAddressLiteral{reg, length=ref BrExtended, ...} :: tail, wordNo, aConstNum, nonAConstNum) =
             let
                 val code1 = 0wx90000000 orb word8ToWord32(xRegOnly reg)
                 (* The load instruction is always 64-bits even in 32-in-64. *)
                 val code2 = loadRegScaled{regT=reg, regN=reg, unitOffset=0}
             in
                 writeInstr(code1, wordNo, codeVec);
                 genCodeWords(code2 :: tail, wordNo+0w1, aConstNum, nonAConstNum+0w1)
             end
 
         |   genCodeWords(LoadNonAddressLiteral{reg, length=ref BrShort, ...} :: tail, wordNo, aConstNum, nonAConstNum) =
             (* These can be shortened since they're always part of the code. *)
             let
                 (* The offset is in 32-bit words.  These are always 64-bits. *)
                 val offsetOfConstant = (wordsOfCode+nonAConstNum)*0w2 - wordNo
                 val _ = willFitInRange(Word.toInt offsetOfConstant, 0w19) orelse raise InternalError "Offset to constant is too large"
                 val code = 0wx58000000 orb (wordToWord32 offsetOfConstant << 0w5) orb word8ToWord32(xRegOnly reg)
             in
                 writeInstr(code, wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum+0w1)
             end
 
         |   genCodeWords(Label _ :: tail, wordNo, aConstNum, nonAConstNum) = 
                 genCodeWords(tail, wordNo, aConstNum, nonAConstNum) (* No code. *)
 
         |   genCodeWords(UnconditionalBranch{label=ref labs, andLink} :: tail, wordNo, aConstNum, nonAConstNum) =
             let
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt wordNo
                 val _ = willFitInRange(offset, 0w26) orelse raise InternalError "genCodeWords: branch too far"
                 val linkBit = if andLink then 0wx80000000 else 0w0
             in
                 writeInstr(0wx14000000 orb linkBit orb (Word32.fromInt offset andb 0wx03ffffff), wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(ConditionalBranch{ label=ref labs, jumpCondition=cond, length=ref BrShort }:: tail, wordNo,
                             aConstNum, nonAConstNum) =
             let
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt wordNo
                 val _ = willFitInRange(offset, 0w19) orelse raise InternalError "genCodeWords: branch too far"
             in
                 writeInstr(0wx54000000 orb ((Word32.fromInt offset andb 0wx07ffff) << 0w5)
                         orb cCode cond, wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(ConditionalBranch{ label=ref labs, jumpCondition, length=ref BrExtended }:: tail, wordNo,
                             aConstNum, nonAConstNum) =
             let (* Long form - put a conditional branch with reversed sense round an unconditional branch. *)
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt (wordNo + 0w1) (* Next instruction. *)
                 val _ = willFitInRange(offset, 0w26) orelse raise InternalError "genCodeWords: branch too far"
                 val revCond = invertTest jumpCondition
             in
                 writeInstr(0wx54000000 orb (0w2 << 0w5) orb cCode revCond, wordNo, codeVec);
                 writeInstr(0wx14000000 orb (Word32.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec);
                 genCodeWords(tail, wordNo+0w2, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(LoadLabelAddress{reg, length=ref BrExtended, ...} :: tail, wordNo, aConstNum, nonAConstNum) =
             let
                 val code1 = 0wx90000000 orb word8ToWord32(xRegOnly reg)
                 val code2 = addImmediate{regN=reg, regD=reg, immed=0w0, shifted=false}
             in
                 writeInstr(code1, wordNo, codeVec);
                 genCodeWords(code2 :: tail, wordNo+0w1, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(LoadLabelAddress{label=ref labs, reg, length=ref BrShort, ...} :: tail, wordNo, aConstNum, nonAConstNum) =
             let
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt wordNo
                 val _ = willFitInRange(offset, 0w19) orelse raise InternalError "Offset to label address is too large"
                 val code = 0wx10000000 orb ((Word32.fromInt offset andb 0wx7ffff) << 0w5) orb word8ToWord32(xRegOnly reg)
             in
                 writeInstr(code, wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(TestBitBranch{label=ref labs, bitNo, brNonZero, reg, length=ref BrExtended} :: tail,
                     wordNo, aConstNum, nonAConstNum) =
             let
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt (wordNo + 0w1) (* Next instruction *)
                 val _ = willFitInRange(offset, 0w25) orelse raise InternalError "genCodeWords: branch too far"
                 val _ = bitNo <= 0w63 orelse
                     raise InternalError "TestBitBranch: bit number > 63"
                 val code = testBit(bitNo, (* Invert test *) not brNonZero, 0w2 (* Skip branch *), reg)
             in
                 writeInstr(code, wordNo, codeVec);
                 writeInstr(0wx14000000 orb (Word32.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec);
                 genCodeWords(tail, wordNo+0w2, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(TestBitBranch{label=ref labs, bitNo, brNonZero, reg, length=ref BrShort} :: tail,
                     wordNo, aConstNum, nonAConstNum) =
             let
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt wordNo
                 val _ = willFitInRange(offset, 0w14) orelse raise InternalError "TestBitBranch: Offset to label address is too large"
                 val _ = bitNo <= 0w63 orelse
                     raise InternalError "TestBitBranch: bit number > 63"
                 val code = testBit(bitNo, brNonZero, Word32.fromInt offset, reg)
             in
                 writeInstr(code, wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(CompareBranch{label=ref labs, brNonZero, size, reg, length=ref BrExtended} :: tail,
                     wordNo, aConstNum, nonAConstNum) =
             let
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt (wordNo+0w1)
                 val _ = willFitInRange(offset, 0w25) orelse raise InternalError "genCodeWords: branch too far"
                 val code = compareBranch(size, (* Invert test *) not brNonZero, 0w2, reg)
             in
                 writeInstr(code, wordNo, codeVec);
                 writeInstr(0wx14000000 orb (Word32.fromInt offset andb 0wx03ffffff), wordNo+0w1, codeVec);
                 genCodeWords(tail, wordNo+0w2, aConstNum, nonAConstNum)
             end
 
         |   genCodeWords(CompareBranch{label=ref labs, brNonZero, size, reg, length=ref BrShort} :: tail,
                     wordNo, aConstNum, nonAConstNum) =
             let
                 val dest = !(hd labs)
                 val offset = Word.toInt dest - Word.toInt wordNo
                 val _ = willFitInRange(offset, 0w19) orelse raise InternalError "CompareBranch: Offset to label address is too large"
                 val code = compareBranch(size, brNonZero, Word32.fromInt offset, reg)
             in
                 writeInstr(code, wordNo, codeVec);
                 genCodeWords(tail, wordNo+0w1, aConstNum, nonAConstNum)
             end
     in
         genCodeWords (ops @ endOfCodeWords, 0w0, 0w0, 0w0);
         (* Copy in the non-address constants. *)
         List.foldl(fn (cVal, addr) => (write64Bit(cVal, addr, codeVec); addr+0w1)) wordsOfCode nonAddressConsts;
         (codeVec (* Return the completed code. *), wordsOfCode+numNonAddrConsts (* And the size in 64-bit words. *))
     end
 
     (* Store one ML word (Address.wordSize bytes, i.e. 64-bit or 32-bit) into
        the byte segment "seg".  "wordNo" is an index in words, not bytes.
        The bytes are written in the byte order selected by isBigEndian. *)
     fun setWord(value, wordNo, seg) =
     let
         (* Byte address of the start of the word. *)
         val base = wordNo * Address.wordSize
         (* Where the idx-th least significant byte of the word lives. *)
         fun bytePos idx =
             if isBigEndian then base+wordSize-idx-0w1 else base+idx
         (* Emit the low-order byte of "rest", then shift it out and continue
            until a whole word has been written. *)
         fun emit(rest, idx) =
             if idx = Address.wordSize
             then ()
             else
             (
                 byteVecSet(seg, bytePos idx, Word8.fromLarge rest);
                 emit(LargeWord.>>(rest, 0w8), idx+0w1)
             )
     in
         emit(value, 0w0)
     end
     
    
     (* Print the instructions in the code. *)
     fun printCode (codeVec, functionName, wordsOfCode, printStream) =
     let
         val numInstructions = wordsOfCode * (Address.wordSize div 0w4)
     
         (* Print v in hexadecimal, left-padded with zeros to at least n
            characters.  The padding and the digits are written as two
            separate calls to printStream. *)
         fun printHex (v, n) =
         let
             val digits = Word32.fmt StringCvt.HEX v
             val missing = Int.max(0, n - size digits)
             val zeros = String.implode(List.tabulate(missing, fn _ => #"0"))
         in
             printStream zeros;
             printStream digits
         end
 
         (* Print the mnemonic for a condition code.  Values 0 to 14 have
            their own names; anything else is printed as "nv". *)
         fun printCondition cond =
             printStream
             (
                 case cond of
                     0wx0 => "eq"
                 |   0wx1 => "ne"
                 |   0wx2 => "cs"
                 |   0wx3 => "cc"
                 |   0wx4 => "mi"
                 |   0wx5 => "pl"
                 |   0wx6 => "vs"
                 |   0wx7 => "vc"
                 |   0wx8 => "hi"
                 |   0wx9 => "ls"
                 |   0wxa => "ge"
                 |   0wxb => "lt"
                 |   0wxc => "gt"
                 |   0wxd => "le"
                 |   0wxe => "al"
                 |   _    => "nv"
             )
 
         (* Print a 64-bit register name; register 31 is printed as "xz". *)
         fun prXReg r =
             printStream(if r = 0w31 then "xz" else "x" ^ Word32.fmt StringCvt.DEC r)
 
         (* Print a 64-bit register name; register 31 is printed as "sp". *)
         fun prXRegOrSP r =
             printStream(if r = 0w31 then "sp" else "x" ^ Word32.fmt StringCvt.DEC r)
 
         (* Print a 32-bit register name; register 31 is printed as "wz". *)
         fun prWReg r =
             printStream(if r = 0w31 then "wz" else "w" ^ Word32.fmt StringCvt.DEC r)
 
         (* Print a 32-bit register name; register 31 is printed as "wsp". *)
         fun prWRegOrSP r =
             printStream(if r = 0w31 then "wsp" else "w" ^ Word32.fmt StringCvt.DEC r)
 
         (* Each instruction is 32 bits, i.e. four bytes. *)
         fun printWordAt wordNo =
         let
             val byteNo = Word.<<(wordNo, 0w2)
             val () = printHex(wordToWord32 byteNo, 6)  (* Address *)
             val () = printStream "\t"
             val wordValue =
                 word8ToWord32 (codeVecGet (codeVec, byteNo)) orb
                 (word8ToWord32 (codeVecGet (codeVec, byteNo+0w1)) << 0w8) orb
                 (word8ToWord32 (codeVecGet (codeVec, byteNo+0w2)) << 0w16) orb
                 (word8ToWord32 (codeVecGet (codeVec, byteNo+0w3)) << 0w24)
             val () = printHex(wordValue, 8) (* Instr as hex *)
             val () = printStream "\t"
         in
             if (wordValue andb 0wxfffffc1f) = 0wxD61F0000
             then
             let
                 val rN = (wordValue andb 0wx3e0) >> 0w5
             in
                 printStream "br\tx";
                 printStream(Word32.fmt StringCvt.DEC rN)
             end
 
             else if (wordValue andb 0wxfffffc1f) = 0wxD63F0000
             then
             let
                 val rN = (wordValue andb 0wx3e0) >> 0w5
             in
                 printStream "blr\tx";
                 printStream(Word32.fmt StringCvt.DEC rN)
             end
 
             else if (wordValue andb 0wxfffffc1f) = 0wxD65F0000
             then
             let
                 val rN = (wordValue andb 0wx3e0) >> 0w5
             in
                 printStream "ret\tx";
                 printStream(Word32.fmt StringCvt.DEC rN)
             end
 
             else if wordValue = 0wxD503201F
             then printStream "nop"
             else if wordValue = 0wxD503203F
             then printStream "yield"
             else if wordValue = 0wxD5033BBF
             then printStream "dmb\tish"
 
             else if (wordValue andb 0wx1f800000) = 0wx12800000
             then (* Move of constants.  Includes movn and movk. *)
             let
                 val rD = wordValue andb 0wx1f
                 val imm16 = Word32.toInt((wordValue >> 0w5) andb 0wxffff)
                 val isXReg = (wordValue andb 0wx80000000) <> 0w0
                 val opc = (wordValue >> 0w29) andb 0w3
                 val shift = (wordValue >> 0w21) andb 0w3
             in
                 printStream (if opc = 0w3 then "movk\t" else "mov\t");
                 printStream (if isXReg then "x" else "w");
                 printStream(Word32.fmt StringCvt.DEC rD);
                 printStream ",#";
                 printStream(Int.toString(if opc = 0w0 then ~1 - imm16 else imm16));
                 if shift = 0w0
                 then ()
                 else (printStream ",lsl #"; printStream(Word32.fmt StringCvt.DEC (shift*0w16)))
             end
 
             else if (wordValue andb 0wx3b000000) = 0wx39000000
             then (* Load/Store with unsigned, scaled offset. *)
             let
                 (* The offset is in units of the size of the operand. *)
                 val size = wordValue >> 0w30
                 and v = (wordValue >> 0w26) andb 0w1
                 and opc = (wordValue >> 0w22) andb 0w3
                 val rT = wordValue andb 0wx1f
                 and rN = (wordValue andb 0wx3e0) >> 0w5
                 and imm12 = (wordValue andb 0wx3ffc00) >> 0w10
                 val (opcode, r, scale) =
                     case (size, v, opc) of
                         (0w0, 0w0, 0w0) => ("strb", "w", 0w0)
                     |   (0w0, 0w0, 0w1) => ("ldrb", "w", 0w0)
                     |   (0w1, 0w0, 0w0) => ("strh", "w", 0w2)
                     |   (0w1, 0w0, 0w1) => ("ldrh", "w", 0w2)
                     |   (0w2, 0w0, 0w0) => ("str", "w", 0w4)
                     |   (0w2, 0w0, 0w1) => ("ldr", "w", 0w4)
                     |   (0w3, 0w0, 0w0) => ("str", "x", 0w8)
                     |   (0w3, 0w0, 0w1) => ("ldr", "x", 0w8)
                     |   (0w2, 0w1, 0w0) => ("str", "s", 0w4)
                     |   (0w2, 0w1, 0w1) => ("ldr", "s", 0w4)
                     |   (0w3, 0w1, 0w0) => ("str", "d", 0w8)
                     |   (0w3, 0w1, 0w1) => ("ldr", "d", 0w8)
                     |   _ => ("??", "?", 0w1)
             in
                 printStream opcode; printStream "\t"; printStream r; printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",["; prXRegOrSP rN;
                 printStream ",#"; printStream(Word32.fmt StringCvt.DEC(imm12*scale));
                 printStream "]"
             end
 
             else if (wordValue andb 0wx3b200c00) = 0wx38000000
             then (* Load/store unscaled immediate *)
             let
                 val size = wordValue >> 0w30
                 and v = (wordValue >> 0w26) andb 0w1
                 and opc = (wordValue >> 0w22) andb 0w3
                 val rT = wordValue andb 0wx1f
                 and rN = (wordValue andb 0wx3e0) >> 0w5
                 and imm9 = (wordValue andb 0wx1ff000) >> 0w12
                 val imm9Text =
                     if imm9 > 0wxff
                     then "-" ^ Word32.fmt StringCvt.DEC (0wx200 - imm9)
                     else Word32.fmt StringCvt.DEC imm9
                 val (opcode, r) =
                     case (size, v, opc) of
                         (0w0, 0w0, 0w0) => ("sturb", "w")
                     |   (0w0, 0w0, 0w1) => ("ldurb", "w")
                     |   (0w0, 0w0, 0w2) => ("ldursb", "w")
                     |   (0w0, 0w0, 0w3) => ("ldursb", "x")
                     |   (0w1, 0w0, 0w0) => ("sturh", "w")
                     |   (0w1, 0w0, 0w1) => ("ldurh", "w")
                     |   (0w1, 0w0, 0w2) => ("ldursh", "w")
                     |   (0w1, 0w0, 0w3) => ("ldursh", "x")
                     |   (0w2, 0w0, 0w0) => ("stur", "w")
                     |   (0w2, 0w0, 0w1) => ("ldur", "w")
                     |   (0w2, 0w0, 0w2) => ("ldursw", "x")
                     |   (0w3, 0w0, 0w0) => ("stur", "x")
                     |   (0w3, 0w0, 0w1) => ("ldur", "x")
                     |   (0w2, 0w1, 0w0) => ("stur", "s")
                     |   (0w2, 0w1, 0w1) => ("ldur", "s")
                     |   (0w3, 0w1, 0w0) => ("stur", "d")
                     |   (0w3, 0w1, 0w1) => ("ldur", "d")
                     |   _ => ("???", "?")
             in
                 printStream opcode; printStream "\t"; printStream r;
                 printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",["; prXRegOrSP rN;
                 printStream ",#"; printStream imm9Text; printStream "]"
             end
 
             else if (wordValue andb 0wx3b200c00) = 0wx38000400
             then (* Load/store immediate post-indexed *)
             let
                 val size = wordValue >> 0w30
                 and v = (wordValue >> 0w26) andb 0w1
                 and opc = (wordValue >> 0w22) andb 0w3
                 val rT = wordValue andb 0wx1f
                 and rN = (wordValue andb 0wx3e0) >> 0w5
                 and imm9 = (wordValue andb 0wx1ff000) >> 0w12
                 val imm9Text =
                     if imm9 > 0wxff
                     then "-" ^ Word32.fmt StringCvt.DEC (0wx200 - imm9)
                     else Word32.fmt StringCvt.DEC imm9
                 val (opcode, r) =
                     case (size, v, opc) of
                         (0w0, 0w0, 0w0) => ("strb", "w")
                     |   (0w0, 0w0, 0w1) => ("ldrb", "w")
                     |   (0w2, 0w0, 0w0) => ("str", "w")
                     |   (0w2, 0w0, 0w1) => ("ldr", "w")
                     |   (0w3, 0w0, 0w0) => ("str", "x")
                     |   (0w3, 0w0, 0w1) => ("ldr", "x")
                     |   _ => ("???", "?")
             in
                 printStream opcode; printStream "\t"; printStream r;
                 printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",["; prXRegOrSP rN;
                 printStream "],#"; printStream imm9Text
             end
 
             else if (wordValue andb 0wx3b200c00) = 0wx38000c00
             then (* Load/store immediate pre-indexed *)
             let
                 val size = wordValue >> 0w30
                 and v = (wordValue >> 0w26) andb 0w1
                 and opc = (wordValue >> 0w22) andb 0w3
                 val rT = wordValue andb 0wx1f
                 and rN = (wordValue andb 0wx3e0) >> 0w5
                 and imm9 = (wordValue andb 0wx1ff000) >> 0w12
                 val imm9Text =
                     if imm9 > 0wxff
                     then "-" ^ Word32.fmt StringCvt.DEC (0wx200 - imm9)
                     else Word32.fmt StringCvt.DEC imm9
                 val (opcode, r) =
                     case (size, v, opc) of
                         (0w0, 0w0, 0w0) => ("strb", "w")
                     |   (0w0, 0w0, 0w1) => ("ldrb", "w")
                     |   (0w2, 0w0, 0w0) => ("str", "w")
                     |   (0w2, 0w0, 0w1) => ("ldr", "w")
                     |   (0w3, 0w0, 0w0) => ("str", "x")
                     |   (0w3, 0w0, 0w1) => ("ldr", "x")
                     |   _ => ("???", "?")
             in
                 printStream opcode; printStream "\t"; printStream r;
                 printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",["; prXRegOrSP rN;
                 printStream ",#"; printStream imm9Text; printStream "]!"
             end
 
             else if (wordValue andb 0wx3b200c00) = 0wx38200800
             then (* Load/store with register offset i.e. an index register. *)
             let
                 val size = wordValue >> 0w30
                 and v = (wordValue >> 0w26) andb 0w1
                 and opc = (wordValue >> 0w22) andb 0w3
                 val rT = wordValue andb 0wx1f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rM = (wordValue >> 0w16) andb 0wx1f
                 val option = (wordValue >> 0w13) andb 0w7
                 val s = (wordValue andb 0wx1000) <> 0w0 
                 val (opcode, r) =
                     case (size, v, opc) of
                         (0w0, 0w0, 0w0) => ("strb", "w")
                     |   (0w0, 0w0, 0w1) => ("ldrb", "w")
                     |   (0w1, 0w0, 0w0) => ("strh", "w")
                     |   (0w1, 0w0, 0w1) => ("ldrh", "w")
                     |   (0w2, 0w0, 0w0) => ("str", "w")
                     |   (0w2, 0w0, 0w1) => ("ldr", "w")
                     |   (0w3, 0w0, 0w0) => ("str", "x")
                     |   (0w3, 0w0, 0w1) => ("ldr", "x")
                     |   (0w2, 0w1, 0w0) => ("str", "s")
                     |   (0w2, 0w1, 0w1) => ("ldr", "s")
                     |   (0w3, 0w1, 0w0) => ("str", "d")
                     |   (0w3, 0w1, 0w1) => ("ldr", "d")
                     |   _ => ("???", "?")
                 val (extend, xr) =
                     case option of
                         0w2 => (" uxtw", "w")
                     |   0w3 => if s then (" lsl", "x") else ("", "x")
                     |   0w6 => (" sxtw", "w")
                     |   0w7 => (" sxtx", "x")
                     |   _   => ("?", "?")
                 val indexShift =
                     case (size, s) of
                         (0w0, true) => " #1"
                     |   (0w1, true) => " #1"
                     |   (0w2, true) => " #2"
                     |   (0w3, true) => " #3"
                     |   _ => ""
             in
                 printStream opcode; printStream "\t"; printStream r;
                 printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",["; prXRegOrSP rN;
                 printStream ","; printStream xr; printStream(Word32.fmt StringCvt.DEC rM);
                 printStream extend; printStream indexShift;
                 printStream "]"
             end
 
             else if (wordValue andb 0wx3f000000) = 0wx08000000
             then (* Loads and stores with special ordering. *)
             let
                 val size = (wordValue >> 0w30) andb 0w3
                 and o2 = (wordValue >> 0w23) andb 0w1
                 and l = (wordValue >> 0w22) andb 0w1
                 and o1 = (wordValue >> 0w21) andb 0w1
                 and o0 = (wordValue >> 0w15) andb 0w1
                 val rT = wordValue andb 0wx1f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rS = (wordValue >> 0w16) andb 0wx1f
                 val (opcode, r) =
                     case (size, o2, l, o1, o0) of
                         (0w3, 0w1, 0w1, 0w0, 0w1) => ("ldar", "x")
                     |   (0w3, 0w1, 0w0, 0w0, 0w1) => ("stlr", "x")
                     |   (0w2, 0w1, 0w1, 0w0, 0w1) => ("ldar", "w")
                     |   (0w2, 0w1, 0w0, 0w0, 0w1) => ("stlr", "w")
                     |   (0w3, 0w0, 0w1, 0w0, 0w1) => ("ldaxr", "x")
                     |   (0w3, 0w0, 0w0, 0w0, 0w1) => ("stlxr", "x")
                     |   (0w0, 0w1, 0w1, 0w0, 0w1) => ("ldarb", "w")
                     |   (0w0, 0w1, 0w0, 0w0, 0w1) => ("stlrb", "w")
                     |   _ => ("??", "?")
             in
                 printStream opcode; printStream "\t";
                 if opcode = "stlxr"
                 then (printStream "w"; printStream(Word32.fmt StringCvt.DEC rS); printStream ",")
                 else ();
                 printStream r;
                 printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",["; prXRegOrSP rN; printStream "]"
             end
 
             else if (wordValue andb 0wx3a000000) = 0wx28000000
             then (* Load/store pairs of registers *)
             let
                 val opc = (wordValue >> 0w30) andb 0w3
                 and v = (wordValue >> 0w26) andb 0w1
                 and op2 = (wordValue >> 0w23) andb 0w3
                 and l = (wordValue >> 0w22) andb 0w1
                 and imm7 = (wordValue >> 0w15) andb 0wx7f
                 and rT2 = (wordValue >> 0w10) andb 0wx1f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rT1 = wordValue andb 0wx1f
                 val (opcode, r, scale) =
                     case (opc, v, l) of
                         (0w0, 0w0, 0w0) => ("stp", "w", 0w4)
                     |   (0w0, 0w0, 0w1) => ("ldp", "w", 0w4)
                     |   (0w2, 0w0, 0w0) => ("stp", "x", 0w8)
                     |   (0w2, 0w0, 0w1) => ("ldp", "x", 0w8)
                     |   (0w0, 0w1, 0w0) => ("stp", "s", 0w4)
                     |   (0w0, 0w1, 0w1) => ("ldp", "s", 0w4)
                     |   (0w1, 0w1, 0w0) => ("stp", "d", 0w8)
                     |   (0w1, 0w1, 0w1) => ("ldp", "d", 0w8)
                     |   _ => ("??", "?", 0w1)
                 val imm7Text =
                     if imm7 > 0wx3f
                     then "-" ^ Word32.fmt StringCvt.DEC ((0wx80 - imm7) * scale)
                     else Word32.fmt StringCvt.DEC (imm7 * scale)
             in
                 printStream opcode; printStream "\t"; printStream r;
                 printStream(Word32.fmt StringCvt.DEC rT1); printStream ",";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rT2);
                 printStream ",["; prXRegOrSP rN;
                 case op2 of
                     0w1 => (* Post indexed *)
                         (printStream "],#"; printStream imm7Text)
                 |   0w2 => (* Offset *)
                         (printStream ",#"; printStream imm7Text; printStream "]")
                 |   0w3 => (* Pre indexed *)
                         (printStream ",#"; printStream imm7Text; printStream "]!")
                 |   _ => printStream "??"
             end
 
             else if (wordValue andb 0wx1f800000) = 0wx11000000
             then
             let
                 (* Add/Subtract a 12-bit immediate with possible shift. *)
                 val sf = (wordValue >> 0w31) andb 0w1
                 val rD = wordValue andb 0wx1f
                 and rN = (wordValue andb 0wx3e0) >> 0w5
                 and imm12 = (wordValue andb 0wx3ffc00) >> 0w10
                 and shiftBit = wordValue andb 0wx400000
                 val imm = if shiftBit <> 0w0 then imm12 << 0w12 else imm12
                 val oper = (wordValue andb 0wx40000000) = 0w0
                 val isS = (wordValue andb 0wx20000000) <> 0w0
                 val prReg = if sf = 0w1 then prXRegOrSP else prWRegOrSP
             in
                 if imm12 = 0w0 andalso (rN = 0w31 orelse rD = 0w31) andalso not isS
                 then (printStream "mov\t"; prReg rD; printStream ","; prReg rN)
                 else
                 (
                     if isS andalso rD = 0w31
                     then printStream(if oper then "cmn\t" else "cmp\t")
                     else
                     (
                         printStream(if oper then "add" else "sub"); printStream(if isS then "s\t" else "\t");
                         prReg rD; printStream ","
                     );
                     prReg rN; printStream ",#"; printStream(Word32.fmt StringCvt.DEC imm)
                 )
             end
 
             else if (wordValue andb 0wx7fe0ffe0) = 0wx2A0003E0
             then (* Move reg,reg.  This is a subset of ORR shifted register. *)
             let
                 val reg = if (wordValue andb 0wx80000000) <> 0w0 then "x" else "w"
             in
                 printStream "mov\t"; printStream reg;
                 printStream(Word32.fmt StringCvt.DEC(wordValue andb 0wx1f));
                 printStream ","; printStream reg;
                 printStream(Word32.fmt StringCvt.DEC((wordValue >> 0w16) andb 0wx1f))
             end
 
             else if (wordValue andb 0wx1f000000) = 0wx0A000000
             then
             let
                 (* Logical operations with shifted register. *)
                 val rD = wordValue andb 0wx1f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rM = (wordValue >> 0w16) andb 0wx1f
                 and imm6 = (wordValue >> 0w10) andb 0wx3f
                 and shiftCode = (wordValue >> 0w22) andb 0wx3
                 val opc = (wordValue >> 0w29) andb 0wx3
                 val nBit = (wordValue >> 0w21) andb 0w1
                 val reg = if (wordValue andb 0wx80000000) <> 0w0 then "x" else "w"
                 val opcode =
                     case (opc, nBit) of
                         (0w0, 0w0) => "and"
                     |   (0w1, 0w0) => "orr"
                     |   (0w2, 0w0) => "eor"
                     |   (0w3, 0w0) => "ands"
                     |   _ => "??"
             in
                 if rD = 0w31 andalso opc=0w3 andalso nBit = 0w0
                 then printStream "tst\t"
                 else
                 (
                     printStream opcode; printStream"\t";
                     printStream reg;
                     printStream(Word32.fmt StringCvt.DEC rD); printStream ","
                 );
                 printStream reg; printStream(Word32.fmt StringCvt.DEC rN);
                 printStream ","; printStream reg; printStream(Word32.fmt StringCvt.DEC rM);
                 if imm6 <> 0w0
                 then
                 (
                     case shiftCode of
                         0w0 => printStream ",lsl #"
                     |   0w1 => printStream ",lsr #"
                     |   0w2 => printStream ",asr #"
                     |   _ => printStream ",?? #";
                     printStream(Word32.fmt StringCvt.DEC imm6)
                 )
                 else ()
             end
 
             else if (wordValue andb 0wx1f200000) = 0wx0B000000
             then
             let
                 (* Add/subtract shifted register. *)
                 val rD = wordValue andb 0wx1f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rM = (wordValue >> 0w16) andb 0wx1f
                 and imm6 = (wordValue >> 0w10) andb 0wx3f
                 and shiftCode = (wordValue >> 0w22) andb 0wx3
                 val oper = (wordValue andb 0wx40000000) = 0w0
                 val isS = (wordValue andb 0wx20000000) <> 0w0
                 val pReg = if (wordValue andb 0wx80000000) <> 0w0 then prXReg else prWReg
             in
                 if isS andalso rD = 0w31
                 then printStream(if oper then "cmn\t" else "cmp\t")
                 else
                 (
                     printStream(if oper then "add" else "sub"); printStream(if isS then "s\t" else "\t");
                     pReg rD; printStream ","
                 );
                 pReg rN;
                 printStream ","; pReg rM;
                 if imm6 <> 0w0
                 then
                 (
                     case shiftCode of
                         0w0 => printStream ",lsl #"
                     |   0w1 => printStream ",lsr #"
                     |   0w2 => printStream ",asr #"
                     |   _ => printStream ",?? #";
                     printStream(Word32.fmt StringCvt.DEC imm6)
                 )
                 else ()
             end
 
             else if (wordValue andb 0wx1fe00000) = 0wx0b200000
             then
             let
                 (* Add/subtract extended register. *)
                 val rD = wordValue andb 0wx1f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rM = (wordValue >> 0w16) andb 0wx1f
                 and extend = (wordValue >> 0w13) andb 0w7
                 and amount = (wordValue >> 0w10) andb 0w7
                 and sf = (wordValue >> 0w31) andb 0w1
                 and p = (wordValue >> 0w30) andb 0w1
                 and s = (wordValue >> 0w29) andb 0w1
             in
                 if s = 0w1 andalso rD = 0w31
                 then printStream(if p = 0w0 then "cmn\t" else "cmp\t")
                 else
                 (
                     printStream(if p = 0w0 then "add" else "sub");
                     printStream(if s = 0w1 then "s\t" else "\t");
                     (if sf = 0w1 then prXRegOrSP else prWRegOrSP) rD; printStream ","
                 );
                 (if sf = 0w1 then prXRegOrSP else prWRegOrSP) rN;
                 printStream ",";
                 (if extend = 0w3 orelse extend = 0w7 then prXReg else prWReg) rM;
                 case extend of
                     0w0 => printStream ",uxtb"
                 |   0w1 => printStream ",uxth"
                 |   0w2 => if amount = 0w0 andalso sf = 0w0 then () else printStream ",uxtw"
                 |   0w3 => if amount = 0w0 andalso sf = 0w1 then () else printStream ",uxtx"
                 |   0w4 => printStream ",sxtb"
                 |   0w5 => printStream ",sxth"
                 |   0w6 => printStream ",sxtw"
                 |   0w7 => printStream ",sxtx"
                 |   _ => printStream "?";
                
                 if amount <> 0w0
                 then printStream(" #" ^ Word32.fmt StringCvt.DEC amount)
                 else ()
             end
 
             else if (wordValue andb 0wxbf000000) = 0wx18000000
             then
             let
                 (* Load from a PC-relative address.  This may refer to the
                    address constant area or the non-address constant area. *)
                 val rT = wordValue andb 0wx1f
                 val s = (wordValue >> 0w30) andb 0w1
                 (* The offset is in 32-bit words *)
                 val byteAddr = word32ToWord(((wordValue andb 0wx00ffffe0) >> (0w5-0w2))) + byteNo
                 (* We must NOT use codeVecGetWord if this is in the non-address
                    area.  It may well not be a tagged value. *)
                 local
                     fun getConstant(cVal, 0w0) = cVal
                     |   getConstant(cVal, offset) =
                         let
                             val byteVal =
                                 Word64.fromLarge(Word8.toLarge(codeVecGet (codeVec, byteAddr+offset-0w1)))
                         in
                             getConstant(Word64.orb(Word64.<<(cVal, 0w8), byteVal), offset-0w1)
                         end
                 in
                     val constantValue = "0x" ^ Word64.toString(getConstant(0w0, 0w8)) (* It's a non-address constant *)
                 end
             in
                 printStream "ldr\t"; printStream (if s = 0w0 then "w" else "x");
                 printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",0x"; printStream(Word.fmt StringCvt.HEX byteAddr);
                 printStream "\t// "; printStream constantValue
             end
 
             else if (wordValue andb 0wxbf000000) = 0wx10000000
             then
             let
                 (* Put a pc-relative address into a register. *)
                 val rT = wordValue andb 0wx1f
                 val byteOffset =
                     ((wordValue andb 0wx00ffffe0) << (Word.fromInt Word32.wordSize - 0w23) ~>>
                         (Word.fromInt Word32.wordSize - 0w20)) + ((wordValue >> 0w29) andb 0w3)
             in
                 printStream "adr\tx"; printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",0x"; printStream(Word32.fmt StringCvt.HEX (wordToWord32 byteNo+byteOffset))
             end
 
             else if (wordValue andb 0wx9f000000) = 0wx90000000
             then (* ADRP *)
             let
                 val rT = wordValue andb 0wx1f
                 (* The value is a page offset *)
                 val pageOffset = ((wordValue >> 0w29) andb 0w3) (* immlo *) orb
                     ((wordValue >> 0w3) andb 0wx1fffc)
             in
                 printStream "adrp\tx"; printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ",0x"; printStream(Word32.fmt StringCvt.HEX (pageOffset*0w4096))
             end
 
             else if (wordValue andb 0wx7c000000) = 0wx14000000
             then (* Unconditional branch. *)
             let
                 (* The offset is signed and the destination may be earlier. *)
                 val byteOffset =
                     (wordValue andb 0wx03ffffff) << (Word.fromInt Word32.wordSize - 0w26) ~>>
                         (Word.fromInt Word32.wordSize - 0w28)
                 val opc = if (wordValue andb 0wx80000000) = 0w0 then "b" else "bl"
             in
                 printStream opc; printStream "\t0x";
                 printStream(Word32.fmt StringCvt.HEX (wordToWord32 byteNo + byteOffset))
             end
 
             else if (wordValue andb 0wxff000000) = 0wx54000000
             then (* Conditional branch *)
             let
                 val byteOffset =
                     (wordValue andb 0wx00ffffe0) << (Word.fromInt Word32.wordSize - 0w24) ~>>
                         (Word.fromInt Word32.wordSize - 0w21)
             in
                 printStream "b.";
                 printCondition(wordValue andb 0wxf);
                 printStream "\t0x";
                 printStream(Word32.fmt StringCvt.HEX (wordToWord32 byteNo+byteOffset))
             end
 
             else if (wordValue andb 0wx7e000000) = 0wx34000000
             then (* Compare and branch *)
             let
                 val byteOffset =
                     (wordValue andb 0wx00ffffe0) << (Word.fromInt Word32.wordSize - 0w24) ~>>
                         (Word.fromInt Word32.wordSize - 0w21)
                 val oper =
                     if (wordValue andb 0wx01000000) = 0w0
                     then "cbz" else "cbnz"
                 val r = if (wordValue andb 0wx80000000) = 0w0 then "w" else "x"
             in
                 printStream oper; printStream "\t";
                 printStream r; printStream(Word32.fmt StringCvt.DEC (wordValue andb 0wx1f));
                 printStream ",0x";
                 printStream(Word32.fmt StringCvt.HEX (wordToWord32 byteNo+byteOffset))
             end
 
             else if (wordValue andb 0wx7e000000) = 0wx36000000
             then (* Test bit and branch *)
             let
                 val byteOffset =
-                    (wordValue andb 0wx00ffffe0) << (Word.fromInt Word.wordSize - 0w19) ~>>
-                        (Word.fromInt Word.wordSize - 0w16)
+                    (wordValue andb 0wx000fffe0) << (Word.fromInt Word32.wordSize - 0w20) ~>>
+                        (Word.fromInt Word32.wordSize - 0w17)
                 val oper =
                     if (wordValue andb 0wx01000000) = 0w0
                     then "tbz" else "tbnz"
                 val b40 = (wordValue >> 0w19) andb 0wx1f
                 val bitNo = b40 orb ((wordValue >> 0w26) andb 0wx20)
                 val r = if bitNo < 0w32 then "w" else "x"
             in
                 printStream oper; printStream "\t";
                 printStream r; printStream(Word32.fmt StringCvt.DEC (wordValue andb 0wx1f));
                 printStream ",#"; printStream(Word32.fmt StringCvt.DEC bitNo); printStream ",0x";
-                printStream(Word.fmt StringCvt.HEX (byteNo+word32ToWord byteOffset))
+                printStream(Word32.fmt StringCvt.HEX (wordToWord32 byteNo+byteOffset))
             end
 
             else if (wordValue andb 0wx3fe00000) = 0wx1A800000
             then
             let
                 val sf = wordValue >> 0w31
                 val opc = (wordValue >> 0w30) andb 0w1
                 val op2 = (wordValue >> 0w10) andb 0w3
                 val rT = wordValue andb 0wx1f
                 val rN = (wordValue >> 0w5) andb 0wx1f
                 val rM = (wordValue >> 0w16) andb 0wx1f
                 val cond = (wordValue >> 0w12) andb 0wxf
                 val opcode =
                     case (opc, op2) of
                         (0w0, 0w0) => "csel"
                     |   (0w0, 0w1) => "csinc"
                     |   (0w1, 0w0) => "csinv"
                     |   (0w1, 0w1) => "csneg"
                     |   _ => "??"
                 val r = if sf = 0w0 then "w" else "x"
             in
                 printStream opcode; printStream "\t";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rT);
                 printStream ","; printStream r; printStream(Word32.fmt StringCvt.DEC rN);
                 printStream ","; printStream r; printStream(Word32.fmt StringCvt.DEC rM);
                 printStream ","; printCondition cond
             end
 
             else if (wordValue andb 0wx7f800000) = 0wx13000000
             then (* signed bitfield *)
             let
                 val sf = wordValue >> 0w31
                 (* N is always the same as sf. *)
                 (*val nBit = (wordValue >> 0w22) andb 0w1*)
                 val immr = (wordValue >> 0w16) andb 0wx3f
                 val imms = (wordValue >> 0w10) andb 0wx3f
                 val rN = (wordValue >> 0w5) andb 0wx1f
                 val rD = wordValue andb 0wx1f
                 val (r, wordSize) = if sf = 0w0 then ("w", 0w32) else if sf = 0w1 then ("x", 0w64) else raise InternalError "Neither"
             in
                 if imms = wordSize - 0w1
                 then printStream "asr\t"
                 else printStream "sbfm\t";
                 printStream r;
                 printStream(Word32.fmt StringCvt.DEC rD);
                 printStream ",";
                 printStream r;
                 printStream(Word32.fmt StringCvt.DEC rN);
                 if imms = wordSize - 0w1
                 then (printStream ",#0x"; printStream(Word32.toString immr))
                 else
                 (
                     printStream ",#0x"; printStream(Word32.toString immr);
                     printStream ",#0x"; printStream(Word32.toString imms)
                 )
             end
 
             else if (wordValue andb 0wx7f800000) = 0wx33000000
             then (* bitfield move *)
             let
                 val sf = wordValue >> 0w31
                 (* N is always the same as sf. *)
                 (*val nBit = (wordValue >> 0w22) andb 0w1*)
                 val immr = (wordValue >> 0w16) andb 0wx3f
                 val imms = (wordValue >> 0w10) andb 0wx3f
                 val rN = (wordValue >> 0w5) andb 0wx1f
                 val rD = wordValue andb 0wx1f
                 val (r, wordSize) = if sf = 0w0 then ("w", 0w32) else ("x", 0w64)
             in
                 if imms < immr
                 then if rD = 0wx31 then printStream "bfc\t" else printStream "bfi\t"
                 else printStream "bfxil\t";
                 if imms >= immr orelse rD <> 0w31
                 then
                 (
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rD);
                     printStream ","
                 )
                 else ();
                 printStream r;
                 printStream(Word32.fmt StringCvt.DEC rN);
                 (* Not certain that these are correct. *)
                 if imms < immr
                 then
                 (
                     printStream ",#0x"; printStream(Word32.toString(wordSize - immr));
                     printStream ",#0x"; printStream(Word32.toString(imms+0w1))
                 )
                 else
                 (
                     printStream ",#0x"; printStream(Word32.toString immr);
                     printStream ",#0x"; printStream(Word32.toString(imms+0w1-immr))
                 )
             end
 
             else if (wordValue andb 0wx7f800000) = 0wx53000000
             then (* unsigned bitfield move *)
             let
                 val sf = wordValue >> 0w31
                 (* N is always the same as sf. *)
                 (*val nBit = (wordValue >> 0w22) andb 0w1*)
                 val immr = (wordValue >> 0w16) andb 0wx3f
                 val imms = (wordValue >> 0w10) andb 0wx3f
                 val rN = (wordValue >> 0w5) andb 0wx1f
                 val rD = wordValue andb 0wx1f
                 val (r, wordSize) = if sf = 0w0 then ("w", 0w32) else ("x", 0w64)
             in
                 if imms + 0w1 = immr
                 then
                 (
                     printStream "lsl\t";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rD);
                     printStream ",";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rN);
                     printStream ",#0x"; printStream(Word32.toString(wordSize - immr))
                 )
                 else if imms = wordSize - 0w1
                 then
                 (
                     printStream "lsr\t";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rD);
                     printStream ",";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rN);
                     printStream ",#0x"; printStream(Word32.toString immr)
                 )
                 else if imms < immr
                 then
                 (
                     printStream "ubfiz\t";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rD);
                     printStream ",";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rN);
                     printStream ",#0x"; printStream(Word32.toString(wordSize - immr));
                     printStream ",#0x"; printStream(Word32.toString(imms+0w1))
                 )
                 else
                 (
                     printStream "ubfm\t";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rD);
                     printStream ",";
                     printStream r;
                     printStream(Word32.fmt StringCvt.DEC rN);
                     printStream ",#0x"; printStream(Word32.toString immr);
                     printStream ",#0x"; printStream(Word32.toString imms)
                 )
             end
 
             else if (wordValue andb 0wx1f800000) = 0wx12000000
             then (* logical immediate *)
             let
                 val sf = wordValue >> 0w31
                 val opc = (wordValue >> 0w29) andb 0w3
                 val nBit = (wordValue >> 0w22) andb 0w1
                 val immr = (wordValue >> 0w16) andb 0wx3f
                 val imms = (wordValue >> 0w10) andb 0wx3f
                 val rN = (wordValue >> 0w5) andb 0wx1f
                 val rD = wordValue andb 0wx1f
                 val (opcode, r) =
                     case (sf, opc, nBit) of
                         (0w0, 0w0, 0w0) => ("and", "w")
                     |   (0w0, 0w1, 0w0) => ("orr", "w")
                     |   (0w0, 0w2, 0w0) => ("eor", "w")
                     |   (0w0, 0w3, 0w0) => ("ands", "w")
                     |   (0w1, 0w0, _) => ("and", "x")
                     |   (0w1, 0w1, _) => ("orr", "x")
                     |   (0w1, 0w2, _) => ("eor", "x")
                     |   (0w1, 0w3, _) => ("ands", "x")
                     |   _ => ("??", "?")
             in
                 if rD = 0w31 andalso opc=0w3
                 then printStream "tst\t"
                 else
                 (
                     printStream opcode;
                     printStream "\t";
                     printStream r; printStream(Word32.fmt StringCvt.DEC rD); printStream ","
                 );
                 printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ",#0x";
                 printStream(Word64.toString(decodeBitPattern{sf=sf, n=nBit, immr=immr, imms=imms}))
             end
 
             else if (wordValue andb 0wx5fe00000) = 0wx1ac00000
             then (* Two source operations - shifts and divide. *)
             let
                 val sf = wordValue >> 0w31
                 val s = (wordValue >> 0w29) andb 0w1
                 val rM = (wordValue >> 0w16) andb 0wx1f
                 val opcode = (wordValue >> 0w10) andb 0wx3f
                 val rN = (wordValue >> 0w5) andb 0wx1f
                 val rD = wordValue andb 0wx1f
                 val (oper, r) =
                     case (sf, s, opcode) of
                         (0w1, 0w0, 0wx2) => ("udiv", "x")
                     |   (0w1, 0w0, 0wx3) => ("sdiv", "x")
                     |   (0w0, 0w0, 0wx2) => ("udiv", "w")
                     |   (0w0, 0w0, 0wx3) => ("sdiv", "w")
                     |   (0w1, 0w0, 0wx8) => ("lsl", "x")
                     |   (0w0, 0w0, 0wx8) => ("lsl", "w")
                     |   (0w1, 0w0, 0wx9) => ("lsr", "x")
                     |   (0w0, 0w0, 0wx9) => ("lsr", "w")
                     |   (0w1, 0w0, 0wxa) => ("asr", "x")
                     |   (0w0, 0w0, 0wxa) => ("asr", "w")
                     |   _ => ("??", "?")
             in
                 printStream oper;
                 printStream "\t";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rD); printStream ",";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ",";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rM)
             end
 
             else if (wordValue andb 0wx1f000000) = 0wx1b000000
             then (* Three source operations - multiply add/subtract. *)
             let
                 val sf = wordValue >> 0w31
                 val op54 = (wordValue >> 0w29) andb 0w3
                 val op31 = (wordValue >> 0w21) andb 0w7
                 val o0 = (wordValue >> 0w15) andb 0w1
                 val rM = (wordValue >> 0w16) andb 0wx1f
                 val rA = (wordValue >> 0w10) andb 0wx1f
                 val rN = (wordValue >> 0w5) andb 0wx1f
                 val rD = wordValue andb 0wx1f
                 val (oper, r1, r2) =
                     case (sf, op54, op31, o0, rA) of
                         (0w1, 0w0, 0w0, 0w0, 0w31) => ("mul", "x", "x")
                     |   (0w1, 0w0, 0w0, 0w0, _)    => ("madd", "x", "x")
                     |   (0w1, 0w0, 0w0, 0w1, 0w31) => ("mneg", "x", "x")
                     |   (0w1, 0w0, 0w0, 0w1, _)    => ("msub", "x", "x")
                     |   (0w0, 0w0, 0w0, 0w0, _)    => ("madd", "w", "w")
                     |   (0w0, 0w0, 0w0, 0w1, _)    => ("msub", "w", "w")
                     |   (0w1, 0w0, 0w2, 0w0, 0w31) => ("smulh", "x", "x")
                     |   (0w1, 0w0, 0w1, 0w0, 0w31) => ("smull", "x", "w")
                     |   (0w1, 0w0, 0w1, 0w0, _)    => ("smaddl", "x", "w")
                     |   (0w1, 0w0, 0w1, 0w1, _)    => ("smsubl", "x", "w")
                     |   _ => ("??", "?", "?")
             in
                 printStream oper;
                 printStream "\t";
                 printStream r1; printStream(Word32.fmt StringCvt.DEC rD); printStream ",";
                 printStream r2; printStream(Word32.fmt StringCvt.DEC rN); printStream ",";
                 printStream r2; printStream(Word32.fmt StringCvt.DEC rM);
                 if rA = 0w31 then ()
                 else (printStream ","; printStream r1; printStream(Word32.fmt StringCvt.DEC rA))
             end
 
             else if (wordValue andb 0wx7f20fc00) = 0wx1E200000
             then (* Moves between floating point and general regs. *)
             let
                 val sf = (wordValue >> 0w31) andb 0w1
                 and s = (wordValue >> 0w29) andb 0w1
                 and ptype = (wordValue >> 0w22) andb 0w3
                 and mode = (wordValue >> 0w19) andb 0w3
                 and opcode = (wordValue >> 0w16) andb 0w7
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rD = wordValue andb 0wx1f
                 val (opc, dr, nr) =
                     case (sf, s, ptype, mode, opcode) of
                         (0w0, 0w0, 0w0, 0w0, 0w7) => ("fmov", "s", "w") (* w -> s *)
                     |   (0w0, 0w0, 0w0, 0w0, 0w6) => ("fmov", "w", "s") (* s -> w *)
                     |   (0w1, 0w0, 0w1, 0w0, 0w7) => ("fmov", "d", "x") (* d -> x *)
                     |   (0w1, 0w0, 0w1, 0w0, 0w6) => ("fmov", "x", "d") (* x -> d *)
                     |   (0w0, 0w0, 0w0, 0w0, 0w2) => ("scvtf", "w", "s")
                     |   (0w0, 0w0, 0w1, 0w0, 0w2) => ("scvtf", "w", "d")
                     |   (0w1, 0w0, 0w0, 0w0, 0w2) => ("scvtf", "x", "s")
                     |   (0w1, 0w0, 0w1, 0w0, 0w2) => ("scvtf", "x", "d")
 
                     |   (0w0, 0w0, 0w0, 0w0, 0w4) => ("fcvtas", "w", "s") (* s -> w *)
                     |   (0w0, 0w0, 0w0, 0w2, 0w0) => ("fcvtms", "w", "s") (* s -> w *)
                     |   (0w0, 0w0, 0w0, 0w1, 0w0) => ("fcvtps", "w", "s") (* s -> w *)
                     |   (0w0, 0w0, 0w0, 0w3, 0w0) => ("fcvtzs", "w", "s") (* s -> w *)
                     |   (0w0, 0w0, 0w1, 0w0, 0w4) => ("fcvtas", "w", "d") (* d -> w *)
                     |   (0w0, 0w0, 0w1, 0w2, 0w0) => ("fcvtms", "w", "d") (* d -> w *)
                     |   (0w0, 0w0, 0w1, 0w1, 0w0) => ("fcvtps", "w", "d") (* d -> w *)
                     |   (0w0, 0w0, 0w1, 0w3, 0w0) => ("fcvtzs", "w", "d") (* d -> w *)
 
                     |   (0w1, 0w0, 0w0, 0w0, 0w4) => ("fcvtas", "x", "s") (* s -> x *)
                     |   (0w1, 0w0, 0w0, 0w2, 0w0) => ("fcvtms", "x", "s") (* s -> x *)
                     |   (0w1, 0w0, 0w0, 0w1, 0w0) => ("fcvtps", "x", "s") (* s -> x *)
                     |   (0w1, 0w0, 0w0, 0w3, 0w0) => ("fcvtzs", "x", "s") (* s -> x *)
                     |   (0w1, 0w0, 0w1, 0w0, 0w4) => ("fcvtas", "x", "d") (* d -> x *)
                     |   (0w1, 0w0, 0w1, 0w2, 0w0) => ("fcvtms", "x", "d") (* d -> x *)
                     |   (0w1, 0w0, 0w1, 0w1, 0w0) => ("fcvtps", "x", "d") (* d -> x *)
                     |   (0w1, 0w0, 0w1, 0w3, 0w0) => ("fcvtzs", "x", "d") (* d -> x *)
                     |   _ => ("?", "?", "?")
             in
                 printStream opc; printStream "\t";
                 printStream dr; printStream(Word32.fmt StringCvt.DEC rD); printStream ",";
                 printStream nr; printStream(Word32.fmt StringCvt.DEC rN)
             end
             
             else if (wordValue andb 0wxff200c00) = 0wx1E200800
             then (* Floating point two source operations. *)
             let
                 val pt = (wordValue >> 0w22) andb 0w3
                 and rM = (wordValue >> 0w16) andb 0wx1f
                 and opc = (wordValue >> 0w12) andb 0wxf
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rT = wordValue andb 0wx1f
                 val (opcode, r) =
                     case (pt, opc) of
                         (0w0, 0wx0) => ("fmul", "s")
                     |   (0w0, 0wx1) => ("fdiv", "s")
                     |   (0w0, 0wx2) => ("fadd", "s")
                     |   (0w0, 0wx3) => ("fsub", "s")
                     |   (0w1, 0wx0) => ("fmul", "d")
                     |   (0w1, 0wx1) => ("fdiv", "d")
                     |   (0w1, 0wx2) => ("fadd", "d")
                     |   (0w1, 0wx3) => ("fsub", "d")
                     |   _ => ("??", "?")
             in
                 printStream opcode; printStream "\t";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rT); printStream ",";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ",";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rM)
             end
 
             else if (wordValue andb 0wxff207c00) = 0wx1E204000
             then (* Floating point single source. *)
             let
                 val pt = (wordValue >> 0w22) andb 0w3
                 and opc = (wordValue >> 0w15) andb 0wx3f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and rT = wordValue andb 0wx1f
                 val (opcode, rS, rD) =
                     case (pt, opc) of
                         (0w0, 0wx0) => ("fmov", "s", "s")
                     |   (0w0, 0wx1) => ("fabs", "s", "s")
                     |   (0w0, 0wx2) => ("fneg", "s", "s")
                     |   (0w0, 0wx5) => ("fcvt", "s", "d")
                     |   (0w1, 0wx0) => ("fmov", "d", "d")
                     |   (0w1, 0wx1) => ("fabs", "d", "d")
                     |   (0w1, 0wx2) => ("fneg", "d", "d")
                     |   (0w1, 0wx4) => ("fcvt", "d", "s")
                     |   _ => ("??", "?", "?")
             in
                 printStream opcode; printStream "\t";
                 printStream rD; printStream(Word32.fmt StringCvt.DEC rT); printStream ",";
                 printStream rS; printStream(Word32.fmt StringCvt.DEC rN)
             end
 
             else if (wordValue andb 0wxff20fc07) = 0wx1E202000
             then (* Floating point comparison *)
             let
                 val pt = (wordValue >> 0w22) andb 0w3
                 and rM = (wordValue >> 0w16) andb 0wx1f
                 and rN = (wordValue >> 0w5) andb 0wx1f
                 and opc = (wordValue >> 0w3) andb 0w3
                 val (opcode, r) =
                     case (pt, opc) of
                         (0w0, 0wx0) => ("fcmp", "s")
                     |   (0w1, 0wx0) => ("fcmp", "d")
                     |   (0w0, 0wx2) => ("fcmpe", "s")
                     |   (0w1, 0wx2) => ("fcmpe", "d")
                     |   _ => ("??", "?")
             in
                 printStream opcode; printStream "\t";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rN); printStream ",";
                 printStream r; printStream(Word32.fmt StringCvt.DEC rM)
             end
 
             else if (wordValue andb 0wx1e000000) = 0wx02000000
             then (* This is an unallocated range.  We use it for the register mask. *)
             let
                 fun printMask (0w25, _) = ()
                 |   printMask (i, comma) =
                     if ((0w1 << i) andb wordValue) <> 0w0
                     then
                     (
                         if comma then printStream ", " else ();
                         printStream "x";
                         printStream(Word.fmt StringCvt.DEC i);
                         printMask(i+0w1, true)
                     )
                     else printMask(i+0w1, comma)
             in
                 printStream "["; printMask(0w0, false); printStream "]"
             end
 
             else printStream "?"
             ;
             printStream "\n"
         end
         
         (* Print the instruction words one after another, starting at word i
            and stopping as soon as i reaches numInstructions. *)
         fun printAll i =
             if i <> numInstructions
             then (printWordAt i; printAll(i+0w1))
             else ()
     in
         printStream functionName;
         printStream ":\n";
         printAll 0w0
     end
 
     (* Record the fix-ups for the extended ADRP+LDR and ADRP+ADD instruction pairs.
        The values in these instructions are, to some extent, absolute addresses so
        the actual patching has to be done by the RTS; here we only tell it, via
        codeVecPutConstant, which byte offset each pair must refer to.
        firstNonAddrConst and firstAddrConst are byte offsets of the first
        non-address and the first address constant respectively. *)
     fun setADRPAddresses(ops, codeVec, firstNonAddrConst, firstAddrConst) =
     let
         (* Register a single fix-up.  "wordNo" is the index of the first instruction
            word of the pair and "target" is the byte offset it must address. *)
         fun patch(wordNo, target, kind) =
             codeVecPutConstant (codeVec, wordNo * 0w4, toMachineWord target, kind)
 
         (* Walk the instruction list keeping track of the current instruction word
            and of how many address / non-address constants have been seen so far. *)
         fun walk([], _, _, _) = ()
 
         |   walk(instr :: rest, wordNo, addrIndex, nonAddrIndex) =
             case instr of
                 LoadAddressLiteral{length=ref BrExtended, ...} =>
                 (
                     (* Address constants are 32-bits in 32-in-64 and 64-bits in
                        native 64-bits, hence the scaling by Address.wordSize. *)
                     patch(wordNo, firstAddrConst + addrIndex * Address.wordSize,
                         if is32in64 then ConstArm64AdrpLdr32 else ConstArm64AdrpLdr64);
                     walk(rest, wordNo+0w2, addrIndex+0w1, nonAddrIndex)
                 )
 
             |   LoadNonAddressLiteral{length=ref BrExtended, ...} =>
                 (
                     (* Non-address constants are always 64-bits, i.e. 8 bytes each. *)
                     patch(wordNo, firstNonAddrConst + nonAddrIndex * 0w8, ConstArm64AdrpLdr64);
                     walk(rest, wordNo+0w2, addrIndex, nonAddrIndex+0w1)
                 )
 
             |   LoadLabelAddress{label=ref labs, length=ref BrExtended, ...} =>
                 (
                     (* The label holds an instruction-word number; convert it to bytes. *)
                     patch(wordNo, !(hd labs) * 0w4, ConstArm64AdrpAdd);
                     walk(rest, wordNo+0w2, addrIndex, nonAddrIndex)
                 )
 
                 (* Anything else, including the short forms of the above, just
                    advances the position by its own size. *)
             |   _ => walk(rest, wordNo+Word.fromInt(codeSize instr), addrIndex, nonAddrIndex)
     in
         walk (ops, 0w0, 0w0, 0w0)
     end
 
     (* Adds the constants onto the code, and copies the code into a new segment.
        The argument record supplies:
          instrs         - the instruction list handed to genCode;
          name           - the function name, stored in the segment and used as the
                           heading of the assembly listing;
          parameters     - used to obtain the printer and the assembly-listing flag;
          resultClosure  - passed to byteVecToCodeVec and codeVecLock;
          profileObject  - stored in the segment after the function name.
        The result is whatever codeVecLock returns. *)
     fun generateCode {instrs, name=functionName, parameters, resultClosure, profileObject} =
     let
         val printStream = Pretty.getSimplePrinter(parameters, [])
         and printAssemblyCode = Debug.getParameter Debug.assemblyCodeTag parameters
         
         local
             (* Extract the constants.  The fold conses each value onto the front
                so both lists come out in reverse instruction order. *)
             fun getConsts(LoadAddressLiteral {value, ...}, (addrs, nonAddrs)) = (value::addrs, nonAddrs)
             |   getConsts(LoadNonAddressLiteral {value, ...}, (addrs, nonAddrs)) = (addrs, value::nonAddrs)
             |   getConsts(_, consts) = consts
 
             val (addrConsts, nonAddrConsts) = List.foldl getConsts ([], []) instrs
         in
             (* Reverse again to restore the order in which the instructions appear. *)
             val addressConsts = List.rev addrConsts
             and nonAddressConsts = List.rev nonAddrConsts
         end
         
         (* Generate the code and set the constant addresses at the same time.
            TODO: The X86 code-generator sorts the constants to remove duplicates. *)
         val (byteVec, nativeWordsOfCode) = genCode(instrs, addressConsts, nonAddressConsts)
         (* genCode reports the size in native words; convert to ML words. *)
         val wordsOfCode = nativeWordsOfCode * wordsPerNativeWord
 
         (* Layout of the words that follow the code (indexes in ML words):
              wordsOfCode                 constants count
              wordsOfCode+1               function name
              wordsOfCode+2               profile object
              wordsOfCode+3 ...           the address constants
              segSize-1                   final offset word
            so the segment needs four words beyond the code and the constants. *)
         val numOfConst = List.length addressConsts
         val segSize   = wordsOfCode + Word.fromInt numOfConst + 0w4
         val firstConstant = wordsOfCode + 0w3 (* Add 3 for no of consts, fn name and profile count. *)
     
         (* Put in the number of constants. This must go in before
            we actually put in any constants. *)
         local
             val lastWord = segSize - 0w1
         in
             (* The stored count is numOfConst + 2; presumably the two extra entries
                are the function name and the profile object - confirm against the RTS. *)
             val () = setWord(LargeWord.fromInt(numOfConst + 2), wordsOfCode, byteVec)
             (* Set the last word of the code to the (negative) byte offset of the start of the code area
                from the end of this word.
                NOTE(review): the value written is -(numOfConst + 3) * wordSize, which counted
                back from word lastWord lands on word index wordsOfCode, i.e. the count
                word written above; confirm the exact convention the RTS expects. *)
             val () = setWord(LargeWord.fromInt(numOfConst + 3) * ~(Word.toLarge Address.wordSize), lastWord, byteVec) 
         end
 
         (* Now we've filled in all the size info we need to convert the segment
            into a proper code segment before it's safe to put in any ML values. *)
         val codeVec = byteVecToCodeVec(byteVec, resultClosure)
 
         (* Function name, in the word immediately after the constants count. *)
         local
             val name     : string = functionName
             val nameWord : machineWord = toMachineWord name
         in
             val () = codeVecPutWord (codeVec, wordsOfCode+0w1, nameWord)
         end
         (* Profile ref.  A byte ref used by the profiler in the RTS. *)
         val () = codeVecPutWord (codeVec, wordsOfCode+0w2, profileObject)
 
         (* and then copy the constants from the constant list. *)
         local
             (* Store one constant at slot "num" of the constant area and return the
                next slot number so that this can be used as the folding function. *)
             fun setConstant(value, num) =
             (
                 codeVecPutWord (codeVec, firstConstant + num, value);
                 num+0w1
             )
         in
             val _ = List.foldl setConstant 0w0 addressConsts
         end
         
         (* Record the fix-ups for the extended ADRP sequences.  Both offsets are in
            bytes.  The first is computed as if the non-address constants occupy the
            last native words of the code area, one native word each (presumably
            where genCode puts them); the second is the byte offset of firstConstant. *)
         val () = setADRPAddresses(instrs, codeVec,
                     (nativeWordsOfCode-Word.fromInt(List.length nonAddressConsts)) * Address.nativeWordSize,
                     firstConstant * Address.wordSize)
     in
         if printAssemblyCode
         then (* print out the code *)
             (printCode (codeVec, functionName, wordsOfCode, printStream); printStream"\n")
         else ();
         (* Locking is the last step; its result is the result of generateCode. *)
         codeVecLock(codeVec, resultClosure)
     end (* generateCode *)
 
 
     (* Re-export the types of this structure under a single sub-structure,
        presumably so that client functors can name them in sharing constraints.
        The bindings are simultaneous, so every right-hand side refers to the
        type of the same name in the enclosing structure. *)
     structure Sharing =
     struct
         type closureRef = closureRef
         and  instr      = instr
         and  xReg       = xReg
         and  vReg       = vReg
         and  labels     = labels
         and  condition  = condition
         and  shiftType  = shiftType
         and  wordSize   = wordSize
         and  'a extend  = 'a extend
         and  scale      = scale
     end
 end;
 
diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML
index 1a495478..ae35a130 100644
--- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML
+++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML
@@ -1,1063 +1,1073 @@
 (*
     Copyright (c) 2021-2 David C. J. Matthews
 
     This library is free software; you can redistribute it and/or
     modify it under the terms of the GNU Lesser General Public
     Licence version 2.1 as published by the Free Software Foundation.
     
     This library is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     Lesser General Public Licence for more details.
     
     You should have received a copy of the GNU Lesser General Public
     Licence along with this library; if not, write to the Free Software
     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 *)
 
 (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *)
 
 functor Arm64PreAssembly(
 
     structure Arm64Assembly: ARM64ASSEMBLY
     structure Debug: DEBUG
     structure Pretty: PRETTY
 
 ): ARM64PREASSEMBLY =
 struct
     open Arm64Assembly
     
     (* Local alias for Misc.InternalError so that it can be raised unqualified
        within this functor. *)
     exception InternalError = Misc.InternalError
 
     (* Cons and append with their operands swapped: the existing list is written
        on the left and the new material on the right, but the new material still
        goes at the front of the result.  Both are left-associative at level 5. *)
     infix 5 <::> <@>
     fun existing <::> item = item :: existing
     fun existing <@> front = front @ existing
 
     (* Many of the datatypes are inherited from Arm64Assembly *)
 
     (* Option types used in the precode records.  The translation to assembly
        selects the concrete instruction variant from these values. *)
     (* Width of an integer memory access. *)
     datatype loadType = Load64 | Load32 | Load16 | Load8
     (* Width of an integer register operation. *)
     datatype opSize = OpSize32 | OpSize64
     (* Bitwise operations. *)
     datatype logicalOp = LogAnd | LogOr | LogXor
     (* Floating point precision. *)
     datatype floatSize = Float32 | Double64
     (* Variable shifts. *)
     datatype shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic
     (* Multiply-accumulate variants. *)
     datatype multKind =
         MultAdd32
     |   MultSub32
     |   MultAdd64
     |   MultSub64
     |   SignedMultAddLong (* 32bit*32bit + 64bit => 64bit *)
     |   SignedMultHigh (* High order part of 64bit*64bit *)
     (* Single-operand floating point operations. *)
     datatype fpUnary =
         NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat
     (* Two-operand floating point arithmetic. *)
     datatype fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP
     (* Base register update mode for unscaled and pair accesses. *)
     datatype unscaledType = NoUpdate | PreIndex | PostIndex
     (* Conditional set variants. *)
     datatype condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate
     (* Bitfield move variants. *)
     datatype bitfieldKind = BFUnsigned | BFSigned | BFInsert
     (* Kinds of branch through a register. *)
     datatype brRegType = BRRBranch | BRRAndLink | BRRReturn
 
     (* A pre-assembly label is identified by its sequence number. *)
     datatype label = Label of int
     (* A label maker is a counter holding the number that the next label will get;
        generateFinalCode reads it as the total number of labels created. *)
     type labelMaker = int ref
     (* Start a new numbering sequence at zero. *)
     fun createLabelMaker () : labelMaker = ref 0
     (* Hand out a label carrying the current counter value and advance the counter. *)
     fun createLabel (counter: labelMaker) : label =
     let
         val thisNumber = !counter
     in
         counter := thisNumber + 1;
         Label thisNumber
     end
 
     (* The pre-assembly instruction set.  Each constructor describes one instruction,
        or a composite sequence, as a record of registers and options.  toAssembler in
        generateFinalCode translates a list of these into Arm64Assembly instructions,
        picking the concrete variant from fields such as opSize, setFlags, loadType
        and floatSize. *)
     datatype precode =
         (* Basic instructions *)
         AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool}
     |   SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool}
     |   AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool}
     |   SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool}
         (* Extended-register add/subtract.  The translation only provides the 64-bit
            forms: OpSize32 raises InternalError. *)
     |   AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool}
     |   SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool}
     |   MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind}
     |   DivideRegs of
             {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize}
         (* setFlags is only accepted together with LogAnd; with LogOr or LogXor the
            translation raises InternalError. *)
     |   LogicalShiftedReg of
             {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool}
         (* Loads and stores with an immediate offset.  The scaled forms carry unitOffset;
            the unscaled forms carry byteOffset and an unscaledType.  In the translation
            Load16 is rejected with PreIndex/PostIndex and the FP unscaled forms accept
            only NoUpdate (InternalError otherwise). *)
     |   LoadRegScaled of
             {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType}
     |   LoadFPRegScaled of
             {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize}
     |   StoreRegScaled of
             {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType}
     |   StoreFPRegScaled of
             {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize}
     |   LoadRegUnscaled of
             {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType}
     |   StoreRegUnscaled of
             {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType}
     |   LoadFPRegUnscaled of
             {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType}
     |   StoreFPRegUnscaled of
             {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType}
         (* Loads and stores addressed by regN plus an index register regM, with a
            scale/extend option. *)
     |   LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend}
     |   StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend}
     |   LoadFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend}
     |   StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend}
         (* LoadAcquire and StoreRelease are used for mutables. *)
         (* The translation handles Load64, Load32 and Load8 only; Load16 raises
            InternalError. *)
     |   LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType}
     |   StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType}
         (* LoadAcquireExclusiveRegister and StoreReleaseExclusiveRegister are used for mutexes. *)
     |   LoadAcquireExclusiveRegister of {regN: xReg, regT: xReg}
     |   StoreReleaseExclusiveRegister of {regS: xReg, regT: xReg, regN: xReg}
         (* Translated to dmbIsh. *)
     |   MemBarrier
         (* Register pairs.  Integer pairs are translated for Load64 and Load32 only;
            the load forms additionally require regT1 <> regT2 (InternalError otherwise). *)
     |   LoadRegPair of
             { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType}
     |   StoreRegPair of
             { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType}
     |   LoadFPRegPair of
             { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType}
     |   StoreFPRegPair of
             { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType}
     |   ConditionalSet of
             {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet, opSize: opSize}
     |   BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind}
     |   ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection}
         (* Logical operation with an immediate bit pattern.  setFlags is only accepted
            together with LogAnd; other combinations raise InternalError in the translation. *)
     |   BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp}
         (* Floating point *)
     |   MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize}
     |   MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize}
     |   CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize}
     |   CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize}
     |   FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary}
     |   FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize}
     |   FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary}
         (* Branches and Labels. *)
     |   SetLabel of label
     |   ConditionalBranch of condition * label
     |   UnconditionalBranch of label
     |   BranchAndLink of label
     |   BranchReg of {regD: xReg, brRegType: brRegType }
     |   LoadLabelAddress of xReg * label
     |   TestBitBranch of { test: xReg, bit: Word8.word, label: label, onZero: bool }
     |   CompareBranch of { test: xReg, label: label, onZero: bool, opSize: opSize }
         (* Composite instructions *)
     |   MoveXRegToXReg of {sReg: xReg, dReg: xReg}
     |   LoadNonAddr of xReg * Word64.word
     |   LoadAddr of xReg * machineWord
     |   RTSTrap of { rtsEntry: int, work: xReg, save: xReg list }
     |   AllocateMemoryFixedSize of { bytes: word, dest: xReg, save: xReg list, work: xReg }
     |   AllocateMemoryVariableSize of { sizeReg: xReg, dest: xReg, save: xReg list, work: xReg }
         (* Branch table for indexed case. startLabel is the address of the first label in
            the list.  The branch table is a sequence of unconditional branches. *)
     |   BranchTable of { startLabel: label, brTable: label list }
     |   LoadGlobalHeapBaseInCallback of xReg
 
 
     (* Optimise the pre-assembler code and then generate the final code. *)
     fun generateFinalCode {instrs, name, parameters, resultClosure, profileObject, labelMaker=ref labelCount} =
     let
         val labelTargets = Array.tabulate(labelCount, fn i => (Arm64Assembly.createLabel(), i) )
 
         (* Follow the chain of forwarded labels. *)
         local
             fun forwardLab(labelNo, labels) =
             let
                 val dest as (_, dNo) = Array.sub(labelTargets, labelNo)
             in
                 if dNo = labelNo
                 then dest
                 (* This should not happen but just in case... *)
                 else if List.exists(fn i => i = dNo) labels
                 then raise InternalError "Infinite loop"
                 else forwardLab(dNo, dNo::labels)
             end
         in
             fun getLabel labelNo = forwardLab(labelNo, [labelNo])
             val getLabelTarget = #1 o getLabel
         end                
 
         fun toAssembler([], code) = code
 
         |   toAssembler(AddImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) =
             let
                 val instr =
                     case (opSize, setFlags) of
                         (OpSize64, false) => addImmediate
                     |   (OpSize32, false) => addImmediate32
                     |   (OpSize64, true) => addSImmediate
                     |   (OpSize32, true) => addSImmediate32
             in
                 toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted})
             end
 
         |   toAssembler(SubImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) =
             let
                 val instr =
                     case (opSize, setFlags) of
                         (OpSize64, false) => subImmediate
                     |   (OpSize32, false) => subImmediate32
                     |   (OpSize64, true) => subSImmediate
                     |   (OpSize32, true) => subSImmediate32
             in
                 toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted})
             end
 
         |   toAssembler(AddShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) =
             let
                 val instr =
                     case (opSize, setFlags) of
                         (OpSize64, false) => addShiftedReg
                     |   (OpSize32, false) => addShiftedReg32
                     |   (OpSize64, true) => addSShiftedReg
                     |   (OpSize32, true) => addSShiftedReg32
             in
                 toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift})
             end
 
         |   toAssembler(SubShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) =
             let
                 val instr =
                     case (opSize, setFlags) of
                         (OpSize64, false) => subShiftedReg
                     |   (OpSize32, false) => subShiftedReg32
                     |   (OpSize64, true) => subSShiftedReg
                     |   (OpSize32, true) => subSShiftedReg32
             in
                 toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift})
             end
 
         |   toAssembler(AddExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) =
             (* Add/SubExtended are only used to access XSP. *)
             let
                 val instr =
                     case (opSize, setFlags) of
                         (OpSize64, false) => addExtendedReg
                     |   (OpSize32, false) => raise InternalError "AddExtendedReg; 32"
                     |   (OpSize64, true) => addSExtendedReg
                     |   (OpSize32, true) => raise InternalError "AddExtendedReg; 32"
             in
                 toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend})
             end
 
         |   toAssembler(SubExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) =
             let
                 val instr =
                     case (opSize, setFlags) of
                         (OpSize64, false) => subExtendedReg
                     |   (OpSize32, false) => raise InternalError "AddExtendedReg; 32"
                     |   (OpSize64, true) => subSExtendedReg
                     |   (OpSize32, true) => raise InternalError "AddExtendedReg; 32"
             in
                 toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend})
             end
 
         |   toAssembler(MultiplyAndAddSub{regM, regN, regA, regD, multKind} :: rest, code) =
             let
                 val instr =
                     case multKind of
                         MultAdd32 => multiplyAndAdd32{regM=regM, regN=regN, regA=regA, regD=regD}
                     |   MultSub32 => multiplyAndSub32{regM=regM, regN=regN, regA=regA, regD=regD}
                     |   MultAdd64 => multiplyAndAdd{regM=regM, regN=regN, regA=regA, regD=regD}
                     |   MultSub64 => multiplyAndSub{regM=regM, regN=regN, regA=regA, regD=regD}
                     |   SignedMultAddLong => signedMultiplyAndAddLong{regM=regM, regN=regN, regA=regA, regD=regD}
                     |   SignedMultHigh => signedMultiplyHigh{regM=regM, regN=regN, regD=regD}
             in
                 toAssembler(rest, code <::> instr)
             end
 
         |   toAssembler(DivideRegs{regM, regN, regD, isSigned, opSize} :: rest, code) =
             let
                 val instr =
                     case (isSigned, opSize) of
                         (true, OpSize64) => signedDivide
                     |   (true, OpSize32) => signedDivide32
                     |   (false, OpSize64) => unsignedDivide
                     |   (false, OpSize32) => unsignedDivide32
             in
                 toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD})
             end
 
         |   toAssembler(LogicalShiftedReg{regM, regN, regD, shift, logOp, opSize, setFlags} :: rest, code) =
             let
                 val instr =
                     case (logOp, setFlags, opSize) of
                         (LogAnd, false, OpSize64) => andShiftedReg
                     |   (LogAnd, true, OpSize64) => andsShiftedReg
                     |   (LogOr, false, OpSize64) => orrShiftedReg
                     |   (LogXor, false, OpSize64) => eorShiftedReg
 
                     |   (LogAnd, false, OpSize32) => andShiftedReg32
                     |   (LogAnd, true, OpSize32) => andsShiftedReg32
                     |   (LogOr, false, OpSize32) => orrShiftedReg32
                     |   (LogXor, false, OpSize32) => eorShiftedReg32
 
                     |   _ => raise InternalError "setFlags not valid with OR or XOR"
                 (* There are also versions of AND/OR/XOR which operate on a complement (NOT)
                    of the shifted register.  It's probably not worth looking for a use for them. *)
             in
                 toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD, shift=shift})
             end
 
         |   toAssembler(LoadRegScaled{regT, regN, unitOffset, loadType} :: rest, code) =
             let
                 val instr =
                     case loadType of
                         Load64 => loadRegScaled
                     |   Load32 => loadRegScaled32
                     |   Load16 => loadRegScaled16
                     |   Load8 => loadRegScaledByte
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(StoreRegScaled{regT, regN, unitOffset, loadType} :: rest, code) =
             let
                 val instr =
                     case loadType of
                         Load64 => storeRegScaled
                     |   Load32 => storeRegScaled32
                     |   Load16 => storeRegScaled16
                     |   Load8 => storeRegScaledByte
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(LoadFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) =
             let
                 val instr =
                     case floatSize of
                         Float32 => loadRegScaledFloat
                     |   Double64 => loadRegScaledDouble
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(StoreFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) =
             let
                 val instr =
                     case floatSize of
                         Float32 => storeRegScaledFloat
                     |   Double64 => storeRegScaledDouble
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(LoadRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) =
             let
                 val instr =
                     case (loadType, unscaledType) of
                         (Load64, NoUpdate) => loadRegUnscaled
                     |   (Load32, NoUpdate) => loadRegUnscaled32
                     |   (Load16, NoUpdate) => loadRegUnscaled16
                     |   (Load8, NoUpdate) => loadRegUnscaledByte
                     |   (Load64, PreIndex) => loadRegPreIndex
                     |   (Load32, PreIndex) => loadRegPreIndex32
                     |   (Load16, PreIndex) => raise InternalError "loadRegPreIndex16"
                     |   (Load8, PreIndex) => loadRegPreIndexByte
                     |   (Load64, PostIndex) => loadRegPostIndex
                     |   (Load32, PostIndex) => loadRegPostIndex32
                     |   (Load16, PostIndex) => raise InternalError "loadRegPostIndex16"
                     |   (Load8, PostIndex) => loadRegPostIndexByte
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset})
             end
 
         |   toAssembler(LoadFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) =
             let
                 val instr =
                     case (floatSize, unscaledType) of
                         (Float32, NoUpdate) => loadRegUnscaledFloat
                     |   (Double64, NoUpdate) => loadRegUnscaledDouble
                     |   _ => raise InternalError "LoadFPRegUnscaled: pre/post indexed"
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset})
             end
 
         |   toAssembler(StoreRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) =
             let
                 val instr =
                     case (loadType, unscaledType) of
                         (Load64, NoUpdate) => storeRegUnscaled
                     |   (Load32, NoUpdate) => storeRegUnscaled32
                     |   (Load16, NoUpdate) => storeRegUnscaled16
                     |   (Load8, NoUpdate) => storeRegUnscaledByte
                     |   (Load64, PreIndex) => storeRegPreIndex
                     |   (Load32, PreIndex) => storeRegPreIndex32
                     |   (Load16, PreIndex) => raise InternalError "storeRegPreIndex16"
                     |   (Load8, PreIndex) => storeRegPreIndexByte
                     |   (Load64, PostIndex) => storeRegPostIndex
                     |   (Load32, PostIndex) => storeRegPostIndex32
                     |   (Load16, PostIndex) => raise InternalError "storeRegPostIndex16"
                     |   (Load8, PostIndex) => storeRegPostIndexByte
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset})
             end
 
         |   toAssembler(StoreFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) =
             let
                 val instr =
                     case (floatSize, unscaledType) of
                         (Float32, NoUpdate) => storeRegUnscaledFloat
                     |   (Double64, NoUpdate) => storeRegUnscaledDouble
                     |   _ => raise InternalError "StoreFPRegUnscaled: pre/post indexed"
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset})
             end
 
         |   toAssembler(LoadRegIndexed{regT, regN, regM, loadType, option} :: rest, code) =
             let
                 val instr =
                     case loadType of
                         Load64 => loadRegIndexed
                     |   Load32 => loadRegIndexed32
                     |   Load16 => loadRegIndexed16
                     |   Load8 => loadRegIndexedByte
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option})
             end
 
         |   toAssembler(StoreRegIndexed{regT, regN, regM, loadType, option} :: rest, code) =
             let
                 val instr =
                     case loadType of
                         Load64 => storeRegIndexed
                     |   Load32 => storeRegIndexed32
                     |   Load16 => storeRegIndexed16
                     |   Load8 => storeRegIndexedByte
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option})
             end
 
         |   toAssembler(LoadFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) =
             let
                 val instr =
                     case floatSize of
                         Float32 => loadRegIndexedFloat
                     |   Double64 => loadRegIndexedDouble
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option})
             end
 
         |   toAssembler(StoreFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) =
             let
                 val instr =
                     case floatSize of
                         Float32 => storeRegIndexedFloat
                     |   Double64 => storeRegIndexedDouble
             in
                 toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option})
             end
 
         |   toAssembler(LoadAcquireReg{regN, regT, loadType} :: rest, code) =
             let
                 val loadInstr  =
                     case loadType of
                         Load64 => loadAcquire
                     |   Load32 => loadAcquire32
                     |   Load8 => loadAcquireByte
                     |   _ => raise InternalError "LoadAcquire: Unsupported size" (* Not used *)
             in
                 toAssembler(rest, code <::> loadInstr{regT=regT, regN=regN})
             end
 
         |   toAssembler(StoreReleaseReg{regN, regT, loadType} :: rest, code) =
             let
                 val storeInstr  =
                     case loadType of
                         Load64 => storeRelease
                     |   Load32 => storeRelease32
                     |   Load8 => storeReleaseByte
                     |   _ => raise InternalError "StoreRelease: Unsupported size" (* Not used *)
             in
                 toAssembler(rest, code <::> storeInstr{regT=regT, regN=regN})
             end
 
         |   toAssembler(LoadAcquireExclusiveRegister{regN, regT} :: rest, code) =
                 toAssembler(rest, code <::> loadAcquireExclusiveRegister{regN=regN, regT=regT})
 
         |   toAssembler(StoreReleaseExclusiveRegister{regN, regT, regS} :: rest, code) =
                 toAssembler(rest, code <::> storeReleaseExclusiveRegister{regN=regN, regT=regT, regS=regS})
 
         |   toAssembler(MemBarrier :: rest, code) =
                 toAssembler(rest, code <::> dmbIsh)
 
         |   toAssembler(LoadRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) =
             let
                 val _ = regT1 <> regT2 orelse raise InternalError "LoadRegPair: same register"
                 val instr =
                     case (loadType, unscaledType) of
                         (Load64, NoUpdate) => loadPairOffset
                     |   (Load64, PreIndex) => loadPairPreIndexed
                     |   (Load64, PostIndex) => loadPairPostIndexed
                     |   (Load32, NoUpdate) => loadPairOffset32
                     |   (Load32, PreIndex) => loadPairPreIndexed32
                     |   (Load32, PostIndex) => loadPairPostIndexed32
                     |   _ => raise InternalError "LoadRegPair: unimplemented"
             in
                 toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(StoreRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) =
             let
                 val instr =
                     case (loadType, unscaledType) of
                         (Load64, NoUpdate) => storePairOffset
                     |   (Load64, PreIndex) => storePairPreIndexed
                     |   (Load64, PostIndex) => storePairPostIndexed
                     |   (Load32, NoUpdate) => storePairOffset32
                     |   (Load32, PreIndex) => storePairPreIndexed32
                     |   (Load32, PostIndex) => storePairPostIndexed32
                     |   _ => raise InternalError "StoreRegPair: unimplemented"
             in
                 toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(LoadFPRegPair{ regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) =
             let
                 val _ = regT1 <> regT2 orelse raise InternalError "LoadRegPair: same register"
                 val instr =
                     case (floatSize, unscaledType) of
                         (Double64, NoUpdate) => loadPairOffsetDouble
                     |   (Double64, PreIndex) => loadPairPreIndexedDouble
                     |   (Double64, PostIndex) => loadPairPostIndexedDouble
                     |   (Float32, NoUpdate) => loadPairOffsetFloat
                     |   (Float32, PreIndex) => loadPairPreIndexedFloat
                     |   (Float32, PostIndex) => loadPairPostIndexedFloat
             in
                 toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(StoreFPRegPair{ regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) =
             let
                 val instr =
                     case (floatSize, unscaledType) of
                         (Double64, NoUpdate) => storePairOffsetDouble
                     |   (Double64, PreIndex) => storePairPreIndexedDouble
                     |   (Double64, PostIndex) => storePairPostIndexedDouble
                     |   (Float32, NoUpdate) => storePairOffsetFloat
                     |   (Float32, PreIndex) => storePairPreIndexedFloat
                     |   (Float32, PostIndex) => storePairPostIndexedFloat
             in
                 toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset})
             end
 
         |   toAssembler(ConditionalSet{regD, regTrue, regFalse, cond, condSet, opSize} :: rest, code) =
             let
                 val instr =
                     case (condSet, opSize) of
                         (CondSet, OpSize64) => conditionalSet
                     |   (CondSetIncr, OpSize64) => conditionalSetIncrement
                     |   (CondSetInvert, OpSize64) => conditionalSetInverted
                     |   (CondSetNegate, OpSize64) => conditionalSetNegated
                     |   (CondSet, OpSize32) => conditionalSet32
                     |   (CondSetIncr, OpSize32) => conditionalSetIncrement32
                     |   (CondSetInvert, OpSize32) => conditionalSetInverted32
                     |   (CondSetNegate, OpSize32) => conditionalSetNegated32
             in
                 toAssembler(rest, code <::> instr{regD=regD, regTrue=regTrue, regFalse=regFalse, cond=cond})
             end
 
         |   toAssembler(BitField{immr, imms, regN, regD, opSize, bitfieldKind} :: rest, code) =
             let
                 val bfInstr =
                     case (bitfieldKind, opSize) of
                         (BFSigned, OpSize64) => signedBitfieldMove64
                     |   (BFUnsigned, OpSize64) => unsignedBitfieldMove64
                     |   (BFInsert, OpSize64) => bitfieldMove64
                     |   (BFSigned, OpSize32) => signedBitfieldMove32
                     |   (BFUnsigned, OpSize32) => unsignedBitfieldMove32
                     |   (BFInsert, OpSize32) => bitfieldMove32
             in
                 toAssembler(rest, code <::> bfInstr{immr=immr, imms=imms, regN=regN, regD=regD})
             end
 
         |   toAssembler(ShiftRegisterVariable{regM, regN, regD, opSize, shiftDirection} :: rest, code) =
             let
                 val instr =
                     case (shiftDirection, opSize) of
                         (ShiftLeft, OpSize64) => logicalShiftLeftVariable
                     |   (ShiftLeft, OpSize32) => logicalShiftLeftVariable32
                     |   (ShiftRightLogical, OpSize64) => logicalShiftRightVariable
                     |   (ShiftRightLogical, OpSize32) => logicalShiftRightVariable32
                     |   (ShiftRightArithmetic, OpSize64) => arithmeticShiftRightVariable
                     |   (ShiftRightArithmetic, OpSize32) => arithmeticShiftRightVariable32
             in
                 toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD})
             end
 
         |   toAssembler(BitwiseLogical{ bits, regN, regD, opSize, setFlags, logOp} :: rest, code) =
             let
                 val instr =
                     case (logOp, setFlags, opSize) of
                         (LogAnd, false, OpSize64) => bitwiseAndImmediate
                     |   (LogAnd, true, OpSize64) => bitwiseAndSImmediate
                     |   (LogOr, false, OpSize64) => bitwiseOrImmediate
                     |   (LogXor, false, OpSize64) => bitwiseXorImmediate
 
                     |   (LogAnd, false, OpSize32) => bitwiseAndImmediate32
                     |   (LogAnd, true, OpSize32) => bitwiseAndSImmediate32
                     |   (LogOr, false, OpSize32) => bitwiseOrImmediate32
                     |   (LogXor, false, OpSize32) => bitwiseXorImmediate32
 
                     |   _ => raise InternalError "flags not valid with OR or XOR"
             in
                 toAssembler(rest, code <::> instr{regN=regN, regD=regD, bits=bits})
             end
 
         |   toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Float32} :: rest, code) =
                 toAssembler(rest, code <::> moveGeneralToFloat{regN=regN, regD=regD})
         |   toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Double64} :: rest, code) =
                 toAssembler(rest, code <::> moveGeneralToDouble{regN=regN, regD=regD})
 
         |   toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Float32} :: rest, code) =
                 toAssembler(rest, code <::> moveFloatToGeneral{regN=regN, regD=regD})
         |   toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Double64} :: rest, code) =
                 toAssembler(rest, code <::> moveDoubleToGeneral{regN=regN, regD=regD})
 
         |   toAssembler(CvtIntToFP{ regN, regD, floatSize, opSize} :: rest, code) =
             let
                 val instr =
                     case (opSize, floatSize) of
                         (OpSize32, Float32) => convertInt32ToFloat
                     |   (OpSize64, Float32) => convertIntToFloat
                     |   (OpSize32, Double64) => convertInt32ToDouble
                     |   (OpSize64, Double64) => convertIntToDouble
             in
                 toAssembler(rest, code <::> instr{regN=regN, regD=regD})
             end
 
         |   toAssembler(CvtFloatToInt{ round, regN, regD, floatSize, opSize} :: rest, code) =
             let
                 val instr =
                     case (floatSize, opSize) of
                         (Float32, OpSize32) => convertFloatToInt32
                     |   (Float32, OpSize64) => convertFloatToInt
                     |   (Double64, OpSize32) => convertDoubleToInt32
                     |   (Double64, OpSize64) => convertDoubleToInt
             in
                 toAssembler(rest, code <::> instr round {regN=regN, regD=regD})
             end
 
         |   toAssembler(FPBinaryOp{ regM, regN, regD, floatSize, fpOp} :: rest, code) =
             let
                 val instr =
                     case (fpOp, floatSize) of
                         (MultiplyFP, Float32) => multiplyFloat
                     |   (DivideFP, Float32) => divideFloat
                     |   (AddFP, Float32) => addFloat
                     |   (SubtractFP, Float32) => subtractFloat
                     |   (MultiplyFP, Double64) => multiplyDouble
                     |   (DivideFP, Double64) => divideDouble
                     |   (AddFP, Double64) => addDouble
                     |   (SubtractFP, Double64) => subtractDouble
             in
                 toAssembler(rest, code <::> instr {regN=regN, regM=regM, regD=regD})
             end
 
         |   toAssembler(FPComparison{ regM, regN, floatSize} :: rest, code) =
                 toAssembler(rest, code <::> (case floatSize of Float32 => compareFloat | Double64 => compareDouble){regN=regN, regM=regM})
 
         |   toAssembler(FPUnaryOp{ regN, regD, fpOp} :: rest, code) =
             let
                 val instr =
                     case fpOp of
                         NegFloat => negFloat | NegDouble => negDouble
                     |   AbsFloat => absFloat | AbsDouble => absDouble
                     |   ConvFloatToDble => convertFloatToDouble
                     |   ConvDbleToFloat => convertDoubleToFloat
             in
                 toAssembler(rest, code <::> instr {regN=regN, regD=regD})
             end
 
         |   toAssembler(SetLabel(Label lab) :: rest, code) = toAssembler(rest, code <::> setLabel(getLabelTarget lab))
 
         |   toAssembler(ConditionalBranch(cond, Label lab) :: rest, code) = toAssembler(rest, code <::> conditionalBranch(cond, getLabelTarget lab))
 
         |   toAssembler(UnconditionalBranch(Label lab) :: rest, code) = toAssembler(rest, code <::> unconditionalBranch(getLabelTarget lab))
 
         |   toAssembler(BranchAndLink(Label lab) :: rest, code) = toAssembler(rest, code <::> branchAndLink(getLabelTarget lab))
 
         |   toAssembler(BranchReg{regD, brRegType=BRRBranch} :: rest, code) = toAssembler(rest, code <::> branchRegister regD)
         |   toAssembler(BranchReg{regD, brRegType=BRRAndLink} :: rest, code) = toAssembler(rest, code <::> branchAndLinkReg regD)
         |   toAssembler(BranchReg{regD, brRegType=BRRReturn} :: rest, code) = toAssembler(rest, code <::> returnRegister regD)
 
         |   toAssembler(LoadLabelAddress(reg, Label lab) :: rest, code) = toAssembler(rest, code <::> loadLabelAddress(reg, getLabelTarget lab))
 
         |   toAssembler(TestBitBranch{ test, bit, label=Label lab, onZero } :: rest, code) =
                 toAssembler(rest, code <::> (if onZero then testBitBranchZero else testBitBranchNonZero)(test, bit, getLabelTarget lab))
 
         |   toAssembler(CompareBranch{ test, label=Label lab, onZero, opSize } :: rest, code) =
             let
                 val instr =
                     case (onZero, opSize) of
                         (true, OpSize64) => compareBranchZero
                     |   (false, OpSize64) => compareBranchNonZero
                     |   (true, OpSize32) => compareBranchZero32
                     |   (false, OpSize32) => compareBranchNonZero32
             in
                 toAssembler(rest, code <::> instr(test, getLabelTarget lab))
             end
 
             (* Register-register moves - special case for XSP. *)
         |   toAssembler(MoveXRegToXReg{sReg=XSP, dReg} :: rest, code) =
                 toAssembler(rest, code <::> addImmediate{regN=XSP, regD=dReg, immed=0w0, shifted=false})
         |   toAssembler(MoveXRegToXReg{sReg, dReg=XSP} :: rest, code) =
                 toAssembler(rest, code <::> addImmediate{regN=sReg, regD=XSP, immed=0w0, shifted=false})
         |   toAssembler(MoveXRegToXReg{sReg, dReg} :: rest, code) =
                 toAssembler(rest, code <::> orrShiftedReg{regN=XZero, regM=sReg, regD=dReg, shift=ShiftNone})
 
         |   toAssembler(LoadNonAddr(xReg, value) :: rest, code) =
             let
                 (* Load a non-address constant.  Tries to use movz/movn/movk if
                    that can be done easily, otherwise uses loadNonAddressConstant to
                    load the value from the non-address constant area. *)
                 fun extW (v, h) = Word.andb(Word.fromLarge(LargeWord.>>(Word64.toLarge v, h*0w16)), 0wxffff)
                 val hw0 = extW(value, 0w3) and hw1 = extW(value, 0w2)
                 and hw2 = extW(value, 0w1) and hw3 = extW(value, 0w0)
                 val nextCode =
                     if value < 0wx100000000
                     then
                     let
                         (* 32-bit constants can be loaded using at most a movz and movk but
                            various cases can be reduced since all 32-bit operations set
                            the top word to zero. *)
                         val hi = hw2
                         and lo = hw3
                     in
                         (* 32-bit constants can be loaded with at most a movz and a movk but
                            it may be that there is something shorter. *)
                         if hi = 0w0
                         then code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0}
                         else if hi = 0wxffff
                         then code <::> moveNot32{regD=xReg, immediate=Word.xorb(0wxffff, lo), shift=0w0}
                         else if lo = 0w0
                         then code <::> moveZero32{regD=xReg, immediate=hi, shift=0w16}
                         else if isEncodableBitPattern(value, WordSize32)
                         then code <::> bitwiseOrImmediate32{bits=value, regN=XZero, regD=xReg}
                         else (* Have to use two instructions *)
                             code <::>
                                 moveZero32{regD=xReg, immediate=lo, shift=0w0} <::>
                                 moveKeep{regD=xReg, immediate=hi, shift=0w16}
                     end
                     else if hw0 = 0wxffff andalso hw1 = 0wxffff andalso hw2 = 0wxffff
                     then code <::> moveNot{regD=xReg, immediate=Word.xorb(0wxffff, hw3), shift=0w0}
                     else if hw1 = 0w0 andalso hw2 = 0w0
                     then (* This is common for length words with a flags byte *)
                         code <::> moveZero32{regD=xReg, immediate=hw3, shift=0w0} <::>
                             moveKeep{regD=xReg, immediate=hw0, shift=0w48} 
                     else code <::> loadNonAddressConstant(xReg, value)
             in
                 toAssembler(rest, nextCode)
             end
 
         |   toAssembler(LoadAddr(dReg, source) :: rest, code) = toAssembler(rest, loadAddressConstant(dReg, source) :: code)
 
         |   toAssembler(RTSTrap{ rtsEntry, work, save } :: rest, code) =
             let
                 (* Because X30 is used in the branchAndLink it has to be pushed
                    across any trap. *)
                 val saveX30 = List.exists (fn r => r = X30) save
                 val preserve = List.filter (fn r => r <> X30) save
             in
                 toAssembler(rest,
                     code <@>
                         (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::>
                     loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=rtsEntry} <::>
                     branchAndLinkReg work <::>
                     registerMask preserve <@>
                     (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else [])
                 )
             end
 
         |   toAssembler(AllocateMemoryFixedSize{ bytes, dest, save, work } :: rest, code) =
             let
                 val label = Arm64Assembly.createLabel()
                 val saveX30 = List.exists (fn r => r = X30) save
                 val preserve = List.filter (fn r => r <> X30) save
 
                 val allocCode =
                     code <@>
                     (* Subtract the number of bytes required from the heap pointer. *)
                     (if bytes >= 0w4096
                     then [subShiftedReg{regM=work, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftNone},
                           loadNonAddressConstant(work, Word.toLarge bytes)]
                     else [subImmediate{regN=X_MLHeapAllocPtr, regD=dest, immed=bytes, shifted=false}]) <::>
                     (* Compare the result with the heap limit. *)
                     subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::>
                     conditionalBranch(CondCarrySet, label) <@>
                         (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::>
                     loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::>
                     branchAndLinkReg work <::>
                     registerMask preserve <@>
                     (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::>
                     setLabel label <::>
                     (* Update the heap pointer. *)
                     orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone}
             in
                 toAssembler(rest, allocCode)
             end
 
         |   toAssembler(AllocateMemoryVariableSize{ sizeReg, dest, save, work } :: rest, code) =
             let
                 val trapLabel = Arm64Assembly.createLabel() and noTrapLabel = Arm64Assembly.createLabel()
                 val saveX30 = List.exists (fn r => r = X30) save
                 val preserve = List.filter (fn r => r <> X30) save
             
                 val allocCode =
                     (
                         (* Subtract the size into the result register.  Subtract a further word for
                            the length word and round down in 32-in-64. *)
                         if is32in64
                         then code <::>
                             subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w2} <::>
                             subImmediate{regN=dest, regD=dest, immed=0w4, shifted=false} <::>
                             bitwiseAndImmediate{bits= ~ 0w8, regN=dest, regD=dest}
                         else code <::>
                             subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w3} <::>
                             subImmediate{regN=dest, regD=dest, immed=0w8, shifted=false}
                     ) <::>
                     (* Check against the limit. If the size is large enough it is possible that this could wrap round. 
                        To check for that we trap if either the result is less than the limit or if it is
                        now greater than the allocation pointer. *)
                     subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::>
                     conditionalBranch(CondCarryClear, trapLabel) <::>
                     subSShiftedReg{regM=X_MLHeapAllocPtr, regN=dest, regD=XZero, shift=ShiftNone} <::>
                     conditionalBranch(CondCarryClear, noTrapLabel) <::>
                     setLabel trapLabel <@>
                         (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::>
                     loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::>
                     branchAndLinkReg work <::>
                     registerMask preserve <@>
                     (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::>
                     setLabel noTrapLabel <::>
                     (* Update the heap pointer. *)
                     orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone}
             in
                 toAssembler(rest, allocCode)
             end
 
         |   toAssembler(BranchTable{ startLabel=Label lab, brTable } :: rest, code) =
                 toAssembler(rest,
                     List.foldl (fn (Label lab, code) => (unconditionalBranch(getLabelTarget lab)) :: code)
                         (code <::> setLabel(getLabelTarget lab)) brTable)
 
         |   toAssembler(LoadGlobalHeapBaseInCallback dest :: rest, code) =
                 toAssembler(rest,
                     code <@> List.rev(loadGlobalHeapBaseInCallback dest))
 
         (* Optimisation passes. *)
         fun isValidForPair(offset1, offset2) =
         let
             (* Only the smaller of the two unit offsets is tested; it has to
                fit in the range ~64..63 inclusive. *)
             val lower = Int.min(offset1, offset2)
         in
             not (lower < ~64 orelse lower > 63)
         end
 
         (* Peephole passes over the instruction list.
            "forward" scans the instructions in program order and accumulates its
            output, reversed, in "list".  When the input is exhausted the reversed
            list is handed to "reverse" which scans it back into program order and
            returns the pair (instructions, rep).
            "rep" is passed as true whenever a rewrite has been made so that the
            caller can run the passes again.
            The order of the clauses matters: the specific patterns must come
            before the final catch-all clause of each function. *)
         fun forward([], list, rep) = reverse(list, [], rep)
 
         |   forward(SetLabel(Label srcLab) :: (ubr as UnconditionalBranch(Label destLab)) :: tl, list, _) =
             if srcLab = destLab
             (* We should never get this because there should always be a stack-check to
                allow a loop to be broken.  If that ever changes we need to retain the label. *)
             then raise InternalError "Infinite loop detected"
             else (* Mark this to forward to its destination. *)
             (
                 Array.update(labelTargets, srcLab, getLabel destLab);
                 forward(ubr :: tl, list, true)
             )
 
         |   forward(SetLabel(Label jmpLab1) :: (tl as SetLabel(Label jmpLab2) :: _), list, _) =
             (* Eliminate adjacent labels.  They complicate the other tests although they
                don't incur any run-time cost. *)
             (
                 (* Any reference to the first label is forwarded to the second. *)
                 Array.update(labelTargets, jmpLab1, getLabel jmpLab2);
                 forward(tl, list, true)
             )
 
         |   forward((ubr as UnconditionalBranch(Label ubrLab)) :: (tl as SetLabel(Label jumpLab) :: _), list, rep) =
                 (* Eliminate unconditional jumps to the next instruction. *)
             if ubrLab = jumpLab
             then forward(tl, list, true)
             else forward(tl, ubr :: list, rep)
 
         |   forward((cbr as ConditionalBranch(test, Label cbrLab)) :: (ubr as UnconditionalBranch(Label ubrLab)) ::
                     (tl as SetLabel(Label jumpLab) :: _), list, rep) =
             if cbrLab = jumpLab
             then (* We have a conditional branch followed by an unconditional branch followed by the destination of
                     the conditional branch.  Eliminate the unconditional branch by reversing the test.
                     This can often happen if one branch of an if-then-else has been reduced to zero
                     because the same register has been chosen for the input and output. *)
                 forward(tl (* Leave the label just in case it's used elsewhere*),
                     ConditionalBranch(invertTest test, Label ubrLab) :: list, true)
 
             else forward(ubr :: tl, cbr :: list, rep)
 
         |   forward((load as LoadRegScaled{regT=regT1, regN=regN1, unitOffset=offset1, loadType=lt1}) ::
                      (tl1 as LoadRegScaled{regT=regT2, regN=regN2, unitOffset=offset2, loadType=lt2} ::tl2), list, rep) =
             (* Two adjacent loads - can this be converted to load-pair?  N.B.  We have to be careful about the
                sequence ldr x0,[x0]; ldr x1,[x0+8] which isn't the same at all. *)
             if regN1 = regN2 andalso regN1 <> regT1 andalso lt1 = lt2 andalso (offset2 = offset1 + 1 orelse offset2 = offset1 - 1) andalso
                 (case lt1 of Load64 => true | Load32 => true | _ => false) andalso isValidForPair(offset1, offset2)
             then
             let
                 val (reg1, reg2, offset) =
                     if offset1 < offset2 then (regT1, regT2, offset1) else (regT2, regT1, offset2)
             in
                 forward(tl2,
                     LoadRegPair{ regT1=reg1, regT2=reg2, regN=regN1, unitOffset=offset, loadType=lt1, unscaledType=NoUpdate} :: list, true)
             end
             else forward(tl1, load :: list, rep)
 
         |   forward((store as StoreRegScaled{regT=regT1, regN=regN1, unitOffset=offset1, loadType=lt1}) ::
                      (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=offset2, loadType=lt2} ::tl2), list, rep) =
             (* Two adjacent stores - can this be converted to store-pair? *)
             if regN1 = regN2 andalso lt1 = lt2 andalso (offset2 = offset1 + 1 orelse offset2 = offset1 - 1) andalso
                 (case lt1 of Load64 => true | Load32 => true | _ => false) andalso isValidForPair(offset1, offset2)
             then
             let
                 val (reg1, reg2, offset) =
                     if offset1 < offset2 then (regT1, regT2, offset1) else (regT2, regT1, offset2)
             in
                 forward(tl2,
                     StoreRegPair{ regT1=reg1, regT2=reg2, regN=regN1, unitOffset=offset, loadType=lt1, unscaledType=NoUpdate} :: list, true)
             end
             else forward(tl1, store :: list, rep)
 
         |   forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~8, loadType=Load64, unscaledType=NoUpdate}) ::
                      (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=0, loadType=Load64} ::tl2), list, rep) =
             (* Common case - store length word and then the first word of the cell. *)
             if regN1 = regN2
             then forward(tl2,
                     StoreRegPair{ regT1=regT1, regT2=regT2, regN=regN1, unitOffset= ~1, loadType=Load64, unscaledType=NoUpdate} :: list, true)
             else forward(tl1, store :: list, rep)
 
         |   forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~4, loadType=Load32, unscaledType=NoUpdate}) ::
                      (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=0, loadType=Load32} ::tl2), list, rep) =
             (* Common case - store length word and then the first word of the cell. *)
             if regN1 = regN2
             then forward(tl2,
                     StoreRegPair{ regT1=regT1, regT2=regT2, regN=regN1, unitOffset= ~1, loadType=Load32, unscaledType=NoUpdate} :: list, true)
             else forward(tl1, store :: list, rep)
 
         |   forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) ::
-                     (tl1 as StoreRegUnscaled{regT=regT2, regN=regN2, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex} ::tl2), list, rep) =
+                     (tl1 as StoreRegUnscaled{regT=regT2, regN=regN2, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex} :: tl2), list, rep) =
             (* Adjacent pushes T2 is in the lower address so the order is T2, T1.  The stack is always 64-bit aligned so
                this works on both native addressing and 32-in-64. *)
             if regN1 = regN2
             then forward(tl2,
                     StoreRegPair{ regT1=regT2, regT2=regT1, regN=regN1, unitOffset= ~2, loadType=Load64, unscaledType=PreIndex} :: list, true)
             else forward(tl1, store :: list, rep)
 
         |   forward((add1 as AddImmediate{regN=regN1, regD=regD1, immed=immed1, shifted=false, opSize=OpSize64, setFlags=false}) ::
                     (tl1 as AddImmediate{regN=regN2, regD=regD2, immed=immed2, shifted=false, opSize=OpSize64, setFlags=false}  ::tl2), list, rep) =
             (* Adjacent stack resets.  This can apply more generally but only if the result registers are the same.  If they're
                not we may need the intermediate result.  We put the result back into the input stream in case it can be combined
                with another stack reset. *)
             if regN2 = regD2 andalso regD1 = regD2 andalso immed2+immed1 < 0w4096
             then forward(AddImmediate{regN=regN1, regD=regD2, immed=immed2+immed1, shifted=false, opSize=OpSize64, setFlags=false} :: tl2, list, true)
             else forward(tl1, add1 :: list, rep)
 
+        |   forward(BitwiseLogical{bits=0w1, regN, regD=XZero, logOp=LogAnd, opSize=_, setFlags=true} ::
+                    ConditionalBranch(CondEqual, label) :: tl2, list, _) =
+            (* Test the tag bit: bit 0.  This is very common to test for nil/not nil.  We could include other
+               values but they're far less likely. *)
+                forward(TestBitBranch{test=regN, bit=0w0, label=label, onZero=true} :: tl2, list, true)
+
+        |   forward(BitwiseLogical{bits=0w1, regN, regD=XZero, logOp=LogAnd, opSize=_, setFlags=true} ::
+                    ConditionalBranch(CondNotEqual, label) :: tl2, list, _) =
+                forward(TestBitBranch{test=regN, bit=0w0, label=label, onZero=false} :: tl2, list, true)
+
         |   forward(hd :: tl, list, rep) = forward(tl, hd :: list, rep) 
         
         (* The input to "reverse" is in reverse program order so a pattern
            "add :: load" matches a load that is followed by an add in the
            generated code. *)
         and reverse([], list, rep) = (list, rep)
 
         |   reverse((add as AddImmediate{regN=regN2, regD=regD2, immed, shifted=false, opSize=OpSize64, setFlags=false}) ::
                      (tl1 as LoadRegScaled{regT=regT1, regN=regN1, unitOffset=0, loadType=Load64} ::tl2), list, rep) =
             (* A stack reset occurring after a load.  This is usually the ML SP but can also occur with C memory ops.
                It might be possible to consider other cases. *)
             if regN1 = regD2 andalso regN2 = regD2 andalso regT1 <> regN1 andalso immed < 0w256
             then reverse(tl2,
                     LoadRegUnscaled{regT=regT1, regN=regN1, byteOffset=Word.toInt immed, loadType=Load64, unscaledType=PostIndex} :: list, true)
             else reverse(tl1, add :: list, rep)
 
         |   reverse((add as AddImmediate{regN=regN2, regD=regD2, immed, shifted=false, opSize=OpSize64, setFlags=false}) ::
                      (tl1 as LoadRegPair{regT1=regT1, regT2=regT2, regN=regN1, unitOffset=0, loadType=Load64, unscaledType=NoUpdate} ::tl2), list, rep) =
             (* A stack reset occurring after a load pair.  The post-index offset of a load pair is expressed
                in units of 8 bytes so the increment can only be folded in if it is a multiple of 8;
                otherwise the division below would silently truncate it. *)
             if regN1 = regD2 andalso regN2 = regD2 andalso regT1 <> regN1 andalso regT2 <> regN1 andalso
                 immed < 0w64 * 0w8 andalso immed mod 0w8 = 0w0
             then reverse(tl2,
                     LoadRegPair{regT1=regT1, regT2=regT2, regN=regN1, unitOffset=Word.toInt(immed div 0w8),
                                 loadType=Load64, unscaledType=PostIndex} :: list, true)
             else reverse(tl1, add :: list, rep)
 
         |   reverse(hd :: tl, list, rep) = reverse(tl, hd :: list, rep)
 
         (* Run the forward/reverse passes over the code again and again,
            stopping as soon as a complete scan reports that nothing changed. *)
         fun repeat ops =
         let
             val (result, changed) = forward(ops, [], false)
         in
             if changed then repeat result else result
         end
 
         val optimised = repeat instrs
 
     in
         generateCode{instrs=List.rev(toAssembler(optimised, [])), name=name, parameters=parameters,
                      resultClosure=resultClosure, profileObject=profileObject}
     end
 
     (* Constant shifts are encoded in the immr and imms fields of the bit-field instruction. *)
     fun shiftConstant{ direction, regD, regN, shift, opSize } =
     let
         (* Operand width in bits and the index of its most significant bit. *)
         val width: word = case opSize of OpSize64 => 0w64 | OpSize32 => 0w32
         val topBit = width - 0w1
         (* A left shift rotates by the negated shift amount, modulo the width,
            and keeps bits up to (topBit - shift).  Right shifts rotate by the
            shift amount and keep everything up to the top bit; the arithmetic
            form uses the signed bit-field variant. *)
         val (kind, rotate, lastBit) =
             case direction of
                 ShiftLeft => (BFUnsigned, Word.~ shift mod width, topBit - shift)
             |   ShiftRightLogical => (BFUnsigned, shift, topBit)
             |   ShiftRightArithmetic => (BFSigned, shift, topBit)
     in
         BitField{ regN=regN, regD=regD, opSize=opSize, immr=rotate, imms=lastBit, bitfieldKind=kind }
     end
 
     (* These sequences are used both in the ML code-generator and in the FFI code so it
        is convenient to have them here and share the code. *)
     local
         (* Number of ML words occupied by a 64-bit value. *)
         val wordsFor64: Word64.word = if is32in64 then 0w2 else 0w1
 
         (* Append code that allocates "bytes" bytes into fixedReg and then stores
            a length word of "words" words, with the F_bytes flag set, immediately
            below the allocated address. *)
         fun allocateWords(fixedReg, workReg, words, bytes, regMask, code) =
         let
             val (lengthOffset, lengthType, flagShift) =
                 if is32in64 then (~4, Load32, 0w24) else (~8, Load64, 0w56)
             val byteFlag = Word64.fromLarge(Word8.toLarge Address.F_bytes)
             val lengthWordValue = Word64.orb(words, Word64.<<(byteFlag, flagShift))
         in
             code <::>
             AllocateMemoryFixedSize{ bytes=bytes, dest=fixedReg, save=regMask, work=X16 } <::>
             LoadNonAddr(workReg, lengthWordValue) <::>
             (* The offset is negative so the unscaled store has to be used. *)
             StoreRegUnscaled{regT=workReg, regN=fixedReg, byteOffset=lengthOffset, loadType=lengthType, unscaledType=NoUpdate}
         end
 
         (* In 32-in-64 mode subtract the heap base from the register and shift the
            result right by two bits; in native mode the code is returned unchanged. *)
         fun absoluteAddressToIndex(reg, code) =
             if not is32in64
             then code
             else
                 code <::>
                 SubShiftedReg{regM=X_Base32in64, regN=reg, regD=reg, shift=ShiftNone, opSize=OpSize64, setFlags=false} <::>
                 shiftConstant{direction=ShiftRightLogical, regN=reg, regD=reg, shift=0w2, opSize=OpSize64}
     in
         (* Allocate a 16-byte cell and store a double-precision register in it. *)
         fun boxDouble({source, destination, workReg, saveRegs}, code) =
         let
             val allocated = allocateWords(destination, workReg, wordsFor64, 0w16, saveRegs, code)
             val stored = allocated <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Double64}
         in
             absoluteAddressToIndex(destination, stored)
         end
 
         (* Allocate a 16-byte cell and store a 64-bit general register in it. *)
         fun boxSysWord({source, destination, workReg, saveRegs}, code) =
         let
             val allocated = allocateWords(destination, workReg, wordsFor64, 0w16, saveRegs, code)
             val stored = allocated <::> StoreRegScaled{regT=source, regN=destination, unitOffset=0, loadType=Load64}
         in
             absoluteAddressToIndex(destination, stored)
         end
 
         (* Allocate an 8-byte, one-word cell and store a single-precision register in it. *)
         fun boxFloat({source, destination, workReg, saveRegs}, code) =
         let
             val allocated = allocateWords(destination, workReg, 0w1, 0w8, saveRegs, code)
             val stored = allocated <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Float32}
         in
             absoluteAddressToIndex(destination, stored)
         end
     end
 
     (* Re-exports the types of this structure under their own names, listed
        alphabetically.  Presumably intended for use in sharing constraints by
        the functors that take this structure as an argument. *)
     structure Sharing =
     struct
         type bitfieldKind = bitfieldKind
         type brRegType = brRegType
         type closureRef = closureRef
         type condition = condition
         type condSet = condSet
         type 'a extend = 'a extend
         type floatSize = floatSize
         type fpBinary = fpBinary
         type fpUnary = fpUnary
         type instr = instr
         type label = label
         type labelMaker = labelMaker
         type loadType = loadType
         type logicalOp = logicalOp
         type multKind = multKind
         type opSize = opSize
         type precode = precode
         type scale = scale
         type shiftDirection = shiftDirection
         type shiftType = shiftType
         type unscaledType = unscaledType
         type vReg = vReg
         type wordSize = wordSize
         type xReg = xReg
     end
 
 end;