diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML index 07be275a..105bdf13 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML @@ -1,1342 +1,1314 @@ (* Copyright David C. J. Matthews 2021 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeToArm64Code( structure Arm64PreAssembly: ARM64PREASSEMBLY structure Arm64Assembly: ARM64ASSEMBLY structure Debug: DEBUG structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET structure Pretty: PRETTY structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end sharing Arm64PreAssembly.Sharing = Arm64Assembly.Sharing = Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ICODEGENERATE = struct open Identify open Arm64ICode open Arm64PreAssembly open Arm64Assembly open Address exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd (* These aren't currently used for anything. *) val workReg1 = X16 and workReg2 = X17 fun icodeToArm64Code {blocks, functionName, stackRequired, debugSwitches, allocatedRegisters: reg vector, resultClosure, profileObject, ...} = let val numBlocks = Vector.length blocks fun getAllocatedReg(PReg r) = Vector.sub(allocatedRegisters, r) fun getAllocatedGenReg r = case getAllocatedReg r of GenReg r => r | FPReg _ => raise InternalError "getAllocateGenReg: returned FP Reg" and getAllocatedFPReg r = case getAllocatedReg r of FPReg r => r | GenReg _ => raise InternalError "getAllocatedFPReg: returned Gen Reg" fun getAllocatedGenRegOrZero ZeroReg = XZero | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg (* Load from and store to stack. *) fun loadFromStack(destReg, wordOffset, code) = if wordOffset >= 4096 then toInstr(LoadRegIndexed{regT=destReg, regN=X_MLStackPtr, regM=destReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: List.rev(toInstrs([LoadNonAddr(destReg, Word64.fromInt wordOffset)])) @ code else toInstr(LoadRegScaled{regT=destReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code and storeToStack(sourceReg, wordOffset, workReg, code) = if wordOffset >= 4096 then toInstr(StoreRegIndexed{regT=sourceReg, regN=X_MLStackPtr, regM=workReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: List.rev(toInstrs([LoadNonAddr(workReg, Word64.fromInt wordOffset)])) @ code else toInstr(StoreRegScaled{regT=sourceReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code datatype srcAndDest = IsInReg of xReg | IsOnStack of int local (* The registers are numbered from 0. Choose values that don't conflict with the stack addresses. 
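So, for example, X0 maps to ~1, X5 to ~6 and the catch-all case to ~32, putting every register node on a negative integer while stack slots keep their non-negative word offsets; the two kinds of node can then share one numbering in the strongly-connected-components graph without any clash.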
*) fun regNo(XReg r) = ~1 - Word8.toInt r | regNo _ = ~1 - 31 type node = {src: srcAndDest, dst: srcAndDest } fun nodeAddress({dst=IsInReg r, ...}: node) = regNo r | nodeAddress({dst=IsOnStack a, ...}) = a fun arcs({src=IsOnStack wordOffset, ...}: node) = [wordOffset] | arcs{src=IsInReg r, ...} = [regNo r] in val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } end (* This is a general function for moving values into registers or to the stack where it is possible that the source values might also be in use as destinations. The stack is used for destinations only for tail recursive calls. *) fun moveMultipleValues(moves, code) = let fun moveValues ([], code) = code (* We're done. *) | moveValues (arguments, code) = let (* stronglyConnectedComponents does two things. It detects loops where it's not possible to move items without breaking the loop but more importantly it orders the dependencies so that if there are no loops we can load the source and store it in the destination knowing that we won't overwrite anything we might later need. *) val ordered = stronglyConnected arguments fun loadIntoReg(IsInReg sReg, dReg, code) = if sReg = dReg then code else toInstr(MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | loadIntoReg(IsOnStack wordOffset, dReg, code) = loadFromStack(dReg, wordOffset, code) fun moveEachValue ([], code) = code | moveEachValue ([{dst=IsInReg dReg, src}] :: rest, code) = moveEachValue(rest, loadIntoReg(src, dReg, code)) | moveEachValue ([{dst=IsOnStack wordOffset, src=IsInReg sReg}] :: rest, code) = (* Storing into the stack. *) moveEachValue(rest, storeToStack(sReg, wordOffset, workReg1, code)) | moveEachValue ([{dst=IsOnStack dstOffset, src=IsOnStack srcOffset}] :: rest, code) = (* Copy a stack location - needs a load and store unless the address is the same. *) if dstOffset = srcOffset then moveEachValue(rest, code) else moveEachValue(rest, storeToStack(workReg2, dstOffset, workReg1, loadFromStack(workReg2, srcOffset, code))) | moveEachValue((cycle as first :: _ :: _) :: rest, code) = (* We have a cycle. *) let (* We need to exchange some of the arguments. Doing an exchange here will set the destination with the correct source. However we have to process every subsequent entry with the swapped registers. That may well mean that one of those entries becomes trivial. We also need to rerun stronglyConnectedComponents on at least the rest of this cycle. It's easiest to flatten the rest and do everything. *) (* Exchange the source and destination. We don't have an exchange instruction and there's a further complication. We could be copying between stack locations and their offsets could be > 4096. Since we've only got two work registers we need to use the hardware stack as an extra location. Stack-stack exchange is very rare so the extra overhead to handle the general case is worth it. *) local fun storeToDest(sReg, IsInReg dReg, _, code) = toInstr(MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | storeToDest(sReg, IsOnStack wordOffset, work, code) = storeToStack(sReg, wordOffset, work, code) in fun exchange(IsInReg arg1Reg, arg2, code) = toInstr(MoveXRegToXReg{sReg=workReg2, dReg=arg1Reg}) :: storeToDest(arg1Reg, arg2, workReg1, loadIntoReg(arg2, workReg2, code)) | exchange(arg1, IsInReg arg2Reg, code) = toInstr(MoveXRegToXReg{sReg=workReg2, dReg=arg2Reg}) :: storeToDest(arg2Reg, arg1, workReg1, loadIntoReg(arg1, workReg2, code)) | exchange(arg1, arg2, code) = (* The hardware stack must be 16-byte aligned. 
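The hardware checks that SP is 16-byte aligned whenever it is used as the base of a load or store, so although only one 64-bit word is spilled the slot is claimed with a 16-byte pre-decrement and released with a matching 16-byte post-increment. As an illustration of the cycle handling itself: the move set {X1 -> X2, X2 -> X1} is a single strongly connected component; exchange swaps X1 and X2 through workReg2, which satisfies the first entry, and once the remaining sources have been renamed the other entry is the trivial X1 -> X1 and is dropped on the next round.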
*) storeToDest(workReg2, arg2, workReg1, toInstr(LoadRegUnscaled{regT=workReg2, regN=XSP, byteOffset=16, loadType=Load64, unscaledType=PostIndex}) :: storeToDest(workReg2, arg1, workReg1, loadIntoReg(arg2, workReg2, toInstr(StoreRegUnscaled{regT=workReg2, regN=XSP, byteOffset= ~16, loadType=Load64, unscaledType=PreIndex}) :: loadIntoReg(arg1, workReg2, code)))) end (* Try to find either a register-register move or a register-stack move. If not use the first. If there's a stack-register move there will also be a register-stack so we don't need to look for both. *) val {dst=selectDst, src=selectSrc} = first (* This includes this entry but after the swap we'll eliminate it. *) val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) val destAsSource = selectDst fun match(s1: srcAndDest, s2) = s1 = s2 fun swapSources{src, dst} = if match(src, selectSrc) then {src=destAsSource, dst=dst} else if match(src, destAsSource) then {src=selectSrc, dst=dst} else {src=src, dst=dst} val exchangeCode = exchange(selectDst, selectSrc, code) in moveValues(List.map swapSources flattened, exchangeCode) end | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *) raise InternalError "moveEachValue - empty set" in moveEachValue(ordered, code) end in moveValues(moves, code) end (* Where we have multiple specific registers as either source or destination there is the potential that a destination register if currently in use as a source. *) fun moveMultipleRegisters(regPairList, code) = let val regPairsAsDests = List.map(fn {src, dst} => {src=IsInReg src, dst=IsInReg dst}) regPairList in moveMultipleValues(regPairsAsDests, code) end fun moveIfNecessary({src, dst}, code) = if src = dst then code else toInstr(MoveXRegToXReg{sReg=src, dReg=dst}) :: code (* Add a constant word to the source register and put the result in the destination. regW is used as a work register if necessary. This is used both for addition and subtraction. *) fun addConstantWord({regS, regD, value=0w0, ...}, code) = if regS = regD then code else toInstr(MoveXRegToXReg{sReg=regS, dReg=regD}) :: code | addConstantWord({regS, regD, regW, value}, code) = let (* If we have to load the constant it's better if the top 32-bits are zero if possible. *) val (isSub, unsigned) = if value > Word64.<<(0w1, 0w63) then (true, ~ value) else (false, value) in if unsigned < Word64.<<(0w1, 0w24) then (* We can put up to 24 in a shifted and an unshifted constant. *) let val w = Word.fromLarge(Word64.toLarge unsigned) val high = Word.andb(Word.>>(w, 0w12), 0wxfff) val low = Word.andb(w, 0wxfff) val addSub = if isSub then SubImmediate else AddImmediate in if high <> 0w0 then ( (if low <> 0w0 then [toInstr(addSub{regN=regD, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64})] else []) @ toInstr(addSub{regN=regS, regD=regD, immed=high, shifted=true, setFlags=false, opSize=OpSize64}) :: code ) else toInstr(addSub{regN=regS, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64}) :: code end else let (* To minimise the constant and increase the chances that it will fit in a single word look to see if we can shift it. 
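For example 0x500000000 has 32 trailing zero bits, so getShift below reduces it to 5, which LoadNonAddr can load cheaply, and the add supplies the shift as LSL #32 on the register operand. In the sub-2^24 case above the constant is instead split into a shifted and an unshifted 12-bit immediate; a hypothetical helper showing just the arithmetic:
            fun split12 w = (Word.andb(Word.>>(w, 0w12), 0wxfff), Word.andb(w, 0wxfff))
            (* illustration only: split12 0wx123456 = (0wx123, 0wx456), emitted as
               ADD regD,regS,#0x123,LSL #12 followed by ADD regD,regD,#0x456 *)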
*) fun getShift(value, shift) = if Word64.andb(value, 0w1) = 0w0 then getShift(Word64.>>(value, 0w1), shift+0w1) else (value, shift) val (shifted, shift) = getShift(unsigned, 0w0) in code <@> List.rev(toInstrs([LoadNonAddr(regW, shifted)])) <::> toInstr((if isSub then SubShiftedReg else AddShiftedReg) {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift, setFlags=false, opSize=OpSize64}) end end val getSaveRegs = List.map getAllocatedGenReg - - fun getSaveRegsAndSeparate saveRegs = - let - val realSaveRegs = getSaveRegs saveRegs - val saveX30 = List.exists (fn r => r = X30) realSaveRegs - val preserve = List.filter (fn r => r <> X30) realSaveRegs - in - { saveX30=saveX30, saveRegs=preserve } - end val startOfFunctionLabel = createLabel() (* Used for recursive calls/jumps *) val blockToLabelMap = Vector.tabulate(numBlocks, fn _ => createLabel()) fun getBlockLabel blockNo = Vector.sub(blockToLabelMap, blockNo) fun codeExtended _ (MoveRegister{source, dest, ...}, code) = moveIfNecessary({src=getAllocatedGenReg source, dst=getAllocatedGenReg dest}, code) | codeExtended _ (LoadNonAddressConstant{source, dest, ...}, code) = code <@> List.rev(toInstrs([LoadNonAddr(getAllocatedGenReg dest, source)])) | codeExtended _ (LoadAddressConstant{source, dest, ...}, code) = code <::> toInstr(LoadAddr(getAllocatedGenReg dest, source)) | codeExtended _ (LoadWithConstantOffset{dest, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then toInstr(LoadRegUnscaled{regT=getAllocatedGenReg dest, regN=getAllocatedGenReg base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in toInstr(LoadRegScaled{regT=getAllocatedGenReg dest, regN=getAllocatedGenReg base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithConstantOffset{dest, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then toInstr(LoadFPRegUnscaled{regT=getAllocatedFPReg dest, regN=getAllocatedGenReg base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in toInstr(LoadFPRegScaled{regT=getAllocatedFPReg dest, regN=getAllocatedGenReg base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (LoadWithIndexedOffset{dest, base, index, loadType, ...}, code) = let val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in toInstr(LoadRegIndexed{regT=getAllocatedGenReg dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithIndexedOffset{dest, base, index, floatSize, ...}, code) = let val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX in toInstr(LoadFPRegIndexed{regT=getAllocatedFPReg dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (GetThreadId { dest}, code) = (* Load the thread id. 
This is always a 64-bit value. *) toInstr(LoadRegScaled{regT=getAllocatedGenReg dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code | codeExtended _ (ObjectIndexAddressToAbsolute{source, dest, ...}, code) = toInstr(AddShiftedReg{regM=getAllocatedGenReg source, regN=X_Base32in64, regD=getAllocatedGenReg dest, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code | codeExtended _ (AbsoluteToObjectIndex{source, dest, ...}, code) = let val destReg = getAllocatedGenReg dest in code <::> toInstr(SubShiftedReg{regM=X_Base32in64, regN=getAllocatedGenReg source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> toInstr(shiftConstant{shift=0w2, regN=destReg, regD=destReg, direction=ShiftRightLogical, opSize=OpSize64}) end | codeExtended _ (AllocateMemoryFixed{ bytesRequired, dest, saveRegs, ... }, code) = let val label = createLabel() val destReg = getAllocatedGenReg dest in code <@> (* Subtract the number of bytes required from the heap pointer and put in result reg. *) (if bytesRequired >= 0w4096 then [toInstr(SubShiftedReg{regM=workReg1, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}), toInstr(LoadNonAddr(workReg1, bytesRequired))] else [toInstr(SubImmediate{regN=X_MLHeapAllocPtr, regD=destReg, immed=Word.fromLarge bytesRequired, shifted=false, setFlags=false, opSize=OpSize64})] ) <::> (* Compare with heap limit. *) toInstr(SubShiftedReg{regM=X_MLHeapLimit, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) <::> toInstr(ConditionalBranch(CondCarrySet, label)) <@> (* Skip the trap if it's ok. *) List.rev(toInstrs(List.rev([RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs}]))) <::> toInstr(SetLabel label) <::> toInstr(MoveXRegToXReg{sReg=destReg, dReg=X_MLHeapAllocPtr}) end | codeExtended _ (AllocateMemoryVariable{ size, dest, saveRegs, ... }, code) = let val trapLabel = createLabel() and noTrapLabel = createLabel() val destReg = getAllocatedGenReg dest and sizeReg = getAllocatedGenReg size (* Subtract the size into the result register. Subtract a further word for the length word and round down in 32-in-64. *) val subtractSize = if is32in64 then toInstr(BitwiseLogical{bits= ~ 0w8, regN=destReg, regD=destReg, logOp=LogAnd, opSize=OpSize64, setFlags=false}) :: toInstr(SubImmediate{regN=destReg, regD=destReg, immed=0w4, shifted=false, setFlags=false, opSize=OpSize64}) :: toInstr(SubShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code else toInstr(SubImmediate{regN=destReg, regD=destReg, immed=0w8, shifted=false, setFlags=false, opSize=OpSize64}) :: toInstr(SubShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftLSL 0w3, setFlags=false, opSize=OpSize64}) :: code (* Check against the limit. If the size is large enough it is possible that this could wrap round. To check for that we trap if either the result is less than the limit or if it is now greater than the allocation pointer. 
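With hypothetical numbers: if the allocation pointer were 0x10000, the limit 0x8000 and the request 0x20000 bytes, the subtraction would wrap round to a value far above the limit, so the first test alone would not trap; the second one catches it because the result is now above the allocation pointer. A rough model of the two unsigned tests, ignoring the length word and the 32-in-64 rounding (a sketch, not compiler code):
            fun wouldTrap {allocPtr: Word64.word, limit, bytesNeeded} =
            let
                val newPtr = allocPtr - bytesNeeded (* may wrap round; illustration only *)
            in
                newPtr < limit orelse newPtr >= allocPtr
            end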
*) in subtractSize <::> toInstr(SubShiftedReg{regM=X_MLHeapLimit, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) <::> toInstr(ConditionalBranch(CondCarryClear, trapLabel)) <::> toInstr(SubShiftedReg{regM=X_MLHeapAllocPtr, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) <::> toInstr(ConditionalBranch(CondCarryClear, noTrapLabel)) <::> toInstr(SetLabel trapLabel) <@> List.rev(toInstrs(List.rev([RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs}]))) <::> toInstr(SetLabel noTrapLabel) <::> toInstr(MoveXRegToXReg{sReg=destReg, dReg=X_MLHeapAllocPtr}) end | codeExtended _ (InitialiseMem{ size, addr, init}, code) = let val sizeReg = getAllocatedGenReg size and addrReg = getAllocatedGenReg addr and initReg = getAllocatedGenReg init val exitLabel = createLabel() and loopLabel = createLabel() - (* Yhis uses a loop to initialise. It's possible the size is + (* This uses a loop to initialise. It's possible the size is zero so we have to check at the top of the loop. *) + val (bShift, offset, loadType) = if is32in64 then (0w2, ~4, Load32) else (0w3, ~8, Load64) in - setLabel exitLabel :: - unconditionalBranch loopLabel :: - (if is32in64 - then storeRegPreIndex32{regT=initReg, regN=workReg1, byteOffset= ~4} - else storeRegPreIndex{regT=initReg, regN=workReg1, byteOffset= ~8}) :: - conditionalBranch(CondEqual, exitLabel) :: - (* Are we at the start? *) - toInstr(SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) :: - setLabel loopLabel :: + code <::> (* Add the length in bytes so we point at the end. *) - toInstr(AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, - shift=ShiftLSL(if is32in64 then 0w2 else 0w3), setFlags=false, opSize=OpSize64}) :: code + toInstr(AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, shift=ShiftLSL bShift, setFlags=false, opSize=OpSize64}) <::> + toInstr(SetLabel loopLabel) <::> + (* Are we at the start? *) + toInstr(SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) <::> + toInstr(ConditionalBranch(CondEqual, exitLabel)) <::> + toInstr(StoreRegUnscaled{regT=initReg, regN=workReg1, byteOffset=offset, loadType=loadType, unscaledType=PreIndex }) <::> + toInstr(UnconditionalBranch loopLabel) <::> + toInstr(SetLabel exitLabel) end | codeExtended _ (BeginLoop, code) = code | codeExtended _ (JumpLoop{regArgs, stackArgs, checkInterrupt}, code) = let (* TODO: We could have a single list and use ArgOnStack and ArgInReg to distinguish. *) fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(getAllocatedGenReg reg) val extStackArgs = map (fn {wordOffset, src, ...} => {src=convertArg src, dst=IsOnStack wordOffset}) stackArgs val extRegArgs = map (fn {dst, src} => {src=convertArg src, dst=convertArg(ArgInReg dst)}) regArgs val code2 = moveMultipleValues(extStackArgs @ extRegArgs, code) in case checkInterrupt of NONE => code2 | SOME saveRegs => let - val {saveX30, saveRegs} = getSaveRegsAndSeparate saveRegs val skipCheck = createLabel() in + code2 <::> (* Put in stack-check code to allow this to be interrupted. 
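The stack-limit test doubles as an interrupt poll: the RTS can move the limit word so that the comparison fails even when there is really plenty of stack, forcing the trap call on the next time round the loop.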
*) - setLabel skipCheck :: - (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) @ - registerMask saveRegs :: - branchAndLinkReg workReg1 :: - loadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackOverflowCallOffset} :: - (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) @ - conditionalBranch(CondCarrySet, skipCheck) :: - subSShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone} :: - loadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset} :: - code2 + toInstr(LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64}) <::> + toInstr(SubShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) <::> + toInstr(ConditionalBranch(CondCarrySet, skipCheck)) <@> + List.rev(toInstrs(List.rev([RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs}]))) <::> + toInstr(SetLabel skipCheck) end end | codeExtended _ (StoreWithConstantOffset{source, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then toInstr(StoreRegUnscaled{regT=getAllocatedGenReg source, regN=getAllocatedGenReg base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in toInstr(StoreRegScaled{regT=getAllocatedGenReg source, regN=getAllocatedGenReg base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithConstantOffset{source, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then toInstr(StoreFPRegUnscaled{regT=getAllocatedFPReg source, regN=getAllocatedGenReg base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in toInstr(StoreFPRegScaled{regT=getAllocatedFPReg source, regN=getAllocatedGenReg base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (StoreWithIndexedOffset{source, base, index, loadType, ...}, code) = let val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in toInstr(StoreRegIndexed{regT=getAllocatedGenReg source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithIndexedOffset{source, base, index, floatSize, ...}, code) = let val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. 
*) val scaleType = if is32in64 then ExtSXTW else ExtUXTX in toInstr(StoreFPRegIndexed{regT=getAllocatedFPReg source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (AddSubImmediate{ source, dest, immed, isAdd, length, ccRef}, code) = let val destReg = case dest of NONE => XZero | SOME dreg => getAllocatedGenReg dreg in toInstr((if isAdd then AddImmediate else SubImmediate) {regN=getAllocatedGenReg source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (AddSubRegister{ base, shifted, dest, isAdd, length, ccRef, shift}, code) = let val destReg = case dest of NONE => XZero | SOME dreg => getAllocatedGenReg dreg in toInstr( (if isAdd then AddShiftedReg else SubShiftedReg) {regN=getAllocatedGenReg base, regM=getAllocatedGenReg shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalImmediate{ source, dest, immed, logOp, length, ccRef}, code) = let val destReg = case dest of NONE => XZero | SOME dreg => getAllocatedGenReg dreg in toInstr(BitwiseLogical{regN=getAllocatedGenReg source, regD=destReg, bits=immed, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalRegister{ base, shifted, dest, logOp, length, ccRef, shift}, code) = let (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. *) val destReg = case dest of NONE => XZero | SOME dreg => getAllocatedGenReg dreg in toInstr(LogicalShiftedReg{regN=getAllocatedGenReg base, regM=getAllocatedGenReg shifted, regD=destReg, shift=shift, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (ShiftRegister{ direction, dest, source, shift, opSize }, code) = toInstr(ShiftRegisterVariable{regN=getAllocatedGenReg source, regM=getAllocatedGenReg shift, regD=getAllocatedGenReg dest, shiftDirection=direction, opSize=opSize}) :: code | codeExtended _ (Multiplication{ kind, dest, sourceA, sourceM, sourceN }, code) = let val destReg = getAllocatedGenReg dest and srcAReg = case sourceA of SOME srcA => getAllocatedGenReg srcA | NONE => XZero and srcNReg = getAllocatedGenReg sourceN and srcMReg = getAllocatedGenReg sourceM in toInstr(MultiplyAndAddSub{regM=srcMReg, regN=srcNReg, regA=srcAReg, regD=destReg, multKind=kind}) :: code end | codeExtended _ (Division{ isSigned, dest, dividend, divisor, opSize }, code) = toInstr(DivideRegs{regN=getAllocatedGenReg dividend, regM=getAllocatedGenReg divisor, regD=getAllocatedGenReg dest, isSigned=isSigned, opSize=opSize}) :: code | codeExtended _ (BeginFunction{regArgs, ...}, code) = let - (* The real registers used for arguments. X30 is there but saved separately. *) - val saveRegs = List.filter (fn r => r <> X30) (List.map #2 regArgs) val skipCheck = createLabel() val defaultWords = 10 (* This is wired into the RTS. *) val workRegister = workReg1 val debugTrapAlways = false (* Can be set to true for debugging *) (* Test with either the stack-pointer or a high-water value. 
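If the new frame fits within the ten-word leeway that the RTS always allows, the stack pointer itself is compared against the limit; for anything larger X9 is first set to the stack pointer minus the whole frame size and that high-water mark is compared instead.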
The RTS assumes that X9 has been used as the high-water if it is called through stackOverflowXCallOffset rather than stackOverflowCallOffset *) val (testReg, entryPt, code1) = if stackRequired <= defaultWords then (X_MLStackPtr, stackOverflowCallOffset, code) else (X9, stackOverflowXCallOffset, addConstantWord({regS=X_MLStackPtr, regD=X9, regW=workRegister, value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt stackRequired}, code)) (* Skip the RTS call if there is enough stack. N.B. The RTS can modify the end-of-stack value to force a trap here even if there is really enough stack. *) val code2 = (if debugTrapAlways then [] else - [conditionalBranch(CondCarrySet, skipCheck), + [toInstr(ConditionalBranch(CondCarrySet, skipCheck)), toInstr(SubShiftedReg{regM=workRegister, regN=testReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64})]) @ (* Load the end-of-stack value. *) - loadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset} :: code1 + toInstr(LoadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64}) :: code1 val code3 = - (* Call the RTS but save X30 across the call *) - setLabel skipCheck :: - loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8} :: - registerMask saveRegs :: - branchAndLinkReg X16 :: - loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=entryPt} :: - storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8} :: code2 + code2 <@> + List.rev(toInstrs(List.rev([RTSTrap{rtsEntry=entryPt, work=workReg1, save=List.map #2 regArgs}]))) <::> + toInstr(SetLabel skipCheck) val usedRegs = regArgs fun mkPair(pr, rr) = {src=rr,dst=getAllocatedGenReg pr} val regPairs = List.map mkPair usedRegs in moveMultipleRegisters(regPairs, code3) end | codeExtended _ (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, code: instr list) = let fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(getAllocatedGenReg reg) val extStackArgs = map (fn {stack, src} => {dst=IsOnStack(stack+currStackSize), src=convertArg src}) stackArgs val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs (* Tail recursive calls are complicated because we generally have to overwrite the existing stack. That means storing the arguments in the right order to avoid overwriting a value that we are using for a different argument. *) fun codeTailCall(arguments, stackAdjust, code) = if stackAdjust < 0 then let (* If the function we're calling takes more arguments on the stack than the current function we will have to extend the stack. Do that by pushing the argument whose offset is at -1. Then adjust all the offsets and repeat. *) val {src=argM1, ...} = valOf(List.find(fn {dst=IsOnStack ~1, ...} => true | _ => false) arguments) fun renumberArgs [] = [] | renumberArgs ({dst=IsOnStack ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. 
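Every other stack offset, source and destination alike, then moves up by one because the push has lowered the stack pointer: e.g. the pending moves {dst=stack ~1, src=X1} and {dst=stack 0, src=stack 3} become, once X1 has been pushed, just {dst=stack 1, src=stack 4}, with stackAdjust increased by one, and the normal path finishes the job.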
*) | renumberArgs ({dst, src} :: args) = let val newDest = case dst of IsOnStack d => IsOnStack(d+1) | regDest => regDest val newSrc = case src of IsOnStack wordOffset => IsOnStack(wordOffset+1) | other => other in {dst=newDest, src=newSrc} :: renumberArgs args end val pushCode = case argM1 of IsOnStack wordOffset => toInstr(StoreRegUnscaled{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: loadFromStack(workReg2, wordOffset, code) | IsInReg reg => toInstr(StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: code in codeTailCall(renumberArgs arguments, stackAdjust+1, pushCode) end else let val loadArgs = moveMultipleValues(arguments, code) in if stackAdjust = 0 then loadArgs else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt stackAdjust * Word.toLarge nativeWordSize}, loadArgs) end val setArgumentsCode = codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code) val jumpToFunctionCode = case callKind of Recursive => [toInstr(UnconditionalBranch startOfFunctionLabel)] | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else [toInstr(BranchReg{regD=workReg1, brRegType=BRRBranch}), toInstr(LoadAddr(workReg1, m))] | FullCall => if is32in64 then [toInstr(BranchReg{regD=workReg1, brRegType=BRRBranch}), toInstr(LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}), toInstr(AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64})] else [toInstr(BranchReg{regD=workReg1, brRegType=BRRBranch}), toInstr(LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64})] in jumpToFunctionCode @ setArgumentsCode end | codeExtended _ (FunctionCall{callKind, regArgs=regArgs, stackArgs=stackArgs, dest, saveRegs, ...}, code: instr list) = let val destReg = getAllocatedGenReg dest local fun pushStackArgs ([], _, code) = code | pushStackArgs (ArgOnStack {wordOffset, ...} ::args, argNum, code) = let (* Have to adjust the offsets of stack arguments. *) val adjustedOffset = wordOffset+argNum in pushStackArgs(args, argNum+1, loadFromStack(workReg1, adjustedOffset, code) <::> toInstr(StoreRegUnscaled{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) end | pushStackArgs (ArgInReg reg ::args, argNum, code) = pushStackArgs(args, argNum+1, code <::> toInstr(StoreRegUnscaled{regT=getAllocatedGenReg reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *)) (* We have to adjust any stack offset to account for the arguments we've pushed. *) val numStackArgs = List.length stackArgs fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack(wordOffset+numStackArgs) | convertArg(ArgInReg reg) = IsInReg(getAllocatedGenReg reg) in val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val loadArgs = moveMultipleValues(extRegArgs, pushedArgs) end (* Push the registers before the call and pop them afterwards. 
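makeSavesAndCall nests the pushes and pops around the call, so for saveRegs of, say, [r1, r2] the execution order is push r1, push r2, call, pop r2, pop r1: the saved registers are restored in reverse (LIFO) order.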
*) fun makeSavesAndCall([], code) = ( case callKind of Recursive => code <::> toInstr(BranchAndLink startOfFunctionLabel) | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else code <::> toInstr(LoadAddr(workReg1, m)) <::> toInstr(BranchReg{regD=workReg1, brRegType=BRRAndLink}) | FullCall => if is32in64 then code <::> toInstr(AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) <::> toInstr(LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}) <::> toInstr(BranchReg{regD=workReg1, brRegType=BRRAndLink}) else code <::> toInstr(LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64}) <::> toInstr(BranchReg{regD=workReg1, brRegType=BRRAndLink}) ) | makeSavesAndCall(reg::regs, code) = let val areg = getAllocatedGenReg reg in makeSavesAndCall(regs, code <::> toInstr(StoreRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex})) <::> toInstr(LoadRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex}) end in moveIfNecessary({dst=destReg, src=X0}, makeSavesAndCall(saveRegs, loadArgs)) end | codeExtended _ (ReturnResultFromFunction { resultReg, returnReg, numStackArgs }, code) = let val resultReg = getAllocatedGenReg resultReg and returnReg = getAllocatedGenReg returnReg fun resetStack(0, code) = code | resetStack(nItems, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) in - returnRegister returnReg :: resetStack(numStackArgs, moveIfNecessary({src=resultReg, dst=X0}, code)) + toInstr(BranchReg{regD=returnReg, brRegType=BRRReturn}) :: resetStack(numStackArgs, moveIfNecessary({src=resultReg, dst=X0}, code)) end | codeExtended _ (RaiseExceptionPacket{ packetReg }, code) = (* We need a work register here. It can be any register other than X0 since we don't preserve registers across calls. *) (* Copy the handler "register" into the stack pointer. Then jump to the address in the first word. The second word is the next handler. This is set up in the handler. We have a lot more raises than handlers since most raises are exceptional conditions such as overflow so it makes sense to minimise the code in each raise. *) - branchRegister workReg1 :: - loadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0} :: - loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset} :: - moveIfNecessary({src=getAllocatedGenReg packetReg, dst=X0}, code) + moveIfNecessary({src=getAllocatedGenReg packetReg, dst=X0}, code) <::> + toInstr(LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64}) <::> + toInstr(LoadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0, loadType=Load64}) <::> + toInstr(BranchReg{regD=workReg1, brRegType=BRRBranch }) | codeExtended _ (PushToStack{ source, copies, ... }, code) = let val reg = getAllocatedGenReg source val _ = copies > 0 orelse raise InternalError "PushToStack: copies<1" fun pushn(0, c) = c | pushn(n, c) = pushn(n-1, toInstr(StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: c) in pushn(copies, code) end | codeExtended _ (LoadStack{ dest, wordOffset, ... }, code) = loadFromStack(getAllocatedGenReg dest, wordOffset, code) | codeExtended _ (StoreToStack{ source, stackOffset, ... 
}, code) = (* Store into the stack to set a field of a container. Always 64-bits. *) storeToStack(getAllocatedGenReg source, stackOffset, workReg1, code) | codeExtended _ (ContainerAddress{ dest, stackOffset, ... }, code) = (* Set the register to an offset in the stack. *) let val destReg = getAllocatedGenReg dest val _ = stackOffset >= 0 orelse raise InternalError "codeGenICode: ContainerAddress - negative offset" val byteOffset = stackOffset * Word.toInt nativeWordSize in if byteOffset >= 4096 then code <@> List.rev(toInstrs([LoadNonAddr(destReg, Word64.fromInt byteOffset)])) <::> toInstr(AddShiftedReg{regN=X_MLStackPtr, regM=destReg, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) else toInstr(AddImmediate{regN=X_MLStackPtr, regD=destReg, immed=Word.fromInt byteOffset, shifted=false, setFlags=false, opSize=OpSize64}) :: code end | codeExtended _ (ResetStackPtr{ numWords, ... }, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt numWords * Word.toLarge nativeWordSize}, code) | codeExtended _ (TagValue{ source, dest, isSigned=_, opSize }, code) = let val sourceReg = getAllocatedGenReg source and destReg = getAllocatedGenReg dest (* Shift left by one bit and add one. *) in case opSize of OpSize64 => bitwiseOrImmediate{regN=destReg, regD=destReg, bits=0w1} :: logicalShiftLeft{regN=sourceReg, regD=destReg, shift=0w1} :: code | OpSize32 => bitwiseOrImmediate32{regN=destReg, regD=destReg, bits=0w1} :: logicalShiftLeft32{regN=sourceReg, regD=destReg, shift=0w1} :: code end | codeExtended _ (UntagValue{ source, dest, isSigned, opSize }, code) = let (* Shift right by one bit. The type of shift depends on the length and whether it's signed. *) val shiftType = case (isSigned, opSize) of (false, OpSize64) => logicalShiftRight | (false, OpSize32) => logicalShiftRight32 | (true, OpSize64) => arithmeticShiftRight | (true, OpSize32) => arithmeticShiftRight32 in shiftType{regN=getAllocatedGenReg source, regD=getAllocatedGenReg dest, shift=0w1} :: code end | codeExtended _ (BoxLarge{ source, dest, saveRegs }, code) = code <@> List.rev(toInstrs( List.rev(boxSysWord({source=getAllocatedGenReg source, destination=getAllocatedGenReg dest, workReg=workReg1, saveRegs=getSaveRegs saveRegs}, [])))) | codeExtended _ (UnboxLarge{ source, dest }, code) = let (* Unbox a large word. The argument is a poly word. 
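In 32-in-64 that poly word is an object index, so it must first be turned into an absolute address, the index shifted left two places and added to X_Base32in64, before the boxed 64-bit payload can be loaded; in native 64-bit mode the word is already the address of the box and a single load suffices.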
*) val destReg = getAllocatedGenReg dest and srcReg = getAllocatedGenReg source in if is32in64 then toInstr(LoadRegScaled{regT=destReg, regN=destReg, unitOffset=0, loadType=Load64}) :: toInstr(AddShiftedReg{regM=srcReg, regN=X_Base32in64, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code else toInstr(LoadRegScaled{regT=destReg, regN=srcReg, unitOffset=0, loadType=Load64}) :: code end | codeExtended _ (BoxTagFloat{ floatSize=Double64, source, dest, saveRegs }, code) = code <@> List.rev(toInstrs(List.rev(boxDouble({source=getAllocatedFPReg source, destination=getAllocatedGenReg dest, workReg=workReg1, saveRegs=getSaveRegs saveRegs}, [])))) | codeExtended _ (BoxTagFloat{ floatSize=Float32, source, dest, saveRegs }, code) = let val floatReg = getAllocatedFPReg source and fixedReg = getAllocatedGenReg dest in if is32in64 then code <@> List.rev(toInstrs(List.rev(boxFloat({source=floatReg, destination=fixedReg, workReg=workReg1, saveRegs=getSaveRegs saveRegs}, [])))) else bitwiseOrImmediate{regN=fixedReg, regD=fixedReg, bits=0w1} :: logicalShiftLeft{shift=0w32, regN=fixedReg, regD=fixedReg} :: moveFloatToGeneral{regN=floatReg, regD=fixedReg} :: code end | codeExtended _ (UnboxTagFloat { floatSize=Double64, source, dest }, code) = let val addrReg = getAllocatedGenReg source and valReg = getAllocatedFPReg dest in if is32in64 then loadRegScaledDouble{regT=valReg, regN=workReg1, unitOffset=0} :: toInstr(AddShiftedReg{regM=addrReg, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code else loadRegScaledDouble{regT=valReg, regN=addrReg, unitOffset=0} :: code end | codeExtended _ (UnboxTagFloat { floatSize=Float32, source, dest }, code) = let val addrReg = getAllocatedGenReg source and valReg = getAllocatedFPReg dest (* This is tagged in native 64-bits. In 32-in-64 we're loading 32-bits so we can use an indexed load directly. *) in if is32in64 then loadRegIndexedFloat{regN=X_Base32in64, regM=addrReg, regT=valReg, option=ExtUXTX ScaleOrShift} :: code else moveGeneralToFloat{regN=workReg1, regD=valReg} :: logicalShiftRight{shift=0w32, regN=addrReg, regD=workReg1} :: code end | codeExtended _ (LoadAcquire{dest, base, loadType, ...}, code) = toInstr(LoadAcquireReg{regT=getAllocatedGenReg dest, regN=getAllocatedGenReg base, loadType=loadType}) :: code | codeExtended _ (StoreRelease{source, base, loadType, ...}, code) = toInstr(StoreReleaseReg{regT=getAllocatedGenReg source, regN=getAllocatedGenReg base, loadType=loadType}) :: code | codeExtended _ (BitFieldShift{ source, dest, isSigned, length, immr, imms }, code) = let - val bfInstr = - case (isSigned, length) of - (true, OpSize64) => signedBitfieldMove64 - | (false, OpSize64) => unsignedBitfieldMove64 - | (true, OpSize32) => signedBitfieldMove32 - | (false, OpSize32) => unsignedBitfieldMove32 val srcReg = getAllocatedGenReg source val destReg = getAllocatedGenReg dest in - bfInstr{immr=immr, imms=imms, regN=srcReg, regD=destReg} :: code + toInstr(BitField{immr=immr, imms=imms, regN=srcReg, regD=destReg, + bitfieldKind=if isSigned then BFSigned else BFUnsigned, opSize=length}) :: code end | codeExtended _ (BitFieldInsert{ source, destAsSource, dest, length, immr, imms }, code) = let (* If we're using BitFieldMove we retain some of the bits of the destination. The higher levels require us to treat that as a source. 
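That is why destAsSource is copied into the destination first and why the source and destination registers must differ: if they were the same register that copy would overwrite the very field that is about to be inserted.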
*) val sourceReg = getAllocatedGenReg source and destReg = getAllocatedGenReg dest val _ = sourceReg = destReg andalso raise InternalError "codeExtended: bitfield: dest=source" - val bfInstr = - case length of OpSize64 => bitfieldMove64 | OpSize32 => bitfieldMove32 in - bfInstr{immr=immr, imms=imms, regN=getAllocatedGenReg source, regD=destReg} :: + toInstr(BitField{immr=immr, imms=imms, regN=getAllocatedGenReg source, regD=destReg, bitfieldKind=BFInsert, opSize=length}) :: moveIfNecessary({src=getAllocatedGenReg destAsSource, dst=destReg}, code) end | codeExtended {flow} (IndexedCaseOperation{testReg}, code) = let (* testReg contains the original value after the lowest value has been subtracted. Since both the original value and the lowest value were tagged it contains a shifted but untagged value. *) (* This should only be within a block with an IndexedBr flow type. *) val cases = case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation" val caseLabels = map getBlockLabel cases val tableLabel = createLabel() val startOfCase = setLabel tableLabel :: branchRegister workReg1 :: (* Add the value shifted by one since it's already shifted. *) toInstr(AddShiftedReg{regN=workReg1, regD=workReg1, regM=getAllocatedGenReg testReg, shift=ShiftLSL 0w1, setFlags=false, opSize=OpSize64}) :: loadLabelAddress(workReg1, tableLabel) :: code val addCases = List.foldl (fn (label, code) => unconditionalBranch label :: code) startOfCase caseLabels in addCases end | codeExtended {flow} (PushExceptionHandler, code) = let (* This should only be within a block with a SetHandler flow type. *) val handleLabel = case flow of SetHandler{ handler, ...} => handler | _ => raise InternalError "codeGenICode: PushExceptionHandler" val labelRef = getBlockLabel handleLabel in (* Push the old handler and the handler entry point and set the "current handler" to point to the stack after we've pushed these. *) storeRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset} :: storeRegPreIndex{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8} :: storeRegPreIndex{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8} :: loadLabelAddress(workReg2, labelRef) :: loadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset} :: code end | codeExtended _ (PopExceptionHandler, code) = (* Remove and discard the handler we've set up. Pop the previous handler and put into "current handler". *) storeRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset} :: loadRegPostIndex{regT=workReg2, regN=X_MLStackPtr, byteOffset=8} :: loadRegPostIndex{regT=workReg1, regN=X_MLStackPtr, byteOffset=8} :: code | codeExtended _ (BeginHandler{packetReg}, code) = let val beginHandleCode = (* Remove the handler entry for this handler. *) storeRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset} :: loadRegPostIndex{regT=workReg2, regN=X_MLStackPtr, byteOffset=8} :: loadRegPostIndex{regT=workReg1, regN=X_MLStackPtr, byteOffset=8} :: (* The exception raise code resets the stack pointer to the value in the exception handler so this is probably redundant. Leave it for the moment, *) loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset} :: code in moveIfNecessary({src=X0, dst=getAllocatedGenReg packetReg }, beginHandleCode) end | codeExtended _ (CompareByteVectors{vec1Addr, vec2Addr, length, ...}, code) = let (* Construct a loop to compare two vectors of bytes. 
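The flag-setting subtraction before the loop makes a zero length compare as equal, and the loop only continues while the bytes compare equal, so on exit the condition flags always describe the first differing pair, or equality if there was none. In effect it computes the following, assuming both addresses have at least len readable bytes (a sketch of the semantics, not the generated code):
            fun compareBytes (v1, v2, len) = (* illustrative only *)
            let
                fun cmp i =
                    if i = len then EQUAL
                    else case Word8.compare(Word8Vector.sub(v1, i), Word8Vector.sub(v2, i)) of
                        EQUAL => cmp(i+1)
                    |   unequal => unequal
            in
                cmp 0
            end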
*) val vec1Reg = getAllocatedGenReg vec1Addr and vec2Reg = getAllocatedGenReg vec2Addr and lenReg = getAllocatedGenReg length val loopLabel = createLabel() and exitLabel = createLabel() in code <::> (* Set the CC to Equal before we start in case length = 0 *) toInstr(SubShiftedReg{regM=lenReg, regN=lenReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) <::> setLabel loopLabel <::> (* Start of loop *) compareBranchZero(lenReg, exitLabel) <::> (* Go to the end when len = zero *) (* Load the bytes for the comparison and increment each. *) toInstr(LoadRegUnscaled{regT=workReg1, regN=vec1Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8}) <::> toInstr(LoadRegUnscaled{regT=workReg2, regN=vec2Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8}) <::> toInstr(SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64}) <::> (* Decr len *) (* Compare *) toInstr(SubShiftedReg{regM=workReg2, regN=workReg1, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}) <::> conditionalBranch(CondEqual, loopLabel) <::> (* Loop if they're equal *) setLabel exitLabel end | codeExtended _ (BlockMove{srcAddr, destAddr, length, isByteMove}, code) = let (* Construct a loop to move the data. *) val srcReg = getAllocatedGenReg srcAddr and destReg = getAllocatedGenReg destAddr and lenReg = getAllocatedGenReg length val loopLabel = createLabel() and exitLabel = createLabel() val (offset, loadType) = if isByteMove then (1, Load8) else if is32in64 then (4, Load32) else (8, Load64) in code <::> setLabel loopLabel (* Start of loop *) <::> compareBranchZero(lenReg, exitLabel) <::> (* Exit when length = 0 *) toInstr(LoadRegUnscaled{regT=workReg1, regN=srcReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex}) <::> toInstr(StoreRegUnscaled{regT=workReg1, regN=destReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex}) <::> toInstr(SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64}) <::> (* Decr len *) unconditionalBranch loopLabel <::> (* Back to the start *) setLabel exitLabel end | codeExtended _ (AddSubXSP{ source, dest, isAdd }, code) = let val allocFreeCode = toInstr((if isAdd then AddExtendedReg else SubExtendedReg) {regM=getAllocatedGenReg source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64}) :: code in case dest of ZeroReg => allocFreeCode | SomeReg destReg => (* We have to use add here to get the SP into the destination instead of the usual move. *) toInstr(AddImmediate{regN=XSP, regD=getAllocatedGenReg destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64}) :: allocFreeCode end | codeExtended _ (TouchValue _, code) = code (* Don't need to do anything now. *) (* Used in mutex operations. 
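LoadAcquireExclusive and StoreReleaseExclusive are the usual ARM load-exclusive/store-exclusive pair: the load marks the address for exclusive access and the store only succeeds if nothing has disturbed that reservation in between, writing 0 to the result register on success and 1 on failure so that the caller can retry.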
*) | codeExtended _ (LoadAcquireExclusive{ base, dest }, code) = loadAcquireExclusiveRegister{regN=getAllocatedGenReg base, regT=getAllocatedGenReg dest} :: code | codeExtended _ (StoreReleaseExclusive{ base, source, result }, code) = storeReleaseExclusiveRegister{regS=getAllocatedGenReg result, regT=getAllocatedGenRegOrZero source, regN=getAllocatedGenReg base} :: code | codeExtended _ (MemoryBarrier, code) = code | codeExtended _ (ConvertIntToFloat{ source, dest, srcSize, destSize}, code) = toInstr(CvtIntToFP{regN=getAllocatedGenReg source, regD=getAllocatedFPReg dest, floatSize=destSize, opSize=srcSize}) :: code | codeExtended _ (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, code) = toInstr(CvtFloatToInt{regN=getAllocatedFPReg source, regD=getAllocatedGenReg dest, round=rounding, floatSize=srcSize, opSize=destSize}) :: code | codeExtended _ (UnaryFloatingPt{ source, dest, fpOp}, code) = toInstr(FPUnaryOp{regN=getAllocatedFPReg source, regD=getAllocatedFPReg dest, fpOp=fpOp}) :: code | codeExtended _ (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, code) = toInstr(FPBinaryOp{regN=getAllocatedFPReg arg1, regM=getAllocatedFPReg arg2, regD=getAllocatedFPReg dest, floatSize=opSize, fpOp=fpOp}) :: code | codeExtended _ (CompareFloatingPoint{ arg1, arg2, opSize, ...}, code) = toInstr(FPComparison{regN=getAllocatedFPReg arg1, regM=getAllocatedFPReg arg2, floatSize=opSize}) :: code local (* processed - set to true when a block has been processed. *) val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) (* Find the blocks that reference this one. This isn't essential but allows us to try to generate blocks in the order of the control flow. This in turn may allow us to use short branches rather than long ones. *) val labelRefs = Array.array(numBlocks, []) datatype flowCode = FlowCodeSimple of int | FlowCodeCMove of {code: instr list, trueJump: int, falseJump: int} (* Process this recursively to set the references. If we have unreachable blocks, perhaps because they've been merged, we don't want to include them in the reference counting. This shouldn't happen now that IdentifyReferences removes unreferenced blocks. *) fun setReferences fromLabel toLabel = case Array.sub(labelRefs, toLabel) of [] => (* Not yet visited at all. *) let val ExtendedBasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) val refs = case flow of ExitCode => [] | Unconditional lab => [lab] | Conditional{trueJump, falseJump, ... } => [trueJump, falseJump] | IndexedBr labs => labs | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val () = if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () in List.app (setReferences toLabel) refs end | refs => (* We've visiting this at least once. Just add us to the list. *) Array.update(labelRefs, toLabel, fromLabel :: refs) val _ = setReferences 0 0 (* Process the blocks. We keep the "stack" explicit rather than using recursion because this allows us to select both arms of a conditional branch sooner. *) fun genCode(toDo, lastFlow, code) = case List.filter (not o haveProcessed) toDo of [] => let (* There's nothing left to do. We may need to add a final branch to the end. 
*) val finalBranch = case lastFlow of ExitCode => [] | IndexedBr _ => [] | Unconditional dest => [toInstr(UnconditionalBranch(getBlockLabel dest))] | Conditional { condition, trueJump, falseJump, ...} => [ toInstr(UnconditionalBranch(getBlockLabel falseJump)), toInstr(ConditionalBranch(condition, getBlockLabel trueJump)) ] | SetHandler { continue, ...} => [toInstr(UnconditionalBranch(getBlockLabel continue))] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [toInstr(UnconditionalBranch(getBlockLabel continue))] in finalBranch @ code (* Done. *) end | stillToDo as head :: _ => let local (* Check the references. If all the sources that lead up to this have already been we won't have any backward jumps. *) fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) val continuation = case lastFlow of ExitCode => NONE | IndexedBr _ => NONE (* We could put the last branch in here. *) | Unconditional dest => if not (haveProcessed dest) andalso available dest then SOME(FlowCodeSimple dest) else NONE | Conditional {trueJump, falseJump, condition, ...} => (* We can usually choose either destination and in nearly all cases it won't matter. The default branch is not to take forward jumps so if there is reason to believe that one branch is more likely we should follow that branch now and leave the other. If we have Cond(No)Overflow we assume that overflow is unusual. If one branch raises an exception we assume that that is unusual. *) let val (first, second) = case (condition, Vector.sub(blocks, falseJump)) of (CondNoOverflow, _) => (trueJump, falseJump) | (_, ExtendedBasicBlock{ flow=ExitCode, block, ...}) => if List.exists(fn{instr=RaiseExceptionPacket _, ...} => true | _ => false) block then (trueJump, falseJump) else (falseJump, trueJump) | _ => (falseJump, trueJump) in if not (haveProcessed first) andalso available first then SOME(FlowCodeSimple first) else if not (haveProcessed second) andalso available second then SOME(FlowCodeSimple second) else NONE end | SetHandler { continue, ... } => (* We want the continuation if possible. We'll need a branch round the handler so that won't help. *) if not (haveProcessed continue) andalso available continue then SOME(FlowCodeSimple continue) else NONE | UnconditionalHandle _ => NONE | ConditionalHandle _ => NONE in (* First choice - continue the existing block. Second choice - the first item whose sources have all been processed. Third choice - something from the list. *) val picked = case continuation of SOME c => c | NONE => case List.find available stillToDo of SOME c => FlowCodeSimple c | NONE => FlowCodeSimple head end in case picked of FlowCodeSimple picked => let val () = Array.update(processed, picked, true) (* Code to terminate the previous block. *) val startCode = case lastFlow of ExitCode => [] | IndexedBr _ => [] | UnconditionalHandle _ => [] | Unconditional dest => if dest = picked then [] else [toInstr(UnconditionalBranch(getBlockLabel dest))] | ConditionalHandle { continue, ...} => if continue = picked then [] else [toInstr(UnconditionalBranch(getBlockLabel continue))] | SetHandler { continue, ... } => if continue = picked then [] else [toInstr(UnconditionalBranch(getBlockLabel continue))] | Conditional { condition, trueJump, falseJump, ...} => if picked = falseJump (* Usual case. *) then [toInstr(ConditionalBranch(condition, getBlockLabel trueJump))] else if picked = trueJump then (* We have a jump to the true condition. Invert the jump. This is more than an optimisation. 
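For example a Conditional branch on CondEqual whose true block is the one being emitted next becomes a single branch-if-not-equal to the false block's label, with the true code simply falling through.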
Because this immediately precedes the true block we're not going to generate a label. *) [toInstr(ConditionalBranch(invertTest condition, getBlockLabel falseJump))] else [ toInstr(UnconditionalBranch(getBlockLabel falseJump)), toInstr(ConditionalBranch(condition, getBlockLabel trueJump)) ] (* Code-generate the body with the code we've done so far at the end. Add a label at the start if necessary. *) local (* If the previous block dropped through to this and this was the only reference then we don't need a label. *) fun onlyJumpingHere (lab: int) = if lab <> picked then false else case Array.sub(labelRefs, picked) of [singleton] => singleton = lab | _ => false val noLabel = case lastFlow of ExitCode => picked = 0 (* Unless this was the first block. *) | Unconditional dest => onlyJumpingHere dest | Conditional { trueJump, falseJump, ...} => onlyJumpingHere trueJump orelse onlyJumpingHere falseJump | IndexedBr _ => false | SetHandler _ => false | UnconditionalHandle _ => false | ConditionalHandle { continue, ...} => onlyJumpingHere continue in val startLabel = if noLabel then [] else [toInstr(SetLabel(getBlockLabel picked))] end val ExtendedBasicBlock { flow, block, ...} = Vector.sub(blocks, picked) local fun genCodeBlock({instr, ...}, code) = codeExtended {flow=flow} (instr, code) in val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] in genCode(addSet @ stillToDo, flow, bodyCode) end | FlowCodeCMove{code, trueJump, falseJump} => let (* We've generated a conditional move and possibly a return. If the trueJump and falseJump are only ever referenced from this block they're done, otherwise we still need to do them. *) val _ = case Array.sub(labelRefs, trueJump) of [_] => Array.update(processed, trueJump, true) | _ => () val _ = case Array.sub(labelRefs, falseJump) of [_] => Array.update(processed, falseJump, true) | _ => () val ExtendedBasicBlock { flow, ...} = Vector.sub(blocks, trueJump) val addSet = case flow of ExitCode => [] | Unconditional dest => [dest] | _ => raise InternalError "FlowCodeCMove" in genCode(addSet @ stillToDo, flow, code) end end in val ops = genCode([0], ExitCode, [toInstr(SetLabel startOfFunctionLabel)]) end in Arm64Assembly.generateCode{instrs=List.rev ops, name=functionName, resultClosure=resultClosure, parameters=debugSwitches, profileObject=profileObject} end structure Sharing = struct type intSet = intSet and extendedBasicBlock = extendedBasicBlock and regProperty = regProperty and reg = reg and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML index 64790f53..35cf0f0a 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML @@ -1,719 +1,727 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *) functor Arm64PreAssembly( structure Arm64Assembly: ARM64ASSEMBLY structure Debug: DEBUG structure Pretty: PRETTY ): ARM64PREASSEMBLY = struct open Arm64Assembly exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd (* Many of the datatypes are inherited from Arm64Assembly *) datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP and unscaledType = NoUpdate | PreIndex | PostIndex and condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate and bitfieldKind = BFUnsigned | BFSigned | BFInsert and brRegType = BRRBranch | BRRAndLink | BRRReturn datatype precode = (* Basic instructions *) AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind} | DivideRegs of {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize} | LogicalShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool} | LoadRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | LoadFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | StoreRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | StoreFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | LoadRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | LoadFPRegIndexed of {regT: vReg, 
regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType} | StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType} | LoadRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, unscaledType: unscaledType} | StoreRegPair of{ regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, unscaledType: unscaledType} | LoadFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, unscaledType: unscaledType} | StoreFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, unscaledType: unscaledType} | ConditionalSet of {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet} | BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind} | ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection} | BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp} (* Floating point *) | MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize} | MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize} | CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize} | CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize} | FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary} | FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize} | FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary} (* Branches and Labels. *) | SetLabel of labels | ConditionalBranch of condition * labels | UnconditionalBranch of labels | BranchAndLink of labels | BranchReg of {regD: xReg, brRegType: brRegType } | LoadLabelAddress of xReg * labels | TestBitBranch of { test: xReg, bit: Word8.word, label: labels, onZero: bool } | CompareBranch of { test: xReg, label: labels, onZero: bool, opSize: opSize } (* Composite instructions *) | MoveXRegToXReg of {sReg: xReg, dReg: xReg} | LoadNonAddr of xReg * Word64.word | LoadAddr of xReg * machineWord | RTSTrap of { rtsEntry: int, work: xReg, save: xReg list } fun toAssembler([], code) = code | toAssembler(AddImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addImmediate | (OpSize32, false) => addImmediate32 | (OpSize64, true) => addSImmediate | (OpSize32, true) => addSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(SubImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subImmediate | (OpSize32, false) => subImmediate32 | (OpSize64, true) => subSImmediate | (OpSize32, true) => subSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(AddShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addShiftedReg | (OpSize32, false) => addShiftedReg32 | (OpSize64, true) => addSShiftedReg | (OpSize32, true) => addSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(SubShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val 
instr = case (opSize, setFlags) of (OpSize64, false) => subShiftedReg | (OpSize32, false) => subShiftedReg32 | (OpSize64, true) => subSShiftedReg | (OpSize32, true) => subSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(AddExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = (* Add/SubExtended are only used to access XSP. *) let val instr = case (opSize, setFlags) of (OpSize64, false) => addExtendedReg | (OpSize32, false) => raise InternalError "AddExtendedReg; 32" | (OpSize64, true) => addSExtendedReg | (OpSize32, true) => raise InternalError "AddExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(SubExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subExtendedReg | (OpSize32, false) => raise InternalError "SubExtendedReg; 32" | (OpSize64, true) => subSExtendedReg | (OpSize32, true) => raise InternalError "SubExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(MultiplyAndAddSub{regM, regN, regA, regD, multKind} :: rest, code) = let val instr = case multKind of MultAdd32 => multiplyAndAdd32{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub32 => multiplyAndSub32{regM=regM, regN=regN, regA=regA, regD=regD} | MultAdd64 => multiplyAndAdd{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub64 => multiplyAndSub{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultAddLong => signedMultiplyAndAddLong{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultHigh => signedMultiplyHigh{regM=regM, regN=regN, regD=regD} in toAssembler(rest, code <::> instr) end | toAssembler(DivideRegs{regM, regN, regD, isSigned, opSize} :: rest, code) = let val instr = case (isSigned, opSize) of (true, OpSize64) => signedDivide | (true, OpSize32) => signedDivide32 | (false, OpSize64) => unsignedDivide | (false, OpSize32) => unsignedDivide32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(LogicalShiftedReg{regM, regN, regD, shift, logOp, opSize, setFlags} :: rest, code) = let val instr = case (logOp, setFlags, opSize) of (LogAnd, false, OpSize64) => andShiftedReg | (LogAnd, true, OpSize64) => andsShiftedReg | (LogOr, false, OpSize64) => orrShiftedReg | (LogXor, false, OpSize64) => eorShiftedReg | (LogAnd, false, OpSize32) => andShiftedReg32 | (LogAnd, true, OpSize32) => andsShiftedReg32 | (LogOr, false, OpSize32) => orrShiftedReg32 | (LogXor, false, OpSize32) => eorShiftedReg32 | _ => raise InternalError "setFlags not valid with OR or XOR" (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them.
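(They are the BIC, ORN and EON shifted-register forms, together with BICS for the flag-setting AND variant.)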
*) in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD, shift=shift}) end | toAssembler(LoadRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => loadRegScaled | Load32 => loadRegScaled32 | Load16 => loadRegScaled16 | Load8 => loadRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => storeRegScaled | Load32 => storeRegScaled32 | Load16 => storeRegScaled16 | Load8 => storeRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => loadRegScaledFloat | Double64 => loadRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => storeRegScaledFloat | Double64 => storeRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => loadRegUnscaled | (Load32, NoUpdate) => loadRegUnscaled32 | (Load16, NoUpdate) => loadRegUnscaled16 | (Load8, NoUpdate) => loadRegUnscaledByte | (Load64, PreIndex) => loadRegPreIndex | (Load32, PreIndex) => loadRegPreIndex32 | (Load16, PreIndex) => raise InternalError "loadRegPreIndex16" | (Load8, PreIndex) => loadRegPreIndexByte | (Load64, PostIndex) => loadRegPostIndex | (Load32, PostIndex) => loadRegPostIndex32 | (Load16, PostIndex) => raise InternalError "loadRegPostIndex16" | (Load8, PostIndex) => loadRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => loadRegUnscaledFloat | (Double64, NoUpdate) => loadRegUnscaledDouble | _ => raise InternalError "LoadFPRegUnscaled: pre/post indexed" in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => storeRegUnscaled | (Load32, NoUpdate) => storeRegUnscaled32 | (Load16, NoUpdate) => storeRegUnscaled16 | (Load8, NoUpdate) => storeRegUnscaledByte | (Load64, PreIndex) => storeRegPreIndex | (Load32, PreIndex) => storeRegPreIndex32 | (Load16, PreIndex) => raise InternalError "storeRegPreIndex16" | (Load8, PreIndex) => storeRegPreIndexByte | (Load64, PostIndex) => storeRegPostIndex | (Load32, PostIndex) => storeRegPostIndex32 | (Load16, PostIndex) => raise InternalError "storeRegPostIndex16" | (Load8, PostIndex) => storeRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => storeRegUnscaledFloat | (Double64, NoUpdate) => storeRegUnscaledDouble | _ => raise InternalError "StoreFPRegUnscaled: pre/post indexed" in toAssembler(rest, code 
<::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadAcquireReg{regN, regT, loadType} :: rest, code) = let val loadInstr = case loadType of Load64 => loadAcquire | Load32 => loadAcquire32 | Load8 => loadAcquireByte | _ => raise InternalError "LoadAcquire: Unsupported size" (* Not used *) in toAssembler(rest, code <::> loadInstr{regT=regT, regN=regN}) end | toAssembler(LoadRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => loadRegIndexed | Load32 => loadRegIndexed32 | Load16 => loadRegIndexed16 | Load8 => loadRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => storeRegIndexed | Load32 => storeRegIndexed32 | Load16 => storeRegIndexed16 | Load8 => storeRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(LoadFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => loadRegIndexedFloat | Double64 => loadRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => storeRegIndexedFloat | Double64 => storeRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreReleaseReg{regN, regT, loadType} :: rest, code) = let val storeInstr = case loadType of Load64 => storeRelease | Load32 => storeRelease32 | Load8 => storeReleaseByte | _ => raise InternalError "StoreRelease: Unsupported size" (* Not used *) in toAssembler(rest, code <::> storeInstr{regT=regT, regN=regN}) end | toAssembler(LoadRegPair{ regT1, regT2, regN, unitOffset, unscaledType} :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(StoreRegPair{ regT1, regT2, regN, unitOffset, unscaledType} :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(LoadFPRegPair{ regT1, regT2, regN, unitOffset, unscaledType} :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(StoreFPRegPair{ regT1, regT2, regN, unitOffset, unscaledType} :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(ConditionalSet{regD, regTrue, regFalse, cond, condSet} :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(BitField{immr, imms, regN, regD, opSize, bitfieldKind} :: rest, code) = let - in - toAssembler(rest, code <::> (raise Fail "TODO")) + val bfInstr = + case (bitfieldKind, opSize) of + (BFSigned, OpSize64) => signedBitfieldMove64 + | (BFUnsigned, OpSize64) => unsignedBitfieldMove64 + | (BFInsert, OpSize64) => bitfieldMove64 + | (BFSigned, OpSize32) => signedBitfieldMove32 + | (BFUnsigned, OpSize32) => unsignedBitfieldMove32 + | (BFInsert, OpSize32) => bitfieldMove32 + in + toAssembler(rest, code <::> bfInstr{immr=immr, imms=imms, regN=regN, regD=regD}) end | toAssembler(ShiftRegisterVariable{regM, regN, regD, opSize, shiftDirection} :: rest, code) = let val instr = case (shiftDirection, opSize) of (ShiftLeft, OpSize64) => logicalShiftLeftVariable | (ShiftLeft, OpSize32) => logicalShiftLeftVariable32 | (ShiftRightLogical, OpSize64) => logicalShiftRightVariable | (ShiftRightLogical, OpSize32) => logicalShiftRightVariable32 | 
(ShiftRightArithmetic, OpSize64) => arithmeticShiftRightVariable | (ShiftRightArithmetic, OpSize32) => arithmeticShiftRightVariable32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(BitwiseLogical{ bits, regN, regD, opSize, setFlags, logOp} :: rest, code) = let val instr = case (logOp, setFlags, opSize) of (LogAnd, false, OpSize64) => bitwiseAndImmediate | (LogAnd, true, OpSize64) => bitwiseAndSImmediate | (LogOr, false, OpSize64) => bitwiseOrImmediate | (LogXor, false, OpSize64) => bitwiseXorImmediate | (LogAnd, false, OpSize32) => bitwiseAndImmediate32 | (LogAnd, true, OpSize32) => bitwiseAndSImmediate32 | (LogOr, false, OpSize32) => bitwiseOrImmediate32 | (LogXor, false, OpSize32) => bitwiseXorImmediate32 | _ => raise InternalError "flags not valid with OR or XOR" in toAssembler(rest, code <::> instr{regN=regN, regD=regD, bits=bits}) end | toAssembler(MoveGeneralToFP{ regN, regD, floatSize} :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(MoveFPToGeneral{ regN, regD, floatSize} :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(CvtIntToFP{ regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (opSize, floatSize) of (OpSize32, Float32) => convertInt32ToFloat | (OpSize64, Float32) => convertIntToFloat | (OpSize32, Double64) => convertInt32ToDouble | (OpSize64, Double64) => convertIntToDouble in toAssembler(rest, code <::> instr{regN=regN, regD=regD}) end | toAssembler(CvtFloatToInt{ round, regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (floatSize, opSize) of (Float32, OpSize32) => convertFloatToInt32 | (Float32, OpSize64) => convertFloatToInt | (Double64, OpSize32) => convertDoubleToInt32 | (Double64, OpSize64) => convertDoubleToInt in toAssembler(rest, code <::> instr round {regN=regN, regD=regD}) end | toAssembler(FPBinaryOp{ regM, regN, regD, floatSize, fpOp} :: rest, code) = let val instr = case (fpOp, floatSize) of (MultiplyFP, Float32) => multiplyFloat | (DivideFP, Float32) => divideFloat | (AddFP, Float32) => addFloat | (SubtractFP, Float32) => subtractFloat | (MultiplyFP, Double64) => multiplyDouble | (DivideFP, Double64) => divideDouble | (AddFP, Double64) => addDouble | (SubtractFP, Double64) => subtractDouble in toAssembler(rest, code <::> instr {regN=regN, regM=regM, regD=regD}) end | toAssembler(FPComparison{ regM, regN, floatSize} :: rest, code) = toAssembler(rest, code <::> (case floatSize of Float32 => compareFloat | Double64 => compareDouble){regN=regN, regM=regM}) | toAssembler(FPUnaryOp{ regN, regD, fpOp} :: rest, code) = let val instr = case fpOp of NegFloat => negFloat | NegDouble => negDouble | AbsFloat => absFloat | AbsDouble => absDouble | ConvFloatToDble => convertFloatToDouble | ConvDbleToFloat => convertDoubleToFloat in toAssembler(rest, code <::> instr {regN=regN, regD=regD}) end | toAssembler(SetLabel label :: rest, code) = toAssembler(rest, code <::> setLabel label) | toAssembler(ConditionalBranch(cond, label) :: rest, code) = toAssembler(rest, code <::> conditionalBranch(cond, label)) | toAssembler(UnconditionalBranch label :: rest, code) = toAssembler(rest, code <::> unconditionalBranch label) | toAssembler(BranchAndLink label :: rest, code) = toAssembler(rest, code <::> branchAndLink label) | toAssembler(BranchReg{regD, brRegType=BRRBranch} :: rest, code) = toAssembler(rest, code <::> branchRegister regD) | toAssembler(BranchReg{regD, brRegType=BRRAndLink} :: rest, code) = toAssembler(rest, code <::> branchAndLinkReg 
regD) | toAssembler(BranchReg{regD, brRegType=BRRReturn} :: rest, code) = toAssembler(rest, code <::> returnRegister regD) | toAssembler(LoadLabelAddress(reg, label) :: rest, code) = toAssembler(rest, code <::> loadLabelAddress(reg, label)) | toAssembler(TestBitBranch{ test, bit, label, onZero } :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) | toAssembler(CompareBranch{ test, label, onZero, opSize } :: rest, code) = toAssembler(rest, code <::> (raise Fail "TODO")) (* Register-register moves - special case for XSP. *) | toAssembler(MoveXRegToXReg{sReg=XSP, dReg} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=XSP, regD=dReg, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg=XSP} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=sReg, regD=XSP, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg} :: rest, code) = toAssembler(rest, code <::> orrShiftedReg{regN=XZero, regM=sReg, regD=dReg, shift=ShiftNone}) | toAssembler(LoadNonAddr(xReg, value) :: rest, code) = let (* Load a non-address constant. Tries to use movz/movn/movk if that can be done easily, otherwise uses loadNonAddressConstant to load the value from the non-address constant area. *) fun extW (v, h) = Word.andb(Word.fromLarge(LargeWord.>>(Word64.toLarge v, h*0w16)), 0wxffff) val hw0 = extW(value, 0w3) and hw1 = extW(value, 0w2) and hw2 = extW(value, 0w1) and hw3 = extW(value, 0w0) val nextCode = if value < 0wx100000000 then let (* 32-bit constants can be loaded with at most a movz and a movk, but several cases can be reduced to a single instruction because all 32-bit operations set the top word to zero. *) val hi = hw2 and lo = hw3 in if hi = 0w0 then code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} else if hi = 0wxffff then code <::> moveNot32{regD=xReg, immediate=Word.xorb(0wxffff, lo), shift=0w0} else if lo = 0w0 then code <::> moveZero32{regD=xReg, immediate=hi, shift=0w16} else if isEncodableBitPattern(value, WordSize32) then code <::> bitwiseOrImmediate32{bits=value, regN=XZero, regD=xReg} else (* Have to use two instructions *) code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} <::> moveKeep{regD=xReg, immediate=hi, shift=0w16} end else if hw0 = 0wxffff andalso hw1 = 0wxffff andalso hw2 = 0wxffff then code <::> moveNot{regD=xReg, immediate=Word.xorb(0wxffff, hw3), shift=0w0} else if hw1 = 0w0 andalso hw2 = 0w0 then (* This is common for length words with a flags byte *) code <::> moveZero32{regD=xReg, immediate=hw3, shift=0w0} <::> moveKeep{regD=xReg, immediate=hw0, shift=0w48} else code <::> loadNonAddressConstant(xReg, value) in toAssembler(rest, nextCode) end | toAssembler(LoadAddr(dReg, source) :: rest, code) = toAssembler(rest, loadAddressConstant(dReg, source) :: code) | toAssembler(RTSTrap{ rtsEntry, work, save } :: rest, code) = let (* Because X30 is used in the branchAndLink it has to be pushed across any trap.
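As an illustration (purely a restatement of the code below, assuming X0 is the only register other than X30 to be preserved): RTSTrap{rtsEntry=heapOverflowCallOffset, work=X16, save=[X0, X30]} expands, in forward order, to
              storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}
              loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset}
              branchAndLinkReg X16
              registerMask [X0]
              loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}
           i.e. X30 is pushed onto the ML stack, the RTS entry is fetched from the assembly-interface block and called, the register mask follows the call and X30 is then popped.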
*) val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save in toAssembler(rest, code <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=rtsEntry} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) ) end fun toInstr precode = case toAssembler([precode], []) of [single] => single | _ => raise InternalError "toInstr" (* Take a forward order sequence of instructions and generate a forward order output sequence. *) fun toInstrs precode = List.rev(toAssembler(precode, [])) (* Constant shifts are encoded in the immr and imms fields of the bit-field instruction. *) fun shiftConstant{ direction, regD, regN, shift, opSize } = let val (bitfieldKind, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (BFUnsigned, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (BFUnsigned, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (BFUnsigned, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (BFUnsigned, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (BFSigned, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (BFSigned, shift, 0wx1f) in BitField{ regN=regN, regD=regD, opSize=opSize, immr=immr, imms=imms, bitfieldKind=bitfieldKind } end (* These sequences are used both in the ML code-generator and in the FFI code so it is convenient to have them here and share the code. *) local fun allocateWords(fixedReg, workReg, words, bytes, regMask, code) = let val label = createLabel() val (lengthWord, setLength, flagShift) = if is32in64 then (~4, Load32, 0w24) else (~8, Load64, 0w56) in code <::> (* Subtract the number of bytes required from the heap pointer. *) SubImmediate{regN=X_MLHeapAllocPtr, regD=fixedReg, immed=bytes, shifted=false, opSize=OpSize64, setFlags=false} <::> (* Compare the result with the heap limit. *) SubShiftedReg{regM=X_MLHeapLimit, regN=fixedReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, label) <::> RTSTrap{rtsEntry=heapOverflowCallOffset, work=X16, save=regMask} <::> SetLabel label <::> (* Update the heap pointer. *) MoveXRegToXReg{sReg=fixedReg, dReg=X_MLHeapAllocPtr} <::> LoadNonAddr(workReg, Word64.orb(words, Word64.<<(Word64.fromLarge(Word8.toLarge Address.F_bytes), flagShift))) <::> (* Store the length word. Have to use the unaligned version because offset is -ve. 
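As a worked example (just restating the values set up above): boxing a Double64 in native 64-bit mode calls allocateWords with words = 0w1 and bytes = 0w16, so the value loaded into workReg is Word64.orb(0w1, Word64.<<(Word64.fromLarge(Word8.toLarge Address.F_bytes), 0w56)), i.e. a length of one with the byte-object flag in the top byte; in 32-in-64 mode the length is 0w2 (two 32-bit words) and the flag byte sits at bit 24.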
*) StoreRegUnscaled{regT=workReg, regN=fixedReg, byteOffset= lengthWord, loadType=setLength, unscaledType=NoUpdate} end fun absoluteAddressToIndex(reg, code) = if is32in64 then code <::> SubShiftedReg{regM=X_Base32in64, regN=reg, regD=reg, shift=ShiftNone, opSize=OpSize64, setFlags=false} <::> shiftConstant{direction=ShiftRightLogical, regN=reg, regD=reg, shift=0w2, opSize=OpSize64} else code in fun boxDouble({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Double64}) and boxSysWord({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreRegScaled{regT=source, regN=destination, unitOffset=0, loadType=Load64}) and boxFloat({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, 0w1, 0w8, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Float32}) end fun generateFinalCode {instrs, name, parameters, resultClosure, profileObject} = raise Fail "TODO" structure Sharing = struct type closureRef = closureRef type loadType = loadType type opSize = opSize type logicalOp = logicalOp type floatSize = floatSize type shiftDirection = shiftDirection type multKind = multKind type fpUnary = fpUnary type fpBinary = fpBinary type unscaledType = unscaledType type condSet = condSet type bitfieldKind = bitfieldKind type brRegType = brRegType type precode = precode type xReg = xReg type vReg = vReg type labels = labels type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale type instr = instr end end;
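A minimal usage sketch of the BitField lowering this hunk adds (illustrative only, not part of the patch; it assumes evaluation inside Arm64PreAssembly, or with its contents opened, and that X0 and X1 are the xReg values exported by Arm64Assembly):

    (* A constant left shift is expressed as an unsigned bitfield move. *)
    val lslThree =
        shiftConstant{direction=ShiftLeft, regD=X0, regN=X1, shift=0w3, opSize=OpSize64}
    (* shiftConstant builds
           BitField{regN=X1, regD=X0, opSize=OpSize64, immr=0w61, imms=0w60, bitfieldKind=BFUnsigned}
       and toInstr now maps that to unsignedBitfieldMove64{immr=0w61, imms=0w60, regN=X1, regD=X0},
       i.e. UBFM x0, x1, #61, #60, the canonical encoding of LSL x0, x1, #3. *)
    val asAssembly = toInstr lslThree

Before this hunk the BitField case raised Fail "TODO", so nothing built from shiftConstant could be assembled.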