diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig index af704a3e..c02b0c65 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig @@ -1,49 +1,56 @@ (* Copyright David C. J. Matthews 2021 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature ARM64ALLOCATEREGISTERS = sig - type intSet and extendedBasicBlock and regProperty and reg - type address = Address.address + type intSet and extendedBasicBlock and regProperty and reg and ('genReg, 'optGenReg, 'fpReg) basicBlock + and xReg and vReg + + type address = Address.address + + type basicBlockConcrete = (xReg, xReg, vReg) basicBlock type conflictState = { conflicts: intSet, realConflicts: reg list } datatype allocateResult = - AllocateSuccess of reg vector + AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list val allocateRegisters : { blocks: extendedBasicBlock vector, regStates: conflictState vector, regProps: regProperty vector } -> allocateResult val nGenRegs: int structure Sharing: sig type intSet = intSet and extendedBasicBlock = extendedBasicBlock + and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg + and xReg = xReg + and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEGENERATE.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEGENERATE.sig index 6d3a596f..e3521e70 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEGENERATE.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEGENERATE.sig @@ -1,38 +1,38 @@ (* Copyright David C. J. Matthews 2021 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature ARM64ICODEGENERATE = sig - type intSet and extendedBasicBlock and regProperty and reg - type closureRef + type ('genReg, 'optGenReg, 'fpReg) basicBlock and xReg and vReg and closureRef + + type basicBlockConcrete = (xReg, xReg, vReg) basicBlock val icodeToArm64Code : { - blocks: extendedBasicBlock vector, allocatedRegisters: reg vector, functionName: string, + blocks: basicBlockConcrete vector, functionName: string, stackRequired: int, debugSwitches: Universal.universal list, resultClosure: closureRef, profileObject: Address.machineWord } -> unit structure Sharing: sig - type intSet = intSet - and extendedBasicBlock = extendedBasicBlock - and regProperty = regProperty - and reg = reg - and closureRef = closureRef + type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock + and xReg = xReg + and vReg = vReg + and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML index 0bb96319..c2489b46 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML @@ -1,572 +1,811 @@ (* Copyright David C. J. Matthews 2016-21 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64AllocateRegisters( structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure ConflictSets: ARM64ICODECONFLICTS structure IntSet: INTSET sharing Arm64ICode.Sharing = Identify.Sharing = ConflictSets.Sharing = IntSet ): ARM64ALLOCATEREGISTERS = struct open Arm64ICode open Identify open ConflictSets open IntSet open Address exception InternalError = Misc.InternalError datatype allocateResult = - AllocateSuccess of reg vector + AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list (* General registers. X24 is used as the global heap base in 32-in-64. X30 is the return address set by blr but is otherwise a general register. Put the argument registers at the end of the list so they'll only be used when hinted. *) val generalRegisters = map GenReg ([X9, X10, X11, X12, X13, X14, X15, X19, X20, X21, X22, X23, X0, X1, X2, X3, X4, X5, X6, X7, X8, X30] @ (if is32in64 then [] else [X24])) val floatingPtRegisters = map FPReg [V7, V6, V5, V4, V3, V2, V1] fun allocateRegisters{blocks, regStates, regProps, ...} = let val maxPRegs = Vector.length regStates and numBlocks = Vector.length blocks (* Hint values. The idea of hints is that by using a hinted register we may avoid an unnecessary move instruction. realHints is set when a pseudo-register is going to be loaded from a specific register e.g. a register argument, or moved into one e.g. ecx for a shift. 
friends is set to the other pseudo-registers that may be associated with the pseudo-register. E.g. the argument and destination of an arithmetic operation where choosing the same register for each may avoid a move. *) val realHints = Array.array(maxPRegs, NONE: reg option) (* Sources and destinations. These indicate the registers that are the sources and destinations of the indexing register and are used as hints. If a register has been allocated for a source or destination we may be able to reuse it. *) val sourceRegs = Array.array(maxPRegs, []: int list) and destinationRegs = Array.array(maxPRegs, []: int list) local fun addRealHint(r, reg) = case Array.sub(realHints, r) of NONE => Array.update(realHints, r, SOME reg) | SOME _ => () fun addSourceAndDestinationHint{src, dst} = let val {conflicts, ...} = Vector.sub(regStates, src) in (* If they conflict we can't add them. *) if member(dst, conflicts) then () else let val currentDests = Array.sub(destinationRegs, src) val currentSources = Array.sub(sourceRegs, dst) in if List.exists(fn i => i=dst) currentDests then () else Array.update(destinationRegs, src, dst :: currentDests); if List.exists(fn i => i=src) currentSources then () else Array.update(sourceRegs, dst, src :: currentSources) end end in (* Add the hints to steer the register allocation. The idea is to avoid moves between registers by getting values into the appropriate register in advance. *) fun addHints{instr=MoveRegister{source=PReg sreg, dest=PReg dreg, ...}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints{instr=BitFieldInsert{destAsSource=PReg dsReg, dest=PReg dReg, ...}, ...} = (* The "destAsSource" is the destination if some bits are retained. *) addSourceAndDestinationHint {src=dsReg, dst=dReg} | addHints{instr=ReturnResultFromFunction { resultReg=PReg resReg, returnReg=PReg retReg, ... }, ...} = ( addRealHint(resReg, GenReg X0); addRealHint(retReg, GenReg X30) (* It may be there from earlier. *) ) | addHints{instr=JumpLoop{regArgs, ...}, ...} = let fun addRegArg {src=ArgInReg(PReg argReg), dst=PReg resReg} = addSourceAndDestinationHint {dst=resReg, src=argReg} | addRegArg {src=ArgOnStack _, ...} = () in List.app addRegArg regArgs end | addHints{instr=BeginFunction{regArgs, ...}, ...} = List.app (fn (PReg pr, reg) => addRealHint(pr, GenReg reg)) regArgs | addHints{instr=TailRecursiveCall{regArgs, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app setHint regArgs end | addHints{instr=FunctionCall{regArgs, dest=PReg dreg, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in addRealHint(dreg, GenReg X0); List.app setHint regArgs end (* Exception packets are in X0 *) | addHints{instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints{instr=BeginHandler{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints _ = () end val allocatedRegs = Array.array(maxPRegs, NONE: reg option) val failures = ref []: intSet list ref (* Find a real register for a preg. 1. If a register is already allocated use that. 2. Try the "preferred" register if one has been given. 3. Try the realHints value if there is one. 4. See if there is a "friend" that has an appropriate register 5. Look at all the registers and find one. 
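A hypothetical trace may make the order clearer; the pseudo-register numbers and the choice of X19 are purely illustrative and not taken from real output:
    findRegister(7, NONE, generalRegisters)
        step 1: allocatedRegs[7] is NONE, so there is no existing allocation
        steps 2 and 3: no preferred register was passed and realHints[7] is NONE
        step 4: the source/destination links reach pseudo-register 3, which already has X19 and does not conflict, so X19 is chosen for 7 as well
The MoveRegister that linked 3 and 7 then needs no machine instruction at all.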
*) fun findRegister(r, pref, regSet) = case Array.sub(allocatedRegs, r) of SOME reg => reg | NONE => let val {conflicts, realConflicts, ...} = Vector.sub(regStates, r) (* Find the registers we've already allocated that may conflict. *) val conflictingRegs = List.mapPartial(fn i => Array.sub(allocatedRegs, i)) (setToList conflicts) @ realConflicts fun isFree aReg = not (List.exists(fn i => i=aReg) conflictingRegs) fun tryAReg NONE = NONE | tryAReg (somePref as SOME prefReg) = if isFree prefReg then (Array.update(allocatedRegs, r, somePref); somePref) else NONE fun findAReg [] = ( (* This failed. We're going to have to spill something. *) failures := conflicts :: ! failures; hd regSet (* Return a register to satisfy everything. *) ) | findAReg (reg::regs) = if isFree reg then (Array.update(allocatedRegs, r, SOME reg); reg) else findAReg regs (* Search the sources and destinations to see if a register has already been allocated or there is a hint. *) fun findAFriend([], [], _) = NONE | findAFriend(aDest :: otherDests, sources, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aDest) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aDest)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the destinations of the destinations to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(destinationRegs, aDest)) in findAFriend(otherDests @ fOfF, sources, aDest :: alreadySeen) end end | findAFriend([], aSrc :: otherSrcs, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aSrc) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aSrc)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the sources of the sources to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(sourceRegs, aSrc)) in findAFriend([], otherSrcs @ fOfF, aSrc :: alreadySeen) end end (* See if there is a friend that has a register already or a hint. Friends are registers that don't conflict and can possibly avoid an extra move. *) (* fun findAFriend([], _) = NONE | findAFriend(friend :: tail, old) = let val possReg = case Array.sub(allocatedRegs, friend) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, friend)) in case possReg of reg as SOME _ => reg | NONE => let (* Add a friend of a friend to the list if we haven't already seen it and it doesn't conflict. *) fun newFriend f = not(List.exists (fn n => n=f) old) andalso not(List.exists (fn n => n=f) conflicts) val fOfF = List.filter newFriend (Array.sub(friends, friend)) in findAFriend(tail @ fOfF, friend :: old) end end*) in case tryAReg pref of SOME r => r | NONE => ( case tryAReg (Array.sub(realHints, r)) of SOME r => r | NONE => ( case findAFriend(Array.sub(destinationRegs, r), Array.sub(sourceRegs, r), []) of SOME r => r (* Look through the registers to find one that's free. 
*) | NONE => findAReg regSet ) ) end fun allocateRegister args = ignore(findRegister args) val allocateFindRegister = findRegister fun allocateGenReg(PReg r) = allocateRegister(r, NONE, generalRegisters) and allocateFloatReg(PReg r) = allocateRegister(r, NONE, floatingPtRegisters) and allocateOptGenReg(SomeReg reg) = allocateGenReg reg | allocateOptGenReg ZeroReg = () val allocateGenRegs = List.app allocateGenReg and allocateFloatRegs = List.app allocateFloatReg fun registerAllocate{instr=MoveRegister{source=PReg sreg, dest=PReg dreg}, ...} = let val realDestReg = findRegister(dreg, NONE, generalRegisters) in allocateRegister(sreg, SOME realDestReg, generalRegisters) end | registerAllocate{instr=LoadNonAddressConstant{dest, ...}, ...} = allocateGenReg dest | registerAllocate{instr=LoadAddressConstant{dest, ...}, ...} = allocateGenReg dest | registerAllocate{instr=LoadWithConstantOffset{dest, base, ...}, ...} = ( allocateGenReg dest; allocateGenReg base ) | registerAllocate{instr=LoadFPWithConstantOffset{dest, base, ...}, ...} = ( allocateFloatReg dest; allocateGenReg base ) | registerAllocate{instr=LoadWithIndexedOffset{dest, base, index, ...}, ...} = ( allocateGenReg dest; allocateGenRegs[base, index] ) | registerAllocate{instr=LoadFPWithIndexedOffset{dest, base, index, ...}, ...} = ( allocateFloatReg dest; allocateGenRegs[base, index] ) | registerAllocate{instr=GetThreadId { dest, ...}, ...} = allocateGenReg dest | registerAllocate{instr=ObjectIndexAddressToAbsolute{dest, source, ...}, ...} = allocateGenRegs[dest, source] | registerAllocate{instr=AbsoluteToObjectIndex{dest, source, ...}, ...} = allocateGenRegs[dest, source] | registerAllocate({instr=AllocateMemoryFixed{ dest, saveRegs, ...}, ...}) = allocateGenRegs (dest :: saveRegs) | registerAllocate({instr=AllocateMemoryVariable{ size, dest, saveRegs, ...}, ...}) = allocateGenRegs (size :: dest :: saveRegs) | registerAllocate({instr=InitialiseMem{ size, addr, init}, ...}) = allocateGenRegs [size, addr, init] | registerAllocate{instr=BeginLoop, ...} = () | registerAllocate({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}) = ( List.app (fn {src=ArgInReg sreg, dst} => (allocateGenReg sreg; allocateGenReg dst) | _ => ()) regArgs; List.app (fn {src=ArgInReg sreg, ...} => allocateGenReg sreg | _ => ()) stackArgs; case checkInterrupt of SOME regs => List.app(fn reg => allocateGenReg reg) regs | NONE => () ) | registerAllocate{instr=StoreWithConstantOffset{source, base, ...}, ...} = allocateGenRegs[source, base] | registerAllocate{instr=StoreFPWithConstantOffset{source, base, ...}, ...} = ( allocateFloatReg source; allocateGenReg base ) | registerAllocate{instr=StoreWithIndexedOffset{source, base, index, ...}, ...} = allocateGenRegs[source, base, index] | registerAllocate{instr=StoreFPWithIndexedOffset{source, base, index, ...}, ...} = ( allocateFloatReg source; allocateGenRegs[base, index] ) | registerAllocate{instr=AddSubImmediate{ source, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenReg source ) | registerAllocate{instr=AddSubRegister{ base, shifted, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenRegs[base, shifted] ) | registerAllocate{instr=LogicalImmediate{ source, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenReg source ) | registerAllocate{instr=LogicalRegister{ base, shifted, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenRegs[base, shifted] ) | registerAllocate{instr=ShiftRegister{ dest, source, shift, ... 
}, ...} = allocateGenRegs[dest, source, shift] | registerAllocate{instr=Multiplication{ dest, sourceA, sourceM, sourceN, ... }, ...} = (allocateGenRegs[dest, sourceM, sourceN]; allocateOptGenReg sourceA) | registerAllocate{instr=Division{ dest, dividend, divisor, ... }, ...} = allocateGenRegs[dest, dividend, divisor] | registerAllocate{instr=BeginFunction{regArgs, ...}, ...} = (* Check that every argument has a register allocated including any that are unused. Unused arguments should be discarded at a higher level because we could allocate a different register and copy the argument register only to discard it. *) allocateGenRegs(List.map #1 regArgs) | registerAllocate({instr=TailRecursiveCall{regArgs=regArgs, stackArgs=stackArgs, ...}, ...}) = let fun allocateRegArg(ArgInReg argReg, _) = allocateGenReg argReg | allocateRegArg _ = () in (* We've already hinted the arguments but we want to allocate these first to reduce the chance that they'll be used for stack arguments. *) List.app allocateRegArg regArgs; List.app (fn {src=ArgInReg argReg, ...} => allocateGenReg argReg | _ => ()) stackArgs end | registerAllocate({instr=FunctionCall{regArgs=regArgs, stackArgs=stackArgs, dest=PReg dReg, saveRegs, ...}, ...}) = let fun allocateRegArg(ArgInReg argReg, _) = allocateGenReg argReg | allocateRegArg _ = () in (* We've already hinted the arguments but we want to allocate these first to reduce the chance that they'll be used for stack arguments. *) List.app allocateRegArg regArgs; List.app (fn ArgInReg argReg => allocateGenReg argReg | _ => ()) stackArgs; allocateGenRegs saveRegs; (* Result will be in X0. *) allocateRegister(dReg, SOME(GenReg X0), [GenReg X0]) end | registerAllocate{instr=ReturnResultFromFunction { resultReg=PReg resReg, returnReg, ... }, ...} = ( allocateRegister(resReg, SOME(GenReg X0), [GenReg X0] (* It MUST be in this register *)); allocateGenReg returnReg ) | registerAllocate{instr=RaiseExceptionPacket{packetReg}, ...} = allocateGenReg packetReg | registerAllocate{instr=PushToStack{ source, ... }, ...} = allocateGenReg source | registerAllocate{instr=LoadStack{ dest, ... }, ...} = allocateGenReg dest | registerAllocate{instr=StoreToStack{ source, ... }, ...} = allocateGenReg source | registerAllocate{instr=ContainerAddress{ dest, ... }, ...} = allocateGenReg dest | registerAllocate{instr=ResetStackPtr _, ...} = () | registerAllocate({instr=TagValue{source, dest, ...}, ...}) = allocateGenRegs[source, dest] | registerAllocate({instr=UntagValue{source, dest, ...}, ...}) = allocateGenRegs[source, dest] | registerAllocate({instr=BoxLarge{source, dest, saveRegs}, ...}) = (allocateGenRegs saveRegs; allocateGenRegs[source, dest]) | registerAllocate({instr=UnboxLarge{source, dest}, ...}) = allocateGenRegs[source, dest] | registerAllocate({instr=BoxTagFloat{source, dest, saveRegs, ...}, ...}) = ( allocateGenRegs saveRegs; allocateFloatReg source; allocateGenReg dest ) | registerAllocate({instr=UnboxTagFloat{source, dest, ...}, ...}) = ( allocateFloatReg dest; allocateGenReg source ) | registerAllocate{instr=LoadAcquire{dest, base, ...}, ...} = allocateGenRegs[dest, base] | registerAllocate{instr=StoreRelease{source, base, ...}, ...} = allocateGenRegs[source, base] | registerAllocate{instr=BitFieldShift{ source, dest, ... }, ...} = allocateGenRegs[source, dest] | registerAllocate{instr=BitFieldInsert{ source, dest, destAsSource, ... 
}, ...} = allocateGenRegs[source, destAsSource, dest] | registerAllocate({instr=IndexedCaseOperation{testReg}, ...}) = allocateGenReg testReg | registerAllocate({instr=PushExceptionHandler, ...}) = () | registerAllocate({instr=PopExceptionHandler, ...}) = () | registerAllocate({instr=BeginHandler{packetReg}, ...}) = allocateGenReg packetReg | registerAllocate({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ...}, ...}) = allocateGenRegs[vec1Addr, vec2Addr, length] | registerAllocate({instr=BlockMove{srcAddr, destAddr, length, ...}, ...}) = allocateGenRegs[srcAddr, destAddr, length] | registerAllocate({instr=AddSubXSP{source, dest, ...}, ...}) = ( allocateOptGenReg dest; allocateGenReg source ) | registerAllocate({instr=TouchValue{source, ...}, ...}) = allocateGenReg source | registerAllocate({instr=LoadAcquireExclusive{ base, dest }, ...}) = allocateGenRegs[base, dest] | registerAllocate({instr=StoreReleaseExclusive{ base, source, result }, ...}) = ( allocateGenRegs[base, result]; allocateOptGenReg source ) | registerAllocate({instr=MemoryBarrier, ...}) = () | registerAllocate({instr=ConvertIntToFloat{ source, dest, ...}, ...}) = (allocateFloatReg dest; allocateGenReg source) | registerAllocate({instr=ConvertFloatToInt{ source, dest, ...}, ...}) = (allocateGenReg dest; allocateFloatReg source) | registerAllocate({instr=UnaryFloatingPt{ source, dest, ...}, ...}) = allocateFloatRegs[source, dest] | registerAllocate({instr=BinaryFloatingPoint{ arg1, arg2, dest, ...}, ...}) = allocateFloatRegs[arg1, arg2, dest] | registerAllocate({instr=CompareFloatingPoint{ arg1, arg2, ...}, ...}) = allocateFloatRegs[arg1, arg2] (* Depth-first scan. *) val visited = Array.array(numBlocks, false) fun processBlocks blockNo = if Array.sub(visited, blockNo) then () (* Done or currently being done. *) else let val () = Array.update(visited, blockNo, true) val ExtendedBasicBlock { flow, block, passThrough, exports, ...} = Vector.sub(blocks, blockNo) (* Add the hints for this block before the actual allocation of registers. *) val _ = List.app addHints block val () = (* Process the dependencies first. *) case flow of ExitCode => () | Unconditional m => processBlocks m | Conditional {trueJump, falseJump, ...} => (processBlocks trueJump; processBlocks falseJump) | IndexedBr cases => List.app processBlocks cases | SetHandler{ handler, continue } => (processBlocks handler; processBlocks continue) | UnconditionalHandle _ => () | ConditionalHandle { continue, ...} => processBlocks continue (* Now this block. *) local (* We assume that anything used later will have been allocated a register. This is generally true except for a loop where the use may occur earlier. *) val exported = setToList passThrough @ setToList exports fun findAReg r = case Vector.sub(regProps, r) of RegPropStack _ => () | _ => ignore(allocateFindRegister(r, NONE, generalRegisters)) in val () = List.app findAReg exported end in List.foldr(fn (c, ()) => registerAllocate c) () block end + + (* Turn the abstract icode into a concrete version by allocating the registers. 
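For example, if the allocator has put pseudo-registers 3 and 5 into X19 and X20 (the numbering here is hypothetical), then
    MoveRegister{source=PReg 3, dest=PReg 5}
in the abstract code becomes
    MoveRegister{source=X19, dest=X20}
in the concrete block: the instruction is unchanged apart from each PReg being replaced by the xReg or vReg chosen for it.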
*) + local + fun getAllocatedReg(PReg r) = getOpt(Array.sub(allocatedRegs, r), GenReg XZero) + + fun getAllocatedGenReg r = + case getAllocatedReg r of GenReg r => r | FPReg _ => raise InternalError "getAllocateGenReg: returned FP Reg" + and getAllocatedFPReg r = + case getAllocatedReg r of FPReg r => r | GenReg _ => raise InternalError "getAllocatedFPReg: returned Gen Reg" + + fun getAllocatedGenRegOrZero ZeroReg = XZero + | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg + + fun getAllocatedArg(ArgInReg reg) = ArgInReg(getAllocatedGenReg reg) + | getAllocatedArg(ArgOnStack stackLoc) = ArgOnStack stackLoc + + val getSaveRegs = List.map getAllocatedGenReg + + fun absToConcrete({instr=MoveRegister{ source, dest}, ...}): iCodeConcrete = + MoveRegister { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} + + | absToConcrete({instr=LoadNonAddressConstant { dest, source}, ...}) = + LoadNonAddressConstant { dest=getAllocatedGenReg dest, source=source} + + | absToConcrete({instr=LoadAddressConstant { dest, source}, ...}) = + LoadAddressConstant { dest=getAllocatedGenReg dest, source=source} + + | absToConcrete({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...}) = + LoadWithConstantOffset { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest, byteOffset=byteOffset, loadType=loadType} + + | absToConcrete({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...}) = + LoadFPWithConstantOffset { base=getAllocatedGenReg base, dest=getAllocatedFPReg dest, byteOffset=byteOffset, floatSize=floatSize} + + | absToConcrete({instr=LoadWithIndexedOffset { base, dest, index, loadType}, ...}) = + LoadWithIndexedOffset { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest, index=getAllocatedGenReg index, loadType=loadType} + + | absToConcrete({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize}, ...}) = + LoadFPWithIndexedOffset { base=getAllocatedGenReg base, dest=getAllocatedFPReg dest, index=getAllocatedGenReg index, floatSize=floatSize} + + | absToConcrete({instr=GetThreadId { dest}, ...}) = GetThreadId { dest=getAllocatedGenReg dest} + + | absToConcrete({instr=ObjectIndexAddressToAbsolute { source, dest}, ...}) = + ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} + + | absToConcrete({instr=AbsoluteToObjectIndex { source, dest}, ...}) = + AbsoluteToObjectIndex { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} + + | absToConcrete({instr=AllocateMemoryFixed { bytesRequired, dest, saveRegs }, ...}) = + AllocateMemoryFixed { dest=getAllocatedGenReg dest, bytesRequired=bytesRequired, saveRegs=getSaveRegs saveRegs} + + | absToConcrete({instr=AllocateMemoryVariable{size, dest, saveRegs}, ...}) = + AllocateMemoryVariable{size=getAllocatedGenReg size, dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs} + + | absToConcrete({instr=InitialiseMem{size, addr, init}, ...}) = + InitialiseMem{size=getAllocatedGenReg size, addr=getAllocatedGenReg addr, init=getAllocatedGenReg init} + + | absToConcrete({instr=BeginLoop, ...}) = BeginLoop + + | absToConcrete({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}) = + let + fun getStackArg{src, wordOffset, stackloc} = + {src=getAllocatedArg src, wordOffset=wordOffset, stackloc=stackloc} + and getRegArg{src, dst} = {src=getAllocatedArg src, dst=getAllocatedGenReg dst} + in + JumpLoop{ regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, + checkInterrupt=Option.map getSaveRegs checkInterrupt} + end + + | 
absToConcrete({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...}) = + StoreWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, byteOffset=byteOffset, loadType=loadType} + + | absToConcrete({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...}) = + StoreFPWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, byteOffset=byteOffset, floatSize=floatSize} + + | absToConcrete({instr=StoreWithIndexedOffset { base, source, index, loadType}, ...}) = + StoreWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, index=getAllocatedGenReg index, loadType=loadType} + + | absToConcrete({instr=StoreFPWithIndexedOffset { base, source, index, floatSize}, ...}) = + StoreFPWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, index=getAllocatedGenReg index, floatSize=floatSize} + + | absToConcrete({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...}) = + AddSubImmediate { source=getAllocatedGenReg source, dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, + immed=immed, isAdd=isAdd, length=length} + + | absToConcrete({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, ...}) = + AddSubRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, + dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, + isAdd=isAdd, length=length, shift=shift} + + | absToConcrete({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...}) = + LogicalImmediate { source=getAllocatedGenReg source, dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, + immed=immed, logOp=logOp, length=length} + + | absToConcrete({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...}) = + LogicalRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, + dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, + logOp=logOp, length=length, shift=shift} + + | absToConcrete({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...}) = + ShiftRegister { source=getAllocatedGenReg source, shift=getAllocatedGenReg shift, dest=getAllocatedGenReg dest, + direction=direction, opSize=opSize} + + | absToConcrete({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...}) = + Multiplication { kind=kind, sourceA=getAllocatedGenRegOrZero sourceA, sourceM=getAllocatedGenReg sourceM, + sourceN=getAllocatedGenReg sourceN, dest=getAllocatedGenReg dest} + + | absToConcrete({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...}) = + Division { isSigned=isSigned, dividend=getAllocatedGenReg dividend, divisor=getAllocatedGenReg divisor, + dest=getAllocatedGenReg dest, opSize=opSize} + + | absToConcrete({instr=BeginFunction {regArgs, stackArgs}, ...}) = + BeginFunction {regArgs=map (fn (src, dst) => (getAllocatedGenReg src, dst)) regArgs, stackArgs=stackArgs} + + | absToConcrete({instr=FunctionCall{callKind, regArgs, stackArgs, dest, containers, saveRegs, ...}, ...}) = + let + fun getRegArg(src, dst) = (getAllocatedArg src, dst) + in + FunctionCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getAllocatedArg stackArgs, + dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs, containers=containers} + end + + | absToConcrete({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, ...}) = + let + fun getRegArg(src, dst) = (getAllocatedArg src, dst) + and getStackArg{src, stack} = {src=getAllocatedArg src, stack=stack} + in + 
TailRecursiveCall{ callKind=callKind, regArgs=map getRegArg regArgs, + stackArgs=map getStackArg stackArgs, stackAdjust=stackAdjust, currStackSize=currStackSize} + end + + | absToConcrete({instr=ReturnResultFromFunction{resultReg, returnReg, numStackArgs}, ...}) = + ReturnResultFromFunction{resultReg=getAllocatedGenReg resultReg, returnReg=getAllocatedGenReg returnReg, numStackArgs=numStackArgs} + + | absToConcrete({instr=RaiseExceptionPacket{packetReg}, ...}) = + RaiseExceptionPacket{packetReg=getAllocatedGenReg packetReg} + + | absToConcrete({instr=PushToStack{ source, container, copies }, ...}) = + PushToStack{source=getAllocatedGenReg source, container=container, copies=copies} + + | absToConcrete({instr=LoadStack{ dest, container, field, wordOffset}, ...}) = + LoadStack{ dest=getAllocatedGenReg dest, container=container, field=field, wordOffset=wordOffset } + + | absToConcrete({instr=StoreToStack{source, container, field, stackOffset}, ...}) = + StoreToStack{source=getAllocatedGenReg source, container=container, field=field, stackOffset=stackOffset} + + | absToConcrete({instr=ContainerAddress{ dest, container, stackOffset}, ...}) = + ContainerAddress{ dest=getAllocatedGenReg dest, container=container, stackOffset=stackOffset } + + | absToConcrete({instr=ResetStackPtr {numWords}, ...}) = ResetStackPtr {numWords=numWords} + + | absToConcrete({instr=TagValue{source, dest, isSigned, opSize}, ...}) = + TagValue{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, isSigned=isSigned, opSize=opSize} + + | absToConcrete({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...}) = + UntagValue{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, isSigned=isSigned, opSize=opSize} + + | absToConcrete({instr=BoxLarge{source, dest, saveRegs, ...}, ...}) = + BoxLarge{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs} + + | absToConcrete({instr=UnboxLarge{source, dest}, ...}) = + UnboxLarge{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} + + | absToConcrete({instr=BoxTagFloat{floatSize, source, dest, saveRegs}, ...}) = + BoxTagFloat{floatSize=floatSize, source=getAllocatedFPReg source, + dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs} + + | absToConcrete({instr=UnboxTagFloat{floatSize, source, dest}, ...}) = + UnboxTagFloat{floatSize=floatSize, source=getAllocatedGenReg source, dest=getAllocatedFPReg dest} + + | absToConcrete({instr=LoadAcquire { base, dest, loadType}, ...}) = + LoadAcquire { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest, loadType=loadType} + + | absToConcrete({instr=StoreRelease { base, source, loadType}, ...}) = + StoreRelease{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, loadType=loadType} + + | absToConcrete({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...}) = + BitFieldShift { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, isSigned=isSigned, + immr=immr, imms=imms, length=length} + + | absToConcrete({instr=BitFieldInsert{source, destAsSource, dest, length, immr, imms}, ...}) = + BitFieldInsert { source=getAllocatedGenReg source, destAsSource=getAllocatedGenReg destAsSource, dest=getAllocatedGenReg dest, + immr=immr, imms=imms, length=length} + + | absToConcrete({instr=IndexedCaseOperation{testReg}, ...}) = + IndexedCaseOperation{testReg=getAllocatedGenReg testReg} + + | absToConcrete({instr=PushExceptionHandler, ...}) = PushExceptionHandler + + | absToConcrete({instr=PopExceptionHandler, ...}) = 
PopExceptionHandler + + | absToConcrete({instr=BeginHandler{packetReg}, ...}) = BeginHandler{packetReg=getAllocatedGenReg packetReg} + + | absToConcrete({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...}) = + CompareByteVectors{vec1Addr=getAllocatedGenReg vec1Addr, vec2Addr=getAllocatedGenReg vec2Addr, + length=getAllocatedGenReg length, ccRef=ccRef} + + | absToConcrete({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...}) = + BlockMove{srcAddr=getAllocatedGenReg srcAddr, destAddr=getAllocatedGenReg destAddr, + length=getAllocatedGenReg length, isByteMove=isByteMove} + + | absToConcrete({instr=AddSubXSP{source, dest, isAdd}, ...}) = + AddSubXSP { source=getAllocatedGenReg source, dest=getAllocatedGenRegOrZero dest, isAdd=isAdd} + + | absToConcrete({instr=TouchValue{source}, ...}) = TouchValue { source=getAllocatedGenReg source} + + | absToConcrete({instr=LoadAcquireExclusive{ base, dest }, ...}) = + LoadAcquireExclusive { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest} + + | absToConcrete({instr=StoreReleaseExclusive{ base, source, result }, ...}) = + StoreReleaseExclusive{ base=getAllocatedGenReg base, source=getAllocatedGenRegOrZero source, + result=getAllocatedGenReg result} + + | absToConcrete({instr=MemoryBarrier, ...}) = MemoryBarrier + + | absToConcrete({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...}) = + ConvertIntToFloat{ source=getAllocatedGenReg source, dest=getAllocatedFPReg dest, srcSize=srcSize, destSize=destSize} + + | absToConcrete({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...}) = + ConvertFloatToInt{ source=getAllocatedFPReg source, dest=getAllocatedGenReg dest, srcSize=srcSize, + destSize=destSize, rounding=rounding} + + | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp}, ...}) = + UnaryFloatingPt{ source=getAllocatedFPReg source, dest=getAllocatedFPReg dest, fpOp=fpOp} + + | absToConcrete({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...}) = + BinaryFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, + dest=getAllocatedFPReg dest, fpOp=fpOp, opSize=opSize} + + | absToConcrete({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...}) = + CompareFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, opSize=opSize, ccRef=ccRef} + + in + fun concreteBlock(ExtendedBasicBlock{ block, flow, ...}) = + BasicBlock{block=map absToConcrete block, flow=flow} + end + in processBlocks 0; (* If the failures list is empty we succeeded. *) case !failures of [] => (* Return the allocation vector. We may have unused registers, *) - AllocateSuccess(Vector.tabulate(maxPRegs, fn i => getOpt(Array.sub(allocatedRegs, i), GenReg XZero))) + AllocateSuccess(Vector.map concreteBlock blocks) (* Else we'll have to spill something. 
*) | l => AllocateFailure l end val nGenRegs = List.length generalRegisters structure Sharing = struct type intSet = intSet and extendedBasicBlock = extendedBasicBlock + and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg + and xReg = xReg + and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML index 68e3093b..7d4bdfb2 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML @@ -1,1292 +1,1282 @@ (* Copyright David C. J. Matthews 2021 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeToArm64Code( structure Arm64PreAssembly: ARM64PREASSEMBLY structure Debug: DEBUG structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET structure Pretty: PRETTY structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end sharing Arm64PreAssembly.Sharing = Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ICODEGENERATE = struct open Identify open Arm64ICode open Arm64PreAssembly open Address exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd (* These aren't currently used for anything. *) val workReg1 = X16 and workReg2 = X17 - fun icodeToArm64Code {blocks, functionName, stackRequired, debugSwitches, allocatedRegisters: reg vector, resultClosure, profileObject, ...} = + fun icodeToArm64Code {blocks: basicBlockConcrete vector, functionName, stackRequired, debugSwitches, resultClosure, profileObject, ...} = let val numBlocks = Vector.length blocks - fun getAllocatedReg(PReg r) = Vector.sub(allocatedRegisters, r) - - fun getAllocatedGenReg r = - case getAllocatedReg r of GenReg r => r | FPReg _ => raise InternalError "getAllocateGenReg: returned FP Reg" - and getAllocatedFPReg r = - case getAllocatedReg r of FPReg r => r | GenReg _ => raise InternalError "getAllocatedFPReg: returned Gen Reg" - - fun getAllocatedGenRegOrZero ZeroReg = XZero - | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg (* Load from and store to stack. 
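The word offset is turned into a byte offset by the addressing mode itself.  As a sketch, with X_MLStackPtr as the stack base and purely illustrative offsets:
    loadFromStack(dest, 3, code)     uses a scaled load, ldr dest, [X_MLStackPtr, #24]
    loadFromStack(dest, 5000, code)  first loads 5000 into dest and then does an indexed load, ldr dest, [X_MLStackPtr, dest, lsl #3]
Offsets of 4096 words and above take the second path because the scaled form only has a 12-bit immediate field.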
*) fun loadFromStack(destReg, wordOffset, code) = if wordOffset >= 4096 then (LoadRegIndexed{regT=destReg, regN=X_MLStackPtr, regM=destReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(destReg, Word64.fromInt wordOffset)] @ code else (LoadRegScaled{regT=destReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code and storeToStack(sourceReg, wordOffset, workReg, code) = if wordOffset >= 4096 then (StoreRegIndexed{regT=sourceReg, regN=X_MLStackPtr, regM=workReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(workReg, Word64.fromInt wordOffset)] @ code else (StoreRegScaled{regT=sourceReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code datatype srcAndDest = IsInReg of xReg | IsOnStack of int local (* The registers are numbered from 0. Choose values that don't conflict with the stack addresses. *) fun regNo(XReg r) = ~1 - Word8.toInt r | regNo _ = ~1 - 31 type node = {src: srcAndDest, dst: srcAndDest } fun nodeAddress({dst=IsInReg r, ...}: node) = regNo r | nodeAddress({dst=IsOnStack a, ...}) = a fun arcs({src=IsOnStack wordOffset, ...}: node) = [wordOffset] | arcs{src=IsInReg r, ...} = [regNo r] in val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } end (* This is a general function for moving values into registers or to the stack where it is possible that the source values might also be in use as destinations. The stack is used for destinations only for tail recursive calls. *) fun moveMultipleValues(moves, code) = let fun moveValues ([], code) = code (* We're done. *) | moveValues (arguments, code) = let (* stronglyConnectedComponents does two things. It detects loops where it's not possible to move items without breaking the loop but more importantly it orders the dependencies so that if there are no loops we can load the source and store it in the destination knowing that we won't overwrite anything we might later need. *) val ordered = stronglyConnected arguments fun loadIntoReg(IsInReg sReg, dReg, code) = if sReg = dReg then code else (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | loadIntoReg(IsOnStack wordOffset, dReg, code) = loadFromStack(dReg, wordOffset, code) fun moveEachValue ([], code) = code | moveEachValue ([{dst=IsInReg dReg, src}] :: rest, code) = moveEachValue(rest, loadIntoReg(src, dReg, code)) | moveEachValue ([{dst=IsOnStack wordOffset, src=IsInReg sReg}] :: rest, code) = (* Storing into the stack. *) moveEachValue(rest, storeToStack(sReg, wordOffset, workReg1, code)) | moveEachValue ([{dst=IsOnStack dstOffset, src=IsOnStack srcOffset}] :: rest, code) = (* Copy a stack location - needs a load and store unless the address is the same. *) if dstOffset = srcOffset then moveEachValue(rest, code) else moveEachValue(rest, storeToStack(workReg2, dstOffset, workReg1, loadFromStack(workReg2, srcOffset, code))) | moveEachValue((cycle as first :: _ :: _) :: rest, code) = (* We have a cycle. *) let (* We need to exchange some of the arguments. Doing an exchange here will set the destination with the correct source. However we have to process every subsequent entry with the swapped registers. That may well mean that one of those entries becomes trivial. We also need to rerun stronglyConnectedComponents on at least the rest of this cycle. It's easiest to flatten the rest and do everything. *) (* Exchange the source and destination. We don't have an exchange instruction and there's a further complication. 
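In the simplest case, where both values are already in registers, the exchange is just a three-move swap through workReg2; with X0 and X1 standing in for the two values (illustration only) the sequence is, in execution order,
    mov x17, x1      workReg2 holds the second value
    mov x1,  x0      the first value moves into place
    mov x0,  x17     and the saved value completes the swap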
We could be copying between stack locations and their offsets could be > 4096. Since we've only got two work registers we need to use the hardware stack as an extra location. Stack-stack exchange is very rare so the extra overhead to handle the general case is worth it. *) local fun storeToDest(sReg, IsInReg dReg, _, code) = (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | storeToDest(sReg, IsOnStack wordOffset, work, code) = storeToStack(sReg, wordOffset, work, code) in fun exchange(IsInReg arg1Reg, arg2, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg1Reg}) :: storeToDest(arg1Reg, arg2, workReg1, loadIntoReg(arg2, workReg2, code)) | exchange(arg1, IsInReg arg2Reg, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg2Reg}) :: storeToDest(arg2Reg, arg1, workReg1, loadIntoReg(arg1, workReg2, code)) | exchange(arg1, arg2, code) = (* The hardware stack must be 16-byte aligned. *) storeToDest(workReg2, arg2, workReg1, (LoadRegUnscaled{regT=workReg2, regN=XSP, byteOffset=16, loadType=Load64, unscaledType=PostIndex}) :: storeToDest(workReg2, arg1, workReg1, loadIntoReg(arg2, workReg2, (StoreRegUnscaled{regT=workReg2, regN=XSP, byteOffset= ~16, loadType=Load64, unscaledType=PreIndex}) :: loadIntoReg(arg1, workReg2, code)))) end (* Try to find either a register-register move or a register-stack move. If not use the first. If there's a stack-register move there will also be a register-stack so we don't need to look for both. *) val {dst=selectDst, src=selectSrc} = first (* This includes this entry but after the swap we'll eliminate it. *) val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) val destAsSource = selectDst fun match(s1: srcAndDest, s2) = s1 = s2 fun swapSources{src, dst} = if match(src, selectSrc) then {src=destAsSource, dst=dst} else if match(src, destAsSource) then {src=selectSrc, dst=dst} else {src=src, dst=dst} val exchangeCode = exchange(selectDst, selectSrc, code) in moveValues(List.map swapSources flattened, exchangeCode) end | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *) raise InternalError "moveEachValue - empty set" in moveEachValue(ordered, code) end in moveValues(moves, code) end (* Where we have multiple specific registers as either source or destination there is the potential that a destination register if currently in use as a source. *) fun moveMultipleRegisters(regPairList, code) = let val regPairsAsDests = List.map(fn {src, dst} => {src=IsInReg src, dst=IsInReg dst}) regPairList in moveMultipleValues(regPairsAsDests, code) end fun moveIfNecessary({src, dst}, code) = if src = dst then code else MoveXRegToXReg{sReg=src, dReg=dst} :: code (* Add a constant word to the source register and put the result in the destination. regW is used as a work register if necessary. This is used both for addition and subtraction. *) fun addConstantWord({regS, regD, value=0w0, ...}, code) = if regS = regD then code else MoveXRegToXReg{sReg=regS, dReg=regD} :: code | addConstantWord({regS, regD, regW, value}, code) = let (* If we have to load the constant it's better if the top 32-bits are zero if possible. *) val (isSub, unsigned) = if value > Word64.<<(0w1, 0w63) then (true, ~ value) else (false, value) in if unsigned < Word64.<<(0w1, 0w24) then (* We can put up to 24 in a shifted and an unshifted constant. 
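For instance, adding the purely illustrative value 0x123456 splits into a high part of 0x123 and a low part of 0x456 and is emitted as
    add regD, regS, #0x123, lsl #12
    add regD, regD, #0x456
Anything needing more than 24 bits falls through to the branch below, which loads the constant, shifted down to remove trailing zeros, into regW and adds or subtracts it as a shifted register.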
*) let val w = Word.fromLarge(Word64.toLarge unsigned) val high = Word.andb(Word.>>(w, 0w12), 0wxfff) val low = Word.andb(w, 0wxfff) val addSub = if isSub then SubImmediate else AddImmediate in if high <> 0w0 then ( (if low <> 0w0 then [addSub{regN=regD, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64}] else []) @ addSub{regN=regS, regD=regD, immed=high, shifted=true, setFlags=false, opSize=OpSize64} :: code ) else addSub{regN=regS, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64} :: code end else let (* To minimise the constant and increase the chances that it will fit in a single word look to see if we can shift it. *) fun getShift(value, shift) = if Word64.andb(value, 0w1) = 0w0 then getShift(Word64.>>(value, 0w1), shift+0w1) else (value, shift) val (shifted, shift) = getShift(unsigned, 0w0) in code <::> LoadNonAddr(regW, shifted) <::> (if isSub then SubShiftedReg else AddShiftedReg) {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift, setFlags=false, opSize=OpSize64} end end - val getSaveRegs = List.map getAllocatedGenReg + fun getSaveRegs r = r val startOfFunctionLabel = createLabel() (* Used for recursive calls/jumps *) val blockToLabelMap = Vector.tabulate(numBlocks, fn _ => createLabel()) fun getBlockLabel blockNo = Vector.sub(blockToLabelMap, blockNo) fun codeExtended _ (MoveRegister{source, dest, ...}, code) = - moveIfNecessary({src=getAllocatedGenReg source, dst=getAllocatedGenReg dest}, code) + moveIfNecessary({src=source, dst=dest}, code) | codeExtended _ (LoadNonAddressConstant{source, dest, ...}, code) = - code <::> LoadNonAddr(getAllocatedGenReg dest, source) + code <::> LoadNonAddr(dest, source) | codeExtended _ (LoadAddressConstant{source, dest, ...}, code) = - code <::> LoadAddr(getAllocatedGenReg dest, source) + code <::> LoadAddr(dest, source) | codeExtended _ (LoadWithConstantOffset{dest, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 - then LoadRegUnscaled{regT=getAllocatedGenReg dest, regN=getAllocatedGenReg base, byteOffset=byteOffset, + then LoadRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate} :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in - LoadRegScaled{regT=getAllocatedGenReg dest, regN=getAllocatedGenReg base, unitOffset=unitOffset, loadType=loadType} :: code + LoadRegScaled{regT=dest, regN=base, unitOffset=unitOffset, loadType=loadType} :: code end | codeExtended _ (LoadFPWithConstantOffset{dest, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 - then (LoadFPRegUnscaled{regT=getAllocatedFPReg dest, regN=getAllocatedGenReg base, byteOffset=byteOffset, + then (LoadFPRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in - (LoadFPRegScaled{regT=getAllocatedFPReg dest, regN=getAllocatedGenReg base, unitOffset=unitOffset, floatSize=floatSize}) :: code + (LoadFPRegScaled{regT=dest, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (LoadWithIndexedOffset{dest, base, index, loadType, ...}, code) = let - val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index + val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. 
*) val scaleType = if is32in64 then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in - (LoadRegIndexed{regT=getAllocatedGenReg dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code + (LoadRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithIndexedOffset{dest, base, index, floatSize, ...}, code) = let - val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index + val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX in - (LoadFPRegIndexed{regT=getAllocatedFPReg dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code + (LoadFPRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (GetThreadId { dest}, code) = (* Load the thread id. This is always a 64-bit value. *) - (LoadRegScaled{regT=getAllocatedGenReg dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code + (LoadRegScaled{regT=dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code | codeExtended _ (ObjectIndexAddressToAbsolute{source, dest, ...}, code) = - (AddShiftedReg{regM=getAllocatedGenReg source, regN=X_Base32in64, regD=getAllocatedGenReg dest, + (AddShiftedReg{regM=source, regN=X_Base32in64, regD=dest, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code | codeExtended _ (AbsoluteToObjectIndex{source, dest, ...}, code) = let - val destReg = getAllocatedGenReg dest + val destReg = dest in code <::> - (SubShiftedReg{regM=X_Base32in64, regN=getAllocatedGenReg source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> + (SubShiftedReg{regM=X_Base32in64, regN=source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> (shiftConstant{shift=0w2, regN=destReg, regD=destReg, direction=ShiftRightLogical, opSize=OpSize64}) end | codeExtended _ (AllocateMemoryFixed{ bytesRequired, dest, saveRegs, ... }, code) = let val label = createLabel() - val destReg = getAllocatedGenReg dest + val destReg = dest in code <@> (* Subtract the number of bytes required from the heap pointer and put in result reg. *) (if bytesRequired >= 0w4096 then [SubShiftedReg{regM=workReg1, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}, LoadNonAddr(workReg1, bytesRequired)] else [SubImmediate{regN=X_MLHeapAllocPtr, regD=destReg, immed=Word.fromLarge bytesRequired, shifted=false, setFlags=false, opSize=OpSize64}] ) <::> (* Compare with heap limit. *) SubShiftedReg{regM=X_MLHeapLimit, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, label) <::> (* Skip the trap if it's ok. *) RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs} <::> SetLabel label <::> MoveXRegToXReg{sReg=destReg, dReg=X_MLHeapAllocPtr} end | codeExtended _ (AllocateMemoryVariable{ size, dest, saveRegs, ... }, code) = let val trapLabel = createLabel() and noTrapLabel = createLabel() - val destReg = getAllocatedGenReg dest and sizeReg = getAllocatedGenReg size + val destReg = dest and sizeReg = size (* Subtract the size into the result register. Subtract a further word for the length word and round down in 32-in-64. 
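As a worked example, with a purely illustrative request of 3 words: in 32-in-64 the result register becomes
    destReg := X_MLHeapAllocPtr - 3*4     subtract the object size in bytes
    destReg := destReg - 4                leave room for the length word
    destReg := destReg AND NOT 7          round down to an 8-byte boundary
while native 64-bit code subtracts 3*8 and then 8, and needs no rounding.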
*) val subtractSize = if is32in64 then BitwiseLogical{bits= ~ 0w8, regN=destReg, regD=destReg, logOp=LogAnd, opSize=OpSize64, setFlags=false} :: SubImmediate{regN=destReg, regD=destReg, immed=0w4, shifted=false, setFlags=false, opSize=OpSize64} :: SubShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else SubImmediate{regN=destReg, regD=destReg, immed=0w8, shifted=false, setFlags=false, opSize=OpSize64} :: SubShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftLSL 0w3, setFlags=false, opSize=OpSize64} :: code (* Check against the limit. If the size is large enough it is possible that this could wrap round. To check for that we trap if either the result is less than the limit or if it is now greater than the allocation pointer. *) in subtractSize <::> SubShiftedReg{regM=X_MLHeapLimit, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarryClear, trapLabel) <::> SubShiftedReg{regM=X_MLHeapAllocPtr, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarryClear, noTrapLabel) <::> SetLabel trapLabel <::> RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs} <::> SetLabel noTrapLabel <::> MoveXRegToXReg{sReg=destReg, dReg=X_MLHeapAllocPtr} end | codeExtended _ (InitialiseMem{ size, addr, init}, code) = let - val sizeReg = getAllocatedGenReg size - and addrReg = getAllocatedGenReg addr - and initReg = getAllocatedGenReg init + val sizeReg = size + and addrReg = addr + and initReg = init val exitLabel = createLabel() and loopLabel = createLabel() (* This uses a loop to initialise. It's possible the size is zero so we have to check at the top of the loop. *) val (bShift, offset, loadType) = if is32in64 then (0w2, ~4, Load32) else (0w3, ~8, Load64) in code <::> (* Add the length in bytes so we point at the end. *) AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, shift=ShiftLSL bShift, setFlags=false, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Are we at the start? *) SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, exitLabel) <::> StoreRegUnscaled{regT=initReg, regN=workReg1, byteOffset=offset, loadType=loadType, unscaledType=PreIndex } <::> UnconditionalBranch loopLabel <::> SetLabel exitLabel end | codeExtended _ (BeginLoop, code) = code | codeExtended _ (JumpLoop{regArgs, stackArgs, checkInterrupt}, code) = let (* TODO: We could have a single list and use ArgOnStack and ArgInReg to distinguish. *) fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset - | convertArg(ArgInReg reg) = IsInReg(getAllocatedGenReg reg) + | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {wordOffset, src, ...} => {src=convertArg src, dst=IsOnStack wordOffset}) stackArgs val extRegArgs = map (fn {dst, src} => {src=convertArg src, dst=convertArg(ArgInReg dst)}) regArgs val code2 = moveMultipleValues(extStackArgs @ extRegArgs, code) in case checkInterrupt of NONE => code2 | SOME saveRegs => let val skipCheck = createLabel() in code2 <::> (* Put in stack-check code to allow this to be interrupted. 
*) LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} <::> SubShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, skipCheck) <::> RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs} <::> SetLabel skipCheck end end | codeExtended _ (StoreWithConstantOffset{source, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 - then (StoreRegUnscaled{regT=getAllocatedGenReg source, regN=getAllocatedGenReg base, byteOffset=byteOffset, + then (StoreRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in - (StoreRegScaled{regT=getAllocatedGenReg source, regN=getAllocatedGenReg base, unitOffset=unitOffset, loadType=loadType}) :: code + (StoreRegScaled{regT=source, regN=base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithConstantOffset{source, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 - then (StoreFPRegUnscaled{regT=getAllocatedFPReg source, regN=getAllocatedGenReg base, byteOffset=byteOffset, + then (StoreFPRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in - (StoreFPRegScaled{regT=getAllocatedFPReg source, regN=getAllocatedGenReg base, unitOffset=unitOffset, floatSize=floatSize}) :: code + (StoreFPRegScaled{regT=source, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (StoreWithIndexedOffset{source, base, index, loadType, ...}, code) = let - val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index + val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in - (StoreRegIndexed{regT=getAllocatedGenReg source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code + (StoreRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithIndexedOffset{source, base, index, floatSize, ...}, code) = let - val baseReg = getAllocatedGenReg base and indexReg = getAllocatedGenReg index + val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. 
*) val scaleType = if is32in64 then ExtSXTW else ExtUXTX in - (StoreFPRegIndexed{regT=getAllocatedFPReg source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code + (StoreFPRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (AddSubImmediate{ source, dest, immed, isAdd, length, ccRef}, code) = let - val destReg = getAllocatedGenRegOrZero dest + val destReg = dest in ((if isAdd then AddImmediate else SubImmediate) - {regN=getAllocatedGenReg source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code + {regN=source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (AddSubRegister{ base, shifted, dest, isAdd, length, ccRef, shift}, code) = let - val destReg = getAllocatedGenRegOrZero dest + val destReg = dest in ( (if isAdd then AddShiftedReg else SubShiftedReg) - {regN=getAllocatedGenReg base, regM=getAllocatedGenReg shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code + {regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalImmediate{ source, dest, immed, logOp, length, ccRef}, code) = let - val destReg = getAllocatedGenRegOrZero dest + val destReg = dest in - (BitwiseLogical{regN=getAllocatedGenReg source, regD=destReg, bits=immed, opSize=length, + (BitwiseLogical{regN=source, regD=destReg, bits=immed, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalRegister{ base, shifted, dest, logOp, length, ccRef, shift}, code) = let (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. *) - val destReg = getAllocatedGenRegOrZero dest + val destReg = dest in - (LogicalShiftedReg{regN=getAllocatedGenReg base, regM=getAllocatedGenReg shifted, regD=destReg, + (LogicalShiftedReg{regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (ShiftRegister{ direction, dest, source, shift, opSize }, code) = - (ShiftRegisterVariable{regN=getAllocatedGenReg source, regM=getAllocatedGenReg shift, regD=getAllocatedGenReg dest, + (ShiftRegisterVariable{regN=source, regM=shift, regD=dest, shiftDirection=direction, opSize=opSize}) :: code | codeExtended _ (Multiplication{ kind, dest, sourceA, sourceM, sourceN }, code) = let - val destReg = getAllocatedGenReg dest - and srcAReg = getAllocatedGenRegOrZero sourceA - and srcNReg = getAllocatedGenReg sourceN - and srcMReg = getAllocatedGenReg sourceM + val destReg = dest + and srcAReg = sourceA + and srcNReg = sourceN + and srcMReg = sourceM in (MultiplyAndAddSub{regM=srcMReg, regN=srcNReg, regA=srcAReg, regD=destReg, multKind=kind}) :: code end | codeExtended _ (Division{ isSigned, dest, dividend, divisor, opSize }, code) = - (DivideRegs{regN=getAllocatedGenReg dividend, regM=getAllocatedGenReg divisor, - regD=getAllocatedGenReg dest, isSigned=isSigned, opSize=opSize}) :: code + (DivideRegs{regN=dividend, regM=divisor, + regD=dest, isSigned=isSigned, opSize=opSize}) :: code | codeExtended _ (BeginFunction{regArgs, ...}, code) = let val skipCheck = createLabel() val defaultWords = 10 (* This is wired into the RTS. 
*) val workRegister = workReg1 val debugTrapAlways = false (* Can be set to true for debugging *) (* Test with either the stack-pointer or a high-water value. The RTS assumes that X9 has been used as the high-water if it is called through stackOverflowXCallOffset rather than stackOverflowCallOffset *) val (testReg, entryPt, code1) = if stackRequired <= defaultWords then (X_MLStackPtr, stackOverflowCallOffset, code) else (X9, stackOverflowXCallOffset, addConstantWord({regS=X_MLStackPtr, regD=X9, regW=workRegister, value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt stackRequired}, code)) (* Skip the RTS call if there is enough stack. N.B. The RTS can modify the end-of-stack value to force a trap here even if there is really enough stack. *) val code2 = (if debugTrapAlways then [] else [ConditionalBranch(CondCarrySet, skipCheck), SubShiftedReg{regM=workRegister, regN=testReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}]) @ (* Load the end-of-stack value. *) LoadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} :: code1 val code3 = code2 <::> RTSTrap{rtsEntry=entryPt, work=workReg1, save=List.map #2 regArgs} <::> SetLabel skipCheck val usedRegs = regArgs - fun mkPair(pr, rr) = {src=rr,dst=getAllocatedGenReg pr} + fun mkPair(pr, rr) = {src=rr,dst=pr} val regPairs = List.map mkPair usedRegs in moveMultipleRegisters(regPairs, code3) end | codeExtended _ (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, code) = let fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset - | convertArg(ArgInReg reg) = IsInReg(getAllocatedGenReg reg) + | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {stack, src} => {dst=IsOnStack(stack+currStackSize), src=convertArg src}) stackArgs val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs (* Tail recursive calls are complicated because we generally have to overwrite the existing stack. That means storing the arguments in the right order to avoid overwriting a value that we are using for a different argument. *) fun codeTailCall(arguments, stackAdjust, code) = if stackAdjust < 0 then let (* If the function we're calling takes more arguments on the stack than the current function we will have to extend the stack. Do that by pushing the argument whose offset is at -1. Then adjust all the offsets and repeat. *) val {src=argM1, ...} = valOf(List.find(fn {dst=IsOnStack ~1, ...} => true | _ => false) arguments) fun renumberArgs [] = [] | renumberArgs ({dst=IsOnStack ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. 
*) | renumberArgs ({dst, src} :: args) = let val newDest = case dst of IsOnStack d => IsOnStack(d+1) | regDest => regDest val newSrc = case src of IsOnStack wordOffset => IsOnStack(wordOffset+1) | other => other in {dst=newDest, src=newSrc} :: renumberArgs args end val pushCode = case argM1 of IsOnStack wordOffset => (StoreRegUnscaled{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: loadFromStack(workReg2, wordOffset, code) | IsInReg reg => (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: code in codeTailCall(renumberArgs arguments, stackAdjust+1, pushCode) end else let val loadArgs = moveMultipleValues(arguments, code) in if stackAdjust = 0 then loadArgs else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt stackAdjust * Word.toLarge nativeWordSize}, loadArgs) end val setArgumentsCode = codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code) val jumpToFunctionCode = case callKind of Recursive => [(UnconditionalBranch startOfFunctionLabel)] | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else [(BranchReg{regD=workReg1, brRegType=BRRBranch}), (LoadAddr(workReg1, m))] | FullCall => if is32in64 then [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}, AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}] else [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64}] in jumpToFunctionCode @ setArgumentsCode end | codeExtended _ (FunctionCall{callKind, regArgs=regArgs, stackArgs=stackArgs, dest, saveRegs, ...}, code) = let - val destReg = getAllocatedGenReg dest + val destReg = dest local fun pushStackArgs ([], _, code) = code | pushStackArgs (ArgOnStack {wordOffset, ...} ::args, argNum, code) = let (* Have to adjust the offsets of stack arguments. *) val adjustedOffset = wordOffset+argNum in pushStackArgs(args, argNum+1, loadFromStack(workReg1, adjustedOffset, code) <::> StoreRegUnscaled{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) end | pushStackArgs (ArgInReg reg ::args, argNum, code) = pushStackArgs(args, argNum+1, code <::> - (StoreRegUnscaled{regT=getAllocatedGenReg reg, regN=X_MLStackPtr, byteOffset= ~8, + (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *)) (* We have to adjust any stack offset to account for the arguments we've pushed. *) val numStackArgs = List.length stackArgs fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack(wordOffset+numStackArgs) - | convertArg(ArgInReg reg) = IsInReg(getAllocatedGenReg reg) + | convertArg(ArgInReg reg) = IsInReg(reg) in val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val loadArgs = moveMultipleValues(extRegArgs, pushedArgs) end (* Push the registers before the call and pop them afterwards. 
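The load that pops each register is appended as the recursion unwinds, so the pops follow the call in the reverse of the push order.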
*) fun makeSavesAndCall([], code) = ( case callKind of Recursive => code <::> (BranchAndLink startOfFunctionLabel) | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else code <::> (LoadAddr(workReg1, m)) <::> (BranchReg{regD=workReg1, brRegType=BRRAndLink}) | FullCall => if is32in64 then code <::> AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} else code <::> LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} ) | makeSavesAndCall(reg::regs, code) = let - val areg = getAllocatedGenReg reg + val areg = reg in makeSavesAndCall(regs, code <::> StoreRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) <::> LoadRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex} end in moveIfNecessary({dst=destReg, src=X0}, makeSavesAndCall(saveRegs, loadArgs)) end | codeExtended _ (ReturnResultFromFunction { resultReg, returnReg, numStackArgs }, code) = let - val resultReg = getAllocatedGenReg resultReg - and returnReg = getAllocatedGenReg returnReg + val resultReg = resultReg + and returnReg = returnReg fun resetStack(0, code) = code | resetStack(nItems, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) in BranchReg{regD=returnReg, brRegType=BRRReturn} :: resetStack(numStackArgs, moveIfNecessary({src=resultReg, dst=X0}, code)) end | codeExtended _ (RaiseExceptionPacket{ packetReg }, code) = (* We need a work register here. It can be any register other than X0 since we don't preserve registers across calls. *) (* Copy the handler "register" into the stack pointer. Then jump to the address in the first word. The second word is the next handler. This is set up in the handler. We have a lot more raises than handlers since most raises are exceptional conditions such as overflow so it makes sense to minimise the code in each raise. *) - moveIfNecessary({src=getAllocatedGenReg packetReg, dst=X0}, code) <::> + moveIfNecessary({src=packetReg, dst=X0}, code) <::> LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch } | codeExtended _ (PushToStack{ source, copies, ... }, code) = let - val reg = getAllocatedGenReg source + val reg = source val _ = copies > 0 orelse raise InternalError "PushToStack: copies<1" fun pushn(0, c) = c | pushn(n, c) = pushn(n-1, (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: c) in pushn(copies, code) end | codeExtended _ (LoadStack{ dest, wordOffset, ... }, code) = - loadFromStack(getAllocatedGenReg dest, wordOffset, code) + loadFromStack(dest, wordOffset, code) | codeExtended _ (StoreToStack{ source, stackOffset, ... }, code) = (* Store into the stack to set a field of a container. Always 64-bits. *) - storeToStack(getAllocatedGenReg source, stackOffset, workReg1, code) + storeToStack(source, stackOffset, workReg1, code) | codeExtended _ (ContainerAddress{ dest, stackOffset, ... }, code) = (* Set the register to an offset in the stack. 
*) let - val destReg = getAllocatedGenReg dest + val destReg = dest val _ = stackOffset >= 0 orelse raise InternalError "codeGenICode: ContainerAddress - negative offset" val byteOffset = stackOffset * Word.toInt nativeWordSize in if byteOffset >= 4096 then code <::> LoadNonAddr(destReg, Word64.fromInt byteOffset) <::> AddShiftedReg{regN=X_MLStackPtr, regM=destReg, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64} else code <::> AddImmediate{regN=X_MLStackPtr, regD=destReg, immed=Word.fromInt byteOffset, shifted=false, setFlags=false, opSize=OpSize64} end | codeExtended _ (ResetStackPtr{ numWords, ... }, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt numWords * Word.toLarge nativeWordSize}, code) | codeExtended _ (TagValue{ source, dest, isSigned=_, opSize }, code) = let - val sourceReg = getAllocatedGenReg source - and destReg = getAllocatedGenReg dest + val sourceReg = source + and destReg = dest (* Shift left by one bit and add one. *) in code <::> shiftConstant{ direction=ShiftLeft, regD=destReg, regN=sourceReg, shift=0w1, opSize=opSize } <::> BitwiseLogical{ bits=0w1, regN=destReg, regD=destReg, opSize=opSize, setFlags=false, logOp=LogOr} end | codeExtended _ (UntagValue{ source, dest, isSigned, opSize }, code) = code <::> shiftConstant{ direction=if isSigned then ShiftRightArithmetic else ShiftRightLogical, - regD=getAllocatedGenReg dest, regN=getAllocatedGenReg source, shift=0w1, opSize=opSize } + regD=dest, regN=source, shift=0w1, opSize=opSize } | codeExtended _ (BoxLarge{ source, dest, saveRegs }, code) = - boxSysWord({source=getAllocatedGenReg source, destination=getAllocatedGenReg dest, + boxSysWord({source=source, destination=dest, workReg=workReg1, saveRegs=getSaveRegs saveRegs}, code) | codeExtended _ (UnboxLarge{ source, dest }, code) = let (* Unbox a large word. The argument is a poly word. 
*) - val destReg = getAllocatedGenReg dest - and srcReg = getAllocatedGenReg source + val destReg = dest + and srcReg = source in if is32in64 then LoadRegScaled{regT=destReg, regN=destReg, unitOffset=0, loadType=Load64} :: AddShiftedReg{regM=srcReg, regN=X_Base32in64, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else LoadRegScaled{regT=destReg, regN=srcReg, unitOffset=0, loadType=Load64} :: code end | codeExtended _ (BoxTagFloat{ floatSize=Double64, source, dest, saveRegs }, code) = - boxDouble({source=getAllocatedFPReg source, destination=getAllocatedGenReg dest, + boxDouble({source=source, destination=dest, workReg=workReg1, saveRegs=getSaveRegs saveRegs}, code) | codeExtended _ (BoxTagFloat{ floatSize=Float32, source, dest, saveRegs }, code) = let - val floatReg = getAllocatedFPReg source - and fixedReg = getAllocatedGenReg dest + val floatReg = source + and fixedReg = dest in if is32in64 then boxFloat({source=floatReg, destination=fixedReg, workReg=workReg1, saveRegs=getSaveRegs saveRegs}, code) else code <::> MoveFPToGeneral{regN=floatReg, regD=fixedReg, floatSize=Float32} <::> shiftConstant{ direction=ShiftLeft, shift=0w32, regN=fixedReg, regD=fixedReg, opSize=OpSize64} <::> BitwiseLogical{ bits=0w1, regN=fixedReg, regD=fixedReg, opSize=OpSize64, setFlags=false, logOp=LogOr} end | codeExtended _ (UnboxTagFloat { floatSize=Double64, source, dest }, code) = let - val addrReg = getAllocatedGenReg source - and valReg = getAllocatedFPReg dest + val addrReg = source + and valReg = dest in if is32in64 then code <::> AddShiftedReg{regM=addrReg, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadFPRegScaled{regT=valReg, regN=workReg1, unitOffset=0, floatSize=Double64} else code <::> LoadFPRegScaled{regT=valReg, regN=addrReg, unitOffset=0, floatSize=Double64} end | codeExtended _ (UnboxTagFloat { floatSize=Float32, source, dest }, code) = let - val addrReg = getAllocatedGenReg source - and valReg = getAllocatedFPReg dest + val addrReg = source + and valReg = dest (* This is tagged in native 64-bits. In 32-in-64 we're loading 32-bits so we can use an indexed load directly. *) in if is32in64 then code <::> LoadFPRegIndexed{regN=X_Base32in64, regM=addrReg, regT=valReg, option=ExtUXTX ScaleOrShift, floatSize=Float32} else code <::> shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=addrReg, regD=workReg1, opSize=OpSize64} <::> MoveGeneralToFP{regN=workReg1, regD=valReg, floatSize=Float32} end | codeExtended _ (LoadAcquire{dest, base, loadType, ...}, code) = - LoadAcquireReg{regT=getAllocatedGenReg dest, regN=getAllocatedGenReg base, loadType=loadType} :: code + LoadAcquireReg{regT=dest, regN=base, loadType=loadType} :: code | codeExtended _ (StoreRelease{source, base, loadType, ...}, code) = - StoreReleaseReg{regT=getAllocatedGenReg source, regN=getAllocatedGenReg base, loadType=loadType} :: code + StoreReleaseReg{regT=source, regN=base, loadType=loadType} :: code | codeExtended _ (BitFieldShift{ source, dest, isSigned, length, immr, imms }, code) = let - val srcReg = getAllocatedGenReg source - val destReg = getAllocatedGenReg dest + val srcReg = source + val destReg = dest in BitField{immr=immr, imms=imms, regN=srcReg, regD=destReg, bitfieldKind=if isSigned then BFSigned else BFUnsigned, opSize=length} :: code end | codeExtended _ (BitFieldInsert{ source, destAsSource, dest, length, immr, imms }, code) = let (* If we're using BitFieldMove we retain some of the bits of the destination. 
The higher levels require us to treat that as a source. *) - val sourceReg = getAllocatedGenReg source - and destReg = getAllocatedGenReg dest + val sourceReg = source + and destReg = dest val _ = sourceReg = destReg andalso raise InternalError "codeExtended: bitfield: dest=source" in - BitField{immr=immr, imms=imms, regN=getAllocatedGenReg source, regD=destReg, bitfieldKind=BFInsert, opSize=length} :: - moveIfNecessary({src=getAllocatedGenReg destAsSource, dst=destReg}, code) + BitField{immr=immr, imms=imms, regN=source, regD=destReg, bitfieldKind=BFInsert, opSize=length} :: + moveIfNecessary({src=destAsSource, dst=destReg}, code) end | codeExtended {flow} (IndexedCaseOperation{testReg}, code) = let (* testReg contains the original value after the lowest value has been subtracted. Since both the original value and the lowest value were tagged it contains a shifted but untagged value. *) (* This should only be within a block with an IndexedBr flow type. *) val cases = case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation" val caseLabels = map getBlockLabel cases val tableLabel = createLabel() val startOfCase = code <::> LoadLabelAddress(workReg1, tableLabel) <::> (* Add the value shifted by one since it's already shifted. *) - AddShiftedReg{regN=workReg1, regD=workReg1, regM=getAllocatedGenReg testReg, + AddShiftedReg{regN=workReg1, regD=workReg1, regM=testReg, shift=ShiftLSL 0w1, setFlags=false, opSize=OpSize64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch} <::> SetLabel tableLabel val addCases = List.foldl (fn (label, code) => (UnconditionalBranch label) :: code) startOfCase caseLabels in addCases end | codeExtended {flow} (PushExceptionHandler, code) = let (* This should only be within a block with a SetHandler flow type. *) val handleLabel = case flow of SetHandler{ handler, ...} => handler | _ => raise InternalError "codeGenICode: PushExceptionHandler" val labelRef = getBlockLabel handleLabel in (* Push the old handler and the handler entry point and set the "current handler" to point to the stack after we've pushed these. *) code <::> LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadLabelAddress(workReg2, labelRef) <::> StoreRegPair{regT1=workReg2, regT2=workReg1, regN=X_MLStackPtr, unitOffset= ~2, unscaledType=PreIndex, loadType=Load64} <::> StoreRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} end | codeExtended _ (PopExceptionHandler, code) = (* Remove and discard the handler we've set up. Pop the previous handler and put into "current handler". *) code <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} | codeExtended _ (BeginHandler{packetReg}, code) = let val beginHandleCode = code <::> (* The exception raise code resets the stack pointer to the value in the exception handler so this is probably redundant. 
Leave it for the moment, *) LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} in - moveIfNecessary({src=X0, dst=getAllocatedGenReg packetReg }, beginHandleCode) + moveIfNecessary({src=X0, dst=packetReg }, beginHandleCode) end | codeExtended _ (CompareByteVectors{vec1Addr, vec2Addr, length, ...}, code) = let (* Construct a loop to compare two vectors of bytes. *) - val vec1Reg = getAllocatedGenReg vec1Addr - and vec2Reg = getAllocatedGenReg vec2Addr - and lenReg = getAllocatedGenReg length + val vec1Reg = vec1Addr + and vec2Reg = vec2Addr + and lenReg = length val loopLabel = createLabel() and exitLabel = createLabel() in code <::> (* Set the CC to Equal before we start in case length = 0 *) SubShiftedReg{regM=lenReg, regN=lenReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Start of loop *) CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Go to the end when len = zero *) (* Load the bytes for the comparison and increment each. *) LoadRegUnscaled{regT=workReg1, regN=vec1Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> LoadRegUnscaled{regT=workReg2, regN=vec2Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) (* Compare *) SubShiftedReg{regM=workReg2, regN=workReg1, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, loopLabel) <::> (* Loop if they're equal *) SetLabel exitLabel end | codeExtended _ (BlockMove{srcAddr, destAddr, length, isByteMove}, code) = let (* Construct a loop to move the data. *) - val srcReg = getAllocatedGenReg srcAddr - and destReg = getAllocatedGenReg destAddr - and lenReg = getAllocatedGenReg length + val srcReg = srcAddr + and destReg = destAddr + and lenReg = length val loopLabel = createLabel() and exitLabel = createLabel() val (offset, loadType) = if isByteMove then (1, Load8) else if is32in64 then (4, Load32) else (8, Load64) in code <::> SetLabel loopLabel (* Start of loop *) <::> CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Exit when length = 0 *) LoadRegUnscaled{regT=workReg1, regN=srcReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> StoreRegUnscaled{regT=workReg1, regN=destReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) UnconditionalBranch loopLabel <::> (* Back to the start *) SetLabel exitLabel end | codeExtended _ (AddSubXSP{ source, dest, isAdd }, code) = let val allocFreeCode = (if isAdd then AddExtendedReg else SubExtendedReg) - {regM=getAllocatedGenReg source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64} :: code + {regM=source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64} :: code in case dest of - ZeroReg => allocFreeCode - | SomeReg destReg => + XZero => allocFreeCode + | destReg => (* We have to use add here to get the SP into the destination instead of the usual move. 
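The ordinary register-to-register move is an alias of ORR with the zero register, which cannot name SP, whereas ADD with a zero immediate can.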
*) - AddImmediate{regN=XSP, regD=getAllocatedGenReg destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64} :: + AddImmediate{regN=XSP, regD=destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64} :: allocFreeCode end | codeExtended _ (TouchValue _, code) = code (* Don't need to do anything now. *) (* Used in mutex operations. *) | codeExtended _ (LoadAcquireExclusive{ base, dest }, code) = - LoadAcquireExclusiveRegister{regN=getAllocatedGenReg base, regT=getAllocatedGenReg dest} :: code + LoadAcquireExclusiveRegister{regN=base, regT=dest} :: code | codeExtended _ (StoreReleaseExclusive{ base, source, result }, code) = - StoreReleaseExclusiveRegister{regS=getAllocatedGenReg result, - regT=getAllocatedGenRegOrZero source, regN=getAllocatedGenReg base} :: code + StoreReleaseExclusiveRegister{regS=result, + regT=source, regN=base} :: code | codeExtended _ (MemoryBarrier, code) = code <::> MemBarrier | codeExtended _ (ConvertIntToFloat{ source, dest, srcSize, destSize}, code) = - (CvtIntToFP{regN=getAllocatedGenReg source, regD=getAllocatedFPReg dest, floatSize=destSize, opSize=srcSize}) :: code + (CvtIntToFP{regN=source, regD=dest, floatSize=destSize, opSize=srcSize}) :: code | codeExtended _ (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, code) = - (CvtFloatToInt{regN=getAllocatedFPReg source, regD=getAllocatedGenReg dest, + (CvtFloatToInt{regN=source, regD=dest, round=rounding, floatSize=srcSize, opSize=destSize}) :: code | codeExtended _ (UnaryFloatingPt{ source, dest, fpOp}, code) = - (FPUnaryOp{regN=getAllocatedFPReg source, regD=getAllocatedFPReg dest, fpOp=fpOp}) :: code + (FPUnaryOp{regN=source, regD=dest, fpOp=fpOp}) :: code | codeExtended _ (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, code) = - (FPBinaryOp{regN=getAllocatedFPReg arg1, regM=getAllocatedFPReg arg2, regD=getAllocatedFPReg dest, + (FPBinaryOp{regN=arg1, regM=arg2, regD=dest, floatSize=opSize, fpOp=fpOp}) :: code | codeExtended _ (CompareFloatingPoint{ arg1, arg2, opSize, ...}, code) = - (FPComparison{regN=getAllocatedFPReg arg1, regM=getAllocatedFPReg arg2, floatSize=opSize}) :: code + (FPComparison{regN=arg1, regM=arg2, floatSize=opSize}) :: code local (* processed - set to true when a block has been processed. *) val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) (* Find the blocks that reference this one. This isn't essential but allows us to try to generate blocks in the order of the control flow. This in turn may allow us to use short branches rather than long ones. *) val labelRefs = Array.array(numBlocks, []) datatype flowCode = FlowCodeSimple of int | FlowCodeCMove of {code: precode list, trueJump: int, falseJump: int} (* Process this recursively to set the references. If we have unreachable blocks, perhaps because they've been merged, we don't want to include them in the reference counting. This shouldn't happen now that IdentifyReferences removes unreferenced blocks. *) fun setReferences fromLabel toLabel = case Array.sub(labelRefs, toLabel) of [] => (* Not yet visited at all. *) let - val ExtendedBasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) + val BasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) val refs = case flow of ExitCode => [] | Unconditional lab => [lab] | Conditional{trueJump, falseJump, ... 
} => [trueJump, falseJump] | IndexedBr labs => labs | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val () = if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () in List.app (setReferences toLabel) refs end | refs => (* We've visiting this at least once. Just add us to the list. *) Array.update(labelRefs, toLabel, fromLabel :: refs) val _ = setReferences 0 0 (* Process the blocks. We keep the "stack" explicit rather than using recursion because this allows us to select both arms of a conditional branch sooner. *) fun genCode(toDo, lastFlow, code) = case List.filter (not o haveProcessed) toDo of [] => let (* There's nothing left to do. We may need to add a final branch to the end. *) val finalBranch = case lastFlow of ExitCode => [] | IndexedBr _ => [] | Unconditional dest => [(UnconditionalBranch(getBlockLabel dest))] | Conditional { condition, trueJump, falseJump, ...} => [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] | SetHandler { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] in finalBranch @ code (* Done. *) end | stillToDo as head :: _ => let local (* Check the references. If all the sources that lead up to this have already been we won't have any backward jumps. *) fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) val continuation = case lastFlow of ExitCode => NONE | IndexedBr _ => NONE (* We could put the last branch in here. *) | Unconditional dest => if not (haveProcessed dest) andalso available dest then SOME(FlowCodeSimple dest) else NONE | Conditional {trueJump, falseJump, condition, ...} => (* We can usually choose either destination and in nearly all cases it won't matter. The default branch is not to take forward jumps so if there is reason to believe that one branch is more likely we should follow that branch now and leave the other. If we have Cond(No)Overflow we assume that overflow is unusual. If one branch raises an exception we assume that that is unusual. *) let val (first, second) = case (condition, Vector.sub(blocks, falseJump)) of (CondNoOverflow, _) => (trueJump, falseJump) - | (_, ExtendedBasicBlock{ flow=ExitCode, block, ...}) => - if List.exists(fn{instr=RaiseExceptionPacket _, ...} => true | _ => false) block + | (_, BasicBlock{ flow=ExitCode, block, ...}) => + if List.exists(fn RaiseExceptionPacket _ => true | _ => false) block then (trueJump, falseJump) else (falseJump, trueJump) | _ => (falseJump, trueJump) in if not (haveProcessed first) andalso available first then SOME(FlowCodeSimple first) else if not (haveProcessed second) andalso available second then SOME(FlowCodeSimple second) else NONE end | SetHandler { continue, ... } => (* We want the continuation if possible. We'll need a branch round the handler so that won't help. *) if not (haveProcessed continue) andalso available continue then SOME(FlowCodeSimple continue) else NONE | UnconditionalHandle _ => NONE | ConditionalHandle _ => NONE in (* First choice - continue the existing block. Second choice - the first item whose sources have all been processed. Third choice - something from the list. 
*) val picked = case continuation of SOME c => c | NONE => case List.find available stillToDo of SOME c => FlowCodeSimple c | NONE => FlowCodeSimple head end in case picked of FlowCodeSimple picked => let val () = Array.update(processed, picked, true) (* Code to terminate the previous block. *) val startCode = case lastFlow of ExitCode => [] | IndexedBr _ => [] | UnconditionalHandle _ => [] | Unconditional dest => if dest = picked then [] else [(UnconditionalBranch(getBlockLabel dest))] | ConditionalHandle { continue, ...} => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | SetHandler { continue, ... } => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | Conditional { condition, trueJump, falseJump, ...} => if picked = falseJump (* Usual case. *) then [(ConditionalBranch(condition, getBlockLabel trueJump))] else if picked = trueJump then (* We have a jump to the true condition. Invert the jump. This is more than an optimisation. Because this immediately precedes the true block we're not going to generate a label. *) [(ConditionalBranch(invertTest condition, getBlockLabel falseJump))] else [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] (* Code-generate the body with the code we've done so far at the end. Add a label at the start if necessary. *) local (* If the previous block dropped through to this and this was the only reference then we don't need a label. *) fun onlyJumpingHere (lab: int) = if lab <> picked then false else case Array.sub(labelRefs, picked) of [singleton] => singleton = lab | _ => false val noLabel = case lastFlow of ExitCode => picked = 0 (* Unless this was the first block. *) | Unconditional dest => onlyJumpingHere dest | Conditional { trueJump, falseJump, ...} => onlyJumpingHere trueJump orelse onlyJumpingHere falseJump | IndexedBr _ => false | SetHandler _ => false | UnconditionalHandle _ => false | ConditionalHandle { continue, ...} => onlyJumpingHere continue in val startLabel = if noLabel then [] else [(SetLabel(getBlockLabel picked))] end - val ExtendedBasicBlock { flow, block, ...} = Vector.sub(blocks, picked) + val BasicBlock { flow, block, ...} = Vector.sub(blocks, picked) local - fun genCodeBlock({instr, ...}, code) = codeExtended {flow=flow} (instr, code) + fun genCodeBlock(instr, code) = codeExtended {flow=flow} (instr, code) in val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] in genCode(addSet @ stillToDo, flow, bodyCode) end | FlowCodeCMove{code, trueJump, falseJump} => let (* We've generated a conditional move and possibly a return. If the trueJump and falseJump are only ever referenced from this block they're done, otherwise we still need to do them. 
*) val _ = case Array.sub(labelRefs, trueJump) of [_] => Array.update(processed, trueJump, true) | _ => () val _ = case Array.sub(labelRefs, falseJump) of [_] => Array.update(processed, falseJump, true) | _ => () - val ExtendedBasicBlock { flow, ...} = Vector.sub(blocks, trueJump) + val BasicBlock { flow, ...} = Vector.sub(blocks, trueJump) val addSet = case flow of ExitCode => [] | Unconditional dest => [dest] | _ => raise InternalError "FlowCodeCMove" in genCode(addSet @ stillToDo, flow, code) end end in val ops = genCode([0], ExitCode, [(SetLabel startOfFunctionLabel)]) end in generateFinalCode{instrs=List.rev ops, name=functionName, resultClosure=resultClosure, parameters=debugSwitches, profileObject=profileObject} end structure Sharing = struct - type intSet = intSet - and extendedBasicBlock = extendedBasicBlock - and regProperty = regProperty - and reg = reg - and closureRef = closureRef + type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock + and xReg = xReg + and vReg = vReg + and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML index 42c3aeeb..8a75e56d 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML @@ -1,332 +1,320 @@ (* Copyright David C. J. Matthews 2016-17, 2020-1 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeTransform( structure Arm64ICode: ARM64ICODE structure Debug: DEBUG structure Codegen: ARM64ICODEGENERATE structure Allocate: ARM64ALLOCATEREGISTERS structure Identify: ARM64IDENTIFYREFERENCES structure ConflictSets: ARM64ICODECONFLICTS structure PushRegisters: ARM64PUSHREGISTERS structure Optimise: ARM64ICODEOPTIMISE structure Pretty: PRETTY structure IntSet: INTSET sharing Arm64ICode.Sharing = Codegen.Sharing = Allocate.Sharing = Identify.Sharing = ConflictSets.Sharing = PushRegisters.Sharing = IntSet = Optimise.Sharing ) : ARM64ICODETRANSFORM = struct open Arm64ICode open Address open Identify open ConflictSets open PushRegisters open IntSet open Codegen open Allocate open Optimise exception InternalError = Misc.InternalError (* Find out the registers that need to be pushed to the stack, if any. We include those marked as "must push" because we need to save them across a function call or handler and also any we need to push because the set of active registers is more than the number of general registers we have. This second case involves choosing suitable registers and is a first attempt to check we have enough registers. We can also get a failure in codeExtended when we actually allocate the registers. 
*) fun spillRegisters(identified: extendedBasicBlock vector, regStates: regState vector) = let val maxPRegs = Vector.length regStates val pushArray = Array.array(maxPRegs, false) (* Mark anything already marked as "must push" unless it's already on the stack *) local fun checkPush(i, {pushState=true, ...}) = Array.update(pushArray, i, true) | checkPush _ = () in val () = Vector.appi checkPush regStates end (* Make a list of all the active sets ignoring those marked to be pushed. Do that first because we need to know how many sets each register is in. *) local fun addToActive(r, l) = ( case Vector.sub(regStates, r) of {prop=RegPropStack _, ...} => l | _ => if Array.sub(pushArray, r) then l else r :: l ) in fun nowActive regs = List.foldl addToActive [] regs end fun getBlockSets(ExtendedBasicBlock{block, passThrough, ...}, sets) = let fun getSets({active, ...}, l) = let val set = nowActive(setToList(union(active, passThrough))) in if List.length set > nGenRegs then set :: l else l end in List.foldl getSets sets block end val activeSets = Vector.foldl getBlockSets [] identified in if null activeSets then () else let (* See how many times each register appears in a set. *) val activeIn = Array.array(maxPRegs, 0) val () = List.app (fn regs => List.app(fn r => Array.update(activeIn, r, Array.sub(activeIn, r)+1)) regs) activeSets (* We want to choose the best registers to spill. *) fun spillSomeRegs activeSet = let (* We may have already marked some of these to push. *) val currentActive = nowActive activeSet val regCount = List.length currentActive fun addCosts r = let val {active, refs, prop, ...} = Vector.sub(regStates, r) in case prop of RegPropUntagged => (r, ~1, ~1) | RegPropStack _ => (r, ~1, ~1) | RegPropMultiple => (r, ~1, ~1) | _ => (r, Array.sub(activeIn, r), if refs = 0 then 0 else Int.quot(active, refs)) end val withCosts = List.map addCosts currentActive (* Order so that the earlier items are those that appear in more sets and if items appear in the same number of sets those that are active longer come earlier. *) fun compare (_, in1, a1) (_, in2, a2) = if in1 > in2 then true else if in1 < in2 then false else a1 > a2 val sorted = Misc.quickSort compare withCosts fun markAsPush([], _) = () | markAsPush((reg, _, _) :: regs, n) = if n <= 0 then () else let val {prop, ...} = Vector.sub(regStates, reg) val _ = case prop of RegPropStack _ => raise InternalError "markAsPush" | _ => () in Array.update(pushArray, reg, true); markAsPush(regs, n-1) end in markAsPush(sorted, regCount-nGenRegs) end in List.app spillSomeRegs activeSets end; (* Return the vector showing those that must be pushed. *) Array.vector pushArray end type triple = {instr: (xReg, xReg, vReg) arm64ICode, current: intSet, active: intSet} fun codeICodeFunctionToArm64{blocks, functionName, pregProps, ccCount, debugSwitches, resultClosure, profileObject, ...} = let (*val maxPRegs = Vector.length pregProps*) val icodeTabs = [8, 20, 60] val wantPrintCode = Debug.getParameter Debug.icodeTag debugSwitches - fun printCode identifiedCode = + fun printCode printCodeKind identifiedCode = (* Print the code before the transformation. 
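The printing function is passed as an argument so that the same routine can print either the abstract i-code or the concrete code produced by register allocation.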
*) let val printStream = Pretty.getSimplePrinter(debugSwitches, icodeTabs) in printStream(functionName ^ "\n"); - printICodeAbstract(identifiedCode, printStream); + printCodeKind(identifiedCode, printStream); printStream "\n" end fun printConflicts(regStates: conflictState vector) = let val printStream = Pretty.getSimplePrinter(debugSwitches, icodeTabs) fun printRegs([], _) = () | printRegs(_, 0) = printStream "..." | printRegs([i], _) = printStream(Int.toString i) | printRegs(i::l, n) = (printStream(Int.toString i ^ ","); printRegs(l, n-1)) fun printRegData(i, { conflicts, ... }) = ( printStream (Int.toString i ^ "\t"); printStream ("Conflicts="); printRegs(setToList conflicts, 20); printStream "\n" ) in Vector.appi printRegData regStates end - fun printRegisters(regAlloc: reg vector) = - let - val printStream = Pretty.getSimplePrinter(debugSwitches, icodeTabs) - fun regRepr (GenReg(XReg w)) = "X" ^ Int.toString(Word8.toInt w) - | regRepr (GenReg XZero) = "X0" - | regRepr (GenReg XSP) = "SP" - | regRepr (FPReg(VReg w)) = "V" ^ Int.toString(Word8.toInt w) - fun printRegAlloc(i, reg) = printStream (Int.toString i ^ "\t=> " ^ regRepr reg ^ "\n"); - in - Vector.appi printRegAlloc regAlloc - end - (* Limit the number of passes. *) val maxOptimisePasses = 30 val maxTotalPasses = maxOptimisePasses + 40 fun processCode(basicBlocks: (preg, pregOrZero, preg) basicBlock vector, pregProps: regProperty vector, maxStack, passes, optPasses) = let (* This should only require a few passes. *) val _ = passes < maxTotalPasses orelse raise InternalError "Too many passes" val () = if wantPrintCode - then printCode basicBlocks + then printCode printICodeAbstract basicBlocks else () (* First pass - identify register use patterns *) val (identified, regStates) = identifyRegisters(basicBlocks, pregProps) (* Try optimising. This may not do anything in which case we can continue with the original code otherwise we need to reprocess. *) val tryOpt = if optPasses < maxOptimisePasses then optimiseICode{code=identified, pregProps=pregProps, ccCount=ccCount, debugSwitches=debugSwitches} else Unchanged in case tryOpt of Changed (postOptimise, postOpProps) => processCode(postOptimise, postOpProps, maxStack, passes, optPasses+1) | Unchanged => let val regsToSpill = spillRegisters(identified, regStates) val needPhase2 = Vector.exists(fn t => t) regsToSpill val (needPhase2, regsToSpill) = if needPhase2 orelse passes <> 0 then (needPhase2, regsToSpill) else (true, Vector.tabulate(Vector.length pregProps, fn _ => false)) in if needPhase2 then let (* Push those registers we need to. This also adds and renumbers pregs and may add labels. *) val {code=postPushCode, pregProps=regPropsPhase2, maxStack=maxStackPhase2} = addRegisterPushes{code=identified, pushVec=regsToSpill, pregProps=pregProps, firstPass=passes=0} in (* And reprocess. *) processCode(postPushCode, regPropsPhase2, maxStackPhase2, passes+1, optPasses) end else let val maxPRegs = Vector.length regStates (* If we have been unable to allocate a register we need to spill something. Choose a single register from each conflict set. Since we've already checked that the active sets are small enough this is really only required to deal with special requirements e.g. esi/edi in block moves. 
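The choice below prefers a cache register if there is one in the set; otherwise it aims for the register with the largest active span relative to its number of references, so that a single push frees a real register for as long as possible.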
*) fun spillFromConflictSets conflictSets = let val maxPRegs = Vector.length regStates val pushArray = Array.array(maxPRegs, false) fun selectARegisterToSpill active = let val regsToPick = setToList active in (* If we have already marked one of these to be pushed we don't need to do anything here. *) if List.exists (fn r => Array.sub(pushArray, r)) regsToPick then () else (* Choose something to push. *) let fun chooseReg([], bestReg, _) = bestReg | chooseReg(reg::regs, bestReg, bestCost) = let val {active, refs, prop, ...} = Vector.sub(regStates, reg) val cost = if refs = 0 then 0 else Int.quot(active, refs) in case prop of RegPropStack _ => chooseReg(regs, bestReg, bestCost) | RegPropCacheUntagged => reg (* Pick the first cache reg. *) | RegPropCacheTagged => reg (* Pick the first cache reg. *) | _ => if cost >= bestCost then chooseReg(regs, reg, active) else chooseReg(regs, bestReg, bestCost) end val choice = chooseReg(regsToPick, ~1, 0) val _ = choice >= 0 orelse raise InternalError "chooseReg" in Array.update(pushArray, choice, true) end end val () = List.app selectARegisterToSpill conflictSets in Array.vector pushArray end (* Now get the conflict sets. *) val conflictSets = getConflictStates(identified, maxPRegs) local fun mapFromExtended(ExtendedBasicBlock{block, flow, ...}) = BasicBlock{block=List.map #instr block, flow=flow} in val () = if wantPrintCode - then (printCode(Vector.map mapFromExtended identified); printConflicts conflictSets) + then (printCode printICodeAbstract (Vector.map mapFromExtended identified); printConflicts conflictSets) else () end in case allocateRegisters {blocks=identified, regStates=conflictSets, regProps=pregProps } of - AllocateSuccess allocatedRegs => + AllocateSuccess concreteCode => ( - if wantPrintCode then printRegisters allocatedRegs else (); - icodeToArm64Code{blocks=identified, functionName=functionName, allocatedRegisters=allocatedRegs, + if wantPrintCode then printCode printICodeConcrete concreteCode else (); + icodeToArm64Code{blocks=concreteCode, functionName=functionName, stackRequired=maxStack, debugSwitches=debugSwitches, resultClosure=resultClosure, profileObject=profileObject} ) | AllocateFailure fails => let val regsToSpill = spillFromConflictSets fails val {code=postPushCode, pregProps=pregPropsPhase2, maxStack=maxStackPhase2} = addRegisterPushes{code=identified, pushVec=regsToSpill, pregProps=pregProps, firstPass=false} in processCode(postPushCode, pregPropsPhase2, maxStackPhase2, passes+1, optPasses) end end end end in processCode(blocks, pregProps, 0 (* Should include handlers and containers. *), 0, 0) end structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and closureRef = closureRef and preg = preg and pregOrZero = pregOrZero end end;
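Taken together these changes mean that register allocation now yields fully concrete basic blocks, so the code generator no longer looks each pseudo-register up through getAllocatedGenReg. The sketch below is a deliberately simplified, self-contained illustration of that pattern; the names (instr, block, mapBlock) and the cut-down xReg and preg definitions are hypothetical stand-ins for the much larger parameterised types in ARM64ICODE, not the real definitions.

(* One datatype describes both the abstract form, over pseudo-registers,
   and the concrete form, over machine registers. *)
datatype xReg = XReg of int | XZero
datatype preg = PReg of int

datatype 'reg instr =
    MoveReg of {src: 'reg, dst: 'reg}
  | AddImm  of {src: 'reg, dst: 'reg, immed: word}
type 'reg block = 'reg instr list

(* Register allocation becomes a structure-preserving map: the allocator
   rewrites every operand, and the code generator receives machine
   registers directly instead of consulting a separate allocation vector. *)
fun mapInstr f (MoveReg{src, dst}) = MoveReg{src=f src, dst=f dst}
  | mapInstr f (AddImm{src, dst, immed}) = AddImm{src=f src, dst=f dst, immed=immed}

fun mapBlock (f: preg -> xReg) (blk: preg block) : xReg block =
    List.map (mapInstr f) blk

(* Example allocation: PReg 0 -> X19, PReg 1 -> X20, anything else to XZero. *)
val allocate = fn PReg 0 => XReg 19 | PReg 1 => XReg 20 | _ => XZero
val concrete = mapBlock allocate [MoveReg{src=PReg 0, dst=PReg 1}]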