diff --git a/RootArm64.ML b/RootArm64.ML index 95a798e8..4c94d26c 100644 --- a/RootArm64.ML +++ b/RootArm64.ML @@ -1,155 +1,153 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Compiler root file for Arm64. This gives the "use" instructions necessary to build the compiler and suitable for use with an IDE project file. It was constructed from the Poly/ML make files. *) PolyML.print_depth 1; PolyML.Compiler.reportUnreferencedIds := true; use "mlsource/MLCompiler/Address.ML"; use "mlsource/MLCompiler/Misc.ML"; use "mlsource/MLCompiler/HashTable.ML"; use "mlsource/MLCompiler/UniversalTable.ML"; use "mlsource/MLCompiler/StronglyConnected.sml"; use "mlsource/MLCompiler/StretchArray.ML"; use "mlsource/MLCompiler/STRUCTVALSIG.sml"; use "mlsource/MLCompiler/PRETTY.sig"; use "mlsource/MLCompiler/LEXSIG.sml"; use "mlsource/MLCompiler/SymbolsSig.sml"; use "mlsource/MLCompiler/COMPILERBODYSIG.sml"; use "mlsource/MLCompiler/DEBUG.sig"; use "mlsource/MLCompiler/MAKESIG.sml"; use "mlsource/MLCompiler/MAKE_.ML"; use "mlsource/MLCompiler/FOREIGNCALL.sig"; use "mlsource/MLCompiler/BUILTINS.sml"; use "mlsource/MLCompiler/CODETREE.sig"; use "mlsource/MLCompiler/STRUCT_VALS.ML"; use "mlsource/MLCompiler/CodeTree/BACKENDINTERMEDIATECODE.sig"; use "mlsource/MLCompiler/CodeTree/BASECODETREE.sig"; use "mlsource/MLCompiler/CodeTree/CODETREEFUNCTIONS.sig"; use "mlsource/MLCompiler/CodeTree/CODEARRAY.sig"; use "mlsource/MLCompiler/CodeTree/CODEGENTREE.sig"; use "mlsource/MLCompiler/CodeTree/GENCODE.sig"; use "mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeStaticLinkAndCases.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeCodegenConstantFunctions.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeLambdaLift.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeRemoveRedundant.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeSimplifier.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeOptimiser.ML"; use "mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML"; use "mlsource/MLCompiler/Pretty.sml"; use "mlsource/MLCompiler/CodeTree/CodeArray.ML"; use "mlsource/MLCompiler/Debug.ML"; use "mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml"; use "mlsource/MLCompiler/CodeTree/BaseCodeTree.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64SEQUENCES.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Sequences.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig"; use "mlsource/MLCompiler/CodeTree/INTSET.sig"; use "mlsource/MLCompiler/CodeTree/IntSet.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML"; use 
"mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64IDENTIFYREFERENCES.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML"; -use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODECONFLICTS.sig"; -use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeConflicts.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PUSHREGISTERS.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEOPTIMISE.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeOptimise.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEGENERATE.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODETRANSFORM.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML"; use "mlsource/MLCompiler/CodeTree/GCode.arm64.ML"; use "mlsource/MLCompiler/CodeTree/ml_bind.ML"; use "mlsource/MLCompiler/StructVals.ML"; use "mlsource/MLCompiler/LEX_.ML"; use "mlsource/MLCompiler/Symbols.ML"; use "mlsource/MLCompiler/Lex.ML"; use "mlsource/MLCompiler/SymsetSig.sml"; use "mlsource/MLCompiler/DATATYPEREPSIG.sml"; use "mlsource/MLCompiler/VALUEOPSSIG.sml"; use "mlsource/MLCompiler/EXPORTTREESIG.sml"; use "mlsource/MLCompiler/STRUCTURESSIG.sml"; use "mlsource/MLCompiler/COMPILER_BODY.ML"; use "mlsource/MLCompiler/SymSet.ML"; use "mlsource/MLCompiler/TYPETREESIG.sml"; use "mlsource/MLCompiler/COPIERSIG.sml"; use "mlsource/MLCompiler/TYPEIDCODESIG.sml"; use "mlsource/MLCompiler/DATATYPE_REP.ML"; use "mlsource/MLCompiler/PRINTTABLESIG.sml"; use "mlsource/MLCompiler/VALUE_OPS.ML"; use "mlsource/MLCompiler/TYPE_TREE.ML"; use "mlsource/MLCompiler/UTILITIES_.ML"; use "mlsource/MLCompiler/Utilities.ML"; use "mlsource/MLCompiler/PRINT_TABLE.ML"; use "mlsource/MLCompiler/PrintTable.ML"; use "mlsource/MLCompiler/ExportTree.sml"; use "mlsource/MLCompiler/ExportTreeStruct.sml"; use "mlsource/MLCompiler/TypeTree.ML"; use "mlsource/MLCompiler/COPIER.sml"; use "mlsource/MLCompiler/CopierStruct.sml"; use "mlsource/MLCompiler/TYPEIDCODE.sml"; use "mlsource/MLCompiler/TypeIDCodeStruct.sml"; use "mlsource/MLCompiler/DatatypeRep.ML"; use "mlsource/MLCompiler/ValueOps.ML"; use "mlsource/MLCompiler/PARSETREESIG.sml"; use "mlsource/MLCompiler/SIGNATURESSIG.sml"; use "mlsource/MLCompiler/DEBUGGER.sig"; use "mlsource/MLCompiler/STRUCTURES_.ML"; use "mlsource/MLCompiler/DEBUGGER_.sml"; use "mlsource/MLCompiler/Debugger.sml"; use "mlsource/MLCompiler/ParseTree/BaseParseTreeSig.sml"; use "mlsource/MLCompiler/ParseTree/BASE_PARSE_TREE.sml"; use "mlsource/MLCompiler/ParseTree/PrintParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/PRINT_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/ExportParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/EXPORT_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/TypeCheckParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/TYPECHECK_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/MatchCompilerSig.sml"; use "mlsource/MLCompiler/ParseTree/MATCH_COMPILER.sml"; use "mlsource/MLCompiler/ParseTree/CodegenParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/CODEGEN_PARSETREE.sml"; use 
"mlsource/MLCompiler/ParseTree/PARSE_TREE.ML"; use "mlsource/MLCompiler/ParseTree/ml_bind.ML"; use "mlsource/MLCompiler/SIGNATURES.sml"; use "mlsource/MLCompiler/SignaturesStruct.sml"; use "mlsource/MLCompiler/Structures.ML"; use "mlsource/MLCompiler/PARSE_DEC.ML"; use "mlsource/MLCompiler/SKIPS_.ML"; use "mlsource/MLCompiler/Skips.ML"; use "mlsource/MLCompiler/PARSE_TYPE.ML"; use "mlsource/MLCompiler/ParseType.ML"; use "mlsource/MLCompiler/ParseDec.ML"; use "mlsource/MLCompiler/CompilerBody.ML"; use "mlsource/MLCompiler/CompilerVersion.sml"; use "mlsource/MLCompiler/Make.ML"; use "mlsource/MLCompiler/INITIALISE_.ML"; use "mlsource/MLCompiler/Initialise.ML"; use "mlsource/MLCompiler/ml_bind.ML"; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig index c02b0c65..f8cf6e9a 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig @@ -1,56 +1,51 @@ (* Copyright David C. J. Matthews 2021 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature ARM64ALLOCATEREGISTERS = sig type intSet and extendedBasicBlock and regProperty and reg and ('genReg, 'optGenReg, 'fpReg) basicBlock and xReg and vReg type address = Address.address type basicBlockConcrete = (xReg, xReg, vReg) basicBlock - - type conflictState = - { - conflicts: intSet, realConflicts: reg list - } datatype allocateResult = AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list val allocateRegisters : { blocks: extendedBasicBlock vector, - regStates: conflictState vector, + maxPRegs: int, regProps: regProperty vector } -> allocateResult val nGenRegs: int structure Sharing: sig type intSet = intSet and extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg and xReg = xReg and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODECONFLICTS.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODECONFLICTS.sig deleted file mode 100644 index 98f2ceb4..00000000 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODECONFLICTS.sig +++ /dev/null @@ -1,37 +0,0 @@ -(* - Copyright (c) 2021 David C.J. Matthews - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*) - -signature ARM64ICODECONFLICTS = -sig - type reg and preg and controlFlow and extendedBasicBlock - type intSet - - type conflictState = - { - conflicts: intSet, realConflicts: reg list - } - - val getConflictStates: extendedBasicBlock vector * int -> conflictState vector - - structure Sharing: - sig - type reg = reg - and preg = preg - and intSet = intSet - and extendedBasicBlock = extendedBasicBlock - end; -end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML index c2489b46..2ea77749 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML @@ -1,811 +1,993 @@ (* Copyright David C. J. Matthews 2016-21 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64AllocateRegisters( structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES - structure ConflictSets: ARM64ICODECONFLICTS structure IntSet: INTSET - sharing Arm64ICode.Sharing = Identify.Sharing = ConflictSets.Sharing = IntSet + sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ALLOCATEREGISTERS = struct open Arm64ICode open Identify - open ConflictSets open IntSet open Address exception InternalError = Misc.InternalError datatype allocateResult = AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list (* General registers. X24 is used as the global heap base in 32-in-64. X30 is the return address set by blr but is otherwise a general register. Put the argument registers at the end of the list so they'll only be used when hinted. *) val generalRegisters = map GenReg ([X9, X10, X11, X12, X13, X14, X15, X19, X20, X21, X22, X23, X0, X1, X2, X3, X4, X5, X6, X7, X8, X30] @ (if is32in64 then [] else [X24])) val floatingPtRegisters = map FPReg [V7, V6, V5, V4, V3, V2, V1] + type conflictState = + { + conflicts: intSet, realConflicts: reg list + } + + type triple = {instr: iCodeAbstract, current: intSet, active: intSet} + + exception InternalError = Misc.InternalError - fun allocateRegisters{blocks, regStates, regProps, ...} = + (* Get the conflict sets. This code was originally part of identifyRegisterState and + was split off. *) + fun getConflictStates (blocks: extendedBasicBlock vector, maxPRegs) = let - val maxPRegs = Vector.length regStates - and numBlocks = Vector.length blocks + (* Other registers that conflict with this i.e. cannot share the same + real register. *) + val regConflicts = Array.array(maxPRegs, emptySet) + (* Real registers that cannot be used for this because they are needed for + an instruction. Only X30 in calls and RTS traps. 
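For readers new to the interference-recording step that this patch folds into the allocator functor, here is a minimal standalone sketch of the idea behind the regConflicts array and the addConflictsTo function defined just below. It is illustrative only and not part of the patch: the names addOnce, unionSets, newConflicts and recordConflicts are invented, and plain int lists stand in for the IntSet structure the real code uses.

    (* Each pseudo-register gets a set of other pseudo-registers it must not
       share a real register with.  Sets are plain int lists here. *)
    fun addOnce (i, s) = if List.exists (fn j => j = i) s then s else i :: s
    fun unionSets (a, b) = List.foldl addOnce b a

    fun newConflicts maxPRegs = Array.array (maxPRegs, []: int list)

    (* Everything in addTo conflicts with everything in against. *)
    fun recordConflicts table (addTo, against) =
        List.app
            (fn r => Array.update (table, r, unionSets (Array.sub (table, r), against)))
            addTo

    (* e.g. recordConflicts table (destRegNos, liveAfter): each destination then
       conflicts with everything still live after the instruction, which is the
       rule conflictsForInstr applies below. *)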
*) + and regRealConflicts = Array.array(maxPRegs, []: reg list) + + fun addConflictsTo(addTo, conflicts) = + List.app(fn aReg => Array.update(regConflicts, aReg, union(Array.sub(regConflicts, aReg), conflicts))) addTo + + (* To reserve a register we need to add the real register to the + real conflict sets of all the abstract conflicts. *) + local + fun isInset reg set = List.exists (fn r => r = reg) set + in + fun reserveRegister(reserveFor, reg) = + let + fun reserveAReg r = + let + val absConflicts = Array.sub(regConflicts, r) + fun addConflict i = + if isInset i reserveFor then () else addRealConflict (i, reg) + in + List.app addConflict (setToList absConflicts) + end + in + List.app reserveAReg reserveFor + end + + and addRealConflict (i, reg) = + let + val currentConflicts = Array.sub(regRealConflicts, i) + in + if isInset reg currentConflicts + then () + else Array.update(regRealConflicts, i, reg :: currentConflicts) + end + end + + fun conflictsForInstr passThrough {instr, current, ...} = + let + val {sources, dests} = getInstructionRegisters instr + fun regNo(PReg i) = i + val destRegNos = map regNo dests + and sourceRegNos = map regNo sources + val destSet = listToSet destRegNos + val afterRemoveDests = minus(current, destSet) + + local + (* In almost all circumstances the destination and sources don't + conflict and the same register can be used as a destination and + a source. BoxLarge can only store the value after the memory + has been allocated. BitFieldInsert has to copy the "destAsSource" + value into the destination so cannot use the same register for + the "source". *) + val postInstruction = + case instr of + BoxLarge _ => destRegNos @ sourceRegNos + | BoxTagFloat _ => destRegNos @ sourceRegNos (* Not sure about this. *) + | BitFieldInsert{source, ...} => regNo source :: destRegNos + | _ => destRegNos + in + (* If there is more than one destination they conflict with each other. *) + val () = addConflictsTo(postInstruction, listToSet postInstruction); + (* Mark conflicts for the destinations, i.e. after the instruction. + The destinations conflict with the registers that are used + subsequently. *) + val () = addConflictsTo(postInstruction, current); + val () = addConflictsTo(postInstruction, passThrough); + + (* Mark conflicts for the sources i.e. before the instruction. *) + (* Sources must be set up as conflicts with each other i.e. when we + come to allocate registers we must choose different real registers + for different abstract registers. *) + val () = addConflictsTo(sourceRegNos, listToSet sourceRegNos) + val () = addConflictsTo(sourceRegNos, afterRemoveDests); + val () = addConflictsTo(sourceRegNos, passThrough) + end + + (* I'm not sure if this is needed. There was a check in the old code to ensure that + different registers were used for loop variables even if they were actually unused. + This may happen anyway. + Comment and code copied from X86 version. Retain it for the moment. *) + val () = + case instr of + JumpLoop{regArgs, ...} => + let + val destRegs = List.foldl(fn ({dst=PReg loopReg, ...}, dests) => loopReg :: dests) [] regArgs + in + addConflictsTo(destRegs, listToSet destRegs); + addConflictsTo(destRegs, current); + addConflictsTo(destRegs, passThrough) + end + | _ => () + + (* Certain instructions are specific as to the real registers. *) + val () = + case instr of + ReturnResultFromFunction{ returnReg=PReg retReg, ... } => + (* We're going to put the return value in X0 so we can't use that for + the return address. 
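The reservation step used around exception packets and function results (reserveRegister and addRealConflict above) can be pictured with the following small sketch, which is not the patch's code: reserve, its array parameters and the use of strings for real registers are invented for the illustration. The point it shows is that reserving a real register for some pseudo-registers means adding that real register to the real-conflict list of every pseudo-register that conflicts with them.

    (* To keep real register realReg free for the pseudo-registers in reserveFor,
       add realReg to the real-conflict list of every pseudo-register that
       conflicts with one of them.  The two arrays play the role of regConflicts
       and regRealConflicts; "real registers" are just strings here. *)
    fun reserve (absConflicts: int list array, realConflicts: string list array)
                (reserveFor: int list, realReg: string) =
        let
            fun addReal i =
                let val cur = Array.sub (realConflicts, i)
                in
                    if List.exists (fn s => s = realReg) cur then ()
                    else Array.update (realConflicts, i, realReg :: cur)
                end
            fun reserveOne p =
                List.app
                    (fn i => if List.exists (fn j => j = i) reserveFor then () else addReal i)
                    (Array.sub (absConflicts, p))
        in
            List.app reserveOne reserveFor
        end

In the patch this is what reserveRegister does with GenReg X0 for RaiseExceptionPacket, BeginHandler and FunctionCall, so that only the intended pseudo-register can end up coloured with X0 at that point.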
*) + addRealConflict(retReg, GenReg X0) + + | RaiseExceptionPacket{ packetReg } => + (* This wasn't needed previously because we always pushed the registers + across an exception. *) + reserveRegister([regNo packetReg], GenReg X0) + + | BeginHandler { packetReg, ...} => + reserveRegister([regNo packetReg], GenReg X0) + + | FunctionCall { dest, regArgs, ...} => + (* This is only needed if we are saving the registers rather + than marking them as "must push". *) + let + val destReg = regNo dest + in + reserveRegister([destReg], GenReg X0); + (* The argument registers also conflict. In order to execute this call we need to load + the arguments into specific registers so we can't use them for values that we want after + the call. We use regNo dest here because that will conflict with everything + immediately afterwards. *) + List.app(fn (_, r) => reserveRegister([destReg], GenReg r)) regArgs; + (* Likewise X30 since that's the return address. *) + addRealConflict(destReg, GenReg X30) + end + + (* We can't use X30 as the result because it's needed for the return addr if we have to GC. *) + | AllocateMemoryFixed{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) + | AllocateMemoryVariable{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) + + | _ => () + in + () + end + + (* Process the block. *) + fun conflictsForBlock(ExtendedBasicBlock{block, passThrough, exports, ...}) = + let + (* We need to establish conflicts between all the registers active at + the end of the block since they may not be established elsewhere. + This isn't necessary for an unconditional branch since the + same registers will be included in the block that is the target + of the branch, possibly along with others. However if this is + a conditional or indexed branch we may have different sets at + each of the targets and we have to ensure that all the registers + differ. *) + val united = union(exports, passThrough) + val () = addConflictsTo(setToList united, united) + + val () = List.app (conflictsForInstr passThrough) block + in + () + end + + + val () = Vector.app conflictsForBlock blocks + + val conflictState: conflictState vector = + Vector.tabulate(maxPRegs, + fn i => { + conflicts = Array.sub(regConflicts, i), + realConflicts = Array.sub(regRealConflicts, i) + } + ) + in + conflictState + end + + (* Get the conflict states, allocate registers and return the code with the allocated + registers if it is successful. *) + fun allocateRegisters{blocks, regProps, maxPRegs, ...} = + let + val regStates = getConflictStates(blocks, maxPRegs) + + val numBlocks = Vector.length blocks (* Hint values. The idea of hints is that by using a hinted register we may avoid an unnecessary move instruction. realHints is set when a pseudo-register is going to be loaded from a specific register e.g. a register argument, or moved into one e.g. ecx for a shift. friends is set to the other pseudo-registers that may be associated with the pseudo-register. E.g. the argument and destination of an arithmetic operation where choosing the same register for each may avoid a move. *) val realHints = Array.array(maxPRegs, NONE: reg option) (* Sources and destinations. These indicate the registers that are the sources and destinations of the indexing register and are used as hints. If a register has been allocated for a source or destination we may be able to reuse it. 
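The hint machinery introduced here is easiest to see on a plain register-to-register move: record each pseudo-register as a source/destination "friend" of the other so that, if they do not conflict, the allocator can later pick the same real register for both and the move becomes a no-op. A small sketch of that recording step, with invented names (addMoveHint, srcHints, dstHints); unlike the real addSourceAndDestinationHint it omits the conflict check:

    (* srcHints/dstHints play the role of sourceRegs/destinationRegs below. *)
    fun addMoveHint (srcHints: int list array, dstHints: int list array) {src, dst} =
        let
            fun addOnce (i, l) = if List.exists (fn j => j = i) l then l else i :: l
        in
            Array.update (dstHints, src, addOnce (dst, Array.sub (dstHints, src)));
            Array.update (srcHints, dst, addOnce (src, Array.sub (srcHints, dst)))
        end

When findRegister later runs it consults these hints in the order the comment below spells out: an existing allocation, the preferred register, a real-register hint, a friend that already has a suitable register, and only then the first free register in the set.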
*) val sourceRegs = Array.array(maxPRegs, []: int list) and destinationRegs = Array.array(maxPRegs, []: int list) local fun addRealHint(r, reg) = case Array.sub(realHints, r) of NONE => Array.update(realHints, r, SOME reg) | SOME _ => () fun addSourceAndDestinationHint{src, dst} = let val {conflicts, ...} = Vector.sub(regStates, src) in (* If they conflict we can't add them. *) if member(dst, conflicts) then () else let val currentDests = Array.sub(destinationRegs, src) val currentSources = Array.sub(sourceRegs, dst) in if List.exists(fn i => i=dst) currentDests then () else Array.update(destinationRegs, src, dst :: currentDests); if List.exists(fn i => i=src) currentSources then () else Array.update(sourceRegs, dst, src :: currentSources) end end in (* Add the hints to steer the register allocation. The idea is to avoid moves between registers by getting values into the appropriate register in advance. *) fun addHints{instr=MoveRegister{source=PReg sreg, dest=PReg dreg, ...}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints{instr=BitFieldInsert{destAsSource=PReg dsReg, dest=PReg dReg, ...}, ...} = (* The "destAsSource" is the destination if some bits are retained. *) addSourceAndDestinationHint {src=dsReg, dst=dReg} | addHints{instr=ReturnResultFromFunction { resultReg=PReg resReg, returnReg=PReg retReg, ... }, ...} = ( addRealHint(resReg, GenReg X0); addRealHint(retReg, GenReg X30) (* It may be there from earlier. *) ) | addHints{instr=JumpLoop{regArgs, ...}, ...} = let fun addRegArg {src=ArgInReg(PReg argReg), dst=PReg resReg} = addSourceAndDestinationHint {dst=resReg, src=argReg} | addRegArg {src=ArgOnStack _, ...} = () in List.app addRegArg regArgs end | addHints{instr=BeginFunction{regArgs, ...}, ...} = List.app (fn (PReg pr, reg) => addRealHint(pr, GenReg reg)) regArgs | addHints{instr=TailRecursiveCall{regArgs, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app setHint regArgs end | addHints{instr=FunctionCall{regArgs, dest=PReg dreg, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in addRealHint(dreg, GenReg X0); List.app setHint regArgs end (* Exception packets are in X0 *) | addHints{instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints{instr=BeginHandler{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints _ = () end val allocatedRegs = Array.array(maxPRegs, NONE: reg option) val failures = ref []: intSet list ref (* Find a real register for a preg. 1. If a register is already allocated use that. 2. Try the "preferred" register if one has been given. 3. Try the realHints value if there is one. 4. See if there is a "friend" that has an appropriate register 5. Look at all the registers and find one. *) fun findRegister(r, pref, regSet) = case Array.sub(allocatedRegs, r) of SOME reg => reg | NONE => let val {conflicts, realConflicts, ...} = Vector.sub(regStates, r) (* Find the registers we've already allocated that may conflict. *) val conflictingRegs = List.mapPartial(fn i => Array.sub(allocatedRegs, i)) (setToList conflicts) @ realConflicts fun isFree aReg = not (List.exists(fn i => i=aReg) conflictingRegs) fun tryAReg NONE = NONE | tryAReg (somePref as SOME prefReg) = if isFree prefReg then (Array.update(allocatedRegs, r, somePref); somePref) else NONE fun findAReg [] = ( (* This failed. We're going to have to spill something. *) failures := conflicts :: ! 
failures; hd regSet (* Return a register to satisfy everything. *) ) | findAReg (reg::regs) = if isFree reg then (Array.update(allocatedRegs, r, SOME reg); reg) else findAReg regs (* Search the sources and destinations to see if a register has already been allocated or there is a hint. *) fun findAFriend([], [], _) = NONE | findAFriend(aDest :: otherDests, sources, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aDest) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aDest)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the destinations of the destinations to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(destinationRegs, aDest)) in findAFriend(otherDests @ fOfF, sources, aDest :: alreadySeen) end end | findAFriend([], aSrc :: otherSrcs, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aSrc) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aSrc)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the sources of the sources to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(sourceRegs, aSrc)) in findAFriend([], otherSrcs @ fOfF, aSrc :: alreadySeen) end end (* See if there is a friend that has a register already or a hint. Friends are registers that don't conflict and can possibly avoid an extra move. *) (* fun findAFriend([], _) = NONE | findAFriend(friend :: tail, old) = let val possReg = case Array.sub(allocatedRegs, friend) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, friend)) in case possReg of reg as SOME _ => reg | NONE => let (* Add a friend of a friend to the list if we haven't already seen it and it doesn't conflict. *) fun newFriend f = not(List.exists (fn n => n=f) old) andalso not(List.exists (fn n => n=f) conflicts) val fOfF = List.filter newFriend (Array.sub(friends, friend)) in findAFriend(tail @ fOfF, friend :: old) end end*) in case tryAReg pref of SOME r => r | NONE => ( case tryAReg (Array.sub(realHints, r)) of SOME r => r | NONE => ( case findAFriend(Array.sub(destinationRegs, r), Array.sub(sourceRegs, r), []) of SOME r => r (* Look through the registers to find one that's free. 
*) | NONE => findAReg regSet ) ) end fun allocateRegister args = ignore(findRegister args) val allocateFindRegister = findRegister fun allocateGenReg(PReg r) = allocateRegister(r, NONE, generalRegisters) and allocateFloatReg(PReg r) = allocateRegister(r, NONE, floatingPtRegisters) and allocateOptGenReg(SomeReg reg) = allocateGenReg reg | allocateOptGenReg ZeroReg = () val allocateGenRegs = List.app allocateGenReg and allocateFloatRegs = List.app allocateFloatReg fun registerAllocate{instr=MoveRegister{source=PReg sreg, dest=PReg dreg}, ...} = let val realDestReg = findRegister(dreg, NONE, generalRegisters) in allocateRegister(sreg, SOME realDestReg, generalRegisters) end | registerAllocate{instr=LoadNonAddressConstant{dest, ...}, ...} = allocateGenReg dest | registerAllocate{instr=LoadAddressConstant{dest, ...}, ...} = allocateGenReg dest | registerAllocate{instr=LoadWithConstantOffset{dest, base, ...}, ...} = ( allocateGenReg dest; allocateGenReg base ) | registerAllocate{instr=LoadFPWithConstantOffset{dest, base, ...}, ...} = ( allocateFloatReg dest; allocateGenReg base ) | registerAllocate{instr=LoadWithIndexedOffset{dest, base, index, ...}, ...} = ( allocateGenReg dest; allocateGenRegs[base, index] ) | registerAllocate{instr=LoadFPWithIndexedOffset{dest, base, index, ...}, ...} = ( allocateFloatReg dest; allocateGenRegs[base, index] ) | registerAllocate{instr=GetThreadId { dest, ...}, ...} = allocateGenReg dest | registerAllocate{instr=ObjectIndexAddressToAbsolute{dest, source, ...}, ...} = allocateGenRegs[dest, source] | registerAllocate{instr=AbsoluteToObjectIndex{dest, source, ...}, ...} = allocateGenRegs[dest, source] | registerAllocate({instr=AllocateMemoryFixed{ dest, saveRegs, ...}, ...}) = allocateGenRegs (dest :: saveRegs) | registerAllocate({instr=AllocateMemoryVariable{ size, dest, saveRegs, ...}, ...}) = allocateGenRegs (size :: dest :: saveRegs) | registerAllocate({instr=InitialiseMem{ size, addr, init}, ...}) = allocateGenRegs [size, addr, init] | registerAllocate{instr=BeginLoop, ...} = () | registerAllocate({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}) = ( List.app (fn {src=ArgInReg sreg, dst} => (allocateGenReg sreg; allocateGenReg dst) | _ => ()) regArgs; List.app (fn {src=ArgInReg sreg, ...} => allocateGenReg sreg | _ => ()) stackArgs; case checkInterrupt of SOME regs => List.app(fn reg => allocateGenReg reg) regs | NONE => () ) | registerAllocate{instr=StoreWithConstantOffset{source, base, ...}, ...} = allocateGenRegs[source, base] | registerAllocate{instr=StoreFPWithConstantOffset{source, base, ...}, ...} = ( allocateFloatReg source; allocateGenReg base ) | registerAllocate{instr=StoreWithIndexedOffset{source, base, index, ...}, ...} = allocateGenRegs[source, base, index] | registerAllocate{instr=StoreFPWithIndexedOffset{source, base, index, ...}, ...} = ( allocateFloatReg source; allocateGenRegs[base, index] ) | registerAllocate{instr=AddSubImmediate{ source, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenReg source ) | registerAllocate{instr=AddSubRegister{ base, shifted, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenRegs[base, shifted] ) | registerAllocate{instr=LogicalImmediate{ source, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenReg source ) | registerAllocate{instr=LogicalRegister{ base, shifted, dest, ... }, ...} = ( allocateOptGenReg dest; allocateGenRegs[base, shifted] ) | registerAllocate{instr=ShiftRegister{ dest, source, shift, ... 
}, ...} = allocateGenRegs[dest, source, shift] | registerAllocate{instr=Multiplication{ dest, sourceA, sourceM, sourceN, ... }, ...} = (allocateGenRegs[dest, sourceM, sourceN]; allocateOptGenReg sourceA) | registerAllocate{instr=Division{ dest, dividend, divisor, ... }, ...} = allocateGenRegs[dest, dividend, divisor] | registerAllocate{instr=BeginFunction{regArgs, ...}, ...} = (* Check that every argument has a register allocated including any that are unused. Unused arguments should be discarded at a higher level because we could allocate a different register and copy the argument register only to discard it. *) allocateGenRegs(List.map #1 regArgs) | registerAllocate({instr=TailRecursiveCall{regArgs=regArgs, stackArgs=stackArgs, ...}, ...}) = let fun allocateRegArg(ArgInReg argReg, _) = allocateGenReg argReg | allocateRegArg _ = () in (* We've already hinted the arguments but we want to allocate these first to reduce the chance that they'll be used for stack arguments. *) List.app allocateRegArg regArgs; List.app (fn {src=ArgInReg argReg, ...} => allocateGenReg argReg | _ => ()) stackArgs end | registerAllocate({instr=FunctionCall{regArgs=regArgs, stackArgs=stackArgs, dest=PReg dReg, saveRegs, ...}, ...}) = let fun allocateRegArg(ArgInReg argReg, _) = allocateGenReg argReg | allocateRegArg _ = () in (* We've already hinted the arguments but we want to allocate these first to reduce the chance that they'll be used for stack arguments. *) List.app allocateRegArg regArgs; List.app (fn ArgInReg argReg => allocateGenReg argReg | _ => ()) stackArgs; allocateGenRegs saveRegs; (* Result will be in X0. *) allocateRegister(dReg, SOME(GenReg X0), [GenReg X0]) end | registerAllocate{instr=ReturnResultFromFunction { resultReg=PReg resReg, returnReg, ... }, ...} = ( allocateRegister(resReg, SOME(GenReg X0), [GenReg X0] (* It MUST be in this register *)); allocateGenReg returnReg ) | registerAllocate{instr=RaiseExceptionPacket{packetReg}, ...} = allocateGenReg packetReg | registerAllocate{instr=PushToStack{ source, ... }, ...} = allocateGenReg source | registerAllocate{instr=LoadStack{ dest, ... }, ...} = allocateGenReg dest | registerAllocate{instr=StoreToStack{ source, ... }, ...} = allocateGenReg source | registerAllocate{instr=ContainerAddress{ dest, ... }, ...} = allocateGenReg dest | registerAllocate{instr=ResetStackPtr _, ...} = () | registerAllocate({instr=TagValue{source, dest, ...}, ...}) = allocateGenRegs[source, dest] | registerAllocate({instr=UntagValue{source, dest, ...}, ...}) = allocateGenRegs[source, dest] | registerAllocate({instr=BoxLarge{source, dest, saveRegs}, ...}) = (allocateGenRegs saveRegs; allocateGenRegs[source, dest]) | registerAllocate({instr=UnboxLarge{source, dest}, ...}) = allocateGenRegs[source, dest] | registerAllocate({instr=BoxTagFloat{source, dest, saveRegs, ...}, ...}) = ( allocateGenRegs saveRegs; allocateFloatReg source; allocateGenReg dest ) | registerAllocate({instr=UnboxTagFloat{source, dest, ...}, ...}) = ( allocateFloatReg dest; allocateGenReg source ) | registerAllocate{instr=LoadAcquire{dest, base, ...}, ...} = allocateGenRegs[dest, base] | registerAllocate{instr=StoreRelease{source, base, ...}, ...} = allocateGenRegs[source, base] | registerAllocate{instr=BitFieldShift{ source, dest, ... }, ...} = allocateGenRegs[source, dest] | registerAllocate{instr=BitFieldInsert{ source, dest, destAsSource, ... 
}, ...} = allocateGenRegs[source, destAsSource, dest] | registerAllocate({instr=IndexedCaseOperation{testReg}, ...}) = allocateGenReg testReg | registerAllocate({instr=PushExceptionHandler, ...}) = () | registerAllocate({instr=PopExceptionHandler, ...}) = () | registerAllocate({instr=BeginHandler{packetReg}, ...}) = allocateGenReg packetReg | registerAllocate({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ...}, ...}) = allocateGenRegs[vec1Addr, vec2Addr, length] | registerAllocate({instr=BlockMove{srcAddr, destAddr, length, ...}, ...}) = allocateGenRegs[srcAddr, destAddr, length] | registerAllocate({instr=AddSubXSP{source, dest, ...}, ...}) = ( allocateOptGenReg dest; allocateGenReg source ) | registerAllocate({instr=TouchValue{source, ...}, ...}) = allocateGenReg source | registerAllocate({instr=LoadAcquireExclusive{ base, dest }, ...}) = allocateGenRegs[base, dest] | registerAllocate({instr=StoreReleaseExclusive{ base, source, result }, ...}) = ( allocateGenRegs[base, result]; allocateOptGenReg source ) | registerAllocate({instr=MemoryBarrier, ...}) = () | registerAllocate({instr=ConvertIntToFloat{ source, dest, ...}, ...}) = (allocateFloatReg dest; allocateGenReg source) | registerAllocate({instr=ConvertFloatToInt{ source, dest, ...}, ...}) = (allocateGenReg dest; allocateFloatReg source) | registerAllocate({instr=UnaryFloatingPt{ source, dest, ...}, ...}) = allocateFloatRegs[source, dest] | registerAllocate({instr=BinaryFloatingPoint{ arg1, arg2, dest, ...}, ...}) = allocateFloatRegs[arg1, arg2, dest] | registerAllocate({instr=CompareFloatingPoint{ arg1, arg2, ...}, ...}) = allocateFloatRegs[arg1, arg2] (* Depth-first scan. *) val visited = Array.array(numBlocks, false) fun processBlocks blockNo = if Array.sub(visited, blockNo) then () (* Done or currently being done. *) else let val () = Array.update(visited, blockNo, true) val ExtendedBasicBlock { flow, block, passThrough, exports, ...} = Vector.sub(blocks, blockNo) (* Add the hints for this block before the actual allocation of registers. *) val _ = List.app addHints block val () = (* Process the dependencies first. *) case flow of ExitCode => () | Unconditional m => processBlocks m | Conditional {trueJump, falseJump, ...} => (processBlocks trueJump; processBlocks falseJump) | IndexedBr cases => List.app processBlocks cases | SetHandler{ handler, continue } => (processBlocks handler; processBlocks continue) | UnconditionalHandle _ => () | ConditionalHandle { continue, ...} => processBlocks continue (* Now this block. *) local (* We assume that anything used later will have been allocated a register. This is generally true except for a loop where the use may occur earlier. *) val exported = setToList passThrough @ setToList exports fun findAReg r = case Vector.sub(regProps, r) of RegPropStack _ => () | _ => ignore(allocateFindRegister(r, NONE, generalRegisters)) in val () = List.app findAReg exported end in List.foldr(fn (c, ()) => registerAllocate c) () block end (* Turn the abstract icode into a concrete version by allocating the registers. 
*) local fun getAllocatedReg(PReg r) = getOpt(Array.sub(allocatedRegs, r), GenReg XZero) fun getAllocatedGenReg r = case getAllocatedReg r of GenReg r => r | FPReg _ => raise InternalError "getAllocateGenReg: returned FP Reg" and getAllocatedFPReg r = case getAllocatedReg r of FPReg r => r | GenReg _ => raise InternalError "getAllocatedFPReg: returned Gen Reg" fun getAllocatedGenRegOrZero ZeroReg = XZero | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg fun getAllocatedArg(ArgInReg reg) = ArgInReg(getAllocatedGenReg reg) | getAllocatedArg(ArgOnStack stackLoc) = ArgOnStack stackLoc val getSaveRegs = List.map getAllocatedGenReg fun absToConcrete({instr=MoveRegister{ source, dest}, ...}): iCodeConcrete = MoveRegister { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} | absToConcrete({instr=LoadNonAddressConstant { dest, source}, ...}) = LoadNonAddressConstant { dest=getAllocatedGenReg dest, source=source} | absToConcrete({instr=LoadAddressConstant { dest, source}, ...}) = LoadAddressConstant { dest=getAllocatedGenReg dest, source=source} | absToConcrete({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...}) = LoadWithConstantOffset { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest, byteOffset=byteOffset, loadType=loadType} | absToConcrete({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...}) = LoadFPWithConstantOffset { base=getAllocatedGenReg base, dest=getAllocatedFPReg dest, byteOffset=byteOffset, floatSize=floatSize} | absToConcrete({instr=LoadWithIndexedOffset { base, dest, index, loadType}, ...}) = LoadWithIndexedOffset { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest, index=getAllocatedGenReg index, loadType=loadType} | absToConcrete({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize}, ...}) = LoadFPWithIndexedOffset { base=getAllocatedGenReg base, dest=getAllocatedFPReg dest, index=getAllocatedGenReg index, floatSize=floatSize} | absToConcrete({instr=GetThreadId { dest}, ...}) = GetThreadId { dest=getAllocatedGenReg dest} | absToConcrete({instr=ObjectIndexAddressToAbsolute { source, dest}, ...}) = ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} | absToConcrete({instr=AbsoluteToObjectIndex { source, dest}, ...}) = AbsoluteToObjectIndex { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} | absToConcrete({instr=AllocateMemoryFixed { bytesRequired, dest, saveRegs }, ...}) = AllocateMemoryFixed { dest=getAllocatedGenReg dest, bytesRequired=bytesRequired, saveRegs=getSaveRegs saveRegs} | absToConcrete({instr=AllocateMemoryVariable{size, dest, saveRegs}, ...}) = AllocateMemoryVariable{size=getAllocatedGenReg size, dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs} | absToConcrete({instr=InitialiseMem{size, addr, init}, ...}) = InitialiseMem{size=getAllocatedGenReg size, addr=getAllocatedGenReg addr, init=getAllocatedGenReg init} | absToConcrete({instr=BeginLoop, ...}) = BeginLoop | absToConcrete({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}) = let fun getStackArg{src, wordOffset, stackloc} = {src=getAllocatedArg src, wordOffset=wordOffset, stackloc=stackloc} and getRegArg{src, dst} = {src=getAllocatedArg src, dst=getAllocatedGenReg dst} in JumpLoop{ regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, checkInterrupt=Option.map getSaveRegs checkInterrupt} end | absToConcrete({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...}) = StoreWithConstantOffset{ 
base=getAllocatedGenReg base, source=getAllocatedGenReg source, byteOffset=byteOffset, loadType=loadType} | absToConcrete({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...}) = StoreFPWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, byteOffset=byteOffset, floatSize=floatSize} | absToConcrete({instr=StoreWithIndexedOffset { base, source, index, loadType}, ...}) = StoreWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, index=getAllocatedGenReg index, loadType=loadType} | absToConcrete({instr=StoreFPWithIndexedOffset { base, source, index, floatSize}, ...}) = StoreFPWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, index=getAllocatedGenReg index, floatSize=floatSize} | absToConcrete({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...}) = AddSubImmediate { source=getAllocatedGenReg source, dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length} | absToConcrete({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, ...}) = AddSubRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift} | absToConcrete({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...}) = LogicalImmediate { source=getAllocatedGenReg source, dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, immed=immed, logOp=logOp, length=length} | absToConcrete({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...}) = LogicalRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=getAllocatedGenRegOrZero dest, ccRef=ccRef, logOp=logOp, length=length, shift=shift} | absToConcrete({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...}) = ShiftRegister { source=getAllocatedGenReg source, shift=getAllocatedGenReg shift, dest=getAllocatedGenReg dest, direction=direction, opSize=opSize} | absToConcrete({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...}) = Multiplication { kind=kind, sourceA=getAllocatedGenRegOrZero sourceA, sourceM=getAllocatedGenReg sourceM, sourceN=getAllocatedGenReg sourceN, dest=getAllocatedGenReg dest} | absToConcrete({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...}) = Division { isSigned=isSigned, dividend=getAllocatedGenReg dividend, divisor=getAllocatedGenReg divisor, dest=getAllocatedGenReg dest, opSize=opSize} | absToConcrete({instr=BeginFunction {regArgs, stackArgs}, ...}) = BeginFunction {regArgs=map (fn (src, dst) => (getAllocatedGenReg src, dst)) regArgs, stackArgs=stackArgs} | absToConcrete({instr=FunctionCall{callKind, regArgs, stackArgs, dest, containers, saveRegs, ...}, ...}) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) in FunctionCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getAllocatedArg stackArgs, dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs, containers=containers} end | absToConcrete({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, ...}) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) and getStackArg{src, stack} = {src=getAllocatedArg src, stack=stack} in TailRecursiveCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, stackAdjust=stackAdjust, currStackSize=currStackSize} end | absToConcrete({instr=ReturnResultFromFunction{resultReg, returnReg, 
numStackArgs}, ...}) = ReturnResultFromFunction{resultReg=getAllocatedGenReg resultReg, returnReg=getAllocatedGenReg returnReg, numStackArgs=numStackArgs} | absToConcrete({instr=RaiseExceptionPacket{packetReg}, ...}) = RaiseExceptionPacket{packetReg=getAllocatedGenReg packetReg} | absToConcrete({instr=PushToStack{ source, container, copies }, ...}) = PushToStack{source=getAllocatedGenReg source, container=container, copies=copies} | absToConcrete({instr=LoadStack{ dest, container, field, wordOffset}, ...}) = LoadStack{ dest=getAllocatedGenReg dest, container=container, field=field, wordOffset=wordOffset } | absToConcrete({instr=StoreToStack{source, container, field, stackOffset}, ...}) = StoreToStack{source=getAllocatedGenReg source, container=container, field=field, stackOffset=stackOffset} | absToConcrete({instr=ContainerAddress{ dest, container, stackOffset}, ...}) = ContainerAddress{ dest=getAllocatedGenReg dest, container=container, stackOffset=stackOffset } | absToConcrete({instr=ResetStackPtr {numWords}, ...}) = ResetStackPtr {numWords=numWords} | absToConcrete({instr=TagValue{source, dest, isSigned, opSize}, ...}) = TagValue{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, isSigned=isSigned, opSize=opSize} | absToConcrete({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...}) = UntagValue{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, isSigned=isSigned, opSize=opSize} | absToConcrete({instr=BoxLarge{source, dest, saveRegs, ...}, ...}) = BoxLarge{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs} | absToConcrete({instr=UnboxLarge{source, dest}, ...}) = UnboxLarge{source=getAllocatedGenReg source, dest=getAllocatedGenReg dest} | absToConcrete({instr=BoxTagFloat{floatSize, source, dest, saveRegs}, ...}) = BoxTagFloat{floatSize=floatSize, source=getAllocatedFPReg source, dest=getAllocatedGenReg dest, saveRegs=getSaveRegs saveRegs} | absToConcrete({instr=UnboxTagFloat{floatSize, source, dest}, ...}) = UnboxTagFloat{floatSize=floatSize, source=getAllocatedGenReg source, dest=getAllocatedFPReg dest} | absToConcrete({instr=LoadAcquire { base, dest, loadType}, ...}) = LoadAcquire { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest, loadType=loadType} | absToConcrete({instr=StoreRelease { base, source, loadType}, ...}) = StoreRelease{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, loadType=loadType} | absToConcrete({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...}) = BitFieldShift { source=getAllocatedGenReg source, dest=getAllocatedGenReg dest, isSigned=isSigned, immr=immr, imms=imms, length=length} | absToConcrete({instr=BitFieldInsert{source, destAsSource, dest, length, immr, imms}, ...}) = BitFieldInsert { source=getAllocatedGenReg source, destAsSource=getAllocatedGenReg destAsSource, dest=getAllocatedGenReg dest, immr=immr, imms=imms, length=length} | absToConcrete({instr=IndexedCaseOperation{testReg}, ...}) = IndexedCaseOperation{testReg=getAllocatedGenReg testReg} | absToConcrete({instr=PushExceptionHandler, ...}) = PushExceptionHandler | absToConcrete({instr=PopExceptionHandler, ...}) = PopExceptionHandler | absToConcrete({instr=BeginHandler{packetReg}, ...}) = BeginHandler{packetReg=getAllocatedGenReg packetReg} | absToConcrete({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...}) = CompareByteVectors{vec1Addr=getAllocatedGenReg vec1Addr, vec2Addr=getAllocatedGenReg vec2Addr, length=getAllocatedGenReg length, ccRef=ccRef} | 
absToConcrete({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...}) = BlockMove{srcAddr=getAllocatedGenReg srcAddr, destAddr=getAllocatedGenReg destAddr, length=getAllocatedGenReg length, isByteMove=isByteMove} | absToConcrete({instr=AddSubXSP{source, dest, isAdd}, ...}) = AddSubXSP { source=getAllocatedGenReg source, dest=getAllocatedGenRegOrZero dest, isAdd=isAdd} | absToConcrete({instr=TouchValue{source}, ...}) = TouchValue { source=getAllocatedGenReg source} | absToConcrete({instr=LoadAcquireExclusive{ base, dest }, ...}) = LoadAcquireExclusive { base=getAllocatedGenReg base, dest=getAllocatedGenReg dest} | absToConcrete({instr=StoreReleaseExclusive{ base, source, result }, ...}) = StoreReleaseExclusive{ base=getAllocatedGenReg base, source=getAllocatedGenRegOrZero source, result=getAllocatedGenReg result} | absToConcrete({instr=MemoryBarrier, ...}) = MemoryBarrier | absToConcrete({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...}) = ConvertIntToFloat{ source=getAllocatedGenReg source, dest=getAllocatedFPReg dest, srcSize=srcSize, destSize=destSize} | absToConcrete({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...}) = ConvertFloatToInt{ source=getAllocatedFPReg source, dest=getAllocatedGenReg dest, srcSize=srcSize, destSize=destSize, rounding=rounding} | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp}, ...}) = UnaryFloatingPt{ source=getAllocatedFPReg source, dest=getAllocatedFPReg dest, fpOp=fpOp} | absToConcrete({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...}) = BinaryFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, dest=getAllocatedFPReg dest, fpOp=fpOp, opSize=opSize} | absToConcrete({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...}) = CompareFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, opSize=opSize, ccRef=ccRef} in fun concreteBlock(ExtendedBasicBlock{ block, flow, ...}) = BasicBlock{block=map absToConcrete block, flow=flow} end in processBlocks 0; (* If the failures list is empty we succeeded. *) case !failures of [] => (* Return the allocation vector. We may have unused registers, *) AllocateSuccess(Vector.map concreteBlock blocks) (* Else we'll have to spill something. *) | l => AllocateFailure l end val nGenRegs = List.length generalRegisters structure Sharing = struct type intSet = intSet and extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg and xReg = xReg and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeConflicts.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeConflicts.ML deleted file mode 100644 index d7cd0fe0..00000000 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeConflicts.ML +++ /dev/null @@ -1,218 +0,0 @@ -(* - Copyright (c) 2021 David C.J. Matthews - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - License version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. 
- - You should have received a copy of the GNU Lesser General Public - License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*) - -functor Arm64ICodeConflicts( - structure Arm64ICode: ARM64ICODE - structure IntSet: INTSET - structure Identify: ARM64IDENTIFYREFERENCES - sharing Arm64ICode.Sharing = Identify.Sharing = IntSet -): ARM64ICODECONFLICTS = -struct - open Arm64ICode - open IntSet - open Identify - - type conflictState = - { - conflicts: intSet, realConflicts: reg list - } - - type triple = {instr: iCodeAbstract, current: intSet, active: intSet} - - exception InternalError = Misc.InternalError - - (* Get the conflict sets. This code was originally part of identifyRegisterState and - was split off. *) - fun getConflictStates (blocks: extendedBasicBlock vector, maxPRegs) = - let - (* Other registers that conflict with this i.e. cannot share the same - real register. *) - val regConflicts = Array.array(maxPRegs, emptySet) - (* Real registers that cannot be used for this because they are needed for - an instruction. Only X30 in calls and RTS traps. *) - and regRealConflicts = Array.array(maxPRegs, []: reg list) - - fun addConflictsTo(addTo, conflicts) = - List.app(fn aReg => Array.update(regConflicts, aReg, union(Array.sub(regConflicts, aReg), conflicts))) addTo - - (* To reserve a register we need to add the real register to the - real conflict sets of all the abstract conflicts. *) - local - fun isInset reg set = List.exists (fn r => r = reg) set - in - fun reserveRegister(reserveFor, reg) = - let - fun reserveAReg r = - let - val absConflicts = Array.sub(regConflicts, r) - fun addConflict i = - if isInset i reserveFor then () else addRealConflict (i, reg) - in - List.app addConflict (setToList absConflicts) - end - in - List.app reserveAReg reserveFor - end - - and addRealConflict (i, reg) = - let - val currentConflicts = Array.sub(regRealConflicts, i) - in - if isInset reg currentConflicts - then () - else Array.update(regRealConflicts, i, reg :: currentConflicts) - end - end - - fun conflictsForInstr passThrough {instr, current, ...} = - let - val {sources, dests} = getInstructionRegisters instr - fun regNo(PReg i) = i - val destRegNos = map regNo dests - and sourceRegNos = map regNo sources - val destSet = listToSet destRegNos - val afterRemoveDests = minus(current, destSet) - - local - (* In almost all circumstances the destination and sources don't - conflict and the same register can be used as a destination and - a source. BoxLarge can only store the value after the memory - has been allocated. BitFieldInsert has to copy the "destAsSource" - value into the destination so cannot use the same register for - the "source". *) - val postInstruction = - case instr of - BoxLarge _ => destRegNos @ sourceRegNos - | BoxTagFloat _ => destRegNos @ sourceRegNos (* Not sure about this. *) - | BitFieldInsert{source, ...} => regNo source :: destRegNos - | _ => destRegNos - in - (* If there is more than one destination they conflict with each other. *) - val () = addConflictsTo(postInstruction, listToSet postInstruction); - (* Mark conflicts for the destinations, i.e. after the instruction. - The destinations conflict with the registers that are used - subsequently. *) - val () = addConflictsTo(postInstruction, current); - val () = addConflictsTo(postInstruction, passThrough); - - (* Mark conflicts for the sources i.e. before the instruction. 
*) - (* Sources must be set up as conflicts with each other i.e. when we - come to allocate registers we must choose different real registers - for different abstract registers. *) - val () = addConflictsTo(sourceRegNos, listToSet sourceRegNos) - val () = addConflictsTo(sourceRegNos, afterRemoveDests); - val () = addConflictsTo(sourceRegNos, passThrough) - end - - (* I'm not sure if this is needed. There was a check in the old code to ensure that - different registers were used for loop variables even if they were actually unused. - This may happen anyway. - Comment and code copied from X86 version. Retain it for the moment. *) - val () = - case instr of - JumpLoop{regArgs, ...} => - let - val destRegs = List.foldl(fn ({dst=PReg loopReg, ...}, dests) => loopReg :: dests) [] regArgs - in - addConflictsTo(destRegs, listToSet destRegs); - addConflictsTo(destRegs, current); - addConflictsTo(destRegs, passThrough) - end - | _ => () - - (* Certain instructions are specific as to the real registers. *) - val () = - case instr of - ReturnResultFromFunction{ returnReg=PReg retReg, ... } => - (* We're going to put the return value in X0 so we can't use that for - the return address. *) - addRealConflict(retReg, GenReg X0) - - | RaiseExceptionPacket{ packetReg } => - (* This wasn't needed previously because we always pushed the registers - across an exception. *) - reserveRegister([regNo packetReg], GenReg X0) - - | BeginHandler { packetReg, ...} => - reserveRegister([regNo packetReg], GenReg X0) - - | FunctionCall { dest, regArgs, ...} => - (* This is only needed if we are saving the registers rather - than marking them as "must push". *) - let - val destReg = regNo dest - in - reserveRegister([destReg], GenReg X0); - (* The argument registers also conflict. In order to execute this call we need to load - the arguments into specific registers so we can't use them for values that we want after - the call. We use regNo dest here because that will conflict with everything - immediately afterwards. *) - List.app(fn (_, r) => reserveRegister([destReg], GenReg r)) regArgs; - (* Likewise X30 since that's the return address. *) - addRealConflict(destReg, GenReg X30) - end - - (* We can't use X30 as the result because it's needed for the return addr if we have to GC. *) - | AllocateMemoryFixed{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) - | AllocateMemoryVariable{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) - - | _ => () - in - () - end - - (* Process the block. *) - fun conflictsForBlock(ExtendedBasicBlock{block, passThrough, exports, ...}) = - let - (* We need to establish conflicts between all the registers active at - the end of the block since they may not be established elsewhere. - This isn't necessary for an unconditional branch since the - same registers will be included in the block that is the target - of the branch, possibly along with others. However if this is - a conditional or indexed branch we may have different sets at - each of the targets and we have to ensure that all the registers - differ. 
*) - val united = union(exports, passThrough) - val () = addConflictsTo(setToList united, united) - - val () = List.app (conflictsForInstr passThrough) block - in - () - end - - - val () = Vector.app conflictsForBlock blocks - - val conflictState: conflictState vector = - Vector.tabulate(maxPRegs, - fn i => { - conflicts = Array.sub(regConflicts, i), - realConflicts = Array.sub(regRealConflicts, i) - } - ) - in - conflictState - end - - structure Sharing = - struct - type reg = reg - and preg = preg - and intSet = intSet - and extendedBasicBlock = extendedBasicBlock - end -end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML index 7d4bdfb2..de6978e6 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML @@ -1,1282 +1,1251 @@ (* Copyright David C. J. Matthews 2021 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeToArm64Code( structure Arm64PreAssembly: ARM64PREASSEMBLY structure Debug: DEBUG structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET structure Pretty: PRETTY structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end sharing Arm64PreAssembly.Sharing = Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ICODEGENERATE = struct open Identify open Arm64ICode open Arm64PreAssembly open Address exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd (* These aren't currently used for anything. *) val workReg1 = X16 and workReg2 = X17 fun icodeToArm64Code {blocks: basicBlockConcrete vector, functionName, stackRequired, debugSwitches, resultClosure, profileObject, ...} = let val numBlocks = Vector.length blocks (* Load from and store to stack. *) fun loadFromStack(destReg, wordOffset, code) = if wordOffset >= 4096 then (LoadRegIndexed{regT=destReg, regN=X_MLStackPtr, regM=destReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(destReg, Word64.fromInt wordOffset)] @ code else (LoadRegScaled{regT=destReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code and storeToStack(sourceReg, wordOffset, workReg, code) = if wordOffset >= 4096 then (StoreRegIndexed{regT=sourceReg, regN=X_MLStackPtr, regM=workReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(workReg, Word64.fromInt wordOffset)] @ code else (StoreRegScaled{regT=sourceReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code datatype srcAndDest = IsInReg of xReg | IsOnStack of int local (* The registers are numbered from 0. Choose values that don't conflict with the stack addresses. 
*) fun regNo(XReg r) = ~1 - Word8.toInt r | regNo _ = ~1 - 31 type node = {src: srcAndDest, dst: srcAndDest } fun nodeAddress({dst=IsInReg r, ...}: node) = regNo r | nodeAddress({dst=IsOnStack a, ...}) = a fun arcs({src=IsOnStack wordOffset, ...}: node) = [wordOffset] | arcs{src=IsInReg r, ...} = [regNo r] in val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } end (* This is a general function for moving values into registers or to the stack where it is possible that the source values might also be in use as destinations. The stack is used for destinations only for tail recursive calls. *) fun moveMultipleValues(moves, code) = let fun moveValues ([], code) = code (* We're done. *) | moveValues (arguments, code) = let (* stronglyConnectedComponents does two things. It detects loops where it's not possible to move items without breaking the loop but more importantly it orders the dependencies so that if there are no loops we can load the source and store it in the destination knowing that we won't overwrite anything we might later need. *) val ordered = stronglyConnected arguments fun loadIntoReg(IsInReg sReg, dReg, code) = if sReg = dReg then code else (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | loadIntoReg(IsOnStack wordOffset, dReg, code) = loadFromStack(dReg, wordOffset, code) fun moveEachValue ([], code) = code | moveEachValue ([{dst=IsInReg dReg, src}] :: rest, code) = moveEachValue(rest, loadIntoReg(src, dReg, code)) | moveEachValue ([{dst=IsOnStack wordOffset, src=IsInReg sReg}] :: rest, code) = (* Storing into the stack. *) moveEachValue(rest, storeToStack(sReg, wordOffset, workReg1, code)) | moveEachValue ([{dst=IsOnStack dstOffset, src=IsOnStack srcOffset}] :: rest, code) = (* Copy a stack location - needs a load and store unless the address is the same. *) if dstOffset = srcOffset then moveEachValue(rest, code) else moveEachValue(rest, storeToStack(workReg2, dstOffset, workReg1, loadFromStack(workReg2, srcOffset, code))) | moveEachValue((cycle as first :: _ :: _) :: rest, code) = (* We have a cycle. *) let (* We need to exchange some of the arguments. Doing an exchange here will set the destination with the correct source. However we have to process every subsequent entry with the swapped registers. That may well mean that one of those entries becomes trivial. We also need to rerun stronglyConnectedComponents on at least the rest of this cycle. It's easiest to flatten the rest and do everything. *) (* Exchange the source and destination. We don't have an exchange instruction and there's a further complication. We could be copying between stack locations and their offsets could be > 4096. Since we've only got two work registers we need to use the hardware stack as an extra location. Stack-stack exchange is very rare so the extra overhead to handle the general case is worth it. *) local fun storeToDest(sReg, IsInReg dReg, _, code) = (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | storeToDest(sReg, IsOnStack wordOffset, work, code) = storeToStack(sReg, wordOffset, work, code) in fun exchange(IsInReg arg1Reg, arg2, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg1Reg}) :: storeToDest(arg1Reg, arg2, workReg1, loadIntoReg(arg2, workReg2, code)) | exchange(arg1, IsInReg arg2Reg, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg2Reg}) :: storeToDest(arg2Reg, arg1, workReg1, loadIntoReg(arg1, workReg2, code)) | exchange(arg1, arg2, code) = (* The hardware stack must be 16-byte aligned. 
*) storeToDest(workReg2, arg2, workReg1, (LoadRegUnscaled{regT=workReg2, regN=XSP, byteOffset=16, loadType=Load64, unscaledType=PostIndex}) :: storeToDest(workReg2, arg1, workReg1, loadIntoReg(arg2, workReg2, (StoreRegUnscaled{regT=workReg2, regN=XSP, byteOffset= ~16, loadType=Load64, unscaledType=PreIndex}) :: loadIntoReg(arg1, workReg2, code)))) end (* Try to find either a register-register move or a register-stack move. If not, use the first. If there's a stack-register move there will also be a register-stack move so we don't need to look for both. *) val {dst=selectDst, src=selectSrc} = first (* This includes this entry but after the swap we'll eliminate it. *) val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) val destAsSource = selectDst fun match(s1: srcAndDest, s2) = s1 = s2 fun swapSources{src, dst} = if match(src, selectSrc) then {src=destAsSource, dst=dst} else if match(src, destAsSource) then {src=selectSrc, dst=dst} else {src=src, dst=dst} val exchangeCode = exchange(selectDst, selectSrc, code) in moveValues(List.map swapSources flattened, exchangeCode) end | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *) raise InternalError "moveEachValue - empty set" in moveEachValue(ordered, code) end in moveValues(moves, code) end (* Where we have multiple specific registers as either source or destination there is the potential that a destination register is currently in use as a source. *) fun moveMultipleRegisters(regPairList, code) = let val regPairsAsDests = List.map(fn {src, dst} => {src=IsInReg src, dst=IsInReg dst}) regPairList in moveMultipleValues(regPairsAsDests, code) end fun moveIfNecessary({src, dst}, code) = if src = dst then code else MoveXRegToXReg{sReg=src, dReg=dst} :: code (* Add a constant word to the source register and put the result in the destination. regW is used as a work register if necessary. This is used both for addition and subtraction. *) fun addConstantWord({regS, regD, value=0w0, ...}, code) = if regS = regD then code else MoveXRegToXReg{sReg=regS, dReg=regD} :: code | addConstantWord({regS, regD, regW, value}, code) = let (* If we have to load the constant it's better if the top 32-bits are zero if possible. *) val (isSub, unsigned) = if value > Word64.<<(0w1, 0w63) then (true, ~ value) else (false, value) in if unsigned < Word64.<<(0w1, 0w24) then (* We can put up to 24 bits in a shifted and an unshifted constant. *) let val w = Word.fromLarge(Word64.toLarge unsigned) val high = Word.andb(Word.>>(w, 0w12), 0wxfff) val low = Word.andb(w, 0wxfff) val addSub = if isSub then SubImmediate else AddImmediate in if high <> 0w0 then ( (if low <> 0w0 then [addSub{regN=regD, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64}] else []) @ addSub{regN=regS, regD=regD, immed=high, shifted=true, setFlags=false, opSize=OpSize64} :: code ) else addSub{regN=regS, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64} :: code end else let (* To minimise the constant and increase the chances that it will fit in a single word look to see if we can shift it. 
*) fun getShift(value, shift) = if Word64.andb(value, 0w1) = 0w0 then getShift(Word64.>>(value, 0w1), shift+0w1) else (value, shift) val (shifted, shift) = getShift(unsigned, 0w0) in code <::> LoadNonAddr(regW, shifted) <::> (if isSub then SubShiftedReg else AddShiftedReg) {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift, setFlags=false, opSize=OpSize64} end end - fun getSaveRegs r = r - val startOfFunctionLabel = createLabel() (* Used for recursive calls/jumps *) val blockToLabelMap = Vector.tabulate(numBlocks, fn _ => createLabel()) fun getBlockLabel blockNo = Vector.sub(blockToLabelMap, blockNo) fun codeExtended _ (MoveRegister{source, dest, ...}, code) = moveIfNecessary({src=source, dst=dest}, code) | codeExtended _ (LoadNonAddressConstant{source, dest, ...}, code) = code <::> LoadNonAddr(dest, source) | codeExtended _ (LoadAddressConstant{source, dest, ...}, code) = code <::> LoadAddr(dest, source) | codeExtended _ (LoadWithConstantOffset{dest, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then LoadRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate} :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in LoadRegScaled{regT=dest, regN=base, unitOffset=unitOffset, loadType=loadType} :: code end | codeExtended _ (LoadFPWithConstantOffset{dest, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (LoadFPRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (LoadFPRegScaled{regT=dest, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (LoadWithIndexedOffset{dest, base, index, loadType, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (LoadRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithIndexedOffset{dest, base, index, floatSize, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX in (LoadFPRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (GetThreadId { dest}, code) = (* Load the thread id. This is always a 64-bit value. *) (LoadRegScaled{regT=dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code | codeExtended _ (ObjectIndexAddressToAbsolute{source, dest, ...}, code) = (AddShiftedReg{regM=source, regN=X_Base32in64, regD=dest, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code | codeExtended _ (AbsoluteToObjectIndex{source, dest, ...}, code) = let val destReg = dest in code <::> (SubShiftedReg{regM=X_Base32in64, regN=source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> (shiftConstant{shift=0w2, regN=destReg, regD=destReg, direction=ShiftRightLogical, opSize=OpSize64}) end | codeExtended _ (AllocateMemoryFixed{ bytesRequired, dest, saveRegs, ... 
}, code) = let val label = createLabel() val destReg = dest in code <@> (* Subtract the number of bytes required from the heap pointer and put in result reg. *) (if bytesRequired >= 0w4096 then [SubShiftedReg{regM=workReg1, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}, LoadNonAddr(workReg1, bytesRequired)] else [SubImmediate{regN=X_MLHeapAllocPtr, regD=destReg, immed=Word.fromLarge bytesRequired, shifted=false, setFlags=false, opSize=OpSize64}] ) <::> (* Compare with heap limit. *) SubShiftedReg{regM=X_MLHeapLimit, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, label) <::> (* Skip the trap if it's ok. *) - RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs} <::> + RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=saveRegs} <::> SetLabel label <::> MoveXRegToXReg{sReg=destReg, dReg=X_MLHeapAllocPtr} end | codeExtended _ (AllocateMemoryVariable{ size, dest, saveRegs, ... }, code) = let val trapLabel = createLabel() and noTrapLabel = createLabel() val destReg = dest and sizeReg = size (* Subtract the size into the result register. Subtract a further word for the length word and round down in 32-in-64. *) val subtractSize = if is32in64 then BitwiseLogical{bits= ~ 0w8, regN=destReg, regD=destReg, logOp=LogAnd, opSize=OpSize64, setFlags=false} :: SubImmediate{regN=destReg, regD=destReg, immed=0w4, shifted=false, setFlags=false, opSize=OpSize64} :: SubShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else SubImmediate{regN=destReg, regD=destReg, immed=0w8, shifted=false, setFlags=false, opSize=OpSize64} :: SubShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=destReg, shift=ShiftLSL 0w3, setFlags=false, opSize=OpSize64} :: code (* Check against the limit. If the size is large enough it is possible that this could wrap round. To check for that we trap if either the result is less than the limit or if it is now greater than the allocation pointer. *) in subtractSize <::> SubShiftedReg{regM=X_MLHeapLimit, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarryClear, trapLabel) <::> SubShiftedReg{regM=X_MLHeapAllocPtr, regN=destReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarryClear, noTrapLabel) <::> SetLabel trapLabel <::> - RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs} <::> + RTSTrap{rtsEntry=heapOverflowCallOffset, work=workReg1, save=saveRegs} <::> SetLabel noTrapLabel <::> MoveXRegToXReg{sReg=destReg, dReg=X_MLHeapAllocPtr} end | codeExtended _ (InitialiseMem{ size, addr, init}, code) = let val sizeReg = size and addrReg = addr and initReg = init val exitLabel = createLabel() and loopLabel = createLabel() (* This uses a loop to initialise. It's possible the size is zero so we have to check at the top of the loop. *) val (bShift, offset, loadType) = if is32in64 then (0w2, ~4, Load32) else (0w3, ~8, Load64) in code <::> (* Add the length in bytes so we point at the end. *) AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, shift=ShiftLSL bShift, setFlags=false, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Are we at the start? 
*) SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, exitLabel) <::> StoreRegUnscaled{regT=initReg, regN=workReg1, byteOffset=offset, loadType=loadType, unscaledType=PreIndex } <::> UnconditionalBranch loopLabel <::> SetLabel exitLabel end | codeExtended _ (BeginLoop, code) = code | codeExtended _ (JumpLoop{regArgs, stackArgs, checkInterrupt}, code) = let (* TODO: We could have a single list and use ArgOnStack and ArgInReg to distinguish. *) fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {wordOffset, src, ...} => {src=convertArg src, dst=IsOnStack wordOffset}) stackArgs val extRegArgs = map (fn {dst, src} => {src=convertArg src, dst=convertArg(ArgInReg dst)}) regArgs val code2 = moveMultipleValues(extStackArgs @ extRegArgs, code) in case checkInterrupt of NONE => code2 | SOME saveRegs => let val skipCheck = createLabel() in code2 <::> (* Put in stack-check code to allow this to be interrupted. *) LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} <::> SubShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, skipCheck) <::> - RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=getSaveRegs saveRegs} <::> + RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=saveRegs} <::> SetLabel skipCheck end end | codeExtended _ (StoreWithConstantOffset{source, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then (StoreRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in (StoreRegScaled{regT=source, regN=base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithConstantOffset{source, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (StoreFPRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (StoreFPRegScaled{regT=source, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (StoreWithIndexedOffset{source, base, index, loadType, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (StoreRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithIndexedOffset{source, base, index, floatSize, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. 
*) val scaleType = if is32in64 then ExtSXTW else ExtUXTX in (StoreFPRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (AddSubImmediate{ source, dest, immed, isAdd, length, ccRef}, code) = let val destReg = dest in ((if isAdd then AddImmediate else SubImmediate) {regN=source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (AddSubRegister{ base, shifted, dest, isAdd, length, ccRef, shift}, code) = let val destReg = dest in ( (if isAdd then AddShiftedReg else SubShiftedReg) {regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalImmediate{ source, dest, immed, logOp, length, ccRef}, code) = let val destReg = dest in (BitwiseLogical{regN=source, regD=destReg, bits=immed, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalRegister{ base, shifted, dest, logOp, length, ccRef, shift}, code) = let (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. *) val destReg = dest in (LogicalShiftedReg{regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (ShiftRegister{ direction, dest, source, shift, opSize }, code) = (ShiftRegisterVariable{regN=source, regM=shift, regD=dest, shiftDirection=direction, opSize=opSize}) :: code | codeExtended _ (Multiplication{ kind, dest, sourceA, sourceM, sourceN }, code) = let val destReg = dest and srcAReg = sourceA and srcNReg = sourceN and srcMReg = sourceM in (MultiplyAndAddSub{regM=srcMReg, regN=srcNReg, regA=srcAReg, regD=destReg, multKind=kind}) :: code end | codeExtended _ (Division{ isSigned, dest, dividend, divisor, opSize }, code) = (DivideRegs{regN=dividend, regM=divisor, regD=dest, isSigned=isSigned, opSize=opSize}) :: code | codeExtended _ (BeginFunction{regArgs, ...}, code) = let val skipCheck = createLabel() val defaultWords = 10 (* This is wired into the RTS. *) val workRegister = workReg1 val debugTrapAlways = false (* Can be set to true for debugging *) (* Test with either the stack-pointer or a high-water value. The RTS assumes that X9 has been used as the high-water if it is called through stackOverflowXCallOffset rather than stackOverflowCallOffset *) val (testReg, entryPt, code1) = if stackRequired <= defaultWords then (X_MLStackPtr, stackOverflowCallOffset, code) else (X9, stackOverflowXCallOffset, addConstantWord({regS=X_MLStackPtr, regD=X9, regW=workRegister, value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt stackRequired}, code)) (* Skip the RTS call if there is enough stack. N.B. The RTS can modify the end-of-stack value to force a trap here even if there is really enough stack. *) val code2 = (if debugTrapAlways then [] else [ConditionalBranch(CondCarrySet, skipCheck), SubShiftedReg{regM=workRegister, regN=testReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}]) @ (* Load the end-of-stack value. 
*) LoadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} :: code1 val code3 = code2 <::> RTSTrap{rtsEntry=entryPt, work=workReg1, save=List.map #2 regArgs} <::> SetLabel skipCheck val usedRegs = regArgs fun mkPair(pr, rr) = {src=rr,dst=pr} val regPairs = List.map mkPair usedRegs in moveMultipleRegisters(regPairs, code3) end | codeExtended _ (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, code) = let fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {stack, src} => {dst=IsOnStack(stack+currStackSize), src=convertArg src}) stackArgs val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs (* Tail recursive calls are complicated because we generally have to overwrite the existing stack. That means storing the arguments in the right order to avoid overwriting a value that we are using for a different argument. *) fun codeTailCall(arguments, stackAdjust, code) = if stackAdjust < 0 then let (* If the function we're calling takes more arguments on the stack than the current function we will have to extend the stack. Do that by pushing the argument whose offset is at -1. Then adjust all the offsets and repeat. *) val {src=argM1, ...} = valOf(List.find(fn {dst=IsOnStack ~1, ...} => true | _ => false) arguments) fun renumberArgs [] = [] | renumberArgs ({dst=IsOnStack ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. *) | renumberArgs ({dst, src} :: args) = let val newDest = case dst of IsOnStack d => IsOnStack(d+1) | regDest => regDest val newSrc = case src of IsOnStack wordOffset => IsOnStack(wordOffset+1) | other => other in {dst=newDest, src=newSrc} :: renumberArgs args end val pushCode = case argM1 of IsOnStack wordOffset => (StoreRegUnscaled{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: loadFromStack(workReg2, wordOffset, code) | IsInReg reg => (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: code in codeTailCall(renumberArgs arguments, stackAdjust+1, pushCode) end else let val loadArgs = moveMultipleValues(arguments, code) in if stackAdjust = 0 then loadArgs else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt stackAdjust * Word.toLarge nativeWordSize}, loadArgs) end val setArgumentsCode = codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code) val jumpToFunctionCode = case callKind of Recursive => [(UnconditionalBranch startOfFunctionLabel)] | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else [(BranchReg{regD=workReg1, brRegType=BRRBranch}), (LoadAddr(workReg1, m))] | FullCall => if is32in64 then [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}, AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}] else [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64}] in jumpToFunctionCode @ setArgumentsCode end | codeExtended _ (FunctionCall{callKind, regArgs=regArgs, stackArgs=stackArgs, dest, saveRegs, ...}, code) = let val destReg = dest local fun pushStackArgs ([], _, code) = code | pushStackArgs (ArgOnStack {wordOffset, ...} ::args, argNum, code) = let (* Have to adjust the offsets of stack arguments. 
*) val adjustedOffset = wordOffset+argNum in pushStackArgs(args, argNum+1, loadFromStack(workReg1, adjustedOffset, code) <::> StoreRegUnscaled{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) end | pushStackArgs (ArgInReg reg ::args, argNum, code) = pushStackArgs(args, argNum+1, code <::> (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *)) (* We have to adjust any stack offset to account for the arguments we've pushed. *) val numStackArgs = List.length stackArgs fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack(wordOffset+numStackArgs) | convertArg(ArgInReg reg) = IsInReg(reg) in val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val loadArgs = moveMultipleValues(extRegArgs, pushedArgs) end (* Push the registers before the call and pop them afterwards. *) fun makeSavesAndCall([], code) = ( case callKind of Recursive => code <::> (BranchAndLink startOfFunctionLabel) | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else code <::> (LoadAddr(workReg1, m)) <::> (BranchReg{regD=workReg1, brRegType=BRRAndLink}) | FullCall => if is32in64 then code <::> AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} else code <::> LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} ) | makeSavesAndCall(reg::regs, code) = let val areg = reg in makeSavesAndCall(regs, code <::> StoreRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) <::> LoadRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex} end in moveIfNecessary({dst=destReg, src=X0}, makeSavesAndCall(saveRegs, loadArgs)) end | codeExtended _ (ReturnResultFromFunction { resultReg, returnReg, numStackArgs }, code) = let - val resultReg = resultReg - and returnReg = returnReg - fun resetStack(0, code) = code | resetStack(nItems, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) in BranchReg{regD=returnReg, brRegType=BRRReturn} :: resetStack(numStackArgs, moveIfNecessary({src=resultReg, dst=X0}, code)) end | codeExtended _ (RaiseExceptionPacket{ packetReg }, code) = (* We need a work register here. It can be any register other than X0 since we don't preserve registers across calls. *) (* Copy the handler "register" into the stack pointer. Then jump to the address in the first word. The second word is the next handler. This is set up in the handler. We have a lot more raises than handlers since most raises are exceptional conditions such as overflow so it makes sense to minimise the code in each raise. *) moveIfNecessary({src=packetReg, dst=X0}, code) <::> LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch } | codeExtended _ (PushToStack{ source, copies, ... 
}, code) = let val reg = source val _ = copies > 0 orelse raise InternalError "PushToStack: copies<1" fun pushn(0, c) = c | pushn(n, c) = pushn(n-1, (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: c) in pushn(copies, code) end | codeExtended _ (LoadStack{ dest, wordOffset, ... }, code) = loadFromStack(dest, wordOffset, code) | codeExtended _ (StoreToStack{ source, stackOffset, ... }, code) = (* Store into the stack to set a field of a container. Always 64-bits. *) storeToStack(source, stackOffset, workReg1, code) | codeExtended _ (ContainerAddress{ dest, stackOffset, ... }, code) = (* Set the register to an offset in the stack. *) let - val destReg = dest val _ = stackOffset >= 0 orelse raise InternalError "codeGenICode: ContainerAddress - negative offset" val byteOffset = stackOffset * Word.toInt nativeWordSize in if byteOffset >= 4096 - then code <::> LoadNonAddr(destReg, Word64.fromInt byteOffset) <::> - AddShiftedReg{regN=X_MLStackPtr, regM=destReg, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64} - else code <::> AddImmediate{regN=X_MLStackPtr, regD=destReg, immed=Word.fromInt byteOffset, + then code <::> LoadNonAddr(dest, Word64.fromInt byteOffset) <::> + AddShiftedReg{regN=X_MLStackPtr, regM=dest, regD=dest, shift=ShiftNone, setFlags=false, opSize=OpSize64} + else code <::> AddImmediate{regN=X_MLStackPtr, regD=dest, immed=Word.fromInt byteOffset, shifted=false, setFlags=false, opSize=OpSize64} end | codeExtended _ (ResetStackPtr{ numWords, ... }, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt numWords * Word.toLarge nativeWordSize}, code) | codeExtended _ (TagValue{ source, dest, isSigned=_, opSize }, code) = - let - val sourceReg = source - and destReg = dest (* Shift left by one bit and add one. *) - in code <::> - shiftConstant{ direction=ShiftLeft, regD=destReg, regN=sourceReg, shift=0w1, opSize=opSize } <::> - BitwiseLogical{ bits=0w1, regN=destReg, regD=destReg, opSize=opSize, setFlags=false, logOp=LogOr} - end + shiftConstant{ direction=ShiftLeft, regD=dest, regN=source, shift=0w1, opSize=opSize } <::> + BitwiseLogical{ bits=0w1, regN=dest, regD=dest, opSize=opSize, setFlags=false, logOp=LogOr} | codeExtended _ (UntagValue{ source, dest, isSigned, opSize }, code) = code <::> shiftConstant{ direction=if isSigned then ShiftRightArithmetic else ShiftRightLogical, regD=dest, regN=source, shift=0w1, opSize=opSize } | codeExtended _ (BoxLarge{ source, dest, saveRegs }, code) = boxSysWord({source=source, destination=dest, - workReg=workReg1, saveRegs=getSaveRegs saveRegs}, code) + workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (UnboxLarge{ source, dest }, code) = let (* Unbox a large word. The argument is a poly word. 
*) val destReg = dest and srcReg = source in if is32in64 then LoadRegScaled{regT=destReg, regN=destReg, unitOffset=0, loadType=Load64} :: AddShiftedReg{regM=srcReg, regN=X_Base32in64, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else LoadRegScaled{regT=destReg, regN=srcReg, unitOffset=0, loadType=Load64} :: code end | codeExtended _ (BoxTagFloat{ floatSize=Double64, source, dest, saveRegs }, code) = boxDouble({source=source, destination=dest, - workReg=workReg1, saveRegs=getSaveRegs saveRegs}, code) + workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (BoxTagFloat{ floatSize=Float32, source, dest, saveRegs }, code) = let val floatReg = source and fixedReg = dest in if is32in64 then boxFloat({source=floatReg, destination=fixedReg, - workReg=workReg1, saveRegs=getSaveRegs saveRegs}, code) + workReg=workReg1, saveRegs=saveRegs}, code) else code <::> MoveFPToGeneral{regN=floatReg, regD=fixedReg, floatSize=Float32} <::> shiftConstant{ direction=ShiftLeft, shift=0w32, regN=fixedReg, regD=fixedReg, opSize=OpSize64} <::> BitwiseLogical{ bits=0w1, regN=fixedReg, regD=fixedReg, opSize=OpSize64, setFlags=false, logOp=LogOr} end | codeExtended _ (UnboxTagFloat { floatSize=Double64, source, dest }, code) = - let - val addrReg = source - and valReg = dest - in if is32in64 then code <::> - AddShiftedReg{regM=addrReg, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, + AddShiftedReg{regM=source, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> - LoadFPRegScaled{regT=valReg, regN=workReg1, unitOffset=0, floatSize=Double64} - else code <::> LoadFPRegScaled{regT=valReg, regN=addrReg, unitOffset=0, floatSize=Double64} - end + LoadFPRegScaled{regT=dest, regN=workReg1, unitOffset=0, floatSize=Double64} + else code <::> LoadFPRegScaled{regT=dest, regN=source, unitOffset=0, floatSize=Double64} | codeExtended _ (UnboxTagFloat { floatSize=Float32, source, dest }, code) = - let - val addrReg = source - and valReg = dest (* This is tagged in native 64-bits. In 32-in-64 we're loading 32-bits so we can use an indexed load directly. 
*) - in if is32in64 - then code <::> LoadFPRegIndexed{regN=X_Base32in64, regM=addrReg, regT=valReg, option=ExtUXTX ScaleOrShift, floatSize=Float32} - else - code <::> - shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=addrReg, regD=workReg1, opSize=OpSize64} <::> - MoveGeneralToFP{regN=workReg1, regD=valReg, floatSize=Float32} - end + then code <::> LoadFPRegIndexed{regN=X_Base32in64, regM=source, regT=dest, option=ExtUXTX ScaleOrShift, floatSize=Float32} + else code <::> + shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=source, regD=workReg1, opSize=OpSize64} <::> + MoveGeneralToFP{regN=workReg1, regD=dest, floatSize=Float32} | codeExtended _ (LoadAcquire{dest, base, loadType, ...}, code) = LoadAcquireReg{regT=dest, regN=base, loadType=loadType} :: code | codeExtended _ (StoreRelease{source, base, loadType, ...}, code) = StoreReleaseReg{regT=source, regN=base, loadType=loadType} :: code | codeExtended _ (BitFieldShift{ source, dest, isSigned, length, immr, imms }, code) = - let - val srcReg = source - val destReg = dest - in - BitField{immr=immr, imms=imms, regN=srcReg, regD=destReg, + BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=if isSigned then BFSigned else BFUnsigned, opSize=length} :: code - end | codeExtended _ (BitFieldInsert{ source, destAsSource, dest, length, immr, imms }, code) = let (* If we're using BitFieldMove we retain some of the bits of the destination. The higher levels require us to treat that as a source. *) - val sourceReg = source - and destReg = dest - val _ = sourceReg = destReg andalso raise InternalError "codeExtended: bitfield: dest=source" + val _ = source = dest andalso raise InternalError "codeExtended: bitfield: dest=source" in - BitField{immr=immr, imms=imms, regN=source, regD=destReg, bitfieldKind=BFInsert, opSize=length} :: - moveIfNecessary({src=destAsSource, dst=destReg}, code) + BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=BFInsert, opSize=length} :: + moveIfNecessary({src=destAsSource, dst=dest}, code) end | codeExtended {flow} (IndexedCaseOperation{testReg}, code) = let (* testReg contains the original value after the lowest value has been subtracted. Since both the original value and the lowest value were tagged it contains a shifted but untagged value. *) (* This should only be within a block with an IndexedBr flow type. *) val cases = case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation" val caseLabels = map getBlockLabel cases val tableLabel = createLabel() val startOfCase = code <::> LoadLabelAddress(workReg1, tableLabel) <::> (* Add the value shifted by one since it's already shifted. *) AddShiftedReg{regN=workReg1, regD=workReg1, regM=testReg, shift=ShiftLSL 0w1, setFlags=false, opSize=OpSize64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch} <::> SetLabel tableLabel val addCases = List.foldl (fn (label, code) => (UnconditionalBranch label) :: code) startOfCase caseLabels in addCases end | codeExtended {flow} (PushExceptionHandler, code) = let (* This should only be within a block with a SetHandler flow type. *) val handleLabel = case flow of SetHandler{ handler, ...} => handler | _ => raise InternalError "codeGenICode: PushExceptionHandler" val labelRef = getBlockLabel handleLabel in (* Push the old handler and the handler entry point and set the "current handler" to point to the stack after we've pushed these. 
*) code <::> LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadLabelAddress(workReg2, labelRef) <::> StoreRegPair{regT1=workReg2, regT2=workReg1, regN=X_MLStackPtr, unitOffset= ~2, unscaledType=PreIndex, loadType=Load64} <::> StoreRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} end | codeExtended _ (PopExceptionHandler, code) = (* Remove and discard the handler we've set up. Pop the previous handler and put into "current handler". *) code <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} | codeExtended _ (BeginHandler{packetReg}, code) = let val beginHandleCode = code <::> (* The exception raise code resets the stack pointer to the value in the exception handler so this is probably redundant. Leave it for the moment, *) LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} in moveIfNecessary({src=X0, dst=packetReg }, beginHandleCode) end | codeExtended _ (CompareByteVectors{vec1Addr, vec2Addr, length, ...}, code) = let (* Construct a loop to compare two vectors of bytes. *) val vec1Reg = vec1Addr and vec2Reg = vec2Addr and lenReg = length val loopLabel = createLabel() and exitLabel = createLabel() in code <::> (* Set the CC to Equal before we start in case length = 0 *) SubShiftedReg{regM=lenReg, regN=lenReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Start of loop *) CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Go to the end when len = zero *) (* Load the bytes for the comparison and increment each. *) LoadRegUnscaled{regT=workReg1, regN=vec1Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> LoadRegUnscaled{regT=workReg2, regN=vec2Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) (* Compare *) SubShiftedReg{regM=workReg2, regN=workReg1, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, loopLabel) <::> (* Loop if they're equal *) SetLabel exitLabel end | codeExtended _ (BlockMove{srcAddr, destAddr, length, isByteMove}, code) = let (* Construct a loop to move the data. 
*) val srcReg = srcAddr and destReg = destAddr and lenReg = length val loopLabel = createLabel() and exitLabel = createLabel() val (offset, loadType) = if isByteMove then (1, Load8) else if is32in64 then (4, Load32) else (8, Load64) in code <::> SetLabel loopLabel (* Start of loop *) <::> CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Exit when length = 0 *) LoadRegUnscaled{regT=workReg1, regN=srcReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> StoreRegUnscaled{regT=workReg1, regN=destReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) UnconditionalBranch loopLabel <::> (* Back to the start *) SetLabel exitLabel end | codeExtended _ (AddSubXSP{ source, dest, isAdd }, code) = let val allocFreeCode = (if isAdd then AddExtendedReg else SubExtendedReg) {regM=source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64} :: code in case dest of XZero => allocFreeCode | destReg => (* We have to use add here to get the SP into the destination instead of the usual move. *) AddImmediate{regN=XSP, regD=destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64} :: allocFreeCode end | codeExtended _ (TouchValue _, code) = code (* Don't need to do anything now. *) (* Used in mutex operations. *) | codeExtended _ (LoadAcquireExclusive{ base, dest }, code) = LoadAcquireExclusiveRegister{regN=base, regT=dest} :: code | codeExtended _ (StoreReleaseExclusive{ base, source, result }, code) = StoreReleaseExclusiveRegister{regS=result, regT=source, regN=base} :: code | codeExtended _ (MemoryBarrier, code) = code <::> MemBarrier | codeExtended _ (ConvertIntToFloat{ source, dest, srcSize, destSize}, code) = (CvtIntToFP{regN=source, regD=dest, floatSize=destSize, opSize=srcSize}) :: code | codeExtended _ (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, code) = - (CvtFloatToInt{regN=source, regD=dest, - round=rounding, floatSize=srcSize, opSize=destSize}) :: code + (CvtFloatToInt{regN=source, regD=dest, round=rounding, floatSize=srcSize, opSize=destSize}) :: code | codeExtended _ (UnaryFloatingPt{ source, dest, fpOp}, code) = (FPUnaryOp{regN=source, regD=dest, fpOp=fpOp}) :: code | codeExtended _ (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, code) = - (FPBinaryOp{regN=arg1, regM=arg2, regD=dest, - floatSize=opSize, fpOp=fpOp}) :: code + (FPBinaryOp{regN=arg1, regM=arg2, regD=dest, floatSize=opSize, fpOp=fpOp}) :: code | codeExtended _ (CompareFloatingPoint{ arg1, arg2, opSize, ...}, code) = (FPComparison{regN=arg1, regM=arg2, floatSize=opSize}) :: code local (* processed - set to true when a block has been processed. *) val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) (* Find the blocks that reference this one. This isn't essential but allows us to try to generate blocks in the order of the control flow. This in turn may allow us to use short branches rather than long ones. *) val labelRefs = Array.array(numBlocks, []) datatype flowCode = FlowCodeSimple of int | FlowCodeCMove of {code: precode list, trueJump: int, falseJump: int} (* Process this recursively to set the references. If we have unreachable blocks, perhaps because they've been merged, we don't want to include them in the reference counting. This shouldn't happen now that IdentifyReferences removes unreferenced blocks. 
*) fun setReferences fromLabel toLabel = case Array.sub(labelRefs, toLabel) of [] => (* Not yet visited at all. *) let val BasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) val refs = case flow of ExitCode => [] | Unconditional lab => [lab] | Conditional{trueJump, falseJump, ... } => [trueJump, falseJump] | IndexedBr labs => labs | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val () = if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () in List.app (setReferences toLabel) refs end | refs => (* We've visited this at least once. Just add us to the list. *) Array.update(labelRefs, toLabel, fromLabel :: refs) val _ = setReferences 0 0 (* Process the blocks. We keep the "stack" explicit rather than using recursion because this allows us to select both arms of a conditional branch sooner. *) fun genCode(toDo, lastFlow, code) = case List.filter (not o haveProcessed) toDo of [] => let (* There's nothing left to do. We may need to add a final branch to the end. *) val finalBranch = case lastFlow of ExitCode => [] | IndexedBr _ => [] | Unconditional dest => [(UnconditionalBranch(getBlockLabel dest))] | Conditional { condition, trueJump, falseJump, ...} => [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] | SetHandler { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] in finalBranch @ code (* Done. *) end | stillToDo as head :: _ => let local (* Check the references. If all the sources that lead up to this have already been processed we won't have any backward jumps. *) fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) val continuation = case lastFlow of ExitCode => NONE | IndexedBr _ => NONE (* We could put the last branch in here. *) | Unconditional dest => if not (haveProcessed dest) andalso available dest then SOME(FlowCodeSimple dest) else NONE | Conditional {trueJump, falseJump, condition, ...} => (* We can usually choose either destination and in nearly all cases it won't matter. The default branch is not to take forward jumps so if there is reason to believe that one branch is more likely we should follow that branch now and leave the other. If we have Cond(No)Overflow we assume that overflow is unusual. If one branch raises an exception we assume that that is unusual. *) let val (first, second) = case (condition, Vector.sub(blocks, falseJump)) of (CondNoOverflow, _) => (trueJump, falseJump) | (_, BasicBlock{ flow=ExitCode, block, ...}) => if List.exists(fn RaiseExceptionPacket _ => true | _ => false) block then (trueJump, falseJump) else (falseJump, trueJump) | _ => (falseJump, trueJump) in if not (haveProcessed first) andalso available first then SOME(FlowCodeSimple first) else if not (haveProcessed second) andalso available second then SOME(FlowCodeSimple second) else NONE end | SetHandler { continue, ... } => (* We want the continuation if possible. We'll need a branch round the handler so that won't help. *) if not (haveProcessed continue) andalso available continue then SOME(FlowCodeSimple continue) else NONE | UnconditionalHandle _ => NONE | ConditionalHandle _ => NONE in (* First choice - continue the existing block. Second choice - the first item whose sources have all been processed. Third choice - something from the list. 
*) val picked = case continuation of SOME c => c | NONE => case List.find available stillToDo of SOME c => FlowCodeSimple c | NONE => FlowCodeSimple head end in case picked of FlowCodeSimple picked => let val () = Array.update(processed, picked, true) (* Code to terminate the previous block. *) val startCode = case lastFlow of ExitCode => [] | IndexedBr _ => [] | UnconditionalHandle _ => [] | Unconditional dest => if dest = picked then [] else [(UnconditionalBranch(getBlockLabel dest))] | ConditionalHandle { continue, ...} => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | SetHandler { continue, ... } => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | Conditional { condition, trueJump, falseJump, ...} => if picked = falseJump (* Usual case. *) then [(ConditionalBranch(condition, getBlockLabel trueJump))] else if picked = trueJump then (* We have a jump to the true condition. Invert the jump. This is more than an optimisation. Because this immediately precedes the true block we're not going to generate a label. *) [(ConditionalBranch(invertTest condition, getBlockLabel falseJump))] else [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] (* Code-generate the body with the code we've done so far at the end. Add a label at the start if necessary. *) local (* If the previous block dropped through to this and this was the only reference then we don't need a label. *) fun onlyJumpingHere (lab: int) = if lab <> picked then false else case Array.sub(labelRefs, picked) of [singleton] => singleton = lab | _ => false val noLabel = case lastFlow of ExitCode => picked = 0 (* Unless this was the first block. *) | Unconditional dest => onlyJumpingHere dest | Conditional { trueJump, falseJump, ...} => onlyJumpingHere trueJump orelse onlyJumpingHere falseJump | IndexedBr _ => false | SetHandler _ => false | UnconditionalHandle _ => false | ConditionalHandle { continue, ...} => onlyJumpingHere continue in val startLabel = if noLabel then [] else [(SetLabel(getBlockLabel picked))] end val BasicBlock { flow, block, ...} = Vector.sub(blocks, picked) local fun genCodeBlock(instr, code) = codeExtended {flow=flow} (instr, code) in val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] in genCode(addSet @ stillToDo, flow, bodyCode) end | FlowCodeCMove{code, trueJump, falseJump} => let (* We've generated a conditional move and possibly a return. If the trueJump and falseJump are only ever referenced from this block they're done, otherwise we still need to do them. 
*) val _ = case Array.sub(labelRefs, trueJump) of [_] => Array.update(processed, trueJump, true) | _ => () val _ = case Array.sub(labelRefs, falseJump) of [_] => Array.update(processed, falseJump, true) | _ => () val BasicBlock { flow, ...} = Vector.sub(blocks, trueJump) val addSet = case flow of ExitCode => [] | Unconditional dest => [dest] | _ => raise InternalError "FlowCodeCMove" in genCode(addSet @ stillToDo, flow, code) end end in val ops = genCode([0], ExitCode, [(SetLabel startOfFunctionLabel)]) end in generateFinalCode{instrs=List.rev ops, name=functionName, resultClosure=resultClosure, parameters=debugSwitches, profileObject=profileObject} end structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and xReg = xReg and vReg = vReg and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML index 8a75e56d..0cf0816f 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML @@ -1,320 +1,288 @@ (* Copyright David C. J. Matthews 2016-17, 2020-1 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeTransform( structure Arm64ICode: ARM64ICODE structure Debug: DEBUG structure Codegen: ARM64ICODEGENERATE structure Allocate: ARM64ALLOCATEREGISTERS structure Identify: ARM64IDENTIFYREFERENCES - structure ConflictSets: ARM64ICODECONFLICTS structure PushRegisters: ARM64PUSHREGISTERS structure Optimise: ARM64ICODEOPTIMISE structure Pretty: PRETTY structure IntSet: INTSET sharing Arm64ICode.Sharing = Codegen.Sharing = Allocate.Sharing = Identify.Sharing = - ConflictSets.Sharing = PushRegisters.Sharing = IntSet = Optimise.Sharing + PushRegisters.Sharing = IntSet = Optimise.Sharing ) : ARM64ICODETRANSFORM = struct open Arm64ICode open Address open Identify - open ConflictSets open PushRegisters open IntSet open Codegen open Allocate open Optimise exception InternalError = Misc.InternalError (* Find out the registers that need to be pushed to the stack, if any. We include those marked as "must push" because we need to save them across a function call or handler and also any we need to push because the set of active registers is more than the number of general registers we have. This second case involves choosing suitable registers and is a first attempt to check we have enough registers. We can also get a failure in codeExtended when we actually allocate the registers. 
*) fun spillRegisters(identified: extendedBasicBlock vector, regStates: regState vector) = let val maxPRegs = Vector.length regStates val pushArray = Array.array(maxPRegs, false) (* Mark anything already marked as "must push" unless it's already on the stack *) local fun checkPush(i, {pushState=true, ...}) = Array.update(pushArray, i, true) | checkPush _ = () in val () = Vector.appi checkPush regStates end (* Make a list of all the active sets ignoring those marked to be pushed. Do that first because we need to know how many sets each register is in. *) local fun addToActive(r, l) = ( case Vector.sub(regStates, r) of {prop=RegPropStack _, ...} => l | _ => if Array.sub(pushArray, r) then l else r :: l ) in fun nowActive regs = List.foldl addToActive [] regs end fun getBlockSets(ExtendedBasicBlock{block, passThrough, ...}, sets) = let fun getSets({active, ...}, l) = let val set = nowActive(setToList(union(active, passThrough))) in if List.length set > nGenRegs then set :: l else l end in List.foldl getSets sets block end val activeSets = Vector.foldl getBlockSets [] identified in if null activeSets then () else let (* See how many times each register appears in a set. *) val activeIn = Array.array(maxPRegs, 0) val () = List.app (fn regs => List.app(fn r => Array.update(activeIn, r, Array.sub(activeIn, r)+1)) regs) activeSets (* We want to choose the best registers to spill. *) fun spillSomeRegs activeSet = let (* We may have already marked some of these to push. *) val currentActive = nowActive activeSet val regCount = List.length currentActive fun addCosts r = let val {active, refs, prop, ...} = Vector.sub(regStates, r) in case prop of RegPropUntagged => (r, ~1, ~1) | RegPropStack _ => (r, ~1, ~1) | RegPropMultiple => (r, ~1, ~1) | _ => (r, Array.sub(activeIn, r), if refs = 0 then 0 else Int.quot(active, refs)) end val withCosts = List.map addCosts currentActive (* Order so that the earlier items are those that appear in more sets and if items appear in the same number of sets those that are active longer come earlier. *) fun compare (_, in1, a1) (_, in2, a2) = if in1 > in2 then true else if in1 < in2 then false else a1 > a2 val sorted = Misc.quickSort compare withCosts fun markAsPush([], _) = () | markAsPush((reg, _, _) :: regs, n) = if n <= 0 then () else let val {prop, ...} = Vector.sub(regStates, reg) val _ = case prop of RegPropStack _ => raise InternalError "markAsPush" | _ => () in Array.update(pushArray, reg, true); markAsPush(regs, n-1) end in markAsPush(sorted, regCount-nGenRegs) end in List.app spillSomeRegs activeSets end; (* Return the vector showing those that must be pushed. *) Array.vector pushArray end type triple = {instr: (xReg, xReg, vReg) arm64ICode, current: intSet, active: intSet} fun codeICodeFunctionToArm64{blocks, functionName, pregProps, ccCount, debugSwitches, resultClosure, profileObject, ...} = let (*val maxPRegs = Vector.length pregProps*) val icodeTabs = [8, 20, 60] val wantPrintCode = Debug.getParameter Debug.icodeTag debugSwitches fun printCode printCodeKind identifiedCode = (* Print the code before the transformation. *) let val printStream = Pretty.getSimplePrinter(debugSwitches, icodeTabs) in printStream(functionName ^ "\n"); printCodeKind(identifiedCode, printStream); printStream "\n" end - - fun printConflicts(regStates: conflictState vector) = - let - val printStream = Pretty.getSimplePrinter(debugSwitches, icodeTabs) - - fun printRegs([], _) = () - | printRegs(_, 0) = printStream "..." 
-            |   printRegs([i], _) = printStream(Int.toString i)
-            |   printRegs(i::l, n) = (printStream(Int.toString i ^ ","); printRegs(l, n-1))
-
-            fun printRegData(i, { conflicts, ... }) =
-            (
-                printStream (Int.toString i ^ "\t");
-                printStream ("Conflicts="); printRegs(setToList conflicts, 20);
-                printStream "\n"
-            )
-        in
-            Vector.appi printRegData regStates
-        end

        (* Limit the number of passes. *)
        val maxOptimisePasses = 30
        val maxTotalPasses = maxOptimisePasses + 40

        fun processCode(basicBlocks: (preg, pregOrZero, preg) basicBlock vector, pregProps: regProperty vector, maxStack, passes, optPasses) =
        let
            (* This should only require a few passes. *)
            val _ = passes < maxTotalPasses orelse raise InternalError "Too many passes"

            val () = if wantPrintCode then printCode printICodeAbstract basicBlocks else ()

            (* First pass - identify register use patterns *)
            val (identified, regStates) = identifyRegisters(basicBlocks, pregProps)

            (* Try optimising.  This may not do anything, in which case we can continue
               with the original code; otherwise we need to reprocess. *)
            val tryOpt =
                if optPasses < maxOptimisePasses
                then optimiseICode{code=identified, pregProps=pregProps, ccCount=ccCount, debugSwitches=debugSwitches}
                else Unchanged
        in
            case tryOpt of
                Changed (postOptimise, postOpProps) =>
                    processCode(postOptimise, postOpProps, maxStack, passes, optPasses+1)
            |   Unchanged =>
                let
                    val regsToSpill = spillRegisters(identified, regStates)
                    val needPhase2 = Vector.exists(fn t => t) regsToSpill
                    val (needPhase2, regsToSpill) =
                        if needPhase2 orelse passes <> 0
                        then (needPhase2, regsToSpill)
                        else (true, Vector.tabulate(Vector.length pregProps, fn _ => false))
                in
                    if needPhase2
                    then
                    let
                        (* Push those registers we need to.  This also adds and renumbers
                           pregs and may add labels. *)
                        val {code=postPushCode, pregProps=regPropsPhase2, maxStack=maxStackPhase2} =
                            addRegisterPushes{code=identified, pushVec=regsToSpill, pregProps=pregProps, firstPass=passes=0}
                    in
                        (* And reprocess. *)
                        processCode(postPushCode, regPropsPhase2, maxStackPhase2, passes+1, optPasses)
                    end
                    else
                    let
                        val maxPRegs = Vector.length regStates

                        (* If we have been unable to allocate a register we need to spill
                           something.  Choose a single register from each conflict set.
                           Since we've already checked that the active sets are small enough,
                           this is really only required to deal with special register
                           requirements, e.g. in block moves. *)
                        fun spillFromConflictSets conflictSets =
                        let
                            val maxPRegs = Vector.length regStates
                            val pushArray = Array.array(maxPRegs, false)

                            fun selectARegisterToSpill active =
                            let
                                val regsToPick = setToList active
                            in
                                (* If we have already marked one of these to be pushed
                                   we don't need to do anything here. *)
                                if List.exists (fn r => Array.sub(pushArray, r)) regsToPick
                                then ()
                                else (* Choose something to push. *)
                                let
                                    fun chooseReg([], bestReg, _) = bestReg
                                    |   chooseReg(reg::regs, bestReg, bestCost) =
                                        let
                                            val {active, refs, prop, ...} = Vector.sub(regStates, reg)
                                            val cost = if refs = 0 then 0 else Int.quot(active, refs)
                                        in
                                            case prop of
                                                RegPropStack _ => chooseReg(regs, bestReg, bestCost)
                                            |   RegPropCacheUntagged => reg (* Pick the first cache reg. *)
                                            |   RegPropCacheTagged => reg (* Pick the first cache reg. *)
                                            |   _ =>
                                                if cost >= bestCost
                                                then chooseReg(regs, reg, active)
                                                else chooseReg(regs, bestReg, bestCost)
                                        end

                                    val choice = chooseReg(regsToPick, ~1, 0)
                                    val _ = choice >= 0 orelse raise InternalError "chooseReg"
                                in
                                    Array.update(pushArray, choice, true)
                                end
                            end

                            val () = List.app selectARegisterToSpill conflictSets
                        in
                            Array.vector pushArray
                        end
-
-                        (* Now get the conflict sets. *)
-                        val conflictSets = getConflictStates(identified, maxPRegs)
-                        local
-                            fun mapFromExtended(ExtendedBasicBlock{block, flow, ...}) =
-                                BasicBlock{block=List.map #instr block, flow=flow}
-                        in
-                            val () =
-                                if wantPrintCode
-                                then (printCode printICodeAbstract (Vector.map mapFromExtended identified); printConflicts conflictSets)
-                                else ()
-                        end
+
                    in
-                        case allocateRegisters {blocks=identified, regStates=conflictSets, regProps=pregProps } of
+                        case allocateRegisters {blocks=identified, maxPRegs=maxPRegs, regProps=pregProps } of
                            AllocateSuccess concreteCode =>
                            (
                                if wantPrintCode then printCode printICodeConcrete concreteCode else ();
                                icodeToArm64Code{blocks=concreteCode, functionName=functionName, stackRequired=maxStack,
                                                 debugSwitches=debugSwitches, resultClosure=resultClosure, profileObject=profileObject}
                            )
                        |   AllocateFailure fails =>
                            let
                                val regsToSpill = spillFromConflictSets fails
                                val {code=postPushCode, pregProps=pregPropsPhase2, maxStack=maxStackPhase2} =
                                    addRegisterPushes{code=identified, pushVec=regsToSpill, pregProps=pregProps, firstPass=false}
                            in
                                processCode(postPushCode, pregPropsPhase2, maxStackPhase2, passes+1, optPasses)
                            end
                    end
                end
        end
    in
        processCode(blocks, pregProps, 0 (* Should include handlers and containers. *), 0, 0)
    end

    structure Sharing =
    struct
        type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock
        and  regProperty = regProperty
        and  closureRef = closureRef
        and  preg = preg
        and  pregOrZero = pregOrZero
    end
end;
diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML
index 7fae6304..0cff3aa6 100644
--- a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML
+++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML
@@ -1,129 +1,120 @@
(*
    Copyright (c) 2021 David C. J. Matthews

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    Licence version 2.1 as published by the Free Software Foundation.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public Licence for more details.
    You should have received a copy of the GNU Lesser General Public
    Licence along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*)

local
    structure Arm64Assembly =
        Arm64Assembly (
            structure Debug = Debug
            and Pretty = Pretty
            and CodeArray = CodeArray
        )

    structure Arm64Sequences =
        Arm64Sequences (
            structure Arm64Assembly = Arm64Assembly
        )

    structure Arm64PreAssembly =
        Arm64PreAssembly (
            structure Arm64Assembly = Arm64Assembly
            and Debug = Debug
            and Pretty = Pretty
        )

    structure Arm64Foreign =
        Arm64ForeignCall (
            structure CodeArray = CodeArray
            and Arm64Assembly = Arm64Assembly
            and Arm64Sequences = Arm64Sequences
            and Debug = Debug
        )

    structure Arm64ICode =
        Arm64ICode (
            structure Arm64Code = Arm64PreAssembly
        )

    structure Arm64ICodeIdentify =
        Arm64IdentifyReferences (
            structure Debug = Debug
            structure Arm64ICode = Arm64ICode
            structure IntSet = IntSet
        )

-    structure Arm64ICodeConflicts =
-        Arm64ICodeConflicts (
-            structure Arm64ICode = Arm64ICode
-            structure IntSet = IntSet
-            structure Identify = Arm64ICodeIdentify
-        )
-
    structure Arm64PushRegs =
        Arm64PushRegisters (
            structure Arm64ICode = Arm64ICode
            structure IntSet = IntSet
            structure Identify = Arm64ICodeIdentify
        )

    structure Arm64Opt =
        Arm64ICodeOptimise (
            structure Arm64ICode = Arm64ICode
            structure IntSet = IntSet
            structure Identify = Arm64ICodeIdentify
            structure Debug = Debug
            structure Pretty = Pretty
        )

    structure Arm64IAllocate =
        Arm64AllocateRegisters (
            structure Arm64ICode = Arm64ICode
            structure Identify = Arm64ICodeIdentify
-            structure ConflictSets = Arm64ICodeConflicts
            structure IntSet = IntSet
        )

    structure Arm64ICodeGenerate =
        Arm64ICodeToArm64Code (
            structure Debug = Debug
            structure Arm64ICode = Arm64ICode
            structure Identify = Arm64ICodeIdentify
            structure Pretty = Pretty
            structure IntSet = IntSet
            structure Arm64PreAssembly = Arm64PreAssembly
            structure Arm64Assembly = Arm64Assembly
            structure Arm64Sequences = Arm64Sequences
            structure Strongly = StronglyConnected
        )

    structure Arm64ICodeTransform =
        Arm64ICodeTransform (
            structure Debug = Debug
            structure Arm64ICode = Arm64ICode
            structure Identify = Arm64ICodeIdentify
-            structure ConflictSets = Arm64ICodeConflicts
            structure Allocate = Arm64IAllocate
            structure PushRegisters = Arm64PushRegs
            structure Optimise = Arm64Opt
            structure Pretty = Pretty
            structure IntSet = IntSet
            structure Codegen = Arm64ICodeGenerate
        )
in
    structure Arm64Code =
        Arm64CodetreeToICode (
            structure BackendTree = BackendIntermediateCode
            structure Debug = Debug
            structure Arm64ICode = Arm64ICode
            structure Arm64Foreign = Arm64Foreign
            structure ICodeTransform = Arm64ICodeTransform
            structure CodeArray = CodeArray
            and Pretty = Pretty
        )
end;
diff --git a/polymlArm64.pyp b/polymlArm64.pyp
index dfe62740..f8c402f9 100644
--- a/polymlArm64.pyp
+++ b/polymlArm64.pyp
@@ -1,245 +1,243 @@
- -