diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig index ebbdac4c..592c9a8a 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig @@ -1,458 +1,455 @@ (* Signature for the high-level ARM64 code Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature ARM64ICODE = sig type machineWord = Address.machineWord type address = Address.address type closureRef (* Registers. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word (* It is simpler to use a single type for all registers. *) datatype reg = GenReg of xReg | FPReg of vReg val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg val is32in64: bool and isBigEndian: bool (* Condition for conditional branches etc. 
*) datatype condition = CondEqual (* Z=1 *) | CondNotEqual (* Z=0 *) | CondCarrySet (* C=1 *) | CondCarryClear (* C=0 *) | CondNegative (* N=1 *) | CondPositive (* N=0 includes zero *) | CondOverflow (* V=1 *) | CondNoOverflow (* V=0 *) | CondUnsignedHigher (* C=1 && Z=0 *) | CondUnsignedLowOrEq (* ! (C=1 && Z=0) *) | CondSignedGreaterEq (* N=V *) | CondSignedLess (* N<>V *) | CondSignedGreater (* Z==0 && N=V *) | CondSignedLessEq (* !(Z==0 && N=V) *) (* The shift used in arithmetic operations. *) and shiftType = ShiftLSL of Word8.word | ShiftLSR of Word8.word | ShiftASR of Word8.word | ShiftNone datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use the zero register (XZR/WZR, i.e. XZero) instead of a preg. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than one word if this is a container or a handler entry. *) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. 
*) datatype ccRef = CcRef of int datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and callKind = Recursive | ConstantCode of machineWord | FullCall and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat | MoveDouble | MoveFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP (* Some of the atomic operations added in 8.1 *) and atomicOp = LoadAddAL | LoadUmaxAL | SwapAL | LoadAddAcquire | LoadUMaxAcquire | SwapRelease (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Floating point constant *) | LoadFPConstant of { source: Word64.word, dest: 'fpReg, floatSize: floatSize } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. 
*) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed-size piece of memory and put the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMem and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. *) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). 
*) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and ccRef are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be an encodable bit pattern (see isEncodableBitPattern). ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd. *) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A.. 
*) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, fpDests: ('fpReg * vReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, fpRegArgs: ('fpReg * vReg) list, stackAdjust: int, currStackSize: int } (* Return from the function. 
resultRegs are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { results: ('genReg * xReg) list, fpResults: ('fpReg * vReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. 
The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. 
*) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to either 0 or 1 if it succeeds or fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes moves and conversions between float and double. 
*) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } (* Yield control during a spin-lock. *) | CPUYield (* Atomic operations added for ARM 8.1 *) | AtomicOperation of { base: 'genReg, source: 'optGenReg, dest: 'optGenReg, atOp: atomicOp } - (* Debugging - fault if values don't match. *) - | CacheCheck of { arg1: 'genReg, arg2: 'genReg } - (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true, falseJump if it is false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. Occurs if an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } (* Return the successor blocks from a control flow. 
*) val successorBlocks: controlFlow -> int list type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock val printICodeAbstract: basicBlockAbstract vector * (string -> unit) -> unit and printICodeConcrete: basicBlockConcrete vector * (string -> unit) -> unit (* Check whether this value is acceptable for LogicalImmediate. *) val isEncodableBitPattern: Word64.word * opSize -> bool (* This generates a BitField instruction with the appropriate values for immr and imms. *) val shiftConstant: { direction: shiftDirection, dest: preg, source: preg, shift: word, opSize: opSize } -> iCodeAbstract structure Sharing: sig type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary and atomicOp = atomicOp end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML index ea12d635..7b1ce1fa 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML @@ -1,1416 +1,1400 @@ (* Copyright David C. J. 
Matthews 2016-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64AllocateRegisters( structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ALLOCATEREGISTERS = struct open Arm64ICode open Identify open IntSet exception InternalError = Misc.InternalError - val checkCache = false (* Use the cache *) - datatype allocateResult = AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list (* General registers. X24 is used as the global heap base in 32-in-64. X30 is the return address set by blr but is otherwise a general register. Put the argument registers at the end of the list so they'll only be used when hinted. *) val generalRegisters = map GenReg ([X9, X10, X11, X12, X13, X14, X15, X19, X20, X21, X22, X23, X0, X1, X2, X3, X4, X5, X6, X7, X8, X30] @ (if is32in64 then [] else [X24])) val floatingPtRegisters = map FPReg [V7, V6, V5, V4, V3, V2, V1] type conflictState = { conflicts: intSet, realConflicts: reg list } type triple = {instr: iCodeAbstract, current: intSet, active: intSet} exception InternalError = Misc.InternalError (* Get the conflict states, allocate registers and return the code with the allocated registers if it is successful. *) fun allocateRegisters{blocks, regProps, maxPRegs, ...} = let (* Other registers that conflict with this i.e. 
cannot share the same real register. *) val regConflicts = Array.array(maxPRegs, emptySet) (* Real registers that cannot be used for this because they are needed for an instruction. Only X30 in calls and RTS traps. *) and regRealConflicts = Array.array(maxPRegs, []: reg list) fun addConflictsTo(addTo, conflicts) = List.app(fn aReg => Array.update(regConflicts, aReg, union(Array.sub(regConflicts, aReg), conflicts))) addTo (* To reserve a register we need to add the real register to the real conflict sets of all the abstract conflicts. *) local fun isInset reg set = List.exists (fn r => r = reg) set in fun reserveRegister(reserveFor, reg) = let val absConflicts = Array.sub(regConflicts, reserveFor) fun addConflict i = if i = reserveFor then () else addRealConflict (i, reg) in List.app addConflict (setToList absConflicts) end and addRealConflict (i, reg) = let val currentConflicts = Array.sub(regRealConflicts, i) in if isInset reg currentConflicts then () else Array.update(regRealConflicts, i, reg :: currentConflicts) end end fun conflictsForInstr passThrough {instr, current, ...} = let val {sources, dests} = getInstructionRegisters instr fun regNo(PReg i) = i val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos val afterRemoveDests = minus(current, destSet) local (* In almost all circumstances the destination and sources don't conflict and the same register can be used as a destination and a source. BoxLarge can only store the value after the memory has been allocated. BitFieldInsert has to copy the "destAsSource" value into the destination so cannot use the same register for the "source". *) val postInstruction = case instr of BoxLarge _ => destRegNos @ sourceRegNos | BoxTagFloat _ => destRegNos @ sourceRegNos (* Not sure about this. *) | BitFieldInsert{source, ...} => regNo source :: destRegNos | _ => destRegNos in (* If there is more than one destination they conflict with each other. 
*) val () = addConflictsTo(postInstruction, listToSet postInstruction); (* Mark conflicts for the destinations, i.e. after the instruction. The destinations conflict with the registers that are used subsequently. *) val () = addConflictsTo(postInstruction, current); val () = addConflictsTo(postInstruction, passThrough); (* Mark conflicts for the sources i.e. before the instruction. *) (* Sources must be set up as conflicts with each other i.e. when we come to allocate registers we must choose different real registers for different abstract registers. *) (* TODO: It may well no longer be necessary to make sources conflict with each other. It was probably a consequence of the old process of allocating registers from the end back. *) val () = addConflictsTo(sourceRegNos, listToSet sourceRegNos) val () = addConflictsTo(sourceRegNos, afterRemoveDests); val () = addConflictsTo(sourceRegNos, passThrough) end (* I'm not sure if this is needed. There was a check in the old code to ensure that different registers were used for loop variables even if they were actually unused. This may happen anyway. Comment and code copied from X86 version. Retain it for the moment. *) val () = case instr of JumpLoop{regArgs, ...} => let val destRegs = List.foldl(fn ({dst=PReg loopReg, ...}, dests) => loopReg :: dests) [] regArgs in addConflictsTo(destRegs, listToSet destRegs); addConflictsTo(destRegs, current); addConflictsTo(destRegs, passThrough) end | _ => () (* Certain instructions are specific as to the real registers. *) val () = case instr of ReturnResultFromFunction{ returnReg=PReg retReg, results, ... } => (* We can't use any of the registers we're using to carry results, typically X0, to hold the return address. *) List.app(fn (_, xReg) => addRealConflict(retReg, GenReg xReg)) results | RaiseExceptionPacket{ packetReg } => (* This wasn't needed previously because we always pushed the registers across an exception. 
*) reserveRegister(regNo packetReg, GenReg X0) | BeginHandler { packetReg, ...} => reserveRegister(regNo packetReg, GenReg X0) | FunctionCall { dests, regArgs, ...} => (* This is only needed if we are saving the registers rather than marking them as "must push". *) let val () = List.app(fn (PReg pr, r) => reserveRegister(pr, GenReg r)) dests (* The argument registers also conflict. In order to execute this call we need to load the arguments into specific registers so we can't use them for values that we want after the call. *) val toReserve = X30 :: List.map #2 regArgs in List.app(fn i => List.app(fn r => addRealConflict(i, GenReg r)) toReserve) (setToList passThrough @ setToList afterRemoveDests) end (* We can't use X30 as the result because it's needed for the return addr if we have to GC. *) | AllocateMemoryFixed{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | AllocateMemoryVariable{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxLarge{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxTagFloat{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) (* Could exclude floats on native addr. *) | _ => () in () end (* Process the block. *) fun conflictsForBlock(ExtendedBasicBlock{block, passThrough, exports, ...}) = let (* We need to establish conflicts between all the registers active at the end of the block since they may not be established elsewhere. This isn't necessary for an unconditional branch since the same registers will be included in the block that is the target of the branch, possibly along with others. However if this is a conditional or indexed branch we may have different sets at each of the targets and we have to ensure that all the registers differ. *) val united = union(exports, passThrough) val () = addConflictsTo(setToList united, united) val () = List.app (conflictsForInstr passThrough) block in () end val () = Vector.app conflictsForBlock blocks (* Hint values. 
The idea of hints is that by using a hinted register we may avoid an unnecessary move instruction. realHints is set when a pseudo-register is going to be loaded from a specific register e.g. a register argument, or moved into one e.g. X0 for the function result. friends is set to the other pReg that may be associated with the pReg. Typically this is where we have a merge register that we move some value into. *) val realHints = Array.array(maxPRegs, NONE: reg option) (* Sources and destinations. These indicate the registers that are the sources and destinations of the indexing register and are used as hints. If a register has been allocated for a source or destination we may be able to reuse it. *) val sourceRegs = Array.array(maxPRegs, []: int list) and destinationRegs = Array.array(maxPRegs, []: int list) local (* Real hints. If this is the source of a value e.g. a function argument in a register, we'll use it directly. If, though, this is the result of a function and we want the result to end up in a specific register we want to propagate it to any pReg that moves its value into this. *) fun addRealHint(r, reg) = case Array.sub(realHints, r) of SOME _ => () | NONE => ( (* Add to this pReg *) Array.update(realHints, r, SOME reg); (* and to any other pReg that moves here. *) List.app(fn r => addRealHint(r, reg)) (Array.sub(sourceRegs, r)) ) fun addSourceAndDestinationHint{src, dst} = let val conflicts = Array.sub(regConflicts, src) in (* If they conflict we can't add them. *) if member(dst, conflicts) then () else let val currentDests = Array.sub(destinationRegs, src) val currentSources = Array.sub(sourceRegs, dst) in (* Add the destination for this source i.e. the registers we move this source into. *) if List.exists(fn i => i=dst) currentDests then () else Array.update(destinationRegs, src, dst :: currentDests); (* Add the source to the list of sources for this destination. A merge register may have several sources, a different one for each path. 
If the destination has a real hint we want to propagate that back. That isn't needed for the destinations because we allocate the registers from the start forward. *) if List.exists(fn i => i=src) currentSources then () else let val sources = src :: currentSources val () = Array.update(sourceRegs, dst, sources) in case Array.sub(realHints, dst) of NONE => () | SOME real => List.app(fn r => addRealHint(r, real)) sources end end end (* Add the hints to steer the register allocation. The idea is to avoid moves between registers by getting values into the appropriate register in advance. We don't actually need to add real hints where the real register is providing the value, e.g. BeginFunction, because the allocation process will take care of that. *) fun addHints{instr=MoveRegister{source=PReg sreg, dest=PReg dreg, ...}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints{instr=BitFieldInsert{destAsSource=PReg dsReg, dest=PReg dReg, ...}, ...} = (* The "destAsSource" is the destination if some bits are retained. *) addSourceAndDestinationHint {src=dsReg, dst=dReg} | addHints{instr=ReturnResultFromFunction { results, fpResults, ... 
}, ...} = ( List.app(fn(PReg pr, r) => addRealHint(pr, GenReg r)) results; List.app(fn(PReg pr, r) => addRealHint(pr, FPReg r)) fpResults ) | addHints{instr=JumpLoop{regArgs, ...}, ...} = let fun addRegArg {src=ArgInReg(PReg argReg), dst=PReg resReg} = addSourceAndDestinationHint {dst=resReg, src=argReg} | addRegArg {src=ArgOnStack _, ...} = () in List.app addRegArg regArgs end | addHints{instr=BeginFunction{regArgs, fpRegArgs, ...}, ...} = ( List.app (fn (PReg pr, reg) => addRealHint(pr, GenReg reg)) regArgs; List.app (fn (PReg pr, reg) => addRealHint(pr, FPReg reg)) fpRegArgs ) | addHints{instr=TailRecursiveCall{regArgs, fpRegArgs, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app setHint regArgs; List.app (fn(PReg pr, reg) => addRealHint(pr, FPReg reg)) fpRegArgs end | addHints{instr=FunctionCall{regArgs, dests, fpRegArgs, fpDests, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app(fn(PReg pr, r) => addRealHint(pr, GenReg r)) dests; List.app(fn(PReg pr, r) => addRealHint(pr, FPReg r)) fpDests; List.app setHint regArgs; List.app (fn(PReg pr, reg) => addRealHint(pr, FPReg reg)) fpRegArgs end (* Exception packets are in X0 *) | addHints{instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints{instr=BeginHandler{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints{instr=UnaryFloatingPt{source=PReg sreg, dest=PReg dreg, fpOp=MoveFloat}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints{instr=UnaryFloatingPt{source=PReg sreg, dest=PReg dreg, fpOp=MoveDouble}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints _ = () in val () = Vector.app(fn ExtendedBasicBlock { block, ...} => List.app addHints block) blocks end val failures = ref []: intSet list ref (* Turn the abstract icode into a concrete version by allocating the registers. 
*)
local
    (* Project a "reg" onto its general or floating point component.
       Being handed a register of the other class is an internal error. *)
    fun asGenReg(GenReg reg) = reg
    |   asGenReg _ = raise InternalError "asGenReg"

    and asFPReg(FPReg reg) = reg
    |   asFPReg _ = raise InternalError "asFPReg"

    (* An entry in the register cache: "cached" identifies the value and
       "cachedIn" is the real register that currently holds it. *)
    datatype cacheItem =
        CacheStack of {cached: stackLocn, cachedIn: xReg }
            (* A value loaded from the stack. *)
    |   CacheAbsAddress of {cached: preg, cachedIn: xReg }
            (* 32-in-64: An absolute address from an object ID *)
    |   CacheAbsAddrOnStack of {cached: stackLocn, cachedIn: xReg }
            (* 32-in-64: An absolute address from an object loaded from the stack. *)
    |   CacheLargeUnbox of {cached: preg, cachedIn: xReg }
            (* A LargeWord.word value has been unboxed *)
    |   CacheRealUnbox of {cached: preg, cachedIn: vReg, floatSize: floatSize }
            (* A double or float has been unboxed. *)

    (* The real register that holds a cache entry.  Only CacheRealUnbox
       lives in a floating point register. *)
    fun regCachedIn(CacheStack{cachedIn, ...}) = GenReg cachedIn
    |   regCachedIn(CacheAbsAddress{cachedIn, ...}) = GenReg cachedIn
    |   regCachedIn(CacheAbsAddrOnStack{cachedIn, ...}) = GenReg cachedIn
    |   regCachedIn(CacheLargeUnbox{cachedIn, ...}) = GenReg cachedIn
    |   regCachedIn(CacheRealUnbox{cachedIn, ...}) = FPReg cachedIn

    (* The real register chosen for each pseudo-register, indexed by the
       pseudo-register number.  NONE until findRegister has picked one. *)
    val allocatedRegs = Array.array(maxPRegs, NONE: reg option)

    (* Find a real register for a preg.
       1.  If a register is already allocated use that.
       2.  Try the "preferred" register if one has been given.
       3.  Try the realHints value if there is one.
       4.  See if there is a "friend" that has an appropriate register
       5.  Look at all the registers and find one.

       r       - the number of the pseudo-register.
       pref    - an optional preferred real register.
       regSet  - the candidate registers for step 5.  It is assumed to hold
                 registers of a single class, as the generalRegisters and
                 floatingPtRegisters sets passed by the callers do.
       cache   - the current cache; its registers are avoided if possible.

       The choice is recorded in allocatedRegs.  If nothing is free the
       conflict set is added to "failures" and the head of regSet is
       returned so that the pass can run to completion. *)
    fun findRegister(r, pref, regSet, cache: cacheItem list) =
        case Array.sub(allocatedRegs, r) of
            SOME reg => reg
        |   NONE =>
            let
                val conflicts = Array.sub(regConflicts, r)
                and realConflicts = Array.sub(regRealConflicts, r)
                (* Find the registers we've already allocated that may conflict. *)
                val conflictingRegs =
                    List.mapPartial(fn i => Array.sub(allocatedRegs, i)) (setToList conflicts) @ realConflicts

                fun isFree aReg = not (List.exists(fn i => i=aReg) conflictingRegs)

                (* Claim the candidate for r if nothing that conflicts is using it. *)
                fun tryAReg NONE = NONE
                |   tryAReg (somePref as SOME prefReg) =
                        if isFree prefReg
                        then (Array.update(allocatedRegs, r, somePref); somePref)
                        else NONE

                (* Search the sources and destinations to see if a register has
                   already been allocated or there is a hint.  All the
                   destinations are examined before any of the sources. *)
                fun findAFriend([], [], _) = NONE

                |   findAFriend(aDest :: otherDests, sources, alreadySeen) =
                    let
                        val possReg =
                            case Array.sub(allocatedRegs, aDest) of
                                v as SOME _ => tryAReg v
                            |   NONE => tryAReg(Array.sub(realHints, aDest))
                    in
                        case possReg of
                            reg as SOME _ => reg
                        |   NONE =>
                            let
                                (* Add the destinations of the destinations to the list
                                   if they don't conflict and haven't been seen. *)
                                fun newFriend f =
                                    not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts))
                                val fOfF = List.filter newFriend (Array.sub(destinationRegs, aDest))
                            in
                                findAFriend(otherDests @ fOfF, sources, aDest :: alreadySeen)
                            end
                    end

                |   findAFriend([], aSrc :: otherSrcs, alreadySeen) =
                    let
                        val possReg =
                            case Array.sub(allocatedRegs, aSrc) of
                                v as SOME _ => tryAReg v
                            |   NONE => tryAReg(Array.sub(realHints, aSrc))
                    in
                        case possReg of
                            reg as SOME _ => reg
                        |   NONE =>
                            let
                                (* Add the sources of the sources to the list
                                   if they don't conflict and haven't been seen. *)
                                fun newFriend f =
                                    not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts))
                                val fOfF = List.filter newFriend (Array.sub(sourceRegs, aSrc))
                            in
                                findAFriend([], otherSrcs @ fOfF, aSrc :: alreadySeen)
                            end
                    end
            in
                case tryAReg pref of
                    SOME r => r
                |   NONE =>
                    (
                        case tryAReg (Array.sub(realHints, r)) of
                            SOME r => r
                        |   NONE =>
                            (
                                case findAFriend(Array.sub(destinationRegs, r), Array.sub(sourceRegs, r), []) of
                                    SOME r => r
                                    (* Look through the registers to find one that's free.
                                       First try excluding the cache registers. *)
                                |   NONE =>
                                    let
                                        (* A cache register may only be reused if it is in the
                                           same class as the registers we were asked to choose
                                           from.  The cache mixes general and floating point
                                           registers and the callers apply asGenReg or asFPReg
                                           to the result, so a register of the other class
                                           would raise InternalError instead of being treated
                                           as a failure that needs a spill.  An empty regSet
                                           places no restriction. *)
                                        fun sameClassAsRegSet (GenReg _) =
                                                (case regSet of FPReg _ :: _ => false | _ => true)
                                        |   sameClassAsRegSet (FPReg _) =
                                                (case regSet of GenReg _ :: _ => false | _ => true)

                                        (* First try filtering all the cache registers to see if we can
                                           find a register.  If not see if it works by freeing one. *)
                                        fun filterCache(filteredRegset, []) = List.find isFree filteredRegset
                                        |   filterCache(filteredRegset, cacheItem :: cache) =
                                            let
                                                val cReg = regCachedIn cacheItem
                                            in
                                                case filterCache(List.filter(fn r => r <> cReg) filteredRegset, cache) of
                                                    NONE =>
                                                        if isFree cReg andalso sameClassAsRegSet cReg
                                                        then SOME cReg
                                                        else NONE
                                                |   result => result
                                            end

                                        val pick =
                                            case filterCache(regSet, cache) of
                                                SOME reg => reg
                                            |   NONE =>
                                                (
                                                    (* This failed.  We're going to have to spill something. *)
                                                    failures := conflicts :: ! failures;
                                                    hd regSet (* Return something to allow this pass to complete *)
                                                )

                                        val () = Array.update(allocatedRegs, r, SOME pick)
                                    in
                                        pick
                                    end
                            )
                    )
            end

    (* Cache hints: Try to use the same register for values that can
       be cached.  This increases the chances that we will be able to
       retain the cache when we merge different branches. *)
    val cacheHints = Array.array(maxPRegs, NONE: reg option)

    (* Remove any reference to newly allocated registers from the cache.
       Also used after block move and comparison that modify registers *)
    fun pruneCache(reg: reg, cache) = List.filter(fn c => regCachedIn c <> reg) cache

    (* Return the cache registers that contain valid addresses.
       N.B. We must only save registers that contain properly tagged values
       together, on compact 32-bit, with absolute addresses.  All registers
       are saved across a GC but only these registers are updated. *)
    local
        fun hasAddress(CacheStack{cachedIn, ...}) = SOME cachedIn
        |   hasAddress(CacheAbsAddress{cachedIn, ...}) = SOME cachedIn
        |   hasAddress(CacheAbsAddrOnStack{cachedIn, ...}) = SOME cachedIn
        |   hasAddress(CacheLargeUnbox _) = NONE (* The value was untagged. *)
        |   hasAddress(CacheRealUnbox _) = NONE (* Untagged value in a FP register.
*)
    in
        val cachedAddressRegs = List.mapPartial hasAddress
    end

    (* Merge the cache states that arrive at a join point.  An entry
       survives only if an identical entry, that is the same value cached
       in the same register, is present in every incoming state.  No
       states gives an empty cache and a single state is returned as it
       is.  Generally we will either be unable to merge and have an empty
       cache or will have just one or two entries. *)
    fun mergeCacheStates (states: cacheItem list list): cacheItem list =
        case states of
            [] => []
        |   [single] => single
        |   first :: rest =>
            let
                (* Only the entries of the shortest state need to be checked.
                   On a tie the earliest state is kept. *)
                val shortest =
                    List.foldl
                        (fn (candidate, best) =>
                            if List.length candidate < List.length best then candidate else best)
                        first rest

                (* It's present if it's in all the sources.  If it is in a
                   different register the entries differ and we can't use it. *)
                fun inEvery entry = List.all (List.exists (fn other => entry = other)) states
            in
                (* The survivors are returned in the reverse of their order
                   in the shortest state. *)
                List.rev (List.filter inEvery shortest)
            end

    (* Choose the real register for a pseudo-register that is being defined.
       The result is the register together with the cache with every entry
       held in that register removed.  Finding that the pseudo-register
       already has a register is only allowed for merge registers,
       RegPropMultiple; anything else is an internal error. *)
    fun allocateNewDestination(PReg r, pref, regSet, cacheList) =
        let
            val chosen =
                case Array.sub(allocatedRegs, r) of
                    NONE => findRegister(r, pref, regSet, cacheList)
                |   SOME existing =>
                    (
                        case Vector.sub(regProps, r) of
                            RegPropMultiple => existing (* This is allowed for merge registers *)
                        |   _ => raise InternalError "Register defined at multiple points"
                    )
        in
            (chosen, pruneCache(chosen, cacheList))
        end

    (* Allocate a destination from the general registers. *)
    fun allocateGenReg(r, hint, cache) =
        case allocateNewDestination(r, hint, generalRegisters, cache) of
            (reg, newCache) => (asGenReg reg, newCache)

    (* Allocate a destination from the floating point registers. *)
    fun allocateFPReg(r, hint, cache) =
        case allocateNewDestination(r, hint, floatingPtRegisters, cache) of
            (reg, newCache) => (asFPReg reg, newCache)

    (* A destination that may be the zero register.  ZeroReg needs no
       allocation and leaves the cache as it is. *)
    fun allocateGenRegOrZero(target, hint, cache) =
        case target of
            ZeroReg => (XZero, cache)
        |   SomeReg reg => allocateGenReg(reg, hint, cache)

    fun getAllocatedGenReg(PReg r) =
case Array.sub(allocatedRegs, r) of SOME(GenReg reg) => reg | _ => raise InternalError "getAllocatedGenReg" and getAllocatedFPReg(PReg r) = case Array.sub(allocatedRegs, r) of SOME(FPReg reg) => reg | _ => raise InternalError "getAllocatedFPReg" fun getAllocatedGenRegOrZero ZeroReg = XZero | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg fun getAllocatedArg(ArgInReg reg) = ArgInReg(getAllocatedGenReg reg) | getAllocatedArg(ArgOnStack stackLoc) = ArgOnStack stackLoc val getSaveRegs = List.map getAllocatedGenReg (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) fun absToConcrete([], context, code) = (context, code) | absToConcrete({instr=MoveRegister{ source, dest}, ...} :: rest, cache, code) = let (* Try to use the register we've allocated for the source as the destination so that we can eliminate this instruction altogether. *) val sourceReg = getAllocatedGenReg source val (destReg, newCache) = allocateGenReg(dest, SOME(GenReg sourceReg), cache) in if sourceReg = destReg then absToConcrete(rest, newCache, code) else absToConcrete(rest, newCache, code <::> MoveRegister { source=sourceReg, dest=destReg}) end | absToConcrete({instr=LoadNonAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadNonAddressConstant { dest=destReg, source=source}) end | absToConcrete({instr=LoadFPConstant { dest, source, floatSize }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadFPConstant { dest=destReg, source=source, floatSize=floatSize}) end | absToConcrete({instr=LoadAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAddressConstant { dest=destReg, 
source=source}) end | absToConcrete({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, loadType=loadType}) end | absToConcrete({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, floatSize=floatSize}) end | absToConcrete({instr=LoadWithIndexedOffset { base, dest, index, loadType, signExtendIndex}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, loadType=loadType, signExtendIndex=signExtendIndex}) end | absToConcrete({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize, signExtendIndex}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, floatSize=floatSize, signExtendIndex=signExtendIndex}) end | absToConcrete({instr=GetThreadId { dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> GetThreadId { dest=destReg}) end | absToConcrete({instr=ObjectIndexAddressToAbsolute { source as PReg srcNo, dest=destOiA as PReg doia}, kill, ...} :: rest, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this source entry we don't want it in the cache any longer. 
*) val killThis = member(srcNo, kill) val (newCode, dReg, newCache, next) = case List.find(fn (CacheAbsAddress{cached, ...}) => cached=source | _ => false) cache of SOME (CacheAbsAddress{cachedIn, ...}) => let (* Try to use the cache register as the destination if we can. *) val (destReg, newCache) = allocateNewDestination(destOiA, SOME(GenReg cachedIn), generalRegisters, cache) val dReg = asGenReg destReg in - if checkCache - then (code <::> MoveRegister{source=cachedIn, dest=X17} <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=dReg} - <::> CacheCheck{ arg1=dReg, arg2=X17 }, - dReg, if killThis then pruneCache(GenReg cachedIn, newCache) else newCache, rest) - else if dReg = cachedIn + if dReg = cachedIn then (code, dReg, newCache, rest) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=cachedIn, dest=dReg}, dReg, if killThis then pruneCache(GenReg cachedIn, newCache) else newCache, rest) end | _ => (* If this is the last reference and the next instruction is loading with a zero offset we can use an indexed load and avoid converting to an absolute address. If this is not the last reference it's likely that we're loading another field so it's probably better to convert the object index and cache it. We might manage to use a load-pair instruction. *) ( case (killThis, rest) of (true, {instr=LoadWithConstantOffset{ byteOffset=0, loadType=Load32, base, dest=destLoad, ... 
}, kill=killLoad, ...} :: next) => if base = destOiA (* of objectindex *) andalso member(doia, killLoad) then let val (destReg, newCache) = allocateGenReg(destLoad, NONE, cache) in (code <::> LoadWithIndexedOffset{ base=X24(*X_Base32in64*), dest=destReg, index=getAllocatedGenReg source, loadType=Load32, signExtendIndex=false }, destReg, newCache, next) end else let val (destReg, newCache) = allocateGenReg(destOiA, Array.sub(cacheHints, srcNo), cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, destReg, newCache, rest) end | _ => let val (destReg, newCache) = allocateGenReg(destOiA, Array.sub(cacheHints, srcNo), cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, destReg, newCache, rest) end ) val () = if killThis then () else Array.update(cacheHints, srcNo, SOME(GenReg dReg)) in absToConcrete(next, if killThis then newCache else CacheAbsAddress{cached=source, cachedIn=dReg} :: newCache, newCode) end | absToConcrete({instr=AbsoluteToObjectIndex { source, dest}, ...} :: rest, cache, code) = let (* Don't make an entry in the cache for this; it won't be used. 
*) val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AbsoluteToObjectIndex { source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=AllocateMemoryFixed { bytesRequired, dest, saveRegs }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> AllocateMemoryFixed { dest=destReg, bytesRequired=bytesRequired, saveRegs=saved}) end | absToConcrete({instr=AllocateMemoryVariable{size, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> AllocateMemoryVariable{size=getAllocatedGenReg size, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=InitialiseMem{size, addr, init}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> InitialiseMem{size=getAllocatedGenReg size, addr=getAllocatedGenReg addr, init=getAllocatedGenReg init}) | absToConcrete({instr=BeginLoop, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> BeginLoop) | absToConcrete({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...} :: rest, context, code) = let fun getStackArg{src, wordOffset, stackloc} = {src=getAllocatedArg src, wordOffset=wordOffset, stackloc=stackloc} and getRegArg{src, dst} = {src=getAllocatedArg src, dst=getAllocatedGenReg dst} in absToConcrete(rest, context, code <::> JumpLoop{ regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, checkInterrupt=Option.map getSaveRegs checkInterrupt}) end | absToConcrete({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code 
<::> StoreWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, byteOffset=byteOffset, loadType=loadType}) | absToConcrete({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, byteOffset=byteOffset, floatSize=floatSize}) | absToConcrete({instr=StoreWithIndexedOffset { base, source, index, loadType, signExtendIndex}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, index=getAllocatedGenReg index, loadType=loadType, signExtendIndex=signExtendIndex}) | absToConcrete({instr=StoreFPWithIndexedOffset { base, source, index, floatSize, signExtendIndex}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, index=getAllocatedGenReg index, floatSize=floatSize, signExtendIndex=signExtendIndex}) | absToConcrete({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length}) end | absToConcrete({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift}) end | absToConcrete({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...} :: rest, cache, code) = let val (destReg, newCache) 
= allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LogicalImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, logOp=logOp, length=length}) end | absToConcrete({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LogicalRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, logOp=logOp, length=length, shift=shift}) end | absToConcrete({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ShiftRegister { source=getAllocatedGenReg source, shift=getAllocatedGenReg shift, dest=destReg, direction=direction, opSize=opSize}) end | absToConcrete({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> Multiplication { kind=kind, sourceA=getAllocatedGenRegOrZero sourceA, sourceM=getAllocatedGenReg sourceM, sourceN=getAllocatedGenReg sourceN, dest=destReg}) end | absToConcrete({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> Division { isSigned=isSigned, dividend=getAllocatedGenReg dividend, divisor=getAllocatedGenReg divisor, dest=destReg, opSize=opSize}) end | absToConcrete({instr=BeginFunction {regArgs, fpRegArgs, stackArgs}, ...} :: rest, _, code) = let (* Allocate the register arguments. At this point all the registers are free and the cache is empty. However we may have a "real conflict" that means that the allocated register is different. e.g. 
we need this argument some time after an arbitrary precision operation that may call a function. *) fun allocReg(src, dst) = let val (destReg, _) = allocateNewDestination(src, SOME(GenReg dst), generalRegisters, []) in (asGenReg destReg, dst) end fun allocFPReg(src, dst) = let val (destReg, _) = allocateNewDestination(src, SOME(FPReg dst), floatingPtRegisters, []) in (asFPReg destReg, dst) end in absToConcrete(rest, [], code <::> BeginFunction {regArgs=map allocReg regArgs, fpRegArgs=map allocFPReg fpRegArgs, stackArgs=stackArgs}) end | absToConcrete({instr=FunctionCall{callKind, regArgs, stackArgs, dests, fpRegArgs, fpDests, containers, saveRegs, ...}, ...} :: rest, _, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) and getFPRegArg(src, dst) = (getAllocatedFPReg src, dst) fun getResult allocReg (preg, reg) = let (* We empty the cache at this point. *) val (newReg, _) = allocReg(preg, NONE, []) in (newReg, reg) end in absToConcrete(rest, [] (* Empty after a function call. 
*), code <::> FunctionCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getAllocatedArg stackArgs, dests=map (getResult allocateGenReg) dests, fpRegArgs=map getFPRegArg fpRegArgs, fpDests=map (getResult allocateFPReg) fpDests, saveRegs=getSaveRegs saveRegs, containers=containers}) end | absToConcrete({instr=TailRecursiveCall{callKind, regArgs, stackArgs, fpRegArgs, stackAdjust, currStackSize}, ...} :: rest, context, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) and getStackArg{src, stack} = {src=getAllocatedArg src, stack=stack} and getFPRegArg(src, dst) = (getAllocatedFPReg src, dst) in absToConcrete(rest, context, code <::> TailRecursiveCall{ callKind=callKind, regArgs=map getRegArg regArgs, fpRegArgs=map getFPRegArg fpRegArgs, stackArgs=map getStackArg stackArgs, stackAdjust=stackAdjust, currStackSize=currStackSize}) end | absToConcrete({instr=ReturnResultFromFunction{results, fpResults, returnReg, numStackArgs}, ...} :: rest, context, code) = let fun getResult(preg, reg) = (getAllocatedGenReg preg, reg) and getFPResult(preg, reg) = (getAllocatedFPReg preg, reg) in absToConcrete(rest, context, code <::> ReturnResultFromFunction{results=map getResult results, fpResults=map getFPResult fpResults, returnReg=getAllocatedGenReg returnReg, numStackArgs=numStackArgs}) end | absToConcrete({instr=RaiseExceptionPacket{packetReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> RaiseExceptionPacket{packetReg=getAllocatedGenReg packetReg}) | absToConcrete({instr=PushToStack{ source, container as StackLoc{size, rno}, copies }, ...} :: rest, cache, code) = let val srcReg = getAllocatedGenReg source val newCache = if size = 1 then CacheStack{cached=container, cachedIn=srcReg} :: cache else cache val () = Array.update(cacheHints, rno, SOME(GenReg srcReg)) in absToConcrete(rest, newCache, code <::> PushToStack{source=srcReg, container=container, copies=copies}) end | absToConcrete({instr=LoadStack{ dest=destLoad, 
container as StackLoc{rno, ...} , field=0, wordOffset}, kill, ...} :: (restPlusOia as {instr=ObjectIndexAddressToAbsolute { source as PReg srcNo, dest=destOia}, kill=killOia, ...} :: rest), cache, code) = (* If a preg has been pushed to the stack every subsequent reference will be via the stack. If we want to be able to cache object index to absolute addresses for them we have to recognise this combination. *) (* They could be unrelated in which case process the LoadStack and then the ObjectIndex... It seems there are also rare circumstances(??) where the result of the load is not killed and so would have to be preserved. *) if destLoad = source andalso member(srcNo, killOia) then let val killThis = member(rno, kill) (* Is it the last reference to the stack entry? *) val (newCode, destReg, newCache) = case List.find(fn CacheAbsAddrOnStack{cached, ...} => cached=container | _ => false) cache of SOME (CacheAbsAddrOnStack{cachedIn, ...}) => let (* Try to use the cache register as the destination if we can. *) val (destReg, newCache) = allocateNewDestination(destOia, SOME(GenReg cachedIn), generalRegisters, cache) val dReg = asGenReg destReg in - if checkCache - then (code <::> MoveRegister{source=cachedIn, dest=X17} <::> LoadStack{ dest=X16, container=container, field=0, wordOffset=wordOffset } <::> - ObjectIndexAddressToAbsolute { source=X16, dest=dReg} <::> CacheCheck{ arg1=dReg, arg2=X17 }, dReg, - if killThis then pruneCache(GenReg cachedIn, newCache) else newCache) - else if dReg = cachedIn + if dReg = cachedIn then (code, dReg, newCache) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=cachedIn, dest=dReg}, dReg, if killThis then pruneCache(GenReg cachedIn, newCache) else newCache) end | _ => (* It's not in the cache - load it which could be cached. 
*) let val (cachePostLoad, loadCode) = processLoadStack(destLoad, container, wordOffset, kill, cache, code) val (destReg, cachePlusOia) = allocateGenReg(destOia, Array.sub(cacheHints, rno), cachePostLoad) in (loadCode <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, destReg, cachePlusOia) end val () = if killThis then () else Array.update(cacheHints, rno, SOME(GenReg destReg)) in absToConcrete(rest, if killThis then newCache else CacheAbsAddrOnStack{cached=container, cachedIn=destReg} :: newCache, newCode) end else (* Can't combine these. *) let val (newCache, newCode) = processLoadStack(destLoad, container, wordOffset, kill, cache, code) in absToConcrete(restPlusOia, newCache, newCode) end | absToConcrete({instr=LoadStack{ dest, container, wordOffset, field=0, ...}, kill, ...} :: rest, cache, code) = let val (newCache, newCode) = processLoadStack(dest, container, wordOffset, kill, cache, code) in absToConcrete(rest, newCache, newCode) end | absToConcrete({instr=LoadStack{ dest, container, field, wordOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadStack{ dest=destReg, container=container, field=field, wordOffset=wordOffset }) end | absToConcrete({instr=StoreToStack{source, container, field, stackOffset}, ...} :: rest, cache, code) = (* We may have cached the original push that cleared the container. We could cache this since it now contains the entry but it's probably better to deal with multiple results at a higher level. 
*) let val sReg = getAllocatedGenReg source val newCache = List.filter( fn CacheStack{cached, ...} => cached <> container | CacheAbsAddrOnStack{cached, ...} => cached <> container | _ => true) cache in absToConcrete(rest, newCache, code <::> StoreToStack{source=sReg, container=container, field=field, stackOffset=stackOffset}) end | absToConcrete({instr=ContainerAddress{ dest, container, stackOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ContainerAddress{ dest=destReg, container=container, stackOffset=stackOffset }) end | absToConcrete({instr=ResetStackPtr {numWords}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> ResetStackPtr {numWords=numWords}) | absToConcrete({instr=TagValue{source, dest, isSigned, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> TagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UntagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=BoxLarge{source, dest, saveRegs, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> BoxLarge{source=getAllocatedGenReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxLarge{source as PReg rno, dest}, kill, ...} :: rest, cache, code) = (* Unboxing a large word. We cache this in case we need to unbox it again. 
*) let val killThis = member(rno, kill) in case List.find(fn CacheLargeUnbox{cached, ...} => cached=source | _ => false) cache of SOME(CacheLargeUnbox{cachedIn, ...}) => let val (destReg, newCache) = allocateNewDestination(dest, SOME(GenReg cachedIn), generalRegisters, cache) val dReg = asGenReg destReg in absToConcrete(rest, if killThis then pruneCache(GenReg cachedIn, newCache) else newCache, code <::> MoveRegister { source=cachedIn, dest=dReg}) end | _ => let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, if killThis then newCache else CacheLargeUnbox{cached=source, cachedIn=destReg} :: newCache, code <::> UnboxLarge{source=getAllocatedGenReg source, dest=destReg}) end end | absToConcrete({instr=BoxTagFloat{floatSize, source, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> BoxTagFloat{floatSize=floatSize, source=getAllocatedFPReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxTagFloat{floatSize, source as PReg rno, dest}, kill, ...} :: rest, cache, code) = (* Unboxing a large word. We cache this in case we need to unbox it again. 
*) let val killThis = member(rno, kill) in case List.find(fn CacheRealUnbox{cached=s,floatSize=f, ...}=> s=source andalso f=floatSize | _ => false) cache of SOME (CacheRealUnbox{cachedIn, ...}) => let val (destReg, newCache) = allocateNewDestination(dest, SOME (FPReg cachedIn), floatingPtRegisters, cache) val dReg = asFPReg destReg in absToConcrete(rest, if killThis then pruneCache(FPReg cachedIn, newCache) else newCache, code <::> UnaryFloatingPt { source=cachedIn, dest=dReg, fpOp=case floatSize of Float32=>MoveFloat | Double64=>MoveDouble}) end | _ => let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, if killThis then newCache else CacheRealUnbox{cached=source, floatSize=floatSize, cachedIn=destReg} :: newCache, code <::> UnboxTagFloat{floatSize=floatSize, source=getAllocatedGenReg source, dest=destReg}) end end | absToConcrete({instr=LoadAcquire { base, dest, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAcquire { base=getAllocatedGenReg base, dest=destReg, loadType=loadType}) end | absToConcrete({instr=StoreRelease { base, source, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreRelease{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, loadType=loadType}) | absToConcrete({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> BitFieldShift { source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=BitFieldInsert{source, destAsSource, dest, length, immr, imms}, ...} :: rest, cache, code) = let val destAsSourceReg = getAllocatedGenReg destAsSource val (destReg, newCache) = allocateNewDestination(dest, SOME(GenReg destAsSourceReg), generalRegisters, cache) in 
absToConcrete(rest, newCache, code <::> BitFieldInsert { source=getAllocatedGenReg source, destAsSource=destAsSourceReg, dest=asGenReg destReg, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=IndexedCaseOperation{testReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> IndexedCaseOperation{testReg=getAllocatedGenReg testReg}) | absToConcrete({instr=PushExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PushExceptionHandler) | absToConcrete({instr=PopExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PopExceptionHandler) | absToConcrete({instr=BeginHandler{packetReg}, ...} :: rest, _, code) = let (* The cache is undefined at the start of a handler. *) val (destReg, newCache) = allocateGenReg(packetReg, NONE, []) in absToConcrete(rest, newCache, code <::> BeginHandler{packetReg=destReg}) end | absToConcrete({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val vec1Reg = getAllocatedGenReg vec1Addr and vec2Reg = getAllocatedGenReg vec2Addr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg vec1Reg, pruneCache(GenReg vec2Reg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> CompareByteVectors{vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lenReg, ccRef=ccRef}) end | absToConcrete({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val srcAReg = getAllocatedGenReg srcAddr and dstAReg = getAllocatedGenReg destAddr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg srcAReg, pruneCache(GenReg dstAReg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> BlockMove{srcAddr=srcAReg, destAddr=dstAReg, length=lenReg, 
isByteMove=isByteMove}) end | absToConcrete({instr=AddSubXSP{source, dest, isAdd}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubXSP { source=getAllocatedGenReg source, dest=destReg, isAdd=isAdd}) end | absToConcrete({instr=TouchValue{source}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> TouchValue { source=getAllocatedGenReg source}) | absToConcrete({instr=LoadAcquireExclusive{ base, dest }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAcquireExclusive { base=getAllocatedGenReg base, dest=destReg}) end | absToConcrete({instr=StoreReleaseExclusive{ base, source, result }, ...} :: rest, cache, code) = let val (resultReg, newCache) = allocateGenReg(result, NONE, cache) in absToConcrete(rest, newCache, code <::> StoreReleaseExclusive{ base=getAllocatedGenReg base, source=getAllocatedGenRegOrZero source, result=resultReg}) end | absToConcrete({instr=MemoryBarrier, ...} :: rest, cache, code) = absToConcrete(rest, cache, code <::> MemoryBarrier) | absToConcrete({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ConvertIntToFloat{ source=getAllocatedGenReg source, dest=destReg, srcSize=srcSize, destSize=destSize}) end | absToConcrete({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ConvertFloatToInt{ source=getAllocatedFPReg source, dest=destReg, srcSize=srcSize, destSize=destSize, rounding=rounding}) end | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp=MoveFloat}, ...} :: rest, cache, code) = (* Register-register moves can be eliminated if the same 
register is used. *) let val srcReg = getAllocatedFPReg source val (destReg, newCache) = allocateFPReg(dest, SOME(FPReg srcReg), cache) in absToConcrete(rest, newCache, if destReg = srcReg then code else code <::> UnaryFloatingPt{ source=srcReg, dest=destReg, fpOp=MoveFloat}) end | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp=MoveDouble}, ...} :: rest, cache, code) = let val srcReg = getAllocatedFPReg source val (destReg, newCache) = allocateFPReg(dest, SOME(FPReg srcReg), cache) in absToConcrete(rest, newCache, if destReg = srcReg then code else code <::> UnaryFloatingPt{ source=srcReg, dest=destReg, fpOp=MoveDouble}) end | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UnaryFloatingPt{ source=getAllocatedFPReg source, dest=destReg, fpOp=fpOp}) end | absToConcrete({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> BinaryFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, dest=destReg, fpOp=fpOp, opSize=opSize}) end | absToConcrete({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> CompareFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, opSize=opSize, ccRef=ccRef}) | absToConcrete({instr=CPUYield, ...} :: rest, cache, code) = absToConcrete(rest, cache, code <::> CPUYield) | absToConcrete({instr=AtomicOperation{ base, source, dest, atOp }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AtomicOperation{ base=getAllocatedGenReg base, source=getAllocatedGenRegOrZero source, dest=destReg, atOp=atOp }) end - | absToConcrete({instr=CacheCheck _, ...} :: _, _,
_) = (* Concrete only. *) - raise InternalError "absToConcrete: CheckCache" - (* LoadStack. *) and processLoadStack(dest, container as StackLoc{rno, ...}, wordOffset, kill, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this stack entry we don't want it in the cache any longer. *) val killThis = member(rno, kill) val (newCode, destReg: xReg, newCache) = case List.find(fn CacheStack{cached=c, ...} => c=container | _ => false) cache of SOME (CacheStack{cachedIn, ...}) => let val (destReg, newCache) = allocateNewDestination(dest, SOME(GenReg cachedIn), generalRegisters, cache) val dReg = asGenReg destReg in - if checkCache - then (code <::> MoveRegister{source=cachedIn, dest=X17} <::> LoadStack{ dest=dReg, container=container, field=0, wordOffset=wordOffset } - <::> CacheCheck{ arg1=dReg, arg2=X17 }, dReg, if killThis then pruneCache(GenReg cachedIn, newCache) else newCache) - else if dReg = cachedIn + if dReg = cachedIn then (code, dReg, newCache) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=cachedIn, dest=dReg}, dReg, if killThis then pruneCache(GenReg cachedIn, newCache) else newCache) end | _ => let val (destReg, newCache) = allocateGenReg(dest, Array.sub(cacheHints, rno), cache) in (code <::> LoadStack{ dest=destReg, container=container, field=0, wordOffset=wordOffset }, destReg, newCache) end val () = if killThis then () else Array.update(cacheHints, rno, SOME(GenReg destReg)) in (if killThis then newCache else CacheStack{cached=container, cachedIn=destReg} :: newCache, newCode) end in fun concreteBlock(ExtendedBasicBlock{ block, ...}, inputCache) = let val (cache, code) = absToConcrete(block, inputCache, []) in {cache=cache, code=List.rev code} end val mergeCacheStates = mergeCacheStates end val numBlocks = Vector.length blocks (* The results. The cache state is initialised to empty so that if we have a loop we will end up with an empty input cache. 
*) val resultArray = Array.array(numBlocks, {code=[], cache=[]}) (* Process the blocks in execution order so that normally we will be able to propagate the cache states. If we have a loop the input cache state will be empty because the output cache state for an unprocessed block is empty. *) (* Get the blocks that are inputs for each one. *) local val blockRefs = Array.array(numBlocks, []) (* The successors of this block but only including handlers in SetHandler. *) fun directSuccessors ExitCode = [] | directSuccessors(IndexedBr cases) = cases | directSuccessors(Unconditional dest) = [dest] | directSuccessors(Conditional {trueJump, falseJump, ...}) = [falseJump, trueJump] | directSuccessors(SetHandler { handler, continue }) = [handler, continue] | directSuccessors(UnconditionalHandle _) = [] | directSuccessors(ConditionalHandle { continue, ...}) = [continue] fun setReferences fromBlock = let val ExtendedBasicBlock{ flow, ...} = Vector.sub(blocks, fromBlock) val refs = directSuccessors flow fun setRefs toBlock = let val oldRefs = Array.sub(blockRefs, toBlock) in Array.update(blockRefs, toBlock, fromBlock :: oldRefs); if null oldRefs then setReferences toBlock else () end in List.app setRefs refs end val () = setReferences 0 in val directSuccessors = directSuccessors val blockRefs = blockRefs end val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) fun processBlocks (toDo: int list) = case List.filter (fn n => not(haveProcessed n)) toDo of [] => () | stillToDo as head :: _ => let (* Try to find a block all of whose predecessors have been processed. That increases the chances that we will have cached items. *) fun available dest = List.all haveProcessed (Array.sub(blockRefs, dest)) val blockNo = case List.find available stillToDo of SOME c => c | NONE => head val thisBlock as ExtendedBasicBlock { flow, ...} = Vector.sub(blocks, blockNo) (* Get the input cache state. 
Take the list of output caches of everything that jumps here and produce the intersection. *) val inputCacheList = List.map (fn n => #cache(Array.sub(resultArray, n))) (Array.sub(blockRefs, blockNo)) val inputCache = mergeCacheStates inputCacheList (* Process this block and add it to the results. *) val () = Array.update(processed, blockNo, true) val () = Array.update(resultArray, blockNo, concreteBlock(thisBlock, inputCache)) (* Add the successors but with handlers only included in SetHandler. *) val addSet = directSuccessors flow in processBlocks(addSet @ stillToDo) end in processBlocks [0]; (* If the failures list is empty we succeeded. *) case !failures of [] => (* Return the allocation vector. We may have unused registers, *) AllocateSuccess( Vector.mapi(fn (i, ExtendedBasicBlock{ flow, ...}) => BasicBlock{block= #code(Array.sub(resultArray, i)), flow=flow}) blocks ) (* Else we'll have to spill something. *) | l => AllocateFailure l end val nGenRegs = List.length generalRegisters structure Sharing = struct type intSet = intSet and extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg and xReg = xReg and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML index baa58148..4ce9456e 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML @@ -1,1028 +1,1022 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. 
This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICode( structure Arm64Code: ARM64PREASSEMBLY ): ARM64ICODE = struct open Arm64Code open Address datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than word if this is a container or a handler entry. *) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype reg = GenReg of xReg | FPReg of vReg datatype callKind = Recursive | ConstantCode of machineWord | FullCall (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. 
*) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Floating point constant *) | LoadFPConstant of { source: Word64.word, dest: 'fpReg, floatSize: floatSize } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value.. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed sized piece of memory and puts the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. 
*) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. *) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. 
The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd.*) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A.. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. 
*) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, fpDests: ('fpReg * vReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, fpRegArgs: ('fpReg * vReg) list, stackAdjust: int, currStackSize: int } (* Return from the function. resultRegs are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { results: ('genReg * xReg) list, fpResults: ('fpReg * vReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack.. 
*) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. 
*) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. 
Sets the result to 0 if it succeeds or 1 if it fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } (* Yield control during a spin-lock. *) | CPUYield (* Atomic operations added for ARM 8.1 *) | AtomicOperation of { base: 'genReg, source: 'optGenReg, dest: 'optGenReg, atOp: atomicOp } - (* Debugging - fault if values don't match. *) - | CacheCheck of { arg1: 'genReg, arg2: 'genReg } - (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true, falseJump if it is false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels *) | IndexedBr of int list (* Set up a handler.
This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. If an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock (* Return the list of blocks that are the immediate successor of this. *) fun successorBlocks(Unconditional l) = [l] | successorBlocks(Conditional{trueJump, falseJump, ...}) = [trueJump, falseJump] | successorBlocks ExitCode = [] | successorBlocks(IndexedBr cases) = cases | successorBlocks(SetHandler{handler, continue, ...}) = [handler, continue] (* We only need "handler" in SetHandler because we may have a handler that is never actually jumped to. 
*) | successorBlocks(UnconditionalHandle handler) = [handler] | successorBlocks(ConditionalHandle{handler, continue, ...}) = [handler, continue] local fun printCC(CcRef ccRef, stream) = stream ("CC" ^ Int.toString ccRef) fun printStackLoc(StackLoc{size, rno}, stream) = (stream "S"; stream(Int.toString rno); stream "("; stream(Int.toString size); stream ")") fun regRepr(XReg w) = "X" ^ Int.toString(Word8.toInt w) | regRepr XZero = "XZ" | regRepr XSP = "SP" and vRegRepr(VReg v) = "V" ^ Int.toString(Word8.toInt v) fun arithRepr OpSize64 = "64" | arithRepr OpSize32 = "32" fun printLoadType(Load64, stream) = stream "64" | printLoadType(Load32, stream) = stream "32" | printLoadType(Load16, stream) = stream "16" | printLoadType(Load8, stream) = stream "8" fun printSaves([], _, _) = () | printSaves([areg], _, printReg) = printReg areg | printSaves(areg::more, stream, printReg) = (printReg areg; stream ","; printSaves(more, stream, printReg)) fun printArg(ArgInReg reg, _, printReg) = printReg reg | printArg(ArgOnStack{wordOffset, container, field, ...}, stream, _) = ( printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")" ) fun printShift(ShiftLSL w, stream) = stream(" LSL " ^ Word8.toString w) | printShift(ShiftLSR w, stream) = stream(" LSR " ^ Word8.toString w) | printShift(ShiftASR w, stream) = stream(" ASR " ^ Word8.toString w) | printShift(ShiftNone, _) = () fun printFloatSize(Float32, stream) = stream "F" | printFloatSize(Double64, stream) = stream "D" fun printICode {stream, printGenReg, ...} (MoveRegister{ source, dest }: ('a, 'b, 'c) arm64ICode) = ( stream "\tMove\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (LoadNonAddressConstant{ source, dest }) = ( stream "\tLoadNonAddress\t"; stream(Word64.toString source); stream " => "; printGenReg dest ) | printICode {stream, printFPReg, ...} (LoadFPConstant{ source, dest, floatSize }) = ( 
stream "\tLoadFPConstant"; printFloatSize(floatSize, stream); stream "\t"; stream(Word64.toString source); stream " => "; printFPReg dest ) | printICode {stream, printGenReg, ...} (LoadAddressConstant{ source, dest }) = ( stream "\tLoadAddress\t"; stream(Address.stringOfWord source); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (LoadWithConstantOffset{ base, dest, byteOffset, loadType }) = ( stream "\tLoadConstOffset"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (LoadFPWithConstantOffset{ base, dest, byteOffset, floatSize }) = ( stream "\tLoadConstOffset"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printFPReg dest ) | printICode {stream, printGenReg, ...} (LoadWithIndexedOffset{ base, dest, index, loadType, signExtendIndex }) = ( stream "\tLoadIndexed"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (LoadFPWithIndexedOffset{ base, dest, index, floatSize, signExtendIndex }) = ( stream "\tLoadIndexed"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printFPReg dest ) | printICode {stream, printGenReg, ...} (GetThreadId { dest}) = ( stream "\tGetThreadId\t"; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (ObjectIndexAddressToAbsolute{ source, dest }) = ( stream "\tObjectAddrToAbs\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (AbsoluteToObjectIndex{ source, dest }) = ( stream "\tAbsToObjectAddr\t"; printGenReg source; stream " => "; 
printGenReg dest ) | printICode {stream, printGenReg, ...} (AllocateMemoryFixed{bytesRequired, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream(Word64.fmt StringCvt.DEC bytesRequired); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, ...} (AllocateMemoryVariable{size, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream "s="; printGenReg(size); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, ...} (InitialiseMem{size, addr, init}) = ( stream "\tInitialiseMem\t"; stream "s="; printGenReg(size); stream ",i="; printGenReg(init); stream ",a="; printGenReg(addr) ) | printICode {stream, ...} BeginLoop = stream "\tBeginLoop" | printICode {stream, printGenReg, ...} (JumpLoop{regArgs, stackArgs, checkInterrupt, ... }) = ( stream "\tJumpLoop\t"; List.app(fn {src, dst} => (printGenReg(dst); stream "="; printArg(src, stream, printGenReg); stream " ")) regArgs; List.app( fn {src, wordOffset, stackloc} => (printStackLoc(stackloc, stream); stream("(sp" ^ Int.toString wordOffset); stream ")="; printArg(src, stream, printGenReg); stream " ") ) stackArgs; case checkInterrupt of NONE => () | SOME saveRegs => (stream " Check:save="; printSaves(saveRegs, stream, printGenReg)) ) | printICode {stream, printGenReg, ...} (StoreWithConstantOffset{ base, source, byteOffset, loadType }) = ( stream "\tStoreConstOffset"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode {stream, printGenReg, printFPReg, ...} (StoreFPWithConstantOffset{ base, source, byteOffset, floatSize }) = ( stream "\tStoreConstOffset"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode {stream, printGenReg, ...} 
(StoreWithIndexedOffset{ base, source, index, loadType, signExtendIndex }) = ( stream "\tStoreIndexed"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode {stream, printGenReg, printFPReg, ...} (StoreFPWithIndexedOffset{ base, source, index, floatSize, signExtendIndex }) = ( stream "\tStoreIndexed"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode {stream, printGenReg, printOptGenReg, ...} (AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }) = ( stream (if isAdd then "\tAddImmediate" else "\tSubImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, printOptGenReg, ...} (AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift }) = ( stream (if isAdd then "\tAddRegister" else "\tSubRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, printOptGenReg, ...} (LogicalImmediate{ source, dest, ccRef, immed, logOp, length }) = ( stream (case logOp of LogAnd => "\tAndImmediate" | LogOr => "\tOrImmediate" | LogXor => "\tXorImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word64.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, printOptGenReg, ...} (LogicalRegister{ base, shifted, 
dest, ccRef, logOp, length, shift }) = ( stream (case logOp of LogAnd => "\tAndRegister" | LogOr => "\tOrRegister" | LogXor => "\tXorRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, ...} (ShiftRegister{ direction, dest, source, shift, opSize }) = ( stream ( case direction of ShiftLeft => "\tShiftLeft" | ShiftRightLogical => "\tShiftRightLog" | ShiftRightArithmetic => "\tShiftRightArith"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " by "; printGenReg(shift); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printOptGenReg, ...} (Multiplication{ kind, dest, sourceA, sourceM, sourceN }) = ( stream ( case kind of MultAdd32 => "\tMultAdd32\t" | MultSub32 => "\tMultSub32\t" | MultAdd64 => "\tMultAdd64\t" | MultSub64 => "\tMultSub64\t" | SignedMultAddLong => "\tSignedMultAddLong\t" | SignedMultHigh => "\tSignedMultHigh\t"); printGenReg(sourceM); stream " * "; printGenReg(sourceN); stream " +/- "; printOptGenReg sourceA; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (Division{ isSigned, dest, dividend, divisor, opSize }) = ( stream (if isSigned then "\tSignedDivide" else "\tUnsignedDivide"); stream(arithRepr opSize); stream "\t"; printGenReg(dividend); stream " by "; printGenReg(divisor); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (BeginFunction{ regArgs, stackArgs, fpRegArgs }) = ( stream "\tBeginFunction\t"; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printGenReg(arg); stream " ")) regArgs; List.app(fn (arg, r) => (stream(vRegRepr r); stream "="; printFPReg(arg); stream " ")) fpRegArgs; List.app(fn s => printStackLoc(s, stream)) stackArgs ) | printICode {stream, printGenReg, printFPReg, ...} (FunctionCall{callKind, regArgs, 
stackArgs, dests, fpRegArgs, fpDests, saveRegs, containers}) = ( stream "\tFunctionCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); stream "("; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn (arg, r) => (stream(vRegRepr r); stream "="; printFPReg(arg); stream " ")) fpRegArgs; List.app(fn arg => (stream "p="; printArg(arg, stream, printGenReg); stream " ")) stackArgs; stream ") "; List.app(fn (pr, r) => (stream(regRepr r); stream "=>"; printGenReg pr; stream " ")) dests; List.app(fn (pr, r) => (stream(vRegRepr r); stream "=>"; printFPReg pr; stream " ")) fpDests; stream " save="; printSaves(saveRegs, stream, printGenReg); if null containers then () else (stream " containers="; List.app (fn c => (printStackLoc(c, stream); stream " ")) containers) ) | printICode {stream, printGenReg, printFPReg, ...} (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize, fpRegArgs, ...}) = ( stream "\tTailCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn (arg, r) => (stream(vRegRepr r); stream "="; printFPReg(arg); stream " ")) fpRegArgs; List.app(fn {src, stack} => (stream (Int.toString stack); stream "<="; printArg(src, stream, printGenReg); stream " ")) stackArgs; stream "adjust="; stream(Int.toString stackAdjust); stream " stackSize="; stream(Int.toString currStackSize) ) | printICode {stream, printGenReg, printFPReg, ...} (ReturnResultFromFunction{ results, fpResults, returnReg, numStackArgs }) = ( stream "\tReturnFromFunction\t"; printGenReg(returnReg); stream "with "; List.app(fn (reg, r) => (stream(regRepr r); stream "=>"; printGenReg reg; stream " ")) results; List.app(fn (reg, r) => 
(stream(vRegRepr r); stream "=>"; printFPReg reg; stream " ")) fpResults; stream("," ^ Int.toString numStackArgs) ) | printICode {stream, printGenReg, ...} (RaiseExceptionPacket{ packetReg }) = ( stream "\tRaiseException\t"; printGenReg(packetReg) ) | printICode {stream, printGenReg, ...} (PushToStack{ source, copies, container }) = ( stream "\tPushToStack\t"; printGenReg source; if copies > 1 then (stream " * "; stream(Int.toString copies)) else (); stream " => "; printStackLoc(container, stream) ) | printICode {stream, printGenReg, ...} (LoadStack{ dest, wordOffset, container, field }) = ( stream "\tLoadStack\t"; printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")"; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (StoreToStack{ source, container, field, stackOffset }) = ( stream "\tStoreToStack\t"; printGenReg source; stream " => "; printStackLoc(container, stream); stream "+"; stream (Int.toString field); stream "("; stream(Int.toString stackOffset); stream ")" ) | printICode {stream, printGenReg, ...} (ContainerAddress{ dest, container, stackOffset }) = ( stream "\tContainerAddress\t"; stream "@"; printStackLoc(container, stream); stream " ("; stream(Int.toString stackOffset); stream ") => "; printGenReg dest ) | printICode {stream, ...} (ResetStackPtr{ numWords }) = ( stream "\tResetStackPtr\t"; stream(Int.toString numWords) ) | printICode {stream, printGenReg, ...} (TagValue{ source, dest, isSigned, opSize }) = ( stream "\tTag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (UntagValue{ source, dest, isSigned, opSize }) = ( stream "\tUntag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, 
...} (BoxLarge{source, dest, saveRegs}) = ( stream "\tBoxLarge\t"; printGenReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, ...} (UnboxLarge{source, dest}) = ( stream "\tUnboxLarge\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (BoxTagFloat{floatSize, source, dest, saveRegs}) = ( stream "\tBoxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, printFPReg, ...} (UnboxTagFloat{floatSize, source, dest}) = ( stream "\tUnboxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode {stream, printGenReg, ...} (LoadAcquire{ base, dest, loadType }) = ( stream "\tLoadAcquire"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (StoreRelease{ base, source, loadType }) = ( stream "\tStoreRelease"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "]" ) | printICode {stream, printGenReg, ...} (BitFieldShift{ source, dest, isSigned, length, immr, imms }) = ( stream "\tBitShift"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr length); stream "\t"; printGenReg source; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode {stream, printGenReg, ...} (BitFieldInsert{ source, dest, destAsSource, length, immr, imms }) = ( stream "\tBitInsert"; stream(arithRepr length); stream "\t"; printGenReg source; stream " with "; printGenReg destAsSource; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; 
stream(Word.fmt StringCvt.DEC imms) ) | printICode {stream, printGenReg, ...} (IndexedCaseOperation{testReg}) = ( stream "\tIndexedCase\t"; printGenReg testReg ) | printICode {stream, ...} PushExceptionHandler = stream "\tPushExcHandler" | printICode {stream, ...} PopExceptionHandler = stream "\tPopExcHandler" | printICode {stream, printGenReg, ...} (BeginHandler{packetReg}) = ( stream "\tBeginHandler\t"; printGenReg packetReg ) | printICode {stream, printGenReg, ...} (CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = ( stream "\tCompareByteVectors\t"; printGenReg(vec1Addr); stream ","; printGenReg(vec2Addr); stream ","; printGenReg(length); stream " => "; printCC(ccRef, stream) ) | printICode {stream, printGenReg, ...} (BlockMove{srcAddr, destAddr, length, isByteMove}) = ( stream(if isByteMove then "\tBlockByteMove\t" else "\tBlockWordMove\t"); stream "src="; printGenReg(srcAddr); stream ",dest="; printGenReg(destAddr); stream ",len="; printGenReg(length) ) | printICode {stream, printGenReg, printOptGenReg, ...} (AddSubXSP{ source, dest, isAdd }) = ( stream(if isAdd then "\tAdd\t" else "\tSubtract\t"); printGenReg source; stream " XSP => "; printOptGenReg dest ) | printICode {stream, printGenReg, ...} (TouchValue{ source }) = ( stream "\tTouchValue\t"; printGenReg source ) | printICode {stream, printGenReg, ...} (LoadAcquireExclusive{ base, dest }) = ( stream "\tLoadExclusive\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode {stream, printGenReg, printOptGenReg, ...} (StoreReleaseExclusive{ base, source, result }) = ( stream "\tStoreExclusive\t"; printOptGenReg source; stream " => ["; printGenReg base; stream "] result => "; printGenReg result ) | printICode {stream, ...} MemoryBarrier = stream "\tMemoryBarrier" | printICode {stream, printGenReg, printFPReg, ...} (ConvertIntToFloat{ source, dest, srcSize, destSize}) = ( stream "\tConvert"; stream(arithRepr srcSize); stream "To"; printFloatSize(destSize, stream); stream "\t"; 
printGenReg source; stream " => "; printFPReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}) = let open IEEEReal in stream "\tConvert"; printFloatSize(srcSize, stream); stream "To"; stream(arithRepr destSize); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream( case rounding of TO_NEAREST => " rounding" | TO_NEGINF => " rounding down" | TO_POSINF => " rounding up" | TO_ZERO => " truncating" ) end | printICode {stream, printFPReg, ...} (UnaryFloatingPt{ source, dest, fpOp}) = ( stream( case fpOp of NegFloat => "\tNegFloat\t" | NegDouble => "\tNegDouble\t" | AbsFloat => "\tAbsFloat\t" | AbsDouble => "\tAbsDouble\t" | ConvFloatToDble => "\tFloatToDble\t" | ConvDbleToFloat => "\tDbleToFloat\t" | MoveDouble => "\tMoveDouble\t" | MoveFloat => "\tMoveFloat\t" ); printFPReg source; stream " => "; printFPReg dest ) | printICode {stream, printFPReg, ...} (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}) = ( stream( case fpOp of MultiplyFP => "\tMultiply" | DivideFP => "\tDivide" | AddFP => "\tAdd" | SubtractFP => "\tSubtract" ); printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream " => "; printFPReg dest ) | printICode {stream, printFPReg, ...} (CompareFloatingPoint{ arg1, arg2, opSize, ccRef}) = ( stream "\tCompare"; printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream ", "; printCC(ccRef, stream) ) | printICode {stream, ...} CPUYield = stream "\tCpuYield" | printICode {stream, printGenReg, printOptGenReg, ...} (AtomicOperation{ base, source, dest, atOp }) = ( case atOp of LoadAddAL => stream "\tLoadAddAL\t" | LoadUmaxAL => stream "\tLoadUmaxAL\t" | SwapAL => stream "\tSwapAL\t" | LoadAddAcquire => stream "\tLoadAddAcquire\t" | LoadUMaxAcquire => stream "\tLoadUMaxAcquire\t" | SwapRelease => stream "\tSwapRelease\t"; printOptGenReg source; stream ",["; printGenReg base; stream "] => 
"; printOptGenReg dest )
-    |   printICode {stream, printGenReg, ...} (CacheCheck{ arg1, arg2}) =
-        ( stream "\tCacheCheck\t"; printGenReg arg1; stream ", "; printGenReg arg2 )
-
    (* Write a branch condition to the stream using the text produced by condToString. *)
    and printCondition(cond, stream) = stream(condToString cond)

    (* Print a basic block.
       The first argument is the record of printing functions: "stream" writes a
       string and the other three print a general register, an optional general
       register and a floating point register.  The second argument pairs the
       block number with the block itself.  The instructions are printed one per
       line using printICode and are followed by a description of the exit flow. *)
    fun printBlock {stream, printGenReg, printOptGenReg, printFPReg} (blockNo, BasicBlock{block, flow, ...}) =
    (
        (* Put a label on all but the first. *)
        if blockNo <> 0 then stream("L" ^ Int.toString blockNo ^ ":") else ();
        List.app (fn icode => (printICode {stream=stream, printGenReg=printGenReg, printOptGenReg=printOptGenReg, printFPReg=printFPReg} (icode); stream "\n")) block;
        (* Exit flow.  Ordinary block targets are written as L<n> and handler targets as H<n>. *)
        case flow of
            Unconditional l => stream("\tJump\tL" ^ Int.toString l ^ "\n")
        |   Conditional {condition, trueJump, falseJump, ccRef, ...} =>
            (
                stream "\tJump"; printCondition(condition, stream); stream "\t";
                printCC(ccRef, stream);
                stream " L"; stream (Int.toString trueJump);
                stream " else L"; stream (Int.toString falseJump);
                stream "\n"
            )
            (* Nothing is printed for these two kinds of exit. *)
        |   ExitCode => ()
        |   IndexedBr _ => ()
        |   SetHandler{handler, continue} =>
                stream(concat["\tSetHandler\tH", Int.toString handler, "\n", "\tJump\tL", Int.toString continue, "\n"])
        |   UnconditionalHandle handler => stream("\tJump\tH" ^ Int.toString handler ^ "\n")
        |   ConditionalHandle{handler, continue} =>
                stream(concat["\tJump\tL", Int.toString continue, " or H", Int.toString handler, "\n"])
    )
in
    (* A pseudo-register is printed as R<n>. *)
    fun printPReg stream (PReg i) = stream("R" ^ Int.toString i)

    (* An optional pseudo-register: ZeroReg is printed as "Zero", otherwise as for printPReg. *)
    fun printOptPReg stream ZeroReg = stream "Zero"
    |   printOptPReg stream (SomeReg reg) = printPReg stream reg

    (* A real general register: X<n>, or XZ / XSP for the two special cases. *)
    fun printXReg stream (XReg w) = stream("X" ^ Int.toString(Word8.toInt w))
    |   printXReg stream XZero = stream "XZ"
    |   printXReg stream XSP = stream "XSP"

    (* A real floating point register is printed as V<n>. *)
    fun printVReg stream (VReg w) = stream("V" ^ Int.toString(Word8.toInt w))

    (* Print every block of the vector with registers shown as pseudo-registers.
       Vector.appi supplies the index that printBlock uses as the block number.
       printPReg is used for the floating point registers as well, so they also
       appear as R<n>. *)
    fun printICodeAbstract(blockVec, stream) =
        Vector.appi(printBlock{stream=stream, printGenReg=printPReg stream, printOptGenReg=printOptPReg stream, printFPReg=printPReg stream}) blockVec

    and printICodeConcrete(blockVec, stream) =
Vector.appi(printBlock{stream=stream, printGenReg=printXReg stream, printOptGenReg=printXReg stream, printFPReg=printVReg stream}) blockVec
            (* The call above prints the blocks with real registers: printXReg serves for both
               the general and the optional general register and printVReg for floating point. *)
end

    (* Only certain bit patterns are allowed in a logical immediate instruction
       but the encoding is complex so it's easiest to inherit the test from the
       assembler layer.
       optow converts the opSize used at this level into the WordSize32/WordSize64
       value that Arm64Code.isEncodableBitPattern is called with. *)
    local
        fun optow OpSize32 = WordSize32 | optow OpSize64 = WordSize64
    in
        fun isEncodableBitPattern(v, w) = Arm64Code.isEncodableBitPattern(v, optow w)
    end

    (* This generates a BitField instruction with the appropriate values for immr and imms.
       The argument is a shift by the constant amount "shift" in the given direction
       and operand size; the result is a BitFieldShift instruction.
        - Left shift: immr is (-shift) modulo the register width and imms is
          width-1-shift; the operation is unsigned.
        - Right shift: immr is the shift and imms is width-1 (0x3f or 0x1f); the
          operation is signed for an arithmetic shift and unsigned for a logical shift.
       NOTE(review): no range check is made here; presumably callers ensure that
       shift is less than the register width - confirm. *)
    fun shiftConstant{ direction, dest, source, shift, opSize } =
    let
        val (isSigned, immr, imms) =
            case (direction, opSize) of
                (ShiftLeft, OpSize64) => (false, Word.~ shift mod 0w64, 0w64-0w1-shift)
            |   (ShiftLeft, OpSize32) => (false, Word.~ shift mod 0w32, 0w32-0w1-shift)
            |   (ShiftRightLogical, OpSize64) => (false, shift, 0wx3f)
            |   (ShiftRightLogical, OpSize32) => (false, shift, 0wx1f)
            |   (ShiftRightArithmetic, OpSize64) => (true, shift, 0wx3f)
            |   (ShiftRightArithmetic, OpSize32) => (true, shift, 0wx1f)
    in
        BitFieldShift{ source=source, dest=dest, isSigned=isSigned, length=opSize, immr=immr, imms=imms }
    end

    (* The types of this structure gathered together, each bound to itself.  The functor
       that follows in this file uses Arm64ICode.Sharing in a sharing constraint. *)
    structure Sharing =
    struct
        type xReg = xReg
        and vReg = vReg
        and reg = reg
        and condition = condition
        and shiftType = shiftType
        and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode
        and preg = preg
        and pregOrZero = pregOrZero
        and controlFlow = controlFlow
        and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock
        and stackLocn = stackLocn
        and regProperty = regProperty
        and ccRef = ccRef
        and 'genReg fnarg = 'genReg fnarg
        and closureRef = closureRef
        and loadType = loadType
        and opSize = opSize
        and logicalOp = logicalOp
        and callKind = callKind
        and floatSize = floatSize
        and shiftDirection = shiftDirection
        and multKind = multKind
        and fpUnary = fpUnary
        and fpBinary = fpBinary
        and atomicOp = atomicOp
    end
end;
diff --git
a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML index a28ccfc4..225d7581 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML @@ -1,1293 +1,1282 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeToArm64Code( structure Arm64PreAssembly: ARM64PREASSEMBLY structure Debug: DEBUG structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET structure Pretty: PRETTY structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end sharing Arm64PreAssembly.Sharing = Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ICODEGENERATE = struct open Identify open Arm64ICode open Arm64PreAssembly open Address exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) (* These aren't currently used for anything. 
*) val workReg1 = X16 and workReg2 = X17

    (* Entry point.  The visible part of the body sets up helper functions for
       moving values between registers and stack slots.  Instruction lists are
       built newest-first: "hd :: tl" and "tl <::> hd" both put the instruction
       that runs later at the head of the list. *)
    fun icodeToArm64Code {blocks: basicBlockConcrete vector, functionName, stackRequired, debugSwitches,
                          resultClosure, profileObject, ...} =
    let
        val numBlocks = Vector.length blocks

        (* Load from and store to stack.
           wordOffset is in words relative to X_MLStackPtr.  Offsets below 4096 use
           the scaled-offset form directly.  For larger offsets the offset is first
           loaded as a constant into a register and an indexed access is used.
           loadFromStack uses the destination register itself to hold the offset;
           storeToStack needs the separate workReg.
           NOTE(review): 4096 is presumably the limit of the scaled offset field - confirm. *)
        fun loadFromStack(destReg, wordOffset, code) =
            if wordOffset >= 4096
            then (LoadRegIndexed{regT=destReg, regN=X_MLStackPtr, regM=destReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) ::
                    [LoadNonAddr(destReg, Word64.fromInt wordOffset)] @ code
            else (LoadRegScaled{regT=destReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code

        and storeToStack(sourceReg, wordOffset, workReg, code) =
            if wordOffset >= 4096
            then (StoreRegIndexed{regT=sourceReg, regN=X_MLStackPtr, regM=workReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) ::
                    [LoadNonAddr(workReg, Word64.fromInt wordOffset)] @ code
            else (StoreRegScaled{regT=sourceReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code

        (* The source or destination of a move: a general register or a stack slot
           identified by its word offset. *)
        datatype srcAndDest = IsInReg of xReg | IsOnStack of int

        local
            (* The registers are numbered from 0.  Choose values that don't conflict
               with the stack addresses.  Register n maps to ~1-n and anything that
               is not an XReg maps to ~32, so all register nodes are negative. *)
            fun regNo(XReg r) = ~1 - Word8.toInt r
            |   regNo _ = ~1 - 31
            type node = {src: srcAndDest, dst: srcAndDest }
            (* A move is identified by its destination and depends on the move,
               if any, whose destination is this move's source. *)
            fun nodeAddress({dst=IsInReg r, ...}: node) = regNo r
            |   nodeAddress({dst=IsOnStack a, ...}) = a
            fun arcs({src=IsOnStack wordOffset, ...}: node) = [wordOffset]
            |   arcs{src=IsInReg r, ...} = [regNo r]
        in
            val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs }
        end

        (* This is a general function for moving values into registers or to the stack where it is possible
           that the source values might also be in use as destinations.  The stack is used for destinations
           only for tail recursive calls.
           NOTE(review): the JumpLoop case of codeExtended also passes IsOnStack destinations to this
           function, so the last sentence may be out of date.
           "moves" is a list of {src, dst} records and "code" is the instruction list so far;
           the result is the extended instruction list. *)
        fun moveMultipleValues(moves, code) =
        let
            fun moveValues ([], code) = code (* We're done. *)

            |   moveValues (arguments, code) =
                let
                    (* stronglyConnectedComponents does two things.
It detects loops where it's not possible to move items without breaking the loop but more importantly
                       it orders the dependencies so that if there are no loops we can load the source and store it in
                       the destination knowing that we won't overwrite anything we might later need. *)
                    val ordered = stronglyConnected arguments

                    (* Get a source value into register dReg: a register move, omitted if it
                       is already there, or a load from the stack. *)
                    fun loadIntoReg(IsInReg sReg, dReg, code) =
                            if sReg = dReg
                            then code
                            else (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code
                    |   loadIntoReg(IsOnStack wordOffset, dReg, code) =
                            loadFromStack(dReg, wordOffset, code)

                    (* Process the components in order.  A component with a single entry is a
                       simple move; a component with two or more entries is a cycle. *)
                    fun moveEachValue ([], code) = code

                    |   moveEachValue ([{dst=IsInReg dReg, src}] :: rest, code) =
                            moveEachValue(rest, loadIntoReg(src, dReg, code))

                    |   moveEachValue ([{dst=IsOnStack wordOffset, src=IsInReg sReg}] :: rest, code) =
                            (* Storing into the stack.  workReg1 is available to hold a large offset. *)
                            moveEachValue(rest, storeToStack(sReg, wordOffset, workReg1, code))

                    |   moveEachValue ([{dst=IsOnStack dstOffset, src=IsOnStack srcOffset}] :: rest, code) =
                            (* Copy a stack location - needs a load and store unless the address is the same.
                               The value travels through workReg2; workReg1 may be needed for the store offset. *)
                            if dstOffset = srcOffset
                            then moveEachValue(rest, code)
                            else moveEachValue(rest,
                                    storeToStack(workReg2, dstOffset, workReg1,
                                        loadFromStack(workReg2, srcOffset, code)))

                    |   moveEachValue((cycle as first :: _ :: _) :: rest, code) =
                        (* We have a cycle. *)
                        let
                            (* We need to exchange some of the arguments.
                               Doing an exchange here will set the destination with the correct source.
                               However we have to process every subsequent entry with the swapped registers.
                               That may well mean that one of those entries becomes trivial.
                               We also need to rerun stronglyConnectedComponents on at least the rest of
                               this cycle.  It's easiest to flatten the rest and do everything. *)
                            (* Exchange the source and destination.  We don't have an exchange instruction
                               and there's a further complication.  We could be copying between stack
                               locations and their offsets could be > 4096.  Since we've only got two
                               work registers we need to use the hardware stack as an extra location.
Stack-stack exchange is very rare so the extra overhead to handle the general case is worth it. *)
                            local
                                (* Put the value of sReg into a destination: a register move or a store
                                   to the stack, with "work" available to hold a large stack offset. *)
                                fun storeToDest(sReg, IsInReg dReg, _, code) =
                                        (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code
                                |   storeToDest(sReg, IsOnStack wordOffset, work, code) =
                                        storeToStack(sReg, wordOffset, work, code)
                            in
                                (* Swap the contents of two locations.  The instruction list is newest-first
                                   so each expression below is read from the innermost call outwards.
                                   When either location is a register: copy the other location into
                                   workReg2, copy the register into the other location, then copy
                                   workReg2 into the register. *)
                                fun exchange(IsInReg arg1Reg, arg2, code) =
                                        (MoveXRegToXReg{sReg=workReg2, dReg=arg1Reg}) ::
                                            storeToDest(arg1Reg, arg2, workReg1,
                                                loadIntoReg(arg2, workReg2, code))

                                |   exchange(arg1, IsInReg arg2Reg, code) =
                                        (MoveXRegToXReg{sReg=workReg2, dReg=arg2Reg}) ::
                                            storeToDest(arg2Reg, arg1, workReg1,
                                                loadIntoReg(arg1, workReg2, code))

                                |   exchange(arg1, arg2, code) =
                                        (* The hardware stack must be 16-byte aligned.
                                           Both locations are on the ML stack.  In execution order:
                                           load arg1 into workReg2 and push it on the hardware stack,
                                           load arg2 and store it in arg1, then pop the saved value
                                           and store it in arg2. *)
                                        storeToDest(workReg2, arg2, workReg1,
                                            (LoadRegUnscaled{regT=workReg2, regN=XSP, byteOffset=16, loadType=Load64, unscaledType=PostIndex}) ::
                                            storeToDest(workReg2, arg1, workReg1,
                                                loadIntoReg(arg2, workReg2,
                                                    (StoreRegUnscaled{regT=workReg2, regN=XSP, byteOffset= ~16, loadType=Load64, unscaledType=PreIndex}) ::
                                                        loadIntoReg(arg1, workReg2, code))))
                            end

                            (* Try to find either a register-register move or a register-stack move.
                               If not use the first.  If there's a stack-register move there will also
                               be a register-stack so we don't need to look for both.
                               NOTE(review): no search is actually made here; the first entry of
                               the cycle is always the one selected. *)
                            val {dst=selectDst, src=selectSrc} = first
                            (* This includes this entry but after the swap we'll eliminate it. *)
                            val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest)
                            val destAsSource = selectDst

                            fun match(s1: srcAndDest, s2) = s1 = s2

                            (* After the exchange the two selected locations hold each other's old
                               values so any entry that reads from one must now read from the other. *)
                            fun swapSources{src, dst} =
                                if match(src, selectSrc) then {src=destAsSource, dst=dst}
                                else if match(src, destAsSource) then {src=selectSrc, dst=dst}
                                else {src=src, dst=dst}

                            val exchangeCode = exchange(selectDst, selectSrc, code)
                        in
                            (* Start again with the rewritten entries: the components are recomputed. *)
                            moveValues(List.map swapSources flattened, exchangeCode)
                        end

                    |   moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning.
*)
                            raise InternalError "moveEachValue - empty set"
                in
                    moveEachValue(ordered, code)
                end
        in
            moveValues(moves, code)
        end

        (* Where we have multiple specific registers as either source or
           destination there is the potential that a destination register
           is currently in use as a source.
           This wraps each register of the {src, dst} pairs in IsInReg and passes
           the list to moveMultipleValues. *)
        fun moveMultipleRegisters(regPairList, code) =
        let
            val regPairsAsDests =
                List.map(fn {src, dst} => {src=IsInReg src, dst=IsInReg dst}) regPairList
        in
            moveMultipleValues(regPairsAsDests, code)
        end

        (* Floating point registers can only be moved into other floating point
           registers but it is possible to have a cycle.
           The structure follows moveMultipleValues but every node is a vReg. *)
        fun moveMultipleFPRegisters(moves: {dst: vReg, src: vReg} list, code) =
        let
            local
                (* A move is identified by the number of its destination register and
                   depends on the move, if any, that writes its source register. *)
                fun regNo(VReg r) = Word8.toInt r
                type node = {src: vReg, dst: vReg }
                fun nodeAddress({dst=r, ...}: node) = regNo r
                fun arcs{src=r, ...} = [regNo r]
            in
                val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs }
            end

            fun moveValues ([], code) = code (* We're done. *)

            |   moveValues (arguments, code) =
                let
                    val ordered = stronglyConnected arguments

                    fun moveEachValue ([], code) = code

                        (* A single entry: a register move, omitted when source and destination
                           are the same.  MoveDouble is used whatever the size of the value. *)
                    |   moveEachValue ([{dst, src}] :: rest, code) =
                            moveEachValue(rest,
                                if src = dst then code
                                else (FPUnaryOp{regN=src, regD=dst, fpOp=MoveDouble}) :: code)

                    |   moveEachValue((cycle as first :: _ :: _) :: rest, code) =
                        (* We have a cycle. *)
                        let
                            val {dst=selectDst: vReg, src=selectSrc: vReg} = first
                            (* This includes this entry but after the swap we'll eliminate it. *)
                            val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest)
                            val destAsSource = selectDst

                            (* Entries that read either of the exchanged registers must read the other. *)
                            fun swapSources{src, dst} =
                                if src=selectSrc then {src=destAsSource, dst=dst}
                                else if src=destAsSource then {src=selectSrc, dst=dst}
                                else {src=src, dst=dst}

                            (* Exchange the values of two floating point registers.  There are
                               various ways to do this.  For the moment just use the hardware stack.
*)
                            (* In execution order: push selectDst on the hardware stack, copy
                               selectSrc into selectDst, then pop the saved value into selectSrc.
                               The stack pointer moves by 16 bytes each way. *)
                            val exchangeCode =
                                code <::>
                                    StoreFPRegUnscaled{regT=selectDst, regN=XSP, byteOffset= ~16, floatSize=Double64, unscaledType=PreIndex} <::>
                                    FPUnaryOp{regN=selectSrc, regD=selectDst, fpOp=MoveDouble} <::>
                                    LoadFPRegUnscaled{regT=selectSrc, regN=XSP, byteOffset=16, floatSize=Double64, unscaledType=PostIndex}
                        in
                            moveValues(List.map swapSources flattened, exchangeCode)
                        end

                    |   moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *)
                            raise InternalError "moveEachValue - empty set"
                in
                    moveEachValue(ordered, code)
                end
        in
            moveValues(moves, code)
        end

        (* Add a register move to the code unless source and destination are the same register. *)
        fun moveIfNecessary({src, dst}, code) =
            if src = dst then code
            else MoveXRegToXReg{sReg=src, dReg=dst} :: code

        (* Add a constant word to the source register and put the result in the
           destination.  regW is used as a work register if necessary.  This is used
           both for addition and subtraction.
           A zero constant becomes a register move, or nothing at all if regS and
           regD are the same. *)
        fun addConstantWord({regS, regD, value=0w0, ...}, code) =
            if regS = regD then code else MoveXRegToXReg{sReg=regS, dReg=regD} :: code

        |   addConstantWord({regS, regD, regW, value}, code) =
            let
                (* If we have to load the constant it's better if the top 32-bits are
                   zero if possible.  A value above 2^63 is negated and subtracted
                   instead of being added. *)
                val (isSub, unsigned) =
                    if value > Word64.<<(0w1, 0w63)
                    then (true, ~ value)
                    else (false, value)
            in
                if unsigned < Word64.<<(0w1, 0w24)
                then (* We can put up to 24 bits in a shifted and an unshifted constant.
*)
                let
                    (* The value is below 2^24 so it splits into two 12-bit fields. *)
                    val w = Word.fromLarge(Word64.toLarge unsigned)
                    val high = Word.andb(Word.>>(w, 0w12), 0wxfff)
                    val low = Word.andb(w, 0wxfff)
                    val addSub = if isSub then SubImmediate else AddImmediate
                in
                    if high <> 0w0
                    then
                    (
                        (* The high part is applied first, from regS into regD, with shifted=true.
                           The low part, if any, is then applied to regD.  The list is newest-first
                           so the instruction for the low part is at the head. *)
                        (if low <> 0w0
                        then [addSub{regN=regD, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64}]
                        else []) @
                            addSub{regN=regS, regD=regD, immed=high, shifted=true, setFlags=false, opSize=OpSize64} :: code
                    )
                    else addSub{regN=regS, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64} :: code
                end
                else
                let
                    (* To minimise the constant and increase the chances that it
                       will fit in a single word look to see if we can shift it.
                       Trailing zero bits are removed and counted.  The value is non-zero
                       here so the recursion terminates. *)
                    fun getShift(value, shift) =
                        if Word64.andb(value, 0w1) = 0w0
                        then getShift(Word64.>>(value, 0w1), shift+0w1)
                        else (value, shift)
                    val (shifted, shift) = getShift(unsigned, 0w0)
                in
                    (* Load the reduced constant into regW then add or subtract it shifted
                       left by the number of bits that were removed. *)
                    code <::> LoadNonAddr(regW, shifted) <::>
                        (if isSub then SubShiftedReg else AddShiftedReg)
                            {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift, setFlags=false, opSize=OpSize64}
                end
            end

        val labelMaker = createLabelMaker()

        val startOfFunctionLabel = createLabel labelMaker (* Used for recursive calls/jumps *)

        (* One label is created for each basic block, indexed by block number. *)
        val blockToLabelMap = Vector.tabulate(numBlocks, fn _ => createLabel labelMaker)

        fun getBlockLabel blockNo = Vector.sub(blockToLabelMap, blockNo)

        (* Generate code for a single ICode instruction, adding it to "code", the newest-first
           instruction list built so far.  The first argument is not used by the clauses below. *)
        fun codeExtended _ (MoveRegister{source, dest, ...}, code) =
                moveIfNecessary({src=source, dst=dest}, code)

        |   codeExtended _ (LoadNonAddressConstant{source, dest}, code) =
                code <::> LoadNonAddr(dest, source)

        |   codeExtended _ (LoadFPConstant{source, dest, floatSize}, code) =
                code <::> LoadFPConst{dest=dest, value=source, floatSize=floatSize, work=workReg1}

        |   codeExtended _ (LoadAddressConstant{source, dest, ...}, code) =
                code <::> LoadAddr(dest, source)

        |   codeExtended _ (LoadWithConstantOffset{dest, base, byteOffset, loadType, ...}, code) =
                (* A negative offset uses the unscaled form with the byte offset as given. *)
                if byteOffset < 0
                then LoadRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate} :: code
                else
                let
                    val
unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in LoadRegScaled{regT=dest, regN=base, unitOffset=unitOffset, loadType=loadType} :: code end | codeExtended _ (LoadFPWithConstantOffset{dest, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (LoadFPRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (LoadFPRegScaled{regT=dest, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (LoadWithIndexedOffset{dest, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (LoadRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithIndexedOffset{dest, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (LoadFPRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (GetThreadId { dest}, code) = (* Load the thread id. This is always a 64-bit value. 
*) (LoadRegScaled{regT=dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code | codeExtended _ (ObjectIndexAddressToAbsolute{source, dest, ...}, code) = (AddShiftedReg{regM=source, regN=X_Base32in64, regD=dest, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code | codeExtended _ (AbsoluteToObjectIndex{source, dest, ...}, code) = let val destReg = dest in code <::> (SubShiftedReg{regM=X_Base32in64, regN=source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> (shiftConstant{shift=0w2, regN=destReg, regD=destReg, direction=ShiftRightLogical, opSize=OpSize64}) end | codeExtended _ (AllocateMemoryFixed{ bytesRequired, dest, saveRegs }, code) = code <::> AllocateMemoryFixedSize{ bytes=Word.fromLarge bytesRequired, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (AllocateMemoryVariable{ size, dest, saveRegs }, code) = code <::> AllocateMemoryVariableSize{ sizeReg=size, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (InitialiseMem{ size, addr, init}, code) = let val sizeReg = size and addrReg = addr and initReg = init val exitLabel = createLabel labelMaker and loopLabel = createLabel labelMaker (* This uses a loop to initialise. It's possible the size is zero so we have to check at the top of the loop. *) val (bShift, offset, loadType) = if is32in64 then (0w2, ~4, Load32) else (0w3, ~8, Load64) in code <::> (* Add the length in bytes so we point at the end. *) AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, shift=ShiftLSL bShift, setFlags=false, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Are we at the start? 
*) SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, exitLabel) <::> StoreRegUnscaled{regT=initReg, regN=workReg1, byteOffset=offset, loadType=loadType, unscaledType=PreIndex } <::> UnconditionalBranch loopLabel <::> SetLabel exitLabel end | codeExtended _ (BeginLoop, code) = code | codeExtended _ (JumpLoop{regArgs, stackArgs, checkInterrupt}, code) = let (* TODO: We could have a single list and use ArgOnStack and ArgInReg to distinguish. *) fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {wordOffset, src, ...} => {src=convertArg src, dst=IsOnStack wordOffset}) stackArgs val extRegArgs = map (fn {dst, src} => {src=convertArg src, dst=convertArg(ArgInReg dst)}) regArgs val code2 = moveMultipleValues(extStackArgs @ extRegArgs, code) in case checkInterrupt of NONE => code2 | SOME saveRegs => let val skipCheck = createLabel labelMaker in code2 <::> (* Put in stack-check code to allow this to be interrupted. 
*) LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} <::> SubShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, skipCheck) <::> RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=saveRegs} <::> SetLabel skipCheck end end | codeExtended _ (StoreWithConstantOffset{source, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then (StoreRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in (StoreRegScaled{regT=source, regN=base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithConstantOffset{source, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (StoreFPRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (StoreFPRegScaled{regT=source, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (StoreWithIndexedOffset{source, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. 
*) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (StoreRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithIndexedOffset{source, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (StoreFPRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (AddSubImmediate{ source, dest, immed, isAdd, length, ccRef}, code) = let val destReg = dest in ((if isAdd then AddImmediate else SubImmediate) {regN=source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (AddSubRegister{ base, shifted, dest, isAdd, length, ccRef, shift}, code) = let val destReg = dest in ( (if isAdd then AddShiftedReg else SubShiftedReg) {regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalImmediate{ source, dest, immed, logOp, length, ccRef}, code) = let val destReg = dest in (BitwiseLogical{regN=source, regD=destReg, bits=immed, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalRegister{ base, shifted, dest, logOp, length, ccRef, shift}, code) = let (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. 
*) val destReg = dest in (LogicalShiftedReg{regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (ShiftRegister{ direction, dest, source, shift, opSize }, code) = (ShiftRegisterVariable{regN=source, regM=shift, regD=dest, shiftDirection=direction, opSize=opSize}) :: code | codeExtended _ (Multiplication{ kind, dest, sourceA, sourceM, sourceN }, code) = let val destReg = dest and srcAReg = sourceA and srcNReg = sourceN and srcMReg = sourceM in (MultiplyAndAddSub{regM=srcMReg, regN=srcNReg, regA=srcAReg, regD=destReg, multKind=kind}) :: code end | codeExtended _ (Division{ isSigned, dest, dividend, divisor, opSize }, code) = (DivideRegs{regN=dividend, regM=divisor, regD=dest, isSigned=isSigned, opSize=opSize}) :: code | codeExtended _ (BeginFunction{regArgs, fpRegArgs, ...}, code) = let val skipCheck = createLabel labelMaker val defaultWords = 10 (* This is wired into the RTS. *) val workRegister = workReg1 val debugTrapAlways = false (* Can be set to true for debugging *) (* Test with either the stack-pointer or a high-water value. The RTS assumes that X9 has been used as the high-water if it is called through stackOverflowXCallOffset rather than stackOverflowCallOffset *) val (testReg, entryPt, code1) = if stackRequired <= defaultWords then (X_MLStackPtr, stackOverflowCallOffset, code) else (X9, stackOverflowXCallOffset, addConstantWord({regS=X_MLStackPtr, regD=X9, regW=workRegister, value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt stackRequired}, code)) (* Skip the RTS call if there is enough stack. N.B. The RTS can modify the end-of-stack value to force a trap here even if there is really enough stack. *) val code2 = (if debugTrapAlways then [] else [ConditionalBranch(CondCarrySet, skipCheck), SubShiftedReg{regM=workRegister, regN=testReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}]) @ (* Load the end-of-stack value. 
*) LoadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} :: code1 val code3 = code2 <::> RTSTrap{rtsEntry=entryPt, work=workReg1, save=List.map #2 regArgs} <::> SetLabel skipCheck fun mkPair(pr, rr) = {src=rr,dst=pr} val regPairs = List.map mkPair regArgs val fpRegPairs = List.map mkPair fpRegArgs in moveMultipleFPRegisters(fpRegPairs, moveMultipleRegisters(regPairs, code3)) end | codeExtended _ (TailRecursiveCall{callKind, regArgs, stackArgs, fpRegArgs, stackAdjust, currStackSize}, code) = let fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {stack, src} => {dst=IsOnStack(stack+currStackSize), src=convertArg src}) stackArgs val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val extFPArgs = map (fn (a, r) => {src=a, dst=r}) fpRegArgs (* Tail recursive calls are complicated because we generally have to overwrite the existing stack. That means storing the arguments in the right order to avoid overwriting a value that we are using for a different argument. *) fun codeTailCall(arguments, stackAdjust, code) = if stackAdjust < 0 then let (* If the function we're calling takes more arguments on the stack than the current function we will have to extend the stack. Do that by pushing the argument whose offset is at -1. Then adjust all the offsets and repeat. *) val {src=argM1, ...} = valOf(List.find(fn {dst=IsOnStack ~1, ...} => true | _ => false) arguments) fun renumberArgs [] = [] | renumberArgs ({dst=IsOnStack ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. 
*) | renumberArgs ({dst, src} :: args) = let val newDest = case dst of IsOnStack d => IsOnStack(d+1) | regDest => regDest val newSrc = case src of IsOnStack wordOffset => IsOnStack(wordOffset+1) | other => other in {dst=newDest, src=newSrc} :: renumberArgs args end val pushCode = case argM1 of IsOnStack wordOffset => (StoreRegUnscaled{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: loadFromStack(workReg2, wordOffset, code) | IsInReg reg => (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: code in codeTailCall(renumberArgs arguments, stackAdjust+1, pushCode) end else let val loadArgs = moveMultipleFPRegisters(extFPArgs, moveMultipleValues(arguments, code)) in if stackAdjust = 0 then loadArgs else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt stackAdjust * Word.toLarge nativeWordSize}, loadArgs) end val setArgumentsCode = codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code) val jumpToFunctionCode = case callKind of Recursive => [(UnconditionalBranch startOfFunctionLabel)] | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else [(BranchReg{regD=workReg1, brRegType=BRRBranch}), (LoadAddr(workReg1, m))] | FullCall => if is32in64 then [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}, AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}] else [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64}] in jumpToFunctionCode @ setArgumentsCode end | codeExtended _ (FunctionCall{callKind, regArgs, stackArgs, dests, fpRegArgs, fpDests, saveRegs, ...}, code) = let local fun pushStackArgs ([], _, code) = code | pushStackArgs (ArgOnStack {wordOffset, ...} ::args, argNum, code) = let (* 
Have to adjust the offsets of stack arguments. *) val adjustedOffset = wordOffset+argNum in pushStackArgs(args, argNum+1, loadFromStack(workReg1, adjustedOffset, code) <::> StoreRegUnscaled{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) end | pushStackArgs (ArgInReg reg ::args, argNum, code) = pushStackArgs(args, argNum+1, code <::> (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *)) (* We have to adjust any stack offset to account for the arguments we've pushed. *) val numStackArgs = List.length stackArgs fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack(wordOffset+numStackArgs) | convertArg(ArgInReg reg) = IsInReg(reg) in val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val extFPArgs = map (fn (a, r) => {src=a, dst=r}) fpRegArgs val loadArgs = moveMultipleFPRegisters(extFPArgs, moveMultipleValues(extRegArgs, pushedArgs)) end (* Push the registers before the call and pop them afterwards. 
*) fun makeSavesAndCall([], code) = ( case callKind of Recursive => code <::> (BranchAndLink startOfFunctionLabel) | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else code <::> (LoadAddr(workReg1, m)) <::> (BranchReg{regD=workReg1, brRegType=BRRAndLink}) | FullCall => if is32in64 then code <::> AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} else code <::> LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} ) | makeSavesAndCall(reg::regs, code) = let val areg = reg in makeSavesAndCall(regs, code <::> StoreRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) <::> LoadRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex} end (* Results. These go from the specific result register into the allocated register. *) val resultPairs = List.map(fn (pr, rr) => {src=rr,dst=pr}) dests val fpResultPairs = List.map(fn (pr, rr) => {src=rr,dst=pr}) fpDests in moveMultipleFPRegisters(fpResultPairs, moveMultipleRegisters(resultPairs, makeSavesAndCall(saveRegs, loadArgs))) end | codeExtended _ (ReturnResultFromFunction { results, fpResults, returnReg, numStackArgs }, code) = let fun resetStack(0, code) = code | resetStack(nItems, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) (* Return results. This goes from the allocated register into the specific register rr. 
*) val resultPairs = List.map(fn (pr, rr) => {src=pr,dst=rr}) results and fpResultPairs = List.map(fn (pr, rr) => {src=pr,dst=rr}) fpResults in BranchReg{regD=returnReg, brRegType=BRRReturn} :: resetStack(numStackArgs, moveMultipleFPRegisters(fpResultPairs, moveMultipleRegisters(resultPairs, code))) end | codeExtended _ (RaiseExceptionPacket{ packetReg }, code) = (* We need a work register here. It can be any register other than X0 since we don't preserve registers across calls. *) (* Copy the handler "register" into the stack pointer. Then jump to the address in the first word. The second word is the next handler. This is set up in the handler. We have a lot more raises than handlers since most raises are exceptional conditions such as overflow so it makes sense to minimise the code in each raise. *) moveIfNecessary({src=packetReg, dst=X0}, code) <::> LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch } | codeExtended _ (PushToStack{ source, copies, ... }, code) = let val reg = source val _ = copies > 0 orelse raise InternalError "PushToStack: copies<1" fun pushn(0, c) = c | pushn(n, c) = pushn(n-1, (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: c) in pushn(copies, code) end | codeExtended _ (LoadStack{ dest, wordOffset, ... }, code) = loadFromStack(dest, wordOffset, code) | codeExtended _ (StoreToStack{ source, stackOffset, ... }, code) = (* Store into the stack to set a field of a container. Always 64-bits. *) storeToStack(source, stackOffset, workReg1, code) | codeExtended _ (ContainerAddress{ dest, stackOffset, ... }, code) = (* Set the register to an offset in the stack. 
*) let val _ = stackOffset >= 0 orelse raise InternalError "codeGenICode: ContainerAddress - negative offset" val byteOffset = stackOffset * Word.toInt nativeWordSize in if byteOffset >= 4096 then code <::> LoadNonAddr(dest, Word64.fromInt byteOffset) <::> AddShiftedReg{regN=X_MLStackPtr, regM=dest, regD=dest, shift=ShiftNone, setFlags=false, opSize=OpSize64} else code <::> AddImmediate{regN=X_MLStackPtr, regD=dest, immed=Word.fromInt byteOffset, shifted=false, setFlags=false, opSize=OpSize64} end | codeExtended _ (ResetStackPtr{ numWords, ... }, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt numWords * Word.toLarge nativeWordSize}, code) | codeExtended _ (TagValue{ source, dest, isSigned=_, opSize }, code) = (* Shift left by one bit and add one. *) code <::> shiftConstant{ direction=ShiftLeft, regD=dest, regN=source, shift=0w1, opSize=opSize } <::> BitwiseLogical{ bits=0w1, regN=dest, regD=dest, opSize=opSize, setFlags=false, logOp=LogOr} | codeExtended _ (UntagValue{ source, dest, isSigned, opSize }, code) = code <::> shiftConstant{ direction=if isSigned then ShiftRightArithmetic else ShiftRightLogical, regD=dest, regN=source, shift=0w1, opSize=opSize } | codeExtended _ (BoxLarge{ source, dest, saveRegs }, code) = boxSysWord({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (UnboxLarge{ source, dest }, code) = let (* Unbox a large word. The argument is a poly word. 
*) val destReg = dest and srcReg = source in if is32in64 then LoadRegScaled{regT=destReg, regN=destReg, unitOffset=0, loadType=Load64} :: AddShiftedReg{regM=srcReg, regN=X_Base32in64, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else LoadRegScaled{regT=destReg, regN=srcReg, unitOffset=0, loadType=Load64} :: code end | codeExtended _ (BoxTagFloat{ floatSize=Double64, source, dest, saveRegs }, code) = boxDouble({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (BoxTagFloat{ floatSize=Float32, source, dest, saveRegs }, code) = let val floatReg = source and fixedReg = dest in if is32in64 then boxFloat({source=floatReg, destination=fixedReg, workReg=workReg1, saveRegs=saveRegs}, code) else code <::> MoveFPToGeneral{regN=floatReg, regD=fixedReg, floatSize=Float32} <::> shiftConstant{ direction=ShiftLeft, shift=0w32, regN=fixedReg, regD=fixedReg, opSize=OpSize64} <::> BitwiseLogical{ bits=0w1, regN=fixedReg, regD=fixedReg, opSize=OpSize64, setFlags=false, logOp=LogOr} end | codeExtended _ (UnboxTagFloat { floatSize=Double64, source, dest }, code) = if is32in64 then code <::> AddShiftedReg{regM=source, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadFPRegScaled{regT=dest, regN=workReg1, unitOffset=0, floatSize=Double64} else code <::> LoadFPRegScaled{regT=dest, regN=source, unitOffset=0, floatSize=Double64} | codeExtended _ (UnboxTagFloat { floatSize=Float32, source, dest }, code) = (* This is tagged in native 64-bits. In 32-in-64 we're loading 32-bits so we can use an indexed load directly. 
*) if is32in64 then code <::> LoadFPRegIndexed{regN=X_Base32in64, regM=source, regT=dest, option=ExtUXTX ScaleOrShift, floatSize=Float32} else code <::> shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=source, regD=workReg1, opSize=OpSize64} <::> MoveGeneralToFP{regN=workReg1, regD=dest, floatSize=Float32} | codeExtended _ (LoadAcquire{dest, base, loadType, ...}, code) = LoadAcquireReg{regT=dest, regN=base, loadType=loadType} :: code | codeExtended _ (StoreRelease{source, base, loadType, ...}, code) = StoreReleaseReg{regT=source, regN=base, loadType=loadType} :: code | codeExtended _ (BitFieldShift{ source, dest, isSigned, length, immr, imms }, code) = BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=if isSigned then BFSigned else BFUnsigned, opSize=length} :: code | codeExtended _ (BitFieldInsert{ source, destAsSource, dest, length, immr, imms }, code) = let (* If we're using BitFieldMove we retain some of the bits of the destination. The higher levels require us to treat that as a source. *) val _ = source = dest andalso raise InternalError "codeExtended: bitfield: dest=source" in BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=BFInsert, opSize=length} :: moveIfNecessary({src=destAsSource, dst=dest}, code) end | codeExtended {flow} (IndexedCaseOperation{testReg}, code) = let (* testReg contains the original value after the lowest value has been subtracted. Since both the original value and the lowest value were tagged it contains a shifted but untagged value. *) (* This should only be within a block with an IndexedBr flow type. *) val cases = case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation" val caseLabels = map getBlockLabel cases val tableLabel = createLabel labelMaker in code <::> LoadLabelAddress(workReg1, tableLabel) <::> (* Add the value shifted by one since it's already shifted. 
*) AddShiftedReg{regN=workReg1, regD=workReg1, regM=testReg, shift=ShiftLSL 0w1, setFlags=false, opSize=OpSize64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch} <::> BranchTable{ startLabel=tableLabel, brTable=caseLabels } end | codeExtended {flow} (PushExceptionHandler, code) = let (* This should only be within a block with a SetHandler flow type. *) val handleLabel = case flow of SetHandler{ handler, ...} => handler | _ => raise InternalError "codeGenICode: PushExceptionHandler" val labelRef = getBlockLabel handleLabel in (* Push the old handler and the handler entry point and set the "current handler" to point to the stack after we've pushed these. *) code <::> LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadLabelAddress(workReg2, labelRef) <::> StoreRegPair{regT1=workReg2, regT2=workReg1, regN=X_MLStackPtr, unitOffset= ~2, unscaledType=PreIndex, loadType=Load64} <::> StoreRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} end | codeExtended _ (PopExceptionHandler, code) = (* Remove and discard the handler we've set up. Pop the previous handler and put into "current handler". *) code <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} | codeExtended _ (BeginHandler{packetReg}, code) = let val beginHandleCode = code <::> (* The exception raise code resets the stack pointer to the value in the exception handler so this is probably redundant. 
Leave it for the moment, *) LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} in moveIfNecessary({src=X0, dst=packetReg }, beginHandleCode) end | codeExtended _ (CompareByteVectors{vec1Addr, vec2Addr, length, ...}, code) = let (* Construct a loop to compare two vectors of bytes. *) val vec1Reg = vec1Addr and vec2Reg = vec2Addr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker in code <::> (* Set the CC to Equal before we start in case length = 0 *) SubShiftedReg{regM=lenReg, regN=lenReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Start of loop *) CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Go to the end when len = zero *) (* Load the bytes for the comparison and increment each. *) LoadRegUnscaled{regT=workReg1, regN=vec1Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> LoadRegUnscaled{regT=workReg2, regN=vec2Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) (* Compare *) SubShiftedReg{regM=workReg2, regN=workReg1, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, loopLabel) <::> (* Loop if they're equal *) SetLabel exitLabel end | codeExtended _ (BlockMove{srcAddr, destAddr, length, isByteMove}, code) = let (* Construct a loop to move the data. 
*) val srcReg = srcAddr and destReg = destAddr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker val (offset, loadType) = if isByteMove then (1, Load8) else if is32in64 then (4, Load32) else (8, Load64) in code <::> SetLabel loopLabel (* Start of loop *) <::> CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Exit when length = 0 *) LoadRegUnscaled{regT=workReg1, regN=srcReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> StoreRegUnscaled{regT=workReg1, regN=destReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) UnconditionalBranch loopLabel <::> (* Back to the start *) SetLabel exitLabel end | codeExtended _ (AddSubXSP{ source, dest, isAdd }, code) = let val allocFreeCode = (if isAdd then AddExtendedReg else SubExtendedReg) {regM=source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64} :: code in case dest of XZero => allocFreeCode | destReg => (* We have to use add here to get the SP into the destination instead of the usual move. *) AddImmediate{regN=XSP, regD=destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64} :: allocFreeCode end | codeExtended _ (TouchValue _, code) = code (* Don't need to do anything now. *) (* Used in mutex operations. 
*) | codeExtended _ (LoadAcquireExclusive{ base, dest }, code) = LoadAcquireExclusiveRegister{regN=base, regT=dest} :: code | codeExtended _ (StoreReleaseExclusive{ base, source, result }, code) = StoreReleaseExclusiveRegister{regS=result, regT=source, regN=base} :: code | codeExtended _ (MemoryBarrier, code) = code <::> MemBarrier | codeExtended _ (ConvertIntToFloat{ source, dest, srcSize, destSize}, code) = (CvtIntToFP{regN=source, regD=dest, floatSize=destSize, opSize=srcSize}) :: code | codeExtended _ (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, code) = (CvtFloatToInt{regN=source, regD=dest, round=rounding, floatSize=srcSize, opSize=destSize}) :: code | codeExtended _ (UnaryFloatingPt{ source, dest, fpOp}, code) = (FPUnaryOp{regN=source, regD=dest, fpOp=fpOp}) :: code | codeExtended _ (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, code) = (FPBinaryOp{regN=arg1, regM=arg2, regD=dest, floatSize=opSize, fpOp=fpOp}) :: code | codeExtended _ (CompareFloatingPoint{ arg1, arg2, opSize, ...}, code) = (FPComparison{regN=arg1, regM=arg2, floatSize=opSize}) :: code | codeExtended _ (CPUYield, code) = code <::> Yield | codeExtended _ (AtomicOperation{ base, source, dest, atOp }, code) = AtomicExtension{regN=base, regS=source, regT=dest, atOp=atOp} :: code - | codeExtended _ (CacheCheck{ arg1, arg2 }, code) = - let - val okLabel = createLabel labelMaker - in - (code <::> SubShiftedReg {regM=arg1, regN=arg2, regD=XZero, shift=ShiftNone, opSize=OpSize64, setFlags=true} <::> - ConditionalBranch(CondEqual, okLabel) <::> - MoveXRegToXReg{sReg=XZero, dReg=X16} <::> - LoadRegScaled{regT=X16, regN=X16, unitOffset=0, loadType=Load16} <::> - SetLabel okLabel) - end - local (* processed - set to true when a block has been processed. *) val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) (* Find the blocks that reference this one. 
This isn't essential but allows us to try to generate blocks in the order of the control flow. This in turn may allow us to use short branches rather than long ones. *) val labelRefs = Array.array(numBlocks, []) (* NOTE(review): nothing visible in this local block constructs FlowCodeCMove; it is only matched in genCode - confirm whether it is still needed. *) datatype flowCode = FlowCodeSimple of int | FlowCodeCMove of {code: precode list, trueJump: int, falseJump: int} (* Process this recursively to set the references. If we have unreachable blocks, perhaps because they've been merged, we don't want to include them in the reference counting. This shouldn't happen now that IdentifyReferences removes unreferenced blocks. *) fun setReferences fromLabel toLabel = case Array.sub(labelRefs, toLabel) of [] => (* Not yet visited at all. *) let val BasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) val refs = case flow of ExitCode => [] | Unconditional lab => [lab] | Conditional{trueJump, falseJump, ... } => [trueJump, falseJump] | IndexedBr labs => labs | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val () = if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () in List.app (setReferences toLabel) refs end | refs => (* We've visited this at least once. Just add us to the list. *) Array.update(labelRefs, toLabel, fromLabel :: refs) (* Start from block 0, the first block; this records block 0 as its own referrer. *) val _ = setReferences 0 0 (* Process the blocks. We keep the "stack" explicit rather than using recursion because this allows us to select both arms of a conditional branch sooner. *) fun genCode(toDo, lastFlow, code) = case List.filter (not o haveProcessed) toDo of [] => let (* There's nothing left to do. We may need to add a final branch to the end. 
*) val finalBranch = case lastFlow of ExitCode => [] | IndexedBr _ => [] | Unconditional dest => [(UnconditionalBranch(getBlockLabel dest))] | Conditional { condition, trueJump, falseJump, ...} => (* The instruction list is built in reverse, so the conditional branch comes first in the final code. *) [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] | SetHandler { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] in finalBranch @ code (* Done. *) end | stillToDo as head :: _ => let local (* Check the references. If all the sources that lead up to this have already been processed we won't have any backward jumps. *) fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) val continuation = case lastFlow of ExitCode => NONE | IndexedBr _ => NONE (* We could put the last branch in here. *) | Unconditional dest => if not (haveProcessed dest) andalso available dest then SOME(FlowCodeSimple dest) else NONE | Conditional {trueJump, falseJump, condition, ...} => (* We can usually choose either destination and in nearly all cases it won't matter. The default branch is not to take forward jumps so if there is reason to believe that one branch is more likely we should follow that branch now and leave the other. If we have Cond(No)Overflow we assume that overflow is unusual. If one branch raises an exception we assume that that is unusual. *) let val (first, second) = case (condition, Vector.sub(blocks, falseJump)) of (CondNoOverflow, _) => (trueJump, falseJump) | (_, BasicBlock{ flow=ExitCode, block, ...}) => if List.exists(fn RaiseExceptionPacket _ => true | _ => false) block then (trueJump, falseJump) else (falseJump, trueJump) | _ => (falseJump, trueJump) in if not (haveProcessed first) andalso available first then SOME(FlowCodeSimple first) else if not (haveProcessed second) andalso available second then SOME(FlowCodeSimple second) else NONE end | SetHandler { continue, ... 
} => (* We want the continuation if possible. We'll need a branch round the handler so that won't help. *) if not (haveProcessed continue) andalso available continue then SOME(FlowCodeSimple continue) else NONE | UnconditionalHandle _ => NONE | ConditionalHandle _ => NONE in (* First choice - continue the existing block. Second choice - the first item whose sources have all been processed. Third choice - something from the list. *) val picked = case continuation of SOME c => c | NONE => case List.find available stillToDo of SOME c => FlowCodeSimple c | NONE => FlowCodeSimple head end in case picked of FlowCodeSimple picked => let val () = Array.update(processed, picked, true) (* Code to terminate the previous block. *) val startCode = case lastFlow of ExitCode => [] | IndexedBr _ => [] | UnconditionalHandle _ => [] | Unconditional dest => if dest = picked then [] else [(UnconditionalBranch(getBlockLabel dest))] | ConditionalHandle { continue, ...} => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | SetHandler { continue, ... } => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | Conditional { condition, trueJump, falseJump, ...} => if picked = falseJump (* Usual case. *) then [(ConditionalBranch(condition, getBlockLabel trueJump))] else if picked = trueJump then (* We have a jump to the true condition. Invert the jump. This is more than an optimisation. Because this immediately precedes the true block we're not going to generate a label. *) [(ConditionalBranch(invertTest condition, getBlockLabel falseJump))] else [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] (* Code-generate the body with the code we've done so far at the end. Add a label at the start if necessary. *) local (* If the previous block dropped through to this and this was the only reference then we don't need a label. 
*) fun onlyJumpingHere (lab: int) = if lab <> picked then false else case Array.sub(labelRefs, picked) of [singleton] => singleton = lab | _ => false val noLabel = case lastFlow of ExitCode => picked = 0 (* Unless this was the first block. *) | Unconditional dest => onlyJumpingHere dest | Conditional { trueJump, falseJump, ...} => onlyJumpingHere trueJump orelse onlyJumpingHere falseJump | IndexedBr _ => false | SetHandler _ => false | UnconditionalHandle _ => false | ConditionalHandle { continue, ...} => onlyJumpingHere continue in val startLabel = if noLabel then [] else [(SetLabel(getBlockLabel picked))] end val BasicBlock { flow, block, ...} = Vector.sub(blocks, picked) local fun genCodeBlock(instr, code) = codeExtended {flow=flow} (instr, code) in val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] in genCode(addSet @ stillToDo, flow, bodyCode) end | FlowCodeCMove{code, trueJump, falseJump} => let (* We've generated a conditional move and possibly a return. If the trueJump and falseJump are only ever referenced from this block they're done, otherwise we still need to do them. 
*) val _ = case Array.sub(labelRefs, trueJump) of [_] => Array.update(processed, trueJump, true) | _ => () val _ = case Array.sub(labelRefs, falseJump) of [_] => Array.update(processed, falseJump, true) | _ => () val BasicBlock { flow, ...} = Vector.sub(blocks, trueJump) val addSet = case flow of ExitCode => [] | Unconditional dest => [dest] | _ => raise InternalError "FlowCodeCMove" in genCode(addSet @ stillToDo, flow, code) end end in val ops = genCode([0], ExitCode, [(SetLabel startOfFunctionLabel)]) end in generateFinalCode{instrs=List.rev ops, name=functionName, resultClosure=resultClosure, parameters=debugSwitches, profileObject=profileObject, labelMaker= labelMaker} end structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and xReg = xReg and vReg = vReg and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML index 21790867..03f97a9d 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML @@ -1,896 +1,893 @@ (* Copyright (c) 2021-2 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*)

functor Arm64IdentifyReferences(
    structure Arm64ICode: ARM64ICODE
    structure Debug: DEBUG
    structure IntSet: INTSET
): ARM64IDENTIFYREFERENCES =
struct
    open Arm64ICode
    open IntSet

    (* Per-register summary.  identifyRegs is declared to return a vector of these
       together with the vector of extended basic blocks.
       NOTE(review): the code that fills in the fields is not in this part of the file.
       Presumably "active" and "refs" are the activity and reference counters that
       identifyRegs accumulates, "pushState" is its push-or-discard flag and "prop"
       is the register property passed in - confirm against the end of identifyRegs. *)
    type regState =
        { active: int, refs: int, pushState: bool, prop: regProperty }

    (* CC states before and after.  Currently no instruction uses the condition; conditional
       branches are handled at the block level.  The result of executing the instruction
       may be to set the condition code to a defined state, an undefined state or leave
       it unchanged.
       outCCState is the effect of an instruction or block on the condition code:
         CCSet cc         - sets it to the defined state cc,
         CCIndeterminate  - leaves it in an undefined state,
         CCUnchanged      - does not alter it.
       inCCState is the requirement on entry:
         CCNeeded cc      - the state cc must have been set,
         CCUnused         - no requirement. *)
    datatype outCCState = CCSet of ccRef | CCIndeterminate | CCUnchanged
    and inCCState = CCNeeded of ccRef | CCUnused

    (* A basic block together with the register and stack sets computed for it.
       Each entry in "block" pairs an instruction with three register sets.
       NOTE(review): the code that builds "current", "active" and "kill" is outside
       this view; presumably "active" is the set needed during the instruction - confirm. *)
    datatype extendedBasicBlock =
        ExtendedBasicBlock of
        {
            block: {instr: iCodeAbstract, current: intSet, active: intSet, kill: intSet } list,
            flow: controlFlow,
            locals: intSet, (* Defined and used entirely within the block. *)
            imports: intSet, (* Defined outside the block, used inside it, but not needed afterwards. *)
            exports: intSet, (* Defined within the block, possibly used inside, but used outside. *)
            passThrough: intSet, (* Active throughout the block. May be referred to by it but needed afterwards. *)
            loopRegs: intSet, (* Destination registers for a loop. They will be updated by this block. *)
            initialStacks: intSet, (* Stack items required at the start i.e. imports+passThrough for stack items. *)
            inCCState: ccRef option, (* The state this block assumes. If SOME _ all predecessors must set it. *)
            outCCState: ccRef option (* The condition code set by this block. SOME _ if at least one successor needs it. *)
        }

    (* Local alias so that internal consistency failures can be raised as InternalError. *)
    exception InternalError = Misc.InternalError

    (* Return the list of blocks that are the immediate successor of this.
*)
    (* blockSuccessors takes a BasicBlock and applies successorBlocks to its "flow" field.
       NOTE(review): successorBlocks is not defined in this functor; presumably it comes
       from the opened Arm64ICode structure - confirm. *)
    fun blockSuccessors(BasicBlock{flow, ...}) = successorBlocks flow

    (* Turn a register-or-zero operand into a list of registers: SomeReg reg gives [reg],
       ZeroReg gives the empty list, so the zero register never appears as a source
       or a destination in the sets built below. *)
    fun getOptReg(SomeReg reg) = [reg] | getOptReg ZeroReg = []

    (* getInstructionState describes what a single instruction reads and writes.
       The result is a record with the fields
         sources - registers whose values the instruction uses,
         dests   - registers the instruction gives values to,
         sStacks - stack locations or containers the instruction refers to; they must already exist,
         dStacks - stack locations or containers the instruction defines,
         ccIn    - the condition code the instruction requires; every clause here gives CCUnused,
         ccOut   - CCSet cc if the condition code is set to cc, CCIndeterminate if it is
                   undefined afterwards, CCUnchanged if it is not altered.
       There is no catch-all clause; every instruction constructor is listed explicitly. *)

    (* Moves, constants and loads: one destination, condition code unchanged. *)
    fun getInstructionState(MoveRegister { source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadNonAddressConstant { dest, ...}) =
            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadFPConstant { dest, ...}) =
            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadAddressConstant { dest, ...}) =
            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadWithConstantOffset { base, dest, ...}) =
            { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadFPWithConstantOffset { base, dest, ...}) =
            { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadWithIndexedOffset { base, dest, index, ...}) =
            { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadFPWithIndexedOffset { base, dest, index, ...}) =
            { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(GetThreadId { dest, ...}) =
            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(ObjectIndexAddressToAbsolute { source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(AbsoluteToObjectIndex { source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* Memory allocation: the condition code is reported as undefined afterwards. *)
    |   getInstructionState(AllocateMemoryFixed { dest, ...}) =
            { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(AllocateMemoryVariable{size, dest, ...}) =
            { sources=[size], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(InitialiseMem{size, addr, init}) =
            { sources=[size, addr, init], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(BeginLoop) =
            (* This is just a marker.  It doesn't actually generate any code. *)
            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(JumpLoop{regArgs, stackArgs, ...}) =
        let
            (* Both argument lists hold records with a "src" field.  An argument held in a
               register is added to the register sources, one held on the stack adds its
               container to the stack sources.  The loop destinations are not reported here. *)
            fun getSourceFromRegs({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs)
            |   getSourceFromRegs({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs)
            and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs)
            |   getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs)
            (* Fold over the stack arguments first, then over the register arguments. *)
            val (regSources, stackSources) =
                List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs
        in
            { sources=regSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
        end

    (* Stores: the value and the address registers are all sources; nothing is defined. *)
    |   getInstructionState(StoreWithConstantOffset { base, source, ...}) =
            { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(StoreFPWithConstantOffset { base, source, ...}) =
            { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(StoreWithIndexedOffset { base, source, index, ...}) =
            { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(StoreFPWithIndexedOffset { base, source, index, ...}) =
            { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* Arithmetic and logical operations.  The destination may be the zero register, in which
       case there is no destination.  The condition code is set only if ccRef is SOME cc. *)
    |   getInstructionState(AddSubImmediate{ source, dest, ccRef, ... }) =
            { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused,
              ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc }

    |   getInstructionState(AddSubRegister{ base, shifted, dest, ccRef, ... }) =
            { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused,
              ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc }

    |   getInstructionState(LogicalImmediate{ source, dest, ccRef, ... }) =
            { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused,
              ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc }

    |   getInstructionState(LogicalRegister{ base, shifted, dest, ccRef, ... }) =
            { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused,
              ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc }

    |   getInstructionState(ShiftRegister{ source, shift, dest, ... }) =
            { sources=[source, shift], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* sourceA may be the zero register, in which case it is not a source. *)
    |   getInstructionState(Multiplication{ dest, sourceA, sourceM, sourceN, ... }) =
            { sources=getOptReg sourceA @ [sourceM, sourceN], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(Division{ dest, dividend, divisor, ... }) =
            { sources=[dividend, divisor], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* Function entry defines the argument registers (first component of each pair)
       and the stack arguments. *)
    |   getInstructionState(BeginFunction {regArgs, fpRegArgs, stackArgs, ...}) =
            { sources=[], dests=map #1 regArgs @ map #1 fpRegArgs, sStacks=[], dStacks=stackArgs, ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(FunctionCall{regArgs, stackArgs, dests, fpRegArgs, fpDests, containers, ...}) =
        let
            (* Non-tail-recursive.  Behaves as a normal reference to sources.
               Register arguments are pairs whose first component is the argument;
               stack arguments are the arguments themselves. *)
            fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs)
            |   getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs)
            and getSourceFromStack(ArgInReg reg, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs)
            |   getSourceFromStack(ArgOnStack { container, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs)
            val (argSources, stackSources) =
                List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs
            val fpArgSources = List.map #1 fpRegArgs
        in
            (* The containers passed to the call count as stack sources as well. *)
            { sources=argSources @ fpArgSources, dests=List.map #1 dests @ List.map #1 fpDests,
              sStacks=stackSources @ containers, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
        end

    |   getInstructionState(TailRecursiveCall{regArgs, fpRegArgs, stackArgs, ...}) =
        let
            (* Tail recursive call.  References the argument sources but exits.
               Here the stack arguments are records with a "src" field. *)
            fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs)
            |   getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs)
            and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs)
            |   getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs)
            val (argSources, stackSources) =
                List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs
            val fpArgSources = List.map #1 fpRegArgs
        in
            { sources=argSources@fpArgSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
        end

    (* The return-address register and all the result registers are sources. *)
    |   getInstructionState(ReturnResultFromFunction{results, fpResults, returnReg, ...}) =
            { sources=returnReg :: List.map #1 results @ List.map #1 fpResults, dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(RaiseExceptionPacket{packetReg}) =
            { sources=[packetReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    (* Stack operations.  Only PushToStack defines a stack location; the others refer to an existing one. *)
    |   getInstructionState(PushToStack{ source, container, ... }) =
            { sources=[source], dests=[], sStacks=[], dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadStack{ dest, container, ... }) =
            { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(StoreToStack{ source, container, ... }) =
            (* Although this stores into the container it must already exist. *)
            { sources=[source], dests=[], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(ContainerAddress{ dest, container, ... }) =
            { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(ResetStackPtr _) =
            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* Tagging and boxing.  The two boxing operations report the condition code as
       undefined afterwards; the others leave it unchanged. *)
    |   getInstructionState(TagValue{source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(UntagValue{source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(BoxLarge{source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(UnboxLarge{source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(BoxTagFloat{source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(UnboxTagFloat{source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadAcquire { base, dest, ...}) =
            { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(StoreRelease { base, source, ...}) =
            { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(BitFieldShift{ source, dest, ... }) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* destAsSource is reported as a source in addition to source. *)
    |   getInstructionState(BitFieldInsert{ source, destAsSource, dest, ... }) =
            { sources=[source, destAsSource], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(IndexedCaseOperation{ testReg }) =
            { sources=[testReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(PushExceptionHandler) =
            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(PopExceptionHandler) =
            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(BeginHandler{packetReg}) =
            (* The packet register is a destination since this provides its definition. *)
            { sources=[], dests=[packetReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* The comparison always sets the condition code to ccRef. *)
    |   getInstructionState(CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) =
            { sources=[vec1Addr, vec2Addr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }

    |   getInstructionState(BlockMove{srcAddr, destAddr, length, ...}) =
            { sources=[srcAddr, destAddr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }

    |   getInstructionState(AddSubXSP{source, dest, ...}) =
            { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(TouchValue{source}) =
            { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(LoadAcquireExclusive{base, dest}) =
            { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* The value stored may be the zero register; the result register is always defined. *)
    |   getInstructionState(StoreReleaseExclusive{base, source, result}) =
            { sources=[base] @ getOptReg source, dests=[result], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(MemoryBarrier) =
            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* Floating point. Only the comparison sets the condition code. *)
    |   getInstructionState(ConvertIntToFloat{ source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(ConvertFloatToInt{ source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(UnaryFloatingPt{ source, dest, ...}) =
            { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(BinaryFloatingPoint{ arg1, arg2, dest, ...}) =
            { sources=[arg1, arg2], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    |   getInstructionState(CompareFloatingPoint{ arg1, arg2, ccRef, ...}) =
            { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef }

    |   getInstructionState(CPUYield) =
            { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

    (* Both the value supplied and the result may be the zero register. *)
    |   getInstructionState(AtomicOperation{ base, source, dest, ... }) =
            { sources=base :: getOptReg source, dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged }

-    |   getInstructionState(CacheCheck{ arg1, arg2}) =
-            { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate }
-
    (* These instructions can be eliminated if their register sources are not used.
       There may be other cases.
*)
    (* eliminateable returns true for exactly the instruction constructors listed here and
       false for every other instruction.  It only says that an instruction is a candidate:
       the first pass of identifyRegs drops a candidate when none of its destination
       registers has been referenced, and the third pass drops one when none of its
       destinations is in the set of registers active after it. *)
    fun eliminateable(MoveRegister _) = true
    |   eliminateable(LoadNonAddressConstant _) = true
    |   eliminateable(LoadAddressConstant _) = true
    |   eliminateable(LoadWithConstantOffset _) = true
    |   eliminateable(LoadWithIndexedOffset _) = true
    |   eliminateable(ObjectIndexAddressToAbsolute _) = true
    |   eliminateable(TagValue _) = true
    |   eliminateable(UntagValue _) = true
    |   eliminateable(BoxLarge _) = true
    |   eliminateable(UnboxLarge _) = true
    |   eliminateable(BoxTagFloat _) = true
    |   eliminateable(UnboxTagFloat _) = true
    |   eliminateable _ = false

    fun identifyRegs(blockVector, pregProps): extendedBasicBlock vector * regState vector =
    let
        val maxPRegs = Vector.length pregProps
        val vectorLength = Vector.length blockVector
        (* Initial arrays - declarationArray is the set of registers given
           values by the block, importArray is the set of registers referenced by
           the block and not declared locally. *)
        val declarationArray = Array.array(vectorLength, emptySet)
        and importArray = Array.array(vectorLength, emptySet)
        val stackDecArray = Array.array(vectorLength, emptySet)
        and stackImportArray = Array.array(vectorLength, emptySet)
        and localLoopRegArray = Array.array(vectorLength, emptySet)

        (* References - this is used locally to see if a register is ever
           actually used and also included in the result which uses it as
           part of the choice of which register to spill. *)
        val regRefs = Array.array(maxPRegs, 0)
        (* Registers that must be pushed because they are required after
           a function call.  For cache registers this means "discard". *)
        and requirePushOrDiscard = Array.array(maxPRegs, false)

        fun incrRef r = Array.update(regRefs, r, Array.sub(regRefs, r)+1)

        (* Contains the, possibly filtered, code for each block. *)
        val resultCode = Array.array(vectorLength, NONE)

        val ccInStates = Array.array(vectorLength, CCUnused)
        and ccOutStates = Array.array(vectorLength, CCIndeterminate)

        (* First pass - for each block build up the sets of registers defined and
           used in the block.
We do this depth-first so that we can use "refs" to see if a register is used. If this is an instruction that can be eliminated we don't need to generate it and can ignore any references it makes. *) local fun blockScan blockNo = if isSome(Array.sub(resultCode, blockNo)) then () else let val () = Array.update(resultCode, blockNo, SOME []) (* Prevent looping. *) val thisBlock as BasicBlock { block, flow, ...} = Vector.sub(blockVector, blockNo) val successors = blockSuccessors thisBlock (* Visit everything reachable first. *) val () = List.app blockScan successors fun scanCode(instr, original as { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... }) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ccIn, ccOut, ... } = getInstructionState instr fun regNo(PReg i) = i and stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs (* If this instruction requires a cc i.e. is SetToCondition or X87FPGetCondition we need to set this as a requirement earlier. If this sets the CC and it is the condition we've been expecting we've satisfied it and can set the previous condition to Unused. We could use this to decide if a comparison is no longer required. That can only happen in very specific circumstances e.g. some tests in Test176.ML so it's not worthwhile. *) val newInCC = case (ccIn, ccOut, occIn) of (cc as CCNeeded _, _, _) => cc (* This instr needs a particular cc. *) | (CCUnused, CCSet _, _) => CCUnused | (CCUnused, _, occIn) => occIn (* If this instruction modifies the CC check to see if it is setting an requirement. *) val _ = case (occIn, ccOut) of (CCNeeded ccRIn, CCSet ccRout) => if ccRIn = ccRout then () else raise InternalError "CCCheck failed" | (CCNeeded _, CCIndeterminate) => raise InternalError "CCCheck failed" | _ => () (* The output CC is the last CC set. 
Tail instructions that don't change the CC state are ignored until we reach an instruction that sets it. *) val newOutCC = case occOut of CCUnchanged => ccOut | _ => occOut val instrLoopRegs = case instr of JumpLoop{regArgs, ...} => listToSet (map (regNo o #dst) regArgs) | _ => emptySet in if eliminateable instr andalso List.all(fn dReg => Array.sub(regRefs, dReg) = 0) destRegNos then original (* Don't include this instruction. *) else let (* Only mark the sources as referred after we know we're going to need this. In that way we may eliminate the instruction that created this source. *) val () = List.app incrRef sourceRegNos in { code = instr :: code, decs = union(listToSet destRegNos, decs), refs = union(listToSet sourceRegNos, refs), sDecs = union(listToSet stackDestRegNos, sDecs), sRefs = union(listToSet stackSourceRegNos, sRefs), occIn = newInCC, occOut = newOutCC, loopRegs = union(loopRegs, instrLoopRegs)} end end (* If we have a conditional branch at the end we need the condition code. It should either be set here or in a preceding block. *) val inCC = case flow of Conditional { ccRef, ...} => CCNeeded ccRef | _ => CCUnused val { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... } = List.foldr scanCode {code=[], decs=emptySet, refs=emptySet, sDecs=emptySet, sRefs=emptySet, occIn=inCC, occOut=CCUnchanged, loopRegs=emptySet} block in Array.update(declarationArray, blockNo, decs); (* refs includes local declarations. Remove before adding to the result. *) Array.update(importArray, blockNo, minus(refs, decs)); Array.update(localLoopRegArray, blockNo, loopRegs); Array.update(stackDecArray, blockNo, sDecs); Array.update(stackImportArray, blockNo, minus(sRefs, sDecs)); Array.update(resultCode, blockNo, SOME code); Array.update(ccInStates, blockNo, occIn); Array.update(ccOutStates, blockNo, occOut) end in val () = blockScan 0 (* Start with the root block. *) end (* Second phase - Propagate reference information between the blocks. 
We need to consider loops here. Do a depth-first scan marking each block. If we find a loop we save the import information we've used. If when we come to process that block we find the import information is different we need to reprocess. *) (* Pass through array - values used in other blocks after this that are not declared in this block. *) val passThroughArray = Array.array(vectorLength, emptySet) val stackPassThroughArray = Array.array(vectorLength, emptySet) (* Exports - those of our declarations that are used in other blocks. *) val exportArray = Array.array(vectorLength, emptySet) val stackExportArray = Array.array(vectorLength, emptySet) (* Loop registers. This contains the registers that are not exported from or passed through this block but are used subsequently as loop registers. *) val loopRegArray = Array.array(vectorLength, emptySet) val () = Array.copy{src=localLoopRegArray, dst=loopRegArray, di=0} (* If any one of the successors requires the CC then this is set. Otherwise we leave it as Unused. *) val ccRequiredOut = Array.array(vectorLength, CCUnused) local datatype loopData = Unprocessed | Processing | Processed | Looped of { regSet: intSet, loopSet: intSet, stackSet: intSet, ccState: inCCState } fun reprocessLoop () = let val reprocess = ref false val loopArray = Array.array(vectorLength, Unprocessed) fun processBlocks blockNo = case Array.sub(loopArray, blockNo) of Processed => (* Already seen this by a different route. *) { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } | Looped s => s (* We've already seen this in a loop. *) | Processing => (* We have a loop. *) let (* Use the existing input array. 
*) val inputs = { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } val () = Array.update(loopArray, blockNo, Looped inputs) in inputs end | Unprocessed => (* Normal case - not visited yet. *) let val () = Array.update(loopArray, blockNo, Processing) val thisBlock = Vector.sub(blockVector, blockNo) val ourDeclarations = Array.sub(declarationArray, blockNo) and ourStackDeclarations = Array.sub(stackDecArray, blockNo) and ourLocalLoopRegs = Array.sub(localLoopRegArray, blockNo) val successors = blockSuccessors thisBlock fun addSuccessor b = let val {regSet=theirImports, stackSet=theirStackImports, ccState=theirInState, loopSet=theirLoops} = processBlocks b (* Remove loop regs from the imports if they are actually given new values by this block. We don't want to pass the old loop regs through here. *) val theirImports = minus(theirImports, ourLocalLoopRegs) (* Split the imports. If a register is a local declaration then it becomes an export. If it is not it becomes part of our passThrough. *) val (addToExp, addToImp) = IntSet.partition (fn i => member(i, ourDeclarations)) theirImports val (addToStackExp, addToStackImp) = IntSet.partition (fn i => member(i, ourStackDeclarations)) theirStackImports (* Merge the input states from each of the successors. *) val () = case (theirInState, Array.sub(ccRequiredOut, blockNo)) of (CCNeeded ts, CCNeeded req) => if ts = req then () else raise InternalError "Mismatched states" | (ts as CCNeeded _, _) => Array.update(ccRequiredOut, blockNo, ts) | _ => () (* Add loop registers to the set if they are not declared here. The only place they are declared is at the entry to the loop so that stops them being propagated further. 
*) val addToLoops = minus(theirLoops, ourDeclarations) in Array.update(exportArray, blockNo, union(Array.sub(exportArray, blockNo), addToExp)); Array.update(passThroughArray, blockNo, union(Array.sub(passThroughArray, blockNo), addToImp)); Array.update(stackExportArray, blockNo, union(Array.sub(stackExportArray, blockNo), addToStackExp)); Array.update(stackPassThroughArray, blockNo, union(Array.sub(stackPassThroughArray, blockNo), addToStackImp)); Array.update(loopRegArray, blockNo, union(Array.sub(loopRegArray, blockNo), addToLoops)) end val () = List.app addSuccessor successors val ourInputs = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)) val ourStackInputs = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)) in (* Check that we supply the required state. *) case (Array.sub(ccRequiredOut, blockNo), Array.sub(ccOutStates, blockNo)) of (CCNeeded ccReq, CCSet ccSet) => if ccReq = ccSet then () else raise InternalError "Mismatched cc states" | (CCNeeded _, CCIndeterminate) => raise InternalError "Mismatched cc states" | (cc as CCNeeded needOut, CCUnchanged) => ( (* We pass through the state. If we don't use the state then we need to set this as the input. If we do use the state it must be the same. *) case Array.sub(ccInStates, blockNo) of CCUnused => Array.update(ccInStates, blockNo, cc) | CCNeeded needIn => if needOut = needIn then () else raise InternalError "Mismatched cc states" ) | _ => (); (* Was this block used in a loop? If so we should not be requiring a CC. 
*) case Array.sub(loopArray, blockNo) of Looped {regSet, stackSet, ...} => ( case Array.sub(ccInStates, blockNo) of CCNeeded _ => raise InternalError "Looped state needs cc" | _ => (); if setToList regSet = setToList ourInputs andalso setToList stackSet = setToList ourStackInputs then () else reprocess := true ) | _ => (); Array.update(loopArray, blockNo, Processed); { regSet = ourInputs, stackSet = ourStackInputs, ccState = Array.sub(ccInStates, blockNo), loopSet=Array.sub(loopRegArray, blockNo)} end in reprocess := false; processBlocks 0; if !reprocess then reprocessLoop () else () end in val () = reprocessLoop () end (* Third pass - Build the result list with the active registers for each instruction. We don't include registers in the passThrough set since they are active throughout the block. *) local (* Number of instrs for which this is active. We use this to try to select a register to push to the stack if we have too many. Registers that have only a short lifetime are less likely to be pushed than those that are active longer. *) val regActive = Array.array(maxPRegs, 0) fun addActivity n r = Array.update(regActive, r, Array.sub(regActive, r)+n) fun createResultInstrs (passThrough, stackPassThrough) (instr, (tail, activeAfterThis, stackActiveAfterThis)) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ... } = getInstructionState instr in (* Eliminate instructions if their results are not required. The earlier check for this will remove most cases but if we have duplicated a block we may have a register that is required elsewhere but not in this particular branch. 
*) if not(List.exists(fn PReg d => member(d, activeAfterThis)) dests) andalso eliminateable instr then (tail, activeAfterThis, stackActiveAfterThis) else let fun regNo(PReg i) = i fun stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos (* Remove any sources that are present in passThrough since they are going to be active throughout the block. *) and sourceSet = minus(listToSet sourceRegNos, passThrough) val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs val stackDestSet = listToSet stackDestRegNos and stackSourceSet = minus(listToSet stackSourceRegNos, stackPassThrough) (* To compute the active set for the PREVIOUS instruction (we're processing from the end back to the start) we remove any registers that have been given values in this instruction and add anything that we are using in this instruction since they will now need to have values. *) val afterRemoveDests = minus(activeAfterThis, destSet) val stackAfterRemoveDests = minus(stackActiveAfterThis, stackDestSet) val activeForPrevious = union(sourceSet, afterRemoveDests) val stackActiveForPrevious = union(stackSourceSet, stackAfterRemoveDests) (* The "active" set is the set of registers that need to be active DURING the instruction. It includes destinations, which will usually be in "activeAfterThis", because there may be destinations that are not actually used subsequently but still need a register. *) val activeForInstr = case instr of FunctionCall _ => sourceSet (* Is this still needed? *) | TailRecursiveCall _ => (* Set the active set to the total set of registers we require including the work register. This ensures that we will spill as many registers as we require when we look at the size of the active set. *) union(sourceSet, destSet) | BoxLarge _ => (* We can only store the value in the box after the box is allocated. 
*) union(activeAfterThis, union(sourceSet, destSet)) | BoxTagFloat _ => (* Since the source must be a V register and the destination an X register there isn't actually a problem here, but do this anyway. *) union(activeAfterThis, union(sourceSet, destSet)) | _ => union(activeAfterThis, destSet) val () = List.app(addActivity 1) (setToList activeForInstr) local (* If we are allocating memory we have to save the current registers if they could contain an address. We mustn't push untagged registers and we mustn't push the destination. *) fun getSaveSet includeReg = let val activeAfter = union(activeAfterThis, passThrough) (* Remove any registers marked - must-not-push. These are registers holding non-address values. They will actually be saved by the RTS across any GC but not checked or modified by the GC. Exclude the result register. *) fun getSave i = if includeReg i then case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" else NONE in List.mapPartial getSave (setToList activeAfter) end in (* Sometimes we need to modify the instruction e.g. to include the set of registers to save. 
*) val convertedInstr = case instr of AllocateMemoryFixed{bytesRequired, dest, saveRegs=_} => AllocateMemoryFixed{bytesRequired=bytesRequired, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | AllocateMemoryVariable{size, dest, saveRegs=_} => AllocateMemoryVariable{size=size, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | BoxLarge{source, dest, saveRegs=_} => BoxLarge{source=source, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | BoxTagFloat{source, dest, floatSize, saveRegs=_} => BoxTagFloat{source=source, dest=dest, floatSize=floatSize, saveRegs=getSaveSet(fn i => i <> regNo dest)} | JumpLoop{regArgs, stackArgs, checkInterrupt = SOME _, ...} => let (* If we have to check for interrupts we must preserve registers across the RTS call. *) fun getSave i = case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" val currentRegs = union(activeAfterThis, passThrough) (* Have to include the loop registers. These were previously included automatically because they were part of the import set. *) val check = List.mapPartial getSave (map (regNo o #dst) regArgs @ setToList currentRegs) in JumpLoop{regArgs=regArgs, stackArgs=stackArgs, checkInterrupt=SOME check} end | FunctionCall{regArgs, stackArgs=[], dests, fpRegArgs=[], fpDests=[], callKind as ConstantCode m, saveRegs=_, containers} => (* If this is arbitrary precision push the registers rather than marking them as "save". stringOfWord returns 'CODE "PolyAddArbitrary"' etc. 
*) if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then let val destRegs = List.map (regNo o #1) dests fun includeInSave i = not(List.exists(fn r => r=i) destRegs) in FunctionCall{regArgs=regArgs, stackArgs=[], callKind=callKind, dests=dests, fpRegArgs=[], fpDests=[], containers=containers, saveRegs=getSaveSet includeInSave} end else instr | _ => instr end (* FunctionCall must mark all registers as "push". *) local fun pushRegisters () = let val activeAfter = union(activeAfterThis, passThrough) fun pushAllButDests i = if List.exists(fn j => i=j) destRegNos then () else case Vector.sub(pregProps, i) of RegPropCacheTagged => raise InternalError "pushRegisters: cache reg" | RegPropCacheUntagged => raise InternalError "pushRegisters: cache reg" | _ => Array.update(requirePushOrDiscard, i, true) in (* We need to push everything active after this except the result register. *) List.app pushAllButDests (setToList activeAfter) end in val () = case instr of FunctionCall{ stackArgs=[], callKind=ConstantCode m, ...} => if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then () else pushRegisters () | FunctionCall _ => pushRegisters () (* It should no longer be necessary to push across a handler but there still seem to be cases that need it. *) (*| BeginHandler _ => pushRegisters ()*) | _ => () end (* Which entries are active in this instruction but not afterwards? 
*) val kill = union(minus(stackSourceSet, stackActiveAfterThis), minus(sourceSet, activeAfterThis)) in ({instr=convertedInstr, active=activeForInstr, current=activeAfterThis, kill=kill} :: tail, activeForPrevious, stackActiveForPrevious) end end fun createResult blockNo = let val BasicBlock{ flow, ...} = Vector.sub(blockVector, blockNo) val declSet = Array.sub(declarationArray, blockNo) and importSet = Array.sub(importArray, blockNo) and passSet = Array.sub(passThroughArray, blockNo) and loopSet = Array.sub(loopRegArray, blockNo) and exportSet = Array.sub(exportArray, blockNo) and stackPassSet = Array.sub(stackPassThroughArray, blockNo) and stackImportSet = Array.sub(stackImportArray, blockNo) and stackExportSet = Array.sub(stackExportArray, blockNo) val filteredCode = getOpt(Array.sub(resultCode, blockNo), []) (* At the end of the block we should have the exports active. *) val (resultInstrs, _, _) = List.foldr (createResultInstrs (passSet, stackPassSet)) ([], exportSet, stackExportSet) filteredCode (* Set the active count for the pass through. 
*)
                val instrCount = List.length filteredCode
                (* Every pass-through register has the instruction count of this
                   block added to its activity total. *)
                val () = List.app(addActivity instrCount) (setToList passSet)
                (* The per-block condition-code states are held as CCNeeded/CCUnused;
                   the result record carries them as options. *)
                val inCCState = case Array.sub(ccInStates, blockNo) of CCNeeded s => SOME s | CCUnused => NONE
                val outCCState = case Array.sub(ccRequiredOut, blockNo) of CCNeeded s => SOME s | CCUnused => NONE
            in
                ExtendedBasicBlock {
                    block = resultInstrs,
                    flow=flow,
                    locals = minus(declSet, exportSet),
                    imports = importSet,
                    exports = exportSet,
                    passThrough = passSet,
                    loopRegs = loopSet,
                    initialStacks = union(stackPassSet, stackImportSet),
                    inCCState = inCCState,
                    outCCState = outCCState
                }
            end
        in
            val resultBlocks = Vector.tabulate(vectorLength, createResult)
            (* Rebind the array so that the activity counts remain visible
               after the end of this local block. *)
            val regActive = regActive
        end

        (* One record per pseudo-register index, gathering the entries of
           regActive, regRefs, requirePushOrDiscard and pregProps. *)
        val registerState: regState vector =
            Vector.tabulate(maxPRegs,
                fn i => {
                    active = Array.sub(regActive, i),
                    refs = Array.sub(regRefs, i),
                    pushState = Array.sub(requirePushOrDiscard, i),
                    prop = Vector.sub(pregProps, i)
                }
            )
    in
        (resultBlocks, registerState)
    end

    (* Exported function.  First filter out unreferenced blocks then process the
       registers themselves.
       Blocks are visited depth-first starting from block 0 by following the
       flow of each block.  A block is given the next free label the first time
       it is reached, so the labels in the new vector are consecutive and any
       block that is never reached is left out.  The compacted vector is then
       passed to identifyRegs together with pregProps. *)
    fun identifyRegisters(blockVector, pregProps) =
    let
        val vectorLength = Vector.length blockVector
        (* mapArray: old label -> SOME new label once the block has been visited.
           resArray: new label -> SOME rewritten block. *)
        val mapArray = Array.array(vectorLength, NONE)
        and resArray = Array.array(vectorLength, NONE)
        (* The next new label to hand out; also the number of blocks kept so far. *)
        val count = ref 0

        (* Returns the new label for "label", processing the block and
           everything it can reach if this is the first visit. *)
        fun setReferences label =
            case Array.sub(mapArray, label) of
                NONE => (* Not yet visited *)
                let
                    val BasicBlock{flow, block} = Vector.sub(blockVector, label)
                    (* Create a new entry for it: take the current count and
                       then increment it. *)
                    val newLabel = ! count before count := !count + 1
                    (* Add it to the map. Any other references will use this without
                       reprocessing.  This is done before the successors are followed
                       so a jump back to this block finds the entry already present. *)
                    val () = Array.update(mapArray, label, SOME newLabel)
                    (* Rewrite every jump target in the flow to its new label. *)
                    val newFlow =
                        case flow of
                            Unconditional l => Unconditional(setReferences l)
                        |   Conditional{trueJump, falseJump, ccRef, condition} =>
                                Conditional{trueJump=setReferences trueJump, falseJump=setReferences falseJump,
                                    ccRef=ccRef, condition=condition}
                        |   ExitCode => ExitCode
                        |   IndexedBr list => IndexedBr(map setReferences list)
                        |   SetHandler{handler, continue} =>
                                SetHandler{handler=setReferences handler, continue=setReferences continue}
                        |   UnconditionalHandle l => UnconditionalHandle(setReferences l)
                        |   ConditionalHandle{handler, continue} =>
                                ConditionalHandle{handler=setReferences handler, continue=setReferences continue}
                    (* The instructions are unchanged; only the flow differs. *)
                    val () = Array.update(resArray, newLabel, SOME(BasicBlock{flow=newFlow, block=block}))
                in
                    newLabel
                end
            |   SOME lab => lab

        val _ = setReferences 0
        (* Every index below !count was filled in by setReferences. *)
        val newBlockVector = Vector.tabulate(!count, fn i => valOf(Array.sub(resArray, i)))
    in
        identifyRegs(newBlockVector, pregProps)
    end

    (* Exported for use in GetConflictSets.  Returns just the source and
       destination register lists from the instruction state. *)
    fun getInstructionRegisters instr =
    let
        val {sources, dests, ...} = getInstructionState instr
    in
        {sources=sources, dests=dests}
    end

    (* Exported for use in ICodeOptimise.  Selects the ccOut field of the
       instruction state. *)
    val getInstructionCC = #ccOut o getInstructionState

    (* The types named here are the ones tied together by "sharing"
       constraints in functors that take this structure as an argument. *)
    structure Sharing =
    struct
        type ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode
        and preg = preg
        and pregOrZero = pregOrZero
        and intSet = intSet
        and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock
        and extendedBasicBlock = extendedBasicBlock
        and controlFlow = controlFlow
        and regProperty = regProperty
        and ccRef = ccRef
        and outCCState = outCCState
    end
end;
diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML
index 4455a39b..09afa730 100644
--- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML
+++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML
@@ -1,1210 +1,1208 @@
(*
    Copyright David C. J. Matthews 2021-2

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License version 2.1 as published by the Free Software Foundation.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
*)

functor Arm64PushRegisters(
    structure Arm64ICode: ARM64ICODE
    structure IntSet: INTSET
    structure Identify: ARM64IDENTIFYREFERENCES
    sharing Arm64ICode.Sharing = Identify.Sharing = IntSet
) : ARM64PUSHREGISTERS =
struct
    open Arm64ICode
    open IntSet
    open Identify

    (* Basic blocks whose general, optional-general and floating point
       register positions are all filled with pregs. *)
    type basicBlockAbstract = (preg, pregOrZero, preg) basicBlock

    (* Curried subscript functions *)
    fun asub a i = Array.sub(a, i)
    and vsub v i = Vector.sub(v, i)

    exception InternalError = Misc.InternalError

    (* Each preg in the input is mapped to either a new preg or the stack.
       Unset: no destination has been seen for it yet.
       ToPReg: it is held in the given output preg.
       ToStack(n, loc): it is held in the output stack location "loc"; n is the
       stack count that was recorded when the entry was created (mapDestReg
       stores the stack count before the push, mapDestContainer stores the
       location it is given). *)
    datatype pregMapType = Unset | ToPReg of preg | ToStack of int * stackLocn

    (* The stack contains both entries in the input code and entries added here.
       It is really used to ensure that the stack at run time is the same size
       at the start of a block whichever block has jumped to it.
       pushItemToStack counts a NewEntry as one word, an OriginalEntry as the
       size of its stack location and a HandlerEntry as two words. *)
    datatype stackEntry =
        NewEntry of {pregNo: int} (* pregNo is the original preg that has been pushed here. *)
    |   OriginalEntry of { stackLoc: stackLocn }
    |   HandlerEntry

    (* The firstPass field of the argument record is accepted but ignored. *)
    fun addRegisterPushes{code: extendedBasicBlock vector, pushVec: bool vector, pregProps, firstPass=_} =
    let
        val maxPRegs = Vector.length pregProps
        val numberOfBlocks = Vector.length code
        (* Output registers and properties.
*)
        (* pregCounter is the next unused output register number and pregPropList
           the properties of the output registers allocated so far, most recent
           first: newPReg and newStackLoc each take the next number and cons
           one property. *)
        val pregCounter = ref 0
        val pregPropList = ref []
        (* Where each input preg, indexed by number, has been placed. *)
        val pregMap = Array.array(maxPRegs, Unset)
        (* The largest stack count reached; updated by pushItemToStack. *)
        val maxStack = ref 0
        (* The stack size we've assumed for the block.  Also indicates if a block
           has already been processed. *)
        val inputStackSizes = Array.array(numberOfBlocks, NONE)
        (* The result of processing a block. *)
        val blockOutput = Array.array(numberOfBlocks, {code=[], stackCount=0})
        (* Extra blocks to adjust the stack are added here. *)
        val extraBlocks: basicBlockAbstract list ref = ref []
        (* NOTE(review): presumably the label to give the next extra block;
           its uses are outside this part of the file. *)
        val blockCounter = ref numberOfBlocks

        (* Get the blocks that are inputs for each one. *)
        local
            (* blockRefs[b] is the list of blocks that have b as a successor. *)
            val blockRefs = Array.array(numberOfBlocks, [])

            (* Record fromBlock as a predecessor of each of its successors.
               A successor is itself scanned only when this is the first
               predecessor recorded for it. *)
            fun setReferences fromBlock =
            let
                val ExtendedBasicBlock{ flow, ...} = vsub code fromBlock
                val refs = successorBlocks flow

                fun setRefs toBlock =
                let
                    val oldRefs = asub blockRefs toBlock
                in
                    Array.update(blockRefs, toBlock, fromBlock :: oldRefs);
                    if null oldRefs
                    then setReferences toBlock
                    else ()
                end
            in
                List.app setRefs refs
            end

            val () = setReferences 0
        in
            val blockRefs = blockRefs
        end

        (* Recursive scan of the blocks.  For each block we produce an input and
           output state.  The input state is the output state of the predecessor
           i.e. some block that jumps to this, but with any entries removed that
           are not used in this block.  It is then necessary to match the input
           state, if necessary by adding extra blocks that just do the matching. *)
        local
            (* A block has been processed once its input stack size is recorded. *)
            val haveProcessed = isSome o asub inputStackSizes

            (* toDo is a list of (block number, output state of a predecessor)
               pairs; entries for blocks already processed are dropped. *)
            fun processBlocks toDo =
                case List.filter (fn (n, _) => not(haveProcessed n)) toDo of
                    [] => () (* Nothing left to do *)
                |   stillToDo as head :: _ =>
                    let
                        (* Try to find a block all of whose predecessors have been
                           processed.  That increases the chances that we will have
                           cached items.  If there is none the first entry is used.
                           TODO: This is no longer necessary since we don't do any
                           caching here now so this could maybe be simplified. *)
                        fun available(dest, _) = List.all haveProcessed (Array.sub(blockRefs, dest))

                        val (blockNo, lastOutputState) =
                            case List.find available stillToDo of
                                SOME c => c
                            |   NONE => head

                        (* This is the first time we've come to this block. *)
                        val ExtendedBasicBlock{ block, flow, imports, passThrough, loopRegs, initialStacks, ...} = vsub code blockNo

                        (* Remove any items from the input state that are no longer
                           needed for this block.  They could be local to the previous
                           block or needed by a different successor.  Although the
                           values in loopRegs are not required the stack space is so
                           that they can be updated.
                           Entries are removed only from the top of the stack: the
                           scan stops at the first entry that is still wanted, and the
                           final clause leaves the state unchanged for any other top
                           entry, i.e. a HandlerEntry.  An empty stack with a
                           non-zero count is an internal error. *)
                        fun removeItems(result as {stack=[], stackCount=0}) = result
                        |   removeItems{stack=[], ...} = raise InternalError "removeItems - stack size"
                        |   removeItems (thisStack as {stack=NewEntry{pregNo} :: rest, stackCount}) =
                                if member(pregNo, imports) orelse member(pregNo, passThrough) orelse member(pregNo, loopRegs)
                                then thisStack
                                else removeItems{stack=rest, stackCount=stackCount-1}
                        |   removeItems (thisStack as {stack=OriginalEntry{stackLoc=StackLoc{rno, size}, ...} :: rest, stackCount}) =
                                if member(rno, initialStacks)
                                then thisStack
                                else removeItems{stack=rest, stackCount=stackCount-size}
                        |   removeItems result = result

                        val {stackCount=newSp, stack=newStack} = removeItems lastOutputState

                        (* References to hold the current stack count (number of words
                           on the stack) and the list of items on the stack.  The list
                           is not used directly to map stack addresses.  Instead it is
                           used to match the stack at the beginning and end of a block. *)
                        val stackCount = ref newSp
                        val stack = ref newStack
                        (* Items from the stack that have been marked as deleted but
                           not yet removed.  We only remove items from the top of the
                           stack to avoid quadratic behaviour with a very deep stack. *)
                        val deletedItems = ref []

                        (* Save the stack size in case we come by a different route.
*)
                        val () = Array.update(inputStackSizes, blockNo, SOME newSp)

                        (* Record a new item on top of the stack: add its size in words
                           to the stack count, add it to the stack list and raise
                           maxStack if the new count is larger. *)
                        fun pushItemToStack item =
                        let
                            val size =
                                case item of
                                    NewEntry _ => 1
                                |   OriginalEntry{stackLoc=StackLoc{size, ...}, ...} => size
                                |   HandlerEntry => 2
                        in
                            stackCount := ! stackCount+size;
                            stack := item :: ! stack;
                            maxStack := Int.max(!maxStack, !stackCount)
                        end

                        (* Allocate the next output register number with the given
                           property and return it as a preg. *)
                        fun newPReg propKind =
                        let
                            val regNo = !pregCounter before pregCounter := !pregCounter + 1
                            val () = pregPropList := propKind :: !pregPropList
                        in
                            PReg regNo
                        end

                        (* Allocate the next output register number as a stack location
                           of "size" words.  Stack locations share the numbering and
                           the property list with the pregs. *)
                        and newStackLoc size =
                        let
                            val regNo = !pregCounter before pregCounter := !pregCounter + 1
                            val () = pregPropList := RegPropStack size :: !pregPropList
                        in
                            StackLoc{size=size, rno=regNo}
                        end

                        (* Map a source register.  This always loads the argument.
                           The result is the output register together with the code,
                           if any, needed to load it: empty when the value is in a
                           register, a single LoadStack into a fresh register when it
                           is on the stack.  It is an internal error if no
                           destination has been seen for the register. *)
                        fun mapSrcReg(PReg n) =
                            case Array.sub(pregMap, n) of
                                Unset => raise InternalError "mapSrcReg - unset"
                            |   ToPReg preg => (preg, [])
                            |   ToStack(stackLoc, container as StackLoc{size, ...}) =>
                                let
                                    (* Make a new untagged register.  That will prevent us
                                       pushing it if we have to spill registers. *)
                                    val newReg = newPReg RegPropUntagged
                                in
                                    (* The word offset is measured from the current stack
                                       count back to the entry. *)
                                    (newReg, [LoadStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0, dest=newReg}])
                                end

                        (* Map a destination register.  The result is the output
                           register to use in the instruction together with any code
                           needed to push it.  Uses the register's original property
                           for the new register. *)
                        fun mapDestReg(PReg n) =
                        let
                            val currentLocation = Array.sub(pregMap, n)
                            val kind = Vector.sub(pregProps, n)
                        in
                            if Vector.sub(pushVec, n)
                            then
                            let
                                (* This should not have been seen before. *)
                                val _ = case currentLocation of Unset => () | _ => raise InternalError "mapDestReg - already set"
                                val newReg = newPReg kind
                                val newContainer = newStackLoc 1
                                (* The map entry records the stack count before the
                                   push is counted. *)
                                val () = Array.update(pregMap, n, ToStack (!stackCount, newContainer))
                                val () = pushItemToStack(NewEntry{pregNo=n})
                            in
                                (newReg, [PushToStack{source= newReg, container=newContainer, copies=1}])
                            end
                            else
                            let
                                (* See if we already have a number for it.  We may
                                   encounter the same preg as a destination when returning
                                   the result from a conditional in which case we have to
                                   use the same number.  We shouldn't have pushed it. *)
                                val newReg =
                                    case (currentLocation, kind) of
                                        (Unset, _) =>
                                        let
                                            val newReg = newPReg kind
                                            val () = Array.update(pregMap, n, ToPReg newReg)
                                        in
                                            newReg
                                        end
                                    |   (ToPReg preg, RegPropMultiple) => preg
                                    |   _ => raise InternalError "mapDestReg - multiply defined non-merge reg"
                            in
                                (newReg, [])
                            end
                        end

                        (* Optional destination for arithmetic and logical ops.
                           ZeroReg is passed through without any code. *)
                        fun mapOptDest ZeroReg = (ZeroReg, [])
                        |   mapOptDest (SomeReg destReg) =
                            let
                                val (destVal, destCode) = mapDestReg destReg
                            in
                                (SomeReg destVal, destCode)
                            end

                        (* The same for an optional source register. *)
                        fun mapOptSrc ZeroReg = (ZeroReg, [])
                        |   mapOptSrc (SomeReg srcReg) =
                            let
                                val (srcVal, srcCode) = mapSrcReg srcReg
                            in
                                (SomeReg srcVal, srcCode)
                            end

                        (* Adjust a stack offset from the old state to the new state.
                           Returns the new offset of the field, relative to the current
                           stack count, and the new container.  The container must
                           already have been mapped to the stack. *)
                        fun mapContainerAndStack(StackLoc{rno, size}, field) =
                        let
                            val (newStackAddr, newContainer) =
                                case Array.sub(pregMap, rno) of
                                    Unset => raise InternalError "mapContainer - unset"
                                |   ToPReg _ => raise InternalError "mapContainer - ToPReg"
                                |   ToStack stackContainer => stackContainer

                            val newOffset = !stackCount-(newStackAddr+size) + field
                        in
                            (newOffset, newContainer)
                        end

                        (* Add an entry for an existing stack entry.  A new stack
                           location of the same size is allocated and recorded in the
                           map at position "locn".  It is an internal error if the
                           entry has already been mapped. *)
                        fun mapDestContainer(StackLoc{rno, size}, locn) =
                        (
                            case Array.sub(pregMap, rno) of
                                Unset =>
                                let
                                    val newContainer = newStackLoc size
                                    val () = Array.update(pregMap, rno, ToStack(locn, newContainer))
                                in
                                    newContainer
                                end
                            |   _ => raise InternalError "mapDestContainer: already set"
                        )

                        (* Map a function argument which could be a register or a stack entry. A register entry could have been pushed.
*) fun mapArgument(ArgInReg (PReg r)) = ( case Array.sub(pregMap, r) of Unset => raise InternalError "mapSource - unset" | ToPReg preg => ArgInReg preg | ToStack(stackLoc, container as StackLoc{size, ...}) => ArgOnStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0} ) | mapArgument(ArgOnStack{container, field, ...}) = let val (newOffset, newContainer) = mapContainerAndStack(container, field) in ArgOnStack{container=newContainer, wordOffset=newOffset, field=field} end (* Rewrite the code, replacing any registers that need to be pushed with references to the stack. The result is built up in reverse order and then reversed. *) fun pushRegisters({instr=MoveRegister{ source, dest as PReg dReg }, ...}, code) = if Vector.sub(pushVec, dReg) then (* We're going to push this. *) let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest (* TODO: Since we're pushing it we don't need to move it first. *) in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end else let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=LoadNonAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadNonAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadFPConstant { dest, source, floatSize}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPConstant { dest=destVal, source=source, floatSize=floatSize} :: code end | pushRegisters({instr=LoadAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val 
(destVal, destCode) = mapDestReg dest in destCode @ LoadWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ code end | pushRegisters({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ code end | pushRegisters({instr=LoadWithIndexedOffset { base, dest, index, loadType, signExtendIndex}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, loadType=loadType, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ code end | pushRegisters({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize, signExtendIndex}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, floatSize=floatSize, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ code end | pushRegisters({instr=GetThreadId { dest}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ GetThreadId { dest=destVal} :: code end | pushRegisters({instr=ObjectIndexAddressToAbsolute { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ObjectIndexAddressToAbsolute { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=AbsoluteToObjectIndex { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ AbsoluteToObjectIndex { source=sourceVal, dest=destVal} :: 
sourceCode @ code end | pushRegisters({instr=AllocateMemoryFixed { bytesRequired, dest, ...}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryFixed { dest=destVal, bytesRequired=bytesRequired, saveRegs=[]} :: code end | pushRegisters({instr=AllocateMemoryVariable{size, dest, ...}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryVariable{size=sizeVal, dest=destVal, saveRegs=[]} :: sizeCode @ code end | pushRegisters({instr=InitialiseMem{size, addr, init}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (addrVal, addrCode) = mapSrcReg addr val (initVal, initCode) = mapSrcReg init in InitialiseMem{size=sizeVal, addr=addrVal, init=initVal} :: initCode @ addrCode @ sizeCode @ code end | pushRegisters({instr=BeginLoop, ...}, code) = BeginLoop :: code | pushRegisters({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}, code) = let (* Normally JumpLoop will be the last item in a block but it is possible that we've added a reset-stack after it. *) fun getValues [] = ([], []) | getValues ({src=source, dst=PReg n} :: rest) = let val (otherRegArgs, otherStackArgs) = getValues rest in case Array.sub(pregMap, n) of ToPReg lReg => ({src=mapArgument source, dst=lReg} :: otherRegArgs, otherStackArgs) | ToStack(stackloc, stackC as StackLoc{size, ...}) => let val sourceVal = mapArgument source val stackOff = !stackCount - stackloc - size in (otherRegArgs, {src=sourceVal, wordOffset=stackOff, stackloc=stackC} :: otherStackArgs) end | Unset => (* Drop it. It's never used. This can happen if we are folding a function over a list such that it always returns the last value and then discard the result of the fold. 
*) (otherRegArgs, otherStackArgs) end val (newRegArguments, newStackArgs) = getValues regArgs fun loadStackArg({src=source, stackloc=destC, ...}, otherArgs) = let val sourceVal = mapArgument source val (newOffset, newContainer) = mapContainerAndStack(destC, 0) in {src=sourceVal, wordOffset=newOffset, stackloc=newContainer} :: otherArgs end val oldStackArgs = List.foldr loadStackArg [] stackArgs val check = case checkInterrupt of NONE => NONE | SOME _ => SOME [] in JumpLoop{ regArgs=newRegArguments, stackArgs=oldStackArgs @ newStackArgs, checkInterrupt=check} :: code end | pushRegisters({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreFPWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreWithIndexedOffset { base, source, index, loadType, signExtendIndex}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, loadType=loadType, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithIndexedOffset { base, source, index, floatSize, signExtendIndex}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreFPWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, 
floatSize=floatSize, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ AddSubImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length} :: sourceCode @ code end | pushRegisters({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, ...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ AddSubRegister { base=op1Val, shifted=op2Val, dest=destVal, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ LogicalImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, logOp=logOp, length=length} :: sourceCode @ code end | pushRegisters({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ LogicalRegister { base=op1Val, shifted=op2Val, dest=destVal, ccRef=ccRef, logOp=logOp, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...}, code) = let val (srcVal, op1Code) = mapSrcReg source val (shiftVal, op2Code) = mapSrcReg shift val (destVal, destCode) = mapDestReg dest in destCode @ ShiftRegister { source=srcVal, shift=shiftVal, dest=destVal, direction=direction, opSize=opSize} :: op2Code @ op1Code @ code end | pushRegisters({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN 
}, ...}, code) = let val (srcAVal, srcACode) = mapOptSrc sourceA val (srcMVal, srcMCode) = mapSrcReg sourceM val (srcNVal, srcNCode) = mapSrcReg sourceN val (destVal, destCode) = mapDestReg dest in destCode @ Multiplication { kind=kind, sourceA=srcAVal, sourceM=srcMVal, sourceN=srcNVal, dest=destVal} :: srcNCode @ srcMCode @ srcACode @ code end | pushRegisters({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...}, code) = let val (dividendVal, dividendCode) = mapSrcReg dividend val (divisorVal, divisorCode) = mapSrcReg divisor val (destVal, destCode) = mapDestReg dest in destCode @ Division { isSigned=isSigned, dividend=dividendVal, divisor=divisorVal, dest=destVal, opSize=opSize} :: divisorCode @ dividendCode @ code end | pushRegisters({instr=BeginFunction {regArgs, fpRegArgs, stackArgs}, ...}, code) = let (* Create a new container list. The offsets begin at -numArgs. *) fun newContainers(src :: srcs, offset) = let val newContainer = mapDestContainer(src, offset) in newContainer :: newContainers(srcs, offset+1) end | newContainers _ = [] val newStackArgs = newContainers(stackArgs, ~ (List.length stackArgs)) (* Push any registers that need to be pushed. *) fun pushReg((preg, rreg), (others, code)) = let val (newReg, newCode) = mapDestReg preg in ((newReg, rreg) :: others, newCode @ code) end val (newRegArgs, pushCode) = List.foldl pushReg ([], []) regArgs val (newFPRegArgs, pushFPCode) = List.foldl pushReg ([], []) fpRegArgs in pushFPCode @ pushCode @ BeginFunction {regArgs=newRegArgs, fpRegArgs=newFPRegArgs, stackArgs=newStackArgs} :: code end | pushRegisters({instr=FunctionCall{callKind, regArgs, stackArgs, dests, fpRegArgs, fpDests, containers, ...}, ...}, code) = let (* It's possible that this could lead to having to spill registers in order to load others. Leave that problem for the moment. 
*) fun loadStackArg (arg, otherArgs) = let val argVal = mapArgument arg in argVal :: otherArgs end val newStackArgs = List.foldr loadStackArg [] stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs fun loadFPRegArg ((PReg n, reg), otherArgs) = let (* FP regs are untagged registers and should never be pushed. *) val argVal = case Array.sub(pregMap, n) of Unset => raise InternalError "mapSrcReg - unset" | ToPReg preg => preg | ToStack _ => raise InternalError "loadFPRegArg: on stack" in (argVal, reg) :: otherArgs end val newFPRegArgs = List.foldr loadFPRegArg [] fpRegArgs (* Push any result registers that need to be pushed. *) fun pushResults((preg, rreg), (others, code)) = let val (newReg, newCode) = mapDestReg preg in ((newReg, rreg) :: others, newCode @ code) end val (destVals, destCode) = List.foldl pushResults ([], []) dests val (destFPVals, destFPCode) = List.foldl pushResults ([], []) fpDests val newContainers = List.map(fn c => #2(mapContainerAndStack(c, 0))) containers in destFPCode @ destCode @ FunctionCall{ callKind=callKind, regArgs=newRegArgs, stackArgs=newStackArgs, dests=destVals, fpRegArgs=newFPRegArgs, fpDests=destFPVals, saveRegs=[], containers=newContainers} :: code end | pushRegisters({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, fpRegArgs, ...}, ...}, code) = let val newStackOffset = !stackCount fun loadStackArg ({src, stack}, (otherLoads, otherArgs)) = let val (argVal, loadCode) = case mapArgument src of (source as ArgOnStack{wordOffset, container, field}) => (* If we're leaving it in its old location or we're pushing it above the current top we're ok. We're also ok if we're moving it from a somewhere above the last argument. Otherwise we have to load it. It goes into a normal tagged register which may mean that it could be pushed onto the stack in a subsequent pass. 
*) if wordOffset = stack+newStackOffset orelse stack+newStackOffset < 0 orelse newStackOffset-wordOffset > ~ stackAdjust then (source, []) else let val preg = newPReg RegPropGeneral in (ArgInReg preg, [LoadStack{wordOffset=wordOffset, container=container, field=field, dest=preg}]) end | argCode => (argCode, []) in (loadCode @ otherLoads, {src=argVal, stack=stack} :: otherArgs) end val (stackArgLoads, newStackArgs) = List.foldr loadStackArg ([], []) stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs fun loadFPRegArg ((PReg n, reg), otherArgs) = let (* FP regs are untagged registers and should never be pushed. *) val argVal = case Array.sub(pregMap, n) of Unset => raise InternalError "mapSrcReg - unset" | ToPReg preg => preg | ToStack _ => raise InternalError "loadFPRegArg: on stack" in (argVal, reg) :: otherArgs end val newFPRegArgs = List.foldr loadFPRegArg [] fpRegArgs in TailRecursiveCall{ callKind=callKind, regArgs=newRegArgs, fpRegArgs=newFPRegArgs, stackArgs=newStackArgs, stackAdjust=stackAdjust, currStackSize=newStackOffset} :: stackArgLoads @ code end | pushRegisters({instr=ReturnResultFromFunction{results, fpResults, returnReg, numStackArgs}, ...}, code) = let fun getResults((preg, rreg), (others, code)) = let val (newReg, newCode) = mapSrcReg preg in ((newReg, rreg) :: others, newCode @ code) end val (resultValues, loadResults) = List.foldr getResults ([], []) results val (fpResultValues, loadFPResults) = List.foldr getResults ([], []) fpResults val (returnValue, loadReturn) = mapSrcReg returnReg val resetCode = if !stackCount = 0 then [] else [ResetStackPtr{numWords= !stackCount}] in ReturnResultFromFunction{results=resultValues, fpResults=fpResultValues, returnReg=returnValue, numStackArgs=numStackArgs} :: resetCode @ loadReturn @ loadFPResults @ loadResults @ code end | pushRegisters({instr=RaiseExceptionPacket{packetReg}, ...}, code) = 
(* Body of the RaiseExceptionPacket clause whose head is on the previous line.
   Pattern used by the clauses below: operands that are read go through
   mapSrcReg and results through mapDestReg.  Each returns the mapped value
   together with a list of instructions.  The instruction list is built in
   reverse (createBlock applies List.rev), so code appended behind the
   instruction comes before it in the final order and code placed in front
   of it comes after. *)
let
    val (packetVal, packetCode) = mapSrcReg packetReg
in
    RaiseExceptionPacket{packetReg=packetVal} :: packetCode @ code
end

| pushRegisters({instr=PushToStack{ source, container, copies }, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    (* This was a push from a previous pass.  Treat as a container of size "copies". *)
    val newContainer = mapDestContainer(container, !stackCount)
    (* Record the original container as an entry on the tracked stack. *)
    val () = pushItemToStack(OriginalEntry{stackLoc=container})
in
    PushToStack{source=sourceVal, container=newContainer, copies=copies} :: sourceCode @ code
end

(* The incoming wordOffset is ignored; it is recomputed by mapContainerAndStack. *)
| pushRegisters({instr=LoadStack{ dest, container, field, ... }, ...}, code) =
let
    val (newOffset, newContainer) = mapContainerAndStack(container, field)
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ LoadStack{ dest=destVal, container=newContainer, field=field, wordOffset=newOffset } :: code
end

(* As LoadStack: the stack offset is recomputed from the container and field. *)
| pushRegisters({instr=StoreToStack{source, container, field, ...}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (newOffset, newContainer) = mapContainerAndStack(container, field)
in
    StoreToStack{source=sourceVal, container=newContainer, field=field, stackOffset=newOffset} :: sourceCode @ code
end

(* The address is computed for field 0 of the container. *)
| pushRegisters({instr=ContainerAddress{ dest, container, ... }, ...}, code) =
let
    val (newOffset, newContainer) = mapContainerAndStack(container, 0)
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ ContainerAddress{ dest=destVal, container=newContainer, stackOffset=newOffset } :: code
end

| pushRegisters({instr=ResetStackPtr _, ...}, code) = code (* Added in a previous pass - discard it. *)

| pushRegisters({instr=TagValue{source, dest, isSigned, opSize}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ TagValue{source=sourceVal, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code
end

| pushRegisters({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...}, code) =
let
    val (loadedSource, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ UntagValue{source=loadedSource, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code
end

(* Any incoming saveRegs is dropped and replaced by the empty list. *)
| pushRegisters({instr=BoxLarge{source, dest, ...}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ BoxLarge{source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code
end

| pushRegisters({instr=UnboxLarge{source, dest, ...}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ UnboxLarge{source=sourceVal, dest=destVal} :: sourceCode @ code
end

(* As BoxLarge: saveRegs is reset to the empty list. *)
| pushRegisters({instr=BoxTagFloat{floatSize, source, dest, ...}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ BoxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code
end

| pushRegisters({instr=UnboxTagFloat{floatSize, source, dest, ...}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ UnboxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal} :: sourceCode @ code
end

| pushRegisters({instr=LoadAcquire { base, dest, loadType}, ...}, code) =
let
    val (baseVal, baseCode) = mapSrcReg base
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ LoadAcquire { base=baseVal, dest=destVal, loadType=loadType} :: baseCode @ code
end

(* The bindings of this clause continue on the next line. *)
| pushRegisters({instr=StoreRelease { base, source, loadType}, ...}, code) =
let
    val
(* Continuation of the StoreRelease clause: the previous line ends with
   "let val".  Both the value and the base are read, so both are mapped
   with mapSrcReg. *)
(sourceVal, sourceCode) = mapSrcReg source
    val (baseVal, baseCode) = mapSrcReg base
in
    StoreRelease{ base=baseVal, source=sourceVal, loadType=loadType} :: baseCode @ sourceCode @ code
end

| pushRegisters({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ BitFieldShift { source=sourceVal, dest=destVal, isSigned=isSigned, immr=immr, imms=imms, length=length} :: sourceCode @ code
end

(* destAsSource is an operand that is read, so it is mapped with mapSrcReg
   in addition to the separate dest. *)
| pushRegisters({instr=BitFieldInsert{source, destAsSource, dest, length, immr, imms}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destSrcVal, destSrcCode) = mapSrcReg destAsSource
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ BitFieldInsert { source=sourceVal, destAsSource=destSrcVal, dest=destVal, immr=immr, imms=imms, length=length} :: destSrcCode @ sourceCode @ code
end

| pushRegisters({instr=IndexedCaseOperation{testReg}, ...}, code) =
let
    val (testVal, testCode) = mapSrcReg testReg
in
    IndexedCaseOperation{testReg=testVal} :: testCode @ code
end

| pushRegisters({instr=PushExceptionHandler, ...}, code) =
let
    (* Add a handler entry to the stack. *)
    val () = pushItemToStack HandlerEntry
in
    PushExceptionHandler :: code
end

| pushRegisters({instr=PopExceptionHandler, ...}, code) =
let
    (* Appears at the end of the block whose exceptions are being handled.
       Delete the handler and anything above it. *)
    (* Get the state after removing the handler. *)
    (* Walks down the tracked stack, subtracting the size of every entry
       removed: 2 for the handler itself, "size" for an original entry and
       1 for a new entry.  It stops once the handler has been removed. *)
    fun popContext ([], _) = raise InternalError "pushRegisters - pop handler"
    |   popContext (HandlerEntry :: tl, new) = (tl, new-2)
    |   popContext (OriginalEntry{stackLoc=StackLoc{size, ...}, ...} :: tl, new) = popContext(tl, new-size)
    |   popContext (NewEntry _ :: tl, new) = popContext(tl, new-1)
    val (newStack, nnCount) = popContext(!stack, !stackCount)
    val () = stack := newStack
    (* The operand of this dereference is on the next line. *)
    val oldStackPtr = !
(* Continuation of the PopExceptionHandler clause: the previous line ends
   with "val oldStackPtr = !", so this first token is the operand of that
   dereference. *)
stackCount
    val () = stackCount := nnCount
    (* Reset the stack to just above the two words of the handler. *)
    val resetCode =
        if oldStackPtr <> nnCount+2
        then [ResetStackPtr{numWords=oldStackPtr-nnCount-2}]
        else []
in
    PopExceptionHandler :: resetCode @ code
end

| pushRegisters({instr=BeginHandler{packetReg}, ...}, code) =
let
    (* Start of a handler.  The top active entry should be the handler. *)
    val () =
        case !stack of
            HandlerEntry :: tl => stack := tl
        |   _ => raise InternalError "pushRegisters: BeginHandler"
    (* Account for the two words of the handler entry just removed. *)
    val () = stackCount := !stackCount - 2
    val (packetVal, packetCode) = mapDestReg packetReg
in
    packetCode @ BeginHandler{packetReg=packetVal} :: code
end

(* All three operands are read, so all are mapped with mapSrcReg. *)
| pushRegisters({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...}, code) =
let
    val (vec1Val, vec1Code) = mapSrcReg vec1Addr
    val (vec2Val, vec2Code) = mapSrcReg vec2Addr
    val (lenVal, lenCode) = mapSrcReg length
in
    CompareByteVectors{vec1Addr=vec1Val, vec2Addr=vec2Val, length=lenVal, ccRef=ccRef} :: lenCode @ vec2Code @ vec1Code @ code
end

(* destAddr is mapped with mapSrcReg, not mapDestReg.
   NOTE(review): presumably because the register holding the address is
   only read - confirm. *)
| pushRegisters({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...}, code) =
let
    val (srcVal, srcCode) = mapSrcReg srcAddr
    val (destVal, destCode) = mapSrcReg destAddr
    val (lenVal, lenCode) = mapSrcReg length
in
    BlockMove{srcAddr=srcVal, destAddr=destVal, length=lenVal, isByteMove=isByteMove} :: lenCode @ destCode @ srcCode @ code
end

(* The destination is mapped with mapOptDest rather than mapDestReg. *)
| pushRegisters({instr=AddSubXSP{source, dest, isAdd}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapOptDest dest
in
    destCode @ AddSubXSP { source=sourceVal, dest=destVal, isAdd=isAdd} :: sourceCode @ code
end

| pushRegisters({instr=TouchValue{source, ...}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
in
    TouchValue { source=sourceVal} :: sourceCode @ code
end

(* The record of the rebuilt instruction continues on the next line. *)
| pushRegisters({instr=LoadAcquireExclusive{ base, dest }, ...}, code) =
let
    val (baseVal, baseCode) = mapSrcReg base
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ LoadAcquireExclusive {
(* Continuation of the LoadAcquireExclusive clause: these are the fields of
   the record opened at the end of the previous line. *)
base=baseVal, dest=destVal} :: baseCode @ code
end

(* The source is mapped with mapOptSrc rather than mapSrcReg.  The result
   is a destination and is mapped with mapDestReg. *)
| pushRegisters({instr=StoreReleaseExclusive{ base, source, result }, ...}, code) =
let
    val (sourceVal, sourceCode) = mapOptSrc source
    val (baseVal, baseCode) = mapSrcReg base
    val (resVal, resCode) = mapDestReg result
in
    resCode @ StoreReleaseExclusive{ base=baseVal, source=sourceVal, result=resVal} :: baseCode @ sourceCode @ code
end

(* No operands: the instruction is passed through unchanged. *)
| pushRegisters({instr=MemoryBarrier, ...}, code) = MemoryBarrier :: code

| pushRegisters({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ ConvertIntToFloat{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize} :: sourceCode @ code
end

| pushRegisters({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ ConvertFloatToInt{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize, rounding=rounding} :: sourceCode @ code
end

| pushRegisters({instr=UnaryFloatingPt{ source, dest, fpOp}, ...}, code) =
let
    val (sourceVal, sourceCode) = mapSrcReg source
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ UnaryFloatingPt{ source=sourceVal, dest=destVal, fpOp=fpOp} :: sourceCode @ code
end

| pushRegisters({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...}, code) =
let
    val (arg1Val, arg1Code) = mapSrcReg arg1
    val (arg2Val, arg2Code) = mapSrcReg arg2
    val (destVal, destCode) = mapDestReg dest
in
    destCode @ BinaryFloatingPoint{ arg1=arg1Val, arg2=arg2Val, dest=destVal, fpOp=fpOp, opSize=opSize} :: arg2Code @ arg1Code @ code
end

(* There is no destination register here; ccRef is passed through unchanged.
   The expression continues on the next line. *)
| pushRegisters({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...}, code) =
let
    val (arg1Val, arg1Code) = mapSrcReg arg1
    val (arg2Val, arg2Code) = mapSrcReg arg2
in
    CompareFloatingPoint{ arg1=arg1Val, arg2=arg2Val, opSize=opSize, ccRef=ccRef} :: arg2Code @
(* Tail of the CompareFloatingPoint clause begun on the previous line. *)
arg1Code @ code
end

(* No operands: the instruction is passed through unchanged. *)
| pushRegisters({instr=CPUYield, ...}, code) = CPUYield :: code

(* The source goes through mapOptSrc and the destination through mapOptDest. *)
| pushRegisters({instr=AtomicOperation{ base, source, dest, atOp }, ...}, code) =
let
    val (sourceVal, sourceCode) = mapOptSrc source
    val (baseVal, baseCode) = mapSrcReg base
    val (destVal, destCode) = mapOptDest dest
in
    destCode @ AtomicOperation{ base=baseVal, source=sourceVal, dest=destVal, atOp=atOp } :: baseCode @ sourceCode @ code
end
(* NOTE(review): the two "-" tokens below look like removed-line markers
   from a unified diff, i.e. the CacheCheck clause is being deleted.
   Confirm before treating this text as compilable source. *)
- | pushRegisters({instr=CacheCheck _, ...}, _) = raise InternalError "pushRegisters: CacheCheck"
-
local
    (* Process one instruction with pushRegisters.  Then, using the
       registers in its kill set that are not in loopRegs, try to pop
       items from the top of the tracked stack. *)
    fun doPush(instr as {kill, ...}, code) =
    let
        val newCode = pushRegisters(instr, code)
        (* Can we pop the stack? *)
        val stackReset =
            case setToList (minus(kill, loopRegs)) of
                [] => []
            |   killList =>
                let
                    (* See if any of the kill items are at the top of the stack.  If
                       they are we can pop them and perhaps items we've previously
                       marked for deletion but not been able to pop. *)
                    val oldStack = !stackCount
                    (* If register r is mapped to the stack entry that is currently
                       on top, remove that entry and lower stackCount.  Otherwise
                       r is added to the output list. *)
                    fun checkAndAdd(r, output) =
                        case Array.sub(pregMap, r) of
                            ToStack(stackLoc, StackLoc{size, ...}) =>
                                if stackLoc < 0
                                then r :: output (* We can have arguments and return address. *)
                                else if !stackCount = stackLoc+size
                                then
                                (
                                    stack := tl (!stack);
                                    stackCount := stackLoc;
                                    output
                                )
                                else r :: output
                        |   _ => r :: output
                    val toAdd = List.foldl checkAndAdd [] killList
                    (* Repeat the check over the list until a pass leaves
                       stackCount unchanged. *)
                    fun reprocess list =
                    let
                        val prevStack = !stackCount
                        val outlist = List.foldl checkAndAdd [] list
                    in
                        if !stackCount = prevStack
                        then list
                        else reprocess outlist
                    end
                    (* If nothing was popped the new items are just recorded.
                       Otherwise the previously deleted items are re-examined too. *)
                    val () =
                        if !stackCount = oldStack
                        then deletedItems := toAdd @ !deletedItems
                        else deletedItems := reprocess(toAdd @ !deletedItems)
                    (* Sanity check: the stack count must not have grown. *)
                    val _ = oldStack >= !stackCount orelse raise InternalError "negative stack offset"
                in
                    if !stackCount = oldStack
                    then []
                    else [ResetStackPtr{numWords=oldStack - !stackCount}]
                end
    in
        (* The list is in reverse order, so the reset placed in front comes
           after the instruction in the final order. *)
        stackReset @ newCode
    end
in
    (* Fold over the block's instructions, starting with an empty list. *)
    val codeResult = List.foldl doPush [] block
    (* The operand of this dereference is on the next line. *)
    val outputCount = !
(* Continuation of "val outputCount = !" from the previous line: this first
   token is the operand of that dereference. *)
stackCount
    (* The generated code and final stack count are saved for this block. *)
    val results = {code=codeResult, stackCount= outputCount}
    (* The state handed on to each successor block. *)
    val stateResult = { stackCount= outputCount, stack= !stack }
    val () = Array.update(blockOutput, blockNo, results)
end

(* The successor blocks of this block, selected by its flow.
   UnconditionalHandle contributes none and ConditionalHandle only its
   "continue" target. *)
val addSet =
    case flow of
        ExitCode => []
    |   IndexedBr cases => cases
    |   Unconditional dest => [dest]
    |   Conditional {trueJump, falseJump, ...} => [falseJump, trueJump]
    |   SetHandler { handler, continue } => [handler, continue]
    |   UnconditionalHandle _ => []
    |   ConditionalHandle { continue, ...} => [continue]
(* Pair every successor with this block's output state. *)
val addItems = List.map(fn m => (m, stateResult)) addSet
in
    processBlocks(addItems @ stillToDo)
end
in
    (* Start with block 0 and an empty stack. *)
    val () = processBlocks([(0, {stack=[], stackCount=0})])
end

(* Put together the result code and blocks. *)
local
    fun createBlock blockNo =
        (* Skip unreferenced blocks apart from block 0. *)
        if blockNo <> 0 andalso null (asub blockRefs blockNo)
        then BasicBlock{block=[], flow=ExitCode}
        else
        let
            val ExtendedBasicBlock{ flow, ...} = vsub code blockNo
            val {code=codeResult, stackCount=outputCount, ...} = asub blockOutput blockNo

            (* Process the successor.  If we need a stack adjustment this will
               require an adjustment block.
               TODO: We could put a pre-adjustment if we only have one branch to
               this block. *)
            (* Returns the block number to jump to: the target itself if the
               stack sizes agree.  Otherwise it is the number of a new block
               that resets the stack and then jumps to the target. *)
            fun matchStacks targetBlock =
            let
                (* Process the destination.  If it hasn't been processed. *)
                val expectedInput = valOf (asub inputStackSizes targetBlock)
            in
                if expectedInput = outputCount
                then targetBlock
                else
                let
                    val _ = outputCount > expectedInput orelse raise InternalError "adjustStack"
                    val adjustCode = [ResetStackPtr{numWords=outputCount-expectedInput}]
                    val newBlock = BasicBlock{block=adjustCode, flow=Unconditional targetBlock}
                    (* Take the current counter value and then increment it. *)
                    val newBlockNo = !blockCounter before blockCounter := !blockCounter+1
                    (* New blocks are consed on the front; the list is reversed
                       when the final result is built. *)
                    val () = extraBlocks := newBlock :: !extraBlocks
                in
                    newBlockNo
                end
            end

            val (finalCode, newFlow) =
                case flow of
                    ExitCode => (codeResult, ExitCode)

                |   Unconditional m =>
                    let
                        (* Process the block.  Since we're making an unconditional jump
                           we can include any stack adjustment needed to match the
                           destination in here.  In particular this includes loops. *)
                        val expectedInput = valOf (asub inputStackSizes m)
                        val _ = outputCount >= expectedInput orelse raise InternalError "negative reset"
                        (* codeResult is still reversed, so consing here makes the
                           reset the last instruction of the block. *)
                        val resultCode =
                            if expectedInput = outputCount
                            then codeResult
                            else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult
                    in
                        (resultCode, Unconditional m)
                    end

                    (* For any of these, if we need to adjust the stack we have to add an
                       adjustment block. *)
                |   Conditional {trueJump, falseJump, ccRef, condition} =>
                        (codeResult, Conditional{trueJump=matchStacks trueJump, falseJump=matchStacks falseJump, ccRef=ccRef, condition=condition})

                |   SetHandler{ handler, continue } =>
                        (codeResult, SetHandler{ handler=matchStacks handler, continue=matchStacks continue})

                |   IndexedBr cases => (codeResult, IndexedBr(map matchStacks cases))

                |   u as UnconditionalHandle _ => (codeResult, u)

                |   c as ConditionalHandle{ continue, ... } =>
                    let
                        (* As for unconditional branch *)
                        val expectedInput = valOf (asub inputStackSizes continue)
                        val _ = outputCount >= expectedInput orelse raise InternalError "negative reset"
                        val resultCode =
                            if expectedInput = outputCount
                            then codeResult
                            else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult
                    in
                        (resultCode, c)
                    end
        in
            (* The code was accumulated in reverse; put it in execution order. *)
            BasicBlock{block=List.rev finalCode, flow=newFlow}
        end
in
    val resultBlocks = Vector.tabulate(numberOfBlocks, createBlock)
end

(* Add any extra blocks to the result. *)
(* NOTE(review): appending in creation order matches the numbers handed out
   by matchStacks only if blockCounter starts at numberOfBlocks; its
   initialisation is outside this chunk - confirm. *)
val finalResult =
    case !extraBlocks of
        [] => resultBlocks
    |   blocks => Vector.concat[resultBlocks, Vector.fromList(List.rev blocks)]

(* pregPropList is reversed before it is turned into a vector.  The operand
   of this dereference is on the next line. *)
val pregProperties = Vector.fromList(List.rev(!
(* Continuation of "val pregProperties = Vector.fromList(List.rev(!" from
   the previous line. *)
pregPropList))
in
    (* Result record: the rewritten blocks, the properties of the
       pseudo-registers and the value held in maxStack. *)
    {code=finalResult, pregProps=pregProperties, maxStack= !maxStack}
end

(* Each type is bound to the type of the same name already in scope.
   NOTE(review): presumably this exists so the enclosing signature can
   state sharing constraints on these types - confirm against the
   signature, which is outside this chunk. *)
structure Sharing =
struct
    type extendedBasicBlock = extendedBasicBlock
    and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock
    and regProperty = regProperty
    and preg = preg
    and pregOrZero = pregOrZero
end
end;