diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig index 592c9a8a..9a281308 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig @@ -1,455 +1,455 @@ (* Signature for the high-level ARM64 code Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature ARM64ICODE = sig type machineWord = Address.machineWord type address = Address.address type closureRef (* Registers. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word (* It is simpler to use a single type for all registers. *) datatype reg = GenReg of xReg | FPReg of vReg val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg val is32in64: bool and isBigEndian: bool (* Condition for conditional branches etc. *) datatype condition = CondEqual (* Z=1 *) | CondNotEqual (* Z=0 *) | CondCarrySet (* C=1 *) | CondCarryClear (* C=0 *) | CondNegative (* N=1 *) | CondPositive (* N=0 includes zero *) | CondOverflow (* V=1 *) | CondNoOverflow (* V=0 *) | CondUnsignedHigher (* C=1 && Z=0 *) | CondUnsignedLowOrEq (* ! (C=1 && Z=0) *) | CondSignedGreaterEq (* N=V *) | CondSignedLess (* N<>V *) | CondSignedGreater (* Z==0 && N=V *) | CondSignedLessEq (* !(Z==0 && N=V) *) (* The shift used in arithmetic operations. *) and shiftType = ShiftLSL of Word8.word | ShiftLSR of Word8.word | ShiftASR of Word8.word | ShiftNone datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than one word if this is a container or a handler entry. *) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *)
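(* Illustration, not part of this signature: the fourteen conditions above pair up
   as logical negations of one another (Z=1 against Z=0, C=1 against C=0, and so
   on). A hypothetical helper written against this datatype makes the pairing
   explicit; a sketch only. *)
fun invertCondition CondEqual = CondNotEqual
  | invertCondition CondNotEqual = CondEqual
  | invertCondition CondCarrySet = CondCarryClear
  | invertCondition CondCarryClear = CondCarrySet
  | invertCondition CondNegative = CondPositive
  | invertCondition CondPositive = CondNegative
  | invertCondition CondOverflow = CondNoOverflow
  | invertCondition CondNoOverflow = CondOverflow
  | invertCondition CondUnsignedHigher = CondUnsignedLowOrEq
  | invertCondition CondUnsignedLowOrEq = CondUnsignedHigher
  | invertCondition CondSignedGreaterEq = CondSignedLess
  | invertCondition CondSignedLess = CondSignedGreaterEq
  | invertCondition CondSignedGreater = CondSignedLessEq
  | invertCondition CondSignedLessEq = CondSignedGreater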
(* The reference to a condition code. *) datatype ccRef = CcRef of int datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and callKind = Recursive | ConstantCode of machineWord | FullCall and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32-bit*32-bit + 64-bit => 64-bit *) | SignedMultHigh (* High-order part of 64-bit*64-bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat | MoveDouble | MoveFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP (* Some of the atomic operations added in 8.1 *) and atomicOp = LoadAddAL | LoadUmaxAL | SwapAL | LoadAddAcquire | LoadUMaxAcquire | SwapRelease (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Floating point constant *) | LoadFPConstant of { source: Word64.word, dest: 'fpReg, floatSize: floatSize } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed-size piece of memory and put the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. *) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg }
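(* Illustration, not part of this signature: a sketch of the offset rule stated for
   the constant-offset loads and stores above, assuming the two usual ARM64
   addressing forms: an unscaled signed offset in the range -256..255, or an
   unsigned offset scaled by the transfer size, from 0 to 4095 units. Both
   helpers are hypothetical. *)
fun unitSize Load64 = 8
  | unitSize Load32 = 4
  | unitSize Load16 = 2
  | unitSize Load8 = 1
fun validConstOffset(byteOffset, loadType) =
    (* Either the unscaled (LDUR/STUR-style) range or the scaled (LDR/STR-style) range. *)
    (byteOffset >= ~256 andalso byteOffset <= 255) orelse
    (byteOffset >= 0 andalso byteOffset mod unitSize loadType = 0
        andalso byteOffset div unitSize loadType <= 4095)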
(* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd. *) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, stackArgs: stackLocn list }
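(* Illustration, not part of this signature: a worked instance of the
   dest = M * N +/- A scheme above. The pseudo-register numbers are invented
   for this sketch. *)
val rA = PReg 1 and rB = PReg 2 and rC = PReg 3 and rD = PReg 4
(* rD := rA * rB + rC as a single 64-bit multiply-add; a plain multiply
   would pass ZeroReg as sourceA. *)
val multAddExample =
    Multiplication{kind=MultAdd64, dest=rD, sourceA=SomeReg rC, sourceM=rA, sourceN=rB}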
(* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, fpDests: ('fpReg * vReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, fpRegArgs: ('fpReg * vReg) list, stackAdjust: int, currStackSize: int } (* Return from the function. results are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of - { results: ('genReg * xReg) list, fpResults: ('fpReg * vReg) list, returnReg: 'genReg, numStackArgs: int } + { results: ('genReg * xReg) list, fpResults: ('fpReg * vReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg }
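(* Illustration, not part of this signature: a sketch of the representation that
   TagValue and UntagValue (above) work with, assuming Poly/ML's usual tagging
   scheme in which a short value n is stored as 2*n+1, i.e. shifted left one
   place with the bottom (tag) bit set. The helper names are invented. *)
fun tagWord (w: Word64.word) = Word64.orb(Word64.<<(w, 0w1), 0w1)
fun untagWordUnsigned w = Word64.>>(w, 0w1)   (* corresponds to UntagValue with isSigned=false *)
fun untagWordSigned w = Word64.~>>(w, 0w1)    (* corresponds to UntagValue with isSigned=true *)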
(* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to 0 if it succeeds or to 1 if it fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes moves and conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize }
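(* Illustration, not part of this signature: a sketch of the acquire/release
   pairing described above, for publishing a heap value through a shared ref.
   The pseudo-register numbers are invented. Everything the writer did before
   its StoreRelease is visible to a reader after the matching LoadAcquire. *)
val publishStore =   (* writer: ref := value *)
    StoreRelease{base=PReg 0, source=PReg 1, loadType=Load64}
val consumeLoad =    (* reader: ! ref *)
    LoadAcquire{base=PReg 0, dest=PReg 2, loadType=Load64}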
(* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } (* Yield control during a spin-lock. *) | CPUYield (* Atomic operations added for ARM 8.1 *) | AtomicOperation of { base: 'genReg, source: 'optGenReg, dest: 'optGenReg, atOp: atomicOp } (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true and to falseJump if it is false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. If an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } (* Return the successor blocks from a control flow. *) val successorBlocks: controlFlow -> int list type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock val printICodeAbstract: basicBlockAbstract vector * (string -> unit) -> unit and printICodeConcrete: basicBlockConcrete vector * (string -> unit) -> unit (* Check whether this value is acceptable for LogicalImmediate. *) val isEncodableBitPattern: Word64.word * opSize -> bool (* This generates a BitField instruction with the appropriate values for immr and imms. *) val shiftConstant: { direction: shiftDirection, dest: preg, source: preg, shift: word, opSize: opSize } -> iCodeAbstract structure Sharing: sig type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary and atomicOp = atomicOp end end;
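(* Illustration: a standalone sketch of what shiftConstant in the signature above
   produces, assuming the implementation below has been opened. The register
   numbers are invented. An LSL by n is encoded as an unsigned bit-field move
   with immr = ~n mod width and imms = width-1-n, so a left shift of a 64-bit
   value by 3: *)
val shiftLeft3 =
    shiftConstant{direction=ShiftLeft, dest=PReg 1, source=PReg 0, shift=0w3, opSize=OpSize64}
(* ...denotes the same instruction as
   BitFieldShift{source=PReg 0, dest=PReg 1, isSigned=false, length=OpSize64, immr=0w61, imms=0w60}
   since ~3 mod 64 = 61 and 64-1-3 = 60. *)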
diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML index 4ce9456e..fb3cb3f6 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML @@ -1,1022 +1,1022 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICode( structure Arm64Code: ARM64PREASSEMBLY ): ARM64ICODE = struct open Arm64Code open Address datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than one word if this is a container or a handler entry. *) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype reg = GenReg of xReg | FPReg of vReg datatype callKind = Recursive | ConstantCode of machineWord | FullCall (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Floating point constant *) | LoadFPConstant of { source: Word64.word, dest: 'fpReg, floatSize: floatSize } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed-size piece of memory and put the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. 
*) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. *) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd. *) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). 
*) | BeginFunction of { regArgs: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, fpRegArgs: ('fpReg * vReg) list, fpDests: ('fpReg * vReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, fpRegArgs: ('fpReg * vReg) list, stackAdjust: int, currStackSize: int } (* Return from the function. results are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of - { results: ('genReg * xReg) list, fpResults: ('fpReg * vReg) list, returnReg: 'genReg, numStackArgs: int } + { results: ('genReg * xReg) list, fpResults: ('fpReg * vReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. 
The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to 0 if it succeeds or to 1 if it fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize }
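(* Illustration, not part of this functor: a sketch of an atomic update built
   from the two exclusive operations above, for processors without the 8.1
   atomic instructions. The pseudo-register numbers are invented and a real
   generator would spread the retry loop over basic blocks, branching back
   while the result register holds 1. *)
val readExclusive =    (* load the current contents of the word at PReg 0 *)
    LoadAcquireExclusive{base=PReg 0, dest=PReg 1}
(* ...compute the new value into PReg 2, then attempt the store: *)
val writeExclusive =   (* PReg 3 becomes 0 on success, 1 on failure *)
    StoreReleaseExclusive{base=PReg 0, source=SomeReg(PReg 2), result=PReg 3}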
(* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } (* Yield control during a spin-lock. *) | CPUYield (* Atomic operations added for ARM 8.1 *) | AtomicOperation of { base: 'genReg, source: 'optGenReg, dest: 'optGenReg, atOp: atomicOp } (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true and to falseJump if it is false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. If an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock (* Return the list of blocks that are the immediate successors of this. *) fun successorBlocks(Unconditional l) = [l] | successorBlocks(Conditional{trueJump, falseJump, ...}) = [trueJump, falseJump] | successorBlocks ExitCode = [] | successorBlocks(IndexedBr cases) = cases | successorBlocks(SetHandler{handler, continue, ...}) = [handler, continue] (* We only need "handler" in SetHandler because we may have a handler that is never actually jumped to. 
*) | successorBlocks(UnconditionalHandle handler) = [handler] | successorBlocks(ConditionalHandle{handler, continue, ...}) = [handler, continue] local fun printCC(CcRef ccRef, stream) = stream ("CC" ^ Int.toString ccRef) fun printStackLoc(StackLoc{size, rno}, stream) = (stream "S"; stream(Int.toString rno); stream "("; stream(Int.toString size); stream ")") fun regRepr(XReg w) = "X" ^ Int.toString(Word8.toInt w) | regRepr XZero = "XZ" | regRepr XSP = "SP" and vRegRepr(VReg v) = "V" ^ Int.toString(Word8.toInt v) fun arithRepr OpSize64 = "64" | arithRepr OpSize32 = "32" fun printLoadType(Load64, stream) = stream "64" | printLoadType(Load32, stream) = stream "32" | printLoadType(Load16, stream) = stream "16" | printLoadType(Load8, stream) = stream "8" fun printSaves([], _, _) = () | printSaves([areg], _, printReg) = printReg areg | printSaves(areg::more, stream, printReg) = (printReg areg; stream ","; printSaves(more, stream, printReg)) fun printArg(ArgInReg reg, _, printReg) = printReg reg | printArg(ArgOnStack{wordOffset, container, field, ...}, stream, _) = ( printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")" ) fun printShift(ShiftLSL w, stream) = stream(" LSL " ^ Word8.toString w) | printShift(ShiftLSR w, stream) = stream(" LSR " ^ Word8.toString w) | printShift(ShiftASR w, stream) = stream(" ASR " ^ Word8.toString w) | printShift(ShiftNone, _) = () fun printFloatSize(Float32, stream) = stream "F" | printFloatSize(Double64, stream) = stream "D" fun printICode {stream, printGenReg, ...} (MoveRegister{ source, dest }: ('a, 'b, 'c) arm64ICode) = ( stream "\tMove\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (LoadNonAddressConstant{ source, dest }) = ( stream "\tLoadNonAddress\t"; stream(Word64.toString source); stream " => "; printGenReg dest ) | printICode {stream, printFPReg, ...} (LoadFPConstant{ source, dest, floatSize }) = ( stream "\tLoadFPConstant"; printFloatSize(floatSize, stream); stream "\t"; stream(Word64.toString source); stream " => "; printFPReg dest ) | printICode {stream, printGenReg, ...} (LoadAddressConstant{ source, dest }) = ( stream "\tLoadAddress\t"; stream(Address.stringOfWord source); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (LoadWithConstantOffset{ base, dest, byteOffset, loadType }) = ( stream "\tLoadConstOffset"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (LoadFPWithConstantOffset{ base, dest, byteOffset, floatSize }) = ( stream "\tLoadConstOffset"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printFPReg dest ) | printICode {stream, printGenReg, ...} (LoadWithIndexedOffset{ base, dest, index, loadType, signExtendIndex }) = ( stream "\tLoadIndexed"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (LoadFPWithIndexedOffset{ base, dest, index, floatSize, signExtendIndex }) = ( stream "\tLoadIndexed"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printFPReg dest ) | printICode 
{stream, printGenReg, ...} (GetThreadId { dest}) = ( stream "\tGetThreadId\t"; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (ObjectIndexAddressToAbsolute{ source, dest }) = ( stream "\tObjectAddrToAbs\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (AbsoluteToObjectIndex{ source, dest }) = ( stream "\tAbsToObjectAddr\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (AllocateMemoryFixed{bytesRequired, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream(Word64.fmt StringCvt.DEC bytesRequired); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, ...} (AllocateMemoryVariable{size, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream "s="; printGenReg(size); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, ...} (InitialiseMem{size, addr, init}) = ( stream "\tInitialiseMem\t"; stream "s="; printGenReg(size); stream ",i="; printGenReg(init); stream ",a="; printGenReg(addr) ) | printICode {stream, ...} BeginLoop = stream "\tBeginLoop" | printICode {stream, printGenReg, ...} (JumpLoop{regArgs, stackArgs, checkInterrupt, ... }) = ( stream "\tJumpLoop\t"; List.app(fn {src, dst} => (printGenReg(dst); stream "="; printArg(src, stream, printGenReg); stream " ")) regArgs; List.app( fn {src, wordOffset, stackloc} => (printStackLoc(stackloc, stream); stream("(sp" ^ Int.toString wordOffset); stream ")="; printArg(src, stream, printGenReg); stream " ") ) stackArgs; case checkInterrupt of NONE => () | SOME saveRegs => (stream " Check:save="; printSaves(saveRegs, stream, printGenReg)) ) | printICode {stream, printGenReg, ...} (StoreWithConstantOffset{ base, source, byteOffset, loadType }) = ( stream "\tStoreConstOffset"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode {stream, printGenReg, printFPReg, ...} (StoreFPWithConstantOffset{ base, source, byteOffset, floatSize }) = ( stream "\tStoreConstOffset"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode {stream, printGenReg, ...} (StoreWithIndexedOffset{ base, source, index, loadType, signExtendIndex }) = ( stream "\tStoreIndexed"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode {stream, printGenReg, printFPReg, ...} (StoreFPWithIndexedOffset{ base, source, index, floatSize, signExtendIndex }) = ( stream "\tStoreIndexed"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode {stream, printGenReg, printOptGenReg, ...} (AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }) = ( stream (if isAdd then "\tAddImmediate" else "\tSubImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, printOptGenReg, ...} (AddSubRegister{ base, 
shifted, dest, ccRef, isAdd, length, shift }) = ( stream (if isAdd then "\tAddRegister" else "\tSubRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, printOptGenReg, ...} (LogicalImmediate{ source, dest, ccRef, immed, logOp, length }) = ( stream (case logOp of LogAnd => "\tAndImmediate" | LogOr => "\tOrImmediate" | LogXor => "\tXorImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word64.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, printOptGenReg, ...} (LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift }) = ( stream (case logOp of LogAnd => "\tAndRegister" | LogOr => "\tOrRegister" | LogXor => "\tXorRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode {stream, printGenReg, ...} (ShiftRegister{ direction, dest, source, shift, opSize }) = ( stream ( case direction of ShiftLeft => "\tShiftLeft" | ShiftRightLogical => "\tShiftRightLog" | ShiftRightArithmetic => "\tShiftRightArith"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " by "; printGenReg(shift); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printOptGenReg, ...} (Multiplication{ kind, dest, sourceA, sourceM, sourceN }) = ( stream ( case kind of MultAdd32 => "\tMultAdd32\t" | MultSub32 => "\tMultSub32\t" | MultAdd64 => "\tMultAdd64\t" | MultSub64 => "\tMultSub64\t" | SignedMultAddLong => "\tSignedMultAddLong\t" | SignedMultHigh => "\tSignedMultHigh\t"); printGenReg(sourceM); stream " * "; printGenReg(sourceN); stream " +/- "; printOptGenReg sourceA; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (Division{ isSigned, dest, dividend, divisor, opSize }) = ( stream (if isSigned then "\tSignedDivide" else "\tUnsignedDivide"); stream(arithRepr opSize); stream "\t"; printGenReg(dividend); stream " by "; printGenReg(divisor); stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (BeginFunction{ regArgs, stackArgs, fpRegArgs }) = ( stream "\tBeginFunction\t"; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printGenReg(arg); stream " ")) regArgs; List.app(fn (arg, r) => (stream(vRegRepr r); stream "="; printFPReg(arg); stream " ")) fpRegArgs; List.app(fn s => printStackLoc(s, stream)) stackArgs ) | printICode {stream, printGenReg, printFPReg, ...} (FunctionCall{callKind, regArgs, stackArgs, dests, fpRegArgs, fpDests, saveRegs, containers}) = ( stream "\tFunctionCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); stream "("; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn (arg, r) => (stream(vRegRepr r); stream "="; printFPReg(arg); stream " ")) fpRegArgs; List.app(fn arg => (stream "p="; printArg(arg, stream, printGenReg); stream " ")) stackArgs; stream ") "; List.app(fn (pr, r) => (stream(regRepr r); stream "=>"; printGenReg pr; stream " ")) dests; List.app(fn (pr, r) => (stream(vRegRepr 
r); stream "=>"; printFPReg pr; stream " ")) fpDests; stream " save="; printSaves(saveRegs, stream, printGenReg); if null containers then () else (stream " containers="; List.app (fn c => (printStackLoc(c, stream); stream " ")) containers) ) | printICode {stream, printGenReg, printFPReg, ...} (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize, fpRegArgs, ...}) = ( stream "\tTailCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn (arg, r) => (stream(vRegRepr r); stream "="; printFPReg(arg); stream " ")) fpRegArgs; List.app(fn {src, stack} => (stream (Int.toString stack); stream "<="; printArg(src, stream, printGenReg); stream " ")) stackArgs; stream "adjust="; stream(Int.toString stackAdjust); stream " stackSize="; stream(Int.toString currStackSize) ) | printICode {stream, printGenReg, printFPReg, ...} (ReturnResultFromFunction{ results, fpResults, returnReg, numStackArgs }) = ( stream "\tReturnFromFunction\t"; printGenReg(returnReg); stream "with "; List.app(fn (reg, r) => (stream(regRepr r); stream "=>"; printGenReg reg; stream " ")) results; List.app(fn (reg, r) => (stream(vRegRepr r); stream "=>"; printFPReg reg; stream " ")) fpResults; stream("," ^ Int.toString numStackArgs) ) | printICode {stream, printGenReg, ...} (RaiseExceptionPacket{ packetReg }) = ( stream "\tRaiseException\t"; printGenReg(packetReg) ) | printICode {stream, printGenReg, ...} (PushToStack{ source, copies, container }) = ( stream "\tPushToStack\t"; printGenReg source; if copies > 1 then (stream " * "; stream(Int.toString copies)) else (); stream " => "; printStackLoc(container, stream) ) | printICode {stream, printGenReg, ...} (LoadStack{ dest, wordOffset, container, field }) = ( stream "\tLoadStack\t"; printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")"; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (StoreToStack{ source, container, field, stackOffset }) = ( stream "\tStoreToStack\t"; printGenReg source; stream " => "; printStackLoc(container, stream); stream "+"; stream (Int.toString field); stream "("; stream(Int.toString stackOffset); stream ")" ) | printICode {stream, printGenReg, ...} (ContainerAddress{ dest, container, stackOffset }) = ( stream "\tContainerAddress\t"; stream "@"; printStackLoc(container, stream); stream " ("; stream(Int.toString stackOffset); stream ") => "; printGenReg dest ) | printICode {stream, ...} (ResetStackPtr{ numWords }) = ( stream "\tResetStackPtr\t"; stream(Int.toString numWords) ) | printICode {stream, printGenReg, ...} (TagValue{ source, dest, isSigned, opSize }) = ( stream "\tTag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (UntagValue{ source, dest, isSigned, opSize }) = ( stream "\tUntag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (BoxLarge{source, dest, saveRegs}) = ( stream "\tBoxLarge\t"; printGenReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, ...} (UnboxLarge{source, 
dest}) = ( stream "\tUnboxLarge\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (BoxTagFloat{floatSize, source, dest, saveRegs}) = ( stream "\tBoxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode {stream, printGenReg, printFPReg, ...} (UnboxTagFloat{floatSize, source, dest}) = ( stream "\tUnboxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode {stream, printGenReg, ...} (LoadAcquire{ base, dest, loadType }) = ( stream "\tLoadAcquire"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode {stream, printGenReg, ...} (StoreRelease{ base, source, loadType }) = ( stream "\tStoreRelease"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "]" ) | printICode {stream, printGenReg, ...} (BitFieldShift{ source, dest, isSigned, length, immr, imms }) = ( stream "\tBitShift"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr length); stream "\t"; printGenReg source; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode {stream, printGenReg, ...} (BitFieldInsert{ source, dest, destAsSource, length, immr, imms }) = ( stream "\tBitInsert"; stream(arithRepr length); stream "\t"; printGenReg source; stream " with "; printGenReg destAsSource; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode {stream, printGenReg, ...} (IndexedCaseOperation{testReg}) = ( stream "\tIndexedCase\t"; printGenReg testReg ) | printICode {stream, ...} PushExceptionHandler = stream "\tPushExcHandler" | printICode {stream, ...} PopExceptionHandler = stream "\tPopExcHandler" | printICode {stream, printGenReg, ...} (BeginHandler{packetReg}) = ( stream "\tBeginHandler\t"; printGenReg packetReg ) | printICode {stream, printGenReg, ...} (CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = ( stream "\tCompareByteVectors\t"; printGenReg(vec1Addr); stream ","; printGenReg(vec2Addr); stream ","; printGenReg(length); stream " => "; printCC(ccRef, stream) ) | printICode {stream, printGenReg, ...} (BlockMove{srcAddr, destAddr, length, isByteMove}) = ( stream(if isByteMove then "\tBlockByteMove\t" else "\tBlockWordMove\t"); stream "src="; printGenReg(srcAddr); stream ",dest="; printGenReg(destAddr); stream ",len="; printGenReg(length) ) | printICode {stream, printGenReg, printOptGenReg, ...} (AddSubXSP{ source, dest, isAdd }) = ( stream(if isAdd then "\tAdd\t" else "\tSubtract\t"); printGenReg source; stream " XSP => "; printOptGenReg dest ) | printICode {stream, printGenReg, ...} (TouchValue{ source }) = ( stream "\tTouchValue\t"; printGenReg source ) | printICode {stream, printGenReg, ...} (LoadAcquireExclusive{ base, dest }) = ( stream "\tLoadExclusive\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode {stream, printGenReg, printOptGenReg, ...} (StoreReleaseExclusive{ base, source, result }) = ( stream "\tStoreExclusive\t"; printOptGenReg source; stream " => ["; printGenReg base; stream "] result => "; printGenReg result ) | printICode {stream, ...} MemoryBarrier = stream "\tMemoryBarrier" | printICode {stream, 
printGenReg, printFPReg, ...} (ConvertIntToFloat{ source, dest, srcSize, destSize}) = ( stream "\tConvert"; stream(arithRepr srcSize); stream "To"; printFloatSize(destSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode {stream, printGenReg, printFPReg, ...} (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}) = let open IEEEReal in stream "\tConvert"; printFloatSize(srcSize, stream); stream "To"; stream(arithRepr destSize); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream( case rounding of TO_NEAREST => " rounding" | TO_NEGINF => " rounding down" | TO_POSINF => " rounding up" | TO_ZERO => " truncating" ) end | printICode {stream, printFPReg, ...} (UnaryFloatingPt{ source, dest, fpOp}) = ( stream( case fpOp of NegFloat => "\tNegFloat\t" | NegDouble => "\tNegDouble\t" | AbsFloat => "\tAbsFloat\t" | AbsDouble => "\tAbsDouble\t" | ConvFloatToDble => "\tFloatToDble\t" | ConvDbleToFloat => "\tDbleToFloat\t" | MoveDouble => "\tMoveDouble\t" | MoveFloat => "\tMoveFloat\t" ); printFPReg source; stream " => "; printFPReg dest ) | printICode {stream, printFPReg, ...} (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}) = ( stream( case fpOp of MultiplyFP => "\tMultiply" | DivideFP => "\tDivide" | AddFP => "\tAdd" | SubtractFP => "\tSubtract" ); printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream " => "; printFPReg dest ) | printICode {stream, printFPReg, ...} (CompareFloatingPoint{ arg1, arg2, opSize, ccRef}) = ( stream "\tCompare"; printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream ", "; printCC(ccRef, stream) ) | printICode {stream, ...} CPUYield = stream "\tCpuYield" | printICode {stream, printGenReg, printOptGenReg, ...} (AtomicOperation{ base, source, dest, atOp }) = ( case atOp of LoadAddAL => stream "\tLoadAddAL\t" | LoadUmaxAL => stream "\tLoadUmaxAL\t" | SwapAL => stream "\tSwapAL\t" | LoadAddAcquire => stream "\tLoadAddAcquire\t" | LoadUMaxAcquire => stream "\tLoadUMaxAcquire\t" | SwapRelease => stream "\tSwapRelease\t"; printOptGenReg source; stream ",["; printGenReg base; stream "] => "; printOptGenReg dest ) and printCondition(cond, stream) = stream(condToString cond) (* Print a basic block. *) fun printBlock {stream, printGenReg, printOptGenReg, printFPReg} (blockNo, BasicBlock{block, flow, ...}) = ( (* Put a label on all but the first. 
*) if blockNo <> 0 then stream("L" ^ Int.toString blockNo ^ ":") else (); List.app (fn icode => (printICode {stream=stream, printGenReg=printGenReg, printOptGenReg=printOptGenReg, printFPReg=printFPReg} (icode); stream "\n")) block; case flow of Unconditional l => stream("\tJump\tL" ^ Int.toString l ^ "\n") | Conditional {condition, trueJump, falseJump, ccRef, ...} => ( stream "\tJump"; printCondition(condition, stream); stream "\t"; printCC(ccRef, stream); stream " L"; stream (Int.toString trueJump); stream " else L"; stream (Int.toString falseJump); stream "\n" ) | ExitCode => () | IndexedBr _ => () | SetHandler{handler, continue} => stream(concat["\tSetHandler\tH", Int.toString handler, "\n", "\tJump\tL", Int.toString continue, "\n"]) | UnconditionalHandle handler => stream("\tJump\tH" ^ Int.toString handler ^ "\n") | ConditionalHandle{handler, continue} => stream(concat["\tJump\tL", Int.toString continue, " or H", Int.toString handler, "\n"]) ) in fun printPReg stream (PReg i) = stream("R" ^ Int.toString i) fun printOptPReg stream ZeroReg = stream "Zero" | printOptPReg stream (SomeReg reg) = printPReg stream reg fun printXReg stream (XReg w) = stream("X" ^ Int.toString(Word8.toInt w)) | printXReg stream XZero = stream "XZ" | printXReg stream XSP = stream "XSP" fun printVReg stream (VReg w) = stream("V" ^ Int.toString(Word8.toInt w)) fun printICodeAbstract(blockVec, stream) = Vector.appi(printBlock{stream=stream, printGenReg=printPReg stream, printOptGenReg=printOptPReg stream, printFPReg=printPReg stream}) blockVec and printICodeConcrete(blockVec, stream) = Vector.appi(printBlock{stream=stream, printGenReg=printXReg stream, printOptGenReg=printXReg stream, printFPReg=printVReg stream}) blockVec end (* Only certain bit patterns are allowed in a logical immediate instruction but the encoding is complex so it's easiest to inherit the test from the assembler layer. *) local fun optow OpSize32 = WordSize32 | optow OpSize64 = WordSize64 in fun isEncodableBitPattern(v, w) = Arm64Code.isEncodableBitPattern(v, optow w) end (* This generates a BitField instruction with the appropriate values for immr and imms. 
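As a worked example of the mapping below: a left shift by 3 in a 64-bit operation uses immr = (~3) mod 64 = 61 and imms = 64-1-3 = 60 (the UBFM encoding of LSL), while a logical right shift by 3 uses immr = 3 and imms = 0x3f; arithmetic right shifts are the same but with isSigned set, giving SBFM rather than UBFM.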
*) fun shiftConstant{ direction, dest, source, shift, opSize } = let val (isSigned, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (false, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (false, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (false, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (false, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (true, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (true, shift, 0wx1f) in BitFieldShift{ source=source, dest=dest, isSigned=isSigned, length=opSize, immr=immr, imms=imms } end structure Sharing = struct type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary and atomicOp = atomicOp end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeOptimise.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeOptimise.ML index e73755ed..5b8d9512 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeOptimise.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeOptimise.ML @@ -1,627 +1,696 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeOptimise( structure Arm64ICode: ARM64ICODE structure IntSet: INTSET structure Identify: ARM64IDENTIFYREFERENCES structure Debug: DEBUG structure Pretty: PRETTY sharing Arm64ICode.Sharing = Identify.Sharing = IntSet = Arm64ICode ): ARM64ICODEOPTIMISE = struct open Arm64ICode open IntSet open Identify datatype optimise = Changed of (preg, pregOrZero, preg) basicBlock vector * regProperty vector | Unchanged exception InternalError = Misc.InternalError (* Optimiser. This could incorporate optimisations done elsewhere. IdentifyReferences currently removes instructions that produce results in registers that are never used. AllocateRegisters deals generally with caching. The optimiser currently deals with booleans and conditions and with constants. *) (* This is a rewrite of the last instruction to set a boolean. This is almost always rewriting the next instruction. The only exception is that there may be a ResetStackPtr in between. *) datatype boolRegRewrite = BRNone (* BRSetConditionToConstant - we have a comparison of two constant values.
This will usually happen because we've duplicated a branch and set a register to a constant which we then compare. *) | BRSetConditionToConstant of { srcCC: ccRef, test: condition -> bool } fun optimiseICode{ code, pregProps, ccCount=_, debugSwitches=_ } = let val hasChanged = ref false val pregPropSize = Vector.length pregProps val regCounter = ref pregPropSize (* Allocate new registers after the old. *) val regList = ref [] (* New properties are added in reverse order. *) fun newReg kind = ( regList := kind :: ! regList; PReg (!regCounter) ) before regCounter := !regCounter + 1 (* Constant values in registers. *) datatype regValue = SomeValue | NonAddressConst of Word64.word | AddressConstant of machineWord | LargeBox of preg | RealBox of preg * floatSize | TaggedValue of preg * bool * opSize (* If this argument is a register and the register is mapped to a constant or another register replace the value. Unlike the X86 version we don't map memory locations but we do map registers. *) (* TODO: This is potentially quadratic if we have a long piece of code with very many registers. *) fun getRegisterValue(preg as PReg pregNo, kill, regMap) = ( case List.find(fn {dest, ... } => dest = preg) regMap of SOME { source, ...} => ( source, (* Filter it if it is the last reference. *) if member(pregNo, kill) then List.filter(fn {dest, ...} => dest <> preg) regMap else regMap ) | NONE => (SomeValue, regMap) ) fun optimiseBlock processed (block, flow, outCCState) = let fun optCode([], brCond, regMap, code, changed) = (code, brCond, regMap, changed) + | optCode({instr as AddSubImmediate{source, dest=ZeroReg, ccRef=SOME ccRefOut, immed, isAdd=false, length}, kill, ...} :: rest, + _, regMap, code, changed) = + let + val (repArg1, memRefsOut) = getRegisterValue(source, kill, regMap) + in + case repArg1 of + NonAddressConst test => + (* AddSubImmediate is put in by CodetreeToIcode to test a boolean value. It can also + arise as the result of pattern matching on booleans or even by tests such as = true. + If the source register is now a constant we want to propagate the constant + condition. *) + let + (* This comparison reduces to a constant. *) + val _ = hasChanged := true + (* Signed comparison. If this is a 32-bit operation the top 32 bits + could be zero so we need to convert the value as a Word32. + Immediate values are always unsigned. *) + val testValue = + case length of + OpSize64 => Word64.toLargeIntX test + | OpSize32 => Word32.toLargeIntX(Word32.fromLarge test) + (* Put in a replacement so that if we were previously testing ccRefOut + we should instead test ccRef.
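For example, purely as an illustration: booleans are tagged, so a register known to hold 3 (the tagged form of true) compared with immed = 3 gives signedCompare = EQUAL, and a later conditional branch testing this ccRef with CondEqual is rewritten into an unconditional jump to its true destination.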
*) + val signedCompare=LargeInt.compare(testValue, Word.toLargeInt immed) + and unsignedCompare=Word64.compare(test, Word64.fromLarge(Word.toLargeWord immed)) + fun testResult condition = + case (condition, signedCompare, unsignedCompare) of + (CondEqual, EQUAL, _) => true + | (CondEqual, _, _) => false + | (CondNotEqual, EQUAL, _) => false + | (CondNotEqual, _, _) => true + | (CondSignedLess, LESS, _) => true + | (CondSignedLess, _, _) => false + | (CondSignedGreater, GREATER,_) => true + | (CondSignedGreater, _, _) => false + | (CondSignedLessEq, GREATER,_) => false + | (CondSignedLessEq, _, _) => true + | (CondSignedGreaterEq, LESS, _) => false + | (CondSignedGreaterEq, _, _) => true + | (CondCarryClear, _, LESS ) => true + | (CondCarryClear, _, _) => false + | (CondUnsignedHigher, _,GREATER) => true + | (CondUnsignedHigher, _, _) => false + | (CondUnsignedLowOrEq, _,GREATER) => false + | (CondUnsignedLowOrEq, _, _) => true + | (CondCarrySet, _, LESS ) => false + | (CondCarrySet, _, _) => true + (* The overflow and negative/positive checks should never occur. *) + | _ => raise InternalError "getCondResult: comparison" + + val repl = BRSetConditionToConstant{srcCC=ccRefOut, test=testResult} + val _ = isSome outCCState andalso raise InternalError "optCode: CC exported" + in + optCode(rest, repl, memRefsOut, code, true) + end + + | _ => optCode(rest, BRNone, memRefsOut, instr :: code, changed) + end + | optCode({instr as AddSubImmediate{source, dest=SomeReg dest, ccRef=NONE, immed, isAdd, length}, kill, ...} :: rest, _, regMap, code, changed) = (* This is frequently used to remove a tag from a value before an addition or subtraction. If it's a constant we want to do that now. *) let val (repArg1, newMap) = getRegisterValue(source, kill, regMap) in case repArg1 of NonAddressConst cVal => let val addSub = if isAdd then cVal + Word.toLarge immed else cVal - Word.toLarge immed (* Mask the result to 32-bits if this is a 32-bit operation. *) val result = case length of OpSize32 => Word64.andb(addSub, Word64.<<(0w1, 0w32) - 0w1) | OpSize64 => addSub in optCode(rest, BRNone, {dest=dest, source=NonAddressConst result} :: newMap, LoadNonAddressConstant{dest=dest, source=result} :: code, true) end | _ => optCode(rest, BRNone, newMap, instr ::code, changed) end | optCode({instr as AddSubRegister{base, shifted, dest, ccRef, isAdd, length, shift}, kill, ...} :: rest, _, regMap, code, changed) = let (* If we have a constant as the second argument we can change this to the immediate form. *) val (repOp1, mapOp1) = getRegisterValue(base, kill, regMap) val (repOp2, mapOp2) = getRegisterValue(shifted, kill, mapOp1) val regAndImmed = case (repOp1, repOp2, isAdd, shift) of (_, NonAddressConst immed, _, ShiftNone) => if immed < 0w4096 then SOME(base, immed) else NONE (* If this is an ADD we can also use the immediate if the first arg is a constant.
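For example, ADD can use the immediate form whichever operand held the constant since addition is commutative, but for SUB only the shifted (second) operand may become the immediate. In both cases the constant must fit the 12-bit immediate field, which is what the < 4096 test checks.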
*) | (NonAddressConst immed, _, true, ShiftNone) => if immed < 0w4096 then SOME(shifted, immed) else NONE | _ => NONE in case regAndImmed of SOME(srcReg, immediate) => optCode(rest, BRNone, mapOp2, AddSubImmediate{source=srcReg, dest=dest, ccRef=ccRef, immed=Word.fromLargeWord(Word64.toLargeWord immediate), isAdd=isAdd, length=length} :: code, true) | NONE => optCode(rest, BRNone, mapOp2, instr :: code, changed) end | optCode({instr as LogicalImmediate{source, dest=ZeroReg, ccRef=SOME ccRefOut, immed, logOp=LogAnd, ...}, kill, ...} :: rest, _, regMap, code, changed) = let val (repArg1, memRefsOut) = getRegisterValue(source, kill, regMap) in case repArg1 of NonAddressConst test => (* This was previously used to test a boolean but that has now changed. It may also occur as a result of pattern matching. *) let (* This comparison reduces to a constant. *) val _ = hasChanged := true val result = LargeWord.andb(test, immed) (* Put in a replacement so that if we were previously testing ccRefOut we should instead test ccRef. *) fun testResult CondEqual = result = 0w0 | testResult CondNotEqual = result <> 0w0 | testResult _ = raise InternalError "testResult: invalid condition" val repl = BRSetConditionToConstant{srcCC=ccRefOut, test=testResult} val _ = isSome outCCState andalso raise InternalError "optCode: CC exported" in optCode(rest, repl, memRefsOut, code, true) end | _ => optCode(rest, BRNone, memRefsOut, instr :: code, changed) end | optCode({instr as LogicalRegister{base, shifted, dest, ccRef, logOp, length, shift}, kill, ...} :: rest, _, regMap, code, changed) = let (* If we have a constant as the second argument we can change this to the immediate form. *) val (repOp1, mapOp1) = getRegisterValue(base, kill, regMap) val (repOp2, mapOp2) = getRegisterValue(shifted, kill, mapOp1) val regAndImmed = case (repOp1, repOp2, shift) of (_, NonAddressConst immed, ShiftNone) => if isEncodableBitPattern (immed, length) then SOME(base, immed) else NONE | (NonAddressConst immed, _, ShiftNone) => if isEncodableBitPattern (immed, length) then SOME(shifted, immed) else NONE | _ => NONE in case regAndImmed of SOME(srcReg, immediate) => optCode(rest, BRNone, mapOp2, LogicalImmediate{source=srcReg, dest=dest, ccRef=ccRef, immed=immediate, logOp=logOp, length=length} :: code, true) | NONE => optCode(rest, BRNone, mapOp2, instr :: code, changed) end | optCode({instr as MoveRegister{dest, source}, kill, ...} :: rest, inCond, regMap, code, changed) = let val (repSource, mapAfterReplace) = getRegisterValue(source, kill, regMap) (* If the source is mapped to a constant we can set the destination to the same constant. *) val newMap = case repSource of SomeValue => mapAfterReplace | _ => {dest=dest, source=repSource} :: mapAfterReplace in optCode(rest, inCond, newMap, instr :: code, changed) end | optCode({instr as LoadNonAddressConstant{dest, source}, ...} :: rest, inCond, regMap, code, changed) = let (* If we already have a register with this constant we would probably be better off reusing it. The map, though, needs to indicate that the destination register contains the constant. The X86 version always uses a load-constant here. *) val newInstr = case List.find(fn {source=NonAddressConst c, ... 
} => c = source | _ => false) regMap of SOME{dest=cDest, ...} => MoveRegister{dest=dest, source=cDest} | NONE => instr in optCode(rest, inCond, {dest=dest, source=NonAddressConst source} :: regMap, newInstr :: code, changed) end | optCode({instr as LoadAddressConstant{dest, source}, ...} :: rest, inCond, regMap, code, changed) = (* Address constant. This is used in conjunction with UnboxValue *) optCode(rest, inCond, {dest=dest, source=AddressConstant source} :: regMap, instr :: code, changed) | optCode({instr as BoxLarge{ source, dest, ... }, ...} :: rest, inCond, regMap, code, changed) = (* Try to eliminate adjacent sequences of boxing and unboxing. *) optCode(rest, inCond, {dest=dest, source=LargeBox source} :: regMap, instr :: code, changed) | optCode({instr as BoxTagFloat{ source, dest, floatSize, ... }, ...} :: rest, inCond, regMap, code, changed) = optCode(rest, inCond, {dest=dest, source=RealBox(source, floatSize)} :: regMap, instr :: code, changed) | optCode({instr as TagValue{ source, dest, isSigned, opSize}, ...} :: rest, inCond, regMap, code, changed) = optCode(rest, inCond, {dest=dest, source=TaggedValue(source, isSigned, opSize)} :: regMap, instr :: code, changed) | optCode({instr as UnboxLarge{ source, dest, ... }, kill, ...} :: rest, inCond, regMap, code, changed) = let val (repSource, mapAfterReplace) = getRegisterValue(source, kill, regMap) in (* If we're unboxing a constant address we can unpack it now. This is intended to handle constant arguments to LargeWord operations. *) case repSource of AddressConstant cVal => let (* Check this looks like a boxed large word. *) open Address val addr = toAddress cVal val _ = isBytes addr andalso length addr = 0w8 div wordSize val wordConst: LargeWord.word = RunCall.unsafeCast cVal val newMap = {dest=dest, source=NonAddressConst wordConst} :: mapAfterReplace in optCode(rest, inCond, newMap, LoadNonAddressConstant{dest=dest, source=wordConst} :: code, true) end (* Or if it was recently boxed we can use the original and hopefully eliminate the box. *) | LargeBox original => optCode(rest, inCond, mapAfterReplace, MoveRegister{dest=dest, source=original} :: code, true) | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end | optCode({instr as UntagValue{ source, dest, isSigned, opSize }, kill, ...} :: rest, inCond, regMap, code, changed) = (* If we're untagging a constant we can produce the result now. *) let val (repSource, mapAfterReplace) = getRegisterValue(source, kill, regMap) in case repSource of NonAddressConst cVal => let (* The result depends on the kind of untagging. Unsigned values can just be shifted right but signed values need to be treated differently for 32-bit and 64-bit operations. *) val wordConst = case (isSigned, opSize) of (false, _) => LargeWord.>>(cVal, 0w1) | (true, OpSize64) => LargeWord.~>>(cVal, 0w1) | (true, OpSize32) => Word32.toLarge(Word32.~>>(Word32.fromLarge cVal, 0w1)) val newMap = {dest=dest, source=NonAddressConst wordConst} :: mapAfterReplace in optCode(rest, inCond, newMap, LoadNonAddressConstant{dest=dest, source=wordConst} :: code, true) end (* Or if it was recently tagged we can eliminate the tagging and untagging. This occurs, for example, if we extract a character and then put it into a string. Normally the lengths will match but we may use a 32-bit tagged value as a 64-bit value in the special case of using a character to index into the array of single-character strings.
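For example, a TagValue of R9 into R10 followed by an UntagValue of R10 into R11 with the same signedness and a compatible size reduces the untag to MoveRegister from R9 to R11 (register numbers purely illustrative); if that was the last use of R10, IdentifyReferences can then drop the original tagging as dead code.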
Mismatched signed/unsigned could occur with conversions between Word.word and FixedInt.int which are generally no-ops. *) | TaggedValue(original, wasSigned, oldOpSize) => if isSigned = wasSigned andalso (case (oldOpSize, opSize) of (OpSize64, OpSize32) => false | _ => true) then optCode(rest, inCond, mapAfterReplace, MoveRegister{dest=dest, source=original} :: code, true) else optCode(rest, inCond, mapAfterReplace, instr :: code, changed) | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end | optCode({instr as UnboxTagFloat{ source, dest, floatSize=Float32 }, kill, ...} :: rest, inCond, regMap, code, changed) = let val (repSource, mapAfterReplace) = getRegisterValue(source, kill, regMap) in case repSource of NonAddressConst cVal => (* Should only be Float32 in native 64-bits. *) let val _ = not is32in64 andalso LargeWord.andb(cVal, 0wxffffffff) = 0w1 orelse raise InternalError "incorrect FP constant form" val fpConstant = LargeWord.>>(cVal, 0w32) in optCode(rest, inCond, mapAfterReplace, LoadFPConstant{dest=dest, floatSize=Float32, source=fpConstant} :: code, true) end | AddressConstant cVal => let open Address val addr = toAddress cVal val _ = is32in64 andalso length addr = 0w1 andalso flags addr = F_bytes orelse raise InternalError "incorrect FP constant form" val fpConstant = RunCall.loadPolyWord(addr, 0w0) in optCode(rest, inCond, mapAfterReplace, LoadFPConstant{dest=dest, floatSize=Float32, source=fpConstant} :: code, true) end | RealBox(original, fSize) => ( fSize = Float32 orelse raise InternalError "Mismatch float size"; optCode(rest, inCond, mapAfterReplace, UnaryFloatingPt{dest=dest, source=original, fpOp=MoveFloat} :: code, true) ) | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end | optCode({instr as UnboxTagFloat{ source, dest, floatSize=Double64 }, kill, ...} :: rest, inCond, regMap, code, changed) = let val (repSource, mapAfterReplace) = getRegisterValue(source, kill, regMap) in case repSource of AddressConstant cVal => let open Address val addr = toAddress cVal val _ = length addr = (0w8 div wordSize) andalso flags addr = F_bytes orelse raise InternalError "incorrect FP constant form" val fpConstant = RunCall.loadNativeWord(addr, 0w0) in optCode(rest, inCond, mapAfterReplace, LoadFPConstant{dest=dest, floatSize=Double64, source=fpConstant} :: code, true) end | RealBox(original, fSize) => ( fSize = Double64 orelse raise InternalError "Mismatch float size"; optCode(rest, inCond, mapAfterReplace, UnaryFloatingPt{dest=dest, source=original, fpOp=MoveDouble} :: code, true) ) | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end (* Some of these are specifically to reduce "ref" to its simplest form since it's generated as a variable-length item. *) | optCode({instr as AllocateMemoryVariable{ size, dest, saveRegs }, kill, ...} :: rest, inCond, regMap, code, changed) = (* Turn a variable size allocation into a fixed size allocation if the size is a constant. 
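For example, with a constant size of 3 words: in native 64-bit mode wordsRequired is 3+1 = 4, including the length word, giving bytesRequired = 32; in 32-in-64 the total is rounded up to an even number of 4-byte words, andb(3+2, ~2) = 4, giving bytesRequired = 16.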
*) let val (repSize, mapAfterReplace) = getRegisterValue(size, kill, regMap) in case repSize of NonAddressConst words => let val wordsRequired = if is32in64 then (* Have to round this up to 8 bytes *) Word64.andb(words+0w2, ~ 0w2) else words+0w1 val bytesRequired = Word64.fromLarge(Word.toLarge Address.wordSize) * wordsRequired in optCode(rest, inCond, mapAfterReplace, AllocateMemoryFixed{bytesRequired=bytesRequired, dest=dest, saveRegs=saveRegs} :: code, true) end | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end | optCode({instr as InitialiseMem{size, addr, init}, kill, ...} :: rest, inCond, regMap, code, changed) = (* If we're initialising "a few" words we're better off unrolling the loop. *) let val (repSize, mapAfterReplace) = getRegisterValue(size, kill, regMap) in case repSize of NonAddressConst words => if words <= 0w8 then let val nWords = LargeWord.toInt words fun unroll(n, l) = if n = nWords then l else unroll(n+1, StoreWithConstantOffset{ source=init, base=addr, byteOffset=n*Word.toInt Address.wordSize, loadType=if is32in64 then Load32 else Load64 } :: l) in optCode(rest, inCond, mapAfterReplace, unroll(0, code), true) end else optCode(rest, inCond, mapAfterReplace, instr :: code, changed) | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end | optCode({instr as BitFieldShift{ source, dest, isSigned, length, immr, imms }, kill, ...} :: rest, inCond, regMap, code, changed) = (* Bit shift. Specifically this is used to shift the flags byte to construct a length word. The flags are frequently a constant. Unlike BitFieldInsert this sets unused bits to either zero or the sign bit. *) let val (repSource, mapAfterReplace) = getRegisterValue(source, kill, regMap) in case repSource of NonAddressConst cVal => let val regSize = case length of OpSize32 => 0w32 | OpSize64 => 0w64 in if not isSigned andalso imms + 0w1 = immr then (* Simple left shift: ignore the other cases for the moment. *) let val wordConst64 = Word64.<<(cVal, regSize-immr) val wordConst = case length of OpSize64 => wordConst64 | OpSize32 => Word64.andb(wordConst64, 0wxffffffff) val newMap = {dest=dest, source=NonAddressConst wordConst} :: mapAfterReplace in optCode(rest, inCond, newMap, LoadNonAddressConstant{dest=dest, source=wordConst} :: code, true) end else optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end | optCode({instr as BitFieldInsert{ source, dest, destAsSource, length=_, immr, imms }, kill, ...} :: rest, inCond, regMap, code, changed) = (* Bit field insertion. This is used to insert the length field into the register containing the shifted flags value. *) let val (repSource, mapAfterRepSrc) = getRegisterValue(source, kill, regMap) val (repDestAs, mapAfterReplace) = getRegisterValue(destAsSource, kill, mapAfterRepSrc) in case (repSource, repDestAs) of (NonAddressConst srcVal, NonAddressConst dstVal) => if immr = 0w0 then (* Insert new bits without shifting. *) let (* We take imms-immr+1 bits from the source. 
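For instance, with immr = 0 and imms = 5 we keep the low six bits of the source: maskSrc below evaluates to 0x3f and the folded constant is orb(andb(dstVal, notb 0x3f), andb(srcVal, 0x3f)).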
*) val maskSrc = Word64.>>(Word64.notb 0w0, 0w64-(imms+0w1)) val maskDst = Word64.notb maskSrc val wordConst = Word64.orb(Word64.andb(dstVal, maskDst), Word64.andb(srcVal, maskSrc)) val newMap = {dest=dest, source=NonAddressConst wordConst} :: mapAfterReplace in optCode(rest, inCond, newMap, LoadNonAddressConstant{dest=dest, source=wordConst} :: code, true) end else optCode(rest, inCond, mapAfterReplace, instr :: code, changed) | _ => optCode(rest, inCond, mapAfterReplace, instr :: code, changed) end (* Clear the cache across a function call. We would have to push these registers. *) | optCode({instr as FunctionCall _, ...} :: rest, _, _, code, changed) = optCode(rest, BRNone, [], instr::code, changed) | optCode({instr as BeginLoop, ...} :: rest, _, _, code, changed) = (* Any register value from outside the loop is not valid inside. *) optCode(rest, BRNone, [], instr::code, changed) | optCode({instr as JumpLoop _, ...} :: rest, _, _, code, changed) = (* Likewise at the end of the loop. Not sure if this is essential. *) optCode(rest, BRNone, [], instr::code, changed) (* CompareByteVectors and BlockMove modify their arguments. In particular if we have the length as a constant in a register it won't still have that value at the end. TODO: This could be refined. *) | optCode({instr as CompareByteVectors _, ...} :: rest, _, _, code, changed) = optCode(rest, BRNone, [], instr::code, changed) | optCode({instr as BlockMove _, ...} :: rest, _, _, code, changed) = optCode(rest, BRNone, [], instr::code, changed) | optCode({instr, ...} :: rest, inCond, regMap, code, changed) = let (* If this instruction affects the CC the cached SetToCondition will no longer be valid. *) val afterCond = case getInstructionCC instr of CCUnchanged => inCond | _ => BRNone in optCode(rest, afterCond, regMap, instr::code, changed) end val (blkCode, finalRepl, finalMap, blockChanged) = optCode(block, BRNone, [], processed, false) val _ = if blockChanged then hasChanged := true else () in case (flow, finalRepl) of (* We have a Condition and a change to the condition. *) (flow as Conditional{ccRef, condition, trueJump, falseJump}, BRSetConditionToConstant({srcCC, test, ...})) => if srcCC = ccRef then let val newFlow = if test condition then Unconditional trueJump else Unconditional falseJump val() = hasChanged := true in BasicBlock{flow=newFlow, block=List.rev blkCode} end else BasicBlock{flow=flow, block=List.rev blkCode} | (flow as Unconditional jmp, _) => let val ExtendedBasicBlock{block=targetBlck, locals, exports, flow=targetFlow, outCCState=targetCC, ...} = Vector.sub(code, jmp) (* If the target is empty or is simply one or more Resets or a Return we're better off merging this in rather than doing the jump. We allow a single Load e.g. when loading a constant or moving a register. If we have a comparison with a literal and the register we're comparing is in the map set to a constant we include that because the comparison will then be reduced to a constant.
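For example, a target block consisting of just a LoadNonAddressConstant followed by a ReturnResultFromFunction counts as simple, so the jump is replaced by an inline constant load and return, provided any exported registers are merge (RegPropMultiple) registers.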
*) fun isSimple([], _, _) = true | isSimple ({instr=ResetStackPtr _, ...} :: instrs, moves, regMap) = isSimple(instrs, moves, regMap) | isSimple ({instr=ReturnResultFromFunction _, ...} :: instrs, moves, regMap) = isSimple(instrs, moves, regMap) | isSimple ({instr=RaiseExceptionPacket _, ...} :: instrs, moves, regMap) = isSimple(instrs, moves, regMap) | isSimple ({instr=MoveRegister{source, dest}, ...} :: instrs, moves, regMap) = let (* We frequently have a move of the original register into a new register before the test. *) val newMap = case List.find(fn {dest, ... } => dest = source) regMap of SOME {source, ...} => {dest=dest, source=source} :: regMap | NONE => regMap in moves = 0 andalso isSimple(instrs, moves+1, newMap) end | isSimple ({instr=LoadNonAddressConstant _, ...} :: instrs, moves, regMap) = moves = 0 andalso isSimple(instrs, moves+1, regMap) | isSimple ({instr=LoadAddressConstant _, ...} :: instrs, moves, regMap) = moves = 0 andalso isSimple(instrs, moves+1, regMap) | isSimple ({instr=LogicalImmediate{source, dest=ZeroReg, logOp=LogAnd, ...}, ...} :: instrs, moves, regMap) = let val isReplace = List.find(fn {dest, ... } => dest = source) regMap in case isReplace of SOME {source=NonAddressConst _, ...} => isSimple(instrs, moves, regMap) | _ => false end + | isSimple ({instr=AddSubImmediate{source, dest=ZeroReg, ...}, ...} :: instrs, moves, regMap) = + let + val isReplace = List.find(fn {dest, ... } => dest = source) regMap + in + case isReplace of + SOME {source=NonAddressConst _, ...} => isSimple(instrs, moves, regMap) + | _ => false + end | isSimple _ = false in (* Merge trivial blocks. This previously also tried to merge non-trivial blocks if they only had one reference but this ends up duplicating non-trivial code. If we have a trivial block that has multiple references but is the only reference to a non-trivial block we can merge the non-trivial block into it. That would be fine except that at the same time we may merge this trivial block elsewhere. *) (* The restriction that a block must only export "merge" registers is unfortunate but necessary to avoid the situation where a non-merge register is defined at multiple points and cannot be pushed to the stack. This really isn't an issue with blocks with unconditional branches but there are cases where we have successive tests of the same condition and that results in local registers being defined and then exported. This occurs in, for example, fun f x = if x > "abcde" then "yes" else "no"; *) if isSimple(targetBlck, 0, finalMap) andalso List.all (fn i => Vector.sub(pregProps, i) = RegPropMultiple) (setToList exports) then let (* Copy the block, creating new registers for the locals. 
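For instance, if the target block defines a local R5 and is merged into two predecessors, each copy gets a fresh pseudo-register (say R57 and R58, numbers purely illustrative), so no non-merge register ends up defined at more than one point.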
*) val localMap = List.map (fn r => (PReg r, newReg(Vector.sub(pregProps, r)))) (setToList locals) fun mapReg r = case List.find (fn (s, _) => r = s) localMap of SOME(_, s) => s | NONE => r fun mapInstr(instr as ResetStackPtr _) = instr | mapInstr(ReturnResultFromFunction{results, fpResults, returnReg, numStackArgs}) = ReturnResultFromFunction{results=List.map(fn(pr, r) => (mapReg pr, r))results, fpResults=List.map(fn(pr, r) => (mapReg pr, r))fpResults, returnReg=mapReg returnReg, numStackArgs=numStackArgs} | mapInstr(RaiseExceptionPacket{packetReg}) = RaiseExceptionPacket{packetReg=mapReg packetReg} | mapInstr(MoveRegister{source, dest}) = MoveRegister{source=mapReg source, dest=mapReg dest} | mapInstr(LoadNonAddressConstant{source, dest}) = LoadNonAddressConstant{source=source, dest=mapReg dest} | mapInstr(LoadAddressConstant{source, dest}) = LoadAddressConstant{source=source, dest=mapReg dest} | mapInstr(LogicalImmediate{source, dest=ZeroReg, immed, ccRef, logOp, length}) = LogicalImmediate{source=mapReg source, dest=ZeroReg, immed=immed, ccRef=ccRef, logOp=logOp, length=length} + | mapInstr(AddSubImmediate{source, dest=ZeroReg, immed, ccRef, isAdd, length}) = + AddSubImmediate{source=mapReg source, dest=ZeroReg, immed=immed, ccRef=ccRef, isAdd=isAdd, length=length} | mapInstr _ = raise InternalError "mapInstr: other instruction" fun mapRegNo i = case(mapReg(PReg i)) of PReg r => r (* Map the instructions and the sets although we only use the kill set. *) fun mapCode{instr, current, active, kill} = {instr=mapInstr instr, current=listToSet(map mapRegNo (setToList current)), active=listToSet(map mapRegNo (setToList active)), kill=listToSet(map mapRegNo (setToList kill))} in hasChanged := true; optimiseBlock blkCode(map mapCode targetBlck, targetFlow, targetCC) end else BasicBlock{flow=flow, block=List.rev blkCode} end | (flow, _) => BasicBlock{flow=flow, block=List.rev blkCode} end fun optBlck(ExtendedBasicBlock{block, flow, outCCState, ...}) = optimiseBlock [] (block, flow, outCCState) val resVector = Vector.map optBlck code in if !hasChanged then let val extraRegs = List.rev(! regList) val props = if null extraRegs then pregProps else Vector.concat[pregProps, Vector.fromList extraRegs] in Changed(resVector, props) end else Unchanged end structure Sharing = struct type extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and optimise = optimise and preg = preg and pregOrZero = pregOrZero end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML index 3a70de97..c8730147 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML @@ -1,1356 +1,1425 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeToArm64Code( structure Arm64PreAssembly: ARM64PREASSEMBLY structure Debug: DEBUG structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET structure Pretty: PRETTY structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end sharing Arm64PreAssembly.Sharing = Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ICODEGENERATE = struct open Identify open Arm64ICode open Arm64PreAssembly open Address exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) (* These aren't currently used for anything. *) val workReg1 = X16 and workReg2 = X17 fun icodeToArm64Code {blocks: basicBlockConcrete vector, functionName, stackRequired, debugSwitches, resultClosure, profileObject, ...} = let val numBlocks = Vector.length blocks (* Load from and store to stack. *) fun loadFromStack(destReg, wordOffset, code) = if wordOffset >= 4096 then (LoadRegIndexed{regT=destReg, regN=X_MLStackPtr, regM=destReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(destReg, Word64.fromInt wordOffset)] @ code else (LoadRegScaled{regT=destReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code and storeToStack(sourceReg, wordOffset, workReg, code) = if wordOffset >= 4096 then (StoreRegIndexed{regT=sourceReg, regN=X_MLStackPtr, regM=workReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(workReg, Word64.fromInt wordOffset)] @ code else (StoreRegScaled{regT=sourceReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code datatype srcAndDest = IsInReg of xReg | IsOnStack of int local (* The registers are numbered from 0. Choose values that don't conflict with the stack addresses. *) fun regNo(XReg r) = ~1 - Word8.toInt r | regNo _ = ~1 - 31 type node = {src: srcAndDest, dst: srcAndDest } fun nodeAddress({dst=IsInReg r, ...}: node) = regNo r | nodeAddress({dst=IsOnStack a, ...}) = a fun arcs({src=IsOnStack wordOffset, ...}: node) = [wordOffset] | arcs{src=IsInReg r, ...} = [regNo r] in val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } end (* This is a general function for moving values into registers or to the stack where it is possible that the source values might also be in use as destinations. The stack is used for destinations only for tail recursive calls. *) fun moveMultipleValues(moves, code) = let fun moveValues ([], code) = code (* We're done. *) | moveValues (arguments, code) = let (* stronglyConnectedComponents does two things. It detects loops where it's not possible to move items without breaking the loop but more importantly it orders the dependencies so that if there are no loops we can load the source and store it in the destination knowing that we won't overwrite anything we might later need. 
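For example, given the moves X1 to X2 and X0 to X1, the ordering ensures that X1 is copied to X2 before X1 is overwritten; for a genuine cycle, such as X0 to X1 together with X1 to X0, no ordering works and the exchange code below is used instead.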
*) val ordered = stronglyConnected arguments fun loadIntoReg(IsInReg sReg, dReg, code) = if sReg = dReg then code else (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | loadIntoReg(IsOnStack wordOffset, dReg, code) = loadFromStack(dReg, wordOffset, code) fun moveEachValue ([], code) = code | moveEachValue ([{dst=IsInReg dReg, src}] :: rest, code) = moveEachValue(rest, loadIntoReg(src, dReg, code)) | moveEachValue ([{dst=IsOnStack wordOffset, src=IsInReg sReg}] :: rest, code) = (* Storing into the stack. *) moveEachValue(rest, storeToStack(sReg, wordOffset, workReg1, code)) | moveEachValue ([{dst=IsOnStack dstOffset, src=IsOnStack srcOffset}] :: rest, code) = (* Copy a stack location - needs a load and store unless the address is the same. *) if dstOffset = srcOffset then moveEachValue(rest, code) else moveEachValue(rest, storeToStack(workReg2, dstOffset, workReg1, loadFromStack(workReg2, srcOffset, code))) | moveEachValue((cycle as first :: _ :: _) :: rest, code) = (* We have a cycle. *) let (* We need to exchange some of the arguments. Doing an exchange here will set the destination with the correct source. However we have to process every subsequent entry with the swapped registers. That may well mean that one of those entries becomes trivial. We also need to rerun stronglyConnectedComponents on at least the rest of this cycle. It's easiest to flatten the rest and do everything. *) (* Exchange the source and destination. We don't have an exchange instruction and there's a further complication. We could be copying between stack locations and their offsets could be > 4096. Since we've only got two work registers we need to use the hardware stack as an extra location. Stack-stack exchange is very rare so the extra overhead to handle the general case is worth it. *) local fun storeToDest(sReg, IsInReg dReg, _, code) = (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | storeToDest(sReg, IsOnStack wordOffset, work, code) = storeToStack(sReg, wordOffset, work, code) in fun exchange(IsInReg arg1Reg, arg2, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg1Reg}) :: storeToDest(arg1Reg, arg2, workReg1, loadIntoReg(arg2, workReg2, code)) | exchange(arg1, IsInReg arg2Reg, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg2Reg}) :: storeToDest(arg2Reg, arg1, workReg1, loadIntoReg(arg1, workReg2, code)) | exchange(arg1, arg2, code) = (* The hardware stack must be 16-byte aligned. *) storeToDest(workReg2, arg2, workReg1, (LoadRegUnscaled{regT=workReg2, regN=XSP, byteOffset=16, loadType=Load64, unscaledType=PostIndex}) :: storeToDest(workReg2, arg1, workReg1, loadIntoReg(arg2, workReg2, (StoreRegUnscaled{regT=workReg2, regN=XSP, byteOffset= ~16, loadType=Load64, unscaledType=PreIndex}) :: loadIntoReg(arg1, workReg2, code)))) end (* Try to find either a register-register move or a register-stack move. If not use the first. If there's a stack-register move there will also be a register-stack so we don't need to look for both. *) val {dst=selectDst, src=selectSrc} = first (* This includes this entry but after the swap we'll eliminate it. 
*) val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) val destAsSource = selectDst fun match(s1: srcAndDest, s2) = s1 = s2 fun swapSources{src, dst} = if match(src, selectSrc) then {src=destAsSource, dst=dst} else if match(src, destAsSource) then {src=selectSrc, dst=dst} else {src=src, dst=dst} val exchangeCode = exchange(selectDst, selectSrc, code) in moveValues(List.map swapSources flattened, exchangeCode) end | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *) raise InternalError "moveEachValue - empty set" in moveEachValue(ordered, code) end in moveValues(moves, code) end (* Where we have multiple specific registers as either source or destination there is the potential that a destination register is currently in use as a source. *) fun moveMultipleRegisters(regPairList, code) = let val regPairsAsDests = List.map(fn {src, dst} => {src=IsInReg src, dst=IsInReg dst}) regPairList in moveMultipleValues(regPairsAsDests, code) end (* Floating point registers can only be moved into other floating point registers but it is possible to have a cycle. *) fun moveMultipleFPRegisters(moves: {dst: vReg, src: vReg} list, code) = let local fun regNo(VReg r) = Word8.toInt r type node = {src: vReg, dst: vReg } fun nodeAddress({dst=r, ...}: node) = regNo r fun arcs{src=r, ...} = [regNo r] in val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } end fun moveValues ([], code) = code (* We're done. *) | moveValues (arguments, code) = let val ordered = stronglyConnected arguments fun moveEachValue ([], code) = code | moveEachValue ([{dst, src}] :: rest, code) = moveEachValue(rest, if src = dst then code else (FPUnaryOp{regN=src, regD=dst, fpOp=MoveDouble}) :: code) | moveEachValue((cycle as first :: _ :: _) :: rest, code) = (* We have a cycle. *) let val {dst=selectDst: vReg, src=selectSrc: vReg} = first (* This includes this entry but after the swap we'll eliminate it. *) val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) val destAsSource = selectDst fun swapSources{src, dst} = if src=selectSrc then {src=destAsSource, dst=dst} else if src=destAsSource then {src=selectSrc, dst=dst} else {src=src, dst=dst} (* Exchange the values of two floating point registers. There are various ways to do this. For the moment just use the hardware stack. *) val exchangeCode = code <::> StoreFPRegUnscaled{regT=selectDst, regN=XSP, byteOffset= ~16, floatSize=Double64, unscaledType=PreIndex} <::> FPUnaryOp{regN=selectSrc, regD=selectDst, fpOp=MoveDouble} <::> LoadFPRegUnscaled{regT=selectSrc, regN=XSP, byteOffset=16, floatSize=Double64, unscaledType=PostIndex} in moveValues(List.map swapSources flattened, exchangeCode) end | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *) raise InternalError "moveEachValue - empty set" in moveEachValue(ordered, code) end in moveValues(moves, code) end fun moveIfNecessary({src, dst}, code) = if src = dst then code else MoveXRegToXReg{sReg=src, dReg=dst} :: code (* Add a constant word to the source register and put the result in the destination. regW is used as a work register if necessary. This is used both for addition and subtraction. *) fun addConstantWord({regS, regD, value=0w0, ...}, code) = if regS = regD then code else MoveXRegToXReg{sReg=regS, dReg=regD} :: code | addConstantWord({regS, regD, regW, value}, code) = let (* If we have to load the constant it's better if the top 32 bits are zero where possible.
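For example, subtracting 16 arrives here as value = ~16: isSub becomes true with unsigned = 16, which fits the immediate field, so a single SUB immediate is emitted; adding 0x12345 takes two instructions, an ADD of 0x12 shifted by 12 bits followed by an ADD of 0x345 unshifted.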
*) val (isSub, unsigned) = if value > Word64.<<(0w1, 0w63) then (true, ~ value) else (false, value) in if unsigned < Word64.<<(0w1, 0w24) then (* We can put up to 24 bits in a shifted and an unshifted constant. *) let val w = Word.fromLarge(Word64.toLarge unsigned) val high = Word.andb(Word.>>(w, 0w12), 0wxfff) val low = Word.andb(w, 0wxfff) val addSub = if isSub then SubImmediate else AddImmediate in if high <> 0w0 then ( (if low <> 0w0 then [addSub{regN=regD, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64}] else []) @ addSub{regN=regS, regD=regD, immed=high, shifted=true, setFlags=false, opSize=OpSize64} :: code ) else addSub{regN=regS, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64} :: code end else let (* To minimise the constant and increase the chances that it will fit in a single word, look to see if we can shift it. *) fun getShift(value, shift) = if Word64.andb(value, 0w1) = 0w0 then getShift(Word64.>>(value, 0w1), shift+0w1) else (value, shift) val (shifted, shift) = getShift(unsigned, 0w0) in code <::> LoadNonAddr(regW, shifted) <::> (if isSub then SubShiftedReg else AddShiftedReg) {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift, setFlags=false, opSize=OpSize64} end end val labelMaker = createLabelMaker() val startOfFunctionLabel = createLabel labelMaker (* Used for recursive calls/jumps *) val blockToLabelMap = Vector.tabulate(numBlocks, fn _ => createLabel labelMaker) fun getBlockLabel blockNo = Vector.sub(blockToLabelMap, blockNo) fun codeExtended _ (MoveRegister{source, dest, ...}, code) = moveIfNecessary({src=source, dst=dest}, code) | codeExtended _ (LoadNonAddressConstant{source, dest}, code) = code <::> LoadNonAddr(dest, source) | codeExtended _ (LoadFPConstant{source, dest, floatSize}, code) = code <::> LoadFPConst{dest=dest, value=source, floatSize=floatSize, work=workReg1} | codeExtended _ (LoadAddressConstant{source, dest, ...}, code) = code <::> LoadAddr(dest, source) | codeExtended _ (LoadWithConstantOffset{dest, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then LoadRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate} :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in LoadRegScaled{regT=dest, regN=base, unitOffset=unitOffset, loadType=loadType} :: code end | codeExtended _ (LoadFPWithConstantOffset{dest, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (LoadFPRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (LoadFPRegScaled{regT=dest, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (LoadWithIndexedOffset{dest, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address.
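For example, a 32-in-64 signed index selects the SXTW extend and otherwise UXTX is used; byte accesses (Load8) apply no scaling because the element size is one, while wider accesses scale the index by the element size.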
*) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (LoadRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithIndexedOffset{dest, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (LoadFPRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (GetThreadId { dest}, code) = (* Load the thread id. This is always a 64-bit value. *) (LoadRegScaled{regT=dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code | codeExtended _ (ObjectIndexAddressToAbsolute{source, dest, ...}, code) = (AddShiftedReg{regM=source, regN=X_Base32in64, regD=dest, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code | codeExtended _ (AbsoluteToObjectIndex{source, dest, ...}, code) = let val destReg = dest in code <::> (SubShiftedReg{regM=X_Base32in64, regN=source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> (shiftConstant{shift=0w2, regN=destReg, regD=destReg, direction=ShiftRightLogical, opSize=OpSize64}) end | codeExtended _ (AllocateMemoryFixed{ bytesRequired, dest, saveRegs }, code) = code <::> AllocateMemoryFixedSize{ bytes=Word.fromLarge bytesRequired, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (AllocateMemoryVariable{ size, dest, saveRegs }, code) = code <::> AllocateMemoryVariableSize{ sizeReg=size, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (InitialiseMem{ size, addr, init}, code) = let val sizeReg = size and addrReg = addr and initReg = init val exitLabel = createLabel labelMaker and loopLabel = createLabel labelMaker (* This uses a loop to initialise. It's possible the size is zero so we have to check at the top of the loop. *) val (bShift, offset, loadType) = if is32in64 then (0w2, ~4, Load32) else (0w3, ~8, Load64) in code <::> (* Add the length in bytes so we point at the end. *) AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, shift=ShiftLSL bShift, setFlags=false, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Are we at the start? *) SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, exitLabel) <::> StoreRegUnscaled{regT=initReg, regN=workReg1, byteOffset=offset, loadType=loadType, unscaledType=PreIndex } <::> UnconditionalBranch loopLabel <::> SetLabel exitLabel end | codeExtended _ (BeginLoop, code) = code | codeExtended _ (JumpLoop{regArgs, stackArgs, checkInterrupt}, code) = let (* TODO: We could have a single list and use ArgOnStack and ArgInReg to distinguish. *) fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {wordOffset, src, ...} => {src=convertArg src, dst=IsOnStack wordOffset}) stackArgs val extRegArgs = map (fn {dst, src} => {src=convertArg src, dst=convertArg(ArgInReg dst)}) regArgs val code2 = moveMultipleValues(extStackArgs @ extRegArgs, code) in case checkInterrupt of NONE => code2 | SOME saveRegs => let val skipCheck = createLabel labelMaker in code2 <::> (* Put in stack-check code to allow this to be interrupted. 
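The sequence below loads the stack limit, computes the stack pointer minus the limit to set the flags, and skips the RTS trap while the carry is set, i.e. while the stack pointer is still at or above the limit.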
*) LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} <::> SubShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, skipCheck) <::> RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=saveRegs} <::> SetLabel skipCheck end end | codeExtended _ (StoreWithConstantOffset{source, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then (StoreRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in (StoreRegScaled{regT=source, regN=base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithConstantOffset{source, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (StoreFPRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (StoreFPRegScaled{regT=source, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (StoreWithIndexedOffset{source, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (StoreRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithIndexedOffset{source, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (StoreFPRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (AddSubImmediate{ source, dest, immed, isAdd, length, ccRef}, code) = let val destReg = dest in ((if isAdd then AddImmediate else SubImmediate) {regN=source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (AddSubRegister{ base, shifted, dest, isAdd, length, ccRef, shift}, code) = let val destReg = dest in ( (if isAdd then AddShiftedReg else SubShiftedReg) {regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalImmediate{ source, dest, immed, logOp, length, ccRef}, code) = let val destReg = dest in (BitwiseLogical{regN=source, regD=destReg, bits=immed, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalRegister{ base, shifted, dest, logOp, length, ccRef, shift}, code) = let (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. 
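(In ARM64 these are the BIC, ORN and EON instructions.)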
*) val destReg = dest in (LogicalShiftedReg{regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (ShiftRegister{ direction, dest, source, shift, opSize }, code) = (ShiftRegisterVariable{regN=source, regM=shift, regD=dest, shiftDirection=direction, opSize=opSize}) :: code | codeExtended _ (Multiplication{ kind, dest, sourceA, sourceM, sourceN }, code) = let val destReg = dest and srcAReg = sourceA and srcNReg = sourceN and srcMReg = sourceM in (MultiplyAndAddSub{regM=srcMReg, regN=srcNReg, regA=srcAReg, regD=destReg, multKind=kind}) :: code end | codeExtended _ (Division{ isSigned, dest, dividend, divisor, opSize }, code) = (DivideRegs{regN=dividend, regM=divisor, regD=dest, isSigned=isSigned, opSize=opSize}) :: code | codeExtended _ (BeginFunction{regArgs, fpRegArgs, ...}, code) = let val skipCheck = createLabel labelMaker val defaultWords = 10 (* This is wired into the RTS. *) val workRegister = workReg1 val debugTrapAlways = false (* Can be set to true for debugging *) (* Test with either the stack-pointer or a high-water value. The RTS assumes that X9 has been used as the high-water if it is called through stackOverflowXCallOffset rather than stackOverflowCallOffset *) val (testReg, entryPt, code1) = if stackRequired <= defaultWords then (X_MLStackPtr, stackOverflowCallOffset, code) else (X9, stackOverflowXCallOffset, addConstantWord({regS=X_MLStackPtr, regD=X9, regW=workRegister, value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt stackRequired}, code)) (* Skip the RTS call if there is enough stack. N.B. The RTS can modify the end-of-stack value to force a trap here even if there is really enough stack. *) val code2 = (if debugTrapAlways then [] else [ConditionalBranch(CondCarrySet, skipCheck), SubShiftedReg{regM=workRegister, regN=testReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}]) @ (* Load the end-of-stack value. *) LoadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} :: code1 val code3 = code2 <::> RTSTrap{rtsEntry=entryPt, work=workReg1, save=List.map #2 regArgs} <::> SetLabel skipCheck fun mkPair(pr, rr) = {src=rr,dst=pr} val regPairs = List.map mkPair regArgs val fpRegPairs = List.map mkPair fpRegArgs in moveMultipleFPRegisters(fpRegPairs, moveMultipleRegisters(regPairs, code3)) end | codeExtended _ (TailRecursiveCall{callKind, regArgs, stackArgs, fpRegArgs, stackAdjust, currStackSize}, code) = let fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {stack, src} => {dst=IsOnStack(stack+currStackSize), src=convertArg src}) stackArgs val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val extFPArgs = map (fn (a, r) => {src=a, dst=r}) fpRegArgs (* Tail recursive calls are complicated because we generally have to overwrite the existing stack. That means storing the arguments in the right order to avoid overwriting a value that we are using for a different argument. *) fun codeTailCall(arguments, stackAdjust, code) = if stackAdjust < 0 then let (* If the function we're calling takes more arguments on the stack than the current function we will have to extend the stack. Do that by pushing the argument whose offset is at -1. Then adjust all the offsets and repeat. 
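For example, if the adjusted stackAdjust is ~2 the callee needs two more stack words than the caller currently has: the value destined for offset ~1 is pushed, every remaining stack offset is renumbered up by one, and the process repeats until the adjustment reaches zero.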
*) val {src=argM1, ...} = valOf(List.find(fn {dst=IsOnStack ~1, ...} => true | _ => false) arguments) fun renumberArgs [] = [] | renumberArgs ({dst=IsOnStack ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. *) | renumberArgs ({dst, src} :: args) = let val newDest = case dst of IsOnStack d => IsOnStack(d+1) | regDest => regDest val newSrc = case src of IsOnStack wordOffset => IsOnStack(wordOffset+1) | other => other in {dst=newDest, src=newSrc} :: renumberArgs args end val pushCode = case argM1 of IsOnStack wordOffset => (StoreRegUnscaled{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: loadFromStack(workReg2, wordOffset, code) | IsInReg reg => (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: code in codeTailCall(renumberArgs arguments, stackAdjust+1, pushCode) end else let val loadArgs = moveMultipleFPRegisters(extFPArgs, moveMultipleValues(arguments, code)) in if stackAdjust = 0 then loadArgs else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt stackAdjust * Word.toLarge nativeWordSize}, loadArgs) end val setArgumentsCode = codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code) val jumpToFunctionCode = case callKind of Recursive => [(UnconditionalBranch startOfFunctionLabel)] | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else [(BranchReg{regD=workReg1, brRegType=BRRBranch}), (LoadAddr(workReg1, m))] | FullCall => if is32in64 then [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}, AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}] else [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64}] in jumpToFunctionCode @ setArgumentsCode end | codeExtended _ (FunctionCall{callKind, regArgs, stackArgs, dests, fpRegArgs, fpDests, saveRegs, ...}, code) = let local fun pushStackArgs ([], _, code) = code | pushStackArgs (ArgOnStack {wordOffset, ...} ::args, argNum, code) = let (* Have to adjust the offsets of stack arguments. *) val adjustedOffset = wordOffset+argNum in pushStackArgs(args, argNum+1, loadFromStack(workReg1, adjustedOffset, code) <::> StoreRegUnscaled{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) end | pushStackArgs (ArgInReg reg ::args, argNum, code) = pushStackArgs(args, argNum+1, code <::> (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *)) (* We have to adjust any stack offset to account for the arguments we've pushed. *) val numStackArgs = List.length stackArgs fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack(wordOffset+numStackArgs) | convertArg(ArgInReg reg) = IsInReg(reg) in val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val extFPArgs = map (fn (a, r) => {src=a, dst=r}) fpRegArgs val loadArgs = moveMultipleFPRegisters(extFPArgs, moveMultipleValues(extRegArgs, pushedArgs)) end (* Push the registers before the call and pop them afterwards. 
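The recursion in makeSavesAndCall builds the pushes on the way down the list and the pops on the way back up so that the saves nest correctly: with saveRegs = [X0, X1], for instance, the emitted order is push X0, push X1, call, pop X1, pop X0.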
*) fun makeSavesAndCall([], code) = ( case callKind of Recursive => code <::> (BranchAndLink startOfFunctionLabel) | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else code <::> (LoadAddr(workReg1, m)) <::> (BranchReg{regD=workReg1, brRegType=BRRAndLink}) | FullCall => if is32in64 then code <::> AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} else code <::> LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} ) | makeSavesAndCall(reg::regs, code) = let val areg = reg in makeSavesAndCall(regs, code <::> StoreRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) <::> LoadRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex} end (* Results. These go from the specific result register into the allocated register. *) val resultPairs = List.map(fn (pr, rr) => {src=rr,dst=pr}) dests val fpResultPairs = List.map(fn (pr, rr) => {src=rr,dst=pr}) fpDests in moveMultipleFPRegisters(fpResultPairs, moveMultipleRegisters(resultPairs, makeSavesAndCall(saveRegs, loadArgs))) end | codeExtended _ (ReturnResultFromFunction { results, fpResults, returnReg, numStackArgs }, code) = let fun resetStack(0, code) = code | resetStack(nItems, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) (* Return results. This goes from the allocated register into the specific register rr. *) val resultPairs = List.map(fn (pr, rr) => {src=pr,dst=rr}) results and fpResultPairs = List.map(fn (pr, rr) => {src=pr,dst=rr}) fpResults in BranchReg{regD=returnReg, brRegType=BRRReturn} :: resetStack(numStackArgs, moveMultipleFPRegisters(fpResultPairs, moveMultipleRegisters(resultPairs, code))) end | codeExtended _ (RaiseExceptionPacket{ packetReg }, code) = (* We need a work register here. It can be any register other than X0 since we don't preserve registers across calls. *) (* Copy the handler "register" into the stack pointer. Then jump to the address in the first word. The second word is the next handler. This is set up in the handler. We have a lot more raises than handlers since most raises are exceptional conditions such as overflow so it makes sense to minimise the code in each raise. *) moveIfNecessary({src=packetReg, dst=X0}, code) <::> LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch } | codeExtended _ (PushToStack{ source, copies, ... }, code) = let val reg = source val _ = copies > 0 orelse raise InternalError "PushToStack: copies<1" fun pushn(0, c) = c | pushn(n, c) = pushn(n-1, (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: c) in pushn(copies, code) end | codeExtended _ (LoadStack{ dest, wordOffset, ... }, code) = loadFromStack(dest, wordOffset, code) | codeExtended _ (StoreToStack{ source, stackOffset, ... }, code) = (* Store into the stack to set a field of a container. Always 64-bits. 
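The stackOffset is a word offset from X_MLStackPtr; workReg1 is passed in since, presumably, an offset outside the scaled-immediate range first has to be built in a register.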
*) storeToStack(source, stackOffset, workReg1, code) | codeExtended _ (ContainerAddress{ dest, stackOffset, ... }, code) = (* Set the register to an offset in the stack. *) let val _ = stackOffset >= 0 orelse raise InternalError "codeGenICode: ContainerAddress - negative offset" val byteOffset = stackOffset * Word.toInt nativeWordSize in if byteOffset >= 4096 then code <::> LoadNonAddr(dest, Word64.fromInt byteOffset) <::> AddShiftedReg{regN=X_MLStackPtr, regM=dest, regD=dest, shift=ShiftNone, setFlags=false, opSize=OpSize64} else code <::> AddImmediate{regN=X_MLStackPtr, regD=dest, immed=Word.fromInt byteOffset, shifted=false, setFlags=false, opSize=OpSize64} end | codeExtended _ (ResetStackPtr{ numWords, ... }, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt numWords * Word.toLarge nativeWordSize}, code) | codeExtended _ (TagValue{ source, dest, isSigned=_, opSize }, code) = (* Shift left by one bit and add one. *) code <::> shiftConstant{ direction=ShiftLeft, regD=dest, regN=source, shift=0w1, opSize=opSize } <::> BitwiseLogical{ bits=0w1, regN=dest, regD=dest, opSize=opSize, setFlags=false, logOp=LogOr} | codeExtended _ (UntagValue{ source, dest, isSigned, opSize }, code) = code <::> shiftConstant{ direction=if isSigned then ShiftRightArithmetic else ShiftRightLogical, regD=dest, regN=source, shift=0w1, opSize=opSize } | codeExtended _ (BoxLarge{ source, dest, saveRegs }, code) = boxSysWord({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (UnboxLarge{ source, dest }, code) = let (* Unbox a large word. The argument is a poly word. *) val destReg = dest and srcReg = source in if is32in64 then LoadRegScaled{regT=destReg, regN=destReg, unitOffset=0, loadType=Load64} :: AddShiftedReg{regM=srcReg, regN=X_Base32in64, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else LoadRegScaled{regT=destReg, regN=srcReg, unitOffset=0, loadType=Load64} :: code end | codeExtended _ (BoxTagFloat{ floatSize=Double64, source, dest, saveRegs }, code) = boxDouble({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (BoxTagFloat{ floatSize=Float32, source, dest, saveRegs }, code) = let val floatReg = source and fixedReg = dest in if is32in64 then boxFloat({source=floatReg, destination=fixedReg, workReg=workReg1, saveRegs=saveRegs}, code) else code <::> MoveFPToGeneral{regN=floatReg, regD=fixedReg, floatSize=Float32} <::> shiftConstant{ direction=ShiftLeft, shift=0w32, regN=fixedReg, regD=fixedReg, opSize=OpSize64} <::> BitwiseLogical{ bits=0w1, regN=fixedReg, regD=fixedReg, opSize=OpSize64, setFlags=false, logOp=LogOr} end | codeExtended _ (UnboxTagFloat { floatSize=Double64, source, dest }, code) = if is32in64 then code <::> AddShiftedReg{regM=source, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadFPRegScaled{regT=dest, regN=workReg1, unitOffset=0, floatSize=Double64} else code <::> LoadFPRegScaled{regT=dest, regN=source, unitOffset=0, floatSize=Double64} | codeExtended _ (UnboxTagFloat { floatSize=Float32, source, dest }, code) = (* This is tagged in native 64-bits. In 32-in-64 we're loading 32-bits so we can use an indexed load directly. 
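In the native case the tagged form is the one built by BoxTagFloat above, with the float in the top 32 bits and the tag bit at the bottom, so here we simply shift it back down by 32 and move it to an FP register.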
*) if is32in64 then code <::> LoadFPRegIndexed{regN=X_Base32in64, regM=source, regT=dest, option=ExtUXTX ScaleOrShift, floatSize=Float32} else code <::> shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=source, regD=workReg1, opSize=OpSize64} <::> MoveGeneralToFP{regN=workReg1, regD=dest, floatSize=Float32} | codeExtended _ (LoadAcquire{dest, base, loadType, ...}, code) = LoadAcquireReg{regT=dest, regN=base, loadType=loadType} :: code | codeExtended _ (StoreRelease{source, base, loadType, ...}, code) = StoreReleaseReg{regT=source, regN=base, loadType=loadType} :: code | codeExtended _ (BitFieldShift{ source, dest, isSigned, length, immr, imms }, code) = BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=if isSigned then BFSigned else BFUnsigned, opSize=length} :: code | codeExtended _ (BitFieldInsert{ source, destAsSource, dest, length, immr, imms }, code) = let (* If we're using BitFieldMove we retain some of the bits of the destination. The higher levels require us to treat that as a source. *) val _ = source = dest andalso raise InternalError "codeExtended: bitfield: dest=source" in BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=BFInsert, opSize=length} :: moveIfNecessary({src=destAsSource, dst=dest}, code) end | codeExtended {flow} (IndexedCaseOperation{testReg}, code) = let (* testReg contains the original value after the lowest value has been subtracted. Since both the original value and the lowest value were tagged it contains a shifted but untagged value. *) (* This should only be within a block with an IndexedBr flow type. *) val cases = case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation" val caseLabels = map getBlockLabel cases val tableLabel = createLabel labelMaker in code <::> LoadLabelAddress(workReg1, tableLabel) <::> (* Add the value shifted by one since it's already shifted. *) AddShiftedReg{regN=workReg1, regD=workReg1, regM=testReg, shift=ShiftLSL 0w1, setFlags=false, opSize=OpSize64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch} <::> BranchTable{ startLabel=tableLabel, brTable=caseLabels } end | codeExtended {flow} (PushExceptionHandler, code) = let (* This should only be within a block with a SetHandler flow type. *) val handleLabel = case flow of SetHandler{ handler, ...} => handler | _ => raise InternalError "codeGenICode: PushExceptionHandler" val labelRef = getBlockLabel handleLabel in (* Push the old handler and the handler entry point and set the "current handler" to point to the stack after we've pushed these. *) code <::> LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadLabelAddress(workReg2, labelRef) <::> StoreRegPair{regT1=workReg2, regT2=workReg1, regN=X_MLStackPtr, unitOffset= ~2, unscaledType=PreIndex, loadType=Load64} <::> StoreRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} end | codeExtended _ (PopExceptionHandler, code) = (* Remove and discard the handler we've set up. Pop the previous handler and put into "current handler". 
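A handler entry is two words: the entry-point address at the lower address and the saved previous handler above it. The pop below loads the pair post-indexed, discards the entry point into XZero and writes the previous handler back into the exceptionHandlerOffset slot.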
*) code <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} | codeExtended _ (BeginHandler{packetReg}, code) = let val beginHandleCode = code <::> (* The exception raise code resets the stack pointer to the value in the exception handler so this is probably redundant. Leave it for the moment. *) LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} in moveIfNecessary({src=X0, dst=packetReg }, beginHandleCode) end | codeExtended _ (CompareByteVectors{vec1Addr, vec2Addr, length, ...}, code) = let (* Construct a loop to compare two vectors of bytes. *) val vec1Reg = vec1Addr and vec2Reg = vec2Addr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker in code <::> (* Set the CC to Equal before we start in case length = 0 *) SubShiftedReg{regM=lenReg, regN=lenReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Start of loop *) CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Go to the end when len = zero *) (* Load the bytes for the comparison and increment each. *) LoadRegUnscaled{regT=workReg1, regN=vec1Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> LoadRegUnscaled{regT=workReg2, regN=vec2Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) (* Compare *) SubShiftedReg{regM=workReg2, regN=workReg1, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, loopLabel) <::> (* Loop if they're equal *) SetLabel exitLabel end | codeExtended _ (BlockMove{srcAddr, destAddr, length, isByteMove}, code) = let (* Construct a loop to move the data. *) val srcReg = srcAddr and destReg = destAddr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker val (offset, loadType) = if isByteMove then (1, Load8) else if is32in64 then (4, Load32) else (8, Load64) in code <::> SetLabel loopLabel (* Start of loop *) <::> CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Exit when length = 0 *) LoadRegUnscaled{regT=workReg1, regN=srcReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> StoreRegUnscaled{regT=workReg1, regN=destReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) UnconditionalBranch loopLabel <::> (* Back to the start *) SetLabel exitLabel end | codeExtended _ (AddSubXSP{ source, dest, isAdd }, code) = let val allocFreeCode = (if isAdd then AddExtendedReg else SubExtendedReg) {regM=source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64} :: code in case dest of XZero => allocFreeCode | destReg => (* We have to use add here to get the SP into the destination instead of the usual move.
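In the instruction encoding register 31 means XZR in the ORR-based register move but SP in ADD-immediate; MOV Xd, SP is in fact the alias ADD Xd, SP, #0, which is what the AddImmediate below produces.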
*) AddImmediate{regN=XSP, regD=destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64} :: allocFreeCode end | codeExtended _ (TouchValue _, code) = code (* Don't need to do anything now. *) (* Used in mutex operations. *) | codeExtended _ (LoadAcquireExclusive{ base, dest }, code) = LoadAcquireExclusiveRegister{regN=base, regT=dest} :: code | codeExtended _ (StoreReleaseExclusive{ base, source, result }, code) = StoreReleaseExclusiveRegister{regS=result, regT=source, regN=base} :: code | codeExtended _ (MemoryBarrier, code) = code <::> MemBarrier | codeExtended _ (ConvertIntToFloat{ source, dest, srcSize, destSize}, code) = (CvtIntToFP{regN=source, regD=dest, floatSize=destSize, opSize=srcSize}) :: code | codeExtended _ (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, code) = (CvtFloatToInt{regN=source, regD=dest, round=rounding, floatSize=srcSize, opSize=destSize}) :: code | codeExtended _ (UnaryFloatingPt{ source, dest, fpOp}, code) = (FPUnaryOp{regN=source, regD=dest, fpOp=fpOp}) :: code | codeExtended _ (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, code) = (FPBinaryOp{regN=arg1, regM=arg2, regD=dest, floatSize=opSize, fpOp=fpOp}) :: code | codeExtended _ (CompareFloatingPoint{ arg1, arg2, opSize, ...}, code) = (FPComparison{regN=arg1, regM=arg2, floatSize=opSize}) :: code | codeExtended _ (CPUYield, code) = code <::> Yield | codeExtended _ (AtomicOperation{ base, source, dest, atOp }, code) = AtomicExtension{regN=base, regS=source, regT=dest, atOp=atOp} :: code local (* processed - set to true when a block has been processed. *) val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) (* Find the blocks that reference this one. This isn't essential but allows us to try to generate blocks in the order of the control flow. This in turn may allow us to use short branches rather than long ones. *) val labelRefs = Array.array(numBlocks, []) datatype flowCode = FlowCodeSimple of int | FlowCodeCMove of {code: precode list, trueJump: int, falseJump: int} (* Process this recursively to set the references. If we have unreachable blocks, perhaps because they've been merged, we don't want to include them in the reference counting. This shouldn't happen now that IdentifyReferences removes unreferenced blocks. *) fun setReferences fromLabel toLabel = case Array.sub(labelRefs, toLabel) of [] => (* Not yet visited at all. *) let val BasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) val refs = case flow of ExitCode => [] | Unconditional lab => [lab] | Conditional{trueJump, falseJump, ... } => [trueJump, falseJump] | IndexedBr labs => labs | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val () = if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () in List.app (setReferences toLabel) refs end | refs => (* We've visited this at least once. Just add us to the list. *) Array.update(labelRefs, toLabel, fromLabel :: refs) val _ = setReferences 0 0 (* Process the blocks. We keep the "stack" explicit rather than using recursion because this allows us to select both arms of a conditional branch sooner. *) fun genCode(toDo, lastFlow, code) = case List.filter (not o haveProcessed) toDo of [] => let (* There's nothing left to do. We may need to add a final branch to the end.
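Flows that fall through (Unconditional, Conditional, SetHandler and ConditionalHandle continuations) get explicit branches here since nothing further will be emitted after this block; ExitCode, IndexedBr and UnconditionalHandle already transfer control themselves, so they add nothing.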
*) val finalBranch = case lastFlow of ExitCode => [] | IndexedBr _ => [] | Unconditional dest => [(UnconditionalBranch(getBlockLabel dest))] | Conditional { condition, trueJump, falseJump, ...} => [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] | SetHandler { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] in finalBranch @ code (* Done. *) end | stillToDo as head :: _ => let local (* Check the references. If all the sources that lead up to this have already been processed we won't have any backward jumps. *) fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) val continuation = case lastFlow of ExitCode => NONE | IndexedBr _ => NONE (* We could put the last branch in here. *) | Unconditional dest => if not (haveProcessed dest) andalso available dest then SOME(FlowCodeSimple dest) else NONE | Conditional {trueJump, falseJump, condition, ...} => let (* Can we replace this with a conditional set? If both arms simply set a register to a value and either return or jump to the same location we can. *) (* The most common case is producing a boolean result, 1 (tagged 0) for false and 3 (tagged 1) for true. We look for blocks that generate this and also functions that return this. *) val BasicBlock { flow=tFlow, block=tBlock, ...} = Vector.sub(blocks, trueJump) and BasicBlock { flow=fFlow, block=fBlock, ...} = Vector.sub(blocks, falseJump) + datatype moveType = MoveReg of xReg | MoveAddr of machineWord | MoveNonAddr of Word64.word + + fun findMoveAndTail [] = (SOME NONE, []) + | findMoveAndTail (MoveRegister{source, dest} :: tl) = (SOME(SOME(dest, MoveReg source)), tl) + | findMoveAndTail (LoadAddressConstant{source, dest} :: tl) = (SOME(SOME(dest, MoveAddr source)), tl) + | findMoveAndTail (LoadNonAddressConstant{source, dest} :: tl) = (SOME(SOME(dest, MoveNonAddr source)), tl) + (* The result could be in a different register which will need to be moved to X0. *) + | findMoveAndTail (tl as [ReturnResultFromFunction{results=[(resRegT, realRegT)], ...}]) = (SOME(SOME(realRegT, MoveReg resRegT)), tl) + | findMoveAndTail tl = (NONE, tl) + val isPossSetCCOrCmov = if not (haveProcessed trueJump) andalso available trueJump andalso not (haveProcessed falseJump) andalso available falseJump - then case (tFlow, fFlow, tBlock, fBlock) of - (Unconditional tDest, Unconditional fDest, - [LoadNonAddressConstant{ source=sourceTrue, dest=destTrue }], - [LoadNonAddressConstant{ source=sourceFalse, dest=destFalse }]) => - if tDest = fDest andalso destTrue = destFalse - then if sourceFalse = 0w1 - then SOME{code= - (* We can generate 1 by using CSINC and XZero. *) - code <::> LoadNonAddr(destTrue, sourceTrue) <::> - ConditionalSet{regD=destTrue, regTrue=destTrue, regFalse=XZero, cond=condition, condSet=CondSetIncr, opSize=OpSize64}, - trueJump=trueJump, falseJump=falseJump} - else if sourceTrue = 0w1 - then SOME{code= - (* We can generate 1 by using CSINC and XZero.
*) - code <::> LoadNonAddr(destTrue, sourceFalse) <::> - ConditionalSet{regD=destTrue, regTrue=destTrue, regFalse=XZero, cond=invertTest condition, condSet=CondSetIncr, opSize=OpSize64}, - trueJump=trueJump, falseJump=falseJump} - else NONE - else NONE - - | (ExitCode, ExitCode, - [LoadNonAddressConstant{ source=sourceTrue, dest=destTrue }, - ReturnResultFromFunction { results=[(retRT, retRTR)], fpResults=[], returnReg=retaT, numStackArgs=numST }], - [LoadNonAddressConstant{ source=sourceFalse, dest=destFalse }, - ReturnResultFromFunction { results=[(retRF, retRFR)], fpResults=[], returnReg=retaF, numStackArgs=numSF }]) => - if destTrue = retRT andalso destFalse = retRF (* These ought to X0. *) - then - let - val _ = retRTR = X0 andalso retRFR = X0 orelse raise InternalError "Test for condmove: mismatch regs" - val _ = retaT = retaF orelse raise InternalError "Test for condmove: mismatched return regs" - val _ = numST = numSF orelse raise InternalError "Test for condmove: mismatched stack" - fun resetAndReturn code = - (if numST = 0 - then code - else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, - value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt numSF}, code) - ) <::> BranchReg{regD=retaT, brRegType=BRRReturn} - in - if sourceFalse = 0w1 - then SOME{code= - resetAndReturn( - code <::> LoadNonAddr(destTrue, sourceTrue) <::> - ConditionalSet{regD=destTrue, regTrue=destTrue, regFalse=XZero, cond=condition, condSet=CondSetIncr, opSize=OpSize64}), - trueJump=trueJump, falseJump=falseJump} - else if sourceTrue = 0w1 - then SOME{code= - resetAndReturn( - code <::> LoadNonAddr(destTrue, sourceFalse) <::> - ConditionalSet{regD=destTrue, regTrue=destTrue, regFalse=XZero, cond=invertTest condition, condSet=CondSetIncr, opSize=OpSize64}), - trueJump=trueJump, falseJump=falseJump} - else NONE - end - else NONE - - | _ => NONE + then + let + (* If we have a conditional branch and each arm simply sets the + result register to a constant or another register we can + replace this with a conditional move. Apart from reducing the size of + the code this should reduce the chance of a processor pre-fetch stall. + We may have this either with an internal branch or a return. *) + fun makeCond(reg, sourceT, MoveNonAddr 0w1) = + let + (* We can generate 0w1 using CSINC XZ. Because constants + are almost always tagged we don't bother with zero itself. 
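+ For example: CSINC Xd, Xn, XZR, cond yields Xn when the condition holds and XZR+1 = 1 otherwise, so the tagged representation of false never needs its own constant load; only the other arm's value, if not already in a register, is first materialised into a work register.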
*) + val (codeT, regT) = + case sourceT of + MoveReg regT => (code, regT) + | MoveAddr addr => (code <::> LoadAddr(workReg1, addr), workReg1) + | MoveNonAddr value => (code <::> LoadNonAddr(workReg1, value), workReg1) + in + codeT <::> + ConditionalSet{regD=reg, regTrue=regT, regFalse=XZero, cond=condition, + condSet=CondSetIncr, opSize=OpSize64} + end + + | makeCond(reg, MoveNonAddr 0w1, sourceF) = + let + val (codeF, regF) = + case sourceF of + MoveReg regF => (code, regF) + | MoveAddr addr => (code <::> LoadAddr(workReg2, addr), workReg2) + | MoveNonAddr value => (code <::> LoadNonAddr(workReg2, value), workReg2) + in + codeF <::> + ConditionalSet{regD=reg, regTrue=regF, regFalse=XZero, cond=invertTest condition, + condSet=CondSetIncr, opSize=OpSize64} + end + + | makeCond(reg, sourceT, sourceF) = + let + val (codeT, regT) = + case sourceT of + MoveReg regT => (code, regT) + | MoveAddr addr => (code <::> LoadAddr(workReg1, addr), workReg1) + | MoveNonAddr value => (code <::> LoadNonAddr(workReg1, value), workReg1) + val (codeF, regF) = + case sourceF of + MoveReg regF => (codeT, regF) + | MoveAddr addr => (codeT <::> LoadAddr(workReg2, addr), workReg2) + | MoveNonAddr value => (codeT <::> LoadNonAddr(workReg2, value), workReg2) + in + codeF <::> + ConditionalSet{regD=reg, regTrue=regT, regFalse=regF, cond=condition, + condSet=CondSet, opSize=OpSize64} + end + in + case (tFlow, fFlow) of + (Unconditional tDest, Unconditional fDest) => + if tDest <> fDest then NONE + else + let + val (tInstr, tTl) = findMoveAndTail tBlock + and (fInstr, fTl) = findMoveAndTail fBlock + in + case (fTl, tTl, tInstr, fInstr) of + ([], [], SOME tIns, SOME fIns) => (* We should have at most one move. *) + let + in + case (tIns, fIns) of + (NONE, NONE) => NONE (* Both empty??*) + | (SOME (regT, sourceT), NONE) => + SOME({code=makeCond(regT, sourceT, MoveReg regT), trueJump=trueJump, falseJump=falseJump}) + | (NONE, SOME (regF, sourceF)) => + SOME({code=makeCond(regF, MoveReg regF, sourceF), trueJump=trueJump, falseJump=falseJump}) + | (SOME (regT, sourceT), SOME (regF, sourceF)) => + if regT <> regF then NONE + else SOME({code=makeCond(regF, sourceT, sourceF), trueJump=trueJump, falseJump=falseJump}) + end + + | _ => NONE + end + + | (ExitCode, ExitCode) => + let + val (tInstr, tTl) = findMoveAndTail tBlock + and (fInstr, fTl) = findMoveAndTail fBlock + (* We can use a conditional set if we have the same return after any move. + This is slightly simpler than the branch case because the result will + always end up in X0. 
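+ For instance, a function whose body is just a comparison returning a boolean compiles to two single-move blocks that each return; after this rewrite the compare is followed by one CSINC, at most one shared stack reset, and a single return.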
*) + in + case (tTl, fTl, tInstr, fInstr) of + ([ReturnResultFromFunction{results=[(_, realRegT)], fpResults=[], returnReg=retRegT, numStackArgs=nsT}], + [ReturnResultFromFunction{results=[(_, realRegF)], fpResults=[], returnReg=retRegF, numStackArgs=nsF}], + SOME(SOME (regT, sourceT)), SOME (SOME (regF, sourceF))) => + if regT <> regF then NONE + else + let + val _ = realRegT = X0 andalso realRegF = X0 andalso retRegT = retRegF andalso nsT = nsF + orelse raise InternalError "isPossSetCCOrCmov: return mismatch" + val condCode = makeCond(regF, sourceT, sourceF) + val withReset = + if nsT = 0 + then condCode + else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, + value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nsT}, condCode) + val withReturn = + withReset <::> BranchReg{regD=retRegT, brRegType=BRRReturn} + in + SOME({code=withReturn, trueJump=trueJump, falseJump=falseJump}) + end + | _ => NONE + + end + + | _ => NONE + end + else NONE in case isPossSetCCOrCmov of NONE => (* We can usually choose either destination and in nearly all cases it won't matter. The ARM doesn't seem to define what happens if a conditional branch hasn't been seen before. Assume it's the same as the X86 and that conditional forward branches aren't taken. Arrange this so that something that raises an exception is assumed to be "unusual". *) let val (first, second) = case (condition, Vector.sub(blocks, falseJump)) of (CondNoOverflow, _) => (trueJump, falseJump) | (_, BasicBlock{ flow=ExitCode, block, ...}) => if List.exists(fn RaiseExceptionPacket _ => true | _ => false) block then (trueJump, falseJump) else (falseJump, trueJump) | _ => (falseJump, trueJump) in if not (haveProcessed first) andalso available first then SOME(FlowCodeSimple first) else if not (haveProcessed second) andalso available second then SOME(FlowCodeSimple second) else NONE end | SOME args => SOME(FlowCodeCMove args) end | SetHandler { continue, ... } => (* We want the continuation if possible. We'll need a branch round the handler so that won't help. *) if not (haveProcessed continue) andalso available continue then SOME(FlowCodeSimple continue) else NONE | UnconditionalHandle _ => NONE | ConditionalHandle _ => NONE in (* First choice - continue the existing block. Second choice - the first item whose sources have all been processed. Third choice - something from the list. *) val picked = case continuation of SOME c => c | NONE => case List.find available stillToDo of SOME c => FlowCodeSimple c | NONE => FlowCodeSimple head end in case picked of FlowCodeSimple picked => let val () = Array.update(processed, picked, true) (* Code to terminate the previous block. *) val startCode = case lastFlow of ExitCode => [] | IndexedBr _ => [] | UnconditionalHandle _ => [] | Unconditional dest => if dest = picked then [] else [(UnconditionalBranch(getBlockLabel dest))] | ConditionalHandle { continue, ...} => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | SetHandler { continue, ... } => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | Conditional { condition, trueJump, falseJump, ...} => if picked = falseJump (* Usual case. *) then [(ConditionalBranch(condition, getBlockLabel trueJump))] else if picked = trueJump then (* We have a jump to the true condition. Invert the jump. This is more than an optimisation. Because this immediately precedes the true block we're not going to generate a label. 
*) [(ConditionalBranch(invertTest condition, getBlockLabel falseJump))] else [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] (* Code-generate the body with the code we've done so far at the end. Add a label at the start if necessary. *) local (* If the previous block dropped through to this and this was the only reference then we don't need a label. *) fun onlyJumpingHere (lab: int) = if lab <> picked then false else case Array.sub(labelRefs, picked) of [singleton] => singleton = lab | _ => false val noLabel = case lastFlow of ExitCode => picked = 0 (* Unless this was the first block. *) | Unconditional dest => onlyJumpingHere dest | Conditional { trueJump, falseJump, ...} => onlyJumpingHere trueJump orelse onlyJumpingHere falseJump | IndexedBr _ => false | SetHandler _ => false | UnconditionalHandle _ => false | ConditionalHandle { continue, ...} => onlyJumpingHere continue in val startLabel = if noLabel then [] else [(SetLabel(getBlockLabel picked))] end val BasicBlock { flow, block, ...} = Vector.sub(blocks, picked) local fun genCodeBlock(instr, code) = codeExtended {flow=flow} (instr, code) in val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] in genCode(addSet @ stillToDo, flow, bodyCode) end | FlowCodeCMove{code, trueJump, falseJump} => let (* We've generated a conditional move and possibly a return. If the trueJump and falseJump are only ever referenced from this block they're done, otherwise we still need to do them. *) val _ = case Array.sub(labelRefs, trueJump) of [_] => Array.update(processed, trueJump, true) | _ => () val _ = case Array.sub(labelRefs, falseJump) of [_] => Array.update(processed, falseJump, true) | _ => () val BasicBlock { flow, ...} = Vector.sub(blocks, trueJump) val addSet = case flow of ExitCode => [] | Unconditional dest => [dest] | _ => raise InternalError "FlowCodeCMove" in genCode(addSet @ stillToDo, flow, code) end end in val ops = genCode([0], ExitCode, [(SetLabel startOfFunctionLabel)]) end in generateFinalCode{instrs=List.rev ops, name=functionName, resultClosure=resultClosure, parameters=debugSwitches, profileObject=profileObject, labelMaker= labelMaker} end structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and xReg = xReg and vReg = vReg and closureRef = closureRef end end;