diff --git a/mlsource/MLCompiler/BUILTINS.sml b/mlsource/MLCompiler/BUILTINS.sml index 0bbea279..82a612e0 100644 --- a/mlsource/MLCompiler/BUILTINS.sml +++ b/mlsource/MLCompiler/BUILTINS.sml @@ -1,118 +1,117 @@ (* Signature for built-in functions - Copyright David C. J. Matthews 2016, 2018-21 + Copyright David C. J. Matthews 2016, 2018-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature BUILTINS = sig datatype testConditions = TestEqual (* No TestNotEqual because that is always generated with "not" *) | TestLess | TestLessEqual | TestGreater | TestGreaterEqual | TestUnordered (* Reals only. *) datatype arithmeticOperations = ArithAdd | ArithSub | ArithMult | ArithQuot | ArithRem | ArithDiv | ArithMod datatype logicalOperations = LogicalAnd | LogicalOr | LogicalXor datatype shiftOperations = ShiftLeft | ShiftRightLogical (* Logical shift - zero added bits. *) | ShiftRightArithmetic (* Arithmetic shift - add the sign bit. *) datatype unaryOps = NotBoolean (* true => false; false => true - XOR *) | IsTaggedValue (* Test the tag bit. *) | MemoryCellLength (* Return the length of a memory cell (heap object) *) | MemoryCellFlags (* Return the flags byte of a memory cell (heap object) *) | ClearMutableFlag (* Remove the mutable flag from the flags byte *) | LongWordToTagged (* Convert a LargeWord.word to a Word.word or FixedInt.int. *) | SignedToLongWord (* Convert a tagged value to a LargeWord with sign extension. *) | UnsignedToLongWord (* Convert a tagged value to a LargeWord without sign extension. *) | RealAbs of precision (* Set the sign bit of a real to positive. *) | RealNeg of precision (* Invert the sign bit of a real. *) | RealFixedInt of precision (* Convert an integer value into a real value. *) | FloatToDouble (* Convert a single precision floating point value to double precision. *) | DoubleToFloat (* Convert a double precision floating point value to single precision using current rounding mode. *) | RealToInt of precision * IEEEReal.rounding_mode (* Convert a double or float to a fixed precision int. *) | TouchAddress (* Ensures that the cell is reachable. *) | AllocCStack (* Allocate space on the C stack. *) | LockMutex (* Try to lock a mutex, returning true if it succeeded. If it failed the thread must block. *) | TryLockMutex (* Try to lock a mutex but if it failed the thread will not block. *) | UnlockMutex (* Unlock a mutex. Returns false if there are blocked threads that must be woken. *) and precision = PrecSingle | PrecDouble (* Single or double precision floating pt. *) and binaryOps = (* Compare two words and return the result. This is used for both word values (isSigned=false) and fixed precision integer (isSigned=true). Values must be tagged and not pointers. *) WordComparison of { test: testConditions, isSigned: bool } (* Fixed precision int operations. These may raise Overflow. *) | FixedPrecisionArith of arithmeticOperations (* Arithmetic operations on word values. 
These do not raise Overflow. *) | WordArith of arithmeticOperations (* Load a word at a specific offset in a heap object. If this is immutable and the arguments are constants it can be folded at compile time since the result will never change. *) | WordLogical of logicalOperations (* Logical operations on words. *) | WordShift of shiftOperations (* Shift operations on words. *) (* Allocate a heap cell for byte data. The first argument is the number of words (not bytes) needed. The second argument is the "flags" byte which must include F_bytes and F_mutable. The new cell is not initialised. *) | AllocateByteMemory (* Operations on LargeWords. These are 32/64 bit values that are "boxed". *) | LargeWordComparison of testConditions | LargeWordArith of arithmeticOperations | LargeWordLogical of logicalOperations | LargeWordShift of shiftOperations | RealComparison of testConditions * precision | RealArith of arithmeticOperations * precision (* Equality of values which could be pointers or tagged values. At the lowest level this is the same as WordComparison but if we try to use an indexed case there must be a check that the values are tagged. *) | PointerEq | FreeCStack (* Free space on the C stack. *) and nullaryOps = (* Get the current thread id *) GetCurrentThreadId - (* Check whether the last RTS call set the exception status and raise it if it had. *) - | CheckRTSException + | CPUPause (* Pause a CPU while waiting for a spinlock. *) (* Allocate memory for a mutex *) | CreateMutex val unaryRepr: unaryOps -> string and binaryRepr: binaryOps -> string and testRepr: testConditions -> string and arithRepr: arithmeticOperations -> string and nullaryRepr: nullaryOps -> string end; diff --git a/mlsource/MLCompiler/CODETREE.sig b/mlsource/MLCompiler/CODETREE.sig index 5fc2a4c5..7e0ef890 100644 --- a/mlsource/MLCompiler/CODETREE.sig +++ b/mlsource/MLCompiler/CODETREE.sig @@ -1,164 +1,164 @@ (* - Copyright (c) 2012,13,15-21 David C.J. Matthews + Copyright (c) 2012,13,15-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature CODETREE = sig type machineWord type codetree type pretty type codeBinding type level datatype argumentType = GeneralType | DoubleFloatType | SingleFloatType | ContainerType of int and loadStoreKind = LoadStoreMLWord of {isImmutable: bool} | LoadStoreMLByte of {isImmutable: bool} | LoadStoreC8 | LoadStoreC16 | LoadStoreC32 | LoadStoreC64 | LoadStoreCFloat | LoadStoreCDouble | LoadStoreUntaggedUnsigned and blockOpKind = BlockOpMove of {isByteMove: bool} | BlockOpEqualByte | BlockOpCompareByte structure BuiltIns: BUILTINS datatype arbPrecisionOps = ArbCompare of BuiltIns.testConditions | ArbArith of BuiltIns.arithmeticOperations val CodeTrue: codetree (* code for "true" *) val CodeFalse: codetree (* code for "false" *) val CodeZero: codetree (* code for 0, nil etc. 
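(* The change above replaces the CheckRTSException nullary op with CPUPause,
   which emits a CPU pause/yield hint while a thread is spinning on a lock,
   and the unary ops now include LockMutex, TryLockMutex and UnlockMutex.
   A minimal sketch of the spin-then-block protocol these are intended to
   support; tryLock, pause and block are assumed stand-ins for the
   primitives, not the real Thread.Mutex implementation. *)
fun spinThenBlock (tryLock: unit -> bool, pause: unit -> unit, block: unit -> unit) =
let
    fun spin 0 = block ()                      (* stop spinning and sleep in the RTS *)
      | spin n =
        if tryLock () then ()                  (* acquired the mutex *)
        else (pause (); spin (n - 1))          (* CPUPause, then retry *)
in
    spin 100
end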
*) val mkFunction: { body: codetree, argTypes:argumentType list, resultType: argumentType, name: string, closure: codetree list, numLocals: int } -> codetree val mkInlineFunction: { body: codetree, argTypes:argumentType list, resultType: argumentType, name: string, closure: codetree list, numLocals: int } -> codetree val mkCall: codetree * (codetree * argumentType) list * argumentType -> codetree val mkLoadLocal: int -> codetree and mkLoadArgument: int -> codetree and mkLoadClosure: int -> codetree val mkConst: machineWord -> codetree val mkInd: int * codetree -> codetree val mkVarField: int * codetree -> codetree val mkProc: codetree * int * string * codetree list * int -> codetree val mkInlproc: codetree * int * string * codetree list * int -> codetree val mkMacroProc: codetree * int * string * codetree list * int -> codetree val mkIf: codetree * codetree * codetree -> codetree val mkWhile: codetree * codetree -> codetree val mkEnv: codeBinding list * codetree -> codetree val mkStr: string -> codetree val mkTuple: codetree list -> codetree val mkDatatype: codetree list -> codetree val mkRaise: codetree -> codetree val mkCor: codetree * codetree -> codetree val mkCand: codetree * codetree -> codetree val mkHandle: codetree * codetree * int -> codetree val mkEval: codetree * codetree list -> codetree val identityFunction: string -> codetree val mkSetContainer: codetree * codetree * int -> codetree val mkTupleFromContainer: int * int -> codetree val mkTagTest: codetree * word * word -> codetree val mkBeginLoop: codetree * (int * codetree) list -> codetree val mkLoop: codetree list -> codetree val mkDec: int * codetree -> codeBinding val mkMutualDecs: (int * codetree) list -> codeBinding val mkNullDec: codetree -> codeBinding val mkContainer: int * int * codetree -> codeBinding val mkNot: codetree -> codetree val mkIsShort: codetree -> codetree val mkEqualTaggedWord: codetree * codetree -> codetree val mkEqualPointerOrWord: codetree * codetree -> codetree val equalTaggedWordFn: codetree val equalPointerOrWordFn: codetree val decSequenceWithFinalExp: codeBinding list -> codetree val pretty: codetree -> pretty val evalue: codetree -> machineWord option val genCode: codetree * Universal.universal list * int -> (unit -> codetree) (* Helper functions to build closure. *) val mkLoad: int * level * level -> codetree and mkLoadParam: int * level * level -> codetree val baseLevel: level val newLevel: level -> level val getClosure: level -> codetree list val multipleUses: codetree * (unit -> int) * level -> {load: level -> codetree, dec: codeBinding list} val mkUnary: BuiltIns.unaryOps * codetree -> codetree and mkBinary: BuiltIns.binaryOps * codetree * codetree -> codetree val mkUnaryFn: BuiltIns.unaryOps -> codetree and mkBinaryFn: BuiltIns.binaryOps -> codetree and mkArbitraryFn: arbPrecisionOps -> codetree val getCurrentThreadId: codetree and getCurrentThreadIdFn: codetree - and checkRTSException: codetree + and cpuPauseFn: codetree and createMutexFn: codetree val mkAllocateWordMemory: codetree * codetree * codetree -> codetree and mkAllocateWordMemoryFn: codetree (* Load and store operations. At this level the first operand is the base address and the second is an index. 
*) val mkLoadOperation: loadStoreKind * codetree * codetree -> codetree val mkLoadOperationFn: loadStoreKind -> codetree val mkStoreOperation: loadStoreKind * codetree * codetree * codetree -> codetree val mkStoreOperationFn: loadStoreKind -> codetree val mkBlockOperation: {kind:blockOpKind, leftBase: codetree, rightBase: codetree, leftIndex: codetree, rightIndex: codetree, length: codetree} -> codetree val mkBlockOperationFn: blockOpKind -> codetree structure Foreign: FOREIGNCALL structure Sharing: sig type machineWord = machineWord type codetree = codetree type pretty = pretty type argumentType=argumentType type codeBinding = codeBinding type level = level end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig index b0d53275..220fe16c 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig @@ -1,441 +1,444 @@ (* Signature for the high-level ARM64 code Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) signature ARM64ICODE = sig type machineWord = Address.machineWord type address = Address.address type closureRef (* Registers. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word (* It is simpler to use a single type for all registers. *) datatype reg = GenReg of xReg | FPReg of vReg val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg val is32in64: bool and isBigEndian: bool (* Condition for conditional branches etc. *) datatype condition = CondEqual (* Z=1 *) | CondNotEqual (* Z=0 *) | CondCarrySet (* C=1 *) | CondCarryClear (* C=0 *) | CondNegative (* N=1 *) | CondPositive (* N=0 imcludes zero *) | CondOverflow (* V=1 *) | CondNoOverflow (* V=0 *) | CondUnsignedHigher (* C=1 && Z=0 *) | CondUnsignedLowOrEq (* ! (C=1 && Z=0) *) | CondSignedGreaterEq (* N=V *) | CondSignedLess (* N<>V *) | CondSignedGreater (* Z==0 && N=V *) | CondSignedLessEq (* !(Z==0 && N=V) *) (* The shift used in arithemtic operations. *) and shiftType = ShiftLSL of Word8.word | ShiftLSR of Word8.word | ShiftASR of Word8.word | ShiftNone datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than word if this is a container or a handler entry. 
*) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and callKind = Recursive | ConstantCode of machineWord | FullCall and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. *) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value.. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed sized piece of memory and puts the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. 
*) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd.*) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. *) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A.. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. 
The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, stackAdjust: int, currStackSize: int } (* Return from the function. resultRegs are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { results: ('genReg * xReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack.. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. *) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. 
This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. *) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to either 0 or 1 if it succeeds or fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. 
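(* Relating to the LoadAcquire/StoreRelease pair described above, an
   illustration of the guarantee rather than compiler code: the ref
   assignment becomes a StoreRelease and the dereference a LoadAcquire, so a
   reader that sees SOME v is also guaranteed to see the fully built list
   that v points to. *)
val shared : int list option ref = ref NONE
fun writer () = shared := SOME (List.tabulate (1000, fn i => i))     (* release store *)
fun reader () = case !shared of SOME v => List.length v | NONE => 0  (* acquire load *)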
*) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } + (* Yield control during a spin-lock. *) + | CPUYield + (* Debugging - fault if values don't match. *) | CacheCheck of { arg1: 'genReg, arg2: 'genReg } (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condional is false, falseJump if false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. If an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } (* Return the successor blocks from a control flow. *) val successorBlocks: controlFlow -> int list type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock val printICodeAbstract: basicBlockAbstract vector * (string -> unit) -> unit and printICodeConcrete: basicBlockConcrete vector * (string -> unit) -> unit (* Check whether this value is acceptable for LogicalImmediate. *) val isEncodableBitPattern: Word64.word * opSize -> bool (* This generates a BitField instruction with the appropriate values for immr and imms. *) val shiftConstant: { direction: shiftDirection, dest: preg, source: preg, shift: word, opSize: opSize } -> iCodeAbstract structure Sharing: sig type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig index 5992e264..0b643099 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig @@ -1,260 +1,261 @@ (* Copyright (c) 2021-2 David C. J. 
Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *) signature ARM64PREASSEMBLY = sig type closureRef type machineWord = Address.machineWord (* XZero and XSP are both encoded as 31 but the interpretation depends on the instruction The datatype definition is included here to allow for pattern matching on XSP and XZero. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val X_MLHeapLimit: xReg (* ML Heap limit pointer *) and X_MLAssemblyInt: xReg (* ML assembly interface pointer. *) and X_MLHeapAllocPtr: xReg (* ML Heap allocation pointer. *) and X_MLStackPtr: xReg (* ML Stack pointer. *) and X_LinkReg: xReg (* Link reg - return address *) and X_Base32in64: xReg (* X24 is used for the heap base in 32-in-64. *) val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg (* Condition for conditional branches etc. *) datatype condition = CondEqual (* Z=1 *) | CondNotEqual (* Z=0 *) | CondCarrySet (* C=1 *) | CondCarryClear (* C=0 *) | CondNegative (* N=1 *) | CondPositive (* N=0 imcludes zero *) | CondOverflow (* V=1 *) | CondNoOverflow (* V=0 *) | CondUnsignedHigher (* C=1 && Z=0 *) | CondUnsignedLowOrEq (* ! (C=1 && Z=0) *) | CondSignedGreaterEq (* N=V *) | CondSignedLess (* N<>V *) | CondSignedGreater (* Z==0 && N=V *) | CondSignedLessEq (* !(Z==0 && N=V) *) val invertTest: condition -> condition (* i.e. jump when the condition is not true. *) val condToString: condition -> string datatype shiftType = ShiftLSL of Word8.word | ShiftLSR of Word8.word | ShiftASR of Word8.word | ShiftNone datatype wordSize = WordSize32 | WordSize64 datatype 'a extend = ExtUXTB of 'a (* Unsigned extend byte *) | ExtUXTH of 'a (* Unsigned extend byte *) | ExtUXTW of 'a (* Unsigned extend byte *) | ExtUXTX of 'a (* Left shift *) | ExtSXTB of 'a (* Sign extend byte *) | ExtSXTH of 'a (* Sign extend halfword *) | ExtSXTW of 'a (* Sign extend word *) | ExtSXTX of 'a (* Left shift *) (* Load/store instructions have only a single bit for the shift. For byte operations this is one bit shift; for others it scales by the size of the operand if set. 
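(* A sketch of the scaling rule just described: the left shift applied to the
   index register for each load size when ScaleOrShift is selected (for byte
   operations the bit selects a shift of zero either way). This illustrates
   the encoding only; it is not code from the assembler. *)
fun indexShiftSketch (ScaleOrShift, Load64) = 3   (* index counts 8-byte units *)
  | indexShiftSketch (ScaleOrShift, Load32) = 2
  | indexShiftSketch (ScaleOrShift, Load16) = 1
  | indexShiftSketch (ScaleOrShift, Load8)  = 0
  | indexShiftSketch (NoScale, _) = 0             (* index is a plain byte offset *)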
*) datatype scale = ScaleOrShift | NoScale datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP and unscaledType = NoUpdate | PreIndex | PostIndex and condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate and bitfieldKind = BFUnsigned | BFSigned | BFInsert and brRegType = BRRBranch | BRRAndLink | BRRReturn type label and labelMaker val createLabelMaker: unit -> labelMaker and createLabel: labelMaker -> label datatype precode = (* Basic instructions *) AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind} | DivideRegs of {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize} | LogicalShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool} | LoadRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | LoadFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | StoreRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | StoreFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | LoadRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | LoadFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} (* LoadAcquire and StoreRelease are used for mutables. *) | LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType} | StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType} (* LoadAcquireExclusiveRegister and StoreReleaseExclusiveRegister are used for mutexes. 
*) | LoadAcquireExclusiveRegister of {regN: xReg, regT: xReg} | StoreReleaseExclusiveRegister of {regS: xReg, regT: xReg, regN: xReg} | MemBarrier | LoadRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | ConditionalSet of {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet, opSize: opSize} | BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind} | ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection} | BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp} (* Floating point *) | MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize} | MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize} | CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize} | CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize} | FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary} | FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize} | FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary} (* Branches and Labels. *) | SetLabel of label | ConditionalBranch of condition * label | UnconditionalBranch of label | BranchAndLink of label | BranchReg of {regD: xReg, brRegType: brRegType } | LoadLabelAddress of xReg * label | TestBitBranch of { test: xReg, bit: Word8.word, label: label, onZero: bool } | CompareBranch of { test: xReg, label: label, onZero: bool, opSize: opSize } (* Composite instructions *) | MoveXRegToXReg of {sReg: xReg, dReg: xReg} | LoadNonAddr of xReg * Word64.word | LoadAddr of xReg * machineWord | RTSTrap of { rtsEntry: int, work: xReg, save: xReg list } (* Allocate memory - bytes includes the length word and rounding. *) | AllocateMemoryFixedSize of { bytes: word, dest: xReg, save: xReg list, work: xReg } (* Allocate memory - sizeReg is number of ML words needed for cell. *) | AllocateMemoryVariableSize of { sizeReg: xReg, dest: xReg, save: xReg list, work: xReg } (* Branch table for indexed case. startLabel is the address of the first label in the list. The branch table is a sequence of unconditional branches. *) | BranchTable of { startLabel: label, brTable: label list } | LoadGlobalHeapBaseInCallback of xReg + | Yield (* Wrapper for BitField *) val shiftConstant: { direction: shiftDirection, regD: xReg, regN: xReg, shift: word, opSize: opSize } -> precode (* Convenient sequences. N.B. These are in reverse order. *) val boxDouble: {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list and boxFloat: {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list and boxSysWord: {source: xReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list (* Create the vector of code from the list of instructions and update the closure reference to point to it. 
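(* A hedged sketch of the spin-lock acquire sequence that the new Yield hint
   is intended for, written with the pre-assembly constructors above. The
   register assignments (X0 = address of the lock word, X1/X2 as work
   registers) and the encoding 0 = free, 1 = held are assumptions made for
   illustration; this is not the sequence the compiler actually generates. *)
fun spinLockSketch (lm: labelMaker) : precode list =
let
    val retry = createLabel lm and spin = createLabel lm and done = createLabel lm
in
    [ SetLabel retry,
      LoadAcquireExclusiveRegister {regN=X0, regT=X1},                  (* X1 := [X0], exclusive *)
      SubImmediate {regN=X1, regD=XZero, immed=0w0, shifted=false,
                    opSize=OpSize64, setFlags=true},                    (* compare with 0 (free) *)
      ConditionalBranch (CondNotEqual, spin),                           (* already held: spin *)
      LoadNonAddr (X1, 0w1),
      StoreReleaseExclusiveRegister {regS=X2, regT=X1, regN=X0},        (* try [X0] := 1 *)
      CompareBranch {test=X2, label=retry, onZero=false, opSize=OpSize32}, (* lost exclusivity: retry *)
      UnconditionalBranch done,
      SetLabel spin,
      Yield,                                                            (* pause, then try again *)
      UnconditionalBranch retry,
      SetLabel done ]
end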
*) val generateFinalCode: {instrs: precode list, name: string, parameters: Universal.universal list, resultClosure: closureRef, profileObject: machineWord, labelMaker: labelMaker} -> unit (* Offsets in the assembly code interface pointed at by X26 These are in units of 64-bits NOT bytes. *) val heapOverflowCallOffset: int and stackOverflowCallOffset: int and stackOverflowXCallOffset: int and exceptionHandlerOffset: int and stackLimitOffset: int and threadIdOffset: int and heapLimitPtrOffset: int and heapAllocPtrOffset: int and mlStackPtrOffset: int and exceptionPacketOffset: int val is32in64: bool and isBigEndian: bool val isEncodableBitPattern: Word64.word * wordSize -> bool structure Sharing: sig type closureRef = closureRef type loadType = loadType type opSize = opSize type logicalOp = logicalOp type floatSize = floatSize type shiftDirection = shiftDirection type multKind = multKind type fpUnary = fpUnary type fpBinary = fpBinary type unscaledType = unscaledType type condSet = condSet type bitfieldKind = bitfieldKind type brRegType = brRegType type precode = precode type xReg = xReg type vReg = vReg type label = label type labelMaker = labelMaker type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML index 1ada6681..ba45ae29 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML @@ -1,1297 +1,1299 @@ (* Copyright David C. J. Matthews 2016-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64AllocateRegisters( structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ALLOCATEREGISTERS = struct open Arm64ICode open Identify open IntSet exception InternalError = Misc.InternalError val checkCache = false (* Use the cache *) datatype allocateResult = AllocateSuccess of basicBlockConcrete vector | AllocateFailure of intSet list (* General registers. X24 is used as the global heap base in 32-in-64. X30 is the return address set by blr but is otherwise a general register. Put the argument registers at the end of the list so they'll only be used when hinted. *) val generalRegisters = map GenReg ([X9, X10, X11, X12, X13, X14, X15, X19, X20, X21, X22, X23, X0, X1, X2, X3, X4, X5, X6, X7, X8, X30] @ (if is32in64 then [] else [X24])) val floatingPtRegisters = map FPReg [V7, V6, V5, V4, V3, V2, V1] type conflictState = { conflicts: intSet, realConflicts: reg list } type triple = {instr: iCodeAbstract, current: intSet, active: intSet} exception InternalError = Misc.InternalError (* Get the conflict states, allocate registers and return the code with the allocated registers if it is successful. 
*) fun allocateRegisters{blocks, regProps, maxPRegs, ...} = let (* Other registers that conflict with this i.e. cannot share the same real register. *) val regConflicts = Array.array(maxPRegs, emptySet) (* Real registers that cannot be used for this because they are needed for an instruction. Only X30 in calls and RTS traps. *) and regRealConflicts = Array.array(maxPRegs, []: reg list) fun addConflictsTo(addTo, conflicts) = List.app(fn aReg => Array.update(regConflicts, aReg, union(Array.sub(regConflicts, aReg), conflicts))) addTo (* To reserve a register we need to add the real register to the real conflict sets of all the abstract conflicts. *) local fun isInset reg set = List.exists (fn r => r = reg) set in fun reserveRegister(reserveFor, reg) = let val absConflicts = Array.sub(regConflicts, reserveFor) fun addConflict i = if i = reserveFor then () else addRealConflict (i, reg) in List.app addConflict (setToList absConflicts) end and addRealConflict (i, reg) = let val currentConflicts = Array.sub(regRealConflicts, i) in if isInset reg currentConflicts then () else Array.update(regRealConflicts, i, reg :: currentConflicts) end end fun conflictsForInstr passThrough {instr, current, ...} = let val {sources, dests} = getInstructionRegisters instr fun regNo(PReg i) = i val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos val afterRemoveDests = minus(current, destSet) local (* In almost all circumstances the destination and sources don't conflict and the same register can be used as a destination and a source. BoxLarge can only store the value after the memory has been allocated. BitFieldInsert has to copy the "destAsSource" value into the destination so cannot use the same register for the "source". *) val postInstruction = case instr of BoxLarge _ => destRegNos @ sourceRegNos | BoxTagFloat _ => destRegNos @ sourceRegNos (* Not sure about this. *) | BitFieldInsert{source, ...} => regNo source :: destRegNos | _ => destRegNos in (* If there is more than one destination they conflict with each other. *) val () = addConflictsTo(postInstruction, listToSet postInstruction); (* Mark conflicts for the destinations, i.e. after the instruction. The destinations conflict with the registers that are used subsequently. *) val () = addConflictsTo(postInstruction, current); val () = addConflictsTo(postInstruction, passThrough); (* Mark conflicts for the sources i.e. before the instruction. *) (* Sources must be set up as conflicts with each other i.e. when we come to allocate registers we must choose different real registers for different abstract registers. *) val () = addConflictsTo(sourceRegNos, listToSet sourceRegNos) val () = addConflictsTo(sourceRegNos, afterRemoveDests); val () = addConflictsTo(sourceRegNos, passThrough) end (* I'm not sure if this is needed. There was a check in the old code to ensure that different registers were used for loop variables even if they were actually unused. This may happen anyway. Comment and code copied from X86 version. Retain it for the moment. *) val () = case instr of JumpLoop{regArgs, ...} => let val destRegs = List.foldl(fn ({dst=PReg loopReg, ...}, dests) => loopReg :: dests) [] regArgs in addConflictsTo(destRegs, listToSet destRegs); addConflictsTo(destRegs, current); addConflictsTo(destRegs, passThrough) end | _ => () (* Certain instructions are specific as to the real registers. *) val () = case instr of ReturnResultFromFunction{ returnReg=PReg retReg, ... 
} => (* We're going to put the return value in X0 so we can't use that for the return address. *) addRealConflict(retReg, GenReg X0) | RaiseExceptionPacket{ packetReg } => (* This wasn't needed previously because we always pushed the registers across an exception. *) reserveRegister(regNo packetReg, GenReg X0) | BeginHandler { packetReg, ...} => reserveRegister(regNo packetReg, GenReg X0) | FunctionCall { dests, regArgs, ...} => (* This is only needed if we are saving the registers rather than marking them as "must push". *) let val () = List.app(fn (PReg pr, r) => reserveRegister(pr, GenReg r)) dests (* The argument registers also conflict. In order to execute this call we need to load the arguments into specific registers so we can't use them for values that we want after the call. *) val toReserve = X30 :: List.map #2 regArgs in List.app(fn i => List.app(fn r => addRealConflict(i, GenReg r)) toReserve) (setToList passThrough @ setToList afterRemoveDests) end (* We can't use X30 as the result because it's needed for the return addr if we have to GC. *) | AllocateMemoryFixed{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | AllocateMemoryVariable{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxLarge{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) | BoxTagFloat{dest=PReg destReg, ...} => addRealConflict(destReg, GenReg X30) (* Could exclude floats on native addr. *) | _ => () in () end (* Process the block. *) fun conflictsForBlock(ExtendedBasicBlock{block, passThrough, exports, ...}) = let (* We need to establish conflicts between all the registers active at the end of the block since they may not be established elsewhere. This isn't necessary for an unconditional branch since the same registers will be included in the block that is the target of the branch, possibly along with others. However if this is a conditional or indexed branch we may have different sets at each of the targets and we have to ensure that all the registers differ. *) val united = union(exports, passThrough) val () = addConflictsTo(setToList united, united) val () = List.app (conflictsForInstr passThrough) block in () end val () = Vector.app conflictsForBlock blocks (* Hint values. The idea of hints is that by using a hinted register we may avoid an unnecessary move instruction. realHints is set when a pseudo-register is going to be loaded from a specific register e.g. a register argument, or moved into one e.g. X0 for the function result. friends is set to the other pReg that may be associated with the pReg. Typically this is where we have a merge register that we move some value into. *) val realHints = Array.array(maxPRegs, NONE: reg option) (* Sources and destinations. These indicate the registers that are the sources and destinations of the indexing register and are used as hints. If a register has been allocated for a source or destination we may be able to reuse it. *) val sourceRegs = Array.array(maxPRegs, []: int list) and destinationRegs = Array.array(maxPRegs, []: int list) local (* Real hints. If this is the source of a value e.g. a function argument in a register, we'll use it directly. If, though, this is the result of a function and we want the result to end up in a specific register we want to propagate it to any pReg that moves its value into this. *) fun addRealHint(r, reg) = case Array.sub(realHints, r) of SOME _ => () | NONE => ( (* Add to this pReg *) Array.update(realHints, r, SOME reg); (* and to any other pReg that moves here. 
*) List.app(fn r => addRealHint(r, reg)) (Array.sub(sourceRegs, r)) ) fun addSourceAndDestinationHint{src, dst} = let val conflicts = Array.sub(regConflicts, src) in (* If they conflict we can't add them. *) if member(dst, conflicts) then () else let val currentDests = Array.sub(destinationRegs, src) val currentSources = Array.sub(sourceRegs, dst) in (* Add the destination for this source i.e. the registers we move this source into. *) if List.exists(fn i => i=dst) currentDests then () else Array.update(destinationRegs, src, dst :: currentDests); (* Add the source to the list of sources for this destination. A merge register may have several sources, a different one for each path. If the destination has a real hint we want to propagate that back. That isn't needed for the destinations because we allocate the registers from the start forward. *) if List.exists(fn i => i=src) currentSources then () else let val sources = src :: currentSources val () = Array.update(sourceRegs, dst, sources) in case Array.sub(realHints, dst) of NONE => () | SOME real => List.app(fn r => addRealHint(r, real)) sources end end end (* Add the hints to steer the register allocation. The idea is to avoid moves between registers by getting values into the appropriate register in advance. We don't actually need to add real hints where the real register is providing the value, e.g. BeginFunction, because the allocation process will take care of that. *) fun addHints{instr=MoveRegister{source=PReg sreg, dest=PReg dreg, ...}, ...} = addSourceAndDestinationHint {src=sreg, dst=dreg} | addHints{instr=BitFieldInsert{destAsSource=PReg dsReg, dest=PReg dReg, ...}, ...} = (* The "destAsSource" is the destination if some bits are retained. *) addSourceAndDestinationHint {src=dsReg, dst=dReg} | addHints{instr=ReturnResultFromFunction { results, ... }, ...} = List.app(fn(PReg pr, r) => addRealHint(pr, GenReg r)) results | addHints{instr=JumpLoop{regArgs, ...}, ...} = let fun addRegArg {src=ArgInReg(PReg argReg), dst=PReg resReg} = addSourceAndDestinationHint {dst=resReg, src=argReg} | addRegArg {src=ArgOnStack _, ...} = () in List.app addRegArg regArgs end | addHints{instr=BeginFunction{regArgs, ...}, ...} = List.app (fn (PReg pr, reg) => addRealHint(pr, GenReg reg)) regArgs | addHints{instr=TailRecursiveCall{regArgs, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app setHint regArgs end | addHints{instr=FunctionCall{regArgs, dests, ...}, ...} = let fun setHint(ArgInReg(PReg pr), reg) = addRealHint(pr, GenReg reg) | setHint _ = () in List.app(fn(PReg pr, r) => addRealHint(pr, GenReg r)) dests; List.app setHint regArgs end (* Exception packets are in X0 *) | addHints{instr=RaiseExceptionPacket{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints{instr=BeginHandler{ packetReg=PReg preg }, ...} = addRealHint(preg, GenReg X0) | addHints _ = () in val () = Vector.app(fn ExtendedBasicBlock { block, ...} => List.app addHints block) blocks end val allocatedRegs = Array.array(maxPRegs, NONE: reg option) val failures = ref []: intSet list ref (* Find a real register for a preg. 1. If a register is already allocated use that. 2. Try the "preferred" register if one has been given. 3. Try the realHints value if there is one. 4. See if there is a "friend" that has an appropriate register 5. Look at all the registers and find one. 
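(* A much simplified model of the search order listed here, using plain
   lists where the real findRegister below uses arrays, conflict sets and
   the register cache; the record fields are invented for illustration only. *)
fun chooseRegisterSketch {allocated, preferred, hinted, friends, available, conflicting} =
let
    fun isFree r = not (List.exists (fn c => c = r) conflicting)
in
    case allocated of
        SOME reg => SOME reg                                      (* 1: already allocated *)
      | NONE =>
        (case List.find isFree (preferred @ hinted @ friends) of  (* 2-4: preference, hint, friends *)
             SOME reg => SOME reg
           | NONE => List.find isFree available)                  (* 5: any free register *)
end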
*) fun findRegister(r, pref, regSet, cache) = case Array.sub(allocatedRegs, r) of SOME reg => reg | NONE => let val conflicts = Array.sub(regConflicts, r) and realConflicts = Array.sub(regRealConflicts, r) (* Find the registers we've already allocated that may conflict. *) val conflictingRegs = List.mapPartial(fn i => Array.sub(allocatedRegs, i)) (setToList conflicts) @ realConflicts fun isFree aReg = not (List.exists(fn i => i=aReg) conflictingRegs) fun tryAReg NONE = NONE | tryAReg (somePref as SOME prefReg) = if isFree prefReg then (Array.update(allocatedRegs, r, somePref); somePref) else NONE (* Search the sources and destinations to see if a register has already been allocated or there is a hint. *) fun findAFriend([], [], _) = NONE | findAFriend(aDest :: otherDests, sources, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aDest) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aDest)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the destinations of the destinations to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(destinationRegs, aDest)) in findAFriend(otherDests @ fOfF, sources, aDest :: alreadySeen) end end | findAFriend([], aSrc :: otherSrcs, alreadySeen) = let val possReg = case Array.sub(allocatedRegs, aSrc) of v as SOME _ => tryAReg v | NONE => tryAReg(Array.sub(realHints, aSrc)) in case possReg of reg as SOME _ => reg | NONE => let (* Add the sources of the sources to the list if they don't conflict and haven't been seen. *) fun newFriend f = not(List.exists (fn n => n=f) alreadySeen) andalso not(member(f, conflicts)) val fOfF = List.filter newFriend (Array.sub(sourceRegs, aSrc)) in findAFriend([], otherSrcs @ fOfF, aSrc :: alreadySeen) end end in case tryAReg pref of SOME r => r | NONE => ( case tryAReg (Array.sub(realHints, r)) of SOME r => r | NONE => ( case findAFriend(Array.sub(destinationRegs, r), Array.sub(sourceRegs, r), []) of SOME r => r (* Look through the registers to find one that's free. First try excluding the cache registers. *) | NONE => let (* First try filtering all the cache registers to see if we can find a register. If not see if it works by *) fun filterCache(filteredRegset, []) = List.find isFree filteredRegset | filterCache(filteredRegset, (cReg, _) :: cache) = ( case filterCache(List.filter(fn r => r <> cReg) filteredRegset, cache) of NONE => if isFree cReg then SOME cReg else NONE | result => result ) val pick = case filterCache(regSet, cache) of SOME reg => reg | NONE => ( (* This failed. We're going to have to spill something. *) failures := conflicts :: ! failures; hd regSet (* Return something to allow this pass to complete *) ) val () = Array.update(allocatedRegs, r, SOME pick) in pick end ) ) end (* Turn the abstract icode into a concrete version by allocating the registers. *) local fun asGenReg(GenReg reg) = reg | asGenReg _ = raise InternalError "asGenReg" and asFPReg(FPReg reg) = reg | asFPReg _ = raise InternalError "asFPReg" datatype cacheItem = CacheStack of stackLocn (* A value loaded from the stack. *) | CacheAbsAddress of preg (* 32-in-64: An absolute address from an object ID *) | CacheAbsAddrOnStack of stackLocn (* 32-in-64: An absolute address from an object loaded from the stack. *) (* Cache hints: Try to use the same register for values that can be cached. 
This increases the chances that we will be able to retain the cache when we merge different branches. *) val cacheHints = Array.array(maxPRegs, NONE: reg option) (* Remove any reference to newly allocated registers from the cache. Also used after block move and comparison that modify registers *) fun pruneCache(reg: reg, cache) = List.filter(fn (r, _) => r <> reg) cache (* Return the cache registers that contain valid addresses. *) fun cachedAddressRegs cache = List.map (asGenReg o #1) cache (* Merge the cache states *) fun mergeCacheStates ([]: (reg * cacheItem) list list) = []: (reg * cacheItem) list | mergeCacheStates [single] = single | mergeCacheStates (many as first :: rest) = let (* Generally we will either be unable to merge and have an empty cache or will have just one or two entries. *) (* Find the shortest. If it's empty we're done. *) fun findShortest(_, [], _) = [] | findShortest(_, shortest, []) = shortest | findShortest(len, shortest, hd::tl) = let val hdLen = List.length hd in if hdLen < len then findShortest(hdLen, hd, tl) else findShortest(len, shortest, tl) end val shortest = findShortest(List.length first, first, rest) (* Find the item we're caching for. If it is in a different register we can't use it. *) fun findItem search (hd::tl) = search = hd orelse findItem search tl | findItem _ [] = false (* It's present if it's in all the sources. *) fun present search = List.all(findItem search) many val filtered = List.foldl (fn (search, l) => if present search then search :: l else l) [] shortest in filtered end fun allocateNewDestination(PReg r, pref, regSet, cacheList) = case Array.sub(allocatedRegs, r) of SOME reg => ( case Vector.sub(regProps, r) of RegPropMultiple => (reg, pruneCache(reg, cacheList)) (* This is allowed for merge registers *) | _ => raise InternalError "Register defined at multiple points" ) | NONE => let val reg = findRegister(r, pref, regSet, cacheList) in (reg, pruneCache(reg, cacheList)) end fun allocateGenReg(r, hint, cache) = let val (reg, newCache) = allocateNewDestination(r, hint, generalRegisters, cache) in (asGenReg reg, newCache) end and allocateFPReg(r, hint, cache) = let val (reg, newCache) = allocateNewDestination(r, hint, floatingPtRegisters, cache) in (asFPReg reg, newCache) end and allocateGenRegOrZero(ZeroReg, _, cache) = (XZero, cache) | allocateGenRegOrZero(SomeReg reg, hint, cache) = allocateGenReg(reg, hint, cache) fun getAllocatedGenReg(PReg r) = case Array.sub(allocatedRegs, r) of SOME(GenReg reg) => reg | _ => raise InternalError "getAllocatedGenReg" and getAllocatedFPReg(PReg r) = case Array.sub(allocatedRegs, r) of SOME(FPReg reg) => reg | _ => raise InternalError "getAllocatedFPReg" fun getAllocatedGenRegOrZero ZeroReg = XZero | getAllocatedGenRegOrZero (SomeReg reg) = getAllocatedGenReg reg fun getAllocatedArg(ArgInReg reg) = ArgInReg(getAllocatedGenReg reg) | getAllocatedArg(ArgOnStack stackLoc) = ArgOnStack stackLoc val getSaveRegs = List.map getAllocatedGenReg (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) fun absToConcrete([], context, code) = (context, code) | absToConcrete({instr=MoveRegister{ source, dest}, ...} :: rest, cache, code) = let (* Try to use the register we've allocated for the source as the destination so that we can eliminate this instruction altogether. 
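The source register has already been allocated by this point, so it is passed as the preferred register for the destination; if the allocation succeeds sourceReg and destReg come out equal and the move is dropped, otherwise a concrete MoveRegister is still emitted.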
*) val sourceReg = getAllocatedGenReg source val (destReg, newCache) = allocateGenReg(dest, SOME(GenReg sourceReg), cache) in if sourceReg = destReg then absToConcrete(rest, newCache, code) else absToConcrete(rest, newCache, code <::> MoveRegister { source=sourceReg, dest=destReg}) end | absToConcrete({instr=LoadNonAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadNonAddressConstant { dest=destReg, source=source}) end | absToConcrete({instr=LoadAddressConstant { dest, source}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAddressConstant { dest=destReg, source=source}) end | absToConcrete({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, loadType=loadType}) end | absToConcrete({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithConstantOffset { base=getAllocatedGenReg base, dest=destReg, byteOffset=byteOffset, floatSize=floatSize}) end | absToConcrete({instr=LoadWithIndexedOffset { base, dest, index, loadType, signExtendIndex}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, loadType=loadType, signExtendIndex=signExtendIndex}) end | absToConcrete({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize, signExtendIndex}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadFPWithIndexedOffset { base=getAllocatedGenReg base, dest=destReg, index=getAllocatedGenReg index, floatSize=floatSize, signExtendIndex=signExtendIndex}) end | absToConcrete({instr=GetThreadId { dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> GetThreadId { dest=destReg}) end | absToConcrete({instr=ObjectIndexAddressToAbsolute { source as PReg srcNo, dest=destOiA as PReg doia}, kill, ...} :: rest, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this source entry we don't want it in the cache any longer. *) val killThis = member(srcNo, kill) val (newCode, destReg, newCache, next) = case List.find(fn (_, CacheAbsAddress c) => c=source | _ => false) cache of SOME (srcReg, _) => let (* Try to use the cache register as the destination if we can. *) val (destReg, newCache) = allocateNewDestination(destOiA, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if checkCache then (code <::> MoveRegister{source=sReg, dest=X17} <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=dReg} <::> CacheCheck{ arg1=dReg, arg2=X17 }, destReg, if killThis then pruneCache(srcReg, newCache) else newCache, rest) else if dReg = sReg then (code, destReg, newCache, rest) (* We will have pruned this since it's the destination. 
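allocateNewDestination always prunes the register it allocates from the cache, so the entry we found here has already been removed and no move is needed.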
*) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache, rest) end | NONE => (* If this is the last reference and the next instruction is loading with a zero offset we can use an indexed load and avoid converting to an absolute address. If this is not the last reference it's likely that we're loading another field so it's probably better to convert the object index and cache it. We might manage to use a load-pair instruction. *) ( case (killThis, rest) of (true, {instr=LoadWithConstantOffset{ byteOffset=0, loadType=Load32, base, dest=destLoad, ... }, kill=killLoad, ...} :: next) => if base = destOiA (* of objectindex *) andalso member(doia, killLoad) then let val (destReg, newCache) = allocateGenReg(destLoad, NONE, cache) in (code <::> LoadWithIndexedOffset{ base=X24(*X_Base32in64*), dest=destReg, index=getAllocatedGenReg source, loadType=Load32, signExtendIndex=false }, GenReg destReg, newCache, next) end else let val (destReg, newCache) = allocateGenReg(destOiA, Array.sub(cacheHints, srcNo), cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, GenReg destReg, newCache, rest) end | _ => let val (destReg, newCache) = allocateGenReg(destOiA, Array.sub(cacheHints, srcNo), cache) in (code <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, GenReg destReg, newCache, rest) end ) val () = if killThis then () else Array.update(cacheHints, srcNo, SOME destReg) in absToConcrete(next, if killThis then newCache else (destReg, CacheAbsAddress source) :: newCache, newCode) end | absToConcrete({instr=AbsoluteToObjectIndex { source, dest}, ...} :: rest, cache, code) = let (* Don't make an entry in the cache for this; it won't be used. 
*) val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AbsoluteToObjectIndex { source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=AllocateMemoryFixed { bytesRequired, dest, saveRegs }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> AllocateMemoryFixed { dest=destReg, bytesRequired=bytesRequired, saveRegs=saved}) end | absToConcrete({instr=AllocateMemoryVariable{size, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> AllocateMemoryVariable{size=getAllocatedGenReg size, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=InitialiseMem{size, addr, init}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> InitialiseMem{size=getAllocatedGenReg size, addr=getAllocatedGenReg addr, init=getAllocatedGenReg init}) | absToConcrete({instr=BeginLoop, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> BeginLoop) | absToConcrete({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...} :: rest, context, code) = let fun getStackArg{src, wordOffset, stackloc} = {src=getAllocatedArg src, wordOffset=wordOffset, stackloc=stackloc} and getRegArg{src, dst} = {src=getAllocatedArg src, dst=getAllocatedGenReg dst} in absToConcrete(rest, context, code <::> JumpLoop{ regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, checkInterrupt=Option.map getSaveRegs checkInterrupt}) end | absToConcrete({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, byteOffset=byteOffset, loadType=loadType}) | absToConcrete({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithConstantOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, byteOffset=byteOffset, floatSize=floatSize}) | absToConcrete({instr=StoreWithIndexedOffset { base, source, index, loadType, signExtendIndex}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, index=getAllocatedGenReg index, loadType=loadType, signExtendIndex=signExtendIndex}) | absToConcrete({instr=StoreFPWithIndexedOffset { base, source, index, floatSize, signExtendIndex}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreFPWithIndexedOffset{ base=getAllocatedGenReg base, source=getAllocatedFPReg source, index=getAllocatedGenReg index, floatSize=floatSize, signExtendIndex=signExtendIndex}) | absToConcrete({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length}) end | absToConcrete({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, 
shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift}) end | absToConcrete({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LogicalImmediate { source=getAllocatedGenReg source, dest=destReg, ccRef=ccRef, immed=immed, logOp=logOp, length=length}) end | absToConcrete({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LogicalRegister { base=getAllocatedGenReg base, shifted=getAllocatedGenReg shifted, dest=destReg, ccRef=ccRef, logOp=logOp, length=length, shift=shift}) end | absToConcrete({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ShiftRegister { source=getAllocatedGenReg source, shift=getAllocatedGenReg shift, dest=destReg, direction=direction, opSize=opSize}) end | absToConcrete({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> Multiplication { kind=kind, sourceA=getAllocatedGenRegOrZero sourceA, sourceM=getAllocatedGenReg sourceM, sourceN=getAllocatedGenReg sourceN, dest=destReg}) end | absToConcrete({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> Division { isSigned=isSigned, dividend=getAllocatedGenReg dividend, divisor=getAllocatedGenReg divisor, dest=destReg, opSize=opSize}) end | absToConcrete({instr=BeginFunction {regArgs, stackArgs}, ...} :: rest, _, code) = let (* Allocate the register arguments. At this point all the registers are free and the cache is empty. However we may have a "real conflict" that means that the allocated register is different. e.g. we need this argument some time after an arbitrary precision operation that may call a function. *) fun allocReg(src, dst) = let val (destReg, _) = allocateNewDestination(src, SOME(GenReg dst), generalRegisters, []) in (asGenReg destReg, dst) end in absToConcrete(rest, [], code <::> BeginFunction {regArgs=map allocReg regArgs, stackArgs=stackArgs}) end | absToConcrete({instr=FunctionCall{callKind, regArgs, stackArgs, dests, containers, saveRegs, ...}, ...} :: rest, _, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) fun getResult(preg, reg) = let (* We empty the cache at this point. *) val (newReg, _) = allocateGenReg(preg, NONE, []) in (newReg, reg) end in absToConcrete(rest, [] (* Empty after a function call. 
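The call may overwrite any register we were using to cache a value, so nothing in the cache can be relied on afterwards.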
*), code <::> FunctionCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getAllocatedArg stackArgs, dests=map getResult dests, saveRegs=getSaveRegs saveRegs, containers=containers}) end | absToConcrete({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, ...} :: rest, context, code) = let fun getRegArg(src, dst) = (getAllocatedArg src, dst) and getStackArg{src, stack} = {src=getAllocatedArg src, stack=stack} in absToConcrete(rest, context, code <::> TailRecursiveCall{ callKind=callKind, regArgs=map getRegArg regArgs, stackArgs=map getStackArg stackArgs, stackAdjust=stackAdjust, currStackSize=currStackSize}) end | absToConcrete({instr=ReturnResultFromFunction{results, returnReg, numStackArgs}, ...} :: rest, context, code) = let fun getResult(preg, reg) = (getAllocatedGenReg preg, reg) in absToConcrete(rest, context, code <::> ReturnResultFromFunction{results=map getResult results, returnReg=getAllocatedGenReg returnReg, numStackArgs=numStackArgs}) end | absToConcrete({instr=RaiseExceptionPacket{packetReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> RaiseExceptionPacket{packetReg=getAllocatedGenReg packetReg}) | absToConcrete({instr=PushToStack{ source, container as StackLoc{size, rno}, copies }, ...} :: rest, cache, code) = let val srcReg = getAllocatedGenReg source val newCache = if size = 1 then (GenReg srcReg, CacheStack container) :: cache else cache val () = Array.update(cacheHints, rno, SOME(GenReg srcReg)) in absToConcrete(rest, newCache, code <::> PushToStack{source=srcReg, container=container, copies=copies}) end | absToConcrete({instr=LoadStack{ dest=destLoad, container as StackLoc{rno, ...} , field=0, wordOffset}, kill, ...} :: (restPlusOia as {instr=ObjectIndexAddressToAbsolute { source as PReg srcNo, dest=destOia}, kill=killOia, ...} :: rest), cache, code) = (* If a preg has been pushed to the stack every subsequent reference will be via the stack. If we want to be able to cache object index to absolute addresses for them we have to recognise this combination. *) (* They could be unrelated in which case process the LoadStack and then the ObjectIndex... It seems there are also rare circumstances(??) where the result of the load is not killed and so would have to be preserved. *) if destLoad = source andalso member(srcNo, killOia) then let val killThis = member(rno, kill) (* Is it the last reference to the stack entry? *) val (newCode, destReg, newCache) = case List.find(fn (_, CacheAbsAddrOnStack c) => c=container | _ => false) cache of SOME (srcReg, _) => let (* Try to use the cache register as the destination if we can. *) val (destReg, newCache) = allocateNewDestination(destOia, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if checkCache then (code <::> MoveRegister{source=sReg, dest=X17} <::> LoadStack{ dest=X16, container=container, field=0, wordOffset=wordOffset } <::> ObjectIndexAddressToAbsolute { source=X16, dest=dReg} <::> CacheCheck{ arg1=dReg, arg2=X17 }, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) else if dReg = sReg then (code, destReg, newCache) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) end | NONE => (* It's not in the cache - load it which could be cached. 
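processLoadStack will add the loaded value to the cache where possible, and the converted address is cached below as a CacheAbsAddrOnStack entry.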
*) let val (cachePostLoad, loadCode) = processLoadStack(destLoad, container, wordOffset, kill, cache, code) val (destReg, cachePlusOia) = allocateGenReg(destOia, Array.sub(cacheHints, rno), cachePostLoad) in (loadCode <::> ObjectIndexAddressToAbsolute { source=getAllocatedGenReg source, dest=destReg}, GenReg destReg, cachePlusOia) end val () = if killThis then () else Array.update(cacheHints, rno, SOME destReg) in absToConcrete(rest, if killThis then newCache else (destReg, CacheAbsAddrOnStack container) :: newCache, newCode) end else (* Can't combine these. *) let val (newCache, newCode) = processLoadStack(destLoad, container, wordOffset, kill, cache, code) in absToConcrete(restPlusOia, newCache, newCode) end | absToConcrete({instr=LoadStack{ dest, container, wordOffset, field=0, ...}, kill, ...} :: rest, cache, code) = let val (newCache, newCode) = processLoadStack(dest, container, wordOffset, kill, cache, code) in absToConcrete(rest, newCache, newCode) end | absToConcrete({instr=LoadStack{ dest, container, field, wordOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadStack{ dest=destReg, container=container, field=field, wordOffset=wordOffset }) end | absToConcrete({instr=StoreToStack{source, container, field, stackOffset}, ...} :: rest, cache, code) = (* We may have cached the original push that cleared the container. We could cache this since it now contains the entry but it's probably better to deal with multiple results at a higher level. *) let val sReg = getAllocatedGenReg source val newCache = List.filter(fn (_, CacheStack c) => c <> container | (_, CacheAbsAddrOnStack c) => c <> container | _ => true) cache in absToConcrete(rest, newCache, code <::> StoreToStack{source=sReg, container=container, field=field, stackOffset=stackOffset}) end | absToConcrete({instr=ContainerAddress{ dest, container, stackOffset}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ContainerAddress{ dest=destReg, container=container, stackOffset=stackOffset }) end | absToConcrete({instr=ResetStackPtr {numWords}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> ResetStackPtr {numWords=numWords}) | absToConcrete({instr=TagValue{source, dest, isSigned, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> TagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UntagValue{source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, opSize=opSize}) end | absToConcrete({instr=BoxLarge{source, dest, saveRegs, ...}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> BoxLarge{source=getAllocatedGenReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxLarge{source, dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UnboxLarge{source=getAllocatedGenReg 
source, dest=destReg}) end | absToConcrete({instr=BoxTagFloat{floatSize, source, dest, saveRegs}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) val filteredCache = pruneCache(GenReg X30, newCache) val saved = cachedAddressRegs filteredCache @ getSaveRegs saveRegs in absToConcrete(rest, filteredCache, code <::> BoxTagFloat{floatSize=floatSize, source=getAllocatedFPReg source, dest=destReg, saveRegs=saved}) end | absToConcrete({instr=UnboxTagFloat{floatSize, source, dest}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UnboxTagFloat{floatSize=floatSize, source=getAllocatedGenReg source, dest=destReg}) end | absToConcrete({instr=LoadAcquire { base, dest, loadType}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAcquire { base=getAllocatedGenReg base, dest=destReg, loadType=loadType}) end | absToConcrete({instr=StoreRelease { base, source, loadType}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> StoreRelease{ base=getAllocatedGenReg base, source=getAllocatedGenReg source, loadType=loadType}) | absToConcrete({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> BitFieldShift { source=getAllocatedGenReg source, dest=destReg, isSigned=isSigned, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=BitFieldInsert{source, destAsSource, dest, length, immr, imms}, ...} :: rest, cache, code) = let val destAsSourceReg = getAllocatedGenReg destAsSource val (destReg, newCache) = allocateNewDestination(dest, SOME(GenReg destAsSourceReg), generalRegisters, cache) in absToConcrete(rest, newCache, code <::> BitFieldInsert { source=getAllocatedGenReg source, destAsSource=destAsSourceReg, dest=asGenReg destReg, immr=immr, imms=imms, length=length}) end | absToConcrete({instr=IndexedCaseOperation{testReg}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> IndexedCaseOperation{testReg=getAllocatedGenReg testReg}) | absToConcrete({instr=PushExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PushExceptionHandler) | absToConcrete({instr=PopExceptionHandler, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> PopExceptionHandler) | absToConcrete({instr=BeginHandler{packetReg}, ...} :: rest, _, code) = let (* The cache is undefined at the start of a handler. 
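We can arrive here from a raise anywhere within the handled expression, so we simply begin again with an empty cache.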
*) val (destReg, newCache) = allocateGenReg(packetReg, NONE, []) in absToConcrete(rest, newCache, code <::> BeginHandler{packetReg=destReg}) end | absToConcrete({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val vec1Reg = getAllocatedGenReg vec1Addr and vec2Reg = getAllocatedGenReg vec2Addr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg vec1Reg, pruneCache(GenReg vec2Reg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> CompareByteVectors{vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lenReg, ccRef=ccRef}) end | absToConcrete({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...} :: rest, cache, code) = let (* This instruction modifies these registers so they must be removed from the cache *) val srcAReg = getAllocatedGenReg srcAddr and dstAReg = getAllocatedGenReg destAddr and lenReg = getAllocatedGenReg length val newCache = pruneCache(GenReg srcAReg, pruneCache(GenReg dstAReg, pruneCache(GenReg lenReg, cache))) in absToConcrete(rest, newCache, code <::> BlockMove{srcAddr=srcAReg, destAddr=dstAReg, length=lenReg, isByteMove=isByteMove}) end | absToConcrete({instr=AddSubXSP{source, dest, isAdd}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenRegOrZero(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> AddSubXSP { source=getAllocatedGenReg source, dest=destReg, isAdd=isAdd}) end | absToConcrete({instr=TouchValue{source}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> TouchValue { source=getAllocatedGenReg source}) | absToConcrete({instr=LoadAcquireExclusive{ base, dest }, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> LoadAcquireExclusive { base=getAllocatedGenReg base, dest=destReg}) end | absToConcrete({instr=StoreReleaseExclusive{ base, source, result }, ...} :: rest, cache, code) = let val (resultReg, newCache) = allocateGenReg(result, NONE, cache) in absToConcrete(rest, newCache, code <::> StoreReleaseExclusive{ base=getAllocatedGenReg base, source=getAllocatedGenRegOrZero source, result=resultReg}) end | absToConcrete({instr=MemoryBarrier, ...} :: rest, cache, code) = absToConcrete(rest, cache, code <::> MemoryBarrier) | absToConcrete({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ConvertIntToFloat{ source=getAllocatedGenReg source, dest=destReg, srcSize=srcSize, destSize=destSize}) end | absToConcrete({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateGenReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> ConvertFloatToInt{ source=getAllocatedFPReg source, dest=destReg, srcSize=srcSize, destSize=destSize, rounding=rounding}) end | absToConcrete({instr=UnaryFloatingPt{ source, dest, fpOp}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> UnaryFloatingPt{ source=getAllocatedFPReg source, dest=destReg, fpOp=fpOp}) end | absToConcrete({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...} :: rest, cache, code) = let val (destReg, newCache) = allocateFPReg(dest, NONE, cache) in absToConcrete(rest, newCache, code <::> 
BinaryFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, dest=destReg, fpOp=fpOp, opSize=opSize}) end | absToConcrete({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...} :: rest, context, code) = absToConcrete(rest, context, code <::> CompareFloatingPoint{ arg1=getAllocatedFPReg arg1, arg2=getAllocatedFPReg arg2, opSize=opSize, ccRef=ccRef}) + | absToConcrete({instr=CPUYield, ...} :: rest, cache, code) = absToConcrete(rest, cache, code <::> CPUYield) + | absToConcrete({instr=CacheCheck _, ...} :: _, _, _) = (* Concrete only. *) raise InternalError "absToConcrete: CheckCache" (* LoadStack. *) and processLoadStack(dest, container as StackLoc{rno, ...}, wordOffset, kill, cache, code) = let (* See if this is in the cache and use it if it is. If this is the last reference to this stack entry we don't want it in the cache any longer. *) val killThis = member(rno, kill) val (newCode, destReg, newCache) = case List.find(fn (_, CacheStack c) => c=container | _ => false) cache of SOME (srcReg, _) => let val (destReg, newCache) = allocateNewDestination(dest, SOME srcReg, generalRegisters, cache) val dReg = asGenReg destReg and sReg = asGenReg srcReg in if checkCache then (code <::> MoveRegister{source=sReg, dest=X17} <::> LoadStack{ dest=dReg, container=container, field=0, wordOffset=wordOffset } <::> CacheCheck{ arg1=dReg, arg2=X17 }, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) else if dReg = sReg andalso false then (code, destReg, newCache) (* We will have pruned this since it's the destination. *) else (code <::> MoveRegister { source=sReg, dest=dReg}, destReg, if killThis then pruneCache(srcReg, newCache) else newCache) end | NONE => let val (destReg, newCache) = allocateGenReg(dest, Array.sub(cacheHints, rno), cache) in (code <::> LoadStack{ dest=destReg, container=container, field=0, wordOffset=wordOffset }, GenReg destReg, newCache) end val () = if killThis then () else Array.update(cacheHints, rno, SOME destReg) in (if killThis then newCache else (destReg, CacheStack container) :: newCache, newCode) end in fun concreteBlock(ExtendedBasicBlock{ block, ...}, inputCache) = let val (cache, code) = absToConcrete(block, inputCache, []) in {cache=cache, code=List.rev code} end val mergeCacheStates = mergeCacheStates end val numBlocks = Vector.length blocks (* The results. The cache state is initialised to empty so that if we have a loop we will end up with an empty input cache. *) val resultArray = Array.array(numBlocks, {code=[], cache=[]}) (* Process the blocks in execution order so that normally we will be able to propagate the cache states. If we have a loop the input cache state will be empty because the output cache state for an unprocessed block is empty. *) (* Get the blocks that are inputs for each one. *) local val blockRefs = Array.array(numBlocks, []) (* The successors of this block but only including handlers in SetHandler. 
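An UnconditionalHandle therefore contributes no successors here and a ConditionalHandle contributes only its continuation; the handler block itself is only recorded as a successor of the block that sets the handler.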
*) fun directSuccessors ExitCode = [] | directSuccessors(IndexedBr cases) = cases | directSuccessors(Unconditional dest) = [dest] | directSuccessors(Conditional {trueJump, falseJump, ...}) = [falseJump, trueJump] | directSuccessors(SetHandler { handler, continue }) = [handler, continue] | directSuccessors(UnconditionalHandle _) = [] | directSuccessors(ConditionalHandle { continue, ...}) = [continue] fun setReferences fromBlock = let val ExtendedBasicBlock{ flow, ...} = Vector.sub(blocks, fromBlock) val refs = directSuccessors flow fun setRefs toBlock = let val oldRefs = Array.sub(blockRefs, toBlock) in Array.update(blockRefs, toBlock, fromBlock :: oldRefs); if null oldRefs then setReferences toBlock else () end in List.app setRefs refs end val () = setReferences 0 in val directSuccessors = directSuccessors val blockRefs = blockRefs end val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) fun processBlocks (toDo: int list) = case List.filter (fn n => not(haveProcessed n)) toDo of [] => () | stillToDo as head :: _ => let (* Try to find a block all of whose predecessors have been processed. That increases the chances that we will have cached items. *) fun available dest = List.all haveProcessed (Array.sub(blockRefs, dest)) val blockNo = case List.find available stillToDo of SOME c => c | NONE => head val thisBlock as ExtendedBasicBlock { flow, ...} = Vector.sub(blocks, blockNo) (* Get the input cache state. Take the list of output caches of everything that jumps here and produce the intersection. *) val inputCacheList = List.map (fn n => #cache(Array.sub(resultArray, n))) (Array.sub(blockRefs, blockNo)) val inputCache = mergeCacheStates inputCacheList val inputCache = [] (* Temporarily *) (* Process this block and add it to the results. *) val () = Array.update(processed, blockNo, true) val () = Array.update(resultArray, blockNo, concreteBlock(thisBlock, inputCache)) (* Add the successors but with handlers only included in SetHandler. *) val addSet = directSuccessors flow in processBlocks(addSet @ stillToDo) end in processBlocks [0]; (* If the failures list is empty we succeeded. *) case !failures of [] => (* Return the allocation vector. We may have unused registers, *) AllocateSuccess( Vector.mapi(fn (i, ExtendedBasicBlock{ flow, ...}) => BasicBlock{block= #code(Array.sub(resultArray, i)), flow=flow}) blocks ) (* Else we'll have to spill something. *) | l => AllocateFailure l end val nGenRegs = List.length generalRegisters structure Sharing = struct type intSet = intSet and extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and reg = reg and xReg = xReg and vReg = vReg and allocateResult = allocateResult end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML index 13308b20..d3fc2713 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML @@ -1,3198 +1,3199 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64CodetreeToICode( structure BackendTree: BACKENDINTERMEDIATECODE structure Arm64ICode: ARM64ICODE structure Debug: DEBUG structure Arm64Foreign: FOREIGNCALL structure ICodeTransform: ARM64ICODETRANSFORM structure CodeArray: CODEARRAY structure Pretty:PRETTY sharing Arm64ICode.Sharing = ICodeTransform.Sharing = CodeArray.Sharing = BackendTree.Sharing ): GENCODE = struct open BackendTree open Address open Arm64ICode open CodeArray open BuiltIns (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock exception InternalError = Misc.InternalError fun taggedWord64 w: Word64.word = w * 0w2 + 0w1 and taggedWord w: word = w * 0w2 + 0w1 datatype blockStruct = BlockSimple of iCodeAbstract | BlockExit of iCodeAbstract | BlockLabel of int | BlockFlow of controlFlow | BlockBegin of { regArgs: (preg * xReg) list, stackArgs: stackLocn list } | BlockRaiseAndHandle of iCodeAbstract * int | BlockOptionalHandle of {call: iCodeAbstract, handler: int, label: int } val moveRegister = BlockSimple o MoveRegister and loadNonAddressConstant = BlockSimple o LoadNonAddressConstant and loadAddressConstant = BlockSimple o LoadAddressConstant and loadWithConstantOffset = BlockSimple o LoadWithConstantOffset and loadFPWithConstantOffset = BlockSimple o LoadFPWithConstantOffset and loadWithIndexedOffset = BlockSimple o LoadWithIndexedOffset and loadFPWithIndexedOffset = BlockSimple o LoadFPWithIndexedOffset and getThreadId = BlockSimple o GetThreadId and objectIndexAddressToAbsolute = BlockSimple o ObjectIndexAddressToAbsolute and absoluteToObjectIndex = BlockSimple o AbsoluteToObjectIndex and allocateMemoryFixed = BlockSimple o AllocateMemoryFixed and allocateMemoryVariable = BlockSimple o AllocateMemoryVariable and initialiseMem = BlockSimple o InitialiseMem and storeWithConstantOffset = BlockSimple o StoreWithConstantOffset and storeFPWithConstantOffset = BlockSimple o StoreFPWithConstantOffset and storeWithIndexedOffset = BlockSimple o StoreWithIndexedOffset and storeFPWithIndexedOffset = BlockSimple o StoreFPWithIndexedOffset and addSubImmediate = BlockSimple o AddSubImmediate and addSubRegister = BlockSimple o AddSubRegister and logicalImmediate = BlockSimple o LogicalImmediate and logicalRegister = BlockSimple o LogicalRegister and shiftRegister = BlockSimple o ShiftRegister and multiplication = BlockSimple o Multiplication and division = BlockSimple o Division and pushToStack = BlockSimple o PushToStack and loadStack = BlockSimple o LoadStack and storeToStack = BlockSimple o StoreToStack and containerAddress = BlockSimple o ContainerAddress and resetStackPtr = BlockSimple o ResetStackPtr and tagValue = BlockSimple o TagValue and untagValue = BlockSimple o UntagValue and boxLarge = BlockSimple o BoxLarge and unboxLarge = BlockSimple o UnboxLarge and boxTagFloat = BlockSimple o BoxTagFloat and unboxTagFloat = BlockSimple o UnboxTagFloat and loadAcquire = BlockSimple o LoadAcquire and storeRelease = BlockSimple o StoreRelease and bitFieldShift = BlockSimple o BitFieldShift and bitFieldInsert = BlockSimple o BitFieldInsert and compareByteVectors = 
BlockSimple o CompareByteVectors and blockMove = BlockSimple o BlockMove and addSubXSP = BlockSimple o AddSubXSP and touchValue = BlockSimple o TouchValue and loadAcquireExclusive = BlockSimple o LoadAcquireExclusive and storeReleaseExclusive = BlockSimple o StoreReleaseExclusive and memoryBarrier = BlockSimple MemoryBarrier and convertIntToFloat = BlockSimple o ConvertIntToFloat and convertFloatToInt = BlockSimple o ConvertFloatToInt and unaryFloatingPt = BlockSimple o UnaryFloatingPt and binaryFloatingPoint = BlockSimple o BinaryFloatingPoint and compareFloatingPoint = BlockSimple o CompareFloatingPoint + and cpuYield = BlockSimple CPUYield val shiftConstant = BlockSimple o shiftConstant (* Many operations use 32-bit arguments in 32-in-64 and 64-bit in native 64. *) val polyWordLoadSize = if is32in64 then Load32 else Load64 val polyWordOpSize = if is32in64 then OpSize32 else OpSize64 val tagBitMask64 = Word64.<<(Word64.fromInt ~1, 0w1) val tagBitMask32 = Word64.andb(tagBitMask64, 0wxffffffff) val polyWordTagBitMask = if is32in64 then tagBitMask32 else tagBitMask64 (* The flags byte is the high-order byte of length word. *) val flagsByteOffset = if isBigEndian then ~ (Word.toInt wordSize) else ~1 (* Size of operand in bytes and therefore the scale factor. *) fun opWordSize Load64 = 8 | opWordSize Load32 = 4 | opWordSize Load16 = 2 | opWordSize Load8 = 1 (* Shift for each size. i.e. log2 of opWordSize. *) fun loadShift Load64 = 0w3 | loadShift Load32 = 0w2 | loadShift Load16 = 0w1 | loadShift Load8 = 0w0 fun precisionToFpSize PrecSingle = Float32 | precisionToFpSize PrecDouble = Double64 fun codeFunctionToArm64({body, localCount, name, argTypes, closure, ...}:bicLambdaForm, debugSwitches, resultClosure) = let (* Pseudo-registers are allocated sequentially and the properties added to the list. *) val pregCounter = ref 0 val pregPropList = ref [] fun newPReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropGeneral :: !pregPropList in PReg regNo end and newUReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropUntagged :: !pregPropList in PReg regNo end and newStackLoc size = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropStack size :: !pregPropList in StackLoc{size=size, rno=regNo} end and newMergeReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropMultiple :: !pregPropList in PReg regNo end datatype locationValue = NoLocation | PregLocation of preg | StackContainer of { container: stackLocn, stackOffset: int } | RegisterContainer of preg list val locToPregArray = Array.array(localCount, NoLocation) val labelCounter = ref 1 (* Start at 1. Zero is used for the root. *) fun newLabel() = !labelCounter before labelCounter := !labelCounter + 1 val ccRefCounter = ref 0 fun newCCRef() = CcRef(!ccRefCounter) before ccRefCounter := !ccRefCounter + 1 (* The profile object is a single mutable with the F_bytes bit set. *) val profileObject = CodeArray.createProfileObject() (* Switch to indicate if we want to trace where live data has been allocated. *) (* TODO: This should be used in AllocateMemoryOperation and BoxValue and possibly AllocateMemoryVariable. *) val addAllocatingFunction = Debug.getParameter Debug.profileAllocationTag debugSwitches = 1 datatype destination = SpecificPReg of preg | NoResult | AnyReg (* Context type. 
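loopArgs describes the enclosing loop if we are inside one, stackPtr is the number of words currently pushed on the stack, currHandler is the label of the innermost enclosing handler if there is one, and overflowBlock is used by checkOverflow below so that a function can share a single block that raises Overflow.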
*) type context = { loopArgs: (preg list * int * int) option, stackPtr: int, currHandler: int option, overflowBlock: int option ref } datatype argLoc = ArgumentIsInReg of { realReg: xReg, argReg: preg } | ArgumentIsOnStack of { stackOffset: int, stackReg: stackLocn } | ArgumentIsRegContainer of preg list (* An address as either suitable for Load/StoreWithConstantOffset or else Load/StoreWithIndexedOffset. *) datatype addressKind = AddrOffset of {base: preg, offset: int} | AddrIndex of {base: preg, index: preg} (* Pseudo-regs for the result, the closure and the args that were passed in real regs. *) val resultTarget = newPReg() val closureRegAddr = newPReg() val returnAddrReg = newPReg() val generalArgRegs = [X0, X1, X2, X3, X4, X5, X6, X7] (* If a container is larger than this it is passed on the stack. *) val smallContainerSize = 4 (* Create a map for the arguments indicating their register or stack location. *) local val containerRegs = case List.filter(fn ContainerType _ => true | _ => false) argTypes of [] => NONE | [ContainerType s] => if s <= smallContainerSize then SOME(List.tabulate(s, fn _ => newMergeReg())) else SOME [] (* Larger containers return their result on the stack. *) | _ => raise InternalError "more than one container arg" (* Select the appropriate argument register depending on the argument type. *) fun argTypesToArgEntries([], _, _) = ([], [], [], []) | argTypesToArgEntries(ContainerType s :: tl, gRegs, n) = if s <= smallContainerSize then let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, n-1) val regs = valOf containerRegs in (ArgumentIsRegContainer regs :: argTypes, argCode, argRegs, stackArgs) end (* The address of a larger container is passed as an argument *) else argTypesToArgEntries(GeneralType :: tl, gRegs, n) | argTypesToArgEntries(_ :: tl, gReg :: gRegs, n) = (* This deals with general arguments but also with extra floating point arguments. They are boxed as usual. *) let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, n-1) val argReg=newPReg() in (ArgumentIsInReg{realReg=gReg, argReg=argReg} :: argTypes, argCode, (argReg, gReg) :: argRegs, stackArgs) end | argTypesToArgEntries(_ :: tl, [], n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, [], n-1) val stackLoc = newStackLoc 1 in (ArgumentIsOnStack {stackOffset=n, stackReg = stackLoc } :: argTypes, argCode, argRegs, stackLoc :: stackArgs) end val (argEntries, argCode, argRegs, stackArguments) = argTypesToArgEntries(argTypes, generalArgRegs, List.length argTypes) val clReg = case closure of [] => [] | _ => [(closureRegAddr, X8)] val retReg = [(returnAddrReg, X30)] in val argumentVector = Vector.fromList argEntries (* Start code for the function. *) val beginInstructions = argCode @ [BlockBegin{regArgs=retReg @ clReg @ argRegs, stackArgs=stackArguments }] (* The number of arguments on the stack. Needed in return instrs and tail calls. *) val currentStackArgs = List.length stackArguments val containerResults = Option.map(fn regs => ListPair.zip(regs, generalArgRegs)) containerRegs end (* TODO: Return the values of the container registers if we have multiple results. *) fun returnInstruction({stackPtr, ...}, resReg, tailCode) = let val results = getOpt(containerResults, [(resReg, X0)]) (* Return the result in X0 unless there's a container. 
*) in BlockExit(ReturnResultFromFunction{results=results, returnReg = returnAddrReg, numStackArgs=currentStackArgs}) :: (if stackPtr <> 0 then resetStackPtr{numWords=stackPtr} :: tailCode else tailCode) end fun asTarget(SpecificPReg preg) = preg | asTarget _ = newPReg() fun moveToResult(SpecificPReg tReg, code, sReg) = (moveRegister{source=sReg, dest=tReg} :: code, tReg, false) | moveToResult(AnyReg, code, sReg) = (code, sReg, false) | moveToResult(NoResult, code, sReg) = let val tReg = newPReg() in (moveRegister{source=sReg, dest=tReg} :: code, tReg, false) end (* Store a register at a given offset. This may have to use an index register if the offset is too large. *) fun storeAtWordOffset(toStore, offset, base, loadSize, tailCode) = let val wSize = opWordSize loadSize val byteOffset = offset*wSize in if offset < 4096 andalso byteOffset > ~256 then storeWithConstantOffset{base=base, source=toStore, byteOffset=byteOffset, loadType=loadSize} :: tailCode else let val indexReg = newUReg() in storeWithIndexedOffset{ base=base, source=toStore, index=indexReg, loadType=loadSize, signExtendIndex=false } :: loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=indexReg } :: tailCode end end (* Allocate a fixed size cell with a reference to the profile object if we want to trace the location of live data. Currently only used for tuples and closures. *) fun allocateWithProfileRev(n, flags, memAddr, tlCode) = let fun doAllocation(words, flags, tlCode) = let val wordsRequired = if is32in64 then (* Have to round this up to 8 bytes *) Word64.andb(Word64.fromInt(words+2), ~ 0w2) else Word64.fromInt(words+1) val bytesRequired = Word64.fromLarge(Word.toLarge wordSize) * wordsRequired val lengthWord = Word64.orb(Word64.fromInt words, Word64.<<(Word64.fromLarge(Word8.toLarge flags), if is32in64 then 0w24 else 0w56)) val lengthReg = newUReg() in storeWithConstantOffset{ source=lengthReg, base=memAddr, byteOffset= ~(Word.toInt wordSize), loadType=polyWordLoadSize } :: loadNonAddressConstant{ source=lengthWord, dest=lengthReg } :: allocateMemoryFixed{bytesRequired=bytesRequired, dest=memAddr, saveRegs=[]} :: tlCode end in if addAllocatingFunction then let val profReg = newPReg() in storeAtWordOffset(profReg, n, memAddr, polyWordLoadSize, loadAddressConstant{ source=profileObject, dest=profReg} :: doAllocation(n+1, Word8.orb(flags, Address.F_profile), tlCode)) end else doAllocation(n, flags, tlCode) end (* Return a unit result. *) fun returnUnit(target, code, exit) = let val tReg = asTarget target in (loadNonAddressConstant{source=taggedWord64 0w0, dest=tReg} :: code, tReg, exit) end (* Create a bool result from a test by returning true or false. *) fun makeBoolResultRev(condition, ccRef, target, testCode) = let val trueLab = newLabel() and falseLab = newLabel() and mergeLab = newLabel() val mergeReg = newMergeReg() in moveRegister{dest=target, source=mergeReg} :: BlockLabel mergeLab :: BlockFlow(Unconditional mergeLab) :: loadNonAddressConstant{dest=mergeReg, source=taggedWord64 0w0} :: BlockLabel falseLab :: BlockFlow(Unconditional mergeLab) :: loadNonAddressConstant{dest=mergeReg, source=taggedWord64 0w1} :: BlockLabel trueLab :: BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=trueLab, falseJump=falseLab }) :: testCode end (* Return an absolute address in both native addressing and 32-in-64. 
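In native addressing the register already holds an absolute address and is returned unchanged; in 32-in-64 an ObjectIndexAddressToAbsolute is inserted to convert the object index first.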
*) fun getAbsoluteAddress(code, baseReg) = if is32in64 then let val absReg = newUReg() in (objectIndexAddressToAbsolute{ source=baseReg, dest=absReg } :: code, absReg) end else (code, baseReg) (* Load a value aligned on a 64 or 32-bit boundary. offset is the number of units. Typically this will be a polyword. *) fun wordAddressOffset(destination, baseReg1, offset, loadOp, code) = let val dReg = asTarget destination val opWordSize = opWordSize loadOp val byteOffset = offset * opWordSize val (codeBase, baseReg) = getAbsoluteAddress(code, baseReg1) val code = if offset < 4096 andalso byteOffset > ~256 then loadWithConstantOffset{base=baseReg, dest=dReg, byteOffset=byteOffset, loadType=loadOp} :: codeBase else let val indexReg = newUReg() in loadWithIndexedOffset{ base=baseReg, dest=dReg, index=indexReg, loadType=loadOp, signExtendIndex=false } :: loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=indexReg } :: codeBase end in (code, dReg, false) end (* See if we have a container and return the entry if present. *) datatype containerType = NoContainer | ContainerOnStack of { container: stackLocn, stackOffset: int } | ContainerInRegs of preg list fun getContainerIfPresent(BICExtract(BICLoadLocal l)) = ( case Array.sub(locToPregArray, l) of StackContainer container => ContainerOnStack container | RegisterContainer rc => ContainerInRegs rc | _ => NoContainer ) | getContainerIfPresent(BICExtract(BICLoadArgument a)) = ( case Vector.sub(argumentVector, a) of ArgumentIsRegContainer rc => ContainerInRegs rc | _ => NoContainer ) | getContainerIfPresent _ = NoContainer (* General function for loads and stores. *) fun loadAndStoreWithAddress ({base=bReg1, index, offset}, loadSize, loadShift, isCAddress, loadStoreOffset, loadStoreIndex, code) = let val byteOffset = offset * loadSize (* Get the base register value *) val bCode = code val sCode = bCode (* Get any index register value. *) val (iCode, iReg1Opt) = case index of NONE => if offset < 4096 andalso byteOffset > ~256 then (sCode, NONE) (* We can use this offset. *) else let val iReg = newUReg() in (loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=iReg } :: sCode, SOME iReg) end | SOME iReg1 => let val iCode1 = sCode (* The index is a tagged integer containing the number of units (words, bytes etc). It has to be untagged. If this is a C address it may be negative. *) val iReg2 = newUReg() (* Logical shift if this is a Poly address, arithmetic shift if this is a C address. *) val iCode2 = untagValue{source=iReg1, dest=iReg2, opSize=polyWordOpSize, isSigned=isCAddress } :: iCode1 in if offset = 0 then (iCode2, SOME iReg2) else let (* If there's some constant offset add it to the index. Because it's a byte offset we need to divide it by the scale but it should always be a multiple. N.B. In 32-in-64 the index register contains a 32-bit value even when the offset is negative. *) val cReg = newUReg() and iReg3 = newUReg() val offsetAsWord = LargeWord.fromInt offset (* It could be negative if it's a C address. *) val shiftedOffset = (if isCAddress then LargeWord.~>> else LargeWord.>>) (offsetAsWord, loadShift) in (addSubRegister{ base=iReg2, shifted=cReg, dest=SomeReg iReg3, ccRef=NONE, isAdd=true, length=polyWordOpSize, shift=ShiftNone} :: loadNonAddressConstant{ source=shiftedOffset, dest=cReg } :: iCode2, SOME iReg3) end end (* If this is 32in64 get the absolute address. *) val (absBCode, absBReg) = getAbsoluteAddress(iCode, bReg1) (* If this is a C address the "base address" is actually a box containing the address. 
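The real 64-bit address therefore has to be loaded out of the box before the offset or index is applied.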
*) val (effBCode, effBReg) = if isCAddress then let val bReg = newUReg() in (loadWithConstantOffset{ base=absBReg, dest=bReg, byteOffset=0, loadType=Load64 } :: absBCode, bReg) end else (absBCode, absBReg) in case iReg1Opt of SOME iReg => loadStoreIndex(effBReg, iReg, effBCode) | NONE => loadStoreOffset(effBReg, offset, effBCode) end (* Some operations require a single absolute address. These are all ML addresses so the index/offset is always unsigned. *) fun loadAndStoreWithAbsolute (address, loadSize, loadShift, loadStore, code) = let (* Have to add the offset/index register. *) fun loadStoreOffset(bReg, 0, code) = loadStore(bReg, code) | loadStoreOffset(bReg, offset, code) = let val cReg = newUReg() and aReg = newUReg() in loadStore(aReg, addSubRegister{ base=bReg, shifted=cReg, dest=SomeReg aReg, ccRef=NONE, isAdd=true, length=OpSize64, shift=ShiftNone} :: loadNonAddressConstant{ source=LargeWord.fromInt offset, dest=cReg } :: code) end and loadStoreIndex(bReg, iReg, code) = let val aReg = newUReg() (* The index register is a number of words/bytes etc so has to be multiplied when it's added in. *) val indexShift = if loadShift = 0w0 then ShiftNone else ShiftLSL(Word8.fromLarge(Word.toLarge loadShift)) in loadStore(aReg, addSubRegister{ base=bReg, shifted=iReg, dest=SomeReg aReg, ccRef=NONE, isAdd=true, length=OpSize64, shift=indexShift} :: code) end in loadAndStoreWithAddress (address, loadSize, loadShift, false, loadStoreOffset, loadStoreIndex, code) end (* Overflow check. This raises Overflow if the condition is satisfied. Normally this will be that the overflow bit is set but for multiplication it's more complicated. This generates a single block for the function unless there is a handler. As well as reducing the size of the code this also means that overflow checks are generally BO instructions to the end of the code. Since the default branch prediction is not to take forward jumps this should improve prefetching on the normal, non-overflow, path. *) fun checkOverflow (condition, {currHandler=NONE, overflowBlock=ref(SOME overFlowLab), ...}, ccRef) = (* It's already been set and there's no surrounding handler - use this. *) let val noOverflowLab = newLabel() in [ BlockLabel noOverflowLab, BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=overFlowLab, falseJump=noOverflowLab }) ] end | checkOverflow (condition, {currHandler=NONE, overflowBlock, ...}, ccRef) = let (* *) val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() val () = overflowBlock := SOME overFlowLab in [ BlockLabel noOverflowLab, BlockExit(RaiseExceptionPacket{packetReg=packetReg}), loadAddressConstant{source=toMachineWord(Overflow), dest=packetReg}, BlockLabel overFlowLab, BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=overFlowLab, falseJump=noOverflowLab }) ] end | checkOverflow (condition, {currHandler=SOME h, ...}, ccRef) = let val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() in [ BlockLabel noOverflowLab, BlockRaiseAndHandle(RaiseExceptionPacket{packetReg=packetReg}, h), loadAddressConstant{source=toMachineWord(Overflow), dest=packetReg}, BlockLabel overFlowLab, BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=overFlowLab, falseJump=noOverflowLab }) ] end fun codeToICodeRev(BICNewenv (bindings, exp), context: context as {stackPtr=initialSp, ...}, isTail, destination, tailCode) = let (* Process a list of bindings. 
We need to accumulate the space used by any containers and reset the stack pointer at the end if necessary. *) fun doBindings([], context, tailCode) = (tailCode, context) | doBindings(BICDeclar{value=BICExtract(BICLoadLocal l), addr, ...} :: decs, context, tailCode) = let (* Giving a new name to an existing entry. This should have been removed at a higher level but it doesn't always seem to be. In particular we must treat this specially if it's a container. *) val original = Array.sub(locToPregArray, l) val () = Array.update(locToPregArray, addr, original) in doBindings(decs, context, tailCode) end | doBindings(BICDeclar{value, addr, ...} :: decs, context, tailCode) = let val (code, dest, _) = codeToICodeRev(value, context, false, AnyReg, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs [{lambda, addr, ...}] :: decs, context, tailCode) = (* We shouldn't have single entries in RecDecs but it seems to occur at the moment. *) let val dest = newPReg() val (code, _, _) = codeToICodeRev(BICLambda lambda, context, false, SpecificPReg dest, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs recDecs :: decs, context, tailCode) = let val destRegs = map (fn _ => newPReg()) recDecs val flagsValue = if is32in64 then F_closure else 0w0 (* First build the closures as mutable cells containing zeros. Set the entry in the address table to the register containing the address. *) fun makeClosure({lambda={closure, ...}, addr, ...}, dest, tailCode) = let val () = Array.update(locToPregArray, addr, PregLocation dest) val wordsRequired = List.length closure + (if is32in64 then 2 else 1) val absAddr = if is32in64 then newUReg() else dest val zeroReg = newPReg() val allocAndSetZero = loadNonAddressConstant{ source=taggedWord64 0w0, dest=zeroReg} :: allocateWithProfileRev(wordsRequired, Word8.orb(F_mutable, flagsValue), absAddr, tailCode) val (_, clearCode) = List.foldl(fn (_, (n, l)) => (n+1, storeAtWordOffset(zeroReg, n, absAddr, polyWordLoadSize, l))) (0, allocAndSetZero) closure in if is32in64 then absoluteToObjectIndex{ source=absAddr, dest=dest } :: clearCode else clearCode end val allocClosures = ListPair.foldlEq makeClosure tailCode (recDecs, destRegs) fun setClosure({lambda, ...}, dest, l) = let val absAddr = if is32in64 then newUReg() else dest val flagsReg = newUReg() (* Lock the closure by storing the flags byte without the mutable flag. TODO: We could simply use XZ here. *) in storeWithConstantOffset{ base=absAddr, source=flagsReg, byteOffset=flagsByteOffset, loadType=Load8 } :: loadNonAddressConstant{ source=Word8.toLarge flagsValue, dest=flagsReg } :: storeIntoClosure(lambda, absAddr, context, if is32in64 then objectIndexAddressToAbsolute{ source=dest, dest=absAddr } :: l else l) end val setAndLockClosures = ListPair.foldlEq setClosure allocClosures (recDecs, destRegs) in doBindings(decs, context, setAndLockClosures) end | doBindings(BICNullBinding exp :: decs, context, tailCode) = let val (code, _, _) = codeToICodeRev(exp, context, false, NoResult, tailCode) (* And discard result. 
*) in doBindings(decs, context, code) end | doBindings(BICDecContainer{ addr, size } :: decs, context as {loopArgs, stackPtr, currHandler, overflowBlock}, tailCode) = if size <= smallContainerSize then let val regs = List.tabulate(size, fn _ => newMergeReg()) val () = Array.update(locToPregArray, addr, RegisterContainer regs) in doBindings(decs, context, tailCode) end else let (* Larger container - reserve a portion of stack and zero it. *) val containerLoc = newStackLoc size val () = Array.update(locToPregArray, addr, StackContainer{container=containerLoc, stackOffset=stackPtr+size}) val zeroReg = newPReg() in doBindings(decs, {loopArgs=loopArgs, stackPtr=stackPtr+size, currHandler=currHandler, overflowBlock=overflowBlock}, tailCode <::> loadNonAddressConstant{ source=taggedWord64 0w0, dest=zeroReg } <::> pushToStack{copies=size, container=containerLoc, source=zeroReg}) end val (codeBindings, resContext as {stackPtr=finalSp, ...}) = doBindings(bindings, context, tailCode) (* If we have had a container we'll need to reset the stack *) in if initialSp <> finalSp then let val _ = finalSp >= initialSp orelse raise InternalError "codeToICode - stack ptr" val bodyReg = newPReg() and resultReg = asTarget destination val (codeExp, result, haveExited) = codeToICodeRev(exp, resContext, isTail, SpecificPReg bodyReg, codeBindings) val afterAdjustSp = if haveExited then codeExp else moveRegister{source=result, dest=resultReg} :: resetStackPtr{numWords=finalSp-initialSp} :: codeExp in (afterAdjustSp, resultReg, haveExited) end else codeToICodeRev(exp, resContext, isTail, destination, codeBindings) end | codeToICodeRev(BICExtract(BICLoadLocal l), {stackPtr, ...}, _, destination, tailCode) = ( case Array.sub(locToPregArray, l) of NoLocation => raise InternalError "codeToICodeRev - local unset" | PregLocation preg => moveToResult(destination, tailCode, preg) | StackContainer{container, stackOffset} => let val target = asTarget destination in (containerAddress{dest=target, container=container, stackOffset=stackPtr-stackOffset} :: tailCode, target, false) end | RegisterContainer _ => raise InternalError "BICExtract local: reg container" ) | codeToICodeRev(BICExtract(BICLoadArgument a), {stackPtr, ...}, _, destination, tailCode) = ( case Vector.sub(argumentVector, a) of ArgumentIsInReg{argReg, ...} => (* It was originally in a register. It's now in a preg. *) moveToResult(destination, tailCode, argReg) | ArgumentIsOnStack{stackOffset, stackReg} => (* Pushed before call. *) let val target = asTarget destination in (loadStack{wordOffset=stackOffset+stackPtr, container=stackReg, field=0, dest=target} :: tailCode, target, false) end | ArgumentIsRegContainer _ => raise InternalError "BICExtract argument: reg container" ) | codeToICodeRev(BICExtract(BICLoadClosure c), _, _, destination, tailCode) = let (* Add the number of words for the code address. This is 1 in native but 2 in 32-in-64. *) val offset = if is32in64 then c+2 else c+1 in if c >= List.length closure then raise InternalError "BICExtract: closure" else (); wordAddressOffset(destination, closureRegAddr, offset, polyWordLoadSize, tailCode) end | codeToICodeRev(BICExtract BICLoadRecursive, _, _, destination, tailCode) = (* If the closure is empty we must use the constant. We can't guarantee that the caller will actually load the closure register if it knows the closure is empty. 
*) ( case closure of [] => let val dReg = asTarget destination in (loadAddressConstant{source=closureAsAddress resultClosure, dest=dReg} :: tailCode, dReg, false) end | _ => moveToResult(destination, tailCode, closureRegAddr) ) | codeToICodeRev(BICConstnt(w, _), _, _, destination, tailCode) = let val dReg = asTarget destination val instr = if isShort w then (* When converting to Word64 we do NOT want to use sign-extension. In 32-in-64 signed fixed-precision ints need to have zeros in the top 32 bits. *) loadNonAddressConstant{source=taggedWord64(Word64.fromLarge(Word.toLarge(toShort w))), dest=dReg} else loadAddressConstant{source=w, dest=dReg} in (instr :: tailCode, dReg, false) end | codeToICodeRev(BICField{base, offset}, context, _, destination, tailCode) = let val (codeBase, baseReg, _) = codeToICodeRev(base, context, false, AnyReg, tailCode) in wordAddressOffset(destination, baseReg, offset, polyWordLoadSize, codeBase) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, NoResult, tailCode) = let (* If we don't want the result but are only evaluating for side-effects we may be able to optimise special cases. This was easier in the forward case but for now we don't bother and leave it to the lower levels. *) val startElse = newLabel() and skipElse = newLabel() val codeTest = codeConditionRev(test, context, false, startElse, tailCode) val (codeThen, _, _) = codeToICodeRev(thenPt, context, isTail, NoResult, codeTest) val (codeElse, _, _) = codeToICodeRev(elsePt, context, isTail, NoResult, BlockLabel startElse :: BlockFlow(Unconditional skipElse) :: codeThen) in returnUnit(NoResult, BlockLabel skipElse :: codeElse, false(*??*)) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, destination, tailCode) = let (* Because we may push the result onto the stack we have to create a new preg to hold the result and then copy that to the final result. *) (* If this is a tail each arm will exit separately and neither will return a result. *) val target = asTarget destination val condResult = newMergeReg() val thenTarget = if isTail then newPReg() else condResult val startElse = newLabel() val testCode = codeConditionRev(test, context, false, startElse, tailCode) (* Put the result in the target register. *) val (thenCode, _, thenExited) = codeToICodeRev(thenPt, context, isTail, SpecificPReg thenTarget, testCode) (* Add a jump round the else-part except that if this is a tail we return. The then-part could have exited e.g. with a raise or a loop. *) val (exitThen, thenLabel, elseTarget) = if thenExited then (thenCode, [], target (* Can use original target. *)) else if isTail then (returnInstruction(context, thenTarget, thenCode), [], newPReg()) else let val skipElse = newLabel() in (BlockFlow(Unconditional skipElse) :: thenCode, [moveRegister{source=condResult, dest=target}, BlockLabel skipElse], condResult) end val (elseCode, _, elseExited) = codeToICodeRev(elsePt, context, isTail, SpecificPReg elseTarget, BlockLabel startElse :: exitThen) (* Add a return to the else-part if necessary so we will always exit on a tail. 
*) val exitElse = if isTail andalso not elseExited then returnInstruction(context, elseTarget, elseCode) else elseCode in (thenLabel @ exitElse, target, isTail orelse thenExited andalso elseExited) end | codeToICodeRev(BICUnary instr, context, isTail, destination, tailCode) = codeToICodeUnaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICBinary instr, context, isTail, destination, tailCode) = codeToICodeBinaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICTagTest{test, tag=tagValue, ...}, context, isTail, destination, tailCode) = (* Check the "tag" word of a union (datatype). N.B. Not the same as testing the tag bit of a word. Just generate it as a general word comparison. The optimiser will sort out whether the tag value can be an immediate. *) codeToICodeRev(BICBinary{oper=WordComparison{test=TestEqual, isSigned=false}, arg1=test, arg2=BICConstnt(toMachineWord tagValue, [])}, context, isTail, destination, tailCode) | codeToICodeRev(BICTuple fields, context, _, destination, tailCode) = let val target = asTarget destination (* The allocator sets the register to the absolute address. It has to be converted to an object pointer in 32-in-64. *) val absAddr = if is32in64 then newUReg() else target fun loadFields([], n, tlCode) = allocateWithProfileRev(n, 0w0, absAddr, tlCode) | loadFields((f as BICConstnt _) :: rest, n, tlCode) = let (* Unlike the X86 we still need to load a constant into a register in order to store it in the new tuple. However, it's better to leave that until after the allocation and move it then. That way we can use the same register for different constants if we have a very large tuple. *) val restAndAlloc = loadFields(rest, n+1, tlCode) val (code1, source, _) = codeToICodeRev(f, context, false, AnyReg, restAndAlloc) in storeAtWordOffset(source, n, absAddr, polyWordLoadSize, code1) end | loadFields(f :: rest, n, tlCode) = let val (code1, source, _) = codeToICodeRev(f, context, false, AnyReg, tlCode) val restAndAlloc = loadFields(rest, n+1, code1) in storeAtWordOffset(source, n, absAddr, polyWordLoadSize, restAndAlloc) end val allocAndStore = loadFields(fields, 0, tailCode) val code = if is32in64 then absoluteToObjectIndex{source=absAddr, dest=target} :: allocAndStore else allocAndStore in (code, target, false) end | codeToICodeRev(BICRaise exc, context as { currHandler, ...}, _, destination, tailCode) = let val (code, packetReg, _) = codeToICodeRev(exc, context, false, AnyReg, tailCode) val raiseCode = RaiseExceptionPacket{packetReg=packetReg} val block = case currHandler of NONE => BlockExit raiseCode | SOME h => BlockRaiseAndHandle(raiseCode, h) in returnUnit(destination, block :: code, true (* Always exits *)) end | codeToICodeRev(BICEval{function, argList, ...}, context as { currHandler, ...}, isTail, destination, tailCode) = let val target = asTarget destination (* Create pregs for the closure and each argument. *) val clPReg = newPReg() (* If we have a constant closure we can go directly to the entry point. If the closure is a single word we don't need to load the closure register. *) val (functionCode, closureEntry, callKind) = case function of BICConstnt(addr, _) => let val addrAsAddr = toAddress addr (* If this is a closure we're still compiling we can't get the code address. However if this is directly recursive we can use the recursive convention. 
*) in if wordEq(closureAsAddress resultClosure, addr) then (tailCode, [], Recursive) else if flags addrAsAddr <> Address.F_words andalso flags addrAsAddr <> Address.F_closure then (loadAddressConstant{source=addr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], FullCall) else if is32in64 then (* The code address is a 64-bit value so we have to load it at run-time. The X86 version passes the closure address here and generates a relative CALL/JMP. The actual offset is computed by the RTS. For the moment just use a full call. *) (loadAddressConstant{source=addr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], FullCall) else (* Native 64-bits. *) let val addrLength = length addrAsAddr val _ = addrLength >= 0w1 orelse raise InternalError "BICEval address" val codeAddr = loadWord(addrAsAddr, 0w0) val _ = isCode (toAddress codeAddr) orelse raise InternalError "BICEval address not code" in if addrLength = 0w1 then (tailCode, [], ConstantCode codeAddr) else (loadAddressConstant{source=addr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], ConstantCode codeAddr) end end | BICExtract BICLoadRecursive => ( (* If the closure is empty we don't need to load X8 *) case closure of [] => (tailCode, [], Recursive) | _ => (moveRegister {source=closureRegAddr, dest=clPReg} :: tailCode, [(ArgInReg clPReg, X8)], Recursive) ) | function => (* General case. *) (#1 (codeToICodeRev(function, context, false, SpecificPReg clPReg, tailCode)), [(ArgInReg clPReg, X8)], FullCall) local (* Load the first arguments into registers and the rest to the stack. *) fun loadArgs ([], _, tailCode) = (tailCode, [], []) | loadArgs ((arg, _) :: args, gReg::gRegs, tailCode) = let (* General register argument. *) val (c, r, _) = codeToICodeRev(arg, context, false, AnyReg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, gRegs, c) in (code, (ArgInReg r, gReg) :: regArgs, stackArgs) end | loadArgs ((arg, _) :: args, [], tailCode) = let (* Stack argument. *) val (c, r, _) = codeToICodeRev(arg, context, false, AnyReg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, [], c) in (code, regArgs, ArgInReg r :: stackArgs) end fun isSmallContainer(ContainerType s) = s <= smallContainerSize | isSmallContainer _ = false in val (codeArgs, regArgs, stackArgs) = loadArgs(List.filter(not o isSmallContainer o #2) argList, generalArgRegs, functionCode) end (* If this is at the end of the function and the result types are the same we can use a tail-recursive call. *) val tailCall = isTail (*andalso resultType = fnResultType*) val callCode = if tailCall then let val {stackPtr, ...} = context (* The number of arguments currently on the stack. *) val currentStackArgCount = currentStackArgs val newStackArgCount = List.length stackArgs (* The offset of the first argument. Offsets can be negative. *) val stackOffset = stackPtr fun makeStackArgs([], _) = [] | makeStackArgs(arg::args, offset) = {src=arg, stack=offset} :: makeStackArgs(args, offset-1) val stackArgs = makeStackArgs(stackArgs, currentStackArgCount-1) (* The stack adjustment needed to compensate for any items that have been pushed and the differences in the number of arguments. May be positive or negative. *) val stackAdjust = currentStackArgCount - newStackArgCount (* Add an entry for the return address to the register arguments. 
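Since X30 is the AArch64 link register, handing it our own return address means that when the callee returns it goes straight back to our caller, which is exactly the behaviour a tail call needs.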
*) in BlockExit(TailRecursiveCall{regArgs=(ArgInReg returnAddrReg, X30) :: closureEntry @ regArgs, stackArgs=stackArgs, stackAdjust = stackAdjust, currStackSize=stackOffset, callKind=callKind}) :: codeArgs end else let (* See if there is a container argument. *) val containerArg = List.find(fn (_, ContainerType _) => true | _ => false) argList val containerValue = case containerArg of SOME(argVal, _) => getContainerIfPresent argVal | NONE => NoContainer (* When a container is passed as an argument we put the address into a register. Normally the container will be referenced after the call in order to extract the values but if it's discarded we need to make sure it will continue to be referenced at least as far as the call. This isn't a problem for the X86 code-generator since container addresses are a form of the "argument" datatype. *) val stackContainers = case containerValue of ContainerOnStack{container, ...} => [container] | _ => [] (* Get the results. If we're returning the result through a container the target isn't used so we return unit. *) val (results, setTarget) = case containerValue of ContainerInRegs regs => (ListPair.zip(regs, generalArgRegs), [loadNonAddressConstant{source=taggedWord64 0w0, dest=target}]) | ContainerOnStack _ => ([], [loadNonAddressConstant{source=taggedWord64 0w0, dest=target}]) | NoContainer => ([(target, X0)], []) val call = FunctionCall{regArgs=closureEntry @ regArgs, stackArgs=stackArgs, dests=results, callKind=callKind, saveRegs=[], containers=stackContainers} val callBlock = case currHandler of NONE => BlockSimple call :: codeArgs | SOME h => BlockOptionalHandle{call=call, handler=h, label=newLabel()} :: codeArgs in callBlock <@> setTarget end in (callCode, target, tailCall (* We've exited if this was a tail jump *)) end | codeToICodeRev(BICNullary{oper=BuiltIns.GetCurrentThreadId}, _, _, destination, tailCode) = (* Get the ID of the current thread. *) let val target = asTarget destination in (getThreadId{dest=target} :: tailCode, target, false) end - | codeToICodeRev(BICNullary{oper=BuiltIns.CheckRTSException}, _, _, destination, tailCode) = + | codeToICodeRev(BICNullary{oper=BuiltIns.CPUPause}, _, _, destination, tailCode) = (* This is now done in the RTS call code. *) - returnUnit(destination, tailCode, false) + returnUnit(destination, tailCode <::> cpuYield, false) | codeToICodeRev(BICNullary {oper=CreateMutex}, _, _, destination, tailCode) = let (* Allocate memory for a mutex. Use a native word as a mutable, weak, no-overwrite, byte cell which is the same as a volatileRef. This ensures that it will always be cleared when it is loaded even if it was locked when it was saved. *) val target = asTarget destination val flags = Word8.orb(F_mutable, Word8.orb(F_weak, Word8.orb(F_noOverwrite, F_bytes))) (* 0wx69 *) val absAddr = if is32in64 then newUReg() else target val zeroReg = newUReg() val allocAndStore = storeWithConstantOffset{ source=zeroReg, base=absAddr, byteOffset=0, loadType=Load64 } :: loadNonAddressConstant{source=0w0, dest=zeroReg} :: allocateWithProfileRev(if is32in64 then 2 else 1, flags, absAddr, tailCode) val code = if is32in64 then absoluteToObjectIndex{source=absAddr, dest=target} :: allocAndStore else allocAndStore in (code, target, false) end | codeToICodeRev(BICArbitrary { oper=ArithMult, longCall, ... }, context, isTail, destination, tailCode) = (* Just call the long function to do this. Overflow detection makes this too complicated. 
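For addition and subtraction, by contrast, the short path below can work on the tagged values directly: with n represented as 2*n+1, (2a+1) + ((2b+1) - 1) = 2(a+b)+1 and (2a+1) - ((2b+1) - 1) = 2(a-b)+1, so removing the tag from one operand and then doing a flag-setting add or subtract produces the tagged result, with the overflow flag routing out-of-range values to the long-precision call.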
*) codeToICodeRev(longCall, context, isTail, destination, tailCode) | codeToICodeRev(BICArbitrary { oper, shortCond, arg1, arg2, longCall }, context, _, destination, tailCode) = let val startLong = newLabel() and resultLabel = newLabel() val condResult = newMergeReg() (* Test to see if the arguments are short and go straight to the long case if not. *) val testCode = codeConditionRev(shortCond, context, false, startLong, tailCode) (* Do the short case *) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, testCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* We need to subtract the tag from one of the arguments and then do the addition. The optimiser will do the subtraction at compile time if we subtract from a constant so if this is an Add we try to put the constant in the second arg. *) val (firstReg, secondReg) = case (arg1, oper) of (BICConstnt _, ArithAdd) => (aReg2, aReg1) | _ => (aReg1, aReg2) (* Generate code for the short case. Put the result in the merge register. Jump to the result if there's no overflow and to the long case if there is. *) val codeShort = case oper of ArithAdd => let val uReg = newUReg() and chkOverflow = newCCRef() in BlockFlow(Conditional{ ccRef=chkOverflow, condition=CondOverflow, trueJump=startLong, falseJump=resultLabel }) :: addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg condResult, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=true, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code end | ArithSub => let val uReg = newUReg() and chkOverflow = newCCRef() in BlockFlow(Conditional{ ccRef=chkOverflow, condition=CondOverflow, trueJump=startLong, falseJump=resultLabel }) :: addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg condResult, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=false, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code end | _ => raise InternalError "BICArbitrary: unimplemented operation" (* Code for the long case. Put the result into the merge register. *) (* TODO: This could use a tail call if this is at the end of the function. *) val (codeLong, _, _) = codeToICodeRev(longCall, context, false, SpecificPReg condResult, BlockLabel startLong :: codeShort) val target = asTarget destination (* Copy the merge register into the result. *) val finalCode = moveRegister{source=condResult, dest=target} :: BlockLabel resultLabel :: codeLong in (finalCode, target, false) end | codeToICodeRev(BICLambda(lambda as { closure = [], ...}), _, _, destination, tailCode) = (* Empty closure - create a constant closure for any recursive calls. *) let val closure = makeConstantClosure() val () = codeFunctionToArm64(lambda, debugSwitches, closure) val dReg = asTarget destination (* Return the closure itself as the value. *) in (BlockSimple(LoadAddressConstant{source=closureAsAddress closure, dest=dReg}) :: tailCode, dReg, false) end | codeToICodeRev(BICLambda(lambda as { closure, ...}), context, _, destination, tailCode) = (* Non-empty closure. Ignore stack closure option at the moment. *) let val wordsRequired = List.length closure + (if is32in64 then 2 else 1) val target = asTarget destination val absAddr = if is32in64 then newUReg() else target (* The values we're storing are all either constants or local/closure variables so we can allocate the memory and then store into it.
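The cell built here is the code address followed by the values of the closure entries; in 32-in-64 the code address is a full 64-bit value and so occupies two of the 32-bit words, which is why wordsRequired above adds 2 rather than 1.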
*) val allocCode = allocateWithProfileRev(wordsRequired, if is32in64 then F_closure else 0w0, absAddr, tailCode) val storeCode = storeIntoClosure(lambda, absAddr, context, allocCode) val finalCode = if is32in64 then BlockSimple(AbsoluteToObjectIndex{source=absAddr, dest=target}) :: storeCode else storeCode in (finalCode, target, false) end | codeToICodeRev(BICCase { cases, test, default, isExhaustive, firstIndex}, context, isTail, destination, tailCode) = let (* We have to create a new preg for the result in case we need to push it to the stack. *) val targetReg = newMergeReg() local val (testCode, initialTestReg, _) = codeToICodeRev(test, context, false, AnyReg, tailCode) (* Subtract the minimum even if it is zero to remove the tag. This leaves us with a shifted but untagged value. Don't check for overflow. Instead allow large values to wrap around and check later. *) val cReg1 = newUReg() val subValue = taggedWord64(Word64.fromLarge(Word.toLargeX firstIndex)) in val testReg = newUReg() val testCode = addSubRegister{ base=initialTestReg, shifted=cReg1, dest=SomeReg testReg, ccRef=NONE, isAdd=false, length=polyWordOpSize, shift=ShiftNone} :: loadNonAddressConstant{ source=subValue, dest=cReg1 } :: testCode end val (rangeCheck, extraDefaults) = if isExhaustive then (testCode, []) else let (* Check the value is within the number of cases, *2 because this is shifted. *) val cReg2 = newUReg() and ccRef1 = newCCRef() val nCases = List.length cases val continueLab = newLabel() and defaultLab1 = newLabel() val rangeCheck = BlockLabel continueLab :: BlockFlow(Conditional{ccRef=ccRef1, condition=CondCarrySet, trueJump=defaultLab1, falseJump=continueLab}) :: addSubRegister{base=testReg, shifted=cReg2, dest=ZeroReg, ccRef=SOME ccRef1, isAdd=false, length=OpSize64, shift=ShiftNone} :: loadNonAddressConstant{ source=Word64.fromInt nCases * 0w2, dest=cReg2 } :: testCode in (rangeCheck, [defaultLab1]) end (* Make a label for each item in the list. *) val codeLabels = map (fn _ => newLabel()) cases (* Create an exit label in case it's needed. *) val labelForExit = newLabel() (* Generate the code for each of the cases and the default. We need to put an unconditional branch after each to skip the other cases. *) fun codeCases (SOME c :: otherCases, startLabel :: otherLabels, tailCode) = let val caseTarget = if isTail then newPReg() else targetReg (* Put in the case with a jump to the end of the sequence. *) val (codeThisCase, _, caseExited) = codeToICodeRev(c, context, isTail, SpecificPReg caseTarget, BlockLabel startLabel :: tailCode) val exitThisCase = if caseExited then codeThisCase else if isTail then returnInstruction(context, caseTarget, codeThisCase) else BlockFlow(Unconditional labelForExit) :: codeThisCase in codeCases(otherCases, otherLabels, exitThisCase) end | codeCases(NONE :: otherCases, _ :: otherLabels, tailCode) = codeCases(otherCases, otherLabels, tailCode) | codeCases ([], [], tailCode) = let (* We need to add labels for all the gaps we filled and also for a "default" label for the indexed-case instruction itself as well as any range checks. 
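The range check mentioned here works on that shifted, untagged value: with firstIndex = 3 and four cases, for example, a test value of 5 becomes (2*5+1) - (2*3+1) = 4 and passes the unsigned comparison against 2*4 = 8, a value of 9 becomes 12 and is sent to the default, and a value of 1 wraps round to a very large unsigned number and is likewise caught.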
*) fun addDefault (startLabel, NONE, l) = BlockLabel startLabel :: l | addDefault (_, SOME _, l) = l fun asForward l = BlockLabel l val dLabs = map asForward extraDefaults @ tailCode val defLabels = ListPair.foldlEq addDefault dLabs (codeLabels, cases) val defaultTarget = if isTail then newPReg() else targetReg val (defaultCode, _, defaultExited) = codeToICodeRev(default, context, isTail, SpecificPReg defaultTarget, defLabels) in (* Put in the default. Because this is the last we don't need to jump round it. However if this is a tail and we haven't exited we put in a return. That way the case will always have exited if this is a tail. *) if isTail andalso not defaultExited then returnInstruction(context, defaultTarget, defaultCode) else defaultCode end | codeCases _ = raise InternalError "codeCases: mismatch" val codedCases = codeCases(cases, codeLabels, BlockFlow(IndexedBr codeLabels) :: BlockSimple(IndexedCaseOperation{testReg=testReg}) :: rangeCheck) (* We can now copy to the target. If we need to push the result this load will be converted into a push. *) val target = asTarget destination val copyToTarget = if isTail then codedCases else moveRegister{source=targetReg, dest=target} :: BlockLabel labelForExit :: codedCases in (copyToTarget, target, isTail (* We have always exited on a tail. *)) end | codeToICodeRev(BICBeginLoop {loop, arguments}, context as { stackPtr, currHandler, overflowBlock, ...}, isTail, destination, tailCode) = let val target = asTarget destination fun codeArgs ([], tailCode) = ([], tailCode) | codeArgs (({value, addr}, _) :: rest, tailCode) = let val pr = newPReg() val () = Array.update(locToPregArray, addr, PregLocation pr) val (code, _, _) = codeToICodeRev(value, context, false, SpecificPReg pr, tailCode) val (pregs, othercode) = codeArgs(rest, code) in (pr::pregs, othercode) end val (loopRegs, argCode) = codeArgs(arguments, tailCode) val loopLabel = newLabel() val (loopBody, _, loopExited) = codeToICodeRev(loop, {loopArgs=SOME (loopRegs, loopLabel, stackPtr), stackPtr=stackPtr, currHandler=currHandler, overflowBlock=overflowBlock }, isTail, SpecificPReg target, BlockLabel loopLabel :: BlockSimple BeginLoop :: argCode) in (loopBody, target, loopExited) end | codeToICodeRev(BICLoop args, context as {loopArgs=SOME (loopRegs, loopLabel, loopSp), stackPtr, currHandler, ...}, _, destination, tailCode) = let val target = asTarget destination (* Registers to receive the evaluated arguments. We can't put the values into the loop variables yet because the values could depend on the current values of the loop variables. *) val argPRegs = map(fn _ => newPReg()) args val codeArgs = ListPair.foldlEq(fn ((arg, _), pr, l) => #1 (codeToICodeRev(arg, context, false, SpecificPReg pr, l))) tailCode (args, argPRegs) val jumpArgs = ListPair.mapEq(fn (s, l) => {src=ArgInReg s, dst=l}) (argPRegs, loopRegs) (* If we've allocated a container in the loop we have to remove it before jumping back. *) val stackReset = if loopSp = stackPtr then codeArgs else resetStackPtr{numWords=stackPtr-loopSp} :: codeArgs val jumpLoop = JumpLoop{regArgs=jumpArgs, stackArgs=[], checkInterrupt=SOME[]} (* "checkInterrupt" could result in a Interrupt exception so we treat this like a function call. 
*) val code = case currHandler of NONE => BlockFlow(Unconditional loopLabel) :: BlockSimple jumpLoop :: stackReset | SOME h => BlockOptionalHandle{call=jumpLoop, handler=h, label=loopLabel} :: stackReset in (code, target, true) end | codeToICodeRev(BICLoop _, {loopArgs=NONE, ...}, _, _, _) = raise InternalError "BICLoop without BICBeginLoop" (* Copy the source tuple into the container. There are important special cases for both the source tuple and the container. If the source tuple is a BICTuple we have the fields and can store them without creating a tuple on the heap. If the destination is a local container we can store directly into the stack. *) | codeToICodeRev(BICSetContainer{container, tuple, filter}, context as {stackPtr, ...}, _, destination, tailCode) = let local fun createStore containerReg (source, destWord, tail) = storeAtWordOffset(source, destWord, containerReg, Load64, tail) in val (codeContainer, storeInstr) = case getContainerIfPresent container of ContainerOnStack{container, stackOffset} => let fun store(source, destWord, tail) = storeToStack{source=source, container=container, field=destWord, stackOffset=stackPtr-stackOffset+destWord} :: tail in (tailCode, store) end | ContainerInRegs regs => let fun copy(source, destWord, tail) = tail <::> moveRegister{source=source, dest=List.nth(regs, destWord)} in (tailCode, copy) end | NoContainer => let val containerTarget = newPReg() val (codeContainer, _, _) = codeToICodeRev(container, context, false, SpecificPReg containerTarget, tailCode) in (codeContainer, createStore containerTarget) end end val filterLength = BoolVector.length filter val code = case tuple of BICTuple cl => let (* In theory it's possible that the tuple could contain fields that are not used but nevertheless need to be evaluated for their side-effects. Create all the fields and push to the stack. *) fun codeField(arg, (regs, tailCode)) = let val (c, r, _) = codeToICodeRev(arg, context, false, AnyReg, tailCode) in (r :: regs, c) end val (pregsRev, codeFields) = List.foldl codeField ([], codeContainer) cl val pregs = List.rev pregsRev fun copyField(srcReg, (sourceWord, destWord, tailCode)) = if sourceWord < filterLength andalso BoolVector.sub(filter, sourceWord) then (sourceWord+1, destWord+1, storeInstr(srcReg, destWord, tailCode)) else (sourceWord+1, destWord, tailCode) val (_, _, resultCode) = List.foldl copyField (0, 0, codeFields) pregs in resultCode end | tuple => let (* Copy a heap tuple. It is possible that this is another container in which case we must load the fields directly. We mustn't load its address and then copy because loading the address would be the last reference and might cause the container to be reused prematurely. ??? Is that an old comment ?? 
*) val (codeTuple, loadField) = case getContainerIfPresent tuple of ContainerOnStack {container, stackOffset} => let fun getAddr(destReg, sourceWord, tail) = loadStack{dest=destReg, wordOffset=stackPtr-stackOffset+sourceWord, container=container, field=sourceWord} :: tail in (codeContainer, getAddr) end | ContainerInRegs regs => let fun copyReg(destReg, sourceWord, tail) = tail <::> moveRegister{dest=destReg, source=List.nth(regs, sourceWord)} in (codeContainer, copyReg) end | NoContainer => let val (codeTuple, tupleTarget, _) = codeToICodeRev(tuple, context, false, AnyReg, codeContainer) fun loadField(destReg: preg, sourceWord: int, tail): blockStruct list = let val (code, _, _) = wordAddressOffset(SpecificPReg destReg, tupleTarget, sourceWord, polyWordLoadSize, tail) in code end in (codeTuple, loadField) end fun copyContainer(sourceWord, destWord, tailCode) = if sourceWord = filterLength then tailCode else if BoolVector.sub(filter, sourceWord) then let val loadReg = newPReg() val code = storeInstr(loadReg, destWord, loadField(loadReg, sourceWord, tailCode)) in copyContainer(sourceWord+1, destWord+1, code) end else copyContainer(sourceWord+1, destWord, tailCode) in copyContainer(0, 0, codeTuple) end in returnUnit(destination, code, false) end | codeToICodeRev(BICLoadContainer{base, offset}, context as {stackPtr, ...}, _, destination, tailCode) = ( case getContainerIfPresent base of ContainerOnStack {container, stackOffset} => let (* If this is a local container we extract the field. *) val target = asTarget destination val finalOffset = stackPtr-stackOffset+offset val _ = finalOffset >= 0 orelse raise InternalError "offset" in (BlockSimple(LoadStack{wordOffset=finalOffset, container=container, field=offset, dest=target}) :: tailCode, target, false) end | NoContainer => let val (codeBase, baseEntry, _) = codeToICodeRev(base, context, false, AnyReg, tailCode) in wordAddressOffset(destination, baseEntry, offset, Load64, codeBase) end | ContainerInRegs regs => let (* Always copy this into a new register because the source will be a merge reg. *) val target = asTarget destination in (moveRegister{source=List.nth(regs, offset), dest=target} :: tailCode, target, false) end ) | codeToICodeRev(BICLoadOperation{ kind, address}, context, _, destination, tailCode) = codeLoadOperation(kind, address, context, asTarget destination, tailCode) | codeToICodeRev(BICStoreOperation{ kind, address, value}, context, _, destination, tailCode) = codeStoreOperation(kind, address, value, context, destination, tailCode) | codeToICodeRev(BICBlockOperation{ kind=BlockOpMove{isByteMove}, sourceLeft, destRight, length }, context, _, destination, tailCode) = (* Assume these are copying immutable data i.e. vector to vector and string to string. The simplifier now assumes that when optimising short constant moves e.g. concatenating with a constant string. *) let (* Move bytes or words from the source to the destination. Need to get the start addresses and length into new registers because they will be modified. *) val (leftAddr, codeLft) = addressToPregAddress(sourceLeft, context, tailCode) val (rightAddr, codeRt) = addressToPregAddress(destRight, context, codeLft) val (codeLength, lengthReg, _) = codeToICodeRev(length, context, false, AnyReg, codeRt) val loadOp = if isByteMove then Load8 else if is32in64 then Load32 else Load64 (* This threads the calls through two calls to loadAndStoreWithAbsolute to compute the addresses. 
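Concretely, getDestAndMove below is the continuation passed to loadAndStoreWithAbsolute for the source (left) address, and doMove inside it is the continuation for the destination (right) address, so by the time blockMove is emitted both absolute addresses and the untagged length have been copied into fresh registers that the block move is free to overwrite.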
*) fun getDestAndMove(ltReg, tailCode) = let fun doMove (rtReg, code) = let val lengthReg2 = newUReg() and ltReg2 = newUReg() and rtReg2 = newUReg() in blockMove{ srcAddr=ltReg2, destAddr=rtReg2, length=lengthReg2, isByteMove=isByteMove } :: moveRegister{dest=rtReg2, source=rtReg} :: moveRegister{dest=ltReg2, source=ltReg} :: untagValue{dest=lengthReg2, source=lengthReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAbsolute (rightAddr, opWordSize loadOp, loadShift loadOp, doMove, tailCode) end in returnUnit(destination, loadAndStoreWithAbsolute (leftAddr, opWordSize loadOp, loadShift loadOp, getDestAndMove, codeLength), false) end | codeToICodeRev(BICBlockOperation{ kind=BlockOpEqualByte, sourceLeft, destRight, length }, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() (* Compare bytes for equality. Need to get the start addresses and length into new registers because they will be modified. *) val (leftAddr, codeLft) = addressToPregAddress(sourceLeft, context, tailCode) val (rightAddr, codeRt) = addressToPregAddress(destRight, context, codeLft) val (codeLength, lengthReg, _) = codeToICodeRev(length, context, false, AnyReg, codeRt) (* This threads the calls through two calls to loadAndStoreWithAbsolute to compute the addresses. *) fun getRightAndCompare(ltReg, tailCode) = let fun doComparison (rtReg, code) = let val lengthReg2 = newUReg() and ltReg2 = newUReg() and rtReg2 = newUReg() in compareByteVectors{ vec1Addr=ltReg2, vec2Addr=rtReg2, length=lengthReg2, ccRef=ccRef } :: moveRegister{dest=rtReg2, source=rtReg} :: moveRegister{dest=ltReg2, source=ltReg} :: untagValue{dest=lengthReg2, source=lengthReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAbsolute (rightAddr, opWordSize Load8, loadShift Load8, doComparison, tailCode) end val testCode = loadAndStoreWithAbsolute (leftAddr, opWordSize Load8, loadShift Load8, getRightAndCompare, codeLength) in (makeBoolResultRev(CondEqual, ccRef, target, testCode), target, false) end | codeToICodeRev(BICBlockOperation{ kind=BlockOpCompareByte, sourceLeft, destRight, length }, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() (* Similar to OpEqualByte except it returns -1, 0, +1 depending on the condition code. *) (* Compare bytes for equality. Need to get the start addresses and length into new registers because they will be modified. *) val (leftAddr, codeLft) = addressToPregAddress(sourceLeft, context, tailCode) val (rightAddr, codeRt) = addressToPregAddress(destRight, context, codeLft) val (codeLength, lengthReg, _) = codeToICodeRev(length, context, false, AnyReg, codeRt) (* This threads the calls through two calls to loadAndStoreWithAbsolute to compute the addresses. *) fun getRightAndCompare(ltReg, tailCode) = let fun doComparison (rtReg, code) = let val lengthReg2 = newUReg() and ltReg2 = newUReg() and rtReg2 = newUReg() val exitLab = newLabel() and labGreater = newLabel() and labNotGreater = newLabel() and labLess = newLabel() and labNotLess = newLabel() val mergeResult = newMergeReg() val taggedMinus1 = if is32in64 then 0wxffffffff else 0wxffffffffffffffff in (* Compare the words then a series of comparisons to set the result. TODO; The old code-generator makes the "equal" exit of compareByteVectors jump directly to code to set the result to zero. 
It then uses loadNonAddress(X0, Word64.fromInt(tag 1)) followed by conditionalSetInverted{regD=X0, regTrue=X0, regFalse=XZero, cond=CondUnsignedHigher} to set the result to one or minus one. N.B. This needs to use a 32-bit operation on 32-in-64. *) moveRegister{dest=target, source=mergeResult} :: BlockLabel exitLab :: loadNonAddressConstant{source=taggedWord64 0w1, dest=mergeResult} :: BlockLabel labGreater :: BlockFlow(Unconditional exitLab) :: loadNonAddressConstant{source=taggedMinus1, dest=mergeResult} :: BlockLabel labLess :: BlockFlow(Unconditional exitLab) :: loadNonAddressConstant{source=taggedWord64 0w0, dest=mergeResult} :: BlockLabel labNotGreater :: BlockFlow(Conditional{ ccRef=ccRef, condition=CondUnsignedHigher, trueJump=labGreater, falseJump=labNotGreater }) :: BlockLabel labNotLess :: BlockFlow(Conditional{ ccRef=ccRef, condition=CondCarryClear, trueJump=labLess, falseJump=labNotLess }) :: compareByteVectors{ vec1Addr=ltReg2, vec2Addr=rtReg2, length=lengthReg2, ccRef=ccRef } :: moveRegister{dest=rtReg2, source=rtReg} :: moveRegister{dest=ltReg2, source=ltReg} :: untagValue{dest=lengthReg2, source=lengthReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAbsolute (rightAddr, opWordSize Load8, loadShift Load8, doComparison, tailCode) end val testCode = loadAndStoreWithAbsolute (leftAddr, opWordSize Load8, loadShift Load8, getRightAndCompare, codeLength) in (testCode, target, false) end | codeToICodeRev(BICAllocateWordMemory {numWords, flags, initial }, context, _, destination, tailCode) = let (* Allocate a block of memory and initialise it. *) val target = asTarget destination val (codeSize, sizeReg, _) = codeToICodeRev(numWords, context, false, AnyReg, tailCode) val (codeFlags, flagsReg, _) = codeToICodeRev(flags, context, false, AnyReg, codeSize) val (codeInit, initReg, _) = codeToICodeRev(initial, context, false, AnyReg, codeFlags) val uSizeReg = newUReg() and shiftFReg = newUReg() and lengthWord = newUReg() val absAddr = if is32in64 then newUReg() else target val untagSize = untagValue{source=sizeReg, dest=uSizeReg, opSize=polyWordOpSize, isSigned=false} :: codeInit val allocateMem = allocateMemoryVariable{ size=uSizeReg, dest=absAddr, saveRegs=[]} :: untagSize (* Make the length word by first shifting the flags into the length word reg by 55 or 23 bits. This puts the tag bit in the top bit of the size. Then insert the size into this which will overwrite the flag's tag bit. *) val makeLengthWord = bitFieldInsert{ source=uSizeReg, destAsSource=shiftFReg, dest=lengthWord, length=polyWordOpSize, immr=0w0 (*bit 0*), imms=if is32in64 then 0w23 else 0w55 (*width-1*) } :: shiftConstant{direction=Arm64ICode.ShiftLeft, dest=shiftFReg, source=flagsReg, shift=if is32in64 then 0w23 else 0w55, opSize=polyWordOpSize } :: allocateMem val setLengthWordAndInit = initialiseMem{ size=uSizeReg, addr=absAddr, init=initReg} :: storeWithConstantOffset{ source=lengthWord, base=absAddr, byteOffset= ~(Word.toInt wordSize), loadType=polyWordLoadSize } :: makeLengthWord val finalCode = if is32in64 then absoluteToObjectIndex{ source=absAddr, dest=target } :: setLengthWordAndInit else setLengthWordAndInit in (finalCode, target, false) end | codeToICodeRev(BICHandle{exp, handler, exPacketAddr}, context as { stackPtr, loopArgs, overflowBlock, ... }, isTail, destination, tailCode) = let (* As with BICCond and BICCase we need to create a new register for the result in case we need to push it to the stack. 
*) val handleResult = newMergeReg() val handlerLab = newLabel() and startHandling = newLabel() val (bodyTarget, handlerTarget) = if isTail then (newPReg(), newPReg()) else (handleResult, handleResult) (* TODO: Even if we don't actually want a result we force one in here by using "asTarget". *) (* The expression cannot be treated as a tail because the handler has to be removed after. It may "exit" if it has raised an unconditional exception. If it has we mustn't generate a PopExceptionHandler because there won't be any result for resultReg. We need to add two words to the stack to account for the items pushed by PushExceptionHandler. We create an instruction to push the handler followed by a block fork to the start of the code and, potentially the handler, then a label to start the code that the handler is in effect for. *) val initialCode = BlockLabel startHandling :: BlockFlow(SetHandler{handler=handlerLab, continue=startHandling}) :: BlockSimple(PushExceptionHandler) :: tailCode val (expCode, _, expExit) = codeToICodeRev(exp, {stackPtr=stackPtr+2, loopArgs=loopArgs, currHandler=SOME handlerLab, overflowBlock=overflowBlock}, false (* Not tail *), SpecificPReg bodyTarget, initialCode) (* If this is the tail we can replace the jump at the end of the handled code with returns. If the handler has exited we don't need a return there. Otherwise we need to add an unconditional jump to skip the handler. *) val (atExpEnd, skipExpLabel) = case (isTail, expExit) of (true, true) => (* Tail and exited. *) (expCode, NONE) | (true, false) => (* Tail and not exited. *) (returnInstruction(context, bodyTarget, BlockSimple(PopExceptionHandler) :: expCode), NONE) | (false, true) => (* Not tail but exited. *) (expCode, NONE) | (false, false) => let val skipHandler = newLabel() in (BlockFlow(Unconditional skipHandler) :: BlockSimple(PopExceptionHandler) :: expCode, SOME skipHandler) end (* Make a register to hold the exception packet; BeginHandler will put the packet into it. *) val packetAddr = newPReg() val () = Array.update(locToPregArray, exPacketAddr, PregLocation packetAddr) val (handleCode, _, handleExit) = codeToICodeRev(handler, context, isTail, SpecificPReg handlerTarget, BlockSimple(BeginHandler{packetReg=packetAddr}) :: BlockLabel handlerLab :: atExpEnd) val target = asTarget destination val afterHandler = case (isTail, handleExit) of (true, true) => (* Tail and exited. *) handleCode | (true, false) => (* Tail and not exited. *) returnInstruction(context, handlerTarget, handleCode) | (false, _) => (* Not tail. *) handleCode val addLabel = case skipExpLabel of SOME lab => BlockLabel lab:: afterHandler | NONE => afterHandler in (moveRegister{source=handleResult, dest=target} :: addLabel, target, isTail) end and codeConditionRev(condition, context, jumpOn, jumpLabel, tailCode) = (* Jump optimisation is done later. Just generate the general case. Load the value into a register and compare it with 1 (true) *) let val ccRef = newCCRef() val (testCode, testReg, _) = codeToICodeRev(condition, context, false, AnyReg, tailCode) val noJumpLabel = newLabel() in BlockLabel noJumpLabel :: BlockFlow(Conditional{ccRef=ccRef, condition=if jumpOn then CondEqual else CondNotEqual, trueJump=jumpLabel, falseJump=noJumpLabel}) :: (* Compare: SUBS XZ,reg,3. Can use 32-bit comparison because it's either tagged 0 or tagged 1.
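With the 2n+1 tagging used here false is represented as 1 and true as 3, so the immediate taggedWord 0w1 below is 3 and the subtraction sets the Z flag exactly when the value is true; both encodings fit easily in the low 32 bits.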
*) addSubImmediate{source=testReg, immed=taggedWord 0w1, isAdd=false, dest=ZeroReg, length=OpSize32, ccRef=SOME ccRef} :: testCode end and codeToICodeUnaryRev({oper=NotBoolean, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (argCode, testDest, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in (* Test the argument and return a boolean result. If either the argument is a condition or the result is used in a test this will be better than using XOR. *) (makeBoolResultRev(CondNotEqual, ccRef, target, addSubImmediate{source=testDest, immed=taggedWord 0w1, isAdd=false, dest=ZeroReg, length=OpSize32 (* Always either tagged 0 or tagged 1 *), ccRef=SOME ccRef} :: argCode), target, false) end | codeToICodeUnaryRev({oper=IsTaggedValue, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (argCode, testDest, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in (* Test the argument and return a boolean result. This ought to be optimised at a lower level to use a test-and-branch. *) (makeBoolResultRev(CondNotEqual, ccRef, target, logicalImmediate{source=testDest, immed=0w1 (* The tag bit*), logOp=LogAnd, dest=ZeroReg, length=OpSize32 (* Always either tagged 0 or tagged 1 *), ccRef=SOME ccRef} :: argCode), target, false) end | codeToICodeUnaryRev({oper=MemoryCellLength, arg1}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) (* Load the word at -1 (words) into a ureg *) val (codeLoad, _, _) = wordAddressOffset(SpecificPReg ureg1, baseReg, ~1, polyWordLoadSize, codeBase) (* Select 56 or 24 bits and shift it left. This disassembles as UBFIZ..*) val lsb = 0w1 and width = if is32in64 then 0w24 else 0w56 (* Encoding for unsignedBitfieldInsertinZeros64/32 *) val immr = if is32in64 then Word.~ lsb mod 0w32 else Word.~ lsb mod 0w64 val imms = width-0w1 val maskAndShift = bitFieldShift{source=ureg1, dest=ureg2, isSigned=false, length=polyWordOpSize, immr=immr, imms=imms} :: codeLoad val target = asTarget destination val addTag = addSubImmediate{dest=SomeReg target, source=ureg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: maskAndShift in (addTag, target, false) end | codeToICodeUnaryRev({oper=MemoryCellFlags, arg1}, context, _, destination, tailCode) = let (* Load the flags byte and tag it. *) val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (codeRealBase, realBaseReg) = getAbsoluteAddress(codeBase, baseReg) val ureg = newUReg() val codeLoad = loadWithConstantOffset{ base=realBaseReg, dest=ureg, byteOffset=flagsByteOffset, loadType=Load8 } :: codeRealBase val target = asTarget destination val withTag = tagValue{ source=ureg, dest=target, isSigned=false, opSize=OpSize32 } :: codeLoad in (withTag, target, false) end | codeToICodeUnaryRev({oper=ClearMutableFlag, arg1}, context, _, destination, tailCode) = let val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (codeRealBase, realBaseReg) = getAbsoluteAddress(codeBase, baseReg) val ureg1 = newUReg() and ureg2 = newUReg() (* Load the flags, mask off the mutable bit and store it back. 
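For instance, a byte cell created with flags 0wx69 (mutable, weak, no-overwrite, bytes, as used for mutexes above) would end up as 0wx29: the mask Word64.xorb(0wxffffffff, 0wx40) is 0wxffffffbf, which clears only the F_mutable bit and leaves the other flag bits unchanged.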
*) val code = storeWithConstantOffset{ base=realBaseReg, source=ureg2, byteOffset=flagsByteOffset, loadType=Load8 } :: logicalImmediate{ source=ureg1, dest=SomeReg ureg2, ccRef=NONE, immed=Word64.xorb(0wxffffffff, 0wx40), logOp=LogAnd, length=OpSize32 } :: loadWithConstantOffset{ base=realBaseReg, dest=ureg1, byteOffset=flagsByteOffset, loadType=Load8 } :: codeRealBase in returnUnit(destination, code, false) end | codeToICodeUnaryRev({oper=LongWordToTagged, arg1}, context, _, destination, tailCode) = let val (codeBase, baseReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg = newUReg() val target = asTarget destination val code = tagValue{ source=uReg, dest=target, isSigned=false, opSize=polyWordOpSize } :: unboxLarge{ source=baseReg, dest=uReg } :: codeBase in (code, target, false) end | codeToICodeUnaryRev({oper=SignedToLongWord, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg = newUReg() val target = asTarget destination (* We can use a single instruction here on both 32-in-64 and native 64-bits. On 64-bits this is equivalent to an arithmetic shift; on 32-bits it propagates the sign bit into the high-order part. *) val code = boxLarge{ source=uReg, dest=target, saveRegs=[] } :: bitFieldShift{ source=aReg1, dest=uReg, isSigned=true, length=OpSize64, immr=0w1, imms=if is32in64 then 0wx1f else 0wx3f } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=UnsignedToLongWord, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg = newUReg() val target = asTarget destination (* This amounts to a logical shift. Since the top half of the register is zero in 32-in-64 we don't have to select just the low word but there's no advantage in not. *) val code = boxLarge{ source=uReg, dest=target, saveRegs=[] } :: bitFieldShift{ source=aReg1, dest=uReg, isSigned=false, length=OpSize64, immr=0w1, imms=if is32in64 then 0wx1f else 0wx3f } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealAbs precision, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val fpSize = precisionToFpSize precision val fpOp = case precision of PrecSingle => AbsFloat | PrecDouble => AbsDouble val code = boxTagFloat{ floatSize=fpSize, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=fpOp } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealNeg precision, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val fpSize = precisionToFpSize precision val fpOp = case precision of PrecSingle => NegFloat | PrecDouble => NegDouble val code = boxTagFloat{ floatSize=fpSize, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=fpOp } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealFixedInt precision, arg1}, context, _, destination, tailCode) = let (* Convert a tagged integer (FixedInt.int) to float or double. 
*) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val fpSize = precisionToFpSize precision val code = boxTagFloat{ floatSize=fpSize, source=uReg2, dest=target, saveRegs=[] } :: convertIntToFloat{ source=uReg1, dest=uReg2, srcSize=polyWordOpSize, destSize=fpSize } :: untagValue{ source=aReg1, dest=uReg1, opSize=polyWordOpSize, isSigned=true } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=FloatToDouble, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val code = boxTagFloat{ floatSize=Double64, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=ConvFloatToDble } :: unboxTagFloat{ floatSize=Float32, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=DoubleToFloat, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val code = boxTagFloat{ floatSize=Float32, source=uReg2, dest=target, saveRegs=[] } :: unaryFloatingPt{ source=uReg1, dest=uReg2, fpOp=ConvDbleToFloat } :: unboxTagFloat{ floatSize=Double64, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=RealToInt(precision, rounding), arg1}, context, _, destination, tailCode) = let (* Convert a float or double to a tagged int. We could get an overflow in either the conversion to integer or in the conversion to a tagged value. Fortunately if the conversion detects an overflow it sets the result to a value that will cause an overflow in the addition. *) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val target = asTarget destination val chkOverflow = newCCRef() val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val fpSize = precisionToFpSize precision val code = (* Set the tag bit. *) addSubImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: checkOverflow(CondOverflow, context, chkOverflow) @ (* Add it to itself and set the condition code. *) addSubRegister{base=uReg2, shifted=uReg2, dest=SomeReg uReg3, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=true, shift=ShiftNone} :: convertFloatToInt{ source=uReg1, dest=uReg2, srcSize=fpSize, destSize=polyWordOpSize, rounding=rounding } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=TouchAddress, arg1}, context, _, destination, tailCode) = let val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in returnUnit(destination, touchValue{source=aReg1} :: arg1Code, false) end | codeToICodeUnaryRev({oper=AllocCStack, arg1}, context, _, destination, tailCode) = let (* Allocate space on the stack. The higher levels have already aligned the size to a multiple of 16. The number of bytes to allocate is a Word.word value. The result is a boxed large word. 
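The 16-byte alignment matters because AArch64 requires the stack pointer itself to remain 16-byte aligned; the sequence below untags the byte count, subtracts it from the stack pointer with addSubXSP and boxes the resulting address, which is the base of the newly reserved block.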
*) val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val uReg1 = newUReg() and uReg2 = newUReg() val target = asTarget destination val code = boxLarge{ source=uReg2, dest=target, saveRegs=[] } :: addSubXSP{ source=uReg1, dest=SomeReg uReg2, isAdd=false } :: untagValue{ source=aReg1, dest=uReg1, isSigned=false, opSize=polyWordOpSize } :: arg1Code in (code, target, false) end | codeToICodeUnaryRev({oper=LockMutex, arg1}, context, _, destination, tailCode) = (* The earliest versions of the Arm8 do not have the LDADD instruction which will do this directly. To preserve compatibility we use LDAXR/STLXR which require a loop. *) let local val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in val (baseCode, baseReg) = getAbsoluteAddress(arg1Code, aReg1) end val loopLabel = newLabel() and noLoopLabel = newLabel() val target = asTarget destination val ccRef1 = newCCRef() and ccRef2 = newCCRef() val uRegNew = newUReg() and uRegTest = newUReg() and uRegOld = newUReg() (* N.B. in reverse order. *) val code = (* The result is true if the old value was zero. *) addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize64, ccRef=SOME ccRef2} :: memoryBarrier :: (* Put in the memory barrier. *) (* If the result is zero we've been successful otherwise we loop. *) BlockLabel noLoopLabel :: BlockFlow(Conditional{ ccRef=ccRef1, condition=CondNotEqual, trueJump=loopLabel, falseJump=noLoopLabel }) :: addSubImmediate{source=uRegTest, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize32, ccRef=SOME ccRef1} :: (* Add and try to store the result *) storeReleaseExclusive{ base=baseReg, source=SomeReg uRegNew, result=uRegTest } :: addSubImmediate{source=uRegOld, dest=SomeReg uRegNew, immed=0w1, isAdd=true, length=OpSize64, ccRef=NONE} :: loadAcquireExclusive{ base=baseReg, dest=uRegOld } :: BlockLabel loopLabel :: baseCode in (makeBoolResultRev(CondEqual, ccRef2, target, code), target, false) end | codeToICodeUnaryRev({oper=TryLockMutex, arg1}, context, _, destination, tailCode) = (* *) let (* Could use LDUMAXAL to set it the greater of the current value or 1. *) local val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in val (baseCode, baseReg) = getAbsoluteAddress(arg1Code, aReg1) end val loopLabel = newLabel() and noLoopLabel = newLabel() and okLabel = newLabel() val target = asTarget destination val ccRef0 = newCCRef() and ccRef1 = newCCRef() and ccRef2 = newCCRef() val uRegNew = newUReg() and uRegTest = newUReg() and uRegOld = newUReg() val code = (* The result is true if the old value was zero. *) addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize64, ccRef=SOME ccRef2} :: memoryBarrier :: (* Put in the memory barrier. *) (* If the result is zero we've been successful otherwise we loop. *) BlockLabel noLoopLabel :: BlockFlow(Conditional{ ccRef=ccRef1, condition=CondNotEqual, trueJump=loopLabel, falseJump=noLoopLabel }) :: addSubImmediate{source=uRegTest, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize32, ccRef=SOME ccRef1} :: (* If the lock wasn't taken set it to one to lock it. *) storeReleaseExclusive{ base=baseReg, source=SomeReg uRegNew, result=uRegTest } :: loadNonAddressConstant{source=0w1, dest=uRegNew } :: BlockLabel okLabel :: (* If it's not zero don't try to store anything back and exit the loop. 
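In outline the loop is roughly: load-acquire-exclusive the current value; if it is non-zero give up; otherwise attempt a store-release-exclusive of 1 and retry from the load if the store-exclusive reports failure; finally issue the barrier and return whether the value originally seen was zero, i.e. whether we actually took the lock.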
*) BlockFlow(Conditional{ ccRef=ccRef0, condition=CondNotEqual, trueJump=noLoopLabel, falseJump=okLabel }) :: addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize64, ccRef=SOME ccRef0} :: (* Get the old value and see if it's zero i.e. unlocked. *) loadAcquireExclusive{ base=baseReg, dest=uRegOld } :: BlockLabel loopLabel :: baseCode in (makeBoolResultRev(CondEqual, ccRef2, target, code), target, false) end | codeToICodeUnaryRev({oper=UnlockMutex, arg1}, context, _, destination, tailCode) = (* Get the previous value of the mutex to see if another thread had tried to lock it and set the result to zero. *) let (* Could use SWAPAL *) local val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) in val (baseCode, baseReg) = getAbsoluteAddress(arg1Code, aReg1) end val loopLabel = newLabel() and noLoopLabel = newLabel() val target = asTarget destination val ccRef1 = newCCRef() and ccRef2 = newCCRef() val uRegTest = newUReg() and uRegOld = newUReg() val code = (* The result is true if the old value was one. i.e. we were the only thread that locked it. *) addSubImmediate{source=uRegOld, dest=ZeroReg, immed=0w1, isAdd=false, length=OpSize64, ccRef=SOME ccRef2} :: memoryBarrier :: (* Put in the memory barrier. *) (* If the result is zero we've been successful otherwise we loop. *) BlockLabel noLoopLabel :: BlockFlow(Conditional{ ccRef=ccRef1, condition=CondNotEqual, trueJump=loopLabel, falseJump=noLoopLabel }) :: addSubImmediate{source=uRegTest, dest=ZeroReg, immed=0w0, isAdd=false, length=OpSize32, ccRef=SOME ccRef1} :: (* Try to set this to zero *) storeReleaseExclusive{ base=baseReg, source=ZeroReg, result=uRegTest } :: loadAcquireExclusive{ base=baseReg, dest=uRegOld } :: BlockLabel loopLabel :: baseCode in (makeBoolResultRev(CondEqual, ccRef2, target, code), target, false) end and codeToICodeBinaryRev({oper=WordComparison{test, isSigned}, arg1, arg2}, context, _, destination, tailCode) = let (* Comparisons. This is now only used for tagged values, not for pointer equality. *) val ccRef = newCCRef() val (testCode1, testDest1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (testCode2, testDest2, _) = codeToICodeRev(arg2, context, false, AnyReg, testCode1) val comparison = addSubRegister{base=testDest1, shifted=testDest2, dest=ZeroReg, length=polyWordOpSize, ccRef=SOME ccRef, isAdd=false, shift=ShiftNone} :: testCode2 val target = asTarget destination open BuiltIns val cond = case (test, isSigned) of (TestEqual, _) => CondEqual | (TestLess, true) => CondSignedLess | (TestLessEqual, true) => CondSignedLessEq | (TestGreater, true) => CondSignedGreater | (TestGreaterEqual, true) => CondSignedGreaterEq | (TestLess, false) => CondCarryClear | (TestLessEqual, false) => CondUnsignedLowOrEq | (TestGreater, false) => CondUnsignedHigher | (TestGreaterEqual, false) => CondCarrySet | (TestUnordered, _) => raise InternalError "WordComparison: TestUnordered" in (makeBoolResultRev(cond, ccRef, target, comparison), target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* We need to subtract the tag from one of the arguments and then do the addition. 
The optimiser will do the subtraction at compile time if we subtract from a constant so try to put the constant in the second arg. *) val (firstReg, secondReg) = case arg1 of BICConstnt _ => (aReg2, aReg1) | _ => (aReg1, aReg2) val uReg = newUReg() val chkOverflow = newCCRef() val code = checkOverflow(CondOverflow, context, chkOverflow) @ addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=true, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg = newUReg() val chkOverflow = newCCRef() val code = checkOverflow(CondOverflow, context, chkOverflow) @ addSubRegister{base=aReg1, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=SOME chkOverflow, isAdd=false, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() val chkOverflow = newCCRef() (* Untag one argument. subtract the tag from the second, multiply and add back the tag. *) val multiplyCode = addSubImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: multiplication{kind=if is32in64 then SignedMultAddLong else MultAdd64, dest=uReg3, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2} :: addSubImmediate{dest=SomeReg uReg2, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg1, dest=uReg1, isSigned=true, opSize=polyWordOpSize } :: arg2Code (* Overflow check: The condition for overflow is that the high order part (64-bits in native 64-bits, 32-bits in 32-in-64) must be zero if the result is positive and all ones if the result is negative. The high-order part is in uReg3 in 32-in-64 since we've already used SignedMultAddLong but in native 64-bits we need to use SignedMultHigh to get the high order part. In both cases we can use a comparison with ShiftASR to give a value containing just the sign of the result. 
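Shrinking the word size to eight bits purely for illustration: 6 * 7 = 42 has a high byte of 0 and 42 ASR 7 is also 0, so the two agree and there is no overflow; 16 * 16 = 256 has a high byte of 1 while the low byte is 0, whose sign bits are all zero, so the mismatch signals overflow.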
*) val checkOverflowCode = if is32in64 then addSubRegister{ base=uReg4, shifted=target, dest=ZeroReg, ccRef=SOME chkOverflow, isAdd=false, length=OpSize32, shift=ShiftASR 0w31 } :: shiftConstant{direction=Arm64ICode.ShiftRightArithmetic, source=uReg3, dest=uReg4, shift=0w32, opSize=OpSize64 (* Have to start with 64-bits *)} :: multiplyCode else addSubRegister{ base=uReg4, shifted=target, dest=ZeroReg, ccRef=SOME chkOverflow, isAdd=false, length=OpSize64, shift=ShiftASR 0w63 } :: multiplication{kind=SignedMultHigh, dest=uReg4, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2} :: multiplyCode val code = checkOverflow(CondNotEqual, context, chkOverflow) @ checkOverflowCode in (code, target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithQuot, arg1, arg2}, context, _, destination, tailCode) = let (* The word version avoids an extra shift. Don't do that here at least for the moment. Division by zero and overflow are checked for at the higher level. *) val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = tagValue { source=uReg3, dest=target, opSize=polyWordOpSize, isSigned=true } :: division{isSigned=true, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: untagValue{ source=aReg2, dest=uReg2, isSigned=true, opSize=polyWordOpSize } :: untagValue{ source=aReg1, dest=uReg1, isSigned=true, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithRem, arg1, arg2}, context, _, destination, tailCode) = let (* For the moment we remove the tags and then retag afterwards. The word version avoids this but at least for the moment we do it the longer way. *) (* There's no direct way to get the remainder - have to use divide and multiply. *) val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() val code = tagValue { source=uReg4, dest=target, opSize=polyWordOpSize, isSigned=true } :: multiplication{kind=if is32in64 then MultSub32 else MultSub64, dest=uReg4, sourceM=uReg3, sourceN=uReg2, sourceA=SomeReg uReg1} :: division{isSigned=true, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: untagValue{ source=aReg2, dest=uReg2, isSigned=true, opSize=polyWordOpSize } :: untagValue{ source=aReg1, dest=uReg1, isSigned=true, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithDiv, ...}, _, _, _, _) = raise InternalError "unimplemented operation: FixedPrecisionArith ArithDiv" | codeToICodeBinaryRev({oper=FixedPrecisionArith ArithMod, ...}, _, _, _, _) = raise InternalError "unimplemented operation: FixedPrecisionArith ArithMod" | codeToICodeBinaryRev({oper=WordArith ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* We need to subtract the tag from one of the arguments and then do the addition. 
The optimiser will do the subtraction at compile time if we subtract from a constant so try to put the constant in the second arg. *) val (firstReg, secondReg) = case arg1 of BICConstnt _ => (aReg2, aReg1) | _ => (aReg1, aReg2) val uReg = newUReg() val code = addSubRegister{base=firstReg, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, isAdd=true, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordArith ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg = newUReg() (* TODO: If the first argument is a constant we could add one to that rather than subtracting one from the second argument. We're not concerned with overflow. *) val code = addSubRegister{base=aReg1, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, isAdd=false, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordArith ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() (* Untag one argument. subtract the tag from the second, multiply and add back the tag. *) val code = addSubImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=true} :: multiplication{kind=if is32in64 then MultAdd32 else MultAdd64, dest=uReg3, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2} :: addSubImmediate{dest=SomeReg uReg2, source=aReg2, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg1, dest=uReg1, isSigned=false, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordArith ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() (* Untag the divisor (into uReg2). subtract the tag from the dividend (into uReg1), divide and or in the tag. The tag may have been set already depending on the result of the division. *) val code = logicalImmediate{dest=SomeReg target, source=uReg3, immed=0w1, length=polyWordOpSize, ccRef=NONE, logOp=LogOr} :: division{isSigned=false, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: addSubImmediate{dest=SomeReg uReg1, source=aReg1, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg2, dest=uReg2, isSigned=false, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordArith ArithMod, arg1, arg2}, context, _, destination, tailCode) = let (* There's no direct way to get the remainder - have to use divide and multiply. 
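The identity used is rem = a - (a div b)*b, and it falls out neatly on the tagged representation: the dividend is used with its tag removed, i.e. as 2*a, so the division below produces the quotient of 2*a by b; clearing its bottom bit yields exactly 2*(a div b), and subtracting b times that from the tagged dividend 2*a+1 leaves 2*(a mod b)+1, the remainder already tagged.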
*) val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() (* Untag the divisor (into uReg2). subtract the tag from the dividend (into uReg1) Untag one argument. subtract the tag from the second, divide and or in the tag. The tag may have been set already depending on the result of the division. *) val tagBitMask = Word64.<<(Word64.fromInt ~1, 0w1) (* Requires a 64-bit AND. *) val code = (* Multiply the result of the division by the divisor and subtract this from the original, tagged dividend. This leaves us a tagged value so it can go straight into the result. *) multiplication{kind=if is32in64 then MultSub32 else MultSub64, dest=target, sourceM=uReg4, sourceN=uReg2, sourceA=SomeReg aReg1} :: (* Clear the bottom bit before the multiplication. *) logicalImmediate{dest=SomeReg uReg4, source=uReg3, immed=tagBitMask, length=OpSize64, ccRef=NONE, logOp=LogAnd} :: division{isSigned=false, opSize=polyWordOpSize, dest=uReg3, dividend=uReg1, divisor=uReg2} :: addSubImmediate{dest=SomeReg uReg1, source=aReg1, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false} :: untagValue{ source=aReg2, dest=uReg2, isSigned=false, opSize=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordArith _, ...}, _, _, _, _) = raise InternalError "WordArith - unimplemented instruction" | codeToICodeBinaryRev({oper=WordLogical LogicalAnd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* Since both values are tagged the tag will be preserved. *) val code = logicalRegister{base=aReg1, shifted=aReg2, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, logOp=LogAnd, shift=ShiftNone} :: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordLogical LogicalOr, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* Since both values are tagged the tag will be preserved. *) val code = logicalRegister{base=aReg1, shifted=aReg2, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, logOp=LogOr, shift=ShiftNone} :: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordLogical LogicalXor, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) (* If we just XOR the values together the tag bit in the result will be zero. It's better to remove one of the tag bits beforehand. As with Add, we try to choose a constant. 
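The arithmetic mirrors the addition case: once one tag has been removed that operand has a zero in the tag position, so 2*a+1 XOR 2*b = 2*(a XOR b)+1 and the result comes out correctly tagged without any further correction.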
*) val (firstReg, secondReg) = case arg1 of BICConstnt _ => (aReg2, aReg1) | _ => (aReg1, aReg2) val uReg = newUReg() val code = logicalRegister{base=firstReg, shifted=uReg, dest=SomeReg target, length=polyWordOpSize, ccRef=NONE, logOp=LogXor, shift=ShiftNone} :: addSubImmediate{dest=SomeReg uReg, source=secondReg, immed=0w1, length=polyWordOpSize, ccRef=NONE, isAdd=false}:: arg2Code in (code , target, false) end | codeToICodeBinaryRev({oper=WordShift ShiftLeft, arg1, arg2}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() and ureg3 = newUReg() val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val code = (* Put back the tag. *) logicalImmediate{ source=ureg3, dest=SomeReg target, ccRef=NONE, immed=0w1, logOp=LogOr, length=polyWordOpSize } :: (* Do the shift *) shiftRegister{direction=Arm64ICode.ShiftLeft, dest=ureg3, source=ureg1, shift=ureg2, opSize=polyWordOpSize} :: (* Untag the shift amount. Since it's at most 64 we can use a 32-bit operation. *) untagValue{source=aReg2, dest=ureg2, opSize=OpSize32, isSigned=false} :: (* Remove tag bit from the value we're shifting. *) logicalImmediate{ source=aReg1, dest=SomeReg ureg1, ccRef=NONE, immed=polyWordTagBitMask, logOp=LogAnd, length=polyWordOpSize } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordShift ShiftRightLogical, arg1, arg2}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val code = (* Put back the tag. *) logicalImmediate{ source=ureg2, dest=SomeReg target, ccRef=NONE, immed=0w1, logOp=LogOr, length=polyWordOpSize } :: (* Do the shift *) shiftRegister{direction=Arm64ICode.ShiftRightLogical, dest=ureg2, source=aReg1, shift=ureg1, opSize=polyWordOpSize} :: (* Untag the shift amount. Since it's at most 64 we can use a 32-bit operation. *) untagValue{source=aReg2, dest=ureg1, opSize=OpSize32, isSigned=false} :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=WordShift ShiftRightArithmetic, arg1, arg2}, context, _, destination, tailCode) = let val ureg1 = newUReg() and ureg2 = newUReg() val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val code = (* Put back the tag. *) logicalImmediate{ source=ureg2, dest=SomeReg target, ccRef=NONE, immed=0w1, logOp=LogOr, length=polyWordOpSize } :: (* Do the shift *) shiftRegister{direction=Arm64ICode.ShiftRightArithmetic, dest=ureg2, source=aReg1, shift=ureg1, opSize=polyWordOpSize} :: (* Untag the shift amount. Since it's at most 64 we can use a 32-bit operation. *) untagValue{source=aReg2, dest=ureg1, opSize=OpSize32, isSigned=false} :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=AllocateByteMemory, arg1, arg2}, context, _, destination, tailCode) = let (* Allocate a block of memory and without initialisation. If the flags include the "bytes" bit the GC won't look at it so it doesn't matter that it's not initialised. This is identical to AllocateWordMemory apart from the lack of initialisation. 
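The length word constructed below packs the flags byte into the top eight bits above the length, in effect flags << 56 orb size on native 64-bit and flags << 24 orb size in 32-in-64. As a purely illustrative example, whatever flag bits the run-time actually defines, a flags byte of 0wx41 and a size of three words would give the native 64-bit length word 0wx4100000000000003.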
*) val target = asTarget destination val (codeSize, sizeReg, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (codeFlags, flagsReg, _) = codeToICodeRev(arg2, context, false, AnyReg, codeSize) val uSizeReg = newUReg() and shiftFReg = newUReg() and lengthWord = newUReg() val absAddr = if is32in64 then newUReg() else target val untagSize = untagValue{source=sizeReg, dest=uSizeReg, opSize=polyWordOpSize, isSigned=false} :: codeFlags val allocateMem = allocateMemoryVariable{ size=uSizeReg, dest=absAddr, saveRegs=[]} :: untagSize (* Make the length word by first shifting the flags into the length word reg by 55 or 23 bits. This puts the tag bit in the top bit of the size. Then insert the size into this which will overwrite the flag's tag bit. *) val makeLengthWord = bitFieldInsert{ source=uSizeReg, destAsSource=shiftFReg, dest=lengthWord, length=polyWordOpSize, immr=0w0 (*bit 0*), imms=if is32in64 then 0w23 else 0w55 (*width-1*) } :: shiftConstant{direction=Arm64ICode.ShiftLeft, dest=shiftFReg, source=flagsReg, shift=if is32in64 then 0w23 else 0w55, opSize=polyWordOpSize } :: allocateMem val setLengthWordAndInit = storeWithConstantOffset{ source=lengthWord, base=absAddr, byteOffset= ~(Word.toInt wordSize), loadType=polyWordLoadSize } :: makeLengthWord val finalCode = if is32in64 then absoluteToObjectIndex{ source=absAddr, dest=target } :: setLengthWordAndInit else setLengthWordAndInit in (finalCode, target, false) end | codeToICodeBinaryRev({oper=LargeWordComparison test, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (testCode1, testDest1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (testCode2, testDest2, _) = codeToICodeRev(arg2, context, false, AnyReg, testCode1) val uReg1 = newUReg() and uReg2 = newUReg() val comparison = addSubRegister{base=uReg1, shifted=uReg2, dest=ZeroReg, length=OpSize64, ccRef=SOME ccRef, isAdd=false, shift=ShiftNone} :: unboxLarge{ source=testDest2, dest=uReg2 } :: unboxLarge{ source=testDest1, dest=uReg1 } :: testCode2 open BuiltIns val cond = case test of TestEqual => CondEqual | TestLess => CondCarryClear | TestLessEqual => CondUnsignedLowOrEq | TestGreater => CondUnsignedHigher | TestGreaterEqual => CondCarrySet | TestUnordered => raise InternalError "LargeWordComparison: TestUnordered" in (makeBoolResultRev(cond, ccRef, target, comparison), target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: addSubRegister{base=uReg1, shifted=uReg2, dest=SomeReg uReg3, length=OpSize64, ccRef=NONE, isAdd=true, shift=ShiftNone} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: 
addSubRegister{base=uReg1, shifted=uReg2, dest=SomeReg uReg3, length=OpSize64, ccRef=NONE, isAdd=false, shift=ShiftNone} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: multiplication{kind=MultAdd64, sourceA=ZeroReg, sourceM=uReg1, sourceN=uReg2, dest=uReg3} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: division{isSigned=false, opSize=OpSize64, dividend=uReg1, divisor=uReg2, dest=uReg3} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith ArithMod, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() and uReg4 = newUReg() val code = boxLarge{ source=uReg4, dest=target, saveRegs=[] } :: multiplication{kind=MultSub64, dest=uReg4, sourceM=uReg3, sourceN=uReg2, sourceA=SomeReg uReg1} :: division{isSigned=false, opSize=OpSize64, dividend=uReg1, divisor=uReg2, dest=uReg3} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordArith _, ...}, _, _, _, _) = raise InternalError "LargeWordArith - unimplemented instruction" | codeToICodeBinaryRev({oper=LargeWordLogical logop, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val logicalOp = case logop of LogicalAnd => LogAnd | LogicalOr => LogOr | LogicalXor => LogXor val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: logicalRegister{base=uReg1, shifted=uReg2, dest=SomeReg uReg3, length=OpSize64, ccRef=NONE, logOp=logicalOp, shift=ShiftNone} :: unboxLarge{ source=aReg2, dest=uReg2 } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=LargeWordShift shiftKind, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg1 = 
newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val shiftType = case shiftKind of ShiftLeft => Arm64ICode.ShiftLeft | ShiftRightLogical => Arm64ICode.ShiftRightLogical | ShiftRightArithmetic => Arm64ICode.ShiftRightArithmetic val code = boxLarge{ source=uReg3, dest=target, saveRegs=[] } :: shiftRegister{direction=shiftType, source=uReg1, shift=uReg2, dest=uReg3, opSize=OpSize64 } :: (* The shift amount is a word, not a large word. *) untagValue{ source=aReg2, dest=uReg2, opSize=OpSize32, isSigned=false } :: unboxLarge{ source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=RealComparison(test, precision), arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val fpSize = precisionToFpSize precision val uReg1 = newUReg() and uReg2 = newUReg() (* Floating point comparisons. The fcmp instruction differs from integer comparison. If either argument is a NaN the overflow bit is set and the other bits are cleared. That means that in order to get a true result only if the values are not NaNs we have to test that at least one of C, N, or Z are set. We use unsigned tests for < and <= and signed tests for > and >=. *) val cond = case test of TestEqual => CondEqual | TestLess => CondCarryClear | TestLessEqual => CondUnsignedLowOrEq | TestGreater => CondSignedGreater | TestGreaterEqual => CondSignedGreaterEq | TestUnordered => CondOverflow val code = compareFloatingPoint{arg1=uReg1, arg2=uReg2, ccRef=ccRef, opSize=fpSize} :: unboxTagFloat{ floatSize=fpSize, source=aReg2, dest=uReg2 } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg2Code in (makeBoolResultRev(cond, ccRef, target, code), target, false) end | codeToICodeBinaryRev({oper=RealArith(oper, precision), arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val fpSize = precisionToFpSize precision val uReg1 = newUReg() and uReg2 = newUReg() and uReg3 = newUReg() val fpOp = case oper of ArithAdd => AddFP | ArithSub => SubtractFP | ArithMult => MultiplyFP | ArithDiv => DivideFP | _ => raise InternalError "RealArith - unimplemented instruction" val code = boxTagFloat{ floatSize=fpSize, source=uReg3, dest=target, saveRegs=[] } :: binaryFloatingPoint{arg1=uReg1, arg2=uReg2, dest=uReg3, fpOp=fpOp, opSize=fpSize } :: unboxTagFloat{ floatSize=fpSize, source=aReg2, dest=uReg2 } :: unboxTagFloat{ floatSize=fpSize, source=aReg1, dest=uReg1 } :: arg2Code in (code, target, false) end | codeToICodeBinaryRev({oper=PointerEq, arg1, arg2}, context, _, destination, tailCode) = let (* Equality of general values which can include pointers. This can be treated exactly as a word equality. It has to be analysed differently for indexed cases. 
*) val ccRef = newCCRef() val (testCode1, testDest1, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (testCode2, testDest2, _) = codeToICodeRev(arg2, context, false, AnyReg, testCode1) val comparison = addSubRegister{base=testDest1, shifted=testDest2, dest=ZeroReg, length=polyWordOpSize, ccRef=SOME ccRef, isAdd=false, shift=ShiftNone} :: testCode2 val target = asTarget destination in (makeBoolResultRev(CondEqual, ccRef, target, comparison), target, false) end | codeToICodeBinaryRev({oper=FreeCStack, arg1, arg2}, context, _, destination, tailCode) = let (* Free space on the C stack. This is a binary operation that takes the base address and the size. The base address isn't used in this version. *) val (arg1Code, _, _) = codeToICodeRev(arg1, context, false, AnyReg, tailCode) val (arg2Code, aReg2, _) = codeToICodeRev(arg2, context, false, AnyReg, arg1Code) val uReg = newUReg() val code = addSubXSP{ source=uReg, dest=ZeroReg, isAdd=true } :: untagValue{ source=aReg2, dest=uReg, isSigned=false, opSize=polyWordOpSize } :: arg2Code in returnUnit(destination, code, false) end (* Code-generate an address into one or two Pregs. At this point they are in a state where we can code-generate arbitrary code before the address is used *) and addressToPregAddress({base, index, offset}, context, code) = let val (bCode, bReg, _) = codeToICodeRev(base, context, false, AnyReg, code) in case index of NONE => ({base=bReg, index=NONE, offset=offset}, bCode) | SOME index => let val (iCode, iReg, _) = codeToICodeRev(index, context, false, AnyReg, bCode) in ({base=bReg, index=SOME iReg, offset=offset}, iCode) end end (* Store the code address and the closure items into a previously allocated closure on the heap. This is used both in the simple case and also with mutually recursive declarations. *) and storeIntoClosure(lambda as { closure, ...}, absClosureAddr, context, tailCode) = let val closureRef = makeConstantClosure() val () = codeFunctionToArm64(lambda, debugSwitches, closureRef) val codeAddrWords = if is32in64 then 2 else 1 fun storeAValue(f, (n, tlCode)) = let val (code, source, _) = codeToICodeRev(BICExtract f, context, false, AnyReg, tlCode) in (n+1, storeAtWordOffset(source, n, absClosureAddr, polyWordLoadSize, code)) end (* Store the code address in the first 64-bits. *) val storeCodeAddress = if is32in64 then let (* We can't use codeAddressFromClosure on 32-in-64 because it always returns a 64-bit value. Instead we have to get the code address at run-time. *) val clReg = newPReg() and absClReg = newUReg() and absCodeReg = newUReg() in storeAtWordOffset(absCodeReg, 0, absClosureAddr, Load64, loadWithConstantOffset{base=absClReg, dest=absCodeReg, byteOffset=0, loadType=Load64} :: objectIndexAddressToAbsolute{ source=clReg, dest=absClReg } :: loadAddressConstant{source=closureAsAddress closureRef, dest=clReg} :: tailCode) end else let val cReg = newPReg() in storeAtWordOffset(cReg, 0, absClosureAddr, Load64, loadAddressConstant{source=codeAddressFromClosure closureRef, dest=cReg} :: tailCode) end val (_, storeCode) = List.foldl storeAValue (codeAddrWords, storeCodeAddress) closure in storeCode end (* Load operations. 
*) and codeLoadOperation(kind, address, context, target, tailCode) = let val (regAddr, codeAddr) = addressToPregAddress(address, context, tailCode) val code = case kind of LoadStoreMLWord {isImmutable=false} => let fun loadOp(addrReg, code) = loadAcquire{base=addrReg, dest=target, loadType=polyWordLoadSize} :: code in loadAndStoreWithAbsolute (regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, loadOp, codeAddr) end | LoadStoreMLWord {isImmutable=true} => let fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=target, byteOffset=offset, loadType=polyWordLoadSize} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=target, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: code in loadAndStoreWithAddress (regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreMLByte {isImmutable=false} => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadOp(addrReg, code) = loadAcquire{base=addrReg, dest=destReg, loadType=Load8} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAbsolute (regAddr, opWordSize Load8, loadShift Load8, loadOp, codeAddr) end | LoadStoreMLByte {isImmutable=true} => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load8} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load8, signExtendIndex=false} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, false, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC8 => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load8} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load8, signExtendIndex=true} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC16 => let (* Have to load into a ureg and then tag it. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load16} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load16, signExtendIndex=true} :: code in tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize32} :: loadAndStoreWithAddress(regAddr, opWordSize Load16, loadShift Load16, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC32 => let (* This is tagged in native 64-bits and boxed in 32-in-64. 
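Tagging a 32-bit value needs up to 33 bits, which fits comfortably in a native 64-bit tagged word but not in the 31 value bits available to a tagged word in the 32-in-64 representation, hence the boxing in that case.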
*) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load32} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load32, signExtendIndex=true} :: code in (if is32in64 then boxLarge{ source=destReg, dest=target, saveRegs=[] } else tagValue{source=destReg, dest=target, isSigned=false, opSize=OpSize64 (* It becomes 33 bits *)}) :: loadAndStoreWithAddress(regAddr, opWordSize Load32, loadShift Load32, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreC64 => let (* This is always boxed. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=destReg, byteOffset=offset, loadType=Load64} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=destReg, index=index, loadType=Load64, signExtendIndex=true} :: code in boxLarge{ source=destReg, dest=target, saveRegs=[]} :: loadAndStoreWithAddress(regAddr, opWordSize Load64, loadShift Load64, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreCFloat => let (* This always returns a double, not a 32-bit float. *) val destReg = newUReg() and convertReg = newUReg() fun loadConstOffset(base, offset, code) = loadFPWithConstantOffset{base=base, dest=destReg, byteOffset=offset, floatSize=Float32} :: code fun loadIndexed(base, index, code) = loadFPWithIndexedOffset{base=base, dest=destReg, index=index, floatSize=Float32, signExtendIndex=true} :: code in boxTagFloat{floatSize=Double64, source=convertReg, dest=target, saveRegs=[]} :: unaryFloatingPt{source=destReg, dest=convertReg, fpOp=ConvFloatToDble} :: loadAndStoreWithAddress(regAddr, 4, 0w2, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreCDouble => let (* This is always boxed. *) val destReg = newUReg() fun loadConstOffset(base, offset, code) = loadFPWithConstantOffset{base=base, dest=destReg, byteOffset=offset, floatSize=Double64} :: code fun loadIndexed(base, index, code) = loadFPWithIndexedOffset{base=base, dest=destReg, index=index, floatSize=Double64, signExtendIndex=true} :: code in boxTagFloat{floatSize=Double64, source=destReg, dest=target, saveRegs=[]} :: loadAndStoreWithAddress(regAddr, 8, 0w3, true, loadConstOffset, loadIndexed, codeAddr) end | LoadStoreUntaggedUnsigned => let (* LoadStoreMLWord {isImmutable=true} except it has to be tagged. *) val ureg = newUReg() fun loadConstOffset(base, offset, code) = loadWithConstantOffset{base=base, dest=ureg, byteOffset=offset, loadType=polyWordLoadSize} :: code fun loadIndexed(base, index, code) = loadWithIndexedOffset{base=base, dest=ureg, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: code in tagValue{source=ureg, dest=target, isSigned=false, opSize=polyWordOpSize} :: loadAndStoreWithAddress(regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, codeAddr) end in (code, target, false) end (* Store operations. 
*) and codeStoreOperation(kind, address, value, context, destination, tailCode1) = let val (regAddr, codeAddr) = addressToPregAddress(address, context, tailCode1) val (sourceCode, sourceReg, _) = codeToICodeRev(value, context, false, AnyReg, codeAddr) val storeCode = case kind of LoadStoreMLWord {isImmutable=false} => let fun storeOp(addrReg, code) = storeRelease{base=addrReg, source=sourceReg, loadType=polyWordLoadSize} :: code in loadAndStoreWithAbsolute(regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, storeOp, sourceCode) end | LoadStoreMLWord {isImmutable=true} => let (* Used when initialising immutables that do not require store-release. *) fun loadConstOffset(base, offset, code) = storeWithConstantOffset{base=base, source=sourceReg, byteOffset=offset, loadType=polyWordLoadSize} :: code fun loadIndexed(base, index, code) = storeWithIndexedOffset{base=base, source=sourceReg, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: code in loadAndStoreWithAddress (regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreMLByte {isImmutable=false} => let fun storeOp(addrReg, code) = let val tReg = newUReg() in storeRelease{base=addrReg, source=tReg, loadType=Load8} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAbsolute(regAddr, opWordSize Load8, loadShift Load8, storeOp, sourceCode) end | LoadStoreMLByte {isImmutable=true} => let (* Used when initialising immutables that do not require store-release. *) fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load8} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load8, signExtendIndex=false} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, false, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC8 => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load8} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load8, signExtendIndex=true} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load8, loadShift Load8, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC16 => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load16} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load16, signExtendIndex=true} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize32} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load16, loadShift Load16, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC32 => let fun loadConstOffset(base, offset, code) = let val 
tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load32} :: (if is32in64 then unboxLarge{source=sourceReg, dest=tReg} else untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize64}) :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load32, signExtendIndex=true} :: (if is32in64 then unboxLarge{source=sourceReg, dest=tReg} else untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=OpSize64}) :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load32, loadShift Load32, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreC64 => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=Load64} :: unboxLarge{source=sourceReg, dest=tReg} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=Load64, signExtendIndex=true} :: unboxLarge{source=sourceReg, dest=tReg} :: code end in loadAndStoreWithAddress(regAddr, opWordSize Load64, loadShift Load64, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreCFloat => let (* The "real" value is a double, not a 32-bit float *) fun loadConstOffset(base, offset, code) = let val tReg = newUReg() and cReg = newUReg() in storeFPWithConstantOffset{base=base, source=tReg, byteOffset=offset, floatSize=Float32} :: unaryFloatingPt{source=cReg, dest=tReg, fpOp=ConvDbleToFloat} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=cReg} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() and cReg = newUReg() in storeFPWithIndexedOffset{base=base, source=tReg, index=index, floatSize=Float32, signExtendIndex=true} :: unaryFloatingPt{source=cReg, dest=tReg, fpOp=ConvDbleToFloat} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=cReg} :: code end in loadAndStoreWithAddress(regAddr, 4, 0w2, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreCDouble => let fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeFPWithConstantOffset{base=base, source=tReg, byteOffset=offset, floatSize=Double64} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=tReg} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeFPWithIndexedOffset{base=base, source=tReg, index=index, floatSize=Double64, signExtendIndex=true} :: unboxTagFloat{floatSize=Double64, source=sourceReg, dest=tReg} :: code end in loadAndStoreWithAddress(regAddr, 8, 0w3, true, loadConstOffset, loadIndexed, sourceCode) end | LoadStoreUntaggedUnsigned => let (* Only used when initialising strings so this does not require store-release. 
*) fun loadConstOffset(base, offset, code) = let val tReg = newUReg() in storeWithConstantOffset{base=base, source=tReg, byteOffset=offset, loadType=polyWordLoadSize} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=polyWordOpSize} :: code end fun loadIndexed(base, index, code) = let val tReg = newUReg() in storeWithIndexedOffset{base=base, source=tReg, index=index, loadType=polyWordLoadSize, signExtendIndex=false} :: untagValue{source=sourceReg, dest=tReg, isSigned=false, opSize=polyWordOpSize} :: code end in loadAndStoreWithAddress(regAddr, opWordSize polyWordLoadSize, loadShift polyWordLoadSize, false, loadConstOffset, loadIndexed, sourceCode) end in returnUnit(destination, storeCode, false) end (*Turn the codetree structure into icode. *) val bodyContext = {loopArgs=NONE, stackPtr=0, currHandler=NONE, overflowBlock=ref NONE} val (bodyCode, _, bodyExited) = codeToICodeRev(body, bodyContext, true, SpecificPReg resultTarget, beginInstructions) val icode = if bodyExited then bodyCode else returnInstruction(bodyContext, resultTarget, bodyCode) (* Turn the icode list into basic blocks. The input list is in reverse so as part of this we reverse the list. *) local val resArray = Array.array(!labelCounter, BasicBlock{ block=[], flow=ExitCode }) fun createEntry (blockNo, block, flow) = Array.update(resArray, blockNo, BasicBlock{ block=block, flow=flow}) fun splitCode([], _, _) = (* End of code. We should have had a BeginFunction. *) raise InternalError "splitCode - no begin" | splitCode(BlockBegin args :: _, sinceLabel, flow) = (* Final instruction. Create the initial block and exit. *) createEntry(0, BeginFunction args ::sinceLabel, flow) | splitCode(BlockSimple instr :: rest, sinceLabel, flow) = splitCode(rest, instr :: sinceLabel, flow) | splitCode(BlockLabel label :: rest, sinceLabel, flow) = (* Label - finish this block and start another. *) ( createEntry(label, sinceLabel, flow); (* Default to a jump to this label. That is used if we have assumed a drop-through. *) splitCode(rest, [], Unconditional label) ) | splitCode(BlockExit instr :: rest, _, _) = splitCode(rest, [instr], ExitCode) | splitCode(BlockFlow flow :: rest, _, _) = splitCode(rest, [], flow) | splitCode(BlockRaiseAndHandle(instr, handler) :: rest, _, _) = splitCode(rest, [instr], UnconditionalHandle handler) | splitCode(BlockOptionalHandle{call, handler, label} :: rest, sinceLabel, flow) = let (* A function call within a handler. This could go to the handler but if there is no exception will go to the next instruction. Also includes JumpLoop since the stack check could result in an Interrupt exception. *) in createEntry(label, sinceLabel, flow); splitCode(rest, [call], ConditionalHandle{handler=handler, continue=label}) end in val () = splitCode(icode, [], ExitCode) val resultVector = Array.vector resArray end open ICodeTransform val pregProperties = Vector.fromList(List.rev(! pregPropList)) in codeICodeFunctionToArm64{blocks = resultVector, functionName = name, pregProps = pregProperties, ccCount= ! 
ccRefCounter, debugSwitches = debugSwitches, resultClosure = resultClosure, profileObject = profileObject} end val gencodeLambda = codeFunctionToArm64 structure Foreign = Arm64Foreign structure Sharing = struct type backendIC = backendIC and bicLoadForm = bicLoadForm and argumentType = argumentType and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML index a177429c..d16c624b 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML @@ -1,983 +1,988 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICode( structure Arm64Code: ARM64PREASSEMBLY ): ARM64ICODE = struct open Arm64Code open Address datatype preg = PReg of int (* A pseudo-register - an abstract register. *) (* If the value is zero we can use X0/W0. *) datatype pregOrZero = SomeReg of preg | ZeroReg (* A location on the stack. May be more than word if this is a container or a handler entry. *) datatype stackLocn = StackLoc of {size: int, rno: int } (* This combines pregKind and stackLocn. *) datatype regProperty = RegPropGeneral (* A general register. *) | RegPropUntagged (* An untagged general register. *) | RegPropStack of int (* A stack location or container. *) | RegPropCacheTagged | RegPropCacheUntagged | RegPropMultiple (* The result of a conditional or case. May be defined at multiple points. *) (* The reference to a condition code. *) datatype ccRef = CcRef of int datatype reg = GenReg of xReg | FPReg of vReg datatype callKind = Recursive | ConstantCode of machineWord | FullCall (* Function calls can have an unlimited number of arguments so it isn't always going to be possible to load them into registers. *) datatype 'genReg fnarg = ArgInReg of 'genReg | ArgOnStack of { wordOffset: int, container: stackLocn, field: int } datatype ('genReg, 'optGenReg, 'fpReg) arm64ICode = (* Move the contents of one preg to another. These are always 64-bits. *) MoveRegister of { source: 'genReg, dest: 'genReg } (* Numerical constant. *) | LoadNonAddressConstant of { source: Word64.word, dest: 'genReg } (* Address constant. *) | LoadAddressConstant of { source: machineWord, dest: 'genReg } (* Load a value into a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | LoadWithConstantOffset of { base: 'genReg, dest: 'genReg, byteOffset: int, loadType: loadType } (* Similarly for FP registers. *) | LoadFPWithConstantOffset of { base: 'genReg, dest: 'fpReg, byteOffset: int, floatSize: floatSize } (* Load a value into a register using an index register. *) | LoadWithIndexedOffset of { base: 'genReg, dest: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* Ditto for FP. 
*) | LoadFPWithIndexedOffset of { base: 'genReg, dest: 'fpReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Returns the current thread ID. Always a 64-bit value.. *) | GetThreadId of { dest: 'genReg } (* Convert a 32-in-64 object index into an absolute address. *) | ObjectIndexAddressToAbsolute of { source: 'genReg, dest: 'genReg } (* Convert an absolute address into an object index. *) | AbsoluteToObjectIndex of { source: 'genReg, dest: 'genReg } (* Allocate a fixed sized piece of memory and puts the absolute address into dest. bytesRequired is the total number of bytes including the length word and any alignment necessary for 32-in-64. saveRegs is the list of registers that need to be saved if we need to do a garbage collection. *) | AllocateMemoryFixed of { bytesRequired: Word64.word, dest: 'genReg, saveRegs: 'genReg list } (* Allocate a piece of memory. The size argument is an untagged value containing the number of words i.e. the same value used for InitialiseMemory and to store in the length word. *) | AllocateMemoryVariable of { size: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Initialise a piece of memory by writing "size" copies of the value in "init". N.B. The size is an untagged value containing the number of words. *) | InitialiseMem of { size: 'genReg, addr: 'genReg, init: 'genReg } (* Mark the beginning of a loop. This is really only to prevent the initialisation code being duplicated in ICodeOptimise. *) | BeginLoop (* Set up the registers for a jump back to the start of a loop. *) | JumpLoop of { regArgs: {src: 'genReg fnarg, dst: 'genReg} list, stackArgs: {src: 'genReg fnarg, wordOffset: int, stackloc: stackLocn} list, checkInterrupt: 'genReg list option } (* Store a register using a constant, signed, byte offset. The offset is in the range of -256 to (+4095*unit size). *) | StoreWithConstantOffset of { source: 'genReg, base: 'genReg, byteOffset: int, loadType: loadType } (* Ditto for FP regs. *) | StoreFPWithConstantOffset of { source: 'fpReg, base: 'genReg, byteOffset: int, floatSize: floatSize } (* Store a register using an index register. *) | StoreWithIndexedOffset of { source: 'genReg, base: 'genReg, index: 'genReg, loadType: loadType, signExtendIndex: bool } (* and for FP regs. *) | StoreFPWithIndexedOffset of { source: 'fpReg, base: 'genReg, index: 'genReg, floatSize: floatSize, signExtendIndex: bool } (* Add/Subtract immediate. The destination is optional in which case XZero is used. ccRef is optional. If it is NONE the version of the instruction that does not generate a condition code is used. immed must be < 0wx1000. *) | AddSubImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: word, isAdd: bool, length: opSize } (* Add/Subtract register. As with AddSubImmediate, both the destination and cc are optional. *) | AddSubRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, isAdd: bool, length: opSize, shift: shiftType } (* Bitwise logical operations. The immediate value must be a valid bit pattern. ccRef can only be SOME if logOp is LogAnd. *) | LogicalImmediate of { source: 'genReg, dest: 'optGenReg, ccRef: ccRef option, immed: Word64.word, logOp: logicalOp, length: opSize } (* Register logical operations. ccRef can only be SOME if logOp is LogAnd.*) | LogicalRegister of { base: 'genReg, shifted: 'genReg, dest: 'optGenReg, ccRef: ccRef option, logOp: logicalOp, length: opSize, shift: shiftType } (* Shift a word by an amount specified in a register. 
*) | ShiftRegister of { direction: shiftDirection, dest: 'genReg, source: 'genReg, shift: 'genReg, opSize: opSize } (* The various forms of multiply all take three arguments and the general form is dest = M * N +/- A.. *) | Multiplication of { kind: multKind, dest: 'genReg, sourceA: 'optGenReg, sourceM: 'genReg, sourceN: 'genReg } (* Signed or unsigned division. Sets the result to zero if the divisor is zero. *) | Division of { isSigned: bool, dest: 'genReg, dividend: 'genReg, divisor: 'genReg, opSize: opSize } (* Start of function. Set the register arguments. stackArgs is the list of stack arguments. If the function has a real closure regArgs includes the closure register (X8). The register arguments include the return register (X30). *) | BeginFunction of { regArgs: ('genReg * xReg) list, stackArgs: stackLocn list } (* Call a function. If the code address is a constant it is passed here. Otherwise the address is obtained by indirecting through X8 which has been loaded as one of the argument registers. The results are stored in the result registers, usually just X0. The "containers" argument is used to ensure that any container whose address is passed as one of the other arguments continues to be referenced until the function is called since there's a possibility that it isn't actually used after the function. *) | FunctionCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: 'genReg fnarg list, dests: ('genReg * xReg) list, saveRegs: 'genReg list, containers: stackLocn list} (* Jump to a tail-recursive function. This is similar to FunctionCall but complicated for stack arguments because the stack and the return address need to be overwritten. stackAdjust is the number of words to remove (positive) or add (negative) to the stack before the call. currStackSize contains the number of items currently on the stack. *) | TailRecursiveCall of { callKind: callKind, regArgs: ('genReg fnarg * xReg) list, stackArgs: {src: 'genReg fnarg, stack: int} list, stackAdjust: int, currStackSize: int } (* Return from the function. resultRegs are the registers containing the result, returnReg is the preg that contains the return address. *) | ReturnResultFromFunction of { results: ('genReg * xReg) list, returnReg: 'genReg, numStackArgs: int } (* Raise an exception. The packet is always loaded into X0. *) | RaiseExceptionPacket of { packetReg: 'genReg } (* Push a register to the stack. This is used both for a normal push, copies=1, and also to reserve a container. *) | PushToStack of { source: 'genReg, copies: int, container: stackLocn } (* Load a register from the stack. The container is the stack location identifier, the field is an offset in a container. *) | LoadStack of { dest: 'genReg, wordOffset: int, container: stackLocn, field: int } (* Store a value into the stack. *) | StoreToStack of { source: 'genReg, container: stackLocn, field: int, stackOffset: int } (* Set the register to the address of the container i.e. a specific offset on the stack. *) | ContainerAddress of { dest: 'genReg, container: stackLocn, stackOffset: int } (* Remove items from the stack. Used to remove containers or registers pushed to the stack.. *) | ResetStackPtr of { numWords: int } (* Tag a value by shifting and setting the tag bit. *) | TagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Shift a value to remove the tag bit. The cache is used if this is untagging a value that has previously been tagged. 
*) | UntagValue of { source: 'genReg, dest: 'genReg, isSigned: bool, opSize: opSize } (* Box a largeword value. Stores a value into a byte area. This can be implemented using AllocateMemoryFixed but keeping it separate makes optimisation easier. The result is always an address and needs to be converted to an object index on 32-in-64. *) | BoxLarge of { source: 'genReg, dest: 'genReg, saveRegs: 'genReg list } (* Load a value from a box. This can be implemented using a load but is kept separate to simplify optimisation. The source is always an absolute address. *) | UnboxLarge of { source: 'genReg, dest: 'genReg } (* Convert a floating point value into a value suitable for storing in the heap. This normally involves boxing except that 32-bit floats can be tagged in native 64-bits. *) | BoxTagFloat of { floatSize: floatSize, source: 'fpReg, dest: 'genReg, saveRegs: 'genReg list } (* The reverse of BoxTagFloat. *) | UnboxTagFloat of { floatSize: floatSize, source: 'genReg, dest: 'fpReg } (* Load a value with acquire semantics. This means that any other load in this thread after this sees the value of the shared memory at this point and not earlier. This is used for references and arrays to ensure that if another thread has built a data structure on the heap and then assigns the address to a shared ref this thread will see the updated heap and not any locally cached previous version. *) | LoadAcquire of { base: 'genReg, dest: 'genReg, loadType: loadType } (* Store a value with release semantics. This ensures that any other write completes before this operation and works with LoadAcquire. *) | StoreRelease of { base: 'genReg, source: 'genReg, loadType: loadType } (* This is a generalised constant shift which includes selection of a range of bits. *) | BitFieldShift of { source: 'genReg, dest: 'genReg, isSigned: bool, length: opSize, immr: word, imms: word } (* Copy a range of bits and insert it into another register. This is the only case where a register functions both as a source and a destination. *) | BitFieldInsert of { source: 'genReg, destAsSource: 'genReg, dest: 'genReg, length: opSize, immr: word, imms: word } (* Indexed case. *) | IndexedCaseOperation of { testReg: 'genReg } (* Exception handling. - Set up an exception handler. *) | PushExceptionHandler (* End of a handled section. Restore the previous handler. *) | PopExceptionHandler (* Marks the start of a handler. This sets the stack pointer and restores the old handler. Sets the exception packet register. *) | BeginHandler of { packetReg: 'genReg } (* Compare two vectors of bytes and set the condition code on the result. The registers are modified by the instruction. *) | CompareByteVectors of { vec1Addr: 'genReg, vec2Addr: 'genReg, length: 'genReg, ccRef: ccRef } (* Move a block of bytes (isByteMove true) or words (isByteMove false). The length is the number of items (bytes or words) to move. The registers are modified by the instruction. *) | BlockMove of { srcAddr: 'genReg, destAddr: 'genReg, length: 'genReg, isByteMove: bool } (* Add or subtract to the system stack pointer and optionally return the new value. This is used to allocate and deallocate C space. *) | AddSubXSP of { source: 'genReg, dest: 'optGenReg, isAdd: bool } (* Ensures the value will actually be referenced although it doesn't generate any code. *) | TouchValue of { source: 'genReg } (* Load a value at the address and get exclusive access. Always loads a 64-bit value. 
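Untagging is the inverse of TagValue: a tagged n is stored as 2*n+1, so an arithmetic right shift by one for signed values, or a logical right shift for unsigned ones, recovers n; if the value has only just been tagged the cached pre-tag register can be reused instead of shifting at all.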
*) | LoadAcquireExclusive of { base: 'genReg, dest: 'genReg } (* Store a value into an address releasing the lock. Sets the result to 0 if it succeeds or 1 if it fails. *) | StoreReleaseExclusive of { base: 'genReg, source: 'optGenReg, result: 'genReg } (* Insert a memory barrier. dmb ish. *) | MemoryBarrier (* Convert an integer to a floating point value. *) | ConvertIntToFloat of { source: 'genReg, dest: 'fpReg, srcSize: opSize, destSize: floatSize } (* Convert a floating point value to an integer using the specified rounding mode. We could get an overflow here but fortunately the ARM generates a value that will cause an overflow when we tag it, provided we tag it explicitly. *) | ConvertFloatToInt of { source: 'fpReg, dest: 'genReg, srcSize: floatSize, destSize: opSize, rounding: IEEEReal.rounding_mode } (* Unary floating point. This includes conversions between float and double. *) | UnaryFloatingPt of { source: 'fpReg, dest: 'fpReg, fpOp: fpUnary } (* Binary floating point: addition, subtraction, multiplication and division. *) | BinaryFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, dest: 'fpReg, fpOp: fpBinary, opSize: floatSize } (* Floating point comparison. *) | CompareFloatingPoint of { arg1: 'fpReg, arg2: 'fpReg, ccRef: ccRef, opSize: floatSize } + (* Yield control during a spin-lock. *) + | CPUYield + (* Debugging - fault if values don't match. *) | CacheCheck of { arg1: 'genReg, arg2: 'genReg } (* Destinations at the end of a basic block. *) and controlFlow = (* Unconditional branch to a label - should be a merge point. *) Unconditional of int (* Conditional branch. Jumps to trueJump if the condition is true, falseJump if false. *) | Conditional of { ccRef: ccRef, condition: condition, trueJump: int, falseJump: int } (* Exit - the last instruction of the block is a return, raise or tailcall. *) | ExitCode (* Indexed case - this branches to one of a number of labels. *) | IndexedBr of int list (* Set up a handler. This doesn't cause an immediate branch but the state at the start of the handler is the state at this point. *) | SetHandler of { handler: int, continue: int } (* Unconditional branch to a handler. Used when an exception is raised explicitly within the scope of a handler. *) | UnconditionalHandle of int (* Conditional branch to a handler. Occurs if there is a call to a function within the scope of a handler. It may jump to the handler. *) | ConditionalHandle of { handler: int, continue: int } and ('genReg, 'optGenReg, 'fpReg) basicBlock = BasicBlock of { block: ('genReg, 'optGenReg, 'fpReg) arm64ICode list, flow: controlFlow } type iCodeAbstract = (preg, pregOrZero, preg) arm64ICode and basicBlockAbstract = (preg, pregOrZero, preg) basicBlock and iCodeConcrete = (xReg, xReg, vReg) arm64ICode and basicBlockConcrete = (xReg, xReg, vReg) basicBlock (* Return the list of blocks that are the immediate successors of this. *) fun successorBlocks(Unconditional l) = [l] | successorBlocks(Conditional{trueJump, falseJump, ...}) = [trueJump, falseJump] | successorBlocks ExitCode = [] | successorBlocks(IndexedBr cases) = cases | successorBlocks(SetHandler{handler, continue, ...}) = [handler, continue] (* We only need "handler" in SetHandler because we may have a handler that is never actually jumped to.
*) | successorBlocks(UnconditionalHandle handler) = [handler] | successorBlocks(ConditionalHandle{handler, continue, ...}) = [handler, continue] local fun printCC(CcRef ccRef, stream) = stream ("CC" ^ Int.toString ccRef) fun printStackLoc(StackLoc{size, rno}, stream) = (stream "S"; stream(Int.toString rno); stream "("; stream(Int.toString size); stream ")") fun regRepr(XReg w) = "X" ^ Int.toString(Word8.toInt w) | regRepr XZero = "XZ" | regRepr XSP = "SP" fun arithRepr OpSize64 = "64" | arithRepr OpSize32 = "32" fun printLoadType(Load64, stream) = stream "64" | printLoadType(Load32, stream) = stream "32" | printLoadType(Load16, stream) = stream "16" | printLoadType(Load8, stream) = stream "8" fun printSaves([], _, _) = () | printSaves([areg], _, printReg) = printReg areg | printSaves(areg::more, stream, printReg) = (printReg areg; stream ","; printSaves(more, stream, printReg)) fun printArg(ArgInReg reg, _, printReg) = printReg reg | printArg(ArgOnStack{wordOffset, container, field, ...}, stream, _) = ( printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")" ) fun printShift(ShiftLSL w, stream) = stream(" LSL " ^ Word8.toString w) | printShift(ShiftLSR w, stream) = stream(" LSR " ^ Word8.toString w) | printShift(ShiftASR w, stream) = stream(" ASR " ^ Word8.toString w) | printShift(ShiftNone, _) = () fun printFloatSize(Float32, stream) = stream "F" | printFloatSize(Double64, stream) = stream "D" fun printICode (stream, printGenReg:'a -> unit, _: 'b->unit, _: 'c->unit) (MoveRegister{ source, dest }: ('a, 'b, 'c) arm64ICode) = ( stream "\tMove\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadNonAddressConstant{ source, dest }) = ( stream "\tLoadNonAddress\t"; stream(Word64.toString source); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadAddressConstant{ source, dest }) = ( stream "\tLoadAddress\t"; stream(Address.stringOfWord source); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (LoadWithConstantOffset{ base, dest, byteOffset, loadType }) = ( stream "\tLoadConstOffset"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (LoadFPWithConstantOffset{ base, dest, byteOffset, floatSize }) = ( stream "\tLoadConstOffset"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "]+"; stream(Int.toString byteOffset); stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (LoadWithIndexedOffset{ base, dest, index, loadType, signExtendIndex }) = ( stream "\tLoadIndexed"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (LoadFPWithIndexedOffset{ base, dest, index, floatSize, signExtendIndex }) = ( stream "\tLoadIndexed"; printFloatSize(floatSize, stream); stream "\t["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "] => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (GetThreadId { dest}) = ( stream "\tGetThreadId\t"; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (ObjectIndexAddressToAbsolute{ source, dest }) = ( stream "\tObjectAddrToAbs\t"; printGenReg source; 
stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (AbsoluteToObjectIndex{ source, dest }) = ( stream "\tAbsToObjectAddr\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (AllocateMemoryFixed{bytesRequired, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream(Word64.fmt StringCvt.DEC bytesRequired); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (AllocateMemoryVariable{size, dest, saveRegs}) = ( stream "\tAllocateMemory\t"; stream "s="; printGenReg(size); stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (InitialiseMem{size, addr, init}) = ( stream "\tInitialiseMem\t"; stream "s="; printGenReg(size); stream ",i="; printGenReg(init); stream ",a="; printGenReg(addr) ) | printICode (stream, _, _, _) BeginLoop = stream "\tBeginLoop" | printICode (stream, printGenReg, _, _) (JumpLoop{regArgs, stackArgs, checkInterrupt, ... }) = ( stream "\tJumpLoop\t"; List.app(fn {src, dst} => (printGenReg(dst); stream "="; printArg(src, stream, printGenReg); stream " ")) regArgs; List.app( fn {src, wordOffset, stackloc} => (printStackLoc(stackloc, stream); stream("(sp" ^ Int.toString wordOffset); stream ")="; printArg(src, stream, printGenReg); stream " ") ) stackArgs; case checkInterrupt of NONE => () | SOME saveRegs => (stream " Check:save="; printSaves(saveRegs, stream, printGenReg)) ) | printICode (stream, printGenReg, _, _) (StoreWithConstantOffset{ base, source, byteOffset, loadType }) = ( stream "\tStoreConstOffset"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode (stream, printGenReg, _, printFPReg) (StoreFPWithConstantOffset{ base, source, byteOffset, floatSize }) = ( stream "\tStoreConstOffset"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; stream(Int.toString byteOffset); stream "]" ) | printICode (stream, printGenReg, _, _) (StoreWithIndexedOffset{ base, source, index, loadType, signExtendIndex }) = ( stream "\tStoreIndexed"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode (stream, printGenReg, _, printFPReg) (StoreFPWithIndexedOffset{ base, source, index, floatSize, signExtendIndex }) = ( stream "\tStoreIndexed"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => ["; printGenReg base; stream "+"; printGenReg index; if signExtendIndex then stream " SX" else (); stream "]" ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }) = ( stream (if isAdd then "\tAddImmediate" else "\tSubImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift }) = ( stream (if isAdd then "\tAddRegister" else "\tSubRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; 
printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (LogicalImmediate{ source, dest, ccRef, immed, logOp, length }) = ( stream (case logOp of LogAnd => "\tAndImmediate" | LogOr => "\tOrImmediate" | LogXor => "\tXorImmediate"); stream(arithRepr length); stream "\t"; printGenReg source; stream ",0x"; stream(Word64.toString immed); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, printOptGenReg, _) (LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift }) = ( stream (case logOp of LogAnd => "\tAndRegister" | LogOr => "\tOrRegister" | LogXor => "\tXorRegister"); stream(arithRepr length); stream "\t"; printGenReg base; stream ", "; printGenReg(shifted); printShift(shift, stream); stream " => "; printOptGenReg dest; case ccRef of NONE => () | SOME cc => (stream ", "; printCC(cc, stream)) ) | printICode (stream, printGenReg, _, _) (ShiftRegister{ direction, dest, source, shift, opSize }) = ( stream ( case direction of ShiftLeft => "\tShiftLeft" | ShiftRightLogical => "\tShiftRightLog" | ShiftRightArithmetic => "\tShiftRightArith"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " by "; printGenReg(shift); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, printOptGenReg, _) (Multiplication{ kind, dest, sourceA, sourceM, sourceN }) = ( stream ( case kind of MultAdd32 => "\tMultAdd32\t" | MultSub32 => "\tMultSub32\t" | MultAdd64 => "\tMultAdd64\t" | MultSub64 => "\tMultSub64\t" | SignedMultAddLong => "\tSignedMultAddLong\t" | SignedMultHigh => "\tSignedMultHigh\t"); printGenReg(sourceM); stream " * "; printGenReg(sourceN); stream " +/- "; printOptGenReg sourceA; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (Division{ isSigned, dest, dividend, divisor, opSize }) = ( stream (if isSigned then "\tSignedDivide" else "\tUnsignedDivide"); stream(arithRepr opSize); stream "\t"; printGenReg(dividend); stream " by "; printGenReg(divisor); stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (BeginFunction{ regArgs, stackArgs }) = ( stream "\tBeginFunction\t"; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printGenReg(arg); stream " ")) regArgs; List.app(fn s => printStackLoc(s, stream)) stackArgs ) | printICode (stream, printGenReg, _, _) (FunctionCall{callKind, regArgs, stackArgs, dests, saveRegs, containers}) = ( stream "\tFunctionCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); stream "("; List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, stream, printGenReg); stream " ")) regArgs; List.app(fn arg => (stream "p="; printArg(arg, stream, printGenReg); stream " ")) stackArgs; stream ") "; List.app(fn (pr, r) => (stream(regRepr r); stream "=>"; printGenReg pr; stream " ")) dests; stream " save="; printSaves(saveRegs, stream, printGenReg); if null containers then () else (stream " containers="; List.app (fn c => (printStackLoc(c, stream); stream " ")) containers) ) | printICode (stream, printGenReg, _, _) (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize, ...}) = ( stream "\tTailCall\t"; case callKind of Recursive => stream "recursive " | ConstantCode m => (stream(stringOfWord m); stream " ") | FullCall => (); List.app(fn (arg, r) => (stream(regRepr r); stream "="; printArg(arg, 
stream, printGenReg); stream " ")) regArgs; List.app(fn {src, stack} => (stream (Int.toString stack); stream "<="; printArg(src, stream, printGenReg); stream " ")) stackArgs; stream "adjust="; stream(Int.toString stackAdjust); stream " stackSize="; stream(Int.toString currStackSize) ) | printICode (stream, printGenReg, _, _) (ReturnResultFromFunction{ results, returnReg, numStackArgs }) = ( stream "\tReturnFromFunction\t"; printGenReg(returnReg); stream "with "; List.app(fn (reg, r) => (stream(regRepr r); stream "=>"; printGenReg reg; stream " ")) results; stream("," ^ Int.toString numStackArgs) ) | printICode (stream, printGenReg, _, _) (RaiseExceptionPacket{ packetReg }) = ( stream "\tRaiseException\t"; printGenReg(packetReg) ) | printICode (stream, printGenReg, _, _) (PushToStack{ source, copies, container }) = ( stream "\tPushToStack\t"; printGenReg source; if copies > 1 then (stream " * "; stream(Int.toString copies)) else (); stream " => "; printStackLoc(container, stream) ) | printICode (stream, printGenReg, _, _) (LoadStack{ dest, wordOffset, container, field }) = ( stream "\tLoadStack\t"; printStackLoc(container, stream); stream " + "; stream(Int.toString field); stream " ("; stream(Int.toString wordOffset); stream ")"; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (StoreToStack{ source, container, field, stackOffset }) = ( stream "\tStoreToStack\t"; printGenReg source; stream " => "; printStackLoc(container, stream); stream "+"; stream (Int.toString field); stream "("; stream(Int.toString stackOffset); stream ")" ) | printICode (stream, printGenReg, _, _) (ContainerAddress{ dest, container, stackOffset }) = ( stream "\tContainerAddress\t"; stream "@"; printStackLoc(container, stream); stream " ("; stream(Int.toString stackOffset); stream ") => "; printGenReg dest ) | printICode (stream, _, _, _) (ResetStackPtr{ numWords }) = ( stream "\tResetStackPtr\t"; stream(Int.toString numWords) ) | printICode (stream, printGenReg, _, _) (TagValue{ source, dest, isSigned, opSize }) = ( stream "\tTag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (UntagValue{ source, dest, isSigned, opSize }) = ( stream "\tUntag"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr opSize); stream "\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, _) (BoxLarge{source, dest, saveRegs}) = ( stream "\tBoxLarge\t"; printGenReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, _) (UnboxLarge{source, dest}) = ( stream "\tUnboxLarge\t"; printGenReg source; stream " => "; printGenReg dest ) | printICode (stream, printGenReg, _, printFPReg) (BoxTagFloat{floatSize, source, dest, saveRegs}) = ( stream "\tBoxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream " save="; printSaves(saveRegs, stream, printGenReg) ) | printICode (stream, printGenReg, _, printFPReg) (UnboxTagFloat{floatSize, source, dest}) = ( stream "\tUnboxTagFloat"; printFloatSize(floatSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, _) (LoadAcquire{ base, dest, loadType }) = ( stream "\tLoadAcquire"; printLoadType(loadType, stream); stream "\t["; printGenReg base; stream "] => "; printGenReg dest ) | 
printICode (stream, printGenReg, _, _) (StoreRelease{ base, source, loadType }) = ( stream "\tStoreRelease"; printLoadType(loadType, stream); stream "\t"; printGenReg source; stream " => ["; printGenReg base; stream "]" ) | printICode (stream, printGenReg, _, _) (BitFieldShift{ source, dest, isSigned, length, immr, imms }) = ( stream "\tBitShift"; stream(if isSigned then "Signed" else "Unsigned"); stream(arithRepr length); stream "\t"; printGenReg source; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode (stream, printGenReg, _, _) (BitFieldInsert{ source, dest, destAsSource, length, immr, imms }) = ( stream "\tBitInsert"; stream(arithRepr length); stream "\t"; printGenReg source; stream " with "; printGenReg destAsSource; stream " => "; printGenReg dest; stream " immr="; stream(Word.fmt StringCvt.DEC immr); stream " imms="; stream(Word.fmt StringCvt.DEC imms) ) | printICode (stream, printGenReg, _, _) (IndexedCaseOperation{testReg}) = ( stream "\tIndexedCase\t"; printGenReg testReg ) | printICode (stream, _, _, _) PushExceptionHandler = stream "\tPushExcHandler" | printICode (stream, _, _, _) PopExceptionHandler = stream "\tPopExcHandler" | printICode (stream, printGenReg, _, _) (BeginHandler{packetReg}) = ( stream "\tBeginHandler\t"; printGenReg packetReg ) | printICode (stream, printGenReg, _, _) (CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = ( stream "\tCompareByteVectors\t"; printGenReg(vec1Addr); stream ","; printGenReg(vec2Addr); stream ","; printGenReg(length); stream " => "; printCC(ccRef, stream) ) | printICode (stream, printGenReg, _, _) (BlockMove{srcAddr, destAddr, length, isByteMove}) = ( stream(if isByteMove then "\tBlockByteMove\t" else "\tBlockWordMove\t"); stream "src="; printGenReg(srcAddr); stream ",dest="; printGenReg(destAddr); stream ",len="; printGenReg(length) ) | printICode (stream, printGenReg, printOptGenReg, _) (AddSubXSP{ source, dest, isAdd }) = ( stream(if isAdd then "\tAdd\t" else "\tSubtract\t"); printGenReg source; stream " XSP => "; printOptGenReg dest ) | printICode (stream, printGenReg, _, _) (TouchValue{ source }) = ( stream "\tTouchValue\t"; printGenReg source ) | printICode (stream, printGenReg, _, _) (LoadAcquireExclusive{ base, dest }) = ( stream "\tLoadExclusive\t["; printGenReg base; stream "] => "; printGenReg dest ) | printICode (stream, printGenReg, printOptGenReg, _) (StoreReleaseExclusive{ base, source, result }) = ( stream "\tStoreExclusive\t"; printOptGenReg source; stream " => ["; printGenReg base; stream "] result => "; printGenReg result ) | printICode (stream, _, _, _) MemoryBarrier = stream "\tMemoryBarrier" | printICode (stream, printGenReg, _, printFPReg) (ConvertIntToFloat{ source, dest, srcSize, destSize}) = ( stream "\tConvert"; stream(arithRepr srcSize); stream "To"; printFloatSize(destSize, stream); stream "\t"; printGenReg source; stream " => "; printFPReg dest ) | printICode (stream, printGenReg, _, printFPReg) (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}) = let open IEEEReal in stream "\tConvert"; printFloatSize(srcSize, stream); stream "To"; stream(arithRepr destSize); stream "\t"; printFPReg source; stream " => "; printGenReg dest; stream( case rounding of TO_NEAREST => " rounding" | TO_NEGINF => " rounding down" | TO_POSINF => " rounding up" | TO_ZERO => " truncating" ) end | printICode (stream, _, _, printFPReg) (UnaryFloatingPt{ source, dest, fpOp}) = ( stream( case fpOp of NegFloat 
=> "\tNegFloat\t" | NegDouble => "\tNegDouble\t" | AbsFloat => "\tAbsFloat\t" | AbsDouble => "\tAbsDouble\t" | ConvFloatToDble => "\tFloatToDble\t" | ConvDbleToFloat => "\t\t" ); printFPReg source; stream " => "; printFPReg dest ) | printICode (stream, _, _, printFPReg) (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}) = ( stream( case fpOp of MultiplyFP => "\tMultiply" | DivideFP => "\tDivide" | AddFP => "\tAdd" | SubtractFP => "\tSubtract" ); printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream " => "; printFPReg dest ) | printICode (stream, _, _, printFPReg) (CompareFloatingPoint{ arg1, arg2, opSize, ccRef}) = ( stream "\tCompare"; printFloatSize(opSize, stream); stream "\t"; printFPReg arg1; stream ", "; printFPReg arg2; stream ", "; printCC(ccRef, stream) ) + | printICode (stream, _, _, _) CPUYield = stream "\tCpuYield" + | printICode (stream, printGenReg, _, _) (CacheCheck{ arg1, arg2}) = ( stream "\tCacheCheck\t"; printGenReg arg1; stream ", "; printGenReg arg2 ) and printCondition(cond, stream) = stream(condToString cond) (* Print a basic block. *) fun printBlock (stream, printGenReg, printOptGenReg, printFPReg) (blockNo, BasicBlock{block, flow, ...}) = ( (* Put a label on all but the first. *) if blockNo <> 0 then stream("L" ^ Int.toString blockNo ^ ":") else (); List.app (fn icode => (printICode (stream, printGenReg, printOptGenReg, printFPReg) (icode); stream "\n")) block; case flow of Unconditional l => stream("\tJump\tL" ^ Int.toString l ^ "\n") | Conditional {condition, trueJump, falseJump, ccRef, ...} => ( stream "\tJump"; printCondition(condition, stream); stream "\t"; printCC(ccRef, stream); stream " L"; stream (Int.toString trueJump); stream " else L"; stream (Int.toString falseJump); stream "\n" ) | ExitCode => () | IndexedBr _ => () | SetHandler{handler, continue} => stream(concat["\tSetHandler\tH", Int.toString handler, "\n", "\tJump\tL", Int.toString continue, "\n"]) | UnconditionalHandle handler => stream("\tJump\tH" ^ Int.toString handler ^ "\n") | ConditionalHandle{handler, continue} => stream(concat["\tJump\tL", Int.toString continue, " or H", Int.toString handler, "\n"]) ) in fun printPReg stream (PReg i) = stream("R" ^ Int.toString i) fun printOptPReg stream ZeroReg = stream "Zero" | printOptPReg stream (SomeReg reg) = printPReg stream reg fun printXReg stream (XReg w) = stream("X" ^ Int.toString(Word8.toInt w)) | printXReg stream XZero = stream "XZ" | printXReg stream XSP = stream "XSP" fun printVReg stream (VReg w) = stream("V" ^ Int.toString(Word8.toInt w)) fun printICodeAbstract(blockVec, stream) = Vector.appi(printBlock(stream, printPReg stream, printOptPReg stream, printPReg stream)) blockVec and printICodeConcrete(blockVec, stream) = Vector.appi(printBlock(stream, printXReg stream, printXReg stream, printVReg stream)) blockVec end (* Only certain bit patterns are allowed in a logical immediate instruction but the encoding is complex so it's easiest to inherit the test from the assembler layer. *) local fun optow OpSize32 = WordSize32 | optow OpSize64 = WordSize64 in fun isEncodableBitPattern(v, w) = Arm64Code.isEncodableBitPattern(v, optow w) end (* This generates a BitField instruction with the appropriate values for immr and imms. 
*) fun shiftConstant{ direction, dest, source, shift, opSize } = let val (isSigned, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (false, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (false, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (false, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (false, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (true, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (true, shift, 0wx1f) in BitFieldShift{ source=source, dest=dest, isSigned=isSigned, length=opSize, immr=immr, imms=imms } end structure Sharing = struct type xReg = xReg and vReg = vReg and reg = reg and condition = condition and shiftType = shiftType and ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and controlFlow = controlFlow and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and stackLocn = stackLocn and regProperty = regProperty and ccRef = ccRef and 'genReg fnarg = 'genReg fnarg and closureRef = closureRef and loadType = loadType and opSize = opSize and logicalOp = logicalOp and callKind = callKind and floatSize = floatSize and shiftDirection = shiftDirection and multKind = multKind and fpUnary = fpUnary and fpBinary = fpBinary end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML index 99da294c..bb86fe02 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML @@ -1,1219 +1,1221 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ICodeToArm64Code( structure Arm64PreAssembly: ARM64PREASSEMBLY structure Debug: DEBUG structure Arm64ICode: ARM64ICODE structure Identify: ARM64IDENTIFYREFERENCES structure IntSet: INTSET structure Pretty: PRETTY structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end sharing Arm64PreAssembly.Sharing = Arm64ICode.Sharing = Identify.Sharing = IntSet ): ARM64ICODEGENERATE = struct open Identify open Arm64ICode open Arm64PreAssembly open Address exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. *) infix 5 <::> <@> fun tl <::> hd = hd :: tl (*and snd <@> fst = fst @ snd*) (* These aren't currently used for anything. *) val workReg1 = X16 and workReg2 = X17 fun icodeToArm64Code {blocks: basicBlockConcrete vector, functionName, stackRequired, debugSwitches, resultClosure, profileObject, ...} = let val numBlocks = Vector.length blocks (* Load from and store to stack. 
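The scaled forms only allow a 12-bit unsigned offset, so for example word offset 2 becomes a single LoadRegScaled/StoreRegScaled, while word offset 5000 first loads the offset with LoadNonAddr and then uses a register-indexed access.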
*) fun loadFromStack(destReg, wordOffset, code) = if wordOffset >= 4096 then (LoadRegIndexed{regT=destReg, regN=X_MLStackPtr, regM=destReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(destReg, Word64.fromInt wordOffset)] @ code else (LoadRegScaled{regT=destReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code and storeToStack(sourceReg, wordOffset, workReg, code) = if wordOffset >= 4096 then (StoreRegIndexed{regT=sourceReg, regN=X_MLStackPtr, regM=workReg, option=ExtUXTX ScaleOrShift, loadType=Load64}) :: [LoadNonAddr(workReg, Word64.fromInt wordOffset)] @ code else (StoreRegScaled{regT=sourceReg, regN=X_MLStackPtr, unitOffset=wordOffset, loadType=Load64}) :: code datatype srcAndDest = IsInReg of xReg | IsOnStack of int local (* The registers are numbered from 0. Choose values that don't conflict with the stack addresses. *) fun regNo(XReg r) = ~1 - Word8.toInt r | regNo _ = ~1 - 31 type node = {src: srcAndDest, dst: srcAndDest } fun nodeAddress({dst=IsInReg r, ...}: node) = regNo r | nodeAddress({dst=IsOnStack a, ...}) = a fun arcs({src=IsOnStack wordOffset, ...}: node) = [wordOffset] | arcs{src=IsInReg r, ...} = [regNo r] in val stronglyConnected = Strongly.stronglyConnectedComponents { nodeAddress=nodeAddress, arcs=arcs } end (* This is a general function for moving values into registers or to the stack where it is possible that the source values might also be in use as destinations. The stack is used for destinations only for tail recursive calls. *) fun moveMultipleValues(moves, code) = let fun moveValues ([], code) = code (* We're done. *) | moveValues (arguments, code) = let (* stronglyConnectedComponents does two things. It detects loops where it's not possible to move items without breaking the loop but more importantly it orders the dependencies so that if there are no loops we can load the source and store it in the destination knowing that we won't overwrite anything we might later need. *) val ordered = stronglyConnected arguments fun loadIntoReg(IsInReg sReg, dReg, code) = if sReg = dReg then code else (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | loadIntoReg(IsOnStack wordOffset, dReg, code) = loadFromStack(dReg, wordOffset, code) fun moveEachValue ([], code) = code | moveEachValue ([{dst=IsInReg dReg, src}] :: rest, code) = moveEachValue(rest, loadIntoReg(src, dReg, code)) | moveEachValue ([{dst=IsOnStack wordOffset, src=IsInReg sReg}] :: rest, code) = (* Storing into the stack. *) moveEachValue(rest, storeToStack(sReg, wordOffset, workReg1, code)) | moveEachValue ([{dst=IsOnStack dstOffset, src=IsOnStack srcOffset}] :: rest, code) = (* Copy a stack location - needs a load and store unless the address is the same. *) if dstOffset = srcOffset then moveEachValue(rest, code) else moveEachValue(rest, storeToStack(workReg2, dstOffset, workReg1, loadFromStack(workReg2, srcOffset, code))) | moveEachValue((cycle as first :: _ :: _) :: rest, code) = (* We have a cycle. *) let (* We need to exchange some of the arguments. Doing an exchange here will set the destination with the correct source. However we have to process every subsequent entry with the swapped registers. That may well mean that one of those entries becomes trivial. We also need to rerun stronglyConnectedComponents on at least the rest of this cycle. It's easiest to flatten the rest and do everything. *) (* Exchange the source and destination. We don't have an exchange instruction and there's a further complication. 
We could be copying between stack locations and their offsets could be > 4096. Since we've only got two work registers we need to use the hardware stack as an extra location. Stack-stack exchange is very rare so the extra overhead to handle the general case is worth it. *) local fun storeToDest(sReg, IsInReg dReg, _, code) = (MoveXRegToXReg{sReg=sReg, dReg=dReg}) :: code | storeToDest(sReg, IsOnStack wordOffset, work, code) = storeToStack(sReg, wordOffset, work, code) in fun exchange(IsInReg arg1Reg, arg2, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg1Reg}) :: storeToDest(arg1Reg, arg2, workReg1, loadIntoReg(arg2, workReg2, code)) | exchange(arg1, IsInReg arg2Reg, code) = (MoveXRegToXReg{sReg=workReg2, dReg=arg2Reg}) :: storeToDest(arg2Reg, arg1, workReg1, loadIntoReg(arg1, workReg2, code)) | exchange(arg1, arg2, code) = (* The hardware stack must be 16-byte aligned. *) storeToDest(workReg2, arg2, workReg1, (LoadRegUnscaled{regT=workReg2, regN=XSP, byteOffset=16, loadType=Load64, unscaledType=PostIndex}) :: storeToDest(workReg2, arg1, workReg1, loadIntoReg(arg2, workReg2, (StoreRegUnscaled{regT=workReg2, regN=XSP, byteOffset= ~16, loadType=Load64, unscaledType=PreIndex}) :: loadIntoReg(arg1, workReg2, code)))) end (* Try to find either a register-register move or a register-stack move. If not use the first. If there's a stack-register move there will also be a register-stack so we don't need to look for both. *) val {dst=selectDst, src=selectSrc} = first (* This includes this entry but after the swap we'll eliminate it. *) val flattened = List.foldl(fn (a, b) => a @ b) [] (cycle :: rest) val destAsSource = selectDst fun match(s1: srcAndDest, s2) = s1 = s2 fun swapSources{src, dst} = if match(src, selectSrc) then {src=destAsSource, dst=dst} else if match(src, destAsSource) then {src=selectSrc, dst=dst} else {src=src, dst=dst} val exchangeCode = exchange(selectDst, selectSrc, code) in moveValues(List.map swapSources flattened, exchangeCode) end | moveEachValue(([]) :: _, _) = (* This should not happen - avoid warning. *) raise InternalError "moveEachValue - empty set" in moveEachValue(ordered, code) end in moveValues(moves, code) end (* Where we have multiple specific registers as either source or destination there is the potential that a destination register if currently in use as a source. *) fun moveMultipleRegisters(regPairList, code) = let val regPairsAsDests = List.map(fn {src, dst} => {src=IsInReg src, dst=IsInReg dst}) regPairList in moveMultipleValues(regPairsAsDests, code) end fun moveIfNecessary({src, dst}, code) = if src = dst then code else MoveXRegToXReg{sReg=src, dReg=dst} :: code (* Add a constant word to the source register and put the result in the destination. regW is used as a work register if necessary. This is used both for addition and subtraction. *) fun addConstantWord({regS, regD, value=0w0, ...}, code) = if regS = regD then code else MoveXRegToXReg{sReg=regS, dReg=regD} :: code | addConstantWord({regS, regD, regW, value}, code) = let (* If we have to load the constant it's better if the top 32-bits are zero if possible. *) val (isSub, unsigned) = if value > Word64.<<(0w1, 0w63) then (true, ~ value) else (false, value) in if unsigned < Word64.<<(0w1, 0w24) then (* We can put up to 24 in a shifted and an unshifted constant. 
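For example, adding 0x12345 is split into a shifted immediate of 0x12 (i.e. 0x12000) followed by an unshifted immediate of 0x345, giving two AddImmediate instructions; a value such as 0x345 needs only one.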
*) let val w = Word.fromLarge(Word64.toLarge unsigned) val high = Word.andb(Word.>>(w, 0w12), 0wxfff) val low = Word.andb(w, 0wxfff) val addSub = if isSub then SubImmediate else AddImmediate in if high <> 0w0 then ( (if low <> 0w0 then [addSub{regN=regD, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64}] else []) @ addSub{regN=regS, regD=regD, immed=high, shifted=true, setFlags=false, opSize=OpSize64} :: code ) else addSub{regN=regS, regD=regD, immed=low, shifted=false, setFlags=false, opSize=OpSize64} :: code end else let (* To minimise the constant and increase the chances that it will fit in a single word look to see if we can shift it. *) fun getShift(value, shift) = if Word64.andb(value, 0w1) = 0w0 then getShift(Word64.>>(value, 0w1), shift+0w1) else (value, shift) val (shifted, shift) = getShift(unsigned, 0w0) in code <::> LoadNonAddr(regW, shifted) <::> (if isSub then SubShiftedReg else AddShiftedReg) {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift, setFlags=false, opSize=OpSize64} end end val labelMaker = createLabelMaker() val startOfFunctionLabel = createLabel labelMaker (* Used for recursive calls/jumps *) val blockToLabelMap = Vector.tabulate(numBlocks, fn _ => createLabel labelMaker) fun getBlockLabel blockNo = Vector.sub(blockToLabelMap, blockNo) fun codeExtended _ (MoveRegister{source, dest, ...}, code) = moveIfNecessary({src=source, dst=dest}, code) | codeExtended _ (LoadNonAddressConstant{source, dest, ...}, code) = code <::> LoadNonAddr(dest, source) | codeExtended _ (LoadAddressConstant{source, dest, ...}, code) = code <::> LoadAddr(dest, source) | codeExtended _ (LoadWithConstantOffset{dest, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then LoadRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate} :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in LoadRegScaled{regT=dest, regN=base, unitOffset=unitOffset, loadType=loadType} :: code end | codeExtended _ (LoadFPWithConstantOffset{dest, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (LoadFPRegUnscaled{regT=dest, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (LoadFPRegScaled{regT=dest, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (LoadWithIndexedOffset{dest, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (LoadRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (LoadFPWithIndexedOffset{dest, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. 
*) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (LoadFPRegIndexed{regT=dest, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (GetThreadId { dest}, code) = (* Load the thread id. This is always a 64-bit value. *) (LoadRegScaled{regT=dest, regN=X_MLAssemblyInt, unitOffset=threadIdOffset, loadType=Load64}) :: code | codeExtended _ (ObjectIndexAddressToAbsolute{source, dest, ...}, code) = (AddShiftedReg{regM=source, regN=X_Base32in64, regD=dest, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}) :: code | codeExtended _ (AbsoluteToObjectIndex{source, dest, ...}, code) = let val destReg = dest in code <::> (SubShiftedReg{regM=X_Base32in64, regN=source, regD=destReg, shift=ShiftNone, setFlags=false, opSize=OpSize64}) <::> (shiftConstant{shift=0w2, regN=destReg, regD=destReg, direction=ShiftRightLogical, opSize=OpSize64}) end | codeExtended _ (AllocateMemoryFixed{ bytesRequired, dest, saveRegs }, code) = code <::> AllocateMemoryFixedSize{ bytes=Word.fromLarge bytesRequired, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (AllocateMemoryVariable{ size, dest, saveRegs }, code) = code <::> AllocateMemoryVariableSize{ sizeReg=size, dest=dest, save=saveRegs, work=workReg1 } | codeExtended _ (InitialiseMem{ size, addr, init}, code) = let val sizeReg = size and addrReg = addr and initReg = init val exitLabel = createLabel labelMaker and loopLabel = createLabel labelMaker (* This uses a loop to initialise. It's possible the size is zero so we have to check at the top of the loop. *) val (bShift, offset, loadType) = if is32in64 then (0w2, ~4, Load32) else (0w3, ~8, Load64) in code <::> (* Add the length in bytes so we point at the end. *) AddShiftedReg{regM=sizeReg, regN=addrReg, regD=workReg1, shift=ShiftLSL bShift, setFlags=false, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Are we at the start? *) SubShiftedReg{regM=workReg1, regN=addrReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, exitLabel) <::> StoreRegUnscaled{regT=initReg, regN=workReg1, byteOffset=offset, loadType=loadType, unscaledType=PreIndex } <::> UnconditionalBranch loopLabel <::> SetLabel exitLabel end | codeExtended _ (BeginLoop, code) = code | codeExtended _ (JumpLoop{regArgs, stackArgs, checkInterrupt}, code) = let (* TODO: We could have a single list and use ArgOnStack and ArgInReg to distinguish. *) fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {wordOffset, src, ...} => {src=convertArg src, dst=IsOnStack wordOffset}) stackArgs val extRegArgs = map (fn {dst, src} => {src=convertArg src, dst=convertArg(ArgInReg dst)}) regArgs val code2 = moveMultipleValues(extStackArgs @ extRegArgs, code) in case checkInterrupt of NONE => code2 | SOME saveRegs => let val skipCheck = createLabel labelMaker in code2 <::> (* Put in stack-check code to allow this to be interrupted. 
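The test subtracts the limit from X_MLStackPtr and branches on carry-set (no borrow), so the RTS trap is skipped whenever the stack pointer is still at or above the limit.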
*) LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} <::> SubShiftedReg{regM=workReg1, regN=X_MLStackPtr, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondCarrySet, skipCheck) <::> RTSTrap{rtsEntry=stackOverflowCallOffset, work=workReg1, save=saveRegs} <::> SetLabel skipCheck end end | codeExtended _ (StoreWithConstantOffset{source, base, byteOffset, loadType, ...}, code) = if byteOffset < 0 then (StoreRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, loadType=loadType, unscaledType=NoUpdate}) :: code else let val unitOffset = case loadType of Load64 => Int.quot(byteOffset, 8) | Load32 => Int.quot(byteOffset, 4) | Load16 => Int.quot(byteOffset, 2) | Load8 => byteOffset in (StoreRegScaled{regT=source, regN=base, unitOffset=unitOffset, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithConstantOffset{source, base, byteOffset, floatSize, ...}, code) = if byteOffset < 0 then (StoreFPRegUnscaled{regT=source, regN=base, byteOffset=byteOffset, floatSize=floatSize, unscaledType=NoUpdate}) :: code else let val unitOffset = Int.quot(byteOffset, case floatSize of Float32 => 4 | Double64 => 8) in (StoreFPRegScaled{regT=source, regN=base, unitOffset=unitOffset, floatSize=floatSize}) :: code end | codeExtended _ (StoreWithIndexedOffset{source, base, index, loadType, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX val scale = case loadType of Load8 => NoScale | _ => ScaleOrShift in (StoreRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType scale, loadType=loadType}) :: code end | codeExtended _ (StoreFPWithIndexedOffset{source, base, index, floatSize, signExtendIndex, ...}, code) = let val baseReg = base and indexReg = index (* In 32-in-64 the index is a 32-bit value in the low order 32-bits. It may be signed if it is a C address. *) val scaleType = if is32in64 andalso signExtendIndex then ExtSXTW else ExtUXTX in (StoreFPRegIndexed{regT=source, regN=baseReg, regM=indexReg, option=scaleType ScaleOrShift, floatSize=floatSize}) :: code end | codeExtended _ (AddSubImmediate{ source, dest, immed, isAdd, length, ccRef}, code) = let val destReg = dest in ((if isAdd then AddImmediate else SubImmediate) {regN=source, regD=destReg, immed=immed, shifted=false, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (AddSubRegister{ base, shifted, dest, isAdd, length, ccRef, shift}, code) = let val destReg = dest in ( (if isAdd then AddShiftedReg else SubShiftedReg) {regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalImmediate{ source, dest, immed, logOp, length, ccRef}, code) = let val destReg = dest in (BitwiseLogical{regN=source, regD=destReg, bits=immed, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (LogicalRegister{ base, shifted, dest, logOp, length, ccRef, shift}, code) = let (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. 
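(These are the BIC, ORN and EON encodings.)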
*) val destReg = dest in (LogicalShiftedReg{regN=base, regM=shifted, regD=destReg, shift=shift, opSize=length, logOp=logOp, setFlags=isSome ccRef}) :: code end | codeExtended _ (ShiftRegister{ direction, dest, source, shift, opSize }, code) = (ShiftRegisterVariable{regN=source, regM=shift, regD=dest, shiftDirection=direction, opSize=opSize}) :: code | codeExtended _ (Multiplication{ kind, dest, sourceA, sourceM, sourceN }, code) = let val destReg = dest and srcAReg = sourceA and srcNReg = sourceN and srcMReg = sourceM in (MultiplyAndAddSub{regM=srcMReg, regN=srcNReg, regA=srcAReg, regD=destReg, multKind=kind}) :: code end | codeExtended _ (Division{ isSigned, dest, dividend, divisor, opSize }, code) = (DivideRegs{regN=dividend, regM=divisor, regD=dest, isSigned=isSigned, opSize=opSize}) :: code | codeExtended _ (BeginFunction{regArgs, ...}, code) = let val skipCheck = createLabel labelMaker val defaultWords = 10 (* This is wired into the RTS. *) val workRegister = workReg1 val debugTrapAlways = false (* Can be set to true for debugging *) (* Test with either the stack-pointer or a high-water value. The RTS assumes that X9 has been used as the high-water if it is called through stackOverflowXCallOffset rather than stackOverflowCallOffset *) val (testReg, entryPt, code1) = if stackRequired <= defaultWords then (X_MLStackPtr, stackOverflowCallOffset, code) else (X9, stackOverflowXCallOffset, addConstantWord({regS=X_MLStackPtr, regD=X9, regW=workRegister, value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt stackRequired}, code)) (* Skip the RTS call if there is enough stack. N.B. The RTS can modify the end-of-stack value to force a trap here even if there is really enough stack. *) val code2 = (if debugTrapAlways then [] else [ConditionalBranch(CondCarrySet, skipCheck), SubShiftedReg{regM=workRegister, regN=testReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64}]) @ (* Load the end-of-stack value. *) LoadRegScaled{regT=workRegister, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset, loadType=Load64} :: code1 val code3 = code2 <::> RTSTrap{rtsEntry=entryPt, work=workReg1, save=List.map #2 regArgs} <::> SetLabel skipCheck val usedRegs = regArgs fun mkPair(pr, rr) = {src=rr,dst=pr} val regPairs = List.map mkPair usedRegs in moveMultipleRegisters(regPairs, code3) end | codeExtended _ (TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, currStackSize}, code) = let fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack wordOffset | convertArg(ArgInReg reg) = IsInReg(reg) val extStackArgs = map (fn {stack, src} => {dst=IsOnStack(stack+currStackSize), src=convertArg src}) stackArgs val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs (* Tail recursive calls are complicated because we generally have to overwrite the existing stack. That means storing the arguments in the right order to avoid overwriting a value that we are using for a different argument. *) fun codeTailCall(arguments, stackAdjust, code) = if stackAdjust < 0 then let (* If the function we're calling takes more arguments on the stack than the current function we will have to extend the stack. Do that by pushing the argument whose offset is at -1. Then adjust all the offsets and repeat. *) val {src=argM1, ...} = valOf(List.find(fn {dst=IsOnStack ~1, ...} => true | _ => false) arguments) fun renumberArgs [] = [] | renumberArgs ({dst=IsOnStack ~1, ...} :: args) = renumberArgs args (* Remove the one we've done. 
*) | renumberArgs ({dst, src} :: args) = let val newDest = case dst of IsOnStack d => IsOnStack(d+1) | regDest => regDest val newSrc = case src of IsOnStack wordOffset => IsOnStack(wordOffset+1) | other => other in {dst=newDest, src=newSrc} :: renumberArgs args end val pushCode = case argM1 of IsOnStack wordOffset => (StoreRegUnscaled{regT=workReg2, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: loadFromStack(workReg2, wordOffset, code) | IsInReg reg => (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) :: code in codeTailCall(renumberArgs arguments, stackAdjust+1, pushCode) end else let val loadArgs = moveMultipleValues(arguments, code) in if stackAdjust = 0 then loadArgs else addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt stackAdjust * Word.toLarge nativeWordSize}, loadArgs) end val setArgumentsCode = codeTailCall(extStackArgs @ extRegArgs, stackAdjust+currStackSize, code) val jumpToFunctionCode = case callKind of Recursive => [(UnconditionalBranch startOfFunctionLabel)] | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else [(BranchReg{regD=workReg1, brRegType=BRRBranch}), (LoadAddr(workReg1, m))] | FullCall => if is32in64 then [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64}, AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64}] else [BranchReg{regD=workReg1, brRegType=BRRBranch}, LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64}] in jumpToFunctionCode @ setArgumentsCode end | codeExtended _ (FunctionCall{callKind, regArgs=regArgs, stackArgs=stackArgs, dests, saveRegs, ...}, code) = let local fun pushStackArgs ([], _, code) = code | pushStackArgs (ArgOnStack {wordOffset, ...} ::args, argNum, code) = let (* Have to adjust the offsets of stack arguments. *) val adjustedOffset = wordOffset+argNum in pushStackArgs(args, argNum+1, loadFromStack(workReg1, adjustedOffset, code) <::> StoreRegUnscaled{regT=workReg1, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64}) end | pushStackArgs (ArgInReg reg ::args, argNum, code) = pushStackArgs(args, argNum+1, code <::> (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, unscaledType=PreIndex, loadType=Load64})) val pushedArgs = pushStackArgs(stackArgs, 0, code (* Initial code *)) (* We have to adjust any stack offset to account for the arguments we've pushed. *) val numStackArgs = List.length stackArgs fun convertArg(ArgOnStack{wordOffset, ...}) = IsOnStack(wordOffset+numStackArgs) | convertArg(ArgInReg reg) = IsInReg(reg) in val extRegArgs = map (fn (a, r) => {src=convertArg a, dst=IsInReg r}) regArgs val loadArgs = moveMultipleValues(extRegArgs, pushedArgs) end (* Push the registers before the call and pop them afterwards. 
*) fun makeSavesAndCall([], code) = ( case callKind of Recursive => code <::> (BranchAndLink startOfFunctionLabel) | ConstantCode m => if is32in64 then raise InternalError "ConstantCode" (* Not currently *) else code <::> (LoadAddr(workReg1, m)) <::> (BranchReg{regD=workReg1, brRegType=BRRAndLink}) | FullCall => if is32in64 then code <::> AddShiftedReg{regM=X8, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadRegScaled{regT=workReg1, regN=workReg1, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} else code <::> LoadRegScaled{regT=workReg1, regN=X8, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRAndLink} ) | makeSavesAndCall(reg::regs, code) = let val areg = reg in makeSavesAndCall(regs, code <::> StoreRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) <::> LoadRegUnscaled{regT=areg, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex} end (* Results. These go from the specific result register into the allocated register. *) val resultPairs = List.map(fn (pr, rr) => {src=rr,dst=pr}) dests in moveMultipleRegisters(resultPairs, makeSavesAndCall(saveRegs, loadArgs)) end | codeExtended _ (ReturnResultFromFunction { results, returnReg, numStackArgs }, code) = let fun resetStack(0, code) = code | resetStack(nItems, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) (* Return results. This goes from the allocated register into the specific register rr. *) val resultPairs = List.map(fn (pr, rr) => {src=pr,dst=rr}) results in BranchReg{regD=returnReg, brRegType=BRRReturn} :: resetStack(numStackArgs, moveMultipleRegisters(resultPairs, code)) end | codeExtended _ (RaiseExceptionPacket{ packetReg }, code) = (* We need a work register here. It can be any register other than X0 since we don't preserve registers across calls. *) (* Copy the handler "register" into the stack pointer. Then jump to the address in the first word. The second word is the next handler. This is set up in the handler. We have a lot more raises than handlers since most raises are exceptional conditions such as overflow so it makes sense to minimise the code in each raise. *) moveIfNecessary({src=packetReg, dst=X0}, code) <::> LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegScaled{regT=workReg1, regN=X_MLStackPtr, unitOffset=0, loadType=Load64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch } | codeExtended _ (PushToStack{ source, copies, ... }, code) = let val reg = source val _ = copies > 0 orelse raise InternalError "PushToStack: copies<1" fun pushn(0, c) = c | pushn(n, c) = pushn(n-1, (StoreRegUnscaled{regT=reg, regN=X_MLStackPtr, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: c) in pushn(copies, code) end | codeExtended _ (LoadStack{ dest, wordOffset, ... }, code) = loadFromStack(dest, wordOffset, code) | codeExtended _ (StoreToStack{ source, stackOffset, ... }, code) = (* Store into the stack to set a field of a container. Always 64-bits. *) storeToStack(source, stackOffset, workReg1, code) | codeExtended _ (ContainerAddress{ dest, stackOffset, ... }, code) = (* Set the register to an offset in the stack. 
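For example, with 8-byte words a container at stack offset 3 becomes a single AddImmediate of 24 bytes from X_MLStackPtr; a byte offset of 4096 or more is first loaded with LoadNonAddr and added as a register.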
*) let val _ = stackOffset >= 0 orelse raise InternalError "codeGenICode: ContainerAddress - negative offset" val byteOffset = stackOffset * Word.toInt nativeWordSize in if byteOffset >= 4096 then code <::> LoadNonAddr(dest, Word64.fromInt byteOffset) <::> AddShiftedReg{regN=X_MLStackPtr, regM=dest, regD=dest, shift=ShiftNone, setFlags=false, opSize=OpSize64} else code <::> AddImmediate{regN=X_MLStackPtr, regD=dest, immed=Word.fromInt byteOffset, shifted=false, setFlags=false, opSize=OpSize64} end | codeExtended _ (ResetStackPtr{ numWords, ... }, code) = addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=workReg1, value=Word64.fromInt numWords * Word.toLarge nativeWordSize}, code) | codeExtended _ (TagValue{ source, dest, isSigned=_, opSize }, code) = (* Shift left by one bit and add one. *) code <::> shiftConstant{ direction=ShiftLeft, regD=dest, regN=source, shift=0w1, opSize=opSize } <::> BitwiseLogical{ bits=0w1, regN=dest, regD=dest, opSize=opSize, setFlags=false, logOp=LogOr} | codeExtended _ (UntagValue{ source, dest, isSigned, opSize }, code) = code <::> shiftConstant{ direction=if isSigned then ShiftRightArithmetic else ShiftRightLogical, regD=dest, regN=source, shift=0w1, opSize=opSize } | codeExtended _ (BoxLarge{ source, dest, saveRegs }, code) = boxSysWord({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (UnboxLarge{ source, dest }, code) = let (* Unbox a large word. The argument is a poly word. *) val destReg = dest and srcReg = source in if is32in64 then LoadRegScaled{regT=destReg, regN=destReg, unitOffset=0, loadType=Load64} :: AddShiftedReg{regM=srcReg, regN=X_Base32in64, regD=destReg, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} :: code else LoadRegScaled{regT=destReg, regN=srcReg, unitOffset=0, loadType=Load64} :: code end | codeExtended _ (BoxTagFloat{ floatSize=Double64, source, dest, saveRegs }, code) = boxDouble({source=source, destination=dest, workReg=workReg1, saveRegs=saveRegs}, code) | codeExtended _ (BoxTagFloat{ floatSize=Float32, source, dest, saveRegs }, code) = let val floatReg = source and fixedReg = dest in if is32in64 then boxFloat({source=floatReg, destination=fixedReg, workReg=workReg1, saveRegs=saveRegs}, code) else code <::> MoveFPToGeneral{regN=floatReg, regD=fixedReg, floatSize=Float32} <::> shiftConstant{ direction=ShiftLeft, shift=0w32, regN=fixedReg, regD=fixedReg, opSize=OpSize64} <::> BitwiseLogical{ bits=0w1, regN=fixedReg, regD=fixedReg, opSize=OpSize64, setFlags=false, logOp=LogOr} end | codeExtended _ (UnboxTagFloat { floatSize=Double64, source, dest }, code) = if is32in64 then code <::> AddShiftedReg{regM=source, regN=X_Base32in64, regD=workReg1, shift=ShiftLSL 0w2, setFlags=false, opSize=OpSize64} <::> LoadFPRegScaled{regT=dest, regN=workReg1, unitOffset=0, floatSize=Double64} else code <::> LoadFPRegScaled{regT=dest, regN=source, unitOffset=0, floatSize=Double64} | codeExtended _ (UnboxTagFloat { floatSize=Float32, source, dest }, code) = (* This is tagged in native 64-bits. In 32-in-64 we're loading 32-bits so we can use an indexed load directly. 
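In the native case the 32 float bits are held in the top half of the tagged word (see BoxTagFloat above, which shifts left by 32 and sets the tag bit), so untagging is a logical right shift by 32 followed by a move into the FP register.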
*) if is32in64 then code <::> LoadFPRegIndexed{regN=X_Base32in64, regM=source, regT=dest, option=ExtUXTX ScaleOrShift, floatSize=Float32} else code <::> shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=source, regD=workReg1, opSize=OpSize64} <::> MoveGeneralToFP{regN=workReg1, regD=dest, floatSize=Float32} | codeExtended _ (LoadAcquire{dest, base, loadType, ...}, code) = LoadAcquireReg{regT=dest, regN=base, loadType=loadType} :: code | codeExtended _ (StoreRelease{source, base, loadType, ...}, code) = StoreReleaseReg{regT=source, regN=base, loadType=loadType} :: code | codeExtended _ (BitFieldShift{ source, dest, isSigned, length, immr, imms }, code) = BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=if isSigned then BFSigned else BFUnsigned, opSize=length} :: code | codeExtended _ (BitFieldInsert{ source, destAsSource, dest, length, immr, imms }, code) = let (* If we're using BitFieldMove we retain some of the bits of the destination. The higher levels require us to treat that as a source. *) val _ = source = dest andalso raise InternalError "codeExtended: bitfield: dest=source" in BitField{immr=immr, imms=imms, regN=source, regD=dest, bitfieldKind=BFInsert, opSize=length} :: moveIfNecessary({src=destAsSource, dst=dest}, code) end | codeExtended {flow} (IndexedCaseOperation{testReg}, code) = let (* testReg contains the original value after the lowest value has been subtracted. Since both the original value and the lowest value were tagged it contains a shifted but untagged value. *) (* This should only be within a block with an IndexedBr flow type. *) val cases = case flow of IndexedBr cases => cases | _ => raise InternalError "codeGenICode: IndexedCaseOperation" val caseLabels = map getBlockLabel cases val tableLabel = createLabel labelMaker in code <::> LoadLabelAddress(workReg1, tableLabel) <::> (* Add the value shifted by one since it's already shifted. *) AddShiftedReg{regN=workReg1, regD=workReg1, regM=testReg, shift=ShiftLSL 0w1, setFlags=false, opSize=OpSize64} <::> BranchReg{regD=workReg1, brRegType=BRRBranch} <::> BranchTable{ startLabel=tableLabel, brTable=caseLabels } end | codeExtended {flow} (PushExceptionHandler, code) = let (* This should only be within a block with a SetHandler flow type. *) val handleLabel = case flow of SetHandler{ handler, ...} => handler | _ => raise InternalError "codeGenICode: PushExceptionHandler" val labelRef = getBlockLabel handleLabel in (* Push the old handler and the handler entry point and set the "current handler" to point to the stack after we've pushed these. *) code <::> LoadRegScaled{regT=workReg1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadLabelAddress(workReg2, labelRef) <::> StoreRegPair{regT1=workReg2, regT2=workReg1, regN=X_MLStackPtr, unitOffset= ~2, unscaledType=PreIndex, loadType=Load64} <::> StoreRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} end | codeExtended _ (PopExceptionHandler, code) = (* Remove and discard the handler we've set up. Pop the previous handler and put into "current handler". 
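The pair load pops the handler entry address into XZero, i.e. discards it, and pops the saved previous handler into the work register; that is then stored back into the handler slot in the assembly-interface block.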
*) code <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} | codeExtended _ (BeginHandler{packetReg}, code) = let val beginHandleCode = code <::> (* The exception raise code resets the stack pointer to the value in the exception handler so this is probably redundant. Leave it for the moment, *) LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} <::> LoadRegPair{regT1=XZero, regT2=workReg2, regN=X_MLStackPtr, unitOffset=2, unscaledType=PostIndex, loadType=Load64} <::> StoreRegScaled{regT=workReg2, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64} in moveIfNecessary({src=X0, dst=packetReg }, beginHandleCode) end | codeExtended _ (CompareByteVectors{vec1Addr, vec2Addr, length, ...}, code) = let (* Construct a loop to compare two vectors of bytes. *) val vec1Reg = vec1Addr and vec2Reg = vec2Addr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker in code <::> (* Set the CC to Equal before we start in case length = 0 *) SubShiftedReg{regM=lenReg, regN=lenReg, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> SetLabel loopLabel <::> (* Start of loop *) CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Go to the end when len = zero *) (* Load the bytes for the comparison and increment each. *) LoadRegUnscaled{regT=workReg1, regN=vec1Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> LoadRegUnscaled{regT=workReg2, regN=vec2Reg, byteOffset=1, unscaledType=PostIndex, loadType=Load8} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) (* Compare *) SubShiftedReg{regM=workReg2, regN=workReg1, regD=XZero, shift=ShiftNone, setFlags=true, opSize=OpSize64} <::> ConditionalBranch(CondEqual, loopLabel) <::> (* Loop if they're equal *) SetLabel exitLabel end | codeExtended _ (BlockMove{srcAddr, destAddr, length, isByteMove}, code) = let (* Construct a loop to move the data. *) val srcReg = srcAddr and destReg = destAddr and lenReg = length val loopLabel = createLabel labelMaker and exitLabel = createLabel labelMaker val (offset, loadType) = if isByteMove then (1, Load8) else if is32in64 then (4, Load32) else (8, Load64) in code <::> SetLabel loopLabel (* Start of loop *) <::> CompareBranch{ test=lenReg, label=exitLabel, onZero=true, opSize=OpSize64} <::> (* Exit when length = 0 *) LoadRegUnscaled{regT=workReg1, regN=srcReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> StoreRegUnscaled{regT=workReg1, regN=destReg, byteOffset=offset, loadType=loadType, unscaledType=PostIndex} <::> SubImmediate{regN=lenReg, regD=lenReg, immed=0w1, shifted=false, setFlags=false, opSize=OpSize64} <::> (* Decr len *) UnconditionalBranch loopLabel <::> (* Back to the start *) SetLabel exitLabel end | codeExtended _ (AddSubXSP{ source, dest, isAdd }, code) = let val allocFreeCode = (if isAdd then AddExtendedReg else SubExtendedReg) {regM=source, regN=XSP, regD=XSP, extend=ExtUXTX 0w0, setFlags=false, opSize=OpSize64} :: code in case dest of XZero => allocFreeCode | destReg => (* We have to use add here to get the SP into the destination instead of the usual move. 
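The ordinary register move is an alias of ORR with XZR and cannot encode SP; a MOV to or from SP is an alias of ADD immediate with a zero immediate, which is what we generate here.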
*) AddImmediate{regN=XSP, regD=destReg, immed=0w0, shifted=false, setFlags=false, opSize=OpSize64} :: allocFreeCode end | codeExtended _ (TouchValue _, code) = code (* Don't need to do anything now. *) (* Used in mutex operations. *) | codeExtended _ (LoadAcquireExclusive{ base, dest }, code) = LoadAcquireExclusiveRegister{regN=base, regT=dest} :: code | codeExtended _ (StoreReleaseExclusive{ base, source, result }, code) = StoreReleaseExclusiveRegister{regS=result, regT=source, regN=base} :: code | codeExtended _ (MemoryBarrier, code) = code <::> MemBarrier | codeExtended _ (ConvertIntToFloat{ source, dest, srcSize, destSize}, code) = (CvtIntToFP{regN=source, regD=dest, floatSize=destSize, opSize=srcSize}) :: code | codeExtended _ (ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, code) = (CvtFloatToInt{regN=source, regD=dest, round=rounding, floatSize=srcSize, opSize=destSize}) :: code | codeExtended _ (UnaryFloatingPt{ source, dest, fpOp}, code) = (FPUnaryOp{regN=source, regD=dest, fpOp=fpOp}) :: code | codeExtended _ (BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, code) = (FPBinaryOp{regN=arg1, regM=arg2, regD=dest, floatSize=opSize, fpOp=fpOp}) :: code | codeExtended _ (CompareFloatingPoint{ arg1, arg2, opSize, ...}, code) = (FPComparison{regN=arg1, regM=arg2, floatSize=opSize}) :: code + | codeExtended _ (CPUYield, code) = code <::> Yield + | codeExtended _ (CacheCheck{ arg1, arg2 }, code) = let val okLabel = createLabel labelMaker in (code <::> SubShiftedReg {regM=arg1, regN=arg2, regD=XZero, shift=ShiftNone, opSize=OpSize64, setFlags=true} <::> ConditionalBranch(CondEqual, okLabel) <::> MoveXRegToXReg{sReg=XZero, dReg=X16} <::> LoadRegScaled{regT=X16, regN=X16, unitOffset=0, loadType=Load16} <::> SetLabel okLabel) end local (* processed - set to true when a block has been processed. *) val processed = Array.array(numBlocks, false) fun haveProcessed n = Array.sub(processed, n) (* Find the blocks that reference this one. This isn't essential but allows us to try to generate blocks in the order of the control flow. This in turn may allow us to use short branches rather than long ones. *) val labelRefs = Array.array(numBlocks, []) datatype flowCode = FlowCodeSimple of int | FlowCodeCMove of {code: precode list, trueJump: int, falseJump: int} (* Process this recursively to set the references. If we have unreachable blocks, perhaps because they've been merged, we don't want to include them in the reference counting. This shouldn't happen now that IdentifyReferences removes unreferenced blocks. *) fun setReferences fromLabel toLabel = case Array.sub(labelRefs, toLabel) of [] => (* Not yet visited at all. *) let val BasicBlock{ flow, ...} = Vector.sub(blocks, toLabel) val refs = case flow of ExitCode => [] | Unconditional lab => [lab] | Conditional{trueJump, falseJump, ... } => [trueJump, falseJump] | IndexedBr labs => labs | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val () = if fromLabel >= 0 then Array.update(labelRefs, toLabel, [fromLabel]) else () in List.app (setReferences toLabel) refs end | refs => (* We've visited this at least once. Just add us to the list. *) Array.update(labelRefs, toLabel, fromLabel :: refs) val _ = setReferences 0 0 (* Process the blocks. We keep the "stack" explicit rather than using recursion because this allows us to select both arms of a conditional branch sooner.
*) fun genCode(toDo, lastFlow, code) = case List.filter (not o haveProcessed) toDo of [] => let (* There's nothing left to do. We may need to add a final branch to the end. *) val finalBranch = case lastFlow of ExitCode => [] | IndexedBr _ => [] | Unconditional dest => [(UnconditionalBranch(getBlockLabel dest))] | Conditional { condition, trueJump, falseJump, ...} => [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] | SetHandler { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [(UnconditionalBranch(getBlockLabel continue))] in finalBranch @ code (* Done. *) end | stillToDo as head :: _ => let local (* Check the references. If all the sources that lead up to this have already been we won't have any backward jumps. *) fun available dest = List.all haveProcessed (Array.sub(labelRefs, dest)) val continuation = case lastFlow of ExitCode => NONE | IndexedBr _ => NONE (* We could put the last branch in here. *) | Unconditional dest => if not (haveProcessed dest) andalso available dest then SOME(FlowCodeSimple dest) else NONE | Conditional {trueJump, falseJump, condition, ...} => (* We can usually choose either destination and in nearly all cases it won't matter. The default branch is not to take forward jumps so if there is reason to believe that one branch is more likely we should follow that branch now and leave the other. If we have Cond(No)Overflow we assume that overflow is unusual. If one branch raises an exception we assume that that is unusual. *) let val (first, second) = case (condition, Vector.sub(blocks, falseJump)) of (CondNoOverflow, _) => (trueJump, falseJump) | (_, BasicBlock{ flow=ExitCode, block, ...}) => if List.exists(fn RaiseExceptionPacket _ => true | _ => false) block then (trueJump, falseJump) else (falseJump, trueJump) | _ => (falseJump, trueJump) in if not (haveProcessed first) andalso available first then SOME(FlowCodeSimple first) else if not (haveProcessed second) andalso available second then SOME(FlowCodeSimple second) else NONE end | SetHandler { continue, ... } => (* We want the continuation if possible. We'll need a branch round the handler so that won't help. *) if not (haveProcessed continue) andalso available continue then SOME(FlowCodeSimple continue) else NONE | UnconditionalHandle _ => NONE | ConditionalHandle _ => NONE in (* First choice - continue the existing block. Second choice - the first item whose sources have all been processed. Third choice - something from the list. *) val picked = case continuation of SOME c => c | NONE => case List.find available stillToDo of SOME c => FlowCodeSimple c | NONE => FlowCodeSimple head end in case picked of FlowCodeSimple picked => let val () = Array.update(processed, picked, true) (* Code to terminate the previous block. *) val startCode = case lastFlow of ExitCode => [] | IndexedBr _ => [] | UnconditionalHandle _ => [] | Unconditional dest => if dest = picked then [] else [(UnconditionalBranch(getBlockLabel dest))] | ConditionalHandle { continue, ...} => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | SetHandler { continue, ... } => if continue = picked then [] else [(UnconditionalBranch(getBlockLabel continue))] | Conditional { condition, trueJump, falseJump, ...} => if picked = falseJump (* Usual case. 
*) then [(ConditionalBranch(condition, getBlockLabel trueJump))] else if picked = trueJump then (* We have a jump to the true condition. Invert the jump. This is more than an optimisation. Because this immediately precedes the true block we're not going to generate a label. *) [(ConditionalBranch(invertTest condition, getBlockLabel falseJump))] else [ (UnconditionalBranch(getBlockLabel falseJump)), (ConditionalBranch(condition, getBlockLabel trueJump)) ] (* Code-generate the body with the code we've done so far at the end. Add a label at the start if necessary. *) local (* If the previous block dropped through to this and this was the only reference then we don't need a label. *) fun onlyJumpingHere (lab: int) = if lab <> picked then false else case Array.sub(labelRefs, picked) of [singleton] => singleton = lab | _ => false val noLabel = case lastFlow of ExitCode => picked = 0 (* Unless this was the first block. *) | Unconditional dest => onlyJumpingHere dest | Conditional { trueJump, falseJump, ...} => onlyJumpingHere trueJump orelse onlyJumpingHere falseJump | IndexedBr _ => false | SetHandler _ => false | UnconditionalHandle _ => false | ConditionalHandle { continue, ...} => onlyJumpingHere continue in val startLabel = if noLabel then [] else [(SetLabel(getBlockLabel picked))] end val BasicBlock { flow, block, ...} = Vector.sub(blocks, picked) local fun genCodeBlock(instr, code) = codeExtended {flow=flow} (instr, code) in val bodyCode = List.foldl genCodeBlock (startLabel @ startCode @ code) block end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] in genCode(addSet @ stillToDo, flow, bodyCode) end | FlowCodeCMove{code, trueJump, falseJump} => let (* We've generated a conditional move and possibly a return. If the trueJump and falseJump are only ever referenced from this block they're done, otherwise we still need to do them. *) val _ = case Array.sub(labelRefs, trueJump) of [_] => Array.update(processed, trueJump, true) | _ => () val _ = case Array.sub(labelRefs, falseJump) of [_] => Array.update(processed, falseJump, true) | _ => () val BasicBlock { flow, ...} = Vector.sub(blocks, trueJump) val addSet = case flow of ExitCode => [] | Unconditional dest => [dest] | _ => raise InternalError "FlowCodeCMove" in genCode(addSet @ stillToDo, flow, code) end end in val ops = genCode([0], ExitCode, [(SetLabel startOfFunctionLabel)]) end in generateFinalCode{instrs=List.rev ops, name=functionName, resultClosure=resultClosure, parameters=debugSwitches, profileObject=profileObject, labelMaker= labelMaker} end structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and xReg = xReg and vReg = vReg and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML index 80f41e32..bc9a0686 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML @@ -1,883 +1,886 @@ (* Copyright (c) 2021-2 David C.J. 
Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64IdentifyReferences( structure Arm64ICode: ARM64ICODE structure Debug: DEBUG structure IntSet: INTSET ): ARM64IDENTIFYREFERENCES = struct open Arm64ICode open IntSet type regState = { active: int, refs: int, pushState: bool, prop: regProperty } (* CC states before and after. Currently no instruction uses the condition; conditional branches are handled at the block level. The result of executing the instruction may be to set the condition code to a defined state, an undefined state or leave it unchanged. *) datatype outCCState = CCSet of ccRef | CCIndeterminate | CCUnchanged and inCCState = CCNeeded of ccRef | CCUnused datatype extendedBasicBlock = ExtendedBasicBlock of { block: {instr: iCodeAbstract, current: intSet, active: intSet, kill: intSet } list, flow: controlFlow, locals: intSet, (* Defined and used entirely within the block. *) imports: intSet, (* Defined outside the block, used inside it, but not needed afterwards. *) exports: intSet, (* Defined within the block, possibly used inside, but used outside. *) passThrough: intSet, (* Active throughout the block. May be referred to by it but needed afterwards. *) loopRegs: intSet, (* Destination registers for a loop. They will be updated by this block. *) initialStacks: intSet, (* Stack items required at the start i.e. imports+passThrough for stack items. *) inCCState: ccRef option, (* The state this block assumes. If SOME _ all predecessors must set it. *) outCCState: ccRef option (* The condition code set by this block. SOME _ if at least one successor needs it. *) } exception InternalError = Misc.InternalError (* Return the list of blocks that are the immediate successor of this. 
*) fun blockSuccessors(BasicBlock{flow, ...}) = successorBlocks flow fun getOptReg(SomeReg reg) = [reg] | getOptReg ZeroReg = [] fun getInstructionState(MoveRegister { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadNonAddressConstant { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAddressConstant { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadWithConstantOffset { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadFPWithConstantOffset { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadWithIndexedOffset { base, dest, index, ...}) = { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadFPWithIndexedOffset { base, dest, index, ...}) = { sources=[base, index], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(GetThreadId { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ObjectIndexAddressToAbsolute { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AbsoluteToObjectIndex { source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AllocateMemoryFixed { dest, ...}) = { sources=[], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(AllocateMemoryVariable{size, dest, ...}) = { sources=[size], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(InitialiseMem{size, addr, init}) = { sources=[size, addr, init], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginLoop) = (* This is just a marker. It doesn't actually generate any code. 
*) { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(JumpLoop{regArgs, stackArgs, ...}) = let fun getSourceFromRegs({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (regSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=regSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(StoreWithConstantOffset { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreFPWithConstantOffset { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreWithIndexedOffset { base, source, index, ...}) = { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreFPWithIndexedOffset { base, source, index, ...}) = { sources=[source, base, index], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(AddSubImmediate{ source, dest, ccRef, ... }) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(AddSubRegister{ base, shifted, dest, ccRef, ... }) = { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(LogicalImmediate{ source, dest, ccRef, ... }) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(LogicalRegister{ base, shifted, dest, ccRef, ... }) = { sources=[base, shifted], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=case ccRef of NONE => CCUnchanged | SOME cc => CCSet cc } | getInstructionState(ShiftRegister{ source, shift, dest, ... }) = { sources=[source, shift], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(Multiplication{ dest, sourceA, sourceM, sourceN, ... }) = { sources=getOptReg sourceA @ [sourceM, sourceN], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(Division{ dest, dividend, divisor, ... }) = { sources=[dividend, divisor], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginFunction {regArgs, stackArgs, ...}) = { sources=[], dests=map #1 regArgs, sStacks=[], dStacks=stackArgs, ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(FunctionCall{regArgs, stackArgs, dests, containers, ...}) = let (* Non-tail-recursive. Behaves as a normal reference to sources. 
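The register and stack arguments are sources, the result registers are destinations, and the containers passed to the call are also counted among the stack sources.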
*) fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack(ArgInReg reg, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack(ArgOnStack { container, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (argSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=argSources, dests=List.map #1 dests, sStacks=stackSources @ containers, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(TailRecursiveCall{regArgs, stackArgs, ...}) = let (* Tail recursive call. References the argument sources but exits. *) fun getSourceFromRegs((ArgInReg reg, _), (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromRegs((ArgOnStack { container, ...}, _), (regArgs, stackArgs)) = (regArgs, container :: stackArgs) and getSourceFromStack({src=ArgInReg reg, ...}, (regArgs, stackArgs)) = (reg :: regArgs, stackArgs) | getSourceFromStack({src=ArgOnStack { container, ...}, ...}, (regArgs, stackArgs)) = (regArgs, container :: stackArgs) val (argSources, stackSources) = List.foldl getSourceFromRegs (List.foldl getSourceFromStack ([], []) stackArgs) regArgs in { sources=argSources, dests=[], sStacks=stackSources, dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } end | getInstructionState(ReturnResultFromFunction{results, returnReg, ...}) = { sources=returnReg :: List.map #1 results, dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(RaiseExceptionPacket{packetReg}) = { sources=[packetReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(PushToStack{ source, container, ... }) = { sources=[source], dests=[], sStacks=[], dStacks=[container], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadStack{ dest, container, ... }) = { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreToStack{ source, container, ... }) = (* Although this stores into the container it must already exist. *) { sources=[source], dests=[], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ContainerAddress{ dest, container, ... 
}) = { sources=[], dests=[dest], sStacks=[container], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ResetStackPtr _) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(TagValue{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(UntagValue{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BoxLarge{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(UnboxLarge{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BoxTagFloat{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(UnboxTagFloat{source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAcquire { base, dest, ...}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreRelease { base, source, ...}) = { sources=[source, base], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BitFieldShift{ source, dest, ... }) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BitFieldInsert{ source, destAsSource, dest, ... }) = { sources=[source, destAsSource], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(IndexedCaseOperation{ testReg }) = { sources=[testReg], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(PushExceptionHandler) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(PopExceptionHandler) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BeginHandler{packetReg}) = (* The packet register is a destination since this provides its definition. 
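Control arrives in a handler directly from a raise with the packet in X0, as in BeginHandler in the code-generator above, so nothing earlier in the block supplies a value for it.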
*) { sources=[], dests=[packetReg], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(CompareByteVectors{vec1Addr, vec2Addr, length, ccRef, ...}) = { sources=[vec1Addr, vec2Addr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } | getInstructionState(BlockMove{srcAddr, destAddr, length, ...}) = { sources=[srcAddr, destAddr, length], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } | getInstructionState(AddSubXSP{source, dest, ...}) = { sources=[source], dests=getOptReg dest, sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(TouchValue{source}) = { sources=[source], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(LoadAcquireExclusive{base, dest}) = { sources=[base], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(StoreReleaseExclusive{base, source, result}) = { sources=[base] @ getOptReg source, dests=[result], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(MemoryBarrier) = { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ConvertIntToFloat{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(ConvertFloatToInt{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(UnaryFloatingPt{ source, dest, ...}) = { sources=[source], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(BinaryFloatingPoint{ arg1, arg2, dest, ...}) = { sources=[arg1, arg2], dests=[dest], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } | getInstructionState(CompareFloatingPoint{ arg1, arg2, ccRef, ...}) = { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCSet ccRef } + | getInstructionState(CPUYield) = + { sources=[], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCUnchanged } + | getInstructionState(CacheCheck{ arg1, arg2}) = { sources=[arg1, arg2], dests=[], sStacks=[], dStacks=[], ccIn=CCUnused, ccOut=CCIndeterminate } (* These instructions can be eliminated if their register sources are not used. There may be other cases. *) fun eliminateable(MoveRegister _) = true | eliminateable(LoadNonAddressConstant _) = true | eliminateable(LoadAddressConstant _) = true | eliminateable(LoadWithConstantOffset _) = true | eliminateable(LoadWithIndexedOffset _) = true | eliminateable(ObjectIndexAddressToAbsolute _) = true | eliminateable(TagValue _) = true | eliminateable(UntagValue _) = true | eliminateable(BoxLarge _) = true | eliminateable(UnboxLarge _) = true | eliminateable _ = false fun identifyRegs(blockVector, pregProps): extendedBasicBlock vector * regState vector = let val maxPRegs = Vector.length pregProps val vectorLength = Vector.length blockVector (* Initial arrays - declarationArray is the set of registers given values by the block, importArray is the set of registers referenced by the block and not declared locally. 
*) val declarationArray = Array.array(vectorLength, emptySet) and importArray = Array.array(vectorLength, emptySet) val stackDecArray = Array.array(vectorLength, emptySet) and stackImportArray = Array.array(vectorLength, emptySet) and localLoopRegArray = Array.array(vectorLength, emptySet) (* References - this is used locally to see if a register is ever actually used and also included in the result which uses it as part of the choice of which register to spill. *) val regRefs = Array.array(maxPRegs, 0) (* Registers that must be pushed because they are required after a function call. For cache registers this means "discard". *) and requirePushOrDiscard = Array.array(maxPRegs, false) fun incrRef r = Array.update(regRefs, r, Array.sub(regRefs, r)+1) (* Contains the, possibly filtered, code for each block. *) val resultCode = Array.array(vectorLength, NONE) val ccInStates = Array.array(vectorLength, CCUnused) and ccOutStates = Array.array(vectorLength, CCIndeterminate) (* First pass - for each block build up the sets of registers defined and used in the block. We do this depth-first so that we can use "refs" to see if a register is used. If this is an instruction that can be eliminated we don't need to generate it and can ignore any references it makes. *) local fun blockScan blockNo = if isSome(Array.sub(resultCode, blockNo)) then () else let val () = Array.update(resultCode, blockNo, SOME []) (* Prevent looping. *) val thisBlock as BasicBlock { block, flow, ...} = Vector.sub(blockVector, blockNo) val successors = blockSuccessors thisBlock (* Visit everything reachable first. *) val () = List.app blockScan successors fun scanCode(instr, original as { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... }) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ccIn, ccOut, ... } = getInstructionState instr fun regNo(PReg i) = i and stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs (* If this instruction requires a cc i.e. is SetToCondition or X87FPGetCondition we need to set this as a requirement earlier. If this sets the CC and it is the condition we've been expecting we've satisfied it and can set the previous condition to Unused. We could use this to decide if a comparison is no longer required. That can only happen in very specific circumstances e.g. some tests in Test176.ML so it's not worthwhile. *) val newInCC = case (ccIn, ccOut, occIn) of (cc as CCNeeded _, _, _) => cc (* This instr needs a particular cc. *) | (CCUnused, CCSet _, _) => CCUnused | (CCUnused, _, occIn) => occIn (* If this instruction modifies the CC check to see if it is setting an requirement. *) val _ = case (occIn, ccOut) of (CCNeeded ccRIn, CCSet ccRout) => if ccRIn = ccRout then () else raise InternalError "CCCheck failed" | (CCNeeded _, CCIndeterminate) => raise InternalError "CCCheck failed" | _ => () (* The output CC is the last CC set. Tail instructions that don't change the CC state are ignored until we reach an instruction that sets it. *) val newOutCC = case occOut of CCUnchanged => ccOut | _ => occOut val instrLoopRegs = case instr of JumpLoop{regArgs, ...} => listToSet (map (regNo o #dst) regArgs) | _ => emptySet in if eliminateable instr andalso List.all(fn dReg => Array.sub(regRefs, dReg) = 0) destRegNos then original (* Don't include this instruction. *) else let (* Only mark the sources as referred after we know we're going to need this. 
In that way we may eliminate the instruction that created this source. *) val () = List.app incrRef sourceRegNos in { code = instr :: code, decs = union(listToSet destRegNos, decs), refs = union(listToSet sourceRegNos, refs), sDecs = union(listToSet stackDestRegNos, sDecs), sRefs = union(listToSet stackSourceRegNos, sRefs), occIn = newInCC, occOut = newOutCC, loopRegs = union(loopRegs, instrLoopRegs)} end end (* If we have a conditional branch at the end we need the condition code. It should either be set here or in a preceding block. *) val inCC = case flow of Conditional { ccRef, ...} => CCNeeded ccRef | _ => CCUnused val { code, decs, refs, sDecs, sRefs, occIn, occOut, loopRegs, ... } = List.foldr scanCode {code=[], decs=emptySet, refs=emptySet, sDecs=emptySet, sRefs=emptySet, occIn=inCC, occOut=CCUnchanged, loopRegs=emptySet} block in Array.update(declarationArray, blockNo, decs); (* refs includes local declarations. Remove before adding to the result. *) Array.update(importArray, blockNo, minus(refs, decs)); Array.update(localLoopRegArray, blockNo, loopRegs); Array.update(stackDecArray, blockNo, sDecs); Array.update(stackImportArray, blockNo, minus(sRefs, sDecs)); Array.update(resultCode, blockNo, SOME code); Array.update(ccInStates, blockNo, occIn); Array.update(ccOutStates, blockNo, occOut) end in val () = blockScan 0 (* Start with the root block. *) end (* Second phase - Propagate reference information between the blocks. We need to consider loops here. Do a depth-first scan marking each block. If we find a loop we save the import information we've used. If when we come to process that block we find the import information is different we need to reprocess. *) (* Pass through array - values used in other blocks after this that are not declared in this block. *) val passThroughArray = Array.array(vectorLength, emptySet) val stackPassThroughArray = Array.array(vectorLength, emptySet) (* Exports - those of our declarations that are used in other blocks. *) val exportArray = Array.array(vectorLength, emptySet) val stackExportArray = Array.array(vectorLength, emptySet) (* Loop registers. This contains the registers that are not exported from or passed through this block but are used subsequently as loop registers. *) val loopRegArray = Array.array(vectorLength, emptySet) val () = Array.copy{src=localLoopRegArray, dst=loopRegArray, di=0} (* If any one of the successors requires the CC then this is set. Otherwise we leave it as Unused. *) val ccRequiredOut = Array.array(vectorLength, CCUnused) local datatype loopData = Unprocessed | Processing | Processed | Looped of { regSet: intSet, loopSet: intSet, stackSet: intSet, ccState: inCCState } fun reprocessLoop () = let val reprocess = ref false val loopArray = Array.array(vectorLength, Unprocessed) fun processBlocks blockNo = case Array.sub(loopArray, blockNo) of Processed => (* Already seen this by a different route. *) { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } | Looped s => s (* We've already seen this in a loop. *) | Processing => (* We have a loop. *) let (* Use the existing input array. 
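i.e. return the current estimate of this block's inputs. If completing the rest of the loop changes any of these sets the reprocess flag is set and the whole scan is repeated, so the computation iterates to a fixed point.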
*) val inputs = { regSet = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)), stackSet = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)), ccState = Array.sub(ccInStates, blockNo), loopSet = Array.sub(loopRegArray, blockNo) } val () = Array.update(loopArray, blockNo, Looped inputs) in inputs end | Unprocessed => (* Normal case - not visited yet. *) let val () = Array.update(loopArray, blockNo, Processing) val thisBlock = Vector.sub(blockVector, blockNo) val ourDeclarations = Array.sub(declarationArray, blockNo) and ourStackDeclarations = Array.sub(stackDecArray, blockNo) and ourLocalLoopRegs = Array.sub(localLoopRegArray, blockNo) val successors = blockSuccessors thisBlock fun addSuccessor b = let val {regSet=theirImports, stackSet=theirStackImports, ccState=theirInState, loopSet=theirLoops} = processBlocks b (* Remove loop regs from the imports if they are actually given new values by this block. We don't want to pass the old loop regs through here. *) val theirImports = minus(theirImports, ourLocalLoopRegs) (* Split the imports. If a register is a local declaration then it becomes an export. If it is not it becomes part of our passThrough. *) val (addToExp, addToImp) = IntSet.partition (fn i => member(i, ourDeclarations)) theirImports val (addToStackExp, addToStackImp) = IntSet.partition (fn i => member(i, ourStackDeclarations)) theirStackImports (* Merge the input states from each of the successors. *) val () = case (theirInState, Array.sub(ccRequiredOut, blockNo)) of (CCNeeded ts, CCNeeded req) => if ts = req then () else raise InternalError "Mismatched states" | (ts as CCNeeded _, _) => Array.update(ccRequiredOut, blockNo, ts) | _ => () (* Add loop registers to the set if they are not declared here. The only place they are declared is at the entry to the loop so that stops them being propagated further. *) val addToLoops = minus(theirLoops, ourDeclarations) in Array.update(exportArray, blockNo, union(Array.sub(exportArray, blockNo), addToExp)); Array.update(passThroughArray, blockNo, union(Array.sub(passThroughArray, blockNo), addToImp)); Array.update(stackExportArray, blockNo, union(Array.sub(stackExportArray, blockNo), addToStackExp)); Array.update(stackPassThroughArray, blockNo, union(Array.sub(stackPassThroughArray, blockNo), addToStackImp)); Array.update(loopRegArray, blockNo, union(Array.sub(loopRegArray, blockNo), addToLoops)) end val () = List.app addSuccessor successors val ourInputs = union(Array.sub(passThroughArray, blockNo), Array.sub(importArray, blockNo)) val ourStackInputs = union(Array.sub(stackPassThroughArray, blockNo), Array.sub(stackImportArray, blockNo)) in (* Check that we supply the required state. *) case (Array.sub(ccRequiredOut, blockNo), Array.sub(ccOutStates, blockNo)) of (CCNeeded ccReq, CCSet ccSet) => if ccReq = ccSet then () else raise InternalError "Mismatched cc states" | (CCNeeded _, CCIndeterminate) => raise InternalError "Mismatched cc states" | (cc as CCNeeded needOut, CCUnchanged) => ( (* We pass through the state. If we don't use the state then we need to set this as the input. If we do use the state it must be the same. *) case Array.sub(ccInStates, blockNo) of CCUnused => Array.update(ccInStates, blockNo, cc) | CCNeeded needIn => if needOut = needIn then () else raise InternalError "Mismatched cc states" ) | _ => (); (* Was this block used in a loop? If so we should not be requiring a CC. 
*) case Array.sub(loopArray, blockNo) of Looped {regSet, stackSet, ...} => ( case Array.sub(ccInStates, blockNo) of CCNeeded _ => raise InternalError "Looped state needs cc" | _ => (); if setToList regSet = setToList ourInputs andalso setToList stackSet = setToList ourStackInputs then () else reprocess := true ) | _ => (); Array.update(loopArray, blockNo, Processed); { regSet = ourInputs, stackSet = ourStackInputs, ccState = Array.sub(ccInStates, blockNo), loopSet=Array.sub(loopRegArray, blockNo)} end in reprocess := false; processBlocks 0; if !reprocess then reprocessLoop () else () end in val () = reprocessLoop () end (* Third pass - Build the result list with the active registers for each instruction. We don't include registers in the passThrough set since they are active throughout the block. *) local (* Number of instrs for which this is active. We use this to try to select a register to push to the stack if we have too many. Registers that have only a short lifetime are less likely to be pushed than those that are active longer. *) val regActive = Array.array(maxPRegs, 0) fun addActivity n r = Array.update(regActive, r, Array.sub(regActive, r)+n) fun createResultInstrs (passThrough, stackPassThrough) (instr, (tail, activeAfterThis, stackActiveAfterThis)) = let val { sources, dests, sStacks=stackSrcs, dStacks=stackDests, ... } = getInstructionState instr in (* Eliminate instructions if their results are not required. The earlier check for this will remove most cases but if we have duplicated a block we may have a register that is required elsewhere but not in this particular branch. *) if not(List.exists(fn PReg d => member(d, activeAfterThis)) dests) andalso eliminateable instr then (tail, activeAfterThis, stackActiveAfterThis) else let fun regNo(PReg i) = i fun stackNo(StackLoc{rno, ...}) = rno val destRegNos = map regNo dests and sourceRegNos = map regNo sources val destSet = listToSet destRegNos (* Remove any sources that are present in passThrough since they are going to be active throughout the block. *) and sourceSet = minus(listToSet sourceRegNos, passThrough) val stackDestRegNos = map stackNo stackDests and stackSourceRegNos = map stackNo stackSrcs val stackDestSet = listToSet stackDestRegNos and stackSourceSet = minus(listToSet stackSourceRegNos, stackPassThrough) (* To compute the active set for the PREVIOUS instruction (we're processing from the end back to the start) we remove any registers that have been given values in this instruction and add anything that we are using in this instruction since they will now need to have values. *) val afterRemoveDests = minus(activeAfterThis, destSet) val stackAfterRemoveDests = minus(stackActiveAfterThis, stackDestSet) val activeForPrevious = union(sourceSet, afterRemoveDests) val stackActiveForPrevious = union(stackSourceSet, stackAfterRemoveDests) (* The "active" set is the set of registers that need to be active DURING the instruction. It includes destinations, which will usually be in "activeAfterThis", because there may be destinations that are not actually used subsequently but still need a register. *) val activeForInstr = case instr of FunctionCall _ => sourceSet (* Is this still needed? *) | TailRecursiveCall _ => (* Set the active set to the total set of registers we require including the work register. This ensures that we will spill as many registers as we require when we look at the size of the active set. *) union(sourceSet, destSet) | BoxLarge _ => (* We can only store the value in the box after the box is allocated. 
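That means the source must remain active at the same time as the destination, so both are added to the active set.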
*) union(activeAfterThis, union(sourceSet, destSet)) | BoxTagFloat _ => (* Since the source must be a V register and the destination an X register there isn't actually a problem here, but do this anyway. *) union(activeAfterThis, union(sourceSet, destSet)) | _ => union(activeAfterThis, destSet) val () = List.app(addActivity 1) (setToList activeForInstr) local (* If we are allocating memory we have to save the current registers if they could contain an address. We mustn't push untagged registers and we mustn't push the destination. *) fun getSaveSet includeReg = let val activeAfter = union(activeAfterThis, passThrough) (* Remove any registers marked - must-not-push. These are registers holding non-address values. They will actually be saved by the RTS across any GC but not checked or modified by the GC. Exclude the result register. *) fun getSave i = if includeReg i then case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" else NONE in List.mapPartial getSave (setToList activeAfter) end in (* Sometimes we need to modify the instruction e.g. to include the set of registers to save. *) val convertedInstr = case instr of AllocateMemoryFixed{bytesRequired, dest, saveRegs=_} => AllocateMemoryFixed{bytesRequired=bytesRequired, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | AllocateMemoryVariable{size, dest, saveRegs=_} => AllocateMemoryVariable{size=size, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | BoxLarge{source, dest, saveRegs=_} => BoxLarge{source=source, dest=dest, saveRegs=getSaveSet(fn i => i <> regNo dest)} | BoxTagFloat{source, dest, floatSize, saveRegs=_} => BoxTagFloat{source=source, dest=dest, floatSize=floatSize, saveRegs=getSaveSet(fn i => i <> regNo dest)} | JumpLoop{regArgs, stackArgs, checkInterrupt = SOME _, ...} => let (* If we have to check for interrupts we must preserve registers across the RTS call. *) fun getSave i = case Vector.sub(pregProps, i) of RegPropGeneral => SOME(PReg i) | RegPropCacheTagged => SOME(PReg i) | RegPropUntagged => NONE | RegPropStack _ => NONE | RegPropCacheUntagged => NONE | RegPropMultiple => raise InternalError "getSave: RegPropMultiple" val currentRegs = union(activeAfterThis, passThrough) (* Have to include the loop registers. These were previously included automatically because they were part of the import set. *) val check = List.mapPartial getSave (map (regNo o #dst) regArgs @ setToList currentRegs) in JumpLoop{regArgs=regArgs, stackArgs=stackArgs, checkInterrupt=SOME check} end | FunctionCall{regArgs, stackArgs=[], dests, callKind as ConstantCode m, saveRegs=_, containers} => (* If this is arbitrary precision push the registers rather than marking them as "save". stringOfWord returns 'CODE "PolyAddArbitrary"' etc. *) if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then let val destRegs = List.map (regNo o #1) dests fun includeInSave i = not(List.exists(fn r => r=i) destRegs) in FunctionCall{regArgs=regArgs, stackArgs=[], callKind=callKind, dests=dests, containers=containers, saveRegs=getSaveSet includeInSave} end else instr | _ => instr end (* FunctionCall must mark all registers as "push". 
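Registers are not preserved across ML function calls, so any register whose value is needed afterwards must be saved on the stack; for cache registers the push state simply means the cached value is discarded, as with requirePushOrDiscard above.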
*) local fun pushRegisters () = let val activeAfter = union(activeAfterThis, passThrough) fun pushAllButDests i = if List.exists(fn j => i=j) destRegNos then () else case Vector.sub(pregProps, i) of RegPropCacheTagged => raise InternalError "pushRegisters: cache reg" | RegPropCacheUntagged => raise InternalError "pushRegisters: cache reg" | _ => Array.update(requirePushOrDiscard, i, true) in (* We need to push everything active after this except the result register. *) List.app pushAllButDests (setToList activeAfter) end in val () = case instr of FunctionCall{ stackArgs=[], callKind=ConstantCode m, ...} => if (String.isSubstring "Arbitrary\"" (Address.stringOfWord m)) then () else pushRegisters () | FunctionCall _ => pushRegisters () (* It should no longer be necessary to push across a handler but there still seem to be cases that need it. *) (*| BeginHandler _ => pushRegisters ()*) | _ => () end (* Which entries are active in this instruction but not afterwards? *) val kill = union(minus(stackSourceSet, stackActiveAfterThis), minus(sourceSet, activeAfterThis)) in ({instr=convertedInstr, active=activeForInstr, current=activeAfterThis, kill=kill} :: tail, activeForPrevious, stackActiveForPrevious) end end fun createResult blockNo = let val BasicBlock{ flow, ...} = Vector.sub(blockVector, blockNo) val declSet = Array.sub(declarationArray, blockNo) and importSet = Array.sub(importArray, blockNo) and passSet = Array.sub(passThroughArray, blockNo) and loopSet = Array.sub(loopRegArray, blockNo) and exportSet = Array.sub(exportArray, blockNo) and stackPassSet = Array.sub(stackPassThroughArray, blockNo) and stackImportSet = Array.sub(stackImportArray, blockNo) and stackExportSet = Array.sub(stackExportArray, blockNo) val filteredCode = getOpt(Array.sub(resultCode, blockNo), []) (* At the end of the block we should have the exports active. *) val (resultInstrs, _, _) = List.foldr (createResultInstrs (passSet, stackPassSet)) ([], exportSet, stackExportSet) filteredCode (* Set the active count for the pass through. *) val instrCount = List.length filteredCode val () = List.app(addActivity instrCount) (setToList passSet) val inCCState = case Array.sub(ccInStates, blockNo) of CCNeeded s => SOME s | CCUnused => NONE val outCCState = case Array.sub(ccRequiredOut, blockNo) of CCNeeded s => SOME s | CCUnused => NONE in ExtendedBasicBlock { block = resultInstrs, flow=flow, locals = minus(declSet, exportSet), imports = importSet, exports = exportSet, passThrough = passSet, loopRegs = loopSet, initialStacks = union(stackPassSet, stackImportSet), inCCState = inCCState, outCCState = outCCState } end in val resultBlocks = Vector.tabulate(vectorLength, createResult) val regActive = regActive end val registerState: regState vector = Vector.tabulate(maxPRegs, fn i => { active = Array.sub(regActive, i), refs = Array.sub(regRefs, i), pushState = Array.sub(requirePushOrDiscard, i), prop = Vector.sub(pregProps, i) } ) in (resultBlocks, registerState) end (* Exported function. First filter out unreferenced blocks then process the registers themselves. *) fun identifyRegisters(blockVector, pregProps) = let val vectorLength = Vector.length blockVector val mapArray = Array.array(vectorLength, NONE) and resArray = Array.array(vectorLength, NONE) val count = ref 0 fun setReferences label = case Array.sub(mapArray, label) of NONE => (* Not yet visited *) let val BasicBlock{flow, block} = Vector.sub(blockVector, label) (* Create a new entry for it. *) val newLabel = ! count before count := !count + 1 (* Add it to the map. 
Any other references will use this without reprocessing. *) val () = Array.update(mapArray, label, SOME newLabel) val newFlow = case flow of Unconditional l => Unconditional(setReferences l) | Conditional{trueJump, falseJump, ccRef, condition} => Conditional{trueJump=setReferences trueJump, falseJump=setReferences falseJump, ccRef=ccRef, condition=condition} | ExitCode => ExitCode | IndexedBr list => IndexedBr(map setReferences list) | SetHandler{handler, continue} => SetHandler{handler=setReferences handler, continue=setReferences continue} | UnconditionalHandle l => UnconditionalHandle(setReferences l) | ConditionalHandle{handler, continue} => ConditionalHandle{handler=setReferences handler, continue=setReferences continue} val () = Array.update(resArray, newLabel, SOME(BasicBlock{flow=newFlow, block=block})) in newLabel end | SOME lab => lab val _ = setReferences 0 val newBlockVector = Vector.tabulate(!count, fn i => valOf(Array.sub(resArray, i))) in identifyRegs(newBlockVector, pregProps) end (* Exported for use in GetConflictSets *) fun getInstructionRegisters instr = let val {sources, dests, ...} = getInstructionState instr in {sources=sources, dests=dests} end (* Exported for use in ICodeOptimise *) val getInstructionCC = #ccOut o getInstructionState structure Sharing = struct type ('genReg, 'optGenReg, 'fpReg) arm64ICode = ('genReg, 'optGenReg, 'fpReg) arm64ICode and preg = preg and pregOrZero = pregOrZero and intSet = intSet and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and extendedBasicBlock = extendedBasicBlock and controlFlow = controlFlow and regProperty = regProperty and ccRef = ccRef and outCCState = outCCState end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML index ae35a130..1b991cab 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML @@ -1,1073 +1,1077 @@ (* Copyright (c) 2021-2 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *) functor Arm64PreAssembly( structure Arm64Assembly: ARM64ASSEMBLY structure Debug: DEBUG structure Pretty: PRETTY ): ARM64PREASSEMBLY = struct open Arm64Assembly exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. 
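With these operators code <::> i1 <::> i2 produces i2 :: i1 :: code, so sequences read left to right in the source while the list itself accumulates in reverse order and is reversed when the final code is generated.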
*) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd (* Many of the datatypes are inherited from Arm64Assembly *) datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP and unscaledType = NoUpdate | PreIndex | PostIndex and condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate and bitfieldKind = BFUnsigned | BFSigned | BFInsert and brRegType = BRRBranch | BRRAndLink | BRRReturn datatype label = Label of int type labelMaker = int ref fun createLabelMaker() = ref 0 fun createLabel(r as ref n) = Label n before r := n+1 datatype precode = (* Basic instructions *) AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind} | DivideRegs of {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize} | LogicalShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool} | LoadRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | LoadFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | StoreRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | StoreFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | LoadRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | LoadFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} (* LoadAcquire and StoreRelease are used for mutables. *) | LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType} | StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType} (* LoadAcquireExclusiveRegister and StoreReleaseExclusiveRegister are used for mutexes. 
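These presumably correspond to the LDAXR and STLXR instructions: the regS of the store receives the status result, zero if the store succeeded and non-zero if the exclusive monitor was lost, which is why the ICode StoreReleaseExclusive has a result register.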
*) | LoadAcquireExclusiveRegister of {regN: xReg, regT: xReg} | StoreReleaseExclusiveRegister of {regS: xReg, regT: xReg, regN: xReg} | MemBarrier | LoadRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | ConditionalSet of {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet, opSize: opSize} | BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind} | ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection} | BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp} (* Floating point *) | MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize} | MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize} | CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize} | CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize} | FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary} | FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize} | FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary} (* Branches and Labels. *) | SetLabel of label | ConditionalBranch of condition * label | UnconditionalBranch of label | BranchAndLink of label | BranchReg of {regD: xReg, brRegType: brRegType } | LoadLabelAddress of xReg * label | TestBitBranch of { test: xReg, bit: Word8.word, label: label, onZero: bool } | CompareBranch of { test: xReg, label: label, onZero: bool, opSize: opSize } (* Composite instructions *) | MoveXRegToXReg of {sReg: xReg, dReg: xReg} | LoadNonAddr of xReg * Word64.word | LoadAddr of xReg * machineWord | RTSTrap of { rtsEntry: int, work: xReg, save: xReg list } | AllocateMemoryFixedSize of { bytes: word, dest: xReg, save: xReg list, work: xReg } | AllocateMemoryVariableSize of { sizeReg: xReg, dest: xReg, save: xReg list, work: xReg } (* Branch table for indexed case. startLabel is the address of the first label in the list. The branch table is a sequence of unconditional branches. *) | BranchTable of { startLabel: label, brTable: label list } | LoadGlobalHeapBaseInCallback of xReg + | Yield (* Optimise the pre-assembler code and then generate the final code. *) fun generateFinalCode {instrs, name, parameters, resultClosure, profileObject, labelMaker=ref labelCount} = let val labelTargets = Array.tabulate(labelCount, fn i => (Arm64Assembly.createLabel(), i) ) (* Follow the chain of forwarded labels. *) local fun forwardLab(labelNo, labels) = let val dest as (_, dNo) = Array.sub(labelTargets, labelNo) in if dNo = labelNo then dest (* This should not happen but just in case... 
*) else if List.exists(fn i => i = dNo) labels then raise InternalError "Infinite loop" else forwardLab(dNo, dNo::labels) end in fun getLabel labelNo = forwardLab(labelNo, [labelNo]) val getLabelTarget = #1 o getLabel end fun toAssembler([], code) = code | toAssembler(AddImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addImmediate | (OpSize32, false) => addImmediate32 | (OpSize64, true) => addSImmediate | (OpSize32, true) => addSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(SubImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subImmediate | (OpSize32, false) => subImmediate32 | (OpSize64, true) => subSImmediate | (OpSize32, true) => subSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(AddShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addShiftedReg | (OpSize32, false) => addShiftedReg32 | (OpSize64, true) => addSShiftedReg | (OpSize32, true) => addSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(SubShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subShiftedReg | (OpSize32, false) => subShiftedReg32 | (OpSize64, true) => subSShiftedReg | (OpSize32, true) => subSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(AddExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = (* Add/SubExtended are only used to access XSP. 
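In the shifted-register encodings register number 31 means XZR rather than the stack pointer, so the extended-register form has to be used whenever XSP is an operand or the destination.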
*) let val instr = case (opSize, setFlags) of (OpSize64, false) => addExtendedReg | (OpSize32, false) => raise InternalError "AddExtendedReg; 32" | (OpSize64, true) => addSExtendedReg | (OpSize32, true) => raise InternalError "AddExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(SubExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subExtendedReg | (OpSize32, false) => raise InternalError "AddExtendedReg; 32" | (OpSize64, true) => subSExtendedReg | (OpSize32, true) => raise InternalError "AddExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(MultiplyAndAddSub{regM, regN, regA, regD, multKind} :: rest, code) = let val instr = case multKind of MultAdd32 => multiplyAndAdd32{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub32 => multiplyAndSub32{regM=regM, regN=regN, regA=regA, regD=regD} | MultAdd64 => multiplyAndAdd{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub64 => multiplyAndSub{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultAddLong => signedMultiplyAndAddLong{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultHigh => signedMultiplyHigh{regM=regM, regN=regN, regD=regD} in toAssembler(rest, code <::> instr) end | toAssembler(DivideRegs{regM, regN, regD, isSigned, opSize} :: rest, code) = let val instr = case (isSigned, opSize) of (true, OpSize64) => signedDivide | (true, OpSize32) => signedDivide32 | (false, OpSize64) => unsignedDivide | (false, OpSize32) => unsignedDivide32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(LogicalShiftedReg{regM, regN, regD, shift, logOp, opSize, setFlags} :: rest, code) = let val instr = case (logOp, setFlags, opSize) of (LogAnd, false, OpSize64) => andShiftedReg | (LogAnd, true, OpSize64) => andsShiftedReg | (LogOr, false, OpSize64) => orrShiftedReg | (LogXor, false, OpSize64) => eorShiftedReg | (LogAnd, false, OpSize32) => andShiftedReg32 | (LogAnd, true, OpSize32) => andsShiftedReg32 | (LogOr, false, OpSize32) => orrShiftedReg32 | (LogXor, false, OpSize32) => eorShiftedReg32 | _ => raise InternalError "setFlags not valid with OR or XOR" (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. 
*) in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD, shift=shift}) end | toAssembler(LoadRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => loadRegScaled | Load32 => loadRegScaled32 | Load16 => loadRegScaled16 | Load8 => loadRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => storeRegScaled | Load32 => storeRegScaled32 | Load16 => storeRegScaled16 | Load8 => storeRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => loadRegScaledFloat | Double64 => loadRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => storeRegScaledFloat | Double64 => storeRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => loadRegUnscaled | (Load32, NoUpdate) => loadRegUnscaled32 | (Load16, NoUpdate) => loadRegUnscaled16 | (Load8, NoUpdate) => loadRegUnscaledByte | (Load64, PreIndex) => loadRegPreIndex | (Load32, PreIndex) => loadRegPreIndex32 | (Load16, PreIndex) => raise InternalError "loadRegPreIndex16" | (Load8, PreIndex) => loadRegPreIndexByte | (Load64, PostIndex) => loadRegPostIndex | (Load32, PostIndex) => loadRegPostIndex32 | (Load16, PostIndex) => raise InternalError "loadRegPostIndex16" | (Load8, PostIndex) => loadRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => loadRegUnscaledFloat | (Double64, NoUpdate) => loadRegUnscaledDouble | _ => raise InternalError "LoadFPRegUnscaled: pre/post indexed" in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => storeRegUnscaled | (Load32, NoUpdate) => storeRegUnscaled32 | (Load16, NoUpdate) => storeRegUnscaled16 | (Load8, NoUpdate) => storeRegUnscaledByte | (Load64, PreIndex) => storeRegPreIndex | (Load32, PreIndex) => storeRegPreIndex32 | (Load16, PreIndex) => raise InternalError "storeRegPreIndex16" | (Load8, PreIndex) => storeRegPreIndexByte | (Load64, PostIndex) => storeRegPostIndex | (Load32, PostIndex) => storeRegPostIndex32 | (Load16, PostIndex) => raise InternalError "storeRegPostIndex16" | (Load8, PostIndex) => storeRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => storeRegUnscaledFloat | (Double64, NoUpdate) => storeRegUnscaledDouble | _ => raise InternalError "StoreFPRegUnscaled: pre/post indexed" in toAssembler(rest, code 
<::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => loadRegIndexed | Load32 => loadRegIndexed32 | Load16 => loadRegIndexed16 | Load8 => loadRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => storeRegIndexed | Load32 => storeRegIndexed32 | Load16 => storeRegIndexed16 | Load8 => storeRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(LoadFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => loadRegIndexedFloat | Double64 => loadRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => storeRegIndexedFloat | Double64 => storeRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(LoadAcquireReg{regN, regT, loadType} :: rest, code) = let val loadInstr = case loadType of Load64 => loadAcquire | Load32 => loadAcquire32 | Load8 => loadAcquireByte | _ => raise InternalError "LoadAcquire: Unsupported size" (* Not used *) in toAssembler(rest, code <::> loadInstr{regT=regT, regN=regN}) end | toAssembler(StoreReleaseReg{regN, regT, loadType} :: rest, code) = let val storeInstr = case loadType of Load64 => storeRelease | Load32 => storeRelease32 | Load8 => storeReleaseByte | _ => raise InternalError "StoreRelease: Unsupported size" (* Not used *) in toAssembler(rest, code <::> storeInstr{regT=regT, regN=regN}) end | toAssembler(LoadAcquireExclusiveRegister{regN, regT} :: rest, code) = toAssembler(rest, code <::> loadAcquireExclusiveRegister{regN=regN, regT=regT}) | toAssembler(StoreReleaseExclusiveRegister{regN, regT, regS} :: rest, code) = toAssembler(rest, code <::> storeReleaseExclusiveRegister{regN=regN, regT=regT, regS=regS}) | toAssembler(MemBarrier :: rest, code) = toAssembler(rest, code <::> dmbIsh) | toAssembler(LoadRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) = let val _ = regT1 <> regT2 orelse raise InternalError "LoadRegPair: same register" val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => loadPairOffset | (Load64, PreIndex) => loadPairPreIndexed | (Load64, PostIndex) => loadPairPostIndexed | (Load32, NoUpdate) => loadPairOffset32 | (Load32, PreIndex) => loadPairPreIndexed32 | (Load32, PostIndex) => loadPairPostIndexed32 | _ => raise InternalError "LoadRegPair: unimplemented" in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => storePairOffset | (Load64, PreIndex) => storePairPreIndexed | (Load64, PostIndex) => storePairPostIndexed | (Load32, NoUpdate) => storePairOffset32 | (Load32, PreIndex) => storePairPreIndexed32 | (Load32, PostIndex) => storePairPostIndexed32 | _ => raise InternalError "StoreRegPair: unimplemented" in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadFPRegPair{ 
regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) = let val _ = regT1 <> regT2 orelse raise InternalError "LoadRegPair: same register" val instr = case (floatSize, unscaledType) of (Double64, NoUpdate) => loadPairOffsetDouble | (Double64, PreIndex) => loadPairPreIndexedDouble | (Double64, PostIndex) => loadPairPostIndexedDouble | (Float32, NoUpdate) => loadPairOffsetFloat | (Float32, PreIndex) => loadPairPreIndexedFloat | (Float32, PostIndex) => loadPairPostIndexedFloat in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreFPRegPair{ regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Double64, NoUpdate) => storePairOffsetDouble | (Double64, PreIndex) => storePairPreIndexedDouble | (Double64, PostIndex) => storePairPostIndexedDouble | (Float32, NoUpdate) => storePairOffsetFloat | (Float32, PreIndex) => storePairPreIndexedFloat | (Float32, PostIndex) => storePairPostIndexedFloat in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(ConditionalSet{regD, regTrue, regFalse, cond, condSet, opSize} :: rest, code) = let val instr = case (condSet, opSize) of (CondSet, OpSize64) => conditionalSet | (CondSetIncr, OpSize64) => conditionalSetIncrement | (CondSetInvert, OpSize64) => conditionalSetInverted | (CondSetNegate, OpSize64) => conditionalSetNegated | (CondSet, OpSize32) => conditionalSet32 | (CondSetIncr, OpSize32) => conditionalSetIncrement32 | (CondSetInvert, OpSize32) => conditionalSetInverted32 | (CondSetNegate, OpSize32) => conditionalSetNegated32 in toAssembler(rest, code <::> instr{regD=regD, regTrue=regTrue, regFalse=regFalse, cond=cond}) end | toAssembler(BitField{immr, imms, regN, regD, opSize, bitfieldKind} :: rest, code) = let val bfInstr = case (bitfieldKind, opSize) of (BFSigned, OpSize64) => signedBitfieldMove64 | (BFUnsigned, OpSize64) => unsignedBitfieldMove64 | (BFInsert, OpSize64) => bitfieldMove64 | (BFSigned, OpSize32) => signedBitfieldMove32 | (BFUnsigned, OpSize32) => unsignedBitfieldMove32 | (BFInsert, OpSize32) => bitfieldMove32 in toAssembler(rest, code <::> bfInstr{immr=immr, imms=imms, regN=regN, regD=regD}) end | toAssembler(ShiftRegisterVariable{regM, regN, regD, opSize, shiftDirection} :: rest, code) = let val instr = case (shiftDirection, opSize) of (ShiftLeft, OpSize64) => logicalShiftLeftVariable | (ShiftLeft, OpSize32) => logicalShiftLeftVariable32 | (ShiftRightLogical, OpSize64) => logicalShiftRightVariable | (ShiftRightLogical, OpSize32) => logicalShiftRightVariable32 | (ShiftRightArithmetic, OpSize64) => arithmeticShiftRightVariable | (ShiftRightArithmetic, OpSize32) => arithmeticShiftRightVariable32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(BitwiseLogical{ bits, regN, regD, opSize, setFlags, logOp} :: rest, code) = let val instr = case (logOp, setFlags, opSize) of (LogAnd, false, OpSize64) => bitwiseAndImmediate | (LogAnd, true, OpSize64) => bitwiseAndSImmediate | (LogOr, false, OpSize64) => bitwiseOrImmediate | (LogXor, false, OpSize64) => bitwiseXorImmediate | (LogAnd, false, OpSize32) => bitwiseAndImmediate32 | (LogAnd, true, OpSize32) => bitwiseAndSImmediate32 | (LogOr, false, OpSize32) => bitwiseOrImmediate32 | (LogXor, false, OpSize32) => bitwiseXorImmediate32 | _ => raise InternalError "flags not valid with OR or XOR" in toAssembler(rest, code <::> instr{regN=regN, regD=regD, 
bits=bits}) end | toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Float32} :: rest, code) = toAssembler(rest, code <::> moveGeneralToFloat{regN=regN, regD=regD}) | toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Double64} :: rest, code) = toAssembler(rest, code <::> moveGeneralToDouble{regN=regN, regD=regD}) | toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Float32} :: rest, code) = toAssembler(rest, code <::> moveFloatToGeneral{regN=regN, regD=regD}) | toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Double64} :: rest, code) = toAssembler(rest, code <::> moveDoubleToGeneral{regN=regN, regD=regD}) | toAssembler(CvtIntToFP{ regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (opSize, floatSize) of (OpSize32, Float32) => convertInt32ToFloat | (OpSize64, Float32) => convertIntToFloat | (OpSize32, Double64) => convertInt32ToDouble | (OpSize64, Double64) => convertIntToDouble in toAssembler(rest, code <::> instr{regN=regN, regD=regD}) end | toAssembler(CvtFloatToInt{ round, regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (floatSize, opSize) of (Float32, OpSize32) => convertFloatToInt32 | (Float32, OpSize64) => convertFloatToInt | (Double64, OpSize32) => convertDoubleToInt32 | (Double64, OpSize64) => convertDoubleToInt in toAssembler(rest, code <::> instr round {regN=regN, regD=regD}) end | toAssembler(FPBinaryOp{ regM, regN, regD, floatSize, fpOp} :: rest, code) = let val instr = case (fpOp, floatSize) of (MultiplyFP, Float32) => multiplyFloat | (DivideFP, Float32) => divideFloat | (AddFP, Float32) => addFloat | (SubtractFP, Float32) => subtractFloat | (MultiplyFP, Double64) => multiplyDouble | (DivideFP, Double64) => divideDouble | (AddFP, Double64) => addDouble | (SubtractFP, Double64) => subtractDouble in toAssembler(rest, code <::> instr {regN=regN, regM=regM, regD=regD}) end | toAssembler(FPComparison{ regM, regN, floatSize} :: rest, code) = toAssembler(rest, code <::> (case floatSize of Float32 => compareFloat | Double64 => compareDouble){regN=regN, regM=regM}) | toAssembler(FPUnaryOp{ regN, regD, fpOp} :: rest, code) = let val instr = case fpOp of NegFloat => negFloat | NegDouble => negDouble | AbsFloat => absFloat | AbsDouble => absDouble | ConvFloatToDble => convertFloatToDouble | ConvDbleToFloat => convertDoubleToFloat in toAssembler(rest, code <::> instr {regN=regN, regD=regD}) end | toAssembler(SetLabel(Label lab) :: rest, code) = toAssembler(rest, code <::> setLabel(getLabelTarget lab)) | toAssembler(ConditionalBranch(cond, Label lab) :: rest, code) = toAssembler(rest, code <::> conditionalBranch(cond, getLabelTarget lab)) | toAssembler(UnconditionalBranch(Label lab) :: rest, code) = toAssembler(rest, code <::> unconditionalBranch(getLabelTarget lab)) | toAssembler(BranchAndLink(Label lab) :: rest, code) = toAssembler(rest, code <::> branchAndLink(getLabelTarget lab)) | toAssembler(BranchReg{regD, brRegType=BRRBranch} :: rest, code) = toAssembler(rest, code <::> branchRegister regD) | toAssembler(BranchReg{regD, brRegType=BRRAndLink} :: rest, code) = toAssembler(rest, code <::> branchAndLinkReg regD) | toAssembler(BranchReg{regD, brRegType=BRRReturn} :: rest, code) = toAssembler(rest, code <::> returnRegister regD) | toAssembler(LoadLabelAddress(reg, Label lab) :: rest, code) = toAssembler(rest, code <::> loadLabelAddress(reg, getLabelTarget lab)) | toAssembler(TestBitBranch{ test, bit, label=Label lab, onZero } :: rest, code) = toAssembler(rest, code <::> (if onZero then testBitBranchZero else testBitBranchNonZero)(test, bit, 
getLabelTarget lab)) | toAssembler(CompareBranch{ test, label=Label lab, onZero, opSize } :: rest, code) = let val instr = case (onZero, opSize) of (true, OpSize64) => compareBranchZero | (false, OpSize64) => compareBranchNonZero | (true, OpSize32) => compareBranchZero32 | (false, OpSize32) => compareBranchNonZero32 in toAssembler(rest, code <::> instr(test, getLabelTarget lab)) end (* Register-register moves - special case for XSP. *) | toAssembler(MoveXRegToXReg{sReg=XSP, dReg} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=XSP, regD=dReg, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg=XSP} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=sReg, regD=XSP, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg} :: rest, code) = toAssembler(rest, code <::> orrShiftedReg{regN=XZero, regM=sReg, regD=dReg, shift=ShiftNone}) | toAssembler(LoadNonAddr(xReg, value) :: rest, code) = let (* Load a non-address constant. Tries to use movz/movn/movk if that can be done easily, othewise uses loadNonAddressConstant to load the value from the non-address constant area. *) fun extW (v, h) = Word.andb(Word.fromLarge(LargeWord.>>(Word64.toLarge v, h*0w16)), 0wxffff) val hw0 = extW(value, 0w3) and hw1 = extW(value, 0w2) and hw2 = extW(value, 0w1) and hw3 = extW(value, 0w0) val nextCode = if value < 0wx100000000 then let (* 32-bit constants can be loaded using at most a movz and movk but various cases can be reduced since all 32-bit operations set the top word to zero. *) val hi = hw2 and lo = hw3 in (* 32-bit constants can be loaded with at most a movz and a movk but it may be that there is something shorter. *) if hi = 0w0 then code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} else if hi = 0wxffff then code <::> moveNot32{regD=xReg, immediate=Word.xorb(0wxffff, lo), shift=0w0} else if lo = 0w0 then code <::> moveZero32{regD=xReg, immediate=hi, shift=0w16} else if isEncodableBitPattern(value, WordSize32) then code <::> bitwiseOrImmediate32{bits=value, regN=XZero, regD=xReg} else (* Have to use two instructions *) code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} <::> moveKeep{regD=xReg, immediate=hi, shift=0w16} end else if hw0 = 0wxffff andalso hw1 = 0wxffff andalso hw2 = 0wxffff then code <::> moveNot{regD=xReg, immediate=Word.xorb(0wxffff, hw3), shift=0w0} else if hw1 = 0w0 andalso hw2 = 0w0 then (* This is common for length words with a flags byte *) code <::> moveZero32{regD=xReg, immediate=hw3, shift=0w0} <::> moveKeep{regD=xReg, immediate=hw0, shift=0w48} else code <::> loadNonAddressConstant(xReg, value) in toAssembler(rest, nextCode) end | toAssembler(LoadAddr(dReg, source) :: rest, code) = toAssembler(rest, loadAddressConstant(dReg, source) :: code) | toAssembler(RTSTrap{ rtsEntry, work, save } :: rest, code) = let (* Because X30 is used in the branchAndLink it has to be pushed across any trap. 
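X30 is the link register, so branchAndLinkReg overwrites it with the return address of the RTS entry. If the caller's X30 is in the save set it is therefore pushed onto the ML stack with a pre-indexed store before the call and reloaded with a post-indexed load afterwards, which is what saveX30 controls below.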
*) val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save in toAssembler(rest, code <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=rtsEntry} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) ) end | toAssembler(AllocateMemoryFixedSize{ bytes, dest, save, work } :: rest, code) = let val label = Arm64Assembly.createLabel() val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save val allocCode = code <@> (* Subtract the number of bytes required from the heap pointer. *) (if bytes >= 0w4096 then [subShiftedReg{regM=work, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftNone}, loadNonAddressConstant(work, Word.toLarge bytes)] else [subImmediate{regN=X_MLHeapAllocPtr, regD=dest, immed=bytes, shifted=false}]) <::> (* Compare the result with the heap limit. *) subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarrySet, label) <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::> setLabel label <::> (* Update the heap pointer. *) orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone} in toAssembler(rest, allocCode) end | toAssembler(AllocateMemoryVariableSize{ sizeReg, dest, save, work } :: rest, code) = let val trapLabel = Arm64Assembly.createLabel() and noTrapLabel = Arm64Assembly.createLabel() val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save val allocCode = ( (* Subtract the size into the result register. Subtract a further word for the length word and round down in 32-in-64. *) if is32in64 then code <::> subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w2} <::> subImmediate{regN=dest, regD=dest, immed=0w4, shifted=false} <::> bitwiseAndImmediate{bits= ~ 0w8, regN=dest, regD=dest} else code <::> subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w3} <::> subImmediate{regN=dest, regD=dest, immed=0w8, shifted=false} ) <::> (* Check against the limit. If the size is large enough it is possible that this could wrap round. To check for that we trap if either the result is less than the limit or if it is now greater than the allocation pointer. *) subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarryClear, trapLabel) <::> subSShiftedReg{regM=X_MLHeapAllocPtr, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarryClear, noTrapLabel) <::> setLabel trapLabel <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::> setLabel noTrapLabel <::> (* Update the heap pointer. 
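An ORR with XZero as the first operand is the standard encoding of a register-to-register move, the same form used for MoveXRegToXReg above, so this simply copies dest into X_MLHeapAllocPtr.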
*) orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone} in toAssembler(rest, allocCode) end | toAssembler(BranchTable{ startLabel=Label lab, brTable } :: rest, code) = toAssembler(rest, List.foldl (fn (Label lab, code) => (unconditionalBranch(getLabelTarget lab)) :: code) (code <::> setLabel(getLabelTarget lab)) brTable) | toAssembler(LoadGlobalHeapBaseInCallback dest :: rest, code) = toAssembler(rest, code <@> List.rev(loadGlobalHeapBaseInCallback dest)) + | toAssembler(Yield :: rest, code) = + toAssembler(rest, code <::> yield) + (* Optimisation passes. *) fun isValidForPair(offset1, offset2) = let val v = Int.min(offset1, offset2) in v >= ~64 andalso v < 64 end fun forward([], list, rep) = reverse(list, [], rep) | forward(SetLabel(Label srcLab) :: (ubr as UnconditionalBranch(Label destLab)) :: tl, list, _) = if srcLab = destLab (* We should never get this because there should always be a stack-check to allow a loop to be broken. If that ever changes we need to retain the label. *) then raise InternalError "Infinite loop detected" else (* Mark this to forward to its destination. *) ( Array.update(labelTargets, srcLab, getLabel destLab); forward(ubr :: tl, list, true) ) | forward(SetLabel(Label jmpLab1) :: (tl as SetLabel(Label jmpLab2) :: _), list, _) = (* Eliminate adjacent labels. They complicate the other tests although they don't incur any run-time cost. *) ( (* Any reference to the first label is forwarded to the second. *) Array.update(labelTargets, jmpLab1, getLabel jmpLab2); forward(tl, list, true) ) | forward((ubr as UnconditionalBranch(Label ubrLab)) :: (tl as SetLabel(Label jumpLab) :: _), list, rep) = (* Eliminate unconditional jumps to the next instruction. *) if ubrLab = jumpLab then forward(tl, list, true) else forward(tl, ubr :: list, rep) | forward((cbr as ConditionalBranch(test, Label cbrLab)) :: (ubr as UnconditionalBranch(Label ubrLab)) :: (tl as SetLabel(Label jumpLab) :: _), list, rep) = if cbrLab = jumpLab then (* We have a conditional branch followed by an unconditional branch followed by the destination of the conditional branch. Eliminate the unconditional branch by reversing the test. This can often happen if one branch of an if-then-else has been reduced to zero because the same register has been chosen for the input and output. *) forward(tl (* Leave the label just in case it's used elsewhere*), ConditionalBranch(invertTest test, Label ubrLab) :: list, true) else forward(ubr :: tl, cbr :: list, rep) | forward((load as LoadRegScaled{regT=regT1, regN=regN1, unitOffset=offset1, loadType=lt1}) :: (tl1 as LoadRegScaled{regT=regT2, regN=regN2, unitOffset=offset2, loadType=lt2} ::tl2), list, rep) = (* Two adjacent loads - can this be converted to load-pair? N.B. We have to be careful about the sequence ldr x0,[x0]; ldr x1,[x0+8] which isn't the same at all. 
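For example
           ldr x1, [x4, #8]
           ldr x2, [x4, #16]
       can be merged into ldp x1, x2, [x4, #8], but only when both loads use the same base register, the base is not overwritten by the first load, the sizes match and the unit offsets are adjacent and within the signed seven-bit range checked by isValidForPair.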
*) if regN1 = regN2 andalso regN1 <> regT1 andalso lt1 = lt2 andalso (offset2 = offset1 + 1 orelse offset2 = offset1 - 1) andalso (case lt1 of Load64 => true | Load32 => true | _ => false) andalso isValidForPair(offset1, offset2) then let val (reg1, reg2, offset) = if offset1 < offset2 then (regT1, regT2, offset1) else (regT2, regT1, offset2) in forward(tl2, LoadRegPair{ regT1=reg1, regT2=reg2, regN=regN1, unitOffset=offset, loadType=lt1, unscaledType=NoUpdate} :: list, true) end else forward(tl1, load :: list, rep) | forward((store as StoreRegScaled{regT=regT1, regN=regN1, unitOffset=offset1, loadType=lt1}) :: (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=offset2, loadType=lt2} ::tl2), list, rep) = (* Two adjacent stores - can this be converted to store-pair? *) if regN1 = regN2 andalso lt1 = lt2 andalso (offset2 = offset1 + 1 orelse offset2 = offset1 - 1) andalso (case lt1 of Load64 => true | Load32 => true | _ => false) andalso isValidForPair(offset1, offset2) then let val (reg1, reg2, offset) = if offset1 < offset2 then (regT1, regT2, offset1) else (regT2, regT1, offset2) in forward(tl2, StoreRegPair{ regT1=reg1, regT2=reg2, regN=regN1, unitOffset=offset, loadType=lt1, unscaledType=NoUpdate} :: list, true) end else forward(tl1, store :: list, rep) | forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~8, loadType=Load64, unscaledType=NoUpdate}) :: (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=0, loadType=Load64} ::tl2), list, rep) = (* Common case - store length word and then the first word of the cell. *) if regN1 = regN2 then forward(tl2, StoreRegPair{ regT1=regT1, regT2=regT2, regN=regN1, unitOffset= ~1, loadType=Load64, unscaledType=NoUpdate} :: list, true) else forward(tl1, store :: list, rep) | forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~4, loadType=Load32, unscaledType=NoUpdate}) :: (tl1 as StoreRegScaled{regT=regT2, regN=regN2, unitOffset=0, loadType=Load32} ::tl2), list, rep) = (* Common case - store length word and then the first word of the cell. *) if regN1 = regN2 then forward(tl2, StoreRegPair{ regT1=regT1, regT2=regT2, regN=regN1, unitOffset= ~1, loadType=Load32, unscaledType=NoUpdate} :: list, true) else forward(tl1, store :: list, rep) | forward((store as StoreRegUnscaled{regT=regT1, regN=regN1, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex}) :: (tl1 as StoreRegUnscaled{regT=regT2, regN=regN2, byteOffset= ~8, loadType=Load64, unscaledType=PreIndex} :: tl2), list, rep) = (* Adjacent pushes T2 is in the lower address so the order is T2, T1. The stack is always 64-bit aligned so this works on both native addressing and 32-in-64. *) if regN1 = regN2 then forward(tl2, StoreRegPair{ regT1=regT2, regT2=regT1, regN=regN1, unitOffset= ~2, loadType=Load64, unscaledType=PreIndex} :: list, true) else forward(tl1, store :: list, rep) | forward((add1 as AddImmediate{regN=regN1, regD=regD1, immed=immed1, shifted=false, opSize=OpSize64, setFlags=false}) :: (tl1 as AddImmediate{regN=regN2, regD=regD2, immed=immed2, shifted=false, opSize=OpSize64, setFlags=false} ::tl2), list, rep) = (* Adjacent stack resets. This can apply more generally but only if the result registers are the same. If they're not we may need the intermediate result. We put the result back into the input stream in case it can be combined with another stack reset. 
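For example an addImmediate of 16 followed by an addImmediate of 32 to the same register becomes a single addImmediate of 48, provided the combined value still fits in the 12-bit immediate field, which is what the 0w4096 test checks.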
*) if regN2 = regD2 andalso regD1 = regD2 andalso immed2+immed1 < 0w4096 then forward(AddImmediate{regN=regN1, regD=regD2, immed=immed2+immed1, shifted=false, opSize=OpSize64, setFlags=false} :: tl2, list, true) else forward(tl1, add1 :: list, rep) | forward(BitwiseLogical{bits=0w1, regN, regD=XZero, logOp=LogAnd, opSize=_, setFlags=true} :: ConditionalBranch(CondEqual, label) :: tl2, list, _) = (* Test the tag bit: bit 0. This is very common to test for nil/not nil. We could include other values but they're far less likely. *) forward(TestBitBranch{test=regN, bit=0w0, label=label, onZero=true} :: tl2, list, true) | forward(BitwiseLogical{bits=0w1, regN, regD=XZero, logOp=LogAnd, opSize=_, setFlags=true} :: ConditionalBranch(CondNotEqual, label) :: tl2, list, _) = forward(TestBitBranch{test=regN, bit=0w0, label=label, onZero=false} :: tl2, list, true) | forward(hd :: tl, list, rep) = forward(tl, hd :: list, rep) and reverse([], list, rep) = (list, rep) | reverse((add as AddImmediate{regN=regN2, regD=regD2, immed, shifted=false, opSize=OpSize64, setFlags=false}) :: (tl1 as LoadRegScaled{regT=regT1, regN=regN1, unitOffset=0, loadType=Load64} ::tl2), list, rep) = (* A stack reset occurring after a load. This is usually the ML SP but can also occur with C memory ops. It might be possible to consider other cases. *) if regN1 = regD2 andalso regN2 = regD2 andalso regT1 <> regN1 andalso immed < 0w256 then reverse(tl2, LoadRegUnscaled{regT=regT1, regN=regN1, byteOffset=Word.toInt immed, loadType=Load64, unscaledType=PostIndex} :: list, true) else reverse(tl1, add :: list, rep) | reverse((add as AddImmediate{regN=regN2, regD=regD2, immed, shifted=false, opSize=OpSize64, setFlags=false}) :: (tl1 as LoadRegPair{regT1=regT1, regT2=regT2, regN=regN1, unitOffset=0, loadType=Load64, unscaledType=NoUpdate} ::tl2), list, rep) = (* A stack reset occurring after a load pair *) if regN1 = regD2 andalso regN2 = regD2 andalso regT1 <> regN1 andalso regT2 <> regN1 andalso immed < 0w64 * 0w8 then reverse(tl2, LoadRegPair{regT1=regT1, regT2=regT2, regN=regN1, unitOffset=Word.toInt(immed div 0w8), loadType=Load64, unscaledType=PostIndex} :: list, true) else reverse(tl1, add :: list, rep) | reverse(hd :: tl, list, rep) = reverse(tl, hd :: list, rep) (* Repeat scans through the code until there are no further changes. *) fun repeat ops = case forward(ops, [], false) of (list, false) => list | (list, true) => repeat list val optimised = repeat instrs in generateCode{instrs=List.rev(toAssembler(optimised, [])), name=name, parameters=parameters, resultClosure=resultClosure, profileObject=profileObject} end (* Constant shifts are encoded in the immr and imms fields of the bit-field instruction. *) fun shiftConstant{ direction, regD, regN, shift, opSize } = let val (bitfieldKind, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (BFUnsigned, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (BFUnsigned, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (BFUnsigned, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (BFUnsigned, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (BFSigned, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (BFSigned, shift, 0wx1f) in BitField{ regN=regN, regD=regD, opSize=opSize, immr=immr, imms=imms, bitfieldKind=bitfieldKind } end (* These sequences are used both in the ML code-generator and in the FFI code so it is convenient to have them here and share the code. 
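Each of boxDouble, boxSysWord and boxFloat below allocates a fixed-size byte cell, stores the length word, writes the value into the first word of the cell and, when running 32-in-64, converts the absolute address of the cell back into an object index.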
*) local fun allocateWords(fixedReg, workReg, words, bytes, regMask, code) = let val (lengthWord, setLength, flagShift) = if is32in64 then (~4, Load32, 0w24) else (~8, Load64, 0w56) in code <::> AllocateMemoryFixedSize{ bytes=bytes, dest=fixedReg, save=regMask, work=X16 } <::> LoadNonAddr(workReg, Word64.orb(words, Word64.<<(Word64.fromLarge(Word8.toLarge Address.F_bytes), flagShift))) <::> (* Store the length word. Have to use the unaligned version because offset is -ve. *) StoreRegUnscaled{regT=workReg, regN=fixedReg, byteOffset= lengthWord, loadType=setLength, unscaledType=NoUpdate} end fun absoluteAddressToIndex(reg, code) = if is32in64 then code <::> SubShiftedReg{regM=X_Base32in64, regN=reg, regD=reg, shift=ShiftNone, opSize=OpSize64, setFlags=false} <::> shiftConstant{direction=ShiftRightLogical, regN=reg, regD=reg, shift=0w2, opSize=OpSize64} else code in fun boxDouble({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Double64}) and boxSysWord({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreRegScaled{regT=source, regN=destination, unitOffset=0, loadType=Load64}) and boxFloat({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, 0w1, 0w8, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Float32}) end structure Sharing = struct type closureRef = closureRef type loadType = loadType type opSize = opSize type logicalOp = logicalOp type floatSize = floatSize type shiftDirection = shiftDirection type multKind = multKind type fpUnary = fpUnary type fpBinary = fpBinary type unscaledType = unscaledType type condSet = condSet type bitfieldKind = bitfieldKind type brRegType = brRegType type precode = precode type xReg = xReg type vReg = vReg type label = label type labelMaker = labelMaker type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale type instr = instr end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML index 3e35b063..b00be2fd 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML @@ -1,1163 +1,1165 @@ (* Copyright David C. J. Matthews 2021-2 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64PushRegisters( structure Arm64ICode: ARM64ICODE structure IntSet: INTSET structure Identify: ARM64IDENTIFYREFERENCES sharing Arm64ICode.Sharing = Identify.Sharing = IntSet ) : ARM64PUSHREGISTERS = struct open Arm64ICode open IntSet open Identify type basicBlockAbstract = (preg, pregOrZero, preg) basicBlock (* Curried subscript functions *) fun asub a i = Array.sub(a, i) and vsub v i = Vector.sub(v, i) exception InternalError = Misc.InternalError (* Each preg in the input is mapped to either a new preg or the stack. *) datatype pregMapType = Unset | ToPReg of preg | ToStack of int * stackLocn (* The stack contains both entries in the input code and entries added here. It is really used to ensure that the stack at run time is the same size at the start of a block whichever block has jumped to it. *) datatype stackEntry = NewEntry of {pregNo: int} (* pregNo is the original preg that has been pushed here. *) | OriginalEntry of { stackLoc: stackLocn } | HandlerEntry fun addRegisterPushes{code: extendedBasicBlock vector, pushVec: bool vector, pregProps, firstPass=_} = let val maxPRegs = Vector.length pregProps val numberOfBlocks = Vector.length code (* Output registers and properties. *) val pregCounter = ref 0 val pregPropList = ref [] val pregMap = Array.array(maxPRegs, Unset) val maxStack = ref 0 (* The stack size we've assumed for the block. Also indicates if a block has already been processed. *) val inputStackSizes = Array.array(numberOfBlocks, NONE) (* The result of processing a block. *) val blockOutput = Array.array(numberOfBlocks, {code=[], stackCount=0}) (* Extra blocks to adjust the stack are added here. *) val extraBlocks: basicBlockAbstract list ref = ref [] val blockCounter = ref numberOfBlocks (* Get the blocks that are inputs for each one. *) local val blockRefs = Array.array(numberOfBlocks, []) fun setReferences fromBlock = let val ExtendedBasicBlock{ flow, ...} = vsub code fromBlock val refs = successorBlocks flow fun setRefs toBlock = let val oldRefs = asub blockRefs toBlock in Array.update(blockRefs, toBlock, fromBlock :: oldRefs); if null oldRefs then setReferences toBlock else () end in List.app setRefs refs end val () = setReferences 0 in val blockRefs = blockRefs end (* Recursive scan of the blocks. For each block we produce an input and output state. The input state is the output state of the predecessor i.e. some block that jumps to this, but with any entries removed that are not used in this block. It is then necessary to match the input state, if necessary by adding extra blocks that just do the matching. *) local val haveProcessed = isSome o asub inputStackSizes fun processBlocks toDo = case List.filter (fn (n, _) => not(haveProcessed n)) toDo of [] => () (* Nothing left to do *) | stillToDo as head :: _ => let (* Try to find a block all of whose predecessors have been processed. That increases the chances that we will have cached items. TODO: This is no longer necessary since we don't do any caching here now so could may be simplified. *) fun available(dest, _) = List.all haveProcessed (Array.sub(blockRefs, dest)) val (blockNo, lastOutputState) = case List.find available stillToDo of SOME c => c | NONE => head (* This is the first time we've come to this block. 
*) val ExtendedBasicBlock{ block, flow, imports, passThrough, loopRegs, initialStacks, ...} = vsub code blockNo (* Remove any items from the input state that are no longer needed for this block. They could be local to the previous block or needed by a different successor. Although the values in loopRegs are not required the stack space is so that they can be updated. *) fun removeItems(result as {stack=[], stackCount=0}) = result | removeItems{stack=[], ...} = raise InternalError "removeItems - stack size" | removeItems (thisStack as {stack=NewEntry{pregNo} :: rest, stackCount}) = if member(pregNo, imports) orelse member(pregNo, passThrough) orelse member(pregNo, loopRegs) then thisStack else removeItems{stack=rest, stackCount=stackCount-1} | removeItems (thisStack as {stack=OriginalEntry{stackLoc=StackLoc{rno, size}, ...} :: rest, stackCount}) = if member(rno, initialStacks) then thisStack else removeItems{stack=rest, stackCount=stackCount-size} | removeItems result = result val {stackCount=newSp, stack=newStack} = removeItems lastOutputState (* References to hold the current stack count (number of words on the stack) and the list of items on the stack. The list is not used directly to map stack addresses. Instead it is used to match the stack at the beginning and end of a block. *) val stackCount = ref newSp val stack = ref newStack (* Items from the stack that have been marked as deleted but not yet removed. We only remove items from the top of the stack to avoid quadratic behaviour with a very deep stack. *) val deletedItems = ref [] (* Save the stack size in case we come by a different route. *) val () = Array.update(inputStackSizes, blockNo, SOME newSp) fun pushItemToStack item = let val size = case item of NewEntry _ => 1 | OriginalEntry{stackLoc=StackLoc{size, ...}, ...} => size | HandlerEntry => 2 in stackCount := ! stackCount+size; stack := item :: ! stack; maxStack := Int.max(!maxStack, !stackCount) end fun newPReg propKind = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := propKind :: !pregPropList in PReg regNo end and newStackLoc size = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropStack size :: !pregPropList in StackLoc{size=size, rno=regNo} end (* Map a source register. This always loads the argument. *) fun mapSrcReg(PReg n) = case Array.sub(pregMap, n) of Unset => raise InternalError "mapSrcReg - unset" | ToPReg preg => (preg, []) | ToStack(stackLoc, container as StackLoc{size, ...}) => let (* Make a new untagged register. That will prevent us pushing it if we have to spill registers. *) val newReg = newPReg RegPropUntagged in (newReg, [LoadStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0, dest=newReg}]) end fun mapDestReg(PReg n) = let val currentLocation = Array.sub(pregMap, n) val kind = Vector.sub(pregProps, n) in if Vector.sub(pushVec, n) then let (* This should not have been seen before. *) val _ = case currentLocation of Unset => () | _ => raise InternalError "mapDestReg - already set" val newReg = newPReg kind val newContainer = newStackLoc 1 val () = Array.update(pregMap, n, ToStack (!stackCount, newContainer)) val () = pushItemToStack(NewEntry{pregNo=n}) in (newReg, [PushToStack{source= newReg, container=newContainer, copies=1}]) end else let (* See if we already have a number for it. We may encounter the same preg as a destination when returning the result from a conditional in which case we have to use the same number. 
We shouldn't have pushed it. *) val newReg = case (currentLocation, kind) of (Unset, _) => let val newReg = newPReg kind val () = Array.update(pregMap, n, ToPReg newReg) in newReg end | (ToPReg preg, RegPropMultiple) => preg | _ => raise InternalError "mapDestReg - multiply defined non-merge reg" in (newReg, []) end end (* Optional destination for arithmetic and logical ops. *) fun mapOptDest ZeroReg = (ZeroReg, []) | mapOptDest (SomeReg destReg) = let val (destVal, destCode) = mapDestReg destReg in (SomeReg destVal, destCode) end fun mapOptSrc ZeroReg = (ZeroReg, []) | mapOptSrc (SomeReg srcReg) = let val (srcVal, srcCode) = mapSrcReg srcReg in (SomeReg srcVal, srcCode) end (* Adjust a stack offset from the old state to the new state. *) fun mapContainerAndStack(StackLoc{rno, size}, field) = let val (newStackAddr, newContainer) = case Array.sub(pregMap, rno) of Unset => raise InternalError "mapContainer - unset" | ToPReg _ => raise InternalError "mapContainer - ToPReg" | ToStack stackContainer => stackContainer val newOffset = !stackCount-(newStackAddr+size) + field in (newOffset, newContainer) end (* Add an entry for an existing stack entry. *) fun mapDestContainer(StackLoc{rno, size}, locn) = ( case Array.sub(pregMap, rno) of Unset => let val newContainer = newStackLoc size val () = Array.update(pregMap, rno, ToStack(locn, newContainer)) in newContainer end | _ => raise InternalError "mapDestContainer: already set" ) (* Map a function argument which could be a register or a stack entry. A register entry could have been pushed. *) fun mapArgument(ArgInReg (PReg r)) = ( case Array.sub(pregMap, r) of Unset => raise InternalError "mapSource - unset" | ToPReg preg => ArgInReg preg | ToStack(stackLoc, container as StackLoc{size, ...}) => ArgOnStack{wordOffset= !stackCount-stackLoc-size, container=container, field=0} ) | mapArgument(ArgOnStack{container, field, ...}) = let val (newOffset, newContainer) = mapContainerAndStack(container, field) in ArgOnStack{container=newContainer, wordOffset=newOffset, field=field} end (* Rewrite the code, replacing any registers that need to be pushed with references to the stack. The result is built up in reverse order and then reversed. *) fun pushRegisters({instr=MoveRegister{ source, dest as PReg dReg }, ...}, code) = if Vector.sub(pushVec, dReg) then (* We're going to push this. *) let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest (* TODO: Since we're pushing it we don't need to move it first. 
*) in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end else let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ MoveRegister { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=LoadNonAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadNonAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadAddressConstant { dest, source}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ LoadAddressConstant { dest=destVal, source=source} :: code end | pushRegisters({instr=LoadWithConstantOffset { base, dest, byteOffset, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ code end | pushRegisters({instr=LoadFPWithConstantOffset { base, dest, byteOffset, floatSize}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithConstantOffset { base=baseVal, dest=destVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ code end | pushRegisters({instr=LoadWithIndexedOffset { base, dest, index, loadType, signExtendIndex}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, loadType=loadType, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ code end | pushRegisters({instr=LoadFPWithIndexedOffset { base, dest, index, floatSize, signExtendIndex}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index val (destVal, destCode) = mapDestReg dest in destCode @ LoadFPWithIndexedOffset { base=baseVal, dest=destVal, index=indexVal, floatSize=floatSize, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ code end | pushRegisters({instr=GetThreadId { dest}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ GetThreadId { dest=destVal} :: code end | pushRegisters({instr=ObjectIndexAddressToAbsolute { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ObjectIndexAddressToAbsolute { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=AbsoluteToObjectIndex { source, dest}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ AbsoluteToObjectIndex { source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=AllocateMemoryFixed { bytesRequired, dest, ...}, ...}, code) = let val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryFixed { dest=destVal, bytesRequired=bytesRequired, saveRegs=[]} :: code end | pushRegisters({instr=AllocateMemoryVariable{size, dest, ...}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (destVal, destCode) = mapDestReg dest in destCode @ AllocateMemoryVariable{size=sizeVal, dest=destVal, saveRegs=[]} :: sizeCode @ code end | pushRegisters({instr=InitialiseMem{size, addr, init}, ...}, code) = let val (sizeVal, sizeCode) = mapSrcReg size val (addrVal, addrCode) = mapSrcReg addr val (initVal, initCode) = mapSrcReg init in 
InitialiseMem{size=sizeVal, addr=addrVal, init=initVal} :: initCode @ addrCode @ sizeCode @ code end | pushRegisters({instr=BeginLoop, ...}, code) = BeginLoop :: code | pushRegisters({instr=JumpLoop{regArgs, stackArgs, checkInterrupt}, ...}, code) = let (* Normally JumpLoop will be the last item in a block but it is possible that we've added a reset-stack after it. *) fun getValues [] = ([], []) | getValues ({src=source, dst=PReg n} :: rest) = let val (otherRegArgs, otherStackArgs) = getValues rest in case Array.sub(pregMap, n) of ToPReg lReg => ({src=mapArgument source, dst=lReg} :: otherRegArgs, otherStackArgs) | ToStack(stackloc, stackC as StackLoc{size, ...}) => let val sourceVal = mapArgument source val stackOff = !stackCount - stackloc - size in (otherRegArgs, {src=sourceVal, wordOffset=stackOff, stackloc=stackC} :: otherStackArgs) end | Unset => (* Drop it. It's never used. This can happen if we are folding a function over a list such that it always returns the last value and then discard the result of the fold. *) (otherRegArgs, otherStackArgs) end val (newRegArguments, newStackArgs) = getValues regArgs fun loadStackArg({src=source, stackloc=destC, ...}, otherArgs) = let val sourceVal = mapArgument source val (newOffset, newContainer) = mapContainerAndStack(destC, 0) in {src=sourceVal, wordOffset=newOffset, stackloc=newContainer} :: otherArgs end val oldStackArgs = List.foldr loadStackArg [] stackArgs val check = case checkInterrupt of NONE => NONE | SOME _ => SOME [] in JumpLoop{ regArgs=newRegArguments, stackArgs=oldStackArgs @ newStackArgs, checkInterrupt=check} :: code end | pushRegisters({instr=StoreWithConstantOffset { base, source, byteOffset, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, loadType=loadType} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithConstantOffset { base, source, byteOffset, floatSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreFPWithConstantOffset{ base=baseVal, source=sourceVal, byteOffset=byteOffset, floatSize=floatSize} :: baseCode @ sourceCode @ code end | pushRegisters({instr=StoreWithIndexedOffset { base, source, index, loadType, signExtendIndex}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, loadType=loadType, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=StoreFPWithIndexedOffset { base, source, index, floatSize, signExtendIndex}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base val (indexVal, indexCode) = mapSrcReg index in StoreFPWithIndexedOffset{ base=baseVal, source=sourceVal, index=indexVal, floatSize=floatSize, signExtendIndex=signExtendIndex} :: indexCode @ baseCode @ sourceCode @ code end | pushRegisters({instr=AddSubImmediate{ source, dest, ccRef, immed, isAdd, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ AddSubImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, isAdd=isAdd, length=length} :: sourceCode @ code end | pushRegisters({instr=AddSubRegister{ base, shifted, dest, ccRef, isAdd, length, shift}, 
...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ AddSubRegister { base=op1Val, shifted=op2Val, dest=destVal, ccRef=ccRef, isAdd=isAdd, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=LogicalImmediate{ source, dest, ccRef, immed, logOp, length }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ LogicalImmediate { source=sourceVal, dest=destVal, ccRef=ccRef, immed=immed, logOp=logOp, length=length} :: sourceCode @ code end | pushRegisters({instr=LogicalRegister{ base, shifted, dest, ccRef, logOp, length, shift}, ...}, code) = let val (op1Val, op1Code) = mapSrcReg base val (op2Val, op2Code) = mapSrcReg shifted val (destVal, destCode) = mapOptDest dest in destCode @ LogicalRegister { base=op1Val, shifted=op2Val, dest=destVal, ccRef=ccRef, logOp=logOp, length=length, shift=shift} :: op2Code @ op1Code @ code end | pushRegisters({instr=ShiftRegister{ direction, dest, source, shift, opSize}, ...}, code) = let val (srcVal, op1Code) = mapSrcReg source val (shiftVal, op2Code) = mapSrcReg shift val (destVal, destCode) = mapDestReg dest in destCode @ ShiftRegister { source=srcVal, shift=shiftVal, dest=destVal, direction=direction, opSize=opSize} :: op2Code @ op1Code @ code end | pushRegisters({instr=Multiplication{ kind, dest, sourceA, sourceM, sourceN }, ...}, code) = let val (srcAVal, srcACode) = mapOptSrc sourceA val (srcMVal, srcMCode) = mapSrcReg sourceM val (srcNVal, srcNCode) = mapSrcReg sourceN val (destVal, destCode) = mapDestReg dest in destCode @ Multiplication { kind=kind, sourceA=srcAVal, sourceM=srcMVal, sourceN=srcNVal, dest=destVal} :: srcNCode @ srcMCode @ srcACode @ code end | pushRegisters({instr=Division{ isSigned, dest, dividend, divisor, opSize }, ...}, code) = let val (dividendVal, dividendCode) = mapSrcReg dividend val (divisorVal, divisorCode) = mapSrcReg divisor val (destVal, destCode) = mapDestReg dest in destCode @ Division { isSigned=isSigned, dividend=dividendVal, divisor=divisorVal, dest=destVal, opSize=opSize} :: divisorCode @ dividendCode @ code end | pushRegisters({instr=BeginFunction {regArgs, stackArgs}, ...}, code) = let (* Create a new container list. The offsets begin at -numArgs. *) fun newContainers(src :: srcs, offset) = let val newContainer = mapDestContainer(src, offset) in newContainer :: newContainers(srcs, offset+1) end | newContainers _ = [] val newStackArgs = newContainers(stackArgs, ~ (List.length stackArgs)) (* Push any registers that need to be pushed. *) fun pushReg((preg, rreg), (others, code)) = let val (newReg, newCode) = mapDestReg(preg) in ((newReg, rreg) :: others, newCode @ code) end val (newRegArgs, pushCode) = List.foldl pushReg ([], []) regArgs in pushCode @ BeginFunction {regArgs=newRegArgs, stackArgs=newStackArgs} :: code end | pushRegisters({instr=FunctionCall{callKind, regArgs, stackArgs, dests, containers, ...}, ...}, code) = let (* It's possible that this could lead to having to spill registers in order to load others. Leave that problem for the moment. *) fun loadStackArg (arg, otherArgs) = let val argVal = mapArgument arg in argVal :: otherArgs end val newStackArgs = List.foldr loadStackArg [] stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs (* Push any result registers that need to be pushed. 
*) fun pushResults((preg, rreg), (others, code)) = let val (newReg, newCode) = mapDestReg preg in ((newReg, rreg) :: others, newCode @ code) end val (destVals, destCode) = List.foldl pushResults ([], []) dests val newContainers = List.map(fn c => #2(mapContainerAndStack(c, 0))) containers in destCode @ FunctionCall{ callKind=callKind, regArgs=newRegArgs, stackArgs=newStackArgs, dests=destVals, saveRegs=[], containers=newContainers} :: code end | pushRegisters({instr=TailRecursiveCall{callKind, regArgs, stackArgs, stackAdjust, ...}, ...}, code) = let val newStackOffset = !stackCount fun loadStackArg ({src, stack}, (otherLoads, otherArgs)) = let val (argVal, loadCode) = case mapArgument src of (source as ArgOnStack{wordOffset, container, field}) => (* If we're leaving it in its old location or we're pushing it above the current top we're ok. We're also ok if we're moving it from a somewhere above the last argument. Otherwise we have to load it. It goes into a normal tagged register which may mean that it could be pushed onto the stack in a subsequent pass. *) if wordOffset = stack+newStackOffset orelse stack+newStackOffset < 0 orelse newStackOffset-wordOffset > ~ stackAdjust then (source, []) else let val preg = newPReg RegPropGeneral in (ArgInReg preg, [LoadStack{wordOffset=wordOffset, container=container, field=field, dest=preg}]) end | argCode => (argCode, []) in (loadCode @ otherLoads, {src=argVal, stack=stack} :: otherArgs) end val (stackArgLoads, newStackArgs) = List.foldr loadStackArg ([], []) stackArgs fun loadRegArg ((arg, reg), otherArgs) = let val argVal = mapArgument arg in (argVal, reg) :: otherArgs end val newRegArgs = List.foldr loadRegArg [] regArgs in TailRecursiveCall{ callKind=callKind, regArgs=newRegArgs, stackArgs=newStackArgs, stackAdjust=stackAdjust, currStackSize=newStackOffset} :: stackArgLoads @ code end | pushRegisters({instr=ReturnResultFromFunction{results, returnReg, numStackArgs}, ...}, code) = let fun loadResults((preg, rreg), (others, code)) = let val (newReg, newCode) = mapSrcReg preg in ((newReg, rreg) :: others, newCode @ code) end val (resultValues, loadResults) = List.foldr loadResults ([], []) results val (returnValue, loadReturn) = mapSrcReg returnReg val resetCode = if !stackCount = 0 then [] else [ResetStackPtr{numWords= !stackCount}] in ReturnResultFromFunction{results=resultValues, returnReg=returnValue, numStackArgs=numStackArgs} :: resetCode @ loadReturn @ loadResults @ code end | pushRegisters({instr=RaiseExceptionPacket{packetReg}, ...}, code) = let val (packetVal, packetCode) = mapSrcReg packetReg in RaiseExceptionPacket{packetReg=packetVal} :: packetCode @ code end | pushRegisters({instr=PushToStack{ source, container, copies }, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source (* This was a push from a previous pass. Treat as a container of size "copies". *) val newContainer = mapDestContainer(container, !stackCount) val () = pushItemToStack(OriginalEntry{stackLoc=container}) in PushToStack{source=sourceVal, container=newContainer, copies=copies} :: sourceCode @ code end | pushRegisters({instr=LoadStack{ dest, container, field, ... 
}, ...}, code) = let val (newOffset, newContainer) = mapContainerAndStack(container, field) val (destVal, destCode) = mapDestReg dest in destCode @ LoadStack{ dest=destVal, container=newContainer, field=field, wordOffset=newOffset } :: code end | pushRegisters({instr=StoreToStack{source, container, field, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (newOffset, newContainer) = mapContainerAndStack(container, field) in StoreToStack{source=sourceVal, container=newContainer, field=field, stackOffset=newOffset} :: sourceCode @ code end | pushRegisters({instr=ContainerAddress{ dest, container, ... }, ...}, code) = let val (newOffset, newContainer) = mapContainerAndStack(container, 0) val (destVal, destCode) = mapDestReg dest in destCode @ ContainerAddress{ dest=destVal, container=newContainer, stackOffset=newOffset } :: code end | pushRegisters({instr=ResetStackPtr _, ...}, code) = code (* Added in a previous pass - discard it. *) | pushRegisters({instr=TagValue{source, dest, isSigned, opSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ TagValue{source=sourceVal, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code end | pushRegisters({instr=UntagValue{source, dest, isSigned, opSize, ...}, ...}, code) = let val (loadedSource, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UntagValue{source=loadedSource, dest=destVal, isSigned=isSigned, opSize=opSize} :: sourceCode @ code end | pushRegisters({instr=BoxLarge{source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BoxLarge{source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code end | pushRegisters({instr=UnboxLarge{source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnboxLarge{source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=BoxTagFloat{floatSize, source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BoxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal, saveRegs=[]} :: sourceCode @ code end | pushRegisters({instr=UnboxTagFloat{floatSize, source, dest, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnboxTagFloat{floatSize=floatSize, source=sourceVal, dest=destVal} :: sourceCode @ code end | pushRegisters({instr=LoadAcquire { base, dest, loadType}, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadAcquire { base=baseVal, dest=destVal, loadType=loadType} :: baseCode @ code end | pushRegisters({instr=StoreRelease { base, source, loadType}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (baseVal, baseCode) = mapSrcReg base in StoreRelease{ base=baseVal, source=sourceVal, loadType=loadType} :: baseCode @ sourceCode @ code end | pushRegisters({instr=BitFieldShift{source, dest, isSigned, length, immr, imms}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ BitFieldShift { source=sourceVal, dest=destVal, isSigned=isSigned, immr=immr, imms=imms, length=length} :: sourceCode @ code end | pushRegisters({instr=BitFieldInsert{source, destAsSource, dest, 
length, immr, imms}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destSrcVal, destSrcCode) = mapSrcReg destAsSource val (destVal, destCode) = mapDestReg dest in destCode @ BitFieldInsert { source=sourceVal, destAsSource=destSrcVal, dest=destVal, immr=immr, imms=imms, length=length} :: destSrcCode @ sourceCode @ code end | pushRegisters({instr=IndexedCaseOperation{testReg}, ...}, code) = let val (testVal, testCode) = mapSrcReg testReg in IndexedCaseOperation{testReg=testVal} :: testCode @ code end | pushRegisters({instr=PushExceptionHandler, ...}, code) = let (* Add a handler entry to the stack. *) val () = pushItemToStack HandlerEntry in PushExceptionHandler :: code end | pushRegisters({instr=PopExceptionHandler, ...}, code) = let (* Appears at the end of the block whose exceptions are being handled. Delete the handler and anything above it. *) (* Get the state after removing the handler. *) fun popContext ([], _) = raise InternalError "pushRegisters - pop handler" | popContext (HandlerEntry :: tl, new) = (tl, new-2) | popContext (OriginalEntry{stackLoc=StackLoc{size, ...}, ...} :: tl, new) = popContext(tl, new-size) | popContext (NewEntry _ :: tl, new) = popContext(tl, new-1) val (newStack, nnCount) = popContext(!stack, !stackCount) val () = stack := newStack val oldStackPtr = ! stackCount val () = stackCount := nnCount (* Reset the stack to just above the two words of the handler. *) val resetCode = if oldStackPtr <> nnCount+2 then [ResetStackPtr{numWords=oldStackPtr-nnCount-2}] else [] in PopExceptionHandler :: resetCode @ code end | pushRegisters({instr=BeginHandler{packetReg}, ...}, code) = let (* Start of a handler. The top active entry should be the handler. *) val () = case !stack of HandlerEntry :: tl => stack := tl | _ => raise InternalError "pushRegisters: BeginHandler" val () = stackCount := !stackCount - 2 val (packetVal, packetCode) = mapDestReg packetReg in packetCode @ BeginHandler{packetReg=packetVal} :: code end | pushRegisters({instr=CompareByteVectors{vec1Addr, vec2Addr, length, ccRef}, ...}, code) = let val (vec1Val, vec1Code) = mapSrcReg vec1Addr val (vec2Val, vec2Code) = mapSrcReg vec2Addr val (lenVal, lenCode) = mapSrcReg length in CompareByteVectors{vec1Addr=vec1Val, vec2Addr=vec2Val, length=lenVal, ccRef=ccRef} :: lenCode @ vec2Code @ vec1Code @ code end | pushRegisters({instr=BlockMove{srcAddr, destAddr, length, isByteMove}, ...}, code) = let val (srcVal, srcCode) = mapSrcReg srcAddr val (destVal, destCode) = mapSrcReg destAddr val (lenVal, lenCode) = mapSrcReg length in BlockMove{srcAddr=srcVal, destAddr=destVal, length=lenVal, isByteMove=isByteMove} :: lenCode @ destCode @ srcCode @ code end | pushRegisters({instr=AddSubXSP{source, dest, isAdd}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapOptDest dest in destCode @ AddSubXSP { source=sourceVal, dest=destVal, isAdd=isAdd} :: sourceCode @ code end | pushRegisters({instr=TouchValue{source, ...}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source in TouchValue { source=sourceVal} :: sourceCode @ code end | pushRegisters({instr=LoadAcquireExclusive{ base, dest }, ...}, code) = let val (baseVal, baseCode) = mapSrcReg base val (destVal, destCode) = mapDestReg dest in destCode @ LoadAcquireExclusive { base=baseVal, dest=destVal} :: baseCode @ code end | pushRegisters({instr=StoreReleaseExclusive{ base, source, result }, ...}, code) = let val (sourceVal, sourceCode) = mapOptSrc source val (baseVal, baseCode) = mapSrcReg base val 
(resVal, resCode) = mapDestReg result in resCode @ StoreReleaseExclusive{ base=baseVal, source=sourceVal, result=resVal} :: baseCode @ sourceCode @ code end | pushRegisters({instr=MemoryBarrier, ...}, code) = MemoryBarrier :: code | pushRegisters({instr=ConvertIntToFloat{ source, dest, srcSize, destSize}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ConvertIntToFloat{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize} :: sourceCode @ code end | pushRegisters({instr=ConvertFloatToInt{ source, dest, srcSize, destSize, rounding}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ ConvertFloatToInt{ source=sourceVal, dest=destVal, srcSize=srcSize, destSize=destSize, rounding=rounding} :: sourceCode @ code end | pushRegisters({instr=UnaryFloatingPt{ source, dest, fpOp}, ...}, code) = let val (sourceVal, sourceCode) = mapSrcReg source val (destVal, destCode) = mapDestReg dest in destCode @ UnaryFloatingPt{ source=sourceVal, dest=destVal, fpOp=fpOp} :: sourceCode @ code end | pushRegisters({instr=BinaryFloatingPoint{ arg1, arg2, dest, fpOp, opSize}, ...}, code) = let val (arg1Val, arg1Code) = mapSrcReg arg1 val (arg2Val, arg2Code) = mapSrcReg arg2 val (destVal, destCode) = mapDestReg dest in destCode @ BinaryFloatingPoint{ arg1=arg1Val, arg2=arg2Val, dest=destVal, fpOp=fpOp, opSize=opSize} :: arg2Code @ arg1Code @ code end | pushRegisters({instr=CompareFloatingPoint{ arg1, arg2, opSize, ccRef}, ...}, code) = let val (arg1Val, arg1Code) = mapSrcReg arg1 val (arg2Val, arg2Code) = mapSrcReg arg2 in CompareFloatingPoint{ arg1=arg1Val, arg2=arg2Val, opSize=opSize, ccRef=ccRef} :: arg2Code @ arg1Code @ code end + | pushRegisters({instr=CPUYield, ...}, code) = CPUYield :: code + | pushRegisters({instr=CacheCheck _, ...}, _) = raise InternalError "pushRegisters: CacheCheck" local fun doPush(instr as {kill, ...}, code) = let val newCode = pushRegisters(instr, code) (* Can we pop the stack? *) val stackReset = case setToList (minus(kill, loopRegs)) of [] => [] | killList => let (* See if any of the kill items are at the top of the stack. If they are we can pop them and perhaps items we've previously marked for deletion but not been able to pop. *) val oldStack = !stackCount fun checkAndAdd(r, output) = case Array.sub(pregMap, r) of ToStack(stackLoc, StackLoc{size, ...}) => if stackLoc < 0 then r :: output (* We can have arguments and return address. *) else if !stackCount = stackLoc+size then ( stack := tl (!stack); stackCount := stackLoc; output ) else r :: output | _ => r :: output val toAdd = List.foldl checkAndAdd [] killList fun reprocess list = let val prevStack = !stackCount val outlist = List.foldl checkAndAdd [] list in if !stackCount = prevStack then list else reprocess outlist end val () = if !stackCount = oldStack then deletedItems := toAdd @ !deletedItems else deletedItems := reprocess(toAdd @ !deletedItems) val _ = oldStack >= !stackCount orelse raise InternalError "negative stack offset" in if !stackCount = oldStack then [] else [ResetStackPtr{numWords=oldStack - !stackCount}] end in stackReset @ newCode end in val codeResult = List.foldl doPush [] block val outputCount = ! 
stackCount val results = {code=codeResult, stackCount= outputCount} val stateResult = { stackCount= outputCount, stack= !stack } val () = Array.update(blockOutput, blockNo, results) end val addSet = case flow of ExitCode => [] | IndexedBr cases => cases | Unconditional dest => [dest] | Conditional {trueJump, falseJump, ...} => [falseJump, trueJump] | SetHandler { handler, continue } => [handler, continue] | UnconditionalHandle _ => [] | ConditionalHandle { continue, ...} => [continue] val addItems = List.map(fn m => (m, stateResult)) addSet in processBlocks(addItems @ stillToDo) end in val () = processBlocks([(0, {stack=[], stackCount=0})]) end (* Put together the result code and blocks. *) local fun createBlock blockNo = (* Skip unreferenced blocks apart from block 0. *) if blockNo <> 0 andalso null (asub blockRefs blockNo) then BasicBlock{block=[], flow=ExitCode} else let val ExtendedBasicBlock{ flow, ...} = vsub code blockNo val {code=codeResult, stackCount=outputCount, ...} = asub blockOutput blockNo (* Process the successor. If we need a stack adjustment this will require an adjustment block. TODO: We could put a pre-adjustment if we only have one branch to this block. *) fun matchStacks targetBlock = let (* Process the destination. If it hasn't been processed. *) val expectedInput = valOf (asub inputStackSizes targetBlock) in if expectedInput = outputCount then targetBlock else let val _ = outputCount > expectedInput orelse raise InternalError "adjustStack" val adjustCode = [ResetStackPtr{numWords=outputCount-expectedInput}] val newBlock = BasicBlock{block=adjustCode, flow=Unconditional targetBlock} val newBlockNo = !blockCounter before blockCounter := !blockCounter+1 val () = extraBlocks := newBlock :: !extraBlocks in newBlockNo end end val (finalCode, newFlow) = case flow of ExitCode => (codeResult, ExitCode) | Unconditional m => let (* Process the block. Since we're making an unconditional jump we can include any stack adjustment needed to match the destination in here. In particular this includes loops. *) val expectedInput = valOf (asub inputStackSizes m) val _ = outputCount >= expectedInput orelse raise InternalError "negative reset" val resultCode = if expectedInput = outputCount then codeResult else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult in (resultCode, Unconditional m) end (* For any of these, if we need to adjust the stack we have to add an adjustment block. *) | Conditional {trueJump, falseJump, ccRef, condition} => (codeResult, Conditional{trueJump=matchStacks trueJump, falseJump=matchStacks falseJump, ccRef=ccRef, condition=condition}) | SetHandler{ handler, continue } => (codeResult, SetHandler{ handler=matchStacks handler, continue=matchStacks continue}) | IndexedBr cases => (codeResult, IndexedBr(map matchStacks cases)) | u as UnconditionalHandle _ => (codeResult, u) | c as ConditionalHandle{ continue, ... } => let (* As for unconditional branch *) val expectedInput = valOf (asub inputStackSizes continue) val _ = outputCount >= expectedInput orelse raise InternalError "negative reset" val resultCode = if expectedInput = outputCount then codeResult else ResetStackPtr{numWords=outputCount-expectedInput} :: codeResult in (resultCode, c) end in BasicBlock{block=List.rev finalCode, flow=newFlow} end in val resultBlocks = Vector.tabulate(numberOfBlocks, createBlock) end (* Add any extra blocks to the result. 
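These extra blocks are the stack-adjustment stubs created by matchStacks: each one resets the stack pointer and then jumps unconditionally to the real target. They were numbered from blockCounter onwards, after all the original blocks, so appending them here leaves every existing block index valid.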
*) val finalResult = case !extraBlocks of [] => resultBlocks | blocks => Vector.concat[resultBlocks, Vector.fromList(List.rev blocks)] val pregProperties = Vector.fromList(List.rev(! pregPropList)) in {code=finalResult, pregProps=pregProperties, maxStack= !maxStack} end structure Sharing = struct type extendedBasicBlock = extendedBasicBlock and ('genReg, 'optGenReg, 'fpReg) basicBlock = ('genReg, 'optGenReg, 'fpReg) basicBlock and regProperty = regProperty and preg = preg and pregOrZero = pregOrZero end end; diff --git a/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml b/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml index a5117b97..d7190762 100644 --- a/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml +++ b/mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml @@ -1,739 +1,739 @@ (* - Copyright (c) 2012, 2016-21 David C.J. Matthews + Copyright (c) 2012, 2016-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Intermediate code tree for the back end of the compiler. *) structure BackendIntermediateCode: BACKENDINTERMEDIATECODE = struct open Address structure BuiltIns = struct datatype testConditions = TestEqual | TestLess | TestLessEqual | TestGreater | TestGreaterEqual | TestUnordered (* Reals only. 
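The unordered test is true when the operands cannot be ordered, i.e. when at least one of them is a NaN.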
*) datatype arithmeticOperations = ArithAdd | ArithSub | ArithMult | ArithQuot | ArithRem | ArithDiv | ArithMod datatype logicalOperations = LogicalAnd | LogicalOr | LogicalXor datatype shiftOperations = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic datatype unaryOps = NotBoolean | IsTaggedValue | MemoryCellLength | MemoryCellFlags | ClearMutableFlag | LongWordToTagged | SignedToLongWord | UnsignedToLongWord | RealAbs of precision | RealNeg of precision | RealFixedInt of precision | FloatToDouble | DoubleToFloat | RealToInt of precision * IEEEReal.rounding_mode | TouchAddress | AllocCStack | LockMutex | TryLockMutex | UnlockMutex and precision = PrecSingle | PrecDouble and binaryOps = WordComparison of { test: testConditions, isSigned: bool } | FixedPrecisionArith of arithmeticOperations | WordArith of arithmeticOperations | WordLogical of logicalOperations | WordShift of shiftOperations | AllocateByteMemory | LargeWordComparison of testConditions | LargeWordArith of arithmeticOperations | LargeWordLogical of logicalOperations | LargeWordShift of shiftOperations | RealComparison of testConditions * precision | RealArith of arithmeticOperations * precision | PointerEq | FreeCStack and nullaryOps = GetCurrentThreadId - | CheckRTSException + | CPUPause | CreateMutex fun unaryRepr NotBoolean = "NotBoolean" | unaryRepr IsTaggedValue = "IsTaggedValue" | unaryRepr MemoryCellLength = "MemoryCellLength" | unaryRepr MemoryCellFlags = "MemoryCellFlags" | unaryRepr ClearMutableFlag = "ClearMutableFlag" | unaryRepr LongWordToTagged = "LongWordToTagged" | unaryRepr SignedToLongWord = "SignedToLongWord" | unaryRepr UnsignedToLongWord = "UnsignedToLongWord" | unaryRepr (RealAbs prec) = "RealAbs" ^ precRepr prec | unaryRepr (RealNeg prec) = "RealNeg" ^ precRepr prec | unaryRepr (RealFixedInt prec) = "RealFixedInt" ^ precRepr prec | unaryRepr FloatToDouble = "FloatToDouble" | unaryRepr DoubleToFloat = "DoubleToFloat" | unaryRepr (RealToInt (prec, mode)) = "RealToInt" ^ precRepr prec ^ rndModeRepr mode | unaryRepr TouchAddress = "TouchAddress" | unaryRepr AllocCStack = "AllocCStack" | unaryRepr LockMutex = "LockMutex" | unaryRepr TryLockMutex = "TryLockMutex" | unaryRepr UnlockMutex = "UnlockMutex" and binaryRepr (WordComparison{test, isSigned}) = "Test" ^ (testRepr test) ^ (if isSigned then "Signed" else "Unsigned") | binaryRepr (FixedPrecisionArith arithOp) = (arithRepr arithOp) ^ "Fixed" | binaryRepr (WordArith arithOp) = (arithRepr arithOp) ^ "Word" | binaryRepr (WordLogical logOp) = (logicRepr logOp) ^ "Word" | binaryRepr (WordShift shiftOp) = (shiftRepr shiftOp) ^ "Word" | binaryRepr AllocateByteMemory = "AllocateByteMemory" | binaryRepr (LargeWordComparison test) = "Test" ^ (testRepr test) ^ "LargeWord" | binaryRepr (LargeWordArith arithOp) = (arithRepr arithOp) ^ "LargeWord" | binaryRepr (LargeWordLogical logOp) = (logicRepr logOp) ^ "LargeWord" | binaryRepr (LargeWordShift shiftOp) = (shiftRepr shiftOp) ^ "LargeWord" | binaryRepr (RealComparison (test, prec)) = "Test" ^ testRepr test ^ precRepr prec | binaryRepr (RealArith (arithOp, prec)) = arithRepr arithOp ^ precRepr prec | binaryRepr PointerEq = "PointerEq" | binaryRepr FreeCStack = "FreeCStack" and nullaryRepr GetCurrentThreadId = "GetCurrentThreadId" - | nullaryRepr CheckRTSException = "CheckRTSException" + | nullaryRepr CPUPause = "CPUPause" | nullaryRepr CreateMutex = "CreateMutex" and testRepr TestEqual = "Equal" | testRepr TestLess = "Less" | testRepr TestLessEqual = "LessEqual" | testRepr TestGreater = "Greater" | testRepr 
TestGreaterEqual = "GreaterEqual" | testRepr TestUnordered = "Unordered" and arithRepr ArithAdd = "Add" | arithRepr ArithSub = "Sub" | arithRepr ArithMult = "Mult" | arithRepr ArithQuot = "Quot" | arithRepr ArithRem = "Rem" | arithRepr ArithDiv = "Div" | arithRepr ArithMod = "Mod" and logicRepr LogicalAnd = "And" | logicRepr LogicalOr = "Or" | logicRepr LogicalXor = "Xor" and shiftRepr ShiftLeft = "Left" | shiftRepr ShiftRightLogical = "RightLogical" | shiftRepr ShiftRightArithmetic = "RightArithmetic" and precRepr PrecSingle = "Single" | precRepr PrecDouble = "Double" and rndModeRepr IEEEReal.TO_NEAREST = "Round" | rndModeRepr IEEEReal.TO_NEGINF = "Down" | rndModeRepr IEEEReal.TO_POSINF = "Up" | rndModeRepr IEEEReal.TO_ZERO = "Trunc" end datatype argumentType = GeneralType | DoubleFloatType | SingleFloatType | ContainerType of int datatype backendIC = BICNewenv of bicCodeBinding list * backendIC (* Set of bindings with an expression. *) | BICConstnt of machineWord * Universal.universal list (* Load a constant *) | BICExtract of bicLoadForm (* Get a local variable, an argument or a closure value *) | BICField of {base: backendIC, offset: int } (* Load a field from a tuple or record *) | BICEval of (* Evaluate a function with an argument list. *) { function: backendIC, argList: (backendIC * argumentType) list, resultType: argumentType } (* Built-in functions. *) | BICNullary of {oper: BuiltIns.nullaryOps} | BICUnary of {oper: BuiltIns.unaryOps, arg1: backendIC} | BICBinary of {oper: BuiltIns.binaryOps, arg1: backendIC, arg2: backendIC} | BICArbitrary of {oper: BuiltIns.arithmeticOperations, shortCond: backendIC, arg1: backendIC, arg2: backendIC, longCall: backendIC} | BICLambda of bicLambdaForm (* Lambda expressions. *) | BICCond of backendIC * backendIC * backendIC (* If-then-else expression *) | BICCase of (* Case expressions *) { cases : backendIC option list, (* NONE means "jump to the default". *) test : backendIC, default : backendIC, isExhaustive: bool, firstIndex: word } | BICBeginLoop of (* Start of tail-recursive inline function. *) { loop: backendIC, arguments: (bicSimpleBinding * argumentType) list } | BICLoop of (backendIC * argumentType) list (* Jump back to start of tail-recursive function. *) | BICRaise of backendIC (* Raise an exception *) | BICHandle of (* Exception handler. *) { exp: backendIC, handler: backendIC, exPacketAddr: int } | BICTuple of backendIC list (* Tuple *) | BICSetContainer of (* Copy a tuple to a container. *) { container: backendIC, tuple: backendIC, filter: BoolVector.vector } | BICLoadContainer of {base: backendIC, offset: int } | BICTagTest of { test: backendIC, tag: word, maxTag: word } | BICLoadOperation of { kind: loadStoreKind, address: bicAddress } | BICStoreOperation of { kind: loadStoreKind, address: bicAddress, value: backendIC } | BICBlockOperation of { kind: blockOpKind, sourceLeft: bicAddress, destRight: bicAddress, length: backendIC } | BICAllocateWordMemory of {numWords: backendIC, flags: backendIC, initial: backendIC} and bicCodeBinding = BICDeclar of bicSimpleBinding (* Make a local declaration or push an argument *) | BICRecDecs of { addr: int, lambda: bicLambdaForm } list (* Set of mutually recursive declarations. *) | BICNullBinding of backendIC (* Just evaluate the expression and discard the result. *) | BICDecContainer of { addr: int, size: int } (* Create a container for a tuple on the stack. *) and caseType = CaseWord (* Word or fixed-precision integer. 
*) | CaseTag of word and bicLoadForm = BICLoadLocal of int (* Local binding *) | BICLoadArgument of int (* Argument - 0 is first arg etc.*) | BICLoadClosure of int (* Closure - 0 is first closure item etc *) | BICLoadRecursive (* Recursive call *) and loadStoreKind = LoadStoreMLWord of {isImmutable: bool} (* Load/Store an ML word in an ML word cell. *) | LoadStoreMLByte of {isImmutable: bool} (* Load/Store a byte, tagging and untagging as appropriate, in an ML byte cell. *) | LoadStoreC8 (* Load/Store C values - The base address is a boxed SysWord.word value. *) | LoadStoreC16 | LoadStoreC32 | LoadStoreC64 | LoadStoreCFloat | LoadStoreCDouble | LoadStoreUntaggedUnsigned and blockOpKind = BlockOpMove of {isByteMove: bool} | BlockOpEqualByte | BlockOpCompareByte withtype bicSimpleBinding = { (* Declare a value or push an argument. *) value: backendIC, addr: int } and bicLambdaForm = { (* Lambda expressions. *) body : backendIC, name : string, closure : bicLoadForm list, argTypes : argumentType list, resultType : argumentType, localCount : int } and bicAddress = (* Address form used in loads, store and block operations. The base is an ML address if this is to/from ML memory or a (boxed) SysWord.word if it is to/from C memory. The index is a value in units of the size of the item being loaded/stored and the offset is always in bytes. *) {base: backendIC, index: backendIC option, offset: int} structure CodeTags = struct open Universal val tupleTag: universal list list tag = tag() fun splitProps _ [] = (NONE, []) | splitProps tag (hd::tl) = if Universal.tagIs tag hd then (SOME hd, tl) else let val (p, l) = splitProps tag tl in (p, hd :: l) end fun mergeTupleProps(p, []) = p | mergeTupleProps([], p) = p | mergeTupleProps(m, n) = ( case (splitProps tupleTag m, splitProps tupleTag n) of ((SOME mp, ml), (SOME np, nl)) => let val mpl = Universal.tagProject tupleTag mp and npl = Universal.tagProject tupleTag np val merge = ListPair.mapEq mergeTupleProps (mpl, npl) in Universal.tagInject tupleTag merge :: (ml @ nl) end | _ => m @ n ) end fun loadStoreKindRepr(LoadStoreMLWord {isImmutable=true}) = "MLWordImmutable" | loadStoreKindRepr(LoadStoreMLWord {isImmutable=false}) = "MLWord" | loadStoreKindRepr(LoadStoreMLByte {isImmutable=true}) = "MLByteImmutable" | loadStoreKindRepr(LoadStoreMLByte {isImmutable=false}) = "MLByte" | loadStoreKindRepr LoadStoreC8 = "C8Bit" | loadStoreKindRepr LoadStoreC16 = "C16Bit" | loadStoreKindRepr LoadStoreC32 = "C32Bit" | loadStoreKindRepr LoadStoreC64 = "C64Bit" | loadStoreKindRepr LoadStoreCFloat = "CFloat" | loadStoreKindRepr LoadStoreCDouble = "CDouble" | loadStoreKindRepr LoadStoreUntaggedUnsigned = "MLWordUntagged" fun blockOpKindRepr (BlockOpMove{isByteMove=false}) = "MoveWord" | blockOpKindRepr (BlockOpMove{isByteMove=true}) = "MoveByte" | blockOpKindRepr BlockOpEqualByte = "EqualByte" | blockOpKindRepr BlockOpCompareByte = "CompareByte" open Pretty fun pList ([]: 'b list, _: string, _: 'b->pretty) = [] | pList ([h], _, disp) = [disp h] | pList (h::t, sep, disp) = PrettyBlock (0, false, [], [ disp h, PrettyBreak (0, 0), PrettyString sep ] ) :: PrettyBreak (1, 0) :: pList (t, sep, disp) fun pretty (pt : backendIC) : pretty = let fun printList(start, lst, sep) : pretty = PrettyBlock (1, true, [], PrettyString (start ^ "(") :: pList(lst, sep, pretty) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) fun prettyArgType GeneralType = PrettyString "G" | prettyArgType DoubleFloatType = PrettyString "D" | prettyArgType SingleFloatType = PrettyString "F" | prettyArgType 
(ContainerType m) = PrettyString ("M"^Int.toString m) fun prettyArg (c, t) = PrettyBlock(1, false, [], [pretty c, PrettyBreak (1, 0), prettyArgType t]) fun prettyArgs(start, lst, sep) : pretty = PrettyBlock (1, true, [], PrettyString (start ^ "(") :: pList(lst, sep, prettyArg) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) fun prettyAddress({base, index, offset}: bicAddress): pretty = let in PrettyBlock (1, true, [], [ PrettyString "[", PrettyBreak (0, 3), pretty base, PrettyBreak (0, 0), PrettyString ",", PrettyBreak (1, 0), case index of NONE => PrettyString "-" | SOME i => pretty i, PrettyBreak (0, 0), PrettyString ",", PrettyBreak (1, 0), PrettyString(Int.toString offset), PrettyBreak (0, 0), PrettyString "]" ]) end in case pt of BICEval {function, argList, resultType} => let val prettyArgs = PrettyBlock (1, true, [], PrettyString ("$(") :: pList(argList, ",", prettyArg) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) in PrettyBlock (3, false, [], [ pretty function, PrettyBreak(1, 0), prettyArgType resultType, PrettyBreak(1, 0), prettyArgs ] ) end | BICUnary { oper, arg1 } => PrettyBlock (3, false, [], [ PrettyString(BuiltIns.unaryRepr oper), PrettyBreak(1, 0), printList("", [arg1], ",") ] ) | BICBinary { oper, arg1, arg2 } => PrettyBlock (3, false, [], [ PrettyString(BuiltIns.binaryRepr oper), PrettyBreak(1, 0), printList("", [arg1, arg2], ",") ] ) | BICNullary { oper } => PrettyString(BuiltIns.nullaryRepr oper) | BICArbitrary { oper, shortCond, arg1, arg2, longCall } => PrettyBlock (3, false, [], [ PrettyString(BuiltIns.arithRepr oper), PrettyBreak(1, 0), printList("", [shortCond, arg1, arg2, longCall], ",") ] ) | BICAllocateWordMemory { numWords, flags, initial } => PrettyBlock (3, false, [], [ PrettyString "AllocateWordMemory", PrettyBreak(1, 0), printList("", [numWords, flags, initial], ",") ] ) | BICExtract (BICLoadLocal addr) => let val str : string = concat ["LOCAL(", Int.toString addr, ")"] in PrettyString str end | BICExtract (BICLoadArgument addr) => let val str : string = concat ["PARAM(", Int.toString addr, ")"] in PrettyString str end | BICExtract (BICLoadClosure addr) => let val str : string = concat ["CLOS(", Int.toString addr, ")"] in PrettyString str end | BICExtract (BICLoadRecursive) => let val str : string = concat ["RECURSIVE(", ")"] in PrettyString str end | BICField {base, offset} => let val str = "INDIRECT(" ^ Int.toString offset ^ ", "; in PrettyBlock(0, false, [], [ PrettyString str, pretty base, PrettyString ")" ] ) end | BICLambda {body, name, closure, argTypes, resultType, localCount} => let fun prettyArgTypes [] = [] | prettyArgTypes [last] = [prettyArgType last] | prettyArgTypes (hd::tl) = prettyArgType hd :: PrettyBreak(1, 0) :: prettyArgTypes tl in PrettyBlock (1, true, [], [ PrettyString ("LAMBDA("), PrettyBreak (1, 0), PrettyString name, PrettyBreak (1, 0), PrettyString (" LOCALS=" ^ Int.toString localCount), PrettyBreak(1, 0), PrettyBlock (1, false, [], PrettyString "ARGS=" :: prettyArgTypes argTypes), PrettyBreak(1, 0), PrettyBlock (1, false, [], [PrettyString "RES=", prettyArgType resultType]), printList (" CLOS=", map BICExtract closure, ","), PrettyBreak (1, 0), pretty body, PrettyString "){LAMBDA}" ] ) end | BICConstnt (w, _) => PrettyString (stringOfWord w) | BICCond (f, s, t) => PrettyBlock (1, true, [], [ PrettyString "IF(", pretty f, PrettyString ", ", PrettyBreak (0, 0), pretty s, PrettyString ", ", PrettyBreak (0, 0), pretty t, PrettyBreak (0, 0), PrettyString (")") ] ) | BICNewenv(decs, final) => PrettyBlock (1, true, [], PrettyString 
("BLOCK" ^ "(") :: pList(decs, ";", prettyBinding) @ [ PrettyBreak (1, 0), pretty final, PrettyBreak (0, 0), PrettyString (")") ] ) | BICBeginLoop{loop=loopExp, arguments=args } => let fun prettyArg (c, t) = PrettyBlock(1, false, [], [prettySimpleBinding c, PrettyBreak (1, 0), prettyArgType t]) in PrettyBlock (3, false, [], [ PrettyBlock (1, true, [], PrettyString ("BEGINLOOP(") :: pList(args, ",", prettyArg) @ [ PrettyBreak (0, 0), PrettyString (")") ] ), PrettyBreak (0, 0), PrettyString "(", PrettyBreak (0, 0), pretty loopExp, PrettyBreak (0, 0), PrettyString ")" ] ) end | BICLoop ptl => prettyArgs("LOOP", ptl, ",") | BICRaise c => PrettyBlock (1, true, [], [ PrettyString "RAISE(", pretty c, PrettyBreak (0, 0), PrettyString (")") ] ) | BICHandle {exp, handler, exPacketAddr} => PrettyBlock (3, false, [], [ PrettyString "HANDLE(", pretty exp, PrettyString ("WITH exid=" ^ Int.toString exPacketAddr), PrettyBreak (1, 0), pretty handler, PrettyString ")" ] ) | BICCase {cases, test, default, isExhaustive, firstIndex, ...} => PrettyBlock (1, true, [], PrettyString "CASE (" :: pretty test :: PrettyBreak (1, 0) :: PrettyString ("( from " ^ Word.toString firstIndex ^ (if isExhaustive then " exhaustive" else "")) :: PrettyBreak (1, 0) :: pList(cases, ",", fn (SOME exp) => PrettyBlock (1, true, [], [ PrettyString "=>", PrettyBreak (1, 0), pretty exp ]) | NONE => PrettyString "=> default" ) @ [ PrettyBreak (1, 0), PrettyBlock (1, false, [], [ PrettyString "ELSE:", PrettyBreak (1, 0), pretty default ] ), PrettyBreak (1, 0), PrettyString (") {"^"CASE"^"}") ] ) | BICTuple ptl => printList("RECCONSTR", ptl, ",") | BICSetContainer{container, tuple, filter} => let val source = BoolVector.length filter val dest = BoolVector.foldl(fn (true, n) => n+1 | (false, n) => n) 0 filter in PrettyBlock (3, false, [], [ PrettyString (concat["SETCONTAINER(", Int.toString dest, "/", Int.toString source, ", "]), pretty container, PrettyBreak (0, 0), PrettyString ",", PrettyBreak (1, 0), pretty tuple, PrettyBreak (0, 0), PrettyString ")" ] ) end | BICLoadContainer {base, offset} => let val str = "INDIRECTCONTAINER(" ^ Int.toString offset ^ ", "; in PrettyBlock(0, false, [], [ PrettyString str, pretty base, PrettyString ")" ] ) end | BICTagTest { test, tag, maxTag } => PrettyBlock (3, false, [], [ PrettyString (concat["TAGTEST(", Word.toString tag, ", ", Word.toString maxTag, ","]), PrettyBreak (1, 0), pretty test, PrettyBreak (0, 0), PrettyString ")" ] ) | BICLoadOperation{ kind, address } => PrettyBlock (3, false, [], [ PrettyString("Load" ^ loadStoreKindRepr kind), PrettyBreak (1, 0), prettyAddress address ] ) | BICStoreOperation{ kind, address, value } => PrettyBlock (3, false, [], [ PrettyString("Store" ^ loadStoreKindRepr kind), PrettyBreak (1, 0), prettyAddress address, PrettyBreak (1, 0), PrettyString "<=", PrettyBreak (1, 0), pretty value ] ) | BICBlockOperation{ kind, sourceLeft, destRight, length } => PrettyBlock (3, false, [], [ PrettyString(blockOpKindRepr kind ^ "("), PrettyBreak (1, 0), prettyAddress sourceLeft, PrettyBreak (1, 0), PrettyString ",", prettyAddress destRight, PrettyBreak (1, 0), PrettyString ",", pretty length, PrettyBreak (1, 0), PrettyString ")" ] ) (* That list should be exhaustive! 
*) end (* pretty *) and prettyBinding(BICDeclar dec) = prettySimpleBinding dec | prettyBinding(BICRecDecs ptl) = let fun prettyRDec {lambda, addr} = PrettyBlock (1, false, [], [ PrettyString (concat ["DECL #", Int.toString addr, "="]), PrettyBreak (1, 0), pretty(BICLambda lambda) ] ) in PrettyBlock (1, true, [], PrettyString ("MUTUAL" ^ "(") :: pList(ptl, " AND ", prettyRDec) @ [ PrettyBreak (0, 0), PrettyString (")") ] ) end | prettyBinding(BICNullBinding c) = pretty c | prettyBinding(BICDecContainer{addr, size}) = PrettyString (concat ["CONTAINER #", Int.toString addr, "=", Int.toString size]) and prettySimpleBinding{value, addr} = PrettyBlock (1, false, [], [ PrettyString (concat ["DECL #", Int.toString addr, "="]), PrettyBreak (1, 0), pretty value ] ) structure Sharing = struct type backendIC = backendIC and bicLoadForm = bicLoadForm and caseType = caseType and pretty = pretty and argumentType = argumentType and bicCodeBinding = bicCodeBinding and bicSimpleBinding = bicSimpleBinding and loadStoreKind = loadStoreKind and blockOpKind = blockOpKind and unaryOps = BuiltIns.unaryOps and binaryOps = BuiltIns.binaryOps and nullaryOps = BuiltIns.nullaryOps and testConditions = BuiltIns.testConditions and arithmeticOperations = BuiltIns.arithmeticOperations end end; diff --git a/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML b/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML index de758fb6..a0adb9fe 100644 --- a/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML +++ b/mlsource/MLCompiler/CodeTree/ByteCode/IntGCode.ML @@ -1,1237 +1,1237 @@ (* Copyright (c) 2000 Cambridge University Technical Services Limited - Further development copyright David C.J. Matthews 2016-18,2020-21 + Further development copyright David C.J. Matthews 2016-18,2020-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Title: Generate interpretable code for Poly system from the code tree. Author: Dave Matthews, Cambridge University Computer Laboratory Copyright Cambridge University 1985 *) (* This generates byte-code that is interpreted by the run-time system. It is now used as a fall-back to allow Poly/ML to run on non-X86 architectures. Early versions were used as a porting aid while a native code-generator was being developed and the "enter-int" instructions that were needed for that have been retained although they no longer actually generate code. *) functor IntGCode ( structure CodeCons : INTCODECONS structure BackendTree: BACKENDINTERMEDIATECODE structure CodeArray: CODEARRAY sharing CodeCons.Sharing = BackendTree.Sharing = CodeArray.Sharing ) : GENCODE = struct open CodeCons open Address open BackendTree open Misc open CodeArray val word0 = toMachineWord 0; val DummyValue : machineWord = word0; (* used as result of "raise e" etc. 
*) type caseForm = { cases : (backendIC * word) list, test : backendIC, caseType: caseType, default : backendIC } (* Where the result, if any, should go *) datatype whereto = NoResult (* discard result *) | ToStack (* Need a result but it can stay on the pseudo-stack *); (* Are we at the end of the function. *) datatype tail = EndOfProc | NotEnd (* Code generate a function or global declaration *) fun codegen (pt, cvec, resultClosure, numOfArgs, localCount, parameters) = let datatype decEntry = StackAddr of int | Empty val decVec = Array.array (localCount, Empty) (* Count of number of items on the stack. *) val realstackptr = ref 1 (* The closure ptr is already there *) (* Maximum size of the stack. *) val maxStack = ref 1 (* Push a value onto the stack. *) fun incsp () = ( realstackptr := !realstackptr + 1; if !realstackptr > !maxStack then maxStack := !realstackptr else () ) (* An entry has been removed from the stack. *) fun decsp () = realstackptr := !realstackptr - 1; fun pushLocalStackValue addr = ( genLocal(!realstackptr + addr, cvec); incsp() ) (* Loads a local, argument or closure value; translating local stack addresses to real stack offsets. *) fun locaddr(BICLoadArgument locn) = pushLocalStackValue (numOfArgs-locn) | locaddr(BICLoadLocal locn) = ( (* positive address - on the stack. *) case Array.sub (decVec, locn) of StackAddr n => pushLocalStackValue (~ n) | _ => (* Should be on the stack, not a function. *) raise InternalError "locaddr: bad stack address" ) | locaddr(BICLoadClosure locn) = (* closure-pointer relative *) ( genIndirectClosure{addr = !realstackptr-1, item=locn, code=cvec}; incsp() ) | locaddr BICLoadRecursive = pushLocalStackValue ~1 (* The closure itself - first value on the stack. *) (* generates code from the tree *) fun gencde (pt : backendIC, whereto : whereto, tailKind : tail, loopAddr) : unit = let (* Save the stack pointer value here. We may want to reset the stack. *) val oldsp = !realstackptr; (* Operations on ML memory always have the base as an ML address. Word operations are always word aligned. The higher level will have extracted any constant offset and scaled it if necessary. That's helpful for the X86 but not for the interpreter. We have to turn them back into indexes. *) fun genMLAddress({base, index, offset}, scale) = ( gencde (base, ToStack, NotEnd, loopAddr); offset mod scale = 0 orelse raise InternalError "genMLAddress"; case (index, offset div scale) of (NONE, soffset) => (pushConst (toMachineWord soffset, cvec); incsp()) | (SOME indexVal, 0) => gencde (indexVal, ToStack, NotEnd, loopAddr) | (SOME indexVal, soffset) => ( gencde (indexVal, ToStack, NotEnd, loopAddr); pushConst (toMachineWord soffset, cvec); genOpcode(opcode_wordAdd, cvec) ) ) (* Load the address, index value and offset for non-byte operations. Because the offset has already been scaled by the size of the operand we have to load the index and offset separately. *) fun genCAddress{base, index, offset} = ( gencde (base, ToStack, NotEnd, loopAddr); case index of NONE => (pushConst (toMachineWord 0, cvec); incsp()) | SOME indexVal => gencde (indexVal, ToStack, NotEnd, loopAddr); pushConst (toMachineWord offset, cvec); incsp() ) val () = case pt of BICEval evl => genEval (evl, tailKind) | BICExtract ext => (* This may just be being used to discard a value which isn't used on this branch. 
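For example, the first item of an expression sequence may be just a variable whose value is discarded; whereto is then NoResult and there is nothing to generate.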
*) if whereto = NoResult then () else locaddr ext | BICField {base, offset} => (gencde (base, ToStack, NotEnd, loopAddr); genIndirect (offset, cvec)) | BICLoadContainer {base, offset} => (gencde (base, ToStack, NotEnd, loopAddr); genIndirectContainer (offset, cvec)) | BICLambda lam => genProc (lam, false, fn () => ()) | BICConstnt(w, _) => let val () = pushConst (w, cvec); in incsp () end | BICCond (testPart, thenPart, elsePart) => genCond (testPart, thenPart, elsePart, whereto, tailKind, loopAddr) | BICNewenv(decls, exp) => let (* Processes a list of entries. *) (* Mutually recursive declarations. May be either lambdas or constants. Recurse down the list pushing the addresses of the closure vectors, then unwind the recursion and fill them in. *) fun genMutualDecs [] = () | genMutualDecs ({lambda, addr, ...} :: otherDecs) = genProc (lambda, true, fn() => ( Array.update (decVec, addr, StackAddr (! realstackptr)); genMutualDecs (otherDecs) )) fun codeDecls(BICRecDecs dl) = genMutualDecs dl | codeDecls(BICDecContainer{size, addr}) = ( (* If this is a container we have to process it here otherwise it will be removed in the stack adjustment code. *) genContainer(size, cvec); (* Push the address of this container. *) realstackptr := !realstackptr + size + 1; (* Pushes N words plus the address. *) Array.update (decVec, addr, StackAddr(!realstackptr)) ) | codeDecls(BICDeclar{value, addr, ...}) = ( gencde (value, ToStack, NotEnd, loopAddr); Array.update (decVec, addr, StackAddr(!realstackptr)) ) | codeDecls(BICNullBinding exp) = gencde (exp, NoResult, NotEnd, loopAddr) in List.app codeDecls decls; gencde (exp, whereto, tailKind, loopAddr) end | BICBeginLoop {loop=body, arguments} => (* Execute the body which will contain at least one Loop instruction. There will also be path(s) which don't contain Loops and these will drop through. *) let val args = List.map #1 arguments (* Evaluate each of the arguments, pushing the result onto the stack. *) fun genLoopArg ({addr, value, ...}) = ( gencde (value, ToStack, NotEnd, loopAddr); Array.update (decVec, addr, StackAddr (!realstackptr)); !realstackptr (* Return the posn on the stack. *) ) val argIndexList = map genLoopArg args; val startSp = ! realstackptr; (* Remember the current top of stack. *) val startLoop = createLabel () val () = setLabel(startLoop, cvec) (* Start of loop *) in (* Process the body, passing the jump-back address down for the Loop instruction(s). *) gencde (body, whereto, tailKind, SOME(startLoop, startSp, argIndexList)) (* Leave the arguments on the stack. They can be cleared later if needed. *) end | BICLoop argList => (* Jump back to the enclosing BeginLoop. *) let val (startLoop, startSp, argIndexList) = case loopAddr of SOME l => l | NONE => raise InternalError "No BeginLoop for Loop instr" (* Evaluate the arguments. First push them to the stack because evaluating an argument may depend on the current value of others. Only when we've evaluated all of them can we overwrite the original argument positions. *) fun loadArgs ([], []) = !realstackptr - startSp (* The offset of all the args. *) | loadArgs (arg:: argList, _ :: argIndexList) = let (* Evaluate all the arguments. *) val () = gencde (arg, ToStack, NotEnd, NONE); val argOffset = loadArgs(argList, argIndexList); in genSetStackVal(argOffset, cvec); (* Copy the arg over. *) decsp(); (* The argument has now been popped. 
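Copying over the old slots only after every new value has been pushed matters when the arguments depend on each other: a loop that simply swapped its two arguments would otherwise overwrite the first slot before the second value had been read.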
*) argOffset end | loadArgs _ = raise InternalError "loadArgs: Mismatched arguments"; val _: int = loadArgs(List.map #1 argList, argIndexList) in if !realstackptr <> startSp then resetStack (!realstackptr - startSp, false, cvec) (* Remove any local variables. *) else (); (* Jump back to the start of the loop. *) putBranchInstruction(JumpBack, startLoop, cvec) end | BICRaise exp => ( gencde (exp, ToStack, NotEnd, loopAddr); genRaiseEx cvec ) | BICHandle {exp, handler, exPacketAddr} => let (* Save old handler *) val () = genPushHandler cvec val () = incsp () val handlerLabel = createLabel() val () = putBranchInstruction (SetHandler, handlerLabel, cvec) val () = incsp() (* Code generate the body; "NotEnd" because we have to come back to remove the handler; "ToStack" because delHandler needs a result to carry down. *) val () = gencde (exp, ToStack, NotEnd, loopAddr) (* Now get out of the handler and restore the old one. *) val () = genOpcode(opcode_deleteHandler, cvec) val skipHandler = createLabel() val () = putBranchInstruction (Jump, skipHandler, cvec) (* Now process the handler itself. First we have to reset the stack. Note that we have to use "ToStack" again to be consistent with the stack-handling in the body-part. If we actually wanted "NoResult", the stack adjustment code at the end of gencde will take care of this. This means that I don't want to do any clever "end-of-function" optimisation either. SPF 6/1/97 *) val () = realstackptr := oldsp val () = setLabel (handlerLabel, cvec) (* If we were executing machine code we must re-enter the interpreter. *) val () = genEnterIntCatch cvec (* Push the exception packet and set the address. *) val () = genLdexc cvec val () = incsp () val () = Array.update (decVec, exPacketAddr, StackAddr(!realstackptr)) val () = gencde (handler, ToStack, NotEnd, loopAddr) (* Have to remove the exception packet. *) val () = resetStack(1, true, cvec) val () = decsp() (* Finally fix-up the jump around the handler *) val () = setLabel (skipHandler, cvec) in () end | BICCase ({cases, test, default, firstIndex, ...}) => let val () = gencde (test, ToStack, NotEnd, loopAddr) (* Label to jump to at the end of each case. *) val exitJump = createLabel() val () = if firstIndex = 0w0 then () else ( (* Subtract lower limit. Don't check for overflow. Instead allow large value to wrap around and check in "case" instruction. *) pushConst(toMachineWord firstIndex, cvec); genOpcode(opcode_wordSub, cvec) ) (* Generate the case instruction followed by the table of jumps. *) val nCases = List.length cases val caseLabels = genCase (nCases, cvec) val () = decsp () (* The default case, if any, follows the case statement. *) (* If we have a jump to the default set it to jump here. *) local fun fixDefault(NONE, defCase) = setLabel(defCase, cvec) | fixDefault(SOME _, _) = () in val () = ListPair.appEq fixDefault (cases, caseLabels) end val () = gencde (default, whereto, tailKind, loopAddr); fun genCases(SOME body, label) = ( (* First exit from the previous case or the default if this is the first. *) putBranchInstruction(Jump, exitJump, cvec); (* Remove the result - the last case will leave it. *) case whereto of ToStack => decsp () | NoResult => (); (* Fix up the jump to come here. *) setLabel(label, cvec); gencde (body, whereto, tailKind, loopAddr) ) | genCases(NONE, _) = () val () = ListPair.appEq genCases (cases, caseLabels) (* Finally set the exit jump to come here. 
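The layout produced for a case is therefore roughly:
      <test value>  [ subtract firstIndex ]
      case instruction and jump table L1 ... Ln (values with no entry fall through)
      <default code>    jump exit
  L1: <case 1>          jump exit
      ...
  Ln: <case n>
  exit:
where each jump to exit is emitted at the start of the following genCases call, so the last case simply falls through to the exit label. (Labels here are purely illustrative.)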
*) val () = setLabel (exitJump, cvec) in () end | BICTuple recList => let val size = List.length recList in (* Move the fields into the vector. *) List.app(fn v => gencde (v, ToStack, NotEnd, loopAddr)) recList; genTuple (size, cvec); realstackptr := !realstackptr - (size - 1) end | BICSetContainer{container, tuple, filter} => (* Copy the contents of a tuple into a container. If the tuple is a Tuple instruction we can avoid generating the tuple and then unpacking it and simply copy the fields that make up the tuple directly into the container. *) ( case tuple of BICTuple cl => (* Simply set the container from the values. *) let (* Load the address of the container. *) val _ = gencde (container, ToStack, NotEnd, loopAddr); fun setValues([], _, _) = () | setValues(v::tl, sourceOffset, destOffset) = if sourceOffset < BoolVector.length filter andalso BoolVector.sub(filter, sourceOffset) then ( gencde (v, ToStack, NotEnd, loopAddr); (* Move the entry into the container. This instruction pops the value to be moved but not the destination. *) genMoveToContainer(destOffset, cvec); decsp(); setValues(tl, sourceOffset+1, destOffset+1) ) else setValues(tl, sourceOffset+1, destOffset) in setValues(cl, 0, 0) (* The container address is still on the stack. *) end | _ => let (* General case. *) (* First the target tuple, then the container. *) val () = gencde (tuple, ToStack, NotEnd, loopAddr) val () = gencde (container, ToStack, NotEnd, loopAddr) val last = BoolVector.foldli(fn (i, true, _) => i | (_, false, n) => n) ~1 filter fun copy (sourceOffset, destOffset) = if BoolVector.sub(filter, sourceOffset) then ( (* Duplicate the tuple address . *) genLocal(1, cvec); genIndirect(sourceOffset, cvec); genMoveToContainer(destOffset, cvec); if sourceOffset = last then () else copy (sourceOffset+1, destOffset+1) ) else copy(sourceOffset+1, destOffset) in copy (0, 0) (* The container and tuple addresses are still on the stack. *) end ) | BICTagTest { test, tag, ... } => ( gencde (test, ToStack, NotEnd, loopAddr); genEqualWordConst(tag, cvec) ) | BICNullary {oper=BuiltIns.GetCurrentThreadId} => ( genOpcode(opcode_getThreadId, cvec); incsp() ) - | BICNullary {oper=BuiltIns.CheckRTSException} => - ( (* Do nothing. This is done in the RTS call. *) + | BICNullary {oper=BuiltIns.CPUPause} => + ( (* Do nothing. It's really only a hint. 
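A native code-generator is free to turn CPUPause into a spin-wait hint such as a pause or yield instruction; in this byte-code fall-back there is nothing useful to emit, so it is treated as a no-op.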
*) ) | BICNullary {oper=BuiltIns.CreateMutex} => ( genOpcode(opcode_createMutex, cvec); incsp() ) | BICUnary { oper, arg1 } => let open BuiltIns val () = gencde (arg1, ToStack, NotEnd, loopAddr) in case oper of NotBoolean => genOpcode(opcode_notBoolean, cvec) | IsTaggedValue => genIsTagged cvec | MemoryCellLength => genOpcode(opcode_cellLength, cvec) | MemoryCellFlags => genOpcode(opcode_cellFlags, cvec) | ClearMutableFlag => genOpcode(opcode_clearMutable, cvec) | LongWordToTagged => genOpcode(opcode_longWToTagged, cvec) | SignedToLongWord => genOpcode(opcode_signedToLongW, cvec) | UnsignedToLongWord => genOpcode(opcode_unsignedToLongW, cvec) | RealAbs PrecDouble => genOpcode(opcode_realAbs, cvec) | RealNeg PrecDouble => genOpcode(opcode_realNeg, cvec) | RealFixedInt PrecDouble => genOpcode(opcode_fixedIntToReal, cvec) | RealAbs PrecSingle => genOpcode(opcode_floatAbs, cvec) | RealNeg PrecSingle => genOpcode(opcode_floatNeg, cvec) | RealFixedInt PrecSingle => genOpcode(opcode_fixedIntToFloat, cvec) | FloatToDouble => genOpcode(opcode_floatToReal, cvec) | DoubleToFloat => genDoubleToFloat cvec | RealToInt (PrecDouble, rnding) => genRealToInt(rnding, cvec) | RealToInt (PrecSingle, rnding) => genFloatToInt(rnding, cvec) | TouchAddress => resetStack(1, false, cvec) (* Discard this *) | AllocCStack => genOpcode(opcode_allocCSpace, cvec) | LockMutex => genOpcode(opcode_lockMutex, cvec) | TryLockMutex => genOpcode(opcode_tryLockMutex, cvec) | UnlockMutex => genOpcode(opcode_atomicReset, cvec) end | BICBinary { oper=BuiltIns.WordComparison{test=BuiltIns.TestEqual, ...}, arg1, arg2=BICConstnt(w, _) } => let val () = gencde (arg1, ToStack, NotEnd, loopAddr) in genEqualWordConst(toShort w, cvec) end | BICBinary { oper=BuiltIns.WordComparison{test=BuiltIns.TestEqual, ...}, arg1=BICConstnt(w, _), arg2 } => let val () = gencde (arg2, ToStack, NotEnd, loopAddr) in genEqualWordConst(toShort w, cvec) end | BICBinary { oper, arg1, arg2 } => let open BuiltIns val () = gencde (arg1, ToStack, NotEnd, loopAddr) val () = gencde (arg2, ToStack, NotEnd, loopAddr) in case oper of WordComparison{test=TestEqual, ...} => genOpcode(opcode_equalWord, cvec) | WordComparison{test=TestLess, isSigned=true} => genOpcode(opcode_lessSigned, cvec) | WordComparison{test=TestLessEqual, isSigned=true} => genOpcode(opcode_lessEqSigned, cvec) | WordComparison{test=TestGreater, isSigned=true} => genOpcode(opcode_greaterSigned, cvec) | WordComparison{test=TestGreaterEqual, isSigned=true} => genOpcode(opcode_greaterEqSigned, cvec) | WordComparison{test=TestLess, isSigned=false} => genOpcode(opcode_lessUnsigned, cvec) | WordComparison{test=TestLessEqual, isSigned=false} => genOpcode(opcode_lessEqUnsigned, cvec) | WordComparison{test=TestGreater, isSigned=false} => genOpcode(opcode_greaterUnsigned, cvec) | WordComparison{test=TestGreaterEqual, isSigned=false} => genOpcode(opcode_greaterEqUnsigned, cvec) | WordComparison{test=TestUnordered, ...} => raise InternalError "WordComparison: TestUnordered" | PointerEq => genOpcode(opcode_equalWord, cvec) | FixedPrecisionArith ArithAdd => genOpcode(opcode_fixedAdd, cvec) | FixedPrecisionArith ArithSub => genOpcode(opcode_fixedSub, cvec) | FixedPrecisionArith ArithMult => genOpcode(opcode_fixedMult, cvec) | FixedPrecisionArith ArithQuot => genOpcode(opcode_fixedQuot, cvec) | FixedPrecisionArith ArithRem => genOpcode(opcode_fixedRem, cvec) | FixedPrecisionArith ArithDiv => raise InternalError "TODO: FixedPrecisionArith ArithDiv" | FixedPrecisionArith ArithMod => raise InternalError "TODO: 
FixedPrecisionArith ArithMod" | WordArith ArithAdd => genOpcode(opcode_wordAdd, cvec) | WordArith ArithSub => genOpcode(opcode_wordSub, cvec) | WordArith ArithMult => genOpcode(opcode_wordMult, cvec) | WordArith ArithDiv => genOpcode(opcode_wordDiv, cvec) | WordArith ArithMod => genOpcode(opcode_wordMod, cvec) | WordArith _ => raise InternalError "WordArith - unimplemented instruction" | WordLogical LogicalAnd => genOpcode(opcode_wordAnd, cvec) | WordLogical LogicalOr => genOpcode(opcode_wordOr, cvec) | WordLogical LogicalXor => genOpcode(opcode_wordXor, cvec) | WordShift ShiftLeft => genOpcode(opcode_wordShiftLeft, cvec) | WordShift ShiftRightLogical => genOpcode(opcode_wordShiftRLog, cvec) | WordShift ShiftRightArithmetic => genOpcode(opcode_wordShiftRArith, cvec) | AllocateByteMemory => genOpcode(opcode_allocByteMem, cvec) | LargeWordComparison TestEqual => genOpcode(opcode_lgWordEqual, cvec) | LargeWordComparison TestLess => genOpcode(opcode_lgWordLess, cvec) | LargeWordComparison TestLessEqual => genOpcode(opcode_lgWordLessEq, cvec) | LargeWordComparison TestGreater => genOpcode(opcode_lgWordGreater, cvec) | LargeWordComparison TestGreaterEqual => genOpcode(opcode_lgWordGreaterEq, cvec) | LargeWordComparison TestUnordered => raise InternalError "LargeWordComparison: TestUnordered" | LargeWordArith ArithAdd => genOpcode(opcode_lgWordAdd, cvec) | LargeWordArith ArithSub => genOpcode(opcode_lgWordSub, cvec) | LargeWordArith ArithMult => genOpcode(opcode_lgWordMult, cvec) | LargeWordArith ArithDiv => genOpcode(opcode_lgWordDiv, cvec) | LargeWordArith ArithMod => genOpcode(opcode_lgWordMod, cvec) | LargeWordArith _ => raise InternalError "LargeWordArith - unimplemented instruction" | LargeWordLogical LogicalAnd => genOpcode(opcode_lgWordAnd, cvec) | LargeWordLogical LogicalOr => genOpcode(opcode_lgWordOr, cvec) | LargeWordLogical LogicalXor => genOpcode(opcode_lgWordXor, cvec) | LargeWordShift ShiftLeft => genOpcode(opcode_lgWordShiftLeft, cvec) | LargeWordShift ShiftRightLogical => genOpcode(opcode_lgWordShiftRLog, cvec) | LargeWordShift ShiftRightArithmetic => genOpcode(opcode_lgWordShiftRArith, cvec) | RealComparison (TestEqual, PrecDouble) => genOpcode(opcode_realEqual, cvec) | RealComparison (TestLess, PrecDouble) => genOpcode(opcode_realLess, cvec) | RealComparison (TestLessEqual, PrecDouble) => genOpcode(opcode_realLessEq, cvec) | RealComparison (TestGreater, PrecDouble) => genOpcode(opcode_realGreater, cvec) | RealComparison (TestGreaterEqual, PrecDouble) => genOpcode(opcode_realGreaterEq, cvec) | RealComparison (TestUnordered, PrecDouble) => genOpcode(opcode_realUnordered, cvec) | RealComparison (TestEqual, PrecSingle) => genOpcode(opcode_floatEqual, cvec) | RealComparison (TestLess, PrecSingle) => genOpcode(opcode_floatLess, cvec) | RealComparison (TestLessEqual, PrecSingle) => genOpcode(opcode_floatLessEq, cvec) | RealComparison (TestGreater, PrecSingle) => genOpcode(opcode_floatGreater, cvec) | RealComparison (TestGreaterEqual, PrecSingle) => genOpcode(opcode_floatGreaterEq, cvec) | RealComparison (TestUnordered, PrecSingle) => genOpcode(opcode_floatUnordered, cvec) | RealArith (ArithAdd, PrecDouble) => genOpcode(opcode_realAdd, cvec) | RealArith (ArithSub, PrecDouble) => genOpcode(opcode_realSub, cvec) | RealArith (ArithMult, PrecDouble) => genOpcode(opcode_realMult, cvec) | RealArith (ArithDiv, PrecDouble) => genOpcode(opcode_realDiv, cvec) | RealArith (ArithAdd, PrecSingle) => genOpcode(opcode_floatAdd, cvec) | RealArith (ArithSub, PrecSingle) => genOpcode(opcode_floatSub, 
cvec) | RealArith (ArithMult, PrecSingle) => genOpcode(opcode_floatMult, cvec) | RealArith (ArithDiv, PrecSingle) => genOpcode(opcode_floatDiv, cvec) | RealArith _ => raise InternalError "RealArith - unimplemented instruction" | FreeCStack => genOpcode(opcode_freeCSpace, cvec) ; decsp() (* Removes one item from the stack. *) end | BICAllocateWordMemory {numWords as BICConstnt(length, _), flags as BICConstnt(flagByte, _), initial } => if isShort length andalso toShort length = 0w1 andalso isShort flagByte andalso toShort flagByte = 0wx40 then (* This is a very common case. *) ( gencde (initial, ToStack, NotEnd, loopAddr); genOpcode(opcode_alloc_ref, cvec) ) else let val () = gencde (numWords, ToStack, NotEnd, loopAddr) val () = gencde (flags, ToStack, NotEnd, loopAddr) val () = gencde (initial, ToStack, NotEnd, loopAddr) in genOpcode(opcode_allocWordMemory, cvec); decsp(); decsp() end | BICAllocateWordMemory { numWords, flags, initial } => let val () = gencde (numWords, ToStack, NotEnd, loopAddr) val () = gencde (flags, ToStack, NotEnd, loopAddr) val () = gencde (initial, ToStack, NotEnd, loopAddr) in genOpcode(opcode_allocWordMemory, cvec); decsp(); decsp() end | BICLoadOperation { kind=LoadStoreMLWord _, address={base, index=NONE, offset}} => ( (* If the index is a constant, frequently zero, we can use indirection. The offset is a byte count so has to be divided by the word size but it should always be an exact multiple. *) gencde (base, ToStack, NotEnd, loopAddr); offset mod Word.toInt wordSize = 0 orelse raise InternalError "gencde: BICLoadOperation - not word multiple"; genIndirect (offset div Word.toInt wordSize, cvec) ) | BICLoadOperation { kind=LoadStoreMLWord _, address} => ( genMLAddress(address, Word.toInt wordSize); genOpcode(opcode_loadMLWord, cvec); decsp() ) | BICLoadOperation { kind=LoadStoreMLByte _, address} => ( genMLAddress(address, 1); genOpcode(opcode_loadMLByte, cvec); decsp() ) | BICLoadOperation { kind=LoadStoreC8, address} => ( genCAddress address; genOpcode(opcode_loadC8, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreC16, address} => ( genCAddress address; genOpcode(opcode_loadC16, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreC32, address} => ( genCAddress address; genOpcode(opcode_loadC32, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreC64, address} => ( genCAddress address; genOpcode(opcode_loadC64, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreCFloat, address} => ( genCAddress address; genOpcode(opcode_loadCFloat, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreCDouble, address} => ( genCAddress address; genOpcode(opcode_loadCDouble, cvec); decsp(); decsp() ) | BICLoadOperation { kind=LoadStoreUntaggedUnsigned, address} => ( genMLAddress(address, Word.toInt wordSize); genOpcode(opcode_loadUntagged, cvec); decsp() ) | BICStoreOperation { kind=LoadStoreMLWord _, address, value } => ( genMLAddress(address, Word.toInt wordSize); gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeMLWord, cvec); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreMLByte _, address, value } => ( genMLAddress(address, 1); gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeMLByte, cvec); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC8, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC8, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC16, address, value} => ( genCAddress address; gencde (value, 
ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC16, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC32, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC32, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreC64, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeC64, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreCFloat, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeCFloat, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreCDouble, address, value} => ( genCAddress address; gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeCDouble, cvec); decsp(); decsp(); decsp() ) | BICStoreOperation { kind=LoadStoreUntaggedUnsigned, address, value} => ( genMLAddress(address, Word.toInt wordSize); gencde (value, ToStack, NotEnd, loopAddr); genOpcode(opcode_storeUntagged, cvec); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpMove{isByteMove=true}, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, 1); genMLAddress(destRight, 1); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockMoveByte, cvec); decsp(); decsp(); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpMove{isByteMove=false}, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, Word.toInt wordSize); genMLAddress(destRight, Word.toInt wordSize); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockMoveWord, cvec); decsp(); decsp(); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpEqualByte, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, 1); genMLAddress(destRight, 1); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockEqualByte, cvec); decsp(); decsp(); decsp(); decsp() ) | BICBlockOperation { kind=BlockOpCompareByte, sourceLeft, destRight, length } => ( genMLAddress(sourceLeft, 1); genMLAddress(destRight, 1); gencde (length, ToStack, NotEnd, loopAddr); genOpcode(opcode_blockCompareByte, cvec); decsp(); decsp(); decsp(); decsp() ) | BICArbitrary { oper, arg1, arg2, ... } => let open BuiltIns val () = gencde (arg1, ToStack, NotEnd, loopAddr) val () = gencde (arg2, ToStack, NotEnd, loopAddr) in case oper of ArithAdd => genOpcode(opcode_arbAdd, cvec) | ArithSub => genOpcode(opcode_arbSubtract, cvec) | ArithMult => genOpcode(opcode_arbMultiply, cvec) | _ => raise InternalError "Unknown arbitrary precision operation"; decsp() (* Removes one item from the stack. *) end in (* body of gencde *) (* This ensures that there is precisely one item on the stack if whereto = ToStack and no items if whereto = NoResult. There are two points to note carefully here: (1) Negative stack adjustments are legal if we have exited. This is because matchFailFn can cut the stack back too far for its immediately enclosing expression. This is harmless because the code actually exits that expression. (2) A stack adjustment of ~1 is legal if we're generating a declaration in "ToStack" mode, because not all declarations actually generate the dummy value that we expect. This used to be handled in resetStack itself, but it's more transparent to do it here. (In addition, there was a bug in resetStack - it accumulated the stack resets, but didn't correctly accumulate these "~1" dummy value pushes.) It's all much better now. 
SPF 9/1/97 *) case whereto of ToStack => let val newsp = oldsp + 1; val adjustment = !realstackptr - newsp val () = if adjustment = 0 then () else if adjustment < ~1 then raise InternalError ("gencde: bad adjustment " ^ Int.toString adjustment) (* Hack for declarations that should push values, but don't *) else if adjustment = ~1 then pushConst (DummyValue, cvec) else resetStack (adjustment, true, cvec) in realstackptr := newsp end | NoResult => let val adjustment = !realstackptr - oldsp val () = if adjustment = 0 then () else if adjustment < 0 then raise InternalError ("gencde: bad adjustment " ^ Int.toString adjustment) else resetStack (adjustment, false, cvec) in realstackptr := oldsp end end (* gencde *) (* doNext is only used for mutually recursive functions where a function may not be able to fill in its closure if it does not have all the remaining declarations. *) (* TODO: This always creates the closure on the heap even when makeClosure is false. *) and genProc ({ closure=[], localCount, body, argTypes, name, ...}: bicLambdaForm, mutualDecs, doNext: unit -> unit) : unit = let (* Create a one word item for the closure. This is returned for recursive references and filled in with the address of the code when we've finished. *) val closure = makeConstantClosure() val newCode : code = codeCreate(name, parameters); (* Code-gen function. No non-local references. *) val () = codegen (body, newCode, closure, List.length argTypes, localCount, parameters); val () = pushConst(closureAsAddress closure, cvec); val () = incsp(); in if mutualDecs then doNext () else () end | genProc ({ localCount, body, name, argTypes, closure, ...}, mutualDecs, doNext) = let (* Full closure required. *) val resClosure = makeConstantClosure() val newCode = codeCreate (name, parameters) (* Code-gen function. *) val () = codegen (body, newCode, resClosure, List.length argTypes, localCount, parameters) val closureVars = List.length closure (* Size excluding the code address *) in if mutualDecs then let (* Have to make the closure now and fill it in later. *) val () = pushConst(toMachineWord resClosure, cvec) val () = genAllocMutableClosure(closureVars, cvec) val () = incsp () val entryAddr : int = !realstackptr val () = doNext () (* Any mutually recursive functions. *) (* Push the address of the vector - If we have processed other closures the vector will no longer be on the top of the stack. *) val () = pushLocalStackValue (~ entryAddr) (* Load items for the closure. *) fun loadItems ([], _) = () | loadItems (v :: vs, addr : int) = let (* Generate an item and move it into the clsoure *) val () = gencde (BICExtract v, ToStack, NotEnd, NONE) (* The closure "address" excludes the code address. *) val () = genMoveToMutClosure(addr, cvec) val () = decsp () in loadItems (vs, addr + 1) end val () = loadItems (closure, 0) val () = genLock cvec (* Lock it. *) (* Remove the extra reference. *) val () = resetStack (1, false, cvec) in realstackptr := !realstackptr - 1 end else let (* Put it on the stack. *) val () = pushConst (toMachineWord resClosure, cvec) val () = incsp () val () = List.app (fn pt => gencde (BICExtract pt, ToStack, NotEnd, NONE)) closure val () = genClosure (closureVars, cvec) in realstackptr := !realstackptr - closureVars end end and genCond (testCode, thenCode, elseCode, whereto, tailKind, loopAddr) = let (* andalso and orelse are turned into conditionals with constants. Convert this into a series of tests. 
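For instance the front end represents x andalso y as if x then y else false and x orelse y as if x then true else y, the same shape that mkCand and mkCor in CodeTreeConstruction.ML build at the codetree level with CodeZero and CodeTrue as the constant arms:
           if x then y else false       corresponds to  x andalso y
           if x then true else y        corresponds to  x orelse y
       so genTest only ever has to recognise conditionals whose arms include constants.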
*) fun genTest(BICConstnt(w, _), jumpOn, targetLabel) = let val cVal = case toShort w of 0w0 => false | 0w1 => true | _ => raise InternalError "genTest" in if cVal = jumpOn then putBranchInstruction (Jump, targetLabel, cvec) else () end | genTest(BICUnary { oper=BuiltIns.NotBoolean, arg1 }, jumpOn, targetLabel) = genTest(arg1, not jumpOn, targetLabel) | genTest(BICCond (testPart, thenPart, elsePart), jumpOn, targetLabel) = let val toElse = createLabel() and exitJump = createLabel() in genTest(testPart, false, toElse); genTest(thenPart, jumpOn, targetLabel); putBranchInstruction (Jump, exitJump, cvec); setLabel (toElse, cvec); genTest(elsePart, jumpOn, targetLabel); setLabel (exitJump, cvec) end | genTest(testCode, jumpOn, targetLabel) = ( gencde (testCode, ToStack, NotEnd, loopAddr); putBranchInstruction(if jumpOn then JumpTrue else JumpFalse, targetLabel, cvec); decsp() (* conditional branch pops a value. *) ) val toElse = createLabel() and exitJump = createLabel() val () = genTest(testCode, false, toElse) val () = gencde (thenCode, whereto, tailKind, loopAddr) (* Get rid of the result from the stack. If there is a result then the ``else-part'' will push it. *) val () = case whereto of ToStack => decsp () | NoResult => () val () = putBranchInstruction (Jump, exitJump, cvec) (* start of "else part" *) val () = setLabel (toElse, cvec) val () = gencde (elseCode, whereto, tailKind, loopAddr) val () = setLabel (exitJump, cvec) in () end (* genCond *) and genEval (eval, tailKind : tail) : unit = let val argList : backendIC list = List.map #1 (#argList eval) val argsToPass : int = List.length argList; (* Load arguments *) fun loadArgs [] = () | loadArgs (v :: vs) = let (* Push each expression onto the stack. *) val () = gencde(v, ToStack, NotEnd, NONE) in loadArgs vs end; (* Called after the args and the closure to call have been pushed onto the stack. *) fun callClosure () : unit = case tailKind of NotEnd => (* Normal call. *) genCallClosure cvec | EndOfProc => (* Tail recursive call. *) let (* Get the return address onto the top of the stack. *) val () = pushLocalStackValue 0 (* Slide the return address, closure and args over the old closure, return address and args, and reset the stack. Then jump to the closure. *) val () = genTailCall(argsToPass + 2, !realstackptr - 1 + (numOfArgs - argsToPass), cvec); (* It's "-1" not "-2", because we didn't bump the realstackptr when we pushed the return address. SPF 3/1/97 *) in () end (* Have to guarantee that the expression to return the function is evaluated before the arguments. *) (* Returns true if evaluating it later is safe. *) fun safeToLeave (BICConstnt _) = true | safeToLeave (BICLambda _) = true | safeToLeave (BICExtract _) = true | safeToLeave (BICField {base, ...}) = safeToLeave base | safeToLeave (BICLoadContainer {base, ...}) = safeToLeave base | safeToLeave _ = false val () = if (case argList of [] => true | _ => safeToLeave (#function eval)) then let (* Can load the args first. *) val () = loadArgs argList in gencde (#function eval, ToStack, NotEnd, NONE) end else let (* The expression for the function is too complicated to risk leaving. It might have a side-effect and we must ensure that any side-effects it has are done before the arguments are loaded. *) val () = gencde(#function eval, ToStack, NotEnd, NONE); val () = loadArgs(argList); (* Load the function again. *) val () = genLocal(argsToPass, cvec); in incsp () end val () = callClosure () (* Call the function. 
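The call made here respects the usual ML order: the function expression has already been evaluated before its arguments unless safeToLeave showed that postponing it could do no harm. For example, given suitable f and x, in
           (print "left"; f) (print "right"; x)
       the string left must be printed before right, which is why a complicated function expression is pushed first and then re-fetched with genLocal after the arguments have been loaded.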
*) (* Make sure we interpret when we return from the call *) val () = genEnterIntCall (cvec, argsToPass) in (* body of genEval *) realstackptr := !realstackptr - argsToPass (* Args popped by caller. *) end (* Generate the function. *) (* Assume we always want a result. There is otherwise a problem if the called routine returns a result of type void (i.e. no result) but the caller wants a result (e.g. the identity function). *) val () = gencde (pt, ToStack, EndOfProc, NONE) val () = genReturn (numOfArgs, cvec); in (* body of codegen *) (* Having code-generated the body of the function, it is copied into a new data segment. *) copyCode{code = cvec, maxStack = !maxStack, resultClosure=resultClosure, numberOfArguments=numOfArgs} end (* codegen *); fun gencodeLambda({ name, body, argTypes, localCount, ...}:bicLambdaForm, parameters, closure) = let (* make the code buffer for the new function. *) val newCode : code = codeCreate (name, parameters) (* This function must have no non-local references. *) in codegen (body, newCode, closure, List.length argTypes, localCount, parameters) end local val makeEntryPoint: string -> machineWord = RunCall.rtsCallFull1 "PolyCreateEntryPointObject" fun rtsCall makeCall (entryName: string, numOfArgs, debugArgs: Universal.universal list): machineWord = let open Address val cvec = codeCreate (entryName, debugArgs) val entryPointAddr = makeEntryPoint entryName (* Each argument is at the same offset, essentially we're just shifting them *) fun genLocals 0 = () | genLocals n = (genLocal(numOfArgs +1, cvec); genLocals (n-1)) val () = genLocals numOfArgs val () = pushConst(entryPointAddr, cvec) val () = makeCall(numOfArgs, cvec) val () = genReturn (numOfArgs, cvec) val closure = makeConstantClosure() val () = copyCode{code=cvec, maxStack=numOfArgs+1, numberOfArguments=numOfArgs, resultClosure=closure} in closureAsAddress closure end in structure Foreign = struct val rtsCallFast = rtsCall genRTSCallFast fun rtsCallFastRealtoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastRealtoReal c) (entryName, 1, debugArgs) and rtsCallFastRealRealtoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastRealRealtoReal c) (entryName, 2, debugArgs) and rtsCallFastGeneraltoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastGeneraltoReal c) (entryName, 1, debugArgs) and rtsCallFastRealGeneraltoReal(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastRealGeneraltoReal c) (entryName, 2, debugArgs) fun rtsCallFastFloattoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastFloattoFloat c) (entryName, 1, debugArgs) and rtsCallFastFloatFloattoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastFloatFloattoFloat c) (entryName, 2, debugArgs) and rtsCallFastGeneraltoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastGeneraltoFloat c) (entryName, 1, debugArgs) and rtsCallFastFloatGeneraltoFloat(entryName, debugArgs) = rtsCall (fn (_, c) => genRTSCallFastFloatGeneraltoFloat c) (entryName, 2, debugArgs) type abi = int (* This must match the type in Foreign.LowLevel. Once this is bootstrapped we could use that type but note that this is the type we use within the compiler and we build Foreign.LowLevel AFTER compiling this. 
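As an illustration of the cType descriptions declared just below, a C struct of two ints could be written, on the assumption of a platform where int is four bytes with four-byte alignment:
           val cInt  = { typeForm = CTypeSignedInt, align = 0w4, size = 0w4 }
           val cPair = { typeForm = CTypeStruct [cInt, cInt], align = 0w4, size = 0w8 }
       and passed in the argument list given to createCIF. The sizes and alignments here are assumptions about the platform; the real descriptions are built by the Foreign structure in the basis library.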
*) datatype cTypeForm = CTypeFloatingPt | CTypePointer | CTypeSignedInt | CTypeUnsignedInt | CTypeStruct of cType list | CTypeVoid withtype cType = { typeForm: cTypeForm, align: word, size: word } val abiList: unit -> (string * abi) list = RunCall.rtsCallFull0 "PolyInterpretedGetAbiList" type cif = Foreign.Memory.voidStar val createCIF: abi * cType * cType list -> cif= RunCall.rtsCallFull3 "PolyInterpretedCreateCIF" val callCFunction: cif * LargeWord.word * LargeWord.word * LargeWord.word -> unit = RunCall.rtsCallFull4 "PolyInterpretedCallFunction" (* foreignCall returns a function that actually calls the foreign function. *) fun foreignCall(abi, argTypes, resultType) = let val memocif = Foreign.Memory.memoise (fn () => createCIF(abi, resultType, argTypes)) () val closure = makeConstantClosure() (* For compatibility with the native code version we have to construct a function that takes three arguments rather than a single triple. *) val bodyCode = BICEval{function=BICConstnt(toMachineWord callCFunction, []), argList=[ (BICTuple[ BICEval{ function=BICConstnt(toMachineWord memocif, []), argList=[(BICConstnt(toMachineWord 0, []), GeneralType)], (* Unit. *) resultType=GeneralType }, BICExtract(BICLoadArgument 0), BICExtract(BICLoadArgument 2), BICExtract(BICLoadArgument 1)], GeneralType) ], resultType=GeneralType} val lambdaCode = { body = bodyCode, name = "foreignCall", closure=[], argTypes=[GeneralType, GeneralType, GeneralType], resultType = GeneralType, localCount=0} val () = gencodeLambda(lambdaCode, [], closure) in closureAsAddress closure end fun buildCallBack((*abi*) _, (*argTypes*) _, (*resultType*)_) = let fun buildClosure ((*mlFun*)_: LargeWord.word*LargeWord.word -> unit) = (* The result is the SysWord.word holding the C function. *) raise Foreign.Foreign "foreignCall not implemented" in Address.toMachineWord buildClosure end end end structure Sharing = struct open BackendTree.Sharing type closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML b/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML index 815defde..d46502c3 100644 --- a/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML +++ b/mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML @@ -1,610 +1,613 @@ (* - Copyright (c) 2012,13,15-21 David C.J. Matthews + Copyright (c) 2012,13,15-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor CodeTreeConstruction ( structure Debug: DEBUG structure Pretty : PRETTY structure BaseCodeTree: BASECODETREE structure CodetreeFunctions: CODETREEFUNCTIONS structure Backend: sig type codetree type machineWord = Address.machineWord val codeGenerate: codetree * int * Universal.universal list -> (unit -> machineWord) * Universal.universal list structure Foreign: FOREIGNCALL structure Sharing : sig type codetree = codetree end end structure Optimiser: sig type codetree and envSpecial and codeBinding val codetreeOptimiser: codetree * Universal.universal list * int -> { numLocals: int, general: codetree, bindings: codeBinding list, special: envSpecial } structure Sharing: sig type codetree = codetree and envSpecial = envSpecial and codeBinding = codeBinding end end sharing type Pretty.pretty = BaseCodeTree.pretty sharing BaseCodeTree.Sharing = CodetreeFunctions.Sharing = Backend.Sharing = Optimiser.Sharing ) : CODETREE = struct open Address open StretchArray open BaseCodeTree open Pretty open CodetreeFunctions exception InternalError = Misc.InternalError and Interrupt = Thread.Thread.Interrupt infix 9 sub; fun mkDec (laddr, res) = Declar{value = res, addr = laddr, use=[]} fun deExtract(Extract ext) = ext | deExtract _ = raise InternalError "deExtract" datatype level = Level of { lev: int, closure: createClosure, lookup: int * int * bool -> loadForm } local (* We can have locals at the outer level. *) fun bottomLevel(addr, 0, false) = if addr < 0 then raise InternalError "load: negative" else LoadLocal addr | bottomLevel _ = (* Either the level is wrong or it's a parameter. *) raise InternalError "bottom level" in val baseLevel = Level { lev = 0, closure = makeClosure(), lookup = bottomLevel } end fun newLevel (Level{ lev, lookup, ...}) = let val closureList = makeClosure() val makeClosure = addToClosure closureList fun check n = if n < 0 then raise InternalError "load: negative" else n fun thisLevel(addr, level, isParam) = if level < 0 then raise InternalError "mkLoad: level must be non-negative" else if level > 0 then makeClosure(lookup(addr, level-1, isParam)) else (* This level *) if isParam then LoadArgument(check addr) else LoadLocal(check addr) in Level { lev = lev+1, closure = closureList, lookup = thisLevel } end fun getClosure(Level{ closure, ...}) = List.map Extract (extractClosure closure) fun mkLoad (addr, Level { lev = newLevel, lookup, ... } , Level { lev = oldLevel, ... }) = Extract(lookup(addr, newLevel - oldLevel, false)) and mkLoadParam(addr, Level { lev = newLevel, lookup, ... } , Level { lev = oldLevel, ... }) = Extract(lookup(addr, newLevel - oldLevel, true)) (* Transform a function so that free variables are converted to closure form. Returns the maximum local address used. *) fun genCode(pt, debugSwitches, numLocals) = let val printCodeTree = Debug.getParameter Debug.codetreeTag debugSwitches and compilerOut = Pretty.getCompilerOutput debugSwitches (* val printCodeTree = true and compilerOut = PRETTY.prettyPrint(TextIO.print, 70) *) (* If required, print it first. This is the code that the front-end has produced. *) val () = if printCodeTree then compilerOut(pretty pt) else () (* This ensures that everything is printed just before it is code-generated. 
*) fun codeAndPrint(code, localCount) = let val () = if printCodeTree then compilerOut (BaseCodeTree.pretty code) else (); in Backend.codeGenerate(code, localCount, debugSwitches) end (* Optimise it. *) val { numLocals = localCount, general = gen, bindings = decs, special = spec } = Optimiser.codetreeOptimiser(pt, debugSwitches, numLocals) (* At this stage we have a "general" value and also, possibly a "special" value. We could simply create mkEnv(decs, gen) and run preCode and genCode on that. However, we would lose the ability to insert any inline functions from this code into subsequent top-level expressions. We can't simply retain the "special" entry either because that may refer to values that have to be created once when the code is run. Such values will be referenced by "load" entries which refer to entries in the "decs". We construct a tuple which will contain the actual values after the code is run. Then if we want the value at some time in the future when we use something from the "special" entry we can extract the corresponding value from this tuple. Previously, this code always generated a tuple containing every declaration. That led to some very long compilation times because the back-end has some code which is quadratic in the number of entries on the stack. We now try to prune bindings by only generating the tuple if we have an inline function somewhere and only generating bindings we actually need. *) fun simplifySpec (EnvSpecTuple(size, env)) = let (* Get all the field entries. *) fun simpPair (gen, spec) = (gen, simplifySpec spec) val fields = List.tabulate(size, simpPair o env) in if List.all(fn (_, EnvSpecNone) => true | _ => false) fields then EnvSpecNone else EnvSpecTuple(size, fn n => List.nth(fields, n)) end | simplifySpec s = s (* None or inline function. *) in case simplifySpec spec of EnvSpecNone => let val (code, props) = codeAndPrint (mkEnv(decs, gen), localCount) in fn () => Constnt(code(), props) end | simpleSpec => let (* The bindings are marked using a three-valued mark. A binding is needed if it is referenced in any way. During the scan to find the references we need to avoid processing an entry that has already been processed but it is possible that a binding may be referenced as a general value only (e.g. from a function closure) and separately as a special value. See Test148.ML *) datatype visit = UnVisited | VisitedGeneral | VisitedSpecial local val refArray = Array.array(localCount, UnVisited) fun findDecs EnvSpecNone = () | findDecs (EnvSpecTuple(size, env)) = let val fields = List.tabulate(size, env) in List.app processGenAndSpec fields end | findDecs (EnvSpecInlineFunction({closure, ...}, env)) = let val closureItems = List.tabulate(List.length closure, env) in List.app processGenAndSpec closureItems end | findDecs (EnvSpecUnary _) = () | findDecs (EnvSpecBinary _) = () and processGenAndSpec (gen, spec) = (* The spec part needs only to be processed if this entry has not yet been visited, *) case gen of EnvGenLoad(LoadLocal addr) => let val previous = Array.sub(refArray, addr) in case (previous, spec) of (VisitedSpecial, _) => () (* Fully done *) | (VisitedGeneral, EnvSpecNone) => () (* Nothing useful *) | (_, EnvSpecNone) => (* We need this entry but we don't have any special entry to process. We could find another reference with a special entry. *) Array.update(refArray, addr, VisitedGeneral) | (_, _) => ( (* This has a special entry. Mark it and process. 
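One way to read this case analysis is as taking an upper bound in the ordering UnVisited, then VisitedGeneral, then VisitedSpecial: a sighting with a special part always promotes the mark and forces its spec to be scanned, while a general-only sighting never demotes an existing mark. Purely as an illustration, not code from this file, the ordering amounts to:
           datatype visit = UnVisited | VisitedGeneral | VisitedSpecial
           fun promote (VisitedSpecial, _) = VisitedSpecial
             | promote (_, VisitedSpecial) = VisitedSpecial
             | promote (VisitedGeneral, _) = VisitedGeneral
             | promote (_, v) = v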
*) Array.update(refArray, addr, VisitedSpecial); findDecs spec ) end | EnvGenConst _ => () | _ => raise InternalError "doGeneral: not LoadLocal or Constant" val () = findDecs simpleSpec in (* Convert to an immutable data structure. This will continue to be referenced in any inline function after the code has run. *) val refVector = Array.vector refArray end val decArray = Array.array(localCount, CodeZero) fun addDec(addr, dec) = if Vector.sub(refVector, addr) <> UnVisited then Array.update(decArray, addr, dec) else () fun addDecs(Declar{addr, ...}) = addDec(addr, mkLoadLocal addr) | addDecs(RecDecs decs) = List.app(fn {addr, ...} => addDec(addr, mkLoadLocal addr)) decs | addDecs(NullBinding _) = () | addDecs(Container{addr, size, ...}) = addDec(addr, mkTupleFromContainer(addr, size)) val () = List.app addDecs decs (* Construct the tuple and add the "general" value at the start. *) val resultTuple = mkTuple(gen :: Array.foldr(op ::) nil decArray) (* Now generate the machine code and return it as a function that can be called. *) val (code, codeProps) = codeAndPrint (mkEnv (decs, resultTuple), localCount) in (* Return a function that executes the compiled code and then creates the final "global" value as the result. *) fn () => let local (* Execute the code. This will perform any side-effects the user has programmed and may raise an exception if that is required. *) val resVector = code () (* The result is a vector containing the "general" value as the first word and the evaluated bindings for any "special" entries in subsequent words. *) val decVals : address = if isShort resVector then raise InternalError "Result vector is not an address" else toAddress resVector in fun resultWordN n = loadWord (decVals, n) (* Get the general value, the zero'th entry in the vector. *) val generalVal = resultWordN 0w0 (* Get the properties for a field in the tuple. Because the result is a tuple all the properties should be contained in a tupleTag entry. *) val fieldProps = case Option.map (Universal.tagProject CodeTags.tupleTag) (List.find(Universal.tagIs CodeTags.tupleTag) codeProps) of NONE => (fn _ => []) | SOME p => (fn n => List.nth(p, n)) val generalProps = fieldProps 0 end (* Construct a new environment so that when an entry is looked up the corresponding constant is returned. *) fun newEnviron (oldEnv) args = let val (oldGeneral, oldSpecial) = oldEnv args val genPair = case oldGeneral of EnvGenLoad(LoadLocal addr) => ( (* For the moment retain this check. It's better to have an assertion failure than a segfault. *) Vector.sub(refVector, addr) <> UnVisited orelse raise InternalError "Reference to non-existent binding"; (resultWordN(Word.fromInt addr+0w1), fieldProps(addr+1)) ) | EnvGenConst c => c | _ => raise InternalError "codetree newEnviron: Not Extract or Constnt" val specVal = mapSpec oldSpecial in (EnvGenConst genPair, specVal) end and mapSpec EnvSpecNone = EnvSpecNone | mapSpec (EnvSpecTuple(size, env)) = EnvSpecTuple(size, newEnviron env) | mapSpec (EnvSpecInlineFunction(spec, env)) = EnvSpecInlineFunction(spec, (newEnviron env)) | mapSpec (EnvSpecUnary _) = EnvSpecNone | mapSpec (EnvSpecBinary _) = EnvSpecNone in (* and return the whole lot as a global value. *) Constnt(generalVal, setInline(mapSpec simpleSpec) generalProps) end end end (* genCode *) (* Constructor functions for the front-end of the compiler. 
*) local fun mkSimpleFunction inlineType (lval, args, name, closure, numLocals) = { body = lval, isInline = inlineType, name = if name = "" then "" else name, closure = map deExtract closure, argTypes = List.tabulate(args, fn _ => (GeneralType, [])), resultType = GeneralType, localCount = numLocals, recUse = [] } in val mkProc = Lambda o mkSimpleFunction DontInline (* Normal function *) and mkInlproc = Lambda o mkSimpleFunction InlineAlways (* Explicitly inlined by the front-end *) (* Unless Compiler.inlineFunctor is false functors are treated as macros and expanded when they are applied. Unlike core-language functions they are not first-class values so if they are inline the "value" returned in the initial binding can just be zero except if there is something in the closure. Almost always the closure will be empty since free variables will come from previous topdecs and will be constants, The exception is if a structure and a functor using the structure appear in the same topdec (no semicolon between them). In that case we can't leave it. We would have to update the closure even if we leave the body untouched but we could have closure entries that are constants. e.g. structure S = struct val x = 1 end functor F() = struct open S end *) fun mkMacroProc (args as (_, _, _, [], _)) = Constnt(toMachineWord 0, setInline ( EnvSpecInlineFunction(mkSimpleFunction InlineAlways args, fn _ => raise InternalError "mkMacroProc: closure")) []) | mkMacroProc args = Lambda(mkSimpleFunction InlineAlways args) end local fun mkFunWithTypes inlineType { body, argTypes=argsAndTypes, resultType, name, closure, numLocals } = Lambda { body = body, isInline = inlineType, name = if name = "" then "" else name, closure = map deExtract closure, argTypes = map (fn t => (t, [])) argsAndTypes, resultType = resultType, localCount = numLocals, recUse = [] } in val mkFunction = mkFunWithTypes DontInline and mkInlineFunction = mkFunWithTypes InlineAlways end fun mkEval (ct, clist) = Eval { function = ct, argList = List.map(fn c => (c, GeneralType)) clist, resultType=GeneralType } fun mkCall(func, argsAndTypes, resultType) = Eval { function = func, argList = argsAndTypes, resultType=resultType } (* Basic built-in operations. *) fun mkUnary (oper, arg1) = Unary { oper = oper, arg1 = arg1 } and mkBinary (oper, arg1, arg2) = Binary { oper = oper, arg1 = arg1, arg2 = arg2 } val getCurrentThreadId = Nullary{oper=BuiltIns.GetCurrentThreadId} val getCurrentThreadIdFn = mkInlproc(getCurrentThreadId, 1 (* Ignores argument *), "GetThreadId()", [], 0) - - val checkRTSException = Nullary{oper=BuiltIns.CheckRTSException} + + val cpuPause = Nullary{oper=BuiltIns.CPUPause} + val cpuPauseFn = + mkInlproc(cpuPause, 1 (* Ignores argument *), "CPUPause()", [], 0) + val createMutex = Nullary{oper=BuiltIns.CreateMutex} val createMutexFn = mkInlproc(createMutex, 1 (* Ignores argument *), "CreateMutex()", [], 0) fun mkAllocateWordMemory (numWords, flags, initial) = AllocateWordMemory { numWords = numWords, flags = flags, initial = initial } val mkAllocateWordMemoryFn = mkInlproc( mkAllocateWordMemory(mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0), mkInd(2, mkLoadArgument 0)), 1, "AllocateWordMemory()", [], 0) (* Builtins wrapped as functions. N.B. These all take a single argument which may be a tuple. 
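So, for example, the wrapper built by mkBinaryFn for a word addition behaves at the ML level like a one-argument function over a pair, in the spirit of
           val addPair = fn (p : word * word) => #1 p + #2 p
       rather than a curried two-argument function; addPair is only an illustrative stand-in, the real wrapper selects the two fields of its argument with mkInd and applies the underlying built-in.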
*) fun mkUnaryFn oper = mkInlproc(mkUnary(oper, mkLoadArgument 0), 1, BuiltIns.unaryRepr oper ^ "()", [], 0) and mkBinaryFn oper = mkInlproc(mkBinary(oper, mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0)), 1, BuiltIns.binaryRepr oper ^ "()", [], 0) local open BuiltIns (* Word equality. The value of isSigned doesn't matter. *) val eqWord = WordComparison{test=TestEqual, isSigned=false} in fun mkNot arg = Unary{oper=NotBoolean, arg1=arg} and mkIsShort arg = Unary{oper=IsTaggedValue, arg1=arg} and mkEqualTaggedWord (arg1, arg2) = Binary{oper=eqWord, arg1=arg1, arg2=arg2} and mkEqualPointerOrWord (arg1, arg2) = Binary{oper=PointerEq, arg1=arg1, arg2=arg2} val equalTaggedWordFn = (* This takes two words, not a tuple. *) mkInlproc(mkBinary(eqWord, mkLoadArgument 0, mkLoadArgument 1), 2, "EqualWord()", [], 0) and equalPointerOrWordFn = (* This takes two words, not a tuple. *) mkInlproc(mkBinary(PointerEq, mkLoadArgument 0, mkLoadArgument 1), 2, "EqualWord()", [], 0) end fun mkLoadOperation(oper, base, index) = LoadOperation{kind=oper, address={base=base, index=SOME index, offset=0}} fun mkLoadOperationFn oper = mkInlproc(mkLoadOperation(oper, mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0)), 1, "loadOperation()", [], 0) fun mkStoreOperation(oper, base, index, value) = StoreOperation{kind=oper, address={base=base, index=SOME index, offset=0}, value=value} fun mkStoreOperationFn oper = mkInlproc(mkStoreOperation(oper, mkInd(0, mkLoadArgument 0), mkInd(1, mkLoadArgument 0), mkInd(2, mkLoadArgument 0)), 1, "storeOperation()", [], 0) fun mkBlockOperation {kind, leftBase, leftIndex, rightBase, rightIndex, length } = BlockOperation { kind = kind, sourceLeft={base=leftBase, index=SOME leftIndex, offset=0}, destRight={base=rightBase, index=SOME rightIndex, offset=0}, length=length} (* Construct a function that takes five arguments. The order is left-base, right-base, left-index, right-index, length. *) fun mkBlockOperationFn kind = mkInlproc( mkBlockOperation{kind=kind, leftBase=mkInd(0, mkLoadArgument 0), rightBase=mkInd(1, mkLoadArgument 0), leftIndex=mkInd(2, mkLoadArgument 0), rightIndex=mkInd(3, mkLoadArgument 0), length=mkInd(4, mkLoadArgument 0)}, 1, "blockOperation()", [], 0) fun identityFunction (name : string) : codetree = mkInlproc (mkLoadArgument 0, 1, name, [], 0) (* Returns its argument. *); (* Test a tag value. *) fun mkTagTest(test: codetree, tagValue: word, maxTag: word) = TagTest {test=test, tag=tagValue, maxTag=maxTag } fun mkHandle (exp, handler, exId) = Handle {exp = exp, handler = handler, exPacketAddr = exId} fun mkStr (strbuff:string) = Constnt (toMachineWord strbuff, []) (* If we have multiple references to a piece of code we may have to save it in a temporary and then use it from there. If the code has side-effects we certainly must do that to ensure that the side-effects are done exactly once and in the correct order, however if the code is just a constant or a load we can reduce the amount of code we generate by simply returning the original code. *) fun multipleUses (code as Constnt _, _, _) = {load = (fn _ => code), dec = []} (* | multipleUses (code as Extract(LoadLegacy{addr, level=loadLevel, ...}), _, level) = let (* May have to adjust the level. *) fun loadFn lev = if lev = level then code else mkLoad (addr, loadLevel + lev, level)) in {load = loadFn, dec = []} end | multipleUses (code as Extract(LoadLocal addr), _, level) = let (* May have to adjust the level. 
*) fun loadFn lev = if lev = level then code else mkLoad (addr, lev - level) in {load = loadFn, dec = []} end | multipleUses (code as Extract(LoadArgument _), _, level) = let (* May have to adjust the level. *) fun loadFn lev = if lev = level then code else raise InternalError "multipleUses: different level" (*else mkLoad (addr, lev - level)*) in {load = loadFn, dec = []} end | multipleUses (Extract _, _, _) = raise InternalError "multipleUses: TODO" *) | multipleUses (code, nextAddress, level) = let val addr = nextAddress(); fun loadFn lev = mkLoad (addr, lev, level); in {load = loadFn, dec = [mkDec (addr, code)]} end (* multipleUses *); fun mkMutualDecs [] = raise InternalError "mkMutualDecs: empty declaration list" | mkMutualDecs l = let fun convertDec(a, Lambda lam) = {lambda = lam, addr = a, use=[]} | convertDec _ = raise InternalError "mkMutualDecs: Recursive declaration is not a function" in RecDecs(List.map convertDec l) end val mkNullDec = NullBinding fun mkContainer(addr, size, setter) = Container{addr=addr, size=size, use=[], setter=setter} val mkIf = Cond and mkRaise = Raise fun mkConst v = Constnt(v, []) (* For the moment limit these to general arguments. *) fun mkLoop args = Loop (List.map(fn c => (c, GeneralType)) args) and mkBeginLoop(exp, args) = BeginLoop{loop=exp, arguments=List.map(fn(i, v) => ({value=v, addr=i, use=[]}, GeneralType)) args} fun mkWhile(b, e) = (* Generated as if b then (e; ) else (). *) mkBeginLoop(mkIf(b, mkEnv([NullBinding e], mkLoop[]), CodeZero), []) (* We previously had conditional-or and conditional-and as separate instructions. I've taken them out since they can be implemented just as efficiently as a normal conditional. In addition they were interfering with the optimisation where the second expression contained the last reference to something. We needed to add a "kill entry" to the other branch but there wasn't another branch to add it to. DCJM 7/12/00. *) fun mkCor(xp1, xp2) = mkIf(xp1, CodeTrue, xp2); fun mkCand(xp1, xp2) = mkIf(xp1, xp2, CodeZero); val mkSetContainer = fn (container, tuple, size) => mkSetContainer(container, tuple, BoolVector.tabulate(size, fn _ => true)) (* We don't generate the +, -, < etc operations directly here. Instead we create functions that the basis library can use to create the final versions by applying these functions to the arguments and an RTS function. The inline expansion system takes care of all the optimisation. An arbitrary precision operation takes a tuple consisting of a pair of arguments and a function. The code that is constructed checks both arguments to see if they are short. If they are not or the short precision operation overflows the code to call the function is executed. *) local val argX = mkInd(0, mkLoadArgument 0) and argY = mkInd(1, mkLoadArgument 0) val testShort = mkCand(mkIsShort argX, mkIsShort argY) val longCall = mkEval(mkInd(2, mkLoadArgument 0), [mkTuple[argX, argY]]) in fun mkArbitraryFn (oper as ArbArith arith) = mkInlproc( Arbitrary{oper=oper, shortCond=testShort, arg1=argX, arg2=argY, longCall=longCall }, 1, "Arbitrary" ^ BuiltIns.arithRepr arith ^ "()", [], 0) | mkArbitraryFn (oper as ArbCompare test) = (* The long function here is PolyCompareArbitrary which returns -1,0,+1 so the result has to be compared with zero. 
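In effect the function built for, say, ArbCompare TestLess behaves like
           fn (x, y, longCompare) =>
               if bothShort (x, y) then shortLess (x, y) else longCompare (x, y) < 0
       where bothShort and shortLess are stand-ins for the tag test and the tagged fixed-precision comparison; only the slow path calls the RTS, and it is that call's result which is compared with zero.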
*) let val comparedResult = Binary{oper=BuiltIns.WordComparison{test=test, isSigned=true}, arg1=longCall, arg2=CodeZero} in mkInlproc( Arbitrary{oper=oper, shortCond=testShort, arg1=argX, arg2=argY, longCall=comparedResult }, 1, "Arbitrary" ^ BuiltIns.testRepr test ^ "()", [], 0) end end structure Foreign = Backend.Foreign structure Sharing = struct type machineWord = machineWord type codetree = codetree type pretty = pretty type argumentType=argumentType type codeBinding = codeBinding type level = level end end (* CODETREE functor body *); diff --git a/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML b/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML index 1ad5b27f..67bbe391 100644 --- a/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML +++ b/mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML @@ -1,625 +1,625 @@ (* - Copyright (c) 2012,13,16,18-21 David C.J. Matthews + Copyright (c) 2012,13,16,18-22 David C.J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Miscellaneous construction and operation functions on the code-tree. *) functor CodetreeFunctions( structure BaseCodeTree: BASECODETREE structure Strongly: sig val stronglyConnectedComponents: {nodeAddress: 'a -> int, arcs: 'a -> int list } -> 'a list -> 'a list list end ) : CODETREEFUNCTIONS = struct open BaseCodeTree open Strongly open Address exception InternalError = Misc.InternalError fun mkEnv([], exp) = exp | mkEnv(decs, exp) = Newenv(decs, exp) val word0 = toMachineWord 0 and word1 = toMachineWord 1 val False = word0 and True = word1 val F_mutable_words : Word8.word = Word8.orb (F_words, F_mutable) val CodeFalse = Constnt(False, []) and CodeTrue = Constnt(True, []) and CodeZero = Constnt(word0, []) (* Properties of code. This indicates the extent to which the code has side-effects (i.e. where even if the result is unused the code still needs to be produced) or is applicative (i.e. where its value depends only arguments and can safely be reordered). *) (* The RTS has a table of properties for RTS functions. The 103 call returns these Or-ed into the register mask. *) val PROPWORD_NORAISE = 0wx40000000 and PROPWORD_NOUPDATE = 0wx20000000 and PROPWORD_NODEREF = 0wx10000000 (* Since RTS calls are being eliminated leave residual versions of these. *) fun earlyRtsCall _ = false and sideEffectFreeRTSCall _ = false local infix orb andb val op orb = Word.orb and op andb = Word.andb val noSideEffect = PROPWORD_NORAISE orb PROPWORD_NOUPDATE val applicative = noSideEffect orb PROPWORD_NODEREF in fun codeProps (Lambda _) = applicative | codeProps (Constnt _) = applicative | codeProps (Extract _) = applicative | codeProps (TagTest{ test, ... }) = codeProps test | codeProps (Cond(i, t, e)) = codeProps i andb codeProps t andb codeProps e | codeProps (Newenv(decs, exp)) = List.foldl (fn (d, r) => bindingProps d andb r) (codeProps exp) decs | codeProps (Handle { exp, handler, ... 
}) = (* A handler processes all the exceptions in the body *) (codeProps exp orb PROPWORD_NORAISE) andb codeProps handler | codeProps (Tuple { fields, ...}) = testList fields | codeProps (Indirect{base, ...}) = codeProps base (* A built-in function may be side-effect free. This can occur if we have, for example, "if exp1 orelse exp2" where exp2 can be reduced to "true", typically because it's inside an inline function and some of the arguments to the function are constants. This then gets converted to (exp1; true) and we can eliminate exp1 if it is simply a comparison. *) | codeProps (Unary{oper, arg1}) = let open BuiltIns val operProps = case oper of NotBoolean => applicative | IsTaggedValue => applicative | MemoryCellLength => applicative (* MemoryCellFlags could return a different result if a mutable cell was locked. *) | MemoryCellFlags => applicative | ClearMutableFlag => Word.orb(PROPWORD_NODEREF, PROPWORD_NORAISE) | LongWordToTagged => applicative | SignedToLongWord => applicative | UnsignedToLongWord => applicative | RealAbs _ => applicative (* Does not depend on rounding setting. *) | RealNeg _ => applicative (* Does not depend on rounding setting. *) (* If we float a 64-bit int to a 64-bit floating point value we may lose precision so this depends on the current rounding mode. *) | RealFixedInt _ => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) | FloatToDouble => applicative (* This also depends on the current rounding mode. *) | DoubleToFloat => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) (* May raise the overflow exception *) | RealToInt _ => PROPWORD_NOUPDATE orb PROPWORD_NODEREF | TouchAddress => PROPWORD_NORAISE (* Treat as updating a notional reference count. *) | AllocCStack => PROPWORD_NORAISE | LockMutex => PROPWORD_NORAISE | TryLockMutex => PROPWORD_NORAISE | UnlockMutex => PROPWORD_NORAISE in operProps andb codeProps arg1 end | codeProps (Binary{oper, arg1, arg2}) = let open BuiltIns val mayRaise = PROPWORD_NOUPDATE orb PROPWORD_NODEREF val operProps = case oper of WordComparison _ => applicative | FixedPrecisionArith _ => mayRaise | WordArith _ => applicative (* Quot and Rem don't raise exceptions - zero checking is done before. *) | WordLogical _ => applicative | WordShift _ => applicative | AllocateByteMemory => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) (* Allocation returns a different value on each call. *) | LargeWordComparison _ => applicative | LargeWordArith _ => applicative (* Quot and Rem don't raise exceptions - zero checking is done before. *) | LargeWordLogical _ => applicative | LargeWordShift _ => applicative | RealComparison _ => applicative (* Real arithmetic operations depend on the current rounding setting. *) | RealArith _ => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) | FreeCStack => PROPWORD_NORAISE orb PROPWORD_NODEREF | PointerEq => applicative in operProps andb codeProps arg1 andb codeProps arg2 end | codeProps (Nullary{oper=BuiltIns.GetCurrentThreadId}) = Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) - | codeProps (Nullary{oper=BuiltIns.CheckRTSException}) = PROPWORD_NOUPDATE + | codeProps (Nullary{oper=BuiltIns.CPUPause}) = PROPWORD_NORAISE | codeProps (Nullary{oper=BuiltIns.CreateMutex}) = Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) | codeProps (Arbitrary{shortCond, arg1, arg2, longCall, ...}) = (* Arbitrary precision operations are applicative but the longCall is a function call. It should never have a side-effect so it might be better to remove it. 
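Note, though, that because codeProps (Eval _) is 0w0 and the longCall always contains the call to the RTS function, combining it below with andb currently makes every Arbitrary node come out with no useful properties at all, so in practice these nodes are never treated as side-effect free.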
*) codeProps shortCond andb codeProps arg1 andb codeProps arg2 andb codeProps longCall | codeProps (AllocateWordMemory {numWords, flags, initial}) = let val operProps = Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) in operProps andb codeProps numWords andb codeProps flags andb codeProps initial end | codeProps (Eval _) = 0w0 | codeProps(Raise exp) = codeProps exp andb (Word.notb PROPWORD_NORAISE) (* Treat these as unsafe at least for the moment. *) | codeProps(BeginLoop _) = 0w0 | codeProps(Loop _) = 0w0 | codeProps (SetContainer _) = 0w0 | codeProps (LoadOperation {address, kind}) = let val operProps = case kind of LoadStoreMLWord {isImmutable=true} => applicative | LoadStoreMLByte {isImmutable=true} => applicative | _ => Word.orb(PROPWORD_NOUPDATE, PROPWORD_NORAISE) in operProps andb addressProps address end | codeProps (StoreOperation {address, value, ...}) = Word.orb(PROPWORD_NODEREF, PROPWORD_NORAISE) andb addressProps address andb codeProps value | codeProps (BlockOperation {kind, sourceLeft, destRight, length}) = let val operProps = case kind of BlockOpMove _ => PROPWORD_NORAISE | BlockOpEqualByte => applicative | BlockOpCompareByte => applicative in operProps andb addressProps sourceLeft andb addressProps destRight andb codeProps length end and testList t = List.foldl(fn (c, r) => codeProps c andb r) applicative t and bindingProps(Declar{value, ...}) = codeProps value | bindingProps(RecDecs _) = applicative (* These should all be lambdas *) | bindingProps(NullBinding c) = codeProps c | bindingProps(Container{setter, ...}) = codeProps setter and addressProps{base, index=NONE, ...} = codeProps base | addressProps{base, index=SOME index, ...} = codeProps base andb codeProps index (* sideEffectFree - does not raise an exception or make an assignment. *) fun sideEffectFree c = (codeProps c andb noSideEffect) = noSideEffect (* reorderable - does not raise an exception or access a reference. *) and reorderable c = codeProps c = applicative end (* Return the inline property if it is set. *) fun findInline [] = EnvSpecNone | findInline (h::t) = if Universal.tagIs CodeTags.inlineCodeTag h then Universal.tagProject CodeTags.inlineCodeTag h else findInline t (* Makes a constant value from an expression which is known to be constant but may involve inline functions, tuples etc. *) fun makeConstVal (cVal:codetree) = let fun makeVal (c as Constnt _) = c (* should just be a tuple *) (* Get a vector, copy the entries into it and return it as a constant. *) | makeVal (Tuple {fields= [], ...}) = CodeZero (* should have been optimised already! *) | makeVal (Tuple {fields, ...}) = let val tupleSize = List.length fields val vec : address = allocWordData(Word.fromInt tupleSize, F_mutable_words, word0) val fieldCode = map makeVal fields fun copyToVec ([], _) = [] | copyToVec (Constnt(w, prop) :: t, locn) = ( assignWord (vec, locn, w); prop :: copyToVec (t, locn + 0w1) ) | copyToVec _ = raise InternalError "not constant" val props = copyToVec(fieldCode, 0w0) (* If any of the constants have properties create a tuple property for the result. *) val tupleProps = if List.all null props then [] else let (* We also need to construct an EnvSpecTuple property because findInline does not look at tuple properties. 
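A concrete case: once its parts have been reduced to constants, a top-level pair such as
           val p = (fn x => x + 1, 2)
       has a first field carrying an inline-function property; without the EnvSpecTuple built here a later #1 p would still see the tuple property, but findInline would answer EnvSpecNone and the function could no longer be expanded inline. The binding p is purely illustrative, not taken from the source.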
*) val inlineProps = map findInline props val inlineProp = if List.all (fn EnvSpecNone => true | _ => false) inlineProps then [] else let fun tupleEntry n = (EnvGenConst(loadWord(vec, Word.fromInt n), List.nth(props, n)), List.nth(inlineProps, n)) in [Universal.tagInject CodeTags.inlineCodeTag (EnvSpecTuple(tupleSize, tupleEntry))] end in Universal.tagInject CodeTags.tupleTag props :: inlineProp end in lock vec; Constnt(toMachineWord vec, tupleProps) end | makeVal _ = raise InternalError "makeVal - not constant or tuple" in makeVal cVal end local fun allConsts [] = true | allConsts (Constnt _ :: t) = allConsts t | allConsts _ = false fun mkRecord isVar xp = let val tuple = Tuple{fields = xp, isVariant = isVar } in if allConsts xp then (* Make it now. *) makeConstVal tuple else tuple end; in val mkTuple = mkRecord false and mkDatatype = mkRecord true end (* Set the inline property. If the property is already present it is replaced. If the property we are setting is EnvSpecNone no property is set. *) fun setInline p (h::t) = if Universal.tagIs CodeTags.inlineCodeTag h then setInline p t else h :: setInline p t | setInline EnvSpecNone [] = [] | setInline p [] = [Universal.tagInject CodeTags.inlineCodeTag p] (* These are very frequently used and it might be worth making special bindings for values such as 0, 1, 2, 3 etc to reduce garbage. *) fun checkNonZero n = if n < 0 then raise InternalError "mkLoadxx: argument negative" else n val mkLoadLocal = Extract o LoadLocal o checkNonZero and mkLoadArgument = Extract o LoadArgument o checkNonZero and mkLoadClosure = Extract o LoadClosure o checkNonZero (* Set the container to the fields of the record. Try to push this down as far as possible. *) fun mkSetContainer(container, Cond(ifpt, thenpt, elsept), filter) = Cond(ifpt, mkSetContainer(container, thenpt, filter), mkSetContainer(container, elsept, filter)) | mkSetContainer(container, Newenv(decs, exp), filter) = Newenv(decs, mkSetContainer(container, exp, filter)) | mkSetContainer(_, r as Raise _, _) = r (* We may well have the situation where one branch of an "if" raises an exception. We can simply raise the exception on that branch. *) | mkSetContainer(container, Handle {exp, handler, exPacketAddr}, filter) = Handle{exp=mkSetContainer(container, exp, filter), handler=mkSetContainer(container, handler, filter), exPacketAddr = exPacketAddr} | mkSetContainer(container, tuple, filter) = SetContainer{container = container, tuple = tuple, filter = filter } local val except: exn = InternalError "Invalid load encountered in compiler" (* Exception value to use for invalid cases. We put this in the code but it should never actually be executed. *) val raiseError = Raise (Constnt (toMachineWord except, [])) in (* Look for an entry in a tuple. Used in both the optimiser and in mkInd. *) fun findEntryInBlock (Tuple { fields, isVariant, ...}, offset, isVar) = ( isVariant = isVar orelse raise InternalError "findEntryInBlock: tuple/datatype mismatch"; if offset < List.length fields then List.nth(fields, offset) (* This can arise if we're processing a branch of a case discriminating on a datatype which won't actually match at run-time. e.g. Tests/Succeed/Test030. *) else if isVar then raiseError else raise InternalError "findEntryInBlock: invalid address" ) | findEntryInBlock (Constnt (b, props), offset, isVar) = let (* Find the tuple property if it is present and extract the field props. 
*) val fieldProps = case List.find(Universal.tagIs CodeTags.tupleTag) props of NONE => [] | SOME p => List.nth(Universal.tagProject CodeTags.tupleTag p, offset) in case findInline props of EnvSpecTuple(_, env) => (* Do the selection now. This is especially useful if we have a global structure *) (* At the moment at least we assume that we can get all the properties from the tuple selection. *) ( case env offset of (EnvGenConst(w, p), inl) => Constnt(w, setInline inl p) (* The general value from selecting a field from a constant tuple must be a constant. *) | _ => raise InternalError "findEntryInBlock: not constant" ) | _ => (* The ML compiler may generate loads from invalid addresses as a result of a val binding to a constant which has the wrong shape. e.g. val a :: b = nil It will always result in a Bind exception being generated before the invalid load, but we have to be careful that the optimiser does not fall over. *) if isShort b orelse not (Address.isWords (toAddress b)) orelse Address.length (toAddress b) <= Word.fromInt offset then if isVar then raiseError else raise InternalError "findEntryInBlock: invalid address" else Constnt (loadWord (toAddress b, Word.fromInt offset), fieldProps) end | findEntryInBlock(base, offset, isVar) = Indirect {base = base, offset = offset, indKind = if isVar then IndVariant else IndTuple} (* anything else *) end (* Exported indirect load operation i.e. load a field from a tuple. We can't use findEntryInBlock in every case since that discards unused entries in a tuple and at this point we haven't checked that the unused entries don't have side-effects/raise exceptions e.g. #1 (1, raise Fail "bad") *) local fun mkIndirect isVar (addr, base as Constnt _) = findEntryInBlock(base, addr, isVar) | mkIndirect isVar (addr, base) = Indirect {base = base, offset = addr, indKind = if isVar then IndVariant else IndTuple} in val mkInd = mkIndirect false and mkVarField = mkIndirect true end fun mkIndContainer(addr, base) = Indirect{offset=addr, base=base, indKind=IndContainer} (* Create a tuple from a container. *) fun mkTupleFromContainer(addr, size) = Tuple{fields = List.tabulate(size, fn n => mkIndContainer(n, mkLoadLocal addr)), isVariant = false} (* Get the value from the code. *) fun evalue (Constnt(c, _)) = SOME c | evalue _ = NONE (* This is really to simplify the change from mkEnv taking a codetree list to taking a codeBinding list * code. This extracts the last entry which must be a NullBinding and packages the declarations with it. *) fun decSequenceWithFinalExp decs = let fun splitLast _ [] = raise InternalError "decSequenceWithFinalExp: empty" | splitLast decs [NullBinding exp] = (List.rev decs, exp) | splitLast _ [_] = raise InternalError "decSequenceWithFinalExp: last is not a NullDec" | splitLast decs (hd::tl) = splitLast (hd:: decs) tl in mkEnv(splitLast [] decs) end local type node = { addr: int, lambda: lambdaForm, use: codeUse list } fun nodeAddress({addr, ...}: node) = addr and arcs({lambda={closure, ...}, ...}: node) = List.foldl(fn (LoadLocal addr, l) => addr :: l | (_, l) => l) [] closure in val stronglyConnected = stronglyConnectedComponents{nodeAddress=nodeAddress, arcs=arcs} end (* In general any mutually recursive declaration can refer to any other. It's better to partition the recursive declarations into strongly connected components i.e. those that actually refer to each other. *) fun partitionMutualBindings(RecDecs rlist) = let val processed = stronglyConnected rlist (* Convert the result. 
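(* A rough illustration of this partitioning: for a recursive group such as
       fun f x = g x  and g x = f x  and h x = x + 1
   f and g refer to each other and so form one strongly connected component, which
   stays together as a RecDecs, while h refers to neither and becomes a plain Declar
   of its lambda.  Because the components come back with dependants first, the
   List.foldl used to rebuild the list also reverses it, so each component ends up
   after the components it depends on. *)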
Note that stronglyConnectedComponents returns the dependencies in the reverse order i.e. if X depends on Y but not the other way round then X will appear before Y in the list. We need to reverse it so that X goes after Y. *) fun rebuild ([{lambda, addr, use}], tl) = Declar{addr=addr, use=use, value=Lambda lambda} :: tl | rebuild (multiple, tl) = RecDecs multiple :: tl in List.foldl rebuild [] processed end (* This is only intended for RecDecs but it's simpler to handle all bindings. *) | partitionMutualBindings other = [other] (* Functions to help in building a closure. *) datatype createClosure = Closure of (loadForm * int) list ref fun makeClosure() = Closure(ref []) (* Function to build a closure. Items are added to the closure if they are not already there. *) fun addToClosure (Closure closureList) (ext: loadForm): loadForm = case (List.find (fn (l, _) => l = ext) (!closureList), ! closureList) of (SOME(_, n), _) => (* Already there *) LoadClosure n | (NONE, []) => (* Not there - first *) (closureList := [(ext, 0)]; LoadClosure 0) | (NONE, cl as (_, n) :: _) => (closureList := (ext, n+1) :: cl; LoadClosure(n+1)) fun extractClosure(Closure (ref closureList)) = List.foldl (fn ((ext, _), l) => ext :: l) [] closureList datatype inlineTest = TooBig | NonRecursive | TailRecursive of bool vector | NonTailRecursive of bool vector fun evaluateInlining(function, numArgs, maxInlineSize) = let (* This checks for the possibility of inlining a function. It sees if it is small enough according to some rough estimate of the cost and it also looks for recursive uses of the function. Typically if the function is small enough to inline there will be only one recursive use but we consider the possibility of more than one. If the only uses are tail recursive we can replace the recursive calls by a Loop with a BeginLoop outside it. If there are non-tail recursive calls we may be able to lift out arguments that are unchanged. For example for fun map f [] = [] | map f (a::b) = f a :: map f b it may be worth lifting out f and generating specific mapping functions for each application. *) val hasRecursiveCall = ref false (* Set to true if rec call *) val allTail = ref true (* Set to false if non recursive *) (* An element of this is set to false if the actual value if anything other than the original argument. At the end we are then left with the arguments that are unchanged. *) val argMod = Array.array(numArgs, true) infix 6 -- (* Subtract y from x but return 0 rather than a negative number. *) fun x -- y = if x >= y then x-y else 0 (* Check for the code size and also recursive references. N,B. We assume in hasLoop that tail recursion applies only with Cond, Newenv and Handler. *) fun checkUse _ (_, 0, _) = 0 (* The function is too big to inline. *) | checkUse isMain (Newenv(decs, exp), cl, isTail) = let fun checkBind (Declar{value, ...}, cl) = checkUse isMain(value, cl, false) | checkBind (RecDecs decs, cl) = List.foldl(fn ({lambda, ...}, n) => checkUse isMain (Lambda lambda, n, false)) cl decs | checkBind (NullBinding c, cl) = checkUse isMain (c, cl, false) | checkBind (Container{setter, ...}, cl) = checkUse isMain(setter, cl -- 1, false) in checkUse isMain (exp, List.foldl checkBind cl decs, isTail) end | checkUse _ (Constnt(w, _), cl, _) = if isShort w then cl else cl -- 1 (* A recursive reference in any context other than a call prevents any inlining. 
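(* A small usage sketch of the closure builder defined above (l1 and l2 stand for
   two distinct loadForm values):
       val cl = makeClosure ()
       val _ = addToClosure cl l1     (* LoadClosure 0 - new entry *)
       val _ = addToClosure cl l2     (* LoadClosure 1 *)
       val _ = addToClosure cl l1     (* LoadClosure 0 again - already present *)
       val items = extractClosure cl  (* [l1, l2], in slot order *)
   so repeated references to the same free variable share a single closure slot. *)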
*) | checkUse true (Extract LoadRecursive, _, _) = 0 | checkUse _ (Extract _, cl, _) = cl -- 1 | checkUse isMain (Indirect{base, ...}, cl, _) = checkUse isMain (base, cl -- 1, false) | checkUse _ (Lambda {body, argTypes, closure, ...}, cl, _) = (* For the moment, any recursive use in an inner function prevents inlining. *) if List.exists (fn LoadRecursive => true | _ => false) closure then 0 else checkUse false (body, cl -- (List.length argTypes + List.length closure), false) | checkUse true (Eval{function = Extract LoadRecursive, argList, ...}, cl, isTail) = let (* If the actual argument is anything but the original argument then the corresponding entry in the array is set to false. *) fun testArg((exp, _), n) = ( if (case exp of Extract(LoadArgument a) => n = a | _ => false) then () else Array.update(argMod, n, false); n+1 ) in List.foldl testArg 0 argList; hasRecursiveCall := true; if isTail then () else allTail := false; List.foldl(fn ((e, _), n) => checkUse true (e, n, false)) (cl--3) argList end | checkUse isMain (Eval{function, argList, ...}, cl, _) = checkUse isMain (function, List.foldl(fn ((e, _), n) => checkUse isMain (e, n, false)) (cl--2) argList, false) | checkUse _ (Nullary _, cl, _) = cl -- 1 | checkUse isMain (Unary{arg1, ...}, cl, _) = checkUse isMain (arg1, cl -- 1, false) | checkUse isMain (Binary{arg1, arg2, ...}, cl, _) = checkUseList isMain ([arg1, arg2], cl -- 1) | checkUse isMain (Arbitrary{arg1, arg2, ...}, cl, _) = checkUseList isMain ([arg1, arg2], cl -- 4) | checkUse isMain (AllocateWordMemory {numWords, flags, initial}, cl, _) = checkUseList isMain ([numWords, flags, initial], cl -- 1) | checkUse isMain (Cond(i, t, e), cl, isTail) = checkUse isMain (i, checkUse isMain (t, checkUse isMain (e, cl -- 2, isTail), isTail), false) | checkUse isMain (BeginLoop { loop, arguments, ...}, cl, _) = checkUse isMain (loop, List.foldl (fn (({value, ...}, _), n) => checkUse isMain (value, n, false)) cl arguments, false) | checkUse isMain (Loop args, cl, _) = List.foldl(fn ((e, _), n) => checkUse isMain (e, n, false)) cl args | checkUse isMain (Raise c, cl, _) = checkUse isMain (c, cl -- 1, false) | checkUse isMain (Handle {exp, handler, ...}, cl, isTail) = checkUse isMain (exp, checkUse isMain (handler, cl, isTail), false) | checkUse isMain (Tuple{ fields, ...}, cl, _) = checkUseList isMain (fields, cl) | checkUse isMain (SetContainer{container, tuple = Tuple { fields, ...}, ...}, cl, _) = (* This can be optimised *) checkUse isMain (container, checkUseList isMain (fields, cl), false) | checkUse isMain (SetContainer{container, tuple, filter}, cl, _) = checkUse isMain (container, checkUse isMain (tuple, cl -- (BoolVector.length filter), false), false) | checkUse isMain (TagTest{test, ...}, cl, _) = checkUse isMain (test, cl -- 1, false) | checkUse isMain (LoadOperation{address, ...}, cl, _) = checkUseAddress isMain (address, cl -- 1) | checkUse isMain (StoreOperation{address, value, ...}, cl, _) = checkUse isMain (value, checkUseAddress isMain (address, cl -- 1), false) | checkUse isMain (BlockOperation{sourceLeft, destRight, length, ...}, cl, _) = checkUse isMain (length, checkUseAddress isMain (destRight, checkUseAddress isMain (sourceLeft, cl -- 1)), false) and checkUseList isMain (elems, cl) = List.foldl(fn (e, n) => checkUse isMain (e, n, false)) cl elems and checkUseAddress isMain ({base, index=NONE, ...}, cl) = checkUse isMain (base, cl, false) | checkUseAddress isMain ({base, index=SOME index, ...}, cl) = checkUseList isMain ([base, index], cl) val costLeft = checkUse 
true (function, maxInlineSize, true) in if costLeft = 0 then TooBig else if not (! hasRecursiveCall) then NonRecursive else if ! allTail then TailRecursive(Array.vector argMod) else NonTailRecursive(Array.vector argMod) end structure Sharing = struct type codetree = codetree and codeBinding = codeBinding and loadForm = loadForm and createClosure = createClosure and envSpecial = envSpecial end end; diff --git a/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML b/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML index 815ea678..84942f0d 100644 --- a/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML +++ b/mlsource/MLCompiler/CodeTree/X86Code/X86CodetreeToICode.ML @@ -1,4089 +1,4090 @@ (* Copyright David C. J. Matthews 2016-22 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor X86CodetreeToICode( structure BackEndTree: BACKENDINTERMEDIATECODE structure X86ICode: X86ICODE structure Debug: DEBUG structure X86Foreign: FOREIGNCALL structure ICodeTransform: X86ICODETRANSFORM structure CodeArray: CODEARRAY sharing X86ICode.Sharing = ICodeTransform.Sharing = CodeArray.Sharing ): GENCODE = struct open BackEndTree open Address open X86ICode open CodeArray exception InternalError = Misc.InternalError local val regs = case targetArch of Native32Bit => [eax, ebx] | Native64Bit => [eax, ebx, r8, r9, r10] | ObjectId32Bit => [eax, esi, r8, r9, r10] val fpResult = case targetArch of Native32Bit => FPReg fp0 | _ => XMMReg xmm0 val fpArgRegs = case targetArch of Native32Bit => [] | _ => [xmm0, xmm1, xmm2] in val generalArgRegs = List.map GenReg regs val floatingPtArgRegs = List.map XMMReg fpArgRegs fun resultReg GeneralType = GenReg eax | resultReg DoubleFloatType = fpResult | resultReg SingleFloatType = fpResult | resultReg (ContainerType _) = GenReg eax (* Doesn't need a result. *) end (* tag a short constant *) fun tag c = 2 * c + 1 (* shift a short constant, but don't set tag bit *) fun semitag c = 2 * c (* Reverse a list and append the second. This is used a lot when converting between the reverse and forward list versions. e.g. codeToICode and codeToICodeRev *) fun revApp([], l) = l | revApp(hd :: tl, l) = revApp(tl, hd :: l) datatype blockStruct = BlockSimple of x86ICode | BlockExit of x86ICode | BlockLabel of int | BlockFlow of controlFlow | BlockBegin of { regArgs: (preg * reg) list, stackArgs: stackLocn list } | BlockRaiseAndHandle of x86ICode * int | BlockOptionalHandle of {call: x86ICode, handler: int, label: int } local open RunCall val F_mutable_bytes = Word.fromLargeWord(Word8.toLargeWord(Word8.orb (F_mutable, F_bytes))) fun makeRealConst l = let val r = allocateByteMemory(0wx8 div bytesPerWord, F_mutable_bytes) fun setBytes([], _) = () | setBytes(hd::tl, n) = (storeByte(r, n, hd); setBytes(tl, n+0wx1)) val () = setBytes(l, 0w0) val () = clearMutableBit r in r end in (* These are floating point constants used to change and mask the sign bit. 
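(* As an illustration (the use of these values is implied rather than shown here):
   on little-endian x86 the byte lists defined just below denote the IEEE sign-bit
   masks, the float versions keeping the mask in the low 32-bit word:
       realSignBit  = 0x8000000000000000    realAbsMask  = 0x7FFFFFFFFFFFFFFF
       floatSignBit = 0x0000000080000000    floatAbsMask = 0x000000007FFFFFFF
   so absolute value can be taken by AND-ing with the abs mask and negation by
   XOR-ing with the sign bit, without any floating point arithmetic. *)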
*) val realSignBit: machineWord = makeRealConst [0wx00, 0wx00, 0wx00, 0wx00, 0wx00, 0wx00, 0wx00, 0wx80] and realAbsMask: machineWord = makeRealConst [0wxff, 0wxff, 0wxff, 0wxff, 0wxff, 0wxff, 0wxff, 0wx7f] and floatSignBit: machineWord = makeRealConst [0wx00, 0wx00, 0wx00, 0wx80, 0wx00, 0wx00, 0wx00, 0wx00] and floatAbsMask: machineWord = makeRealConst [0wxff, 0wxff, 0wxff, 0wx7f, 0wx00, 0wx00, 0wx00, 0wx00] end datatype commutative = Commutative | NonCommutative (* Check that a large-word constant looks right and get the value as a large int*) fun largeWordConstant value = if isShort value then raise InternalError "largeWordConstant: invalid" else let val addr = toAddress value in if length addr <> nativeWordSize div wordSize orelse flags addr <> F_bytes then raise InternalError "largeWordConstant: invalid" else (); LargeWord.toLargeInt(RunCall.unsafeCast addr) end fun codeFunctionToX86({body, localCount, name, argTypes, resultType=fnResultType, closure, ...}:bicLambdaForm, debugSwitches, resultClosure) = let (* Pseudo-registers are allocated sequentially and the properties added to the list. *) val pregCounter = ref 0 val pregPropList = ref [] fun newPReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropGeneral :: !pregPropList in PReg regNo end and newUReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropUntagged :: !pregPropList in PReg regNo end and newStackLoc size = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropStack size :: !pregPropList in StackLoc{size=size, rno=regNo} end and newMergeReg() = let val regNo = !pregCounter before pregCounter := !pregCounter + 1 val () = pregPropList := RegPropMultiple :: !pregPropList in PReg regNo end datatype locationValue = NoLocation | PregLocation of preg | ContainerLocation of { container: stackLocn, stackOffset: int } val locToPregArray = Array.array(localCount, NoLocation) val labelCounter = ref 1 (* Start at 1. Zero is used for the root. *) fun newLabel() = !labelCounter before labelCounter := !labelCounter + 1 val ccRefCounter = ref 0 fun newCCRef() = CcRef(!ccRefCounter) before ccRefCounter := !ccRefCounter + 1 (* The profile object is a single mutable with the F_bytes bit set. *) val profileObject = createProfileObject() (* Switch to indicate if we want to trace where live data has been allocated. *) (* TODO: This should be used in AllocateMemoryOperation and BoxValue and possibly AllocateMemoryVariable. *) val addAllocatingFunction = Debug.getParameter Debug.profileAllocationTag debugSwitches = 1 fun constantAsArgument value = if isShort value then IntegerConstant(tag(Word.toLargeIntX(toShort value))) else AddressConstant value (* Create the branch condition from the test, isSigned and jumpOn values. (In)equality tests are the same for signed and unsigned values. 
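(* As an illustration of the mapping defined just below (the values quoted are taken
   from it): testAsBranch picks the x86 condition from the ML test, the signedness
   and whether we jump when the test holds, e.g.
       testAsBranch(TestLess, true,  true)  = JL    (* signed   < *)
       testAsBranch(TestLess, false, true)  = JB    (* unsigned < *)
       testAsBranch(TestLess, true,  false) = JGE   (* jump when not < *)
   and leftRightTest swaps the direction when the operands are exchanged:
   rewriting  5 > x  as  x < 5  uses leftRightTest TestGreater = TestLess. *)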
*) local open BuiltIns in fun testAsBranch(TestEqual, _, true) = JE | testAsBranch(TestEqual, _, false) = JNE (* Signed tests *) | testAsBranch(TestLess, true, true) = JL | testAsBranch(TestLess, true, false) = JGE | testAsBranch(TestLessEqual, true, true) = JLE | testAsBranch(TestLessEqual, true, false) = JG | testAsBranch(TestGreater, true, true) = JG | testAsBranch(TestGreater, true, false) = JLE | testAsBranch(TestGreaterEqual, true, true) = JGE | testAsBranch(TestGreaterEqual, true, false) = JL (* Unsigned tests *) | testAsBranch(TestLess, false, true) = JB | testAsBranch(TestLess, false, false) = JNB | testAsBranch(TestLessEqual, false, true) = JNA | testAsBranch(TestLessEqual, false, false) = JA | testAsBranch(TestGreater, false, true) = JA | testAsBranch(TestGreater, false, false) = JNA | testAsBranch(TestGreaterEqual, false, true) = JNB | testAsBranch(TestGreaterEqual, false, false) = JB | testAsBranch(TestUnordered, _, _) = raise InternalError "TestUnordered" (* Switch the direction of a test if we turn c op x into x op c. *) fun leftRightTest TestEqual = TestEqual | leftRightTest TestLess = TestGreater | leftRightTest TestLessEqual = TestGreaterEqual | leftRightTest TestGreater = TestLess | leftRightTest TestGreaterEqual = TestLessEqual | leftRightTest TestUnordered = TestUnordered end (* Overflow check. This raises Overflow if the overflow bit is set in the cc. This generates a single block for the function unless there is a handler. As well as reducing the size of the code this also means that overflow checks are generally JO instructions to the end of the code. Since the default branch prediction is not to take forward jumps this should improve prefetching on the normal, non-overflow, path. *) fun checkOverflow ({currHandler=NONE, overflowBlock=ref(SOME overFlowLab), ...}) ccRef = (* It's already been set and there's no surrounding handler - use this. *) let val noOverflowLab = newLabel() in [ BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=overFlowLab, falseJump=noOverflowLab }), BlockLabel noOverflowLab ] end | checkOverflow ({currHandler=NONE, overflowBlock, ...}) ccRef = let (* *) val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() val () = overflowBlock := SOME overFlowLab in [ BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=overFlowLab, falseJump=noOverflowLab }), BlockLabel overFlowLab, BlockSimple(LoadArgument{source=AddressConstant(toMachineWord(Overflow)), dest=packetReg, kind=movePolyWord}), BlockExit(RaiseExceptionPacket{packetReg=packetReg}), BlockLabel noOverflowLab ] end | checkOverflow ({currHandler=SOME h, ...}) ccRef = let val overFlowLab = newLabel() and noOverflowLab = newLabel() val packetReg = newPReg() in [ BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=overFlowLab, falseJump=noOverflowLab }), BlockLabel overFlowLab, BlockSimple(LoadArgument{source=AddressConstant(toMachineWord(Overflow)), dest=packetReg, kind=movePolyWord}), BlockRaiseAndHandle(RaiseExceptionPacket{packetReg=packetReg}, h), BlockLabel noOverflowLab ] end fun setAndRestoreRounding (rndMode, doWithRounding) = let open IEEEReal val savedRnd = newUReg() and setRnd = newUReg() in case fpMode of FPModeX87 => [BlockSimple(GetX87ControlReg{dest=savedRnd})] @ (* Set the appropriate bits in the control word. *) (case rndMode of TO_NEAREST => (* The bits need to be zero - just mask them. 
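(* For reference (an illustration; the bit positions are standard x86 and are not
   stated in this file): the rounding-control field is two bits - bits 10-11 of the
   x87 control word and bits 13-14 of the SSE2 MXCSR - encoded as 00 = to nearest,
   01 = toward -inf, 10 = toward +inf, 11 = toward zero.  Hence the constants used
   in this function:
       x87 :  AND 0xf3ff      then OR 0x400 / 0x800 / 0xc00
       SSE2:  AND 0xffff9fff  then OR 0x2000 / 0x4000 / 0x6000
   i.e. clear the field first, then set the required pattern. *)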
*) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0xf3ff, ccRef=newCCRef(), opSize=OpSize32})] | TO_NEGINF => let val wrk = newUReg() in (* Mask the bits and set to 01 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xf3ff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x400, ccRef=newCCRef(), opSize=OpSize32})] end | TO_POSINF => let val wrk = newUReg() in (* Mask the bits and set to 10 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xf3ff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x800, ccRef=newCCRef(), opSize=OpSize32})] end | TO_ZERO => (* The bits need to be one - just set them. *) [BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0xc00, ccRef=newCCRef(), opSize=OpSize32})]) @ [BlockSimple(SetX87ControlReg{source=setRnd})] @ doWithRounding() @ (* Restore the original rounding. *) [BlockSimple(SetX87ControlReg{source=savedRnd})] | FPModeSSE2 => [BlockSimple(GetSSE2ControlReg{dest=savedRnd})] @ (* Set the appropriate bits in the control word. *) (case rndMode of TO_NEAREST => (* The bits need to be zero - just mask them. *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0xffff9fff, ccRef=newCCRef(), opSize=OpSize32})] | TO_NEGINF => let val wrk = newUReg() in (* Mask the bits and set to 01 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xffff9fff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x2000, ccRef=newCCRef(), opSize=OpSize32})] end | TO_POSINF => let val wrk = newUReg() in (* Mask the bits and set to 10 *) [BlockSimple( ArithmeticFunction{oper=AND, resultReg=wrk, operand1=savedRnd, operand2=IntegerConstant 0xffff9fff, ccRef=newCCRef(), opSize=OpSize32}), BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x4000, ccRef=newCCRef(), opSize=OpSize32})] end | TO_ZERO => (* The bits need to be one - just set them. *) [BlockSimple( ArithmeticFunction{oper=OR, resultReg=setRnd, operand1=savedRnd, operand2=IntegerConstant 0x6000, ccRef=newCCRef(), opSize=OpSize32})]) @ [BlockSimple(SetSSE2ControlReg{source=setRnd})] @ doWithRounding() @ [BlockSimple(SetSSE2ControlReg{source=savedRnd})] end (* Put a floating point value into a box or tag it so the value can be held in a general register. *) fun boxOrTagReal(srcReg, destReg, precision) = if precision = BuiltIns.PrecDouble orelse wordSize <> 0w8 then let open BuiltIns val boxFloat = case (fpMode, precision) of (FPModeX87, PrecDouble) => BoxX87Double | (FPModeX87, PrecSingle) => BoxX87Float | (FPModeSSE2, PrecDouble) => BoxSSE2Double | (FPModeSSE2, PrecSingle) => BoxSSE2Float in [BlockSimple(BoxValue{boxKind=boxFloat, source=srcReg, dest=destReg, saveRegs=[]})] end else [BlockSimple(TagFloat{source=srcReg, dest=destReg})] (* Indicate that the base address is actually an object index where appropriate. *) val memIndexOrObject = case targetArch of ObjectId32Bit => ObjectIndex | _ => NoMemIndex (* Generally we have an offset in words and no index register. 
*) fun wordOffsetAddress(offset, baseReg: preg): argument = MemoryLocation{offset=offset*Word.toInt wordSize, base=baseReg, index=memIndexOrObject, cache=NONE} (* The large-word operations all work on the value within the box pointed at by the register. We generate all large-word operations using this even where the X86 instruction requires a register. This allows the next level to optimise cases of cascaded instructions and avoid creating boxes for intermediate values. *) fun wordAt reg = wordOffsetAddress(0, reg) val returnAddressEntry = newStackLoc 1 datatype argLoc = ArgInReg of { realReg: reg, argReg: preg } | ArgOnStack of { stackOffset: int, stackReg: stackLocn } (* Pseudo-regs for the result, the closure and the args that were passed in real regs. *) val resultTarget = newPReg() val closureRegAddr = newPReg() (* Create a map for the arguments indicating their register or stack location. *) local (* Select the appropriate argument register depending on the argument type. *) fun argTypesToArgEntries([], _, _, _) = ([], [], [], []) | argTypesToArgEntries(DoubleFloatType :: tl, gRegs, fpReg :: fpRegs, n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, fpRegs, n-1) val pRegArg = newPReg() and uRegArg = newUReg() in (ArgInReg{realReg=fpReg, argReg=pRegArg} :: argTypes, boxOrTagReal(uRegArg, pRegArg, BuiltIns.PrecDouble) @ argCode, (uRegArg, fpReg) :: argRegs, stackArgs) end | argTypesToArgEntries(SingleFloatType :: tl, gRegs, fpReg :: fpRegs, n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, fpRegs, n-1) val pRegArg = newPReg() and uRegArg = newUReg() in (ArgInReg{realReg=fpReg, argReg=pRegArg} :: argTypes, boxOrTagReal(uRegArg, pRegArg, BuiltIns.PrecSingle) @ argCode, (uRegArg, fpReg) :: argRegs, stackArgs) end | argTypesToArgEntries(_ :: tl, gReg :: gRegs, fpRegs, n) = (* This deals with general arguments but also with extra floating point arguments. They are boxed as usual. *) let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, gRegs, fpRegs, n-1) val argReg=newPReg() in (ArgInReg{realReg=gReg, argReg=argReg} :: argTypes, argCode, (argReg, gReg) :: argRegs, stackArgs) end | argTypesToArgEntries(_ :: tl, [], fpRegs, n) = let val (argTypes, argCode, argRegs, stackArgs) = argTypesToArgEntries(tl, [], fpRegs, n-1) val stackLoc = newStackLoc 1 in (ArgOnStack {stackOffset=n, stackReg = stackLoc } :: argTypes, argCode, argRegs, stackLoc :: stackArgs) end val (argEntries, argCode, argRegs, stackArguments) = argTypesToArgEntries(argTypes, generalArgRegs, floatingPtArgRegs, List.length argTypes) val clReg = case closure of [] => [] | _ => [(closureRegAddr, GenReg edx)] in val argumentVector = Vector.fromList argEntries (* Start code for the function. *) val beginInstructions = argCode @ [BlockBegin{regArgs=clReg @ argRegs, stackArgs=stackArguments @ [returnAddressEntry]}] (* The number of arguments on the stack. Needed in return instrs and tail calls. *) val currentStackArgs = List.length stackArguments end (* The return instruction. This can be added on to various tails but there is always one at the end anyway. 
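(* As an illustration of the argument mapping set up above (the register lists are
   those given earlier in this file): on native 64-bit targets the first five
   general arguments arrive in eax, ebx, r8, r9 and r10 and any further ones on the
   stack; double- and single-precision arguments arrive in xmm0-xmm2 and are boxed
   (or, for singles on 64-bit, tagged) into ML values on entry; the closure, when
   there is one, arrives in edx, and the result is returned in eax (or xmm0/fp0 for
   an unboxed real result). *)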
*) fun returnInstruction({stackPtr, ...}, target, tailCode) = let val (returnCode, resReg) = case fnResultType of GeneralType => ([], target) | ContainerType _ => ([], target) | DoubleFloatType => let val resReg = newUReg() in ([BlockSimple(LoadArgument{source=wordAt target, dest=resReg, kind=MoveDouble})], resReg) end | SingleFloatType => let val resReg = newUReg() val unpack = if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument target, dest=resReg, cache=NONE}) else BlockSimple(LoadArgument{source=wordAt target, dest=resReg, kind=MoveFloat}) in ([unpack], resReg) end in BlockExit(ReturnResultFromFunction{resultReg=resReg, realReg=resultReg fnResultType, numStackArgs=currentStackArgs}) :: returnCode @ (if stackPtr <> 0 then BlockSimple(ResetStackPtr{numWords=stackPtr, preserveCC=false}) :: tailCode else tailCode) end (* This controls what codeAsArgument returns. Different instructions have different requirements. If an option is set to false the value is instead loaded into a new preg. "const32s" means that it will fit into 32-bits. Any constant satisfies that on X86/32 but on the X86/64 we don't allow addresses because we can't be sure whether they will fit or not. *) type allowedArgument = { anyConstant: bool, const32s: bool, memAddr: bool, existingPreg: bool } val allowInMemMove = (* We can move a 32-bit constant into memory but not a long constant. *) { anyConstant=false, const32s=true, memAddr=false, existingPreg=true } and allowInPReg = { anyConstant=false, const32s=false, memAddr=false, existingPreg=true } (* AllowDefer can be used to ensure that any side-effects are done before something else but otherwise we only evaluate afterwards. *) and allowDefer = { anyConstant=true, const32s=true, memAddr=true, existingPreg=true } datatype destination = SpecificPReg of preg | NoResult | Allowed of allowedArgument (* Context type. *) type context = { loopArgs: (preg list * int * int) option, stackPtr: int, currHandler: int option, overflowBlock: int option ref } (* If a preg has been provided, use that, otherwise generate a new one. *) fun asTarget(SpecificPReg preg) = preg | asTarget NoResult = newPReg() | asTarget(Allowed _) = newPReg() fun moveIfNotAllowed(NoResult, code, arg) = (code, arg, false) | moveIfNotAllowed(Allowed{anyConstant=true, ...}, code, arg as AddressConstant _) = (code, arg, false) | moveIfNotAllowed(Allowed{anyConstant=true, ...}, code, arg as IntegerConstant _) = (code, arg, false) | moveIfNotAllowed(dest as Allowed{const32s=true, ...}, code, arg as IntegerConstant value) = (* This is allowed if the value is within 32-bits *) if is32bit value then (code, arg, false) else moveToTarget(dest, code, arg) | moveIfNotAllowed(dest as Allowed{const32s=true, ...}, code, arg as AddressConstant _) = if targetArch = Native32Bit then (code, arg, false) (* We can store the address directly *) else moveToTarget(dest, code, arg) | moveIfNotAllowed(Allowed{existingPreg=true, ...}, code, arg as RegisterArgument(PReg _)) = (code, arg, false) | moveIfNotAllowed(Allowed{memAddr=true, ...}, code, arg as MemoryLocation _) = (code, arg, false) | moveIfNotAllowed(dest, code, arg) = moveToTarget(dest, code, arg) and moveToTarget(dest, code, arg) = let val target = asTarget dest val moveSize = case arg of AddressConstant _ => movePolyWord | MemoryLocation _ => movePolyWord | _ => moveNativeWord in (code @ [BlockSimple(LoadArgument{source=arg, dest=target, kind=moveSize})], RegisterArgument target, false) end (* Create a bool result from a test by returning true or false. 
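(* As an illustration of the function just below (which builds its result list in
   reverse): read forwards, the generated skeleton is roughly
       <test code, setting ccRef>
       conditional branch on ccRef: true -> trueLab, false -> falseLab
     trueLab:  mergeReg := tag 1;  goto mergeLab
     falseLab: mergeReg := tag 0;  goto mergeLab
     mergeLab: target := mergeReg
   i.e. the boolean result is materialised as the tagged constants for true and
   false and then merged into the target register. *)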
*) fun makeBoolResultRev(condition, ccRef, target, testCode) = let val trueLab = newLabel() and falseLab = newLabel() and mergeLab = newLabel() val mergeReg = newMergeReg() in BlockSimple(LoadArgument{dest=target, source=RegisterArgument mergeReg, kind=Move32Bit}) :: BlockLabel mergeLab :: BlockFlow(Unconditional mergeLab) :: BlockSimple(LoadArgument{dest=mergeReg, source=IntegerConstant(tag 0), kind=Move32Bit}) :: BlockLabel falseLab :: BlockFlow(Unconditional mergeLab) :: BlockSimple(LoadArgument{dest=mergeReg, source=IntegerConstant(tag 1), kind=Move32Bit}) :: BlockLabel trueLab :: BlockFlow(Conditional{ ccRef=ccRef, condition=condition, trueJump=trueLab, falseJump=falseLab }) :: testCode end fun moveIfNotAllowedRev(NoResult, code, arg) = (code, arg, false) | moveIfNotAllowedRev(Allowed{anyConstant=true, ...}, code, arg as AddressConstant _) = (code, arg, false) | moveIfNotAllowedRev(Allowed{anyConstant=true, ...}, code, arg as IntegerConstant _) = (code, arg, false) | moveIfNotAllowedRev(dest as Allowed{const32s=true, ...}, code, arg as IntegerConstant value) = (* This is allowed if the value is within 32-bits *) if is32bit value then (code, arg, false) else moveToTargetRev(dest, code, arg) | moveIfNotAllowedRev(dest as Allowed{const32s=true, ...}, code, arg as AddressConstant _) = if targetArch = Native32Bit then (code, arg, false) else moveToTargetRev(dest, code, arg) | moveIfNotAllowedRev(Allowed{existingPreg=true, ...}, code, arg as RegisterArgument(PReg _)) = (code, arg, false) | moveIfNotAllowedRev(Allowed{memAddr=true, ...}, code, arg as MemoryLocation _) = (code, arg, false) | moveIfNotAllowedRev(dest, code, arg) = moveToTargetRev(dest, code, arg) and moveToTargetRev(dest, code, arg) = let val target = asTarget dest val moveSize = case arg of AddressConstant _ => movePolyWord | MemoryLocation _ => movePolyWord | _ => moveNativeWord in (BlockSimple(LoadArgument{source=arg, dest=target, kind=moveSize}) :: code, RegisterArgument target, false) end (* Allocate a fixed size cell with a reference to the profile object if we want to trace the location of live data. Currently only used for tuples and for closures in native 32/64 bit. *) and allocateWithProfileRev(n, flags, memAddr, tlCode) = if addAllocatingFunction then let val restAndAlloc = BlockSimple(AllocateMemoryOperation{size=n+1, flags=Word8.orb(flags, Address.F_profile), dest=memAddr, saveRegs=[]}) :: tlCode val (code2, source, _) = moveIfNotAllowedRev(Allowed allowInMemMove, restAndAlloc, AddressConstant profileObject) val storeValue = BlockSimple(StoreArgument{ source=source, offset=n*Word.toInt wordSize, base=memAddr, index=memIndexOrObject, kind=movePolyWord, isMutable=false}) in storeValue :: code2 end else BlockSimple(AllocateMemoryOperation{size=n, flags=flags, dest=memAddr, saveRegs=[]}) :: tlCode (* Use a move if there's no offset or index. We could use an add if there's no index. *) and loadAddress{base, offset=0, index=NoMemIndex, dest} = LoadArgument{source=RegisterArgument base, dest=dest, kind=movePolyWord} | loadAddress{base, offset, index, dest} = LoadEffectiveAddress{base=SOME base, offset=offset, dest=dest, index=index, opSize=nativeWordOpSize} and codeToICodeTarget(instr, context: context, isTail, target) = (* This is really for backwards compatibility. *) let val (code, _, _) = codeToICode(instr, context, isTail, SpecificPReg target) in code end and codeToPReg(instr, context) = let (* Many instructions require an argument in a register. If it's already in a register use that rather than creating a new one. 
*) val (code, result, _) = codeToICode(instr, context, false, Allowed allowInPReg) val preg = case result of RegisterArgument pr => pr | _ => raise InternalError "codeToPReg" in (code, preg) end and codeToPRegRev(instr, context, tailCode) = let (* Many instructions require an argument in a register. If it's already in a register use that rather than creating a new one. *) val (code, result, _) = codeToICodeRev(instr, context, false, Allowed allowInPReg, tailCode) val preg = case result of RegisterArgument pr => pr | _ => raise InternalError "codeToPRegRev" in (code, preg) end and codeToICode(instr, context, isTail, destination) = let val (code, dest, haveExited) = codeToICodeRev(instr, context, isTail, destination, []) in (List.rev code, dest, haveExited) end (* Main function to turn the codetree into ICode. Optimisation is generally left to later passes. This does detect tail recursion. This builds the result up in reverse order. There was an allocation hotspot in loadFields in the BICTuple case which was eliminated by building the list in reverse and then reversing the result. It seems better to build the list in reverse generally but for the moment there are too many special cases to do everything. *) and codeToICodeRev(BICNewenv (bindings, exp), context: context as {stackPtr=initialSp, ...} , isTail, destination, tailCode) = let (* Process a list of bindings. We need to accumulate the space used by any containers and reset the stack pointer at the end if necessary. *) fun doBindings([], context, tailCode) = (tailCode, context) | doBindings(BICDeclar{value=BICExtract(BICLoadLocal l), addr, ...} :: decs, context, tailCode) = let (* Giving a new name to an existing entry. This should have been removed at a higher level but it doesn't always seem to be. In particular we must treat this specially if it's a container. *) val original = Array.sub(locToPregArray, l) val () = Array.update(locToPregArray, addr, original) in doBindings(decs, context, tailCode) end | doBindings(BICDeclar{value, addr, ...} :: decs, context, tailCode) = let val (code, dest) = codeToPRegRev(value, context, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs [{lambda, addr, ...}] :: decs, context, tailCode) = (* We shouldn't have single entries in RecDecs but it seems to occur at the moment. *) let val dest = newPReg() val (code, _, _) = codeToICodeRev(BICLambda lambda, context, false, SpecificPReg dest, tailCode) val () = Array.update(locToPregArray, addr, PregLocation dest) in doBindings(decs, context, code) end | doBindings(BICRecDecs recDecs :: decs, context, tailCode) = let val destRegs = map (fn _ => newPReg()) recDecs (* First build the closures as mutable cells containing zeros. Set the entry in the address table to the register containing the address. 
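(* A rough illustration of the two-pass scheme described here: mutually recursive
   closures may each need the address of the others before their own fields can be
   written, so for e.g.
       fun even n = n = 0 orelse odd (n - 1)
       and odd  n = n <> 0 andalso even (n - 1)
   pass one allocates a mutable cell per function, filled with tag 0, and records
   its register; pass two stores the code address and the free variables (which can
   now include the other closures) and finally locks each cell so that it becomes
   immutable. *)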
*) fun makeClosure({lambda={closure, ...}, addr, ...}, dest, c) = let val () = Array.update(locToPregArray, addr, PregLocation dest) val sizeClosure = List.length closure + (if targetArch = ObjectId32Bit then 2 else 1) open Address fun clear n = if n = sizeClosure then [BlockSimple(AllocateMemoryOperation{size=sizeClosure, flags=if targetArch = ObjectId32Bit then Word8.orb(F_mutable, F_closure) else F_mutable, dest=dest, saveRegs=[]})] else (clear (n+1) @ [BlockSimple( StoreArgument{source=IntegerConstant(tag 0), base=dest, offset=n*Word.toInt wordSize, index=memIndexOrObject, kind=movePolyWord, isMutable=false})]) in c @ clear 0 @ [BlockSimple InitialisationComplete] end val allocClosures = ListPair.foldlEq makeClosure [] (recDecs, destRegs) fun setClosure({lambda as {closure, ...}, ...}, dest, l) = let val clResult = makeConstantClosure() val () = codeFunctionToX86(lambda, debugSwitches, clResult) (* Basically the same as tuple except we load the address of the closure we've made. *) fun loadFields([], _) = [] | loadFields(f :: rest, n) = let val (code, source, _) = codeToICode(BICExtract f, context, false, Allowed allowInMemMove) val storeValue = [BlockSimple(StoreArgument{ source=source, base=dest, offset=n*Word.toInt wordSize, index=memIndexOrObject, kind=movePolyWord, isMutable=false })] in code @ storeValue @ loadFields(rest, n+1) end val setCodeAddress = if targetArch = ObjectId32Bit then let (* We can't get the code address until run time. *) val codeReg = newUReg() val closureReg = newPReg() in map BlockSimple [ LoadArgument{ source=AddressConstant(toMachineWord clResult), dest=closureReg, kind=movePolyWord}, LoadArgument{ source=MemoryLocation{offset=0, base=closureReg, index=ObjectIndex, cache=NONE}, dest=codeReg, kind=Move64Bit}, StoreArgument{ source=RegisterArgument codeReg, offset=0, base=dest, index=ObjectIndex, kind=moveNativeWord, isMutable=false} ] end else let val codeAddr = codeAddressFromClosure clResult val (code, source, _) = moveIfNotAllowed(Allowed allowInMemMove, [], AddressConstant codeAddr) in code @ [BlockSimple( StoreArgument{ source=source, base=dest, offset=0, index=NoMemIndex, kind=movePolyWord, isMutable=false })] end val setFields = setCodeAddress @ loadFields(closure, if targetArch = ObjectId32Bit then 2 else 1) in l @ setFields @ [BlockSimple(LockMutable{addr=dest})] end val setClosures = ListPair.foldlEq setClosure [] (recDecs, destRegs) val code = List.rev(allocClosures @ setClosures) in doBindings(decs, context, code @ tailCode) end | doBindings(BICNullBinding exp :: decs, context, tailCode) = let val (code, _, _) = codeToICodeRev(exp, context, false, NoResult, tailCode) (* And discard result. 
*) in doBindings(decs, context, code) end | doBindings(BICDecContainer{ addr, size } :: decs, {loopArgs, stackPtr, currHandler, overflowBlock}, tailCode) = let val containerReg = newStackLoc size val () = Array.update(locToPregArray, addr, ContainerLocation{container=containerReg, stackOffset=stackPtr+size}) in doBindings(decs, {loopArgs=loopArgs, stackPtr=stackPtr+size, currHandler=currHandler, overflowBlock=overflowBlock}, BlockSimple(ReserveContainer{size=size, container=containerReg}) :: tailCode) end val (codeBindings, resContext as {stackPtr=finalSp, ...}) = doBindings(bindings, context, tailCode) (* If we have had a container we'll need to reset the stack *) in if initialSp <> finalSp then let val _ = finalSp >= initialSp orelse raise InternalError "codeToICode - stack ptr" val bodyReg = newPReg() and resultReg = asTarget destination val (codeExp, result, haveExited) = codeToICodeRev(exp, resContext, isTail, SpecificPReg bodyReg, codeBindings) val afterAdjustSp = if haveExited then codeExp else BlockSimple(LoadArgument{source=result, dest=resultReg, kind=movePolyWord}) :: BlockSimple(ResetStackPtr{numWords=finalSp-initialSp, preserveCC=false}) :: codeExp in (afterAdjustSp, RegisterArgument resultReg, haveExited) end else codeToICodeRev(exp, resContext, isTail, destination, codeBindings) end | codeToICodeRev(BICConstnt(value, _), _, _, destination, tailCode) = moveIfNotAllowedRev(destination, tailCode, constantAsArgument value) | codeToICodeRev(BICExtract(BICLoadLocal l), {stackPtr, ...}, _, destination, tailCode) = ( case Array.sub(locToPregArray, l) of NoLocation => raise InternalError "codeToICodeRev - local unset" | PregLocation preg => moveIfNotAllowedRev(destination, tailCode, RegisterArgument preg) | ContainerLocation{container, stackOffset} => (* This always returns a ContainerAddr whatever the "allowed". *) (tailCode, ContainerAddr{container=container, stackOffset=stackPtr-stackOffset}, false) ) | codeToICodeRev(BICExtract(BICLoadArgument a), {stackPtr, ...}, _, destination, tailCode) = ( case Vector.sub(argumentVector, a) of ArgInReg{argReg, ...} => (* It was originally in a register. It's now in a preg. *) moveIfNotAllowedRev(destination, tailCode, RegisterArgument argReg) | ArgOnStack{stackOffset, stackReg} => (* Pushed before call. *) let val target = asTarget destination in (BlockSimple(LoadArgument{ source=StackLocation{wordOffset=stackOffset+stackPtr, container=stackReg, field=0, cache=NONE}, dest=target, kind=moveNativeWord}) :: tailCode, RegisterArgument target, false) end ) | codeToICodeRev(BICExtract(BICLoadClosure c), _, _, destination, tailCode) = let (* Add the number of words for the code address. This is 1 in native but 2 in 32-in-64. *) val offset = case targetArch of ObjectId32Bit => c+2 | _ => c+1 in if c >= List.length closure then raise InternalError "BICExtract: closure" else (); (* N.B. We need to add one to the closure entry because zero is the code address. *) moveIfNotAllowedRev(destination, tailCode, wordOffsetAddress(offset, closureRegAddr)) end | codeToICodeRev(BICExtract BICLoadRecursive, _, _, destination, tailCode) = (* If the closure is empty we must use the constant. We can't guarantee that the caller will actually load the closure register if it knows the closure is empty. 
*) moveIfNotAllowedRev(destination, tailCode, case closure of [] => AddressConstant(closureAsAddress resultClosure) | _ => RegisterArgument closureRegAddr) | codeToICodeRev(BICField{base, offset}, context, _, destination, tailCode) = let val (codeBase, baseEntry, _) = codeToICodeRev(base, context, false, Allowed allowInPReg, tailCode) in (* This should not be used with a container. *) case baseEntry of RegisterArgument baseR => moveIfNotAllowedRev(destination, codeBase, wordOffsetAddress(offset, baseR)) | _ => raise InternalError "codeToICodeRev-BICField" end | codeToICodeRev(BICLoadContainer{base, offset}, context, _, destination, tailCode) = let val (codeBase, baseEntry, _) = codeToICodeRev(base, context, false, Allowed allowInPReg, tailCode) val multiplier = Word.toInt(nativeWordSize div wordSize) in (* If this is a local container we extract the field. *) case baseEntry of RegisterArgument baseR => moveIfNotAllowedRev(destination, codeBase, wordOffsetAddress(offset*multiplier, baseR)) | ContainerAddr{container, stackOffset} => let val target = asTarget destination val finalOffset = stackOffset+offset val _ = finalOffset >= 0 orelse raise InternalError "offset" in (BlockSimple(LoadArgument{ source=StackLocation{wordOffset=finalOffset, container=container, field=offset, cache=NONE}, dest=target, kind=moveNativeWord}) :: tailCode, RegisterArgument target, false) end | _ => raise InternalError "codeToICodeRev-BICField" end | codeToICodeRev(BICEval{function, argList, resultType, ...}, context as { currHandler, ...}, isTail, destination, tailCode) = let val target = asTarget destination (* Create pregs for the closure and each argument. *) val clPReg = newPReg() (* If we have a constant closure we can go directly to the entry point. If the closure is a single word we don't need to load the closure register. *) val (functionCode, closureEntry, callKind) = case function of BICConstnt(addr, _) => let val addrAsAddr = toAddress addr (* If this is a closure we're still compiling we can't get the code address. However if this is directly recursive we can use the recursive convention. *) in if wordEq(closureAsAddress resultClosure, addr) then (tailCode, [], Recursive) else if flags addrAsAddr <> Address.F_words andalso flags addrAsAddr <> Address.F_closure then (BlockSimple(LoadArgument{source=AddressConstant addr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], FullCall) else if targetArch = ObjectId32Bit then (* We can't actually load the code address here. *) let val addrLength = length addrAsAddr val _ = addrLength >= 0w1 orelse raise InternalError "BICEval address" val _ = flags addrAsAddr = Address.F_closure orelse raise InternalError "BICEval address not a closure" in if addrLength = 0w2 then (tailCode, [], ConstantCode addr) else (BlockSimple(LoadArgument{source=AddressConstant addr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], ConstantCode addr) end else (* Native 32 or 64-bits. 
*) let val addrLength = length addrAsAddr val _ = addrLength >= 0w1 orelse raise InternalError "BICEval address" val codeAddr = loadWord(addrAsAddr, 0w0) val _ = isCode (toAddress codeAddr) orelse raise InternalError "BICEval address not code" in if addrLength = 0w1 then (tailCode, [], ConstantCode codeAddr) else (BlockSimple(LoadArgument{source=AddressConstant addr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], ConstantCode codeAddr) end end | BICExtract BICLoadRecursive => ( (* If the closure is empty we don't need to load rdx *) case closure of [] => (tailCode, [], Recursive) | _ => (BlockSimple(LoadArgument {source=RegisterArgument closureRegAddr, dest=clPReg, kind=movePolyWord}) :: tailCode, [(RegisterArgument clPReg, GenReg edx)], Recursive) ) | function => (* General case. *) (#1 (codeToICodeRev(function, context, false, SpecificPReg clPReg, tailCode)), [(RegisterArgument clPReg, GenReg edx)], FullCall) (* Optimise arguments. We have to be careful with tail-recursive functions because they need to save any stack arguments that could be overwritten. This is complicated because we overwrite the stack before loading the register arguments. In some circumstances it could be safe but for the moment leave it. This should be safe in the new code-transform but not the old codeICode. Currently we don't allow memory arguments at all. There's the potential for problems later. Memory arguments could possibly lead to aliasing of the stack if the memory actually refers to a container on the stack. That would mess up the code that ensures that stack arguments are stored in the right order. *) (* We don't allow long constants in stack arguments to a tail-recursive call because we may use a memory move to set them. We also don't allow them in 32-in-64 because we can't push an address constant. *) val allowInStackArg = Allowed {anyConstant=not isTail andalso targetArch <> ObjectId32Bit, const32s=true, memAddr=false, existingPreg=not isTail } and allowInRegArg = Allowed {anyConstant=true, const32s=true, memAddr=false, existingPreg=not isTail } (* Load the first arguments into registers and the rest to the stack. *) fun loadArgs ([], _, _, tailCode) = (tailCode, [], []) | loadArgs ((arg, DoubleFloatType) :: args, gRegs, fpReg :: fpRegs, tailCode) = let (* Floating point register argument. *) val (c, r) = codeToPRegRev(arg, context, tailCode) val r1 = newUReg() val c1 = BlockSimple(LoadArgument{source=wordAt r, dest=r1, kind=MoveDouble}) :: c val (code, regArgs, stackArgs) = loadArgs(args, gRegs, fpRegs, c1) in (code, (RegisterArgument r1, fpReg) :: regArgs, stackArgs) end | loadArgs ((arg, SingleFloatType) :: args, gRegs, fpReg :: fpRegs, tailCode) = let (* Floating point register argument. *) val (c, r) = codeToPRegRev(arg, context, tailCode) val r1 = newUReg() val c1 = if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument r, dest=r1, cache=NONE}) :: c else BlockSimple(LoadArgument{source=wordAt r, dest=r1, kind=MoveFloat}) :: c val (code, regArgs, stackArgs) = loadArgs(args, gRegs, fpRegs, c1) in (code, (RegisterArgument r1, fpReg) :: regArgs, stackArgs) end | loadArgs ((arg, _) :: args, gReg::gRegs, fpRegs, tailCode) = let (* General register argument. *) val (c, r, _) = codeToICodeRev(arg, context, false, allowInRegArg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, gRegs, fpRegs, c) in (code, (r, gReg) :: regArgs, stackArgs) end | loadArgs ((arg, _) :: args, [], fpRegs, tailCode) = let (* Stack argument. 
*) val (c, r, _) = codeToICodeRev(arg, context, false, allowInStackArg, tailCode) val (code, regArgs, stackArgs) = loadArgs(args, [], fpRegs, c) in (code, regArgs, r :: stackArgs) end val (codeArgs, regArgs, stackArgs) = loadArgs(argList, generalArgRegs, floatingPtArgRegs, functionCode) (* If this is at the end of the function and the result types are the same we can use a tail-recursive call. *) val tailCall = isTail andalso resultType = fnResultType val callCode = if tailCall then let val {stackPtr, ...} = context (* The number of arguments currently on the stack. *) val currentStackArgCount = currentStackArgs val newStackArgCount = List.length stackArgs (* The offset of the first argument or the return address if there are no stack arguments. N.B. We actually have currentStackArgCount+1 items on the stack including the return address. Offsets can be negative. *) val stackOffset = stackPtr val firstArgumentAddr = currentStackArgCount fun makeStackArgs([], _) = [] | makeStackArgs(arg::args, offset) = {src=arg, stack=offset} :: makeStackArgs(args, offset-1) val stackArgs = makeStackArgs(stackArgs, firstArgumentAddr) (* The stack adjustment needed to compensate for any items that have been pushed and the differences in the number of arguments. May be positive or negative. This is also the destination address of the return address so when we enter the new function the return address will be the first item on the stack. *) val stackAdjust = firstArgumentAddr - newStackArgCount (* Add an entry for the return address to the stack arguments. *) val returnEntry = {src=StackLocation{wordOffset=stackPtr, container=returnAddressEntry, field=0, cache=NONE}, stack=stackAdjust} (* Because we're storing into the stack we may be overwriting values we want. If the source of any value is a stack location below the current stack pointer we load it except in the special case where the destination is the same as the source (which is often the case with the return address). 
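(* A hypothetical example of the problem being handled here: suppose f(a, b) makes
   the tail call g(b, a) and both arguments are passed on the stack.
       incoming stack args:  a, b   (plus the return address)
       outgoing stack args:  b, a   (return address reused in place)
   Writing b into a's slot before a has been read would lose a, so any source that
   is still a live stack location is first loaded into a fresh preg, while a slot
   that is simply copied onto itself (typically the return address) is left where
   it is. *)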
*) local fun loadArgs [] = ([], []) | loadArgs (arg :: rest) = let val (loadCode, loadedArgs) = loadArgs rest in case arg of {src as StackLocation{wordOffset, ...}, stack} => if wordOffset = stack+stackOffset (* Same location *) orelse stack+stackOffset < 0 (* Storing above current top of stack *) orelse stackOffset+wordOffset > ~ stackAdjust (* Above the last argument *) then (loadCode, arg :: loadedArgs) else let val preg = newPReg() in (BlockSimple(LoadArgument{source=src, dest=preg, kind=moveNativeWord}) :: loadCode, {src=RegisterArgument preg, stack=stack} :: loadedArgs) end | _ => (loadCode, arg :: loadedArgs) end in val (loadStackArgs, loadedStackArgs) = loadArgs(returnEntry :: stackArgs) end in BlockExit(TailRecursiveCall{regArgs=closureEntry @ regArgs, stackArgs=loadedStackArgs, stackAdjust = stackAdjust, currStackSize=stackOffset, callKind=callKind, workReg=newPReg()}) :: loadStackArgs @ codeArgs end else let val (moveResult, resReg) = case resultType of GeneralType => ([], target) | ContainerType _ => ([], target) | DoubleFloatType => let val fpRegDest = newUReg() in (boxOrTagReal(fpRegDest, target, BuiltIns.PrecDouble), fpRegDest) end | SingleFloatType => let val fpRegDest = newUReg() in (boxOrTagReal(fpRegDest, target, BuiltIns.PrecSingle), fpRegDest) end val call = FunctionCall{regArgs=closureEntry @ regArgs, stackArgs=stackArgs, dest=resReg, realDest=resultReg resultType, callKind=callKind, saveRegs=[]} val callBlock = case currHandler of NONE => BlockSimple call :: codeArgs | SOME h => BlockOptionalHandle{call=call, handler=h, label=newLabel()} :: codeArgs in moveResult @ callBlock end in (callCode, RegisterArgument target, tailCall (* We've exited if this was a tail jump *)) end | codeToICodeRev(BICNullary{oper=BuiltIns.GetCurrentThreadId}, _, _, destination, tailCode) = (* Get the ID of the current thread. *) let val target = asTarget destination in (BlockSimple(LoadMemReg{offset=memRegThreadSelf, dest=target, kind=movePolyWord}) :: tailCode, RegisterArgument target, false) end - | codeToICodeRev(BICNullary{oper=BuiltIns.CheckRTSException}, _, _, destination, tailCode) = - moveIfNotAllowedRev(destination, tailCode, (* Unit result *) IntegerConstant(tag 0)) + | codeToICodeRev(BICNullary{oper=BuiltIns.CPUPause}, _, _, destination, tailCode) = + (* Pause during spinlock phase of mutex locking. *) + moveIfNotAllowedRev(destination, BlockSimple PauseCPU :: tailCode, (* Unit result *) IntegerConstant(tag 0)) | codeToICodeRev(BICNullary{oper=BuiltIns.CreateMutex}, _, _, destination, tailCode) = let (* Allocate memory for a mutex. Use a native word as a mutable, weak, no-overwrite, byte cell which is the same as a volatileRef. This ensures that it will always be cleared when it is loaded even if it was locked when it was saved. 
*) val target = asTarget destination val flags = Word8.orb(F_mutable, Word8.orb(F_weak, Word8.orb(F_noOverwrite, F_bytes))) (* 0wx69 *) in (BlockSimple InitialisationComplete :: BlockSimple(StoreArgument{source=IntegerConstant 0, base=target, offset=0, index=memIndexOrObject, kind=moveNativeWord, isMutable=false }) :: BlockSimple(AllocateMemoryOperation{size=Word.toInt(nativeWordSize div wordSize), flags=flags, dest=target, saveRegs=[]}) :: tailCode, RegisterArgument target, false) end | codeToICodeRev(BICUnary instr, context, isTail, destination, tailCode) = codeToICodeUnaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICBinary instr, context, isTail, destination, tailCode) = codeToICodeBinaryRev(instr, context, isTail, destination, tailCode) | codeToICodeRev(BICArbitrary{oper, shortCond, arg1, arg2, longCall}, context, _, destination, tailCode) = let val startLong = newLabel() and resultLabel = newLabel() val target = asTarget destination val condResult = newMergeReg() (* Overflow check - if there's an overflow jump to the long precision case. *) fun jumpOnOverflow ccRef = let val noOverFlow = newLabel() in [BlockFlow(Conditional{ ccRef=ccRef, condition=JO, trueJump=startLong, falseJump=noOverFlow }), BlockLabel noOverFlow] end val (longCode, _, _) = codeToICode(longCall, context, false, SpecificPReg condResult) (* We could use a tail jump here if this is a tail. *) val (code, dest, haveExited) = ( (* Test the tag bits and skip to the long case if either is clear. *) List.rev(codeConditionRev(shortCond, context, false, startLong, [])) @ (* Try evaluating as fixed precision and jump if we get an overflow. *) codeFixedPrecisionArith(oper, arg1, arg2, context, condResult, jumpOnOverflow) @ (* If we haven't had an overflow jump to the result. *) [BlockFlow(Unconditional resultLabel), (* If we need to use the full long-precision call we come here. *) BlockLabel startLong] @ longCode @ [BlockLabel resultLabel, BlockSimple(LoadArgument{source=RegisterArgument condResult, dest=target, kind=movePolyWord})], RegisterArgument target, false) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICAllocateWordMemory instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeAllocate(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICLambda(lambda as { closure = [], ...}), _, _, destination, tailCode) = (* Empty closure - create a constant closure for any recursive calls. *) let val closure = makeConstantClosure() val () = codeFunctionToX86(lambda, debugSwitches, closure) (* Return the closure itself as the value. *) in moveIfNotAllowedRev(destination, tailCode, AddressConstant(closureAsAddress closure)) end | codeToICodeRev(BICLambda(lambda as { closure, ...}), context, isTail, destination, tailCode) = (* Non-empty closure. Ignore stack closure option at the moment. *) let val closureRef = makeConstantClosure() val () = codeFunctionToX86(lambda, debugSwitches, closureRef) in if targetArch = ObjectId32Bit then let val target = asTarget destination val memAddr = newPReg() fun loadFields([], n, tlCode) = let val codeReg = newUReg() val closureReg = newPReg() in (* The code address occupies the first native word but we need to extract it at run-time. We don't currently have a way to have 64-bit constants. 
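(* A sketch of the control flow generated by the BICArbitrary case above (the labels
   are those used in that case):
       <shortCond: check that both arguments are short (tagged)>  -- if not -> startLong
       <fixed-precision arithmetic into condResult>               -- JO     -> startLong
       goto resultLabel
     startLong:   <full arbitrary-precision call into condResult>
     resultLabel: target := condResult
   so the common short-integer path avoids the long-precision call entirely. *)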
*) BlockSimple( StoreArgument{ source=RegisterArgument codeReg, offset=0, base=memAddr, index=ObjectIndex, kind=moveNativeWord, isMutable=false}) :: BlockSimple(LoadArgument{ source=MemoryLocation{offset=0, base=closureReg, index=ObjectIndex, cache=NONE}, dest=codeReg, kind=Move64Bit}) :: BlockSimple(LoadArgument{ source=AddressConstant(toMachineWord closureRef), dest=closureReg, kind=movePolyWord}) :: BlockSimple(AllocateMemoryOperation{size=n, flags=F_closure, dest=memAddr, saveRegs=[]}) :: tlCode end | loadFields(f :: rest, n, tlCode) = let (* Defer the evaluation if possible. We may have a constant that we can't move directly but it's better to load it after the allocation otherwise we will have to push the register if we need to GC. *) val (code1, source1, _) = codeToICodeRev(BICExtract f, context, false, Allowed allowDefer, tlCode) val restAndAlloc = loadFields(rest, n+1, code1) val (code2, source, _) = moveIfNotAllowedRev(Allowed allowInMemMove, restAndAlloc, source1) val storeValue = BlockSimple(StoreArgument{ source=source, offset=n*Word.toInt wordSize, base=memAddr, index=ObjectIndex, kind=movePolyWord, isMutable=false}) in storeValue :: code2 end val code = BlockSimple InitialisationComplete :: BlockSimple(LoadArgument{source=RegisterArgument memAddr, dest=target, kind=movePolyWord}) :: loadFields(closure, 2, tailCode) in (code, RegisterArgument target, false) end (* Treat it as a tuple with the code as the first field. *) else codeToICodeRev(BICTuple(BICConstnt(codeAddressFromClosure closureRef, []) :: map BICExtract closure), context, isTail, destination, tailCode) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, NoResult, tailCode) = let (* If we don't want the result but are only evaluating for side-effects we may be able to optimise special cases. This was easier in the forward case but for now we don't bother and leave it to the lower levels. *) val startElse = newLabel() and skipElse = newLabel() val codeTest = codeConditionRev(test, context, false, startElse, tailCode) val (codeThen, _, _) = codeToICodeRev(thenPt, context, isTail, NoResult, codeTest) val (codeElse, _, _) = codeToICodeRev(elsePt, context, isTail, NoResult, BlockLabel startElse :: BlockFlow(Unconditional skipElse) :: codeThen) in (BlockLabel skipElse :: codeElse, (* Unit result *) IntegerConstant(tag 0), false) end | codeToICodeRev(BICCond(test, thenPt, elsePt), context, isTail, destination, tailCode) = let (* Because we may push the result onto the stack we have to create a new preg to hold the result and then copy that to the final result. *) (* If this is a tail each arm will exit separately and neither will return a result. *) val target = asTarget destination val condResult = newMergeReg() val thenTarget = if isTail then newPReg() else condResult val startElse = newLabel() val testCode = codeConditionRev(test, context, false, startElse, tailCode) (* Put the result in the target register. *) val (thenCode, _, thenExited) = codeToICodeRev(thenPt, context, isTail, SpecificPReg thenTarget, testCode) (* Add a jump round the else-part except that if this is a tail we return. The then-part could have exited e.g. with a raise or a loop. *) val (exitThen, thenLabel, elseTarget) = if thenExited then (thenCode, [], target (* Can use original target. 
*)) else if isTail then (returnInstruction(context, thenTarget, thenCode), [], newPReg()) else let val skipElse = newLabel() in (BlockFlow(Unconditional skipElse) :: thenCode, [BlockSimple(LoadArgument{source=RegisterArgument condResult, dest=target, kind=movePolyWord}), BlockLabel skipElse], condResult) end val (elseCode, _, elseExited) = codeToICodeRev(elsePt, context, isTail, SpecificPReg elseTarget, BlockLabel startElse :: exitThen) (* Add a return to the else-part if necessary so we will always exit on a tail. *) val exitElse = if isTail andalso not elseExited then returnInstruction(context, elseTarget, elseCode) else elseCode in (thenLabel @ exitElse, RegisterArgument target, isTail orelse thenExited andalso elseExited) end | codeToICodeRev(BICCase { cases, test, default, isExhaustive, firstIndex}, context, isTail, destination, tailCode) = let (* We have to create a new preg for the result in case we need to push it to the stack. *) val targetReg = newMergeReg() local val initialTestReg = newPReg() val (testCode, _, _) = codeToICodeRev(test, context, false, SpecificPReg initialTestReg, tailCode) (* Subtract the minimum value so the value we're testing is always in the range of (tagged) 0 to the maximum. It is possible to adjust the value when computing the index but that can lead to overflows during compilation if the minimum is very large or small. We can ignore overflow and allow values to wrap round. *) in val (testCode, testReg) = if firstIndex = 0w0 then (testCode, initialTestReg) else let val newTestReg = newPReg() val subtract = BlockSimple(ArithmeticFunction{oper=SUB, resultReg=newTestReg, operand1=initialTestReg, operand2=IntegerConstant(semitag(Word.toLargeInt firstIndex)), ccRef=newCCRef(), opSize=polyWordOpSize}) in (subtract :: testCode, newTestReg) end end val workReg = newPReg() (* Unless this is exhaustive we need to add a range check. *) val (rangeCheck, extraDefaults) = if isExhaustive then (testCode, []) else let val defLab1 = newLabel() val tReg1 = newPReg() val ccRef1 = newCCRef() (* Since we've subtracted any minimum we only have to check whether the value is greater (unsigned) than the maximum. *) val numberOfCases = LargeInt.fromInt(List.length cases) val continueLab = newLabel() val testCode2 = BlockLabel continueLab :: BlockFlow(Conditional{ccRef=ccRef1, condition=JNB, trueJump=defLab1, falseJump=continueLab}) :: BlockSimple(WordComparison{arg1=tReg1, arg2=IntegerConstant(tag numberOfCases), ccRef=ccRef1, opSize=polyWordOpSize}) :: BlockSimple(LoadArgument {source=RegisterArgument testReg, dest=tReg1, kind=movePolyWord}) :: testCode in (testCode2, [defLab1]) end (* Make a label for each item in the list. *) val codeLabels = map (fn _ => newLabel()) cases (* Create an exit label in case it's needed. *) val labelForExit = if isTail then ~1 (* Illegal label. *) else newLabel() (* Generate the code for each of the cases and the default. We need to put an unconditional branch after each to skip the other cases. *) fun codeCases (SOME c :: otherCases, startLabel :: otherLabels, tailCode) = let val caseTarget = if isTail then newPReg() else targetReg (* Put in the case with a jump to the end of the sequence. 
*) val (codeThisCase, _, caseExited) = codeToICodeRev(c, context, isTail, SpecificPReg caseTarget, BlockLabel startLabel :: tailCode) val exitThisCase = if caseExited then codeThisCase else if isTail then returnInstruction(context, caseTarget, codeThisCase) else BlockFlow(Unconditional labelForExit) :: codeThisCase in codeCases(otherCases, otherLabels, exitThisCase) end | codeCases(NONE :: otherCases, _ :: otherLabels, tailCode) = codeCases(otherCases, otherLabels, tailCode) | codeCases ([], [], tailCode) = let (* We need to add labels for all the gaps we filled and also for a "default" label for the indexed-case instruction itself as well as any range checks. *) fun addDefault (startLabel, NONE, l) = BlockLabel startLabel :: l | addDefault (_, SOME _, l) = l fun asForward l = BlockLabel l val dLabs = map asForward extraDefaults @ tailCode val defLabels = ListPair.foldlEq addDefault dLabs (codeLabels, cases) val defaultTarget = if isTail then newPReg() else targetReg val (defaultCode, _, defaultExited) = codeToICodeRev(default, context, isTail, SpecificPReg defaultTarget, defLabels) in (* Put in the default. Because this is the last we don't need to jump round it. However if this is a tail and we haven't exited we put in a return. That way the case will always have exited if this is a tail. *) if isTail andalso not defaultExited then returnInstruction(context, defaultTarget, defaultCode) else defaultCode end | codeCases _ = raise InternalError "codeCases: mismatch" val codedCases = codeCases(cases, codeLabels, BlockFlow(IndexedBr codeLabels) :: BlockSimple(IndexedCaseOperation{testReg=testReg, workReg=workReg}) :: rangeCheck) (* We can now copy to the target. If we need to push the result this load will be converted into a push. *) val target = asTarget destination val copyToTarget = if isTail then codedCases else BlockSimple(LoadArgument{source=RegisterArgument targetReg, dest=target, kind=movePolyWord}) :: BlockLabel labelForExit :: codedCases in (copyToTarget, RegisterArgument target, isTail (* We have always exited on a tail. *)) end | codeToICodeRev(BICBeginLoop {loop, arguments}, context as { stackPtr, currHandler, overflowBlock, ...}, isTail, destination, tailCode) = let val target = asTarget destination fun codeArgs ([], tailCode) = ([], tailCode) | codeArgs (({value, addr}, _) :: rest, tailCode) = let val pr = newPReg() val () = Array.update(locToPregArray, addr, PregLocation pr) val (code, _, _) = codeToICodeRev(value, context, false, SpecificPReg pr, tailCode) val (pregs, othercode) = codeArgs(rest, code) in (pr::pregs, othercode) end val (loopRegs, argCode) = codeArgs(arguments, tailCode) val loopLabel = newLabel() val (loopBody, _, loopExited) = codeToICodeRev(loop, {loopArgs=SOME (loopRegs, loopLabel, stackPtr), stackPtr=stackPtr, currHandler=currHandler, overflowBlock=overflowBlock }, isTail, SpecificPReg target, BlockLabel loopLabel :: BlockSimple BeginLoop :: argCode) in (loopBody, RegisterArgument target, loopExited) end | codeToICodeRev(BICLoop args, context as {loopArgs=SOME (loopRegs, loopLabel, loopSp), stackPtr, currHandler, ...}, _, destination, tailCode) = let val target = asTarget destination (* Registers to receive the evaluated arguments. We can't put the values into the loop variables yet because the values could depend on the current values of the loop variables. 
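(* The range check for BICCase above uses a standard trick: after subtracting
   firstIndex, a single unsigned comparison against the number of cases rejects
   both values below the minimum (they wrap round to very large unsigned
   values) and values above the maximum.  A value-level sketch of the same
   test, with hypothetical names and ignoring tagging: *)
fun inCaseRange (firstIndex: word, numberOfCases: word) (testValue: word) : bool =
    (* Word subtraction wraps, so a testValue below firstIndex becomes huge. *)
    testValue - firstIndex < numberOfCases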
*) val argPRegs = map(fn _ => newPReg()) args val codeArgs = ListPair.foldlEq(fn ((arg, _), pr, l) => #1 (codeToICodeRev(arg, context, false, SpecificPReg pr, l))) tailCode (args, argPRegs) val jumpArgs = ListPair.mapEq(fn (s, l) => (RegisterArgument s, l)) (argPRegs, loopRegs) (* If we've allocated a container in the loop we have to remove it before jumping back. *) val stackReset = if loopSp = stackPtr then codeArgs else BlockSimple(ResetStackPtr{numWords=stackPtr-loopSp, preserveCC=false}) :: codeArgs val jumpLoop = JumpLoop{regArgs=jumpArgs, stackArgs=[], checkInterrupt=SOME[], workReg=NONE} (* "checkInterrupt" could result in a Interrupt exception so we treat this like a function call. *) val code = case currHandler of NONE => BlockFlow(Unconditional loopLabel) :: BlockSimple jumpLoop :: stackReset | SOME h => BlockOptionalHandle{call=jumpLoop, handler=h, label=loopLabel} :: stackReset in (code, RegisterArgument target, true) end | codeToICodeRev(BICLoop _, {loopArgs=NONE, ...}, _, _, _) = raise InternalError "BICLoop without BICBeginLoop" | codeToICodeRev(BICRaise exc, context as { currHandler, ...}, _, destination, tailCode) = let val packetReg = newPReg() val (code, _, _) = codeToICodeRev(exc, context, false, SpecificPReg packetReg, tailCode) val raiseCode = RaiseExceptionPacket{packetReg=packetReg} val block = case currHandler of NONE => BlockExit raiseCode | SOME h => BlockRaiseAndHandle(raiseCode, h) in (block :: code, RegisterArgument(asTarget destination), true (* Always exits *)) end | codeToICodeRev(BICHandle{exp, handler, exPacketAddr}, context as { stackPtr, loopArgs, overflowBlock, ... }, isTail, destination, tailCode) = let (* As with BICCond and BICCase we need to create a new register for the result in case we need to push it to the stack. *) val handleResult = newMergeReg() val handlerLab = newLabel() and startHandling = newLabel() val (bodyTarget, handlerTarget) = if isTail then (newPReg(), newPReg()) else (handleResult, handleResult) (* TODO: Even if we don't actually want a result we force one in here by using "asTarget". *) (* The expression cannot be treated as a tail because the handler has to be removed after. It may "exit" if it has raised an unconditional exception. If it has we mustn't generate a PopExceptionHandler because there won't be any result for resultReg. We need to add two words to the stack to account for the items pushed by PushExceptionHandler. We create an instruction to push the handler followed by a block fork to the start of the code and, potentially the handler, then a label to start the code that the handler is in effect for. *) val initialCode = BlockLabel startHandling :: BlockFlow(SetHandler{handler=handlerLab, continue=startHandling}) :: BlockSimple(PushExceptionHandler{workReg=newPReg()}) :: tailCode val (expCode, _, expExit) = codeToICodeRev(exp, {stackPtr=stackPtr+2, loopArgs=loopArgs, currHandler=SOME handlerLab, overflowBlock=overflowBlock}, false (* Not tail *), SpecificPReg bodyTarget, initialCode) (* If this is the tail we can replace the jump at the end of the handled code with returns. If the handler has exited we don't need a return there. Otherwise we need to add an unconditional jump to skip the handler. *) val (atExpEnd, skipExpLabel) = case (isTail, expExit) of (true, true) => (* Tail and exited. *) (expCode, NONE) | (true, false) => (* Tail and not exited. *) (returnInstruction(context, bodyTarget, BlockSimple(PopExceptionHandler{workReg=newPReg()}) :: expCode), NONE) | (false, true) => (* Not tail but exited. 
*) (expCode, NONE) | (false, false) => let val skipHandler = newLabel() in (BlockFlow(Unconditional skipHandler) :: BlockSimple(PopExceptionHandler{workReg=newPReg()}) :: expCode, SOME skipHandler) end (* Make a register to hold the exception packet and put eax into it. *) val packetAddr = newPReg() val () = Array.update(locToPregArray, exPacketAddr, PregLocation packetAddr) val (handleCode, _, handleExit) = codeToICodeRev(handler, context, isTail, SpecificPReg handlerTarget, BlockSimple(BeginHandler{workReg=newPReg(), packetReg=packetAddr}) :: BlockLabel handlerLab :: atExpEnd) val target = asTarget destination val afterHandler = case (isTail, handleExit) of (true, true) => (* Tail and exited. *) handleCode | (true, false) => (* Tail and not exited. *) returnInstruction(context, handlerTarget, handleCode) | (false, _) => (* Not tail. *) handleCode val addLabel = case skipExpLabel of SOME lab => BlockLabel lab:: afterHandler | NONE => afterHandler in (BlockSimple(LoadArgument{source=RegisterArgument handleResult, dest=target, kind=movePolyWord}) :: addLabel, RegisterArgument target, isTail) end | codeToICodeRev(BICTuple fields, context, _, destination, tailCode) = let (* TODO: This is a relic of the old fall-back code-generator. It required the result of a tuple to be at the top of the stack. It should be changed. *) val target = asTarget destination (* Actually we want this. *) val memAddr = newPReg() fun loadFields([], n, tlCode) = allocateWithProfileRev(n, 0w0, memAddr, tlCode) | loadFields(f :: rest, n, tlCode) = let (* Defer the evaluation if possible. We may have a constant that we can't move directly but it's better to load it after the allocation otherwise we will have to push the register if we need to GC. *) val (code1, source1, _) = codeToICodeRev(f, context, false, Allowed allowDefer, tlCode) val restAndAlloc = loadFields(rest, n+1, code1) val (code2, source, _) = moveIfNotAllowedRev(Allowed allowInMemMove, restAndAlloc, source1) val storeValue = BlockSimple(StoreArgument{ source=source, offset=n*Word.toInt wordSize, base=memAddr, index=memIndexOrObject, kind=movePolyWord, isMutable=false}) in storeValue :: code2 end val code = BlockSimple InitialisationComplete :: BlockSimple(LoadArgument{source=RegisterArgument memAddr, dest=target, kind=movePolyWord}) :: loadFields(fields, 0, tailCode) in (code, RegisterArgument target, false) end (* Copy the source tuple into the container. There are important special cases for both the source tuple and the container. If the source tuple is a BICTuple we have the fields and can store them without creating a tuple on the heap. If the destination is a local container we can store directly into the stack. 
*) | codeToICodeRev(BICSetContainer{container, tuple, filter}, context as {stackPtr, ...}, _, destination, tailCode) = let local fun createStore containerReg (source, destWord) = StoreArgument{source=source, offset=destWord*Word.toInt nativeWordSize, base=containerReg, index=NoMemIndex, kind=moveNativeWord, isMutable=false} in val findContainer = case container of BICExtract(BICLoadLocal l) => ( case Array.sub(locToPregArray, l) of ContainerLocation{container, stackOffset} => let fun storeToStack(source, destWord) = StoreToStack{source=source, container=container, field=destWord, stackOffset=stackPtr-stackOffset+destWord} in SOME storeToStack end | _ => NONE ) | _ => NONE val (codeContainer, storeInstr) = case findContainer of SOME storeToStack => (tailCode, storeToStack) | NONE => let val containerTarget = newPReg() val (codeContainer, _, _) = codeToICodeRev(container, context, false, SpecificPReg containerTarget, tailCode) in (codeContainer, createStore containerTarget) end end val filterLength = BoolVector.length filter val code = case tuple of BICTuple cl => let (* In theory it's possible that the tuple could contain fields that are not used but nevertheless need to be evaluated for their side-effects. Create all the fields and push to the stack. *) fun codeField(arg, (regs, tailCode)) = let val (c, r, _) = codeToICodeRev(arg, context, false, Allowed allowInMemMove, tailCode) in (r :: regs, c) end val (pregsRev, codeFields) = List.foldl codeField ([], codeContainer) cl val pregs = List.rev pregsRev fun copyField(srcReg, (sourceWord, destWord, tailCode)) = if sourceWord < filterLength andalso BoolVector.sub(filter, sourceWord) then (sourceWord+1, destWord+1, BlockSimple(storeInstr(srcReg, destWord)) :: tailCode) else (sourceWord+1, destWord, tailCode) val (_, _, resultCode) = List.foldl copyField (0, 0, codeFields) pregs in resultCode end | tuple => let (* Copy a heap tuple. It is possible that this is another container in which case we must load the fields directly. We mustn't load its address and then copy because loading the address would be the last reference and might cause the container to be reused prematurely. *) val findContainer = case tuple of BICExtract(BICLoadLocal l) => ( case Array.sub(locToPregArray, l) of ContainerLocation{container, stackOffset} => let fun getAddr sourceWord = StackLocation{wordOffset=stackPtr-stackOffset+sourceWord, container=container, field=sourceWord, cache=NONE} in SOME getAddr end | _ => NONE ) | _ => NONE val (codeTuple, loadField) = case findContainer of SOME getAddr => (codeContainer, getAddr) | NONE => let val tupleTarget = newPReg() val (codeTuple, _, _) = codeToICodeRev(tuple, context, false, SpecificPReg tupleTarget, codeContainer) fun loadField sourceWord = wordOffsetAddress(sourceWord, tupleTarget) in (codeTuple, loadField) end fun copyContainer(sourceWord, destWord, tailCode) = if sourceWord = filterLength then tailCode else if BoolVector.sub(filter, sourceWord) then let val loadReg = newPReg() val code = BlockSimple(storeInstr(RegisterArgument loadReg, destWord)) :: BlockSimple(LoadArgument{source=loadField sourceWord, dest=loadReg, kind=movePolyWord}) :: tailCode in copyContainer(sourceWord+1, destWord+1, code) end else copyContainer(sourceWord+1, destWord, tailCode) in copyContainer(0, 0, codeTuple) end in moveIfNotAllowedRev(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeRev(BICTagTest{test, tag=tagValue, ...}, context, _, destination, tailCode) = (* Check the "tag" word of a union (datatype). N.B. 
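(* The filter in BICSetContainer above selects which fields of the source
   tuple are stored; selected fields are packed into consecutive slots of the
   container.  A value-level sketch of that packing, using an ordinary list in
   place of the container (illustration only, not the generated code): *)
fun filterFields (filter: BoolVector.vector) (fields: 'a list) : 'a list =
    List.rev(#2(List.foldl
        (fn (f, (i, acc)) =>
            (i + 1,
             if i < BoolVector.length filter andalso BoolVector.sub(filter, i)
             then f :: acc else acc))
        (0, []) fields))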
Not the same as testing the tag bit of a word. *) let val ccRef = newCCRef() val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (testCode, tagArg, _) = codeToICodeRev(test, context, false, Allowed memOrReg, tailCode) val target = asTarget destination in (makeBoolResultRev(JE, ccRef, target, (* Use CompareLiteral because the tag must fit in 32-bits. *) BlockSimple(CompareLiteral{arg1=tagArg, arg2=tag(Word.toLargeInt tagValue), opSize=polyWordOpSize, ccRef=ccRef}) :: testCode), RegisterArgument target, false) end | codeToICodeRev(BICLoadOperation instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeLoad(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICStoreOperation instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeStore(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end | codeToICodeRev(BICBlockOperation ({kind=BlockOpEqualByte, sourceLeft, destRight, length}), context, _, destination, tailCode) = let val vec1Reg = newUReg() and vec2Reg = newUReg() val ccRef = newCCRef() val (leftCode, leftUntag, {base=leftBase, offset=leftOffset, index=leftIndex, ...}) = codeAddressRev(sourceLeft, true, context, tailCode) val (rightCode, rightUntag, {base=rightBase, offset=rightOffset, index=rightIndex, ...}) = codeAddressRev(destRight, true, context, leftCode) val (lengthCode, lengthUntag, lengthArg) = codeAsUntaggedToRegRev(length, false (* unsigned *), context, rightCode) val target = asTarget destination val code = makeBoolResultRev(JE, ccRef, target, BlockSimple(CompareByteVectors{ vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lengthArg, ccRef=ccRef }) :: lengthUntag @ BlockSimple(loadAddress{base=rightBase, offset=rightOffset, index=rightIndex, dest=vec2Reg}) :: rightUntag @ BlockSimple(loadAddress{base=leftBase, offset=leftOffset, index=leftIndex, dest=vec1Reg}) :: leftUntag @ lengthCode) in (code, RegisterArgument target, false) end | codeToICodeRev(BICBlockOperation instr, context, isTail, destination, tailCode) = let val (code, dest, haveExited) = codeToICodeBlock(instr, context, isTail, destination) in (revApp(code, tailCode), dest, haveExited) end and codeToICodeUnaryRev({oper=BuiltIns.NotBoolean, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val ccRef = newCCRef() val allow = Allowed {anyConstant=false, const32s=false, memAddr=true, existingPreg=true} val (argCode, testDest, _) = codeToICodeRev(arg1, context, false, allow, tailCode) in (* Test the argument and return a boolean result. If either the argument is a condition or the result is used in a test this will be better than using XOR. *) (makeBoolResultRev(JNE, ccRef, target, BlockSimple(CompareLiteral{arg1=testDest, arg2=tag 1, opSize=polyWordOpSize, ccRef=ccRef}) :: argCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.IsTaggedValue, arg1}, context, _, destination, tailCode) = let val ccRef = newCCRef() val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (testCode, testResult, _) = codeToICodeRev(arg1, context, false, Allowed memOrReg, tailCode) (* Test the tag bit. This sets the zero bit if the value is untagged. 
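(* NotBoolean above is compiled as a comparison rather than an XOR: comparing
   the argument with tag 1 and selecting on "not equal" yields the negation,
   and the condition code can often be reused directly by an enclosing test.
   This presumes the usual tagged booleans, false = tag 0 and true = tag 1, as
   the comparison with tag 1 suggests.  On the tagged words themselves, with
   tag n = 2*n+1 (illustration only): *)
fun taggedNot (b: word) : word =
    if b <> 0w3 (* tag 1 = true *) then 0w3 else 0w1 (* tag 0 = false *)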
*) val target = asTarget destination in (makeBoolResultRev(JNE, ccRef, target, BlockSimple(TestTagBit{arg=testResult, ccRef=ccRef}) :: testCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.MemoryCellLength, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val argReg1 = newUReg() and argReg2 = newUReg() and argReg3 = newUReg() (* These are untagged until the tag is put in. *) and ccRef1 = newCCRef() and ccRef2 = newCCRef() and ccRef3 = newCCRef() (* Get the length of a memory cell (heap object). We need to mask out the top byte containing the flags and to tag the result. The mask is 56 bits on 64-bit which won't fit in an inline constant. Since we have to shift it anyway we might as well do this by shifts. *) val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=OR, resultReg=target, operand1=argReg3, operand2=IntegerConstant 1, ccRef=ccRef3, opSize=polyWordOpSize}) :: BlockSimple(ShiftOperation{shift=SHR, resultReg=argReg3, operand=argReg2, shiftAmount=IntegerConstant 7 (* 8-tagshift*), ccRef=ccRef2, opSize=polyWordOpSize }) :: BlockSimple(ShiftOperation{shift=SHL, resultReg=argReg2, operand=argReg1, shiftAmount=IntegerConstant 8, ccRef=ccRef1, opSize=polyWordOpSize }) :: BlockSimple(LoadArgument{source=wordOffsetAddress(~1, addrReg), dest=argReg1, kind=movePolyWord}) :: argCode, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.MemoryCellFlags, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val argReg1 = newUReg() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(TagValue{ source=argReg1, dest=target, isSigned=false, opSize=OpSize32 }) :: BlockSimple(LoadArgument{source=MemoryLocation{offset= ~1, base=addrReg, index=memIndexOrObject, cache=NONE}, dest=argReg1, kind=MoveByte}) :: argCode, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.ClearMutableFlag, arg1}, context, _, destination, tailCode) = let val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) in moveIfNotAllowedRev(destination, BlockSimple(LockMutable{addr=addrReg}) :: argCode, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeUnaryRev({oper=BuiltIns.LongWordToTagged, arg1}, context, _, destination, tailCode) = let (* This is exactly the same as StringLengthWord at the moment. TODO: introduce a new ICode entry so that the next stage can optimise longword operations. *) val target = asTarget destination val argReg1 = newUReg() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val code = BlockSimple(TagValue{ source=argReg1, dest=target, isSigned=false, opSize=polyWordOpSize }) :: (* Use movePolyWord even on 32-in-64 since we're producing a 32-bit value anyway. 
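(* The MemoryCellLength sequence above clears the top (flags) byte of the
   header word and tags the result in one pass: shifting left by 8 discards the
   flags byte, shifting right by only 7 leaves the length doubled, and OR-ing
   in 1 sets the tag bit.  The same arithmetic on a 64-bit header word,
   assuming the length occupies the low 56 bits (Word64 is used here purely for
   the illustration): *)
fun taggedLengthFromHeader (header: Word64.word) : Word64.word =
    Word64.orb(Word64.>>(Word64.<<(header, 0w8), 0w7), 0w1)
(* e.g. length 5 with flags byte 0wx40 in the top byte:
   taggedLengthFromHeader 0wx4000000000000005 = 0wxB, i.e. tag 5. *)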
*) BlockSimple(LoadArgument{source=wordAt addrReg, dest=argReg1, kind=movePolyWord}) :: argCode in (code, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.SignedToLongWord, arg1}, context, _, destination, tailCode) = let val addrReg = newPReg() and untagArg = newUReg() val (argCode, argReg1) = codeToPRegRev(arg1, context, tailCode) val (signExtend, sxReg) = case targetArch of ObjectId32Bit => let val sReg = newUReg() in ([BlockSimple(SignExtend32To64{source=RegisterArgument argReg1, dest=sReg})], sReg) end | _ => ([], argReg1) val code = BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untagArg, dest=addrReg, saveRegs=[]}) :: BlockSimple(UntagValue{source=sxReg, dest=untagArg, isSigned=true, cache=NONE, opSize=nativeWordOpSize}) :: signExtend @ argCode in moveIfNotAllowedRev(destination, code, RegisterArgument addrReg) end | codeToICodeUnaryRev({oper=BuiltIns.UnsignedToLongWord, arg1}, context, _, destination, tailCode) = let val addrReg = newPReg() and untagArg = newUReg() val (argCode, argReg1) = codeToPRegRev(arg1, context, tailCode) val code = BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untagArg, dest=addrReg, saveRegs=[]}) :: (* We can just use a polyWord operation to untag the unsigned value. *) BlockSimple(UntagValue{source=argReg1, dest=untagArg, isSigned=false, cache=NONE, opSize=polyWordOpSize}) :: argCode in moveIfNotAllowedRev(destination, code, RegisterArgument addrReg) end | codeToICodeUnaryRev({oper=BuiltIns.RealNeg precision, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val fpRegSrc = newUReg() and fpRegDest = newUReg() and sse2ConstReg = newUReg() (* The SSE2 code uses an SSE2 logical operation to flip the sign bit. This requires the values to be loaded into registers first because the logical operations require 128-bit operands. *) val (argCode, aReg1) = codeToPReg(arg1, context) (* Double precision values are always boxed and single precision values if they won't fit in a word. Otherwise we can using tagging. *) open BuiltIns val load = if precision = PrecDouble then BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveDouble}) else if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument aReg1, dest=fpRegSrc, cache=NONE}) else BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveFloat}) val code = case fpMode of FPModeX87 => [BlockSimple(X87FPUnaryOps{ fpOp=FCHS, dest=fpRegDest, source=fpRegSrc})] | FPModeSSE2 => let (* In single precision mode the sign bit is in the low 32-bits. There may be a better way to load it. 
*) val signBit = if precision = PrecDouble then realSignBit else floatSignBit in [BlockSimple(LoadArgument{source=AddressConstant signBit, dest=sse2ConstReg, kind=MoveDouble}), BlockSimple(SSE2FPBinary{opc=SSE2BXor, resultReg=fpRegDest, arg1=fpRegSrc, arg2=RegisterArgument sse2ConstReg})] end val result = boxOrTagReal(fpRegDest, target, precision) in (revApp(argCode @ load :: code @ result, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.RealAbs precision, arg1}, context, _, destination, tailCode) = let val target = asTarget destination val fpRegSrc = newUReg() and fpRegDest = newUReg() and sse2ConstReg = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) open BuiltIns val load = if precision = PrecDouble then BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveDouble}) else if wordSize = 0w8 then BlockSimple(UntagFloat{source=RegisterArgument aReg1, dest=fpRegSrc, cache=NONE}) else BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpRegSrc, kind=MoveFloat}) val code = case fpMode of FPModeX87 => [BlockSimple(X87FPUnaryOps{ fpOp=FABS, dest=fpRegDest, source=fpRegSrc})] | FPModeSSE2 => let val mask = if precision = PrecDouble then realAbsMask else floatAbsMask in [BlockSimple(LoadArgument{source=AddressConstant mask, dest=sse2ConstReg, kind=MoveDouble}), BlockSimple(SSE2FPBinary{opc=SSE2BAnd, resultReg=fpRegDest, arg1=fpRegSrc, arg2=RegisterArgument sse2ConstReg})] end val result = boxOrTagReal(fpRegDest, target, precision) in (revApp(argCode @ load :: code @ result, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.RealFixedInt precision, arg1}, context, _, destination, tailCode) = let open BuiltIns val target = asTarget destination val untagReg = newUReg() and fpReg = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) val floatOp = case fpMode of FPModeX87 => X87Float{ dest=fpReg, source=RegisterArgument untagReg} | FPModeSSE2 => SSE2IntToReal{ dest=fpReg, source=RegisterArgument untagReg, isDouble=precision=PrecDouble} val code = argCode @ [BlockSimple(UntagValue{source=aReg1, dest=untagReg, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple floatOp] @ boxOrTagReal(fpReg, target, precision) in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.FloatToDouble, arg1}, context, _, destination, tailCode) = let (* Convert a single precision floating point value to double precision. *) val target = asTarget destination val fpReg = newUReg() and fpReg2 = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) (* MoveFloat always converts from single to double-precision. 
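(* RealNeg and RealAbs above operate on the bit pattern of the float: XOR with
   the sign bit flips the sign, AND with its complement clears it.  For IEEE
   754 doubles the standard patterns are shown below; the realSignBit and
   realAbsMask cells referred to in the code presumably hold constants of this
   shape, but that is an assumption here, not taken from this file. *)
val doubleSignBit : Word64.word = 0wx8000000000000000  (* sign bit of a double *)
val doubleAbsMask : Word64.word = 0wx7FFFFFFFFFFFFFFF  (* everything but the sign *)
(* negation: bits xorb doubleSignBit;  absolute value: bits andb doubleAbsMask *)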
*) val unboxOrUntag = case (fpMode, wordSize) of (FPModeX87, _) => [BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpReg2, kind=MoveFloat})] | (FPModeSSE2, 0w4) => [BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpReg, kind=MoveFloat}), BlockSimple(SSE2FPUnary{opc=SSE2UFloatToDouble, resultReg=fpReg2, source=RegisterArgument fpReg})] | (FPModeSSE2, _) => [BlockSimple(UntagFloat{source=RegisterArgument aReg1, dest=fpReg, cache=NONE}), BlockSimple(SSE2FPUnary{opc=SSE2UFloatToDouble, resultReg=fpReg2, source=RegisterArgument fpReg})] val boxFloat = case fpMode of FPModeX87 => BoxX87Double | FPModeSSE2 => BoxSSE2Double val code = argCode @ unboxOrUntag @ [BlockSimple(BoxValue{boxKind=boxFloat, source=fpReg2, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.DoubleToFloat, arg1}, context, _, destination, tailCode) = let (* Convert a double precision value to a single precision using the current rounding mode. This is simpler than setting the rounding mode and then restoring it. *) val target = asTarget destination val fpReg = newUReg() and fpReg2 = newUReg() val (argCode, aReg1) = codeToPReg(arg1, context) (* In 32-bit mode we need to box the float. In 64-bit mode we can tag it. *) val boxOrTag = case fpMode of FPModeX87 => [BlockSimple(BoxValue{boxKind=BoxX87Float, source=fpReg, dest=target, saveRegs=[]})] | FPModeSSE2 => BlockSimple(SSE2FPUnary{opc=SSE2UDoubleToFloat, resultReg=fpReg2, source=RegisterArgument fpReg}) :: boxOrTagReal(fpReg2, target, BuiltIns.PrecSingle) val code = argCode @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=fpReg, kind=MoveDouble})] @ boxOrTag in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.RealToInt(precision, rndMode), arg1}, context, _, destination, tailCode) = let val target = asTarget destination val chkOverflow = newCCRef() val convResult = newUReg() and wrkReg2 = newUReg() (* Convert a floating point value to an integer. We need to raise overflow if the result is out of range. We first convert the value to 32/64 bits then tag it. An overflow can happen either because the real number does not fit in 32/64 bits or if it is not a 31/63 bit value. Fortunately, if the first conversion fails the result is a value that causes an overflow when we try it shift it so the check for overflow only needs to happen there. There is an SSE2 instruction that implements truncation (round to zero) directly but in other cases we need to set the rounding mode. 
*) val doConvert = case (fpMode, precision) of (FPModeX87, _) => let val fpReg = newUReg() val (argCode, aReg) = codeToPReg(arg1, context) fun doConvert() = [BlockSimple(X87RealToInt{source=fpReg, dest=convResult })] in argCode @ [BlockSimple(LoadArgument{source=wordAt aReg, dest=fpReg, kind=MoveDouble})] @ setAndRestoreRounding(rndMode, doConvert) end | (FPModeSSE2, BuiltIns.PrecDouble) => let val (argCode, argReg) = codeToPReg(arg1, context) fun doConvert() = [BlockSimple( SSE2RealToInt{source=wordAt argReg, dest=convResult, isDouble=true, isTruncate = rndMode = IEEEReal.TO_ZERO }) ] in argCode @ ( case rndMode of IEEEReal.TO_ZERO => doConvert() | _ => setAndRestoreRounding(rndMode, doConvert)) end | (FPModeSSE2, BuiltIns.PrecSingle) => let val (argCode, aReg) = codeToPReg(arg1, context) val fpReg = newUReg() fun doConvert() = [BlockSimple( SSE2RealToInt{source=RegisterArgument fpReg, dest=convResult, isDouble=false, isTruncate = rndMode = IEEEReal.TO_ZERO })] in argCode @ [BlockSimple(UntagFloat{source=RegisterArgument aReg, dest=fpReg, cache=NONE})] @ ( case rndMode of IEEEReal.TO_ZERO => doConvert() | _ => setAndRestoreRounding(rndMode, doConvert) ) end val checkAndTag = BlockSimple(ShiftOperation{ shift=SHL, resultReg=wrkReg2, operand=convResult, shiftAmount=IntegerConstant 1, ccRef=chkOverflow, opSize=polyWordOpSize}) :: checkOverflow context chkOverflow @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=wrkReg2, operand2=IntegerConstant 1, ccRef = newCCRef(), opSize=polyWordOpSize})] in (revApp(doConvert @ checkAndTag, tailCode), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.TouchAddress, arg1}, context, _, destination, tailCode) = let (* Put the value in a register. This is not entirely necessary but ensures that if the value is a constant the constant will be included in the code. *) val (argCode, aReg) = codeToPRegRev(arg1, context, tailCode) in moveIfNotAllowedRev(destination, BlockSimple(TouchArgument{source=aReg}) :: argCode, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeUnaryRev({oper=BuiltIns.AllocCStack, arg1}, context, _, destination, tailCode) = (* Allocate space on the C stack. Assumes that the argument has already been aligned. *) let val target = asTarget destination val (argCode, untaggedArg) = case arg1 of BICConstnt(value, _) => (tailCode, IntegerConstant(Word.toLargeInt(toShort value)) (* Leave untagged *)) | _ => let val (argCode, aReg) = codeToPRegRev(arg1, context, tailCode) val arg1Untagged = newUReg() in ( BlockSimple(UntagValue{source=aReg, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}) :: argCode, RegisterArgument arg1Untagged ) end val argReg1 = newUReg() and resReg1 = newUReg() val code = BlockSimple(BoxValue{boxKind=BoxLargeWord, source=resReg1, dest=target, saveRegs=[]}) :: BlockSimple(StoreMemReg{offset=memRegCStackPtr, source=resReg1, kind=moveNativeWord}) :: BlockSimple(ArithmeticFunction{oper=SUB, resultReg=resReg1, operand1=argReg1, operand2=untaggedArg, ccRef=newCCRef(), opSize=nativeWordOpSize}) :: BlockSimple(LoadMemReg{offset=memRegCStackPtr, dest=argReg1, kind=moveNativeWord}) :: argCode in (code, RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.LockMutex, arg1}, context, _, destination, tailCode) = let (* Temporarily don't bother with the spin-lock. 
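(* The checkAndTag sequence above turns the untagged conversion result n into
   the tagged value 2*n+1 by shifting left one place and then adding one; the
   shift also sets the overflow flag when n does not fit in a tagged 31/63-bit
   value, which is what checkOverflow tests.  A value-level sketch of just the
   tagging step (the overflow test itself is a hardware flag and is only hinted
   at here): *)
fun tagFixedInt (n: int) : int =
    (* assumes n is in the tagged range; the generated code raises Overflow otherwise *)
    2 * n + 1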
*) val incrReg = newUReg() and resultReg = newUReg() and ccRef = newCCRef() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val target = asTarget destination val code = BlockSimple(CompareLiteral{arg1=RegisterArgument resultReg, arg2=0, opSize=nativeWordOpSize, ccRef=ccRef }) :: BlockSimple(AtomicExchangeAndAdd{base=addrReg, source=incrReg, resultReg=resultReg}) :: BlockSimple(LoadArgument{source=IntegerConstant 1, dest=incrReg, kind=moveNativeWord}) :: argCode in (makeBoolResultRev(JE, ccRef, target, code), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.TryLockMutex, arg1}, context, _, destination, tailCode) = let val toStoreReg = newUReg() and compareReg = newUReg() and resultReg = newUReg() and ccRef = newCCRef() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val target = asTarget destination (* If the current value is zero (unlocked) set it to one (locked) otherwise do nothing. *) val code = BlockSimple(AtomicCompareAndExchange{base=addrReg, compare=compareReg, toStore=toStoreReg, resultReg=resultReg, ccRef=ccRef}) :: BlockSimple(LoadArgument{source=IntegerConstant 0, dest=compareReg, kind=moveNativeWord}) :: BlockSimple(LoadArgument{source=IntegerConstant 1, dest=toStoreReg, kind=moveNativeWord}) :: argCode in (makeBoolResultRev(JE, ccRef, target, code), RegisterArgument target, false) end | codeToICodeUnaryRev({oper=BuiltIns.UnlockMutex, arg1}, context, _, destination, tailCode) = let (* Unlock a mutex - (atomically) exchange with 0 (unlock) and return true if the previous value was 1 i.e. only this thread has locked it. *) val newValReg = newUReg() and resultReg = newUReg() and ccRef = newCCRef() val (argCode, addrReg) = codeToPRegRev(arg1, context, tailCode) val target = asTarget destination val code = BlockSimple(CompareLiteral{arg1=RegisterArgument resultReg, arg2=1, opSize=nativeWordOpSize, ccRef=ccRef }) :: BlockSimple(AtomicExchange{base=addrReg, source=newValReg, resultReg=resultReg}) :: BlockSimple(LoadArgument{source=IntegerConstant 0, dest=newValReg, kind=moveNativeWord}) :: argCode in (makeBoolResultRev(JE, ccRef, target, code), RegisterArgument target, false) end and codeToICodeBinaryRev({oper=BuiltIns.WordComparison{test, isSigned}, arg1, arg2=BICConstnt(arg2Value, _)}, context, _, destination, tailCode) = let (* Comparisons. Because this is also used for pointer equality and even for exception matching it is perfectly possible that the argument could be an address. The higher levels used to generate this for pointer equality. *) val ccRef = newCCRef() val comparison = (* If the argument is a tagged value that will fit in 32-bits we can use the literal version. Use toLargeIntX here because the value will be sign-extended even if we're actually doing an unsigned comparison. *) if isShort arg2Value andalso is32bit(tag(Word.toLargeIntX(toShort arg2Value))) then let val allow = Allowed {anyConstant=false, const32s=false, memAddr=true, existingPreg=true} in (* We're often comparing with a character or a string length field that has to be untagged. In that case we can avoid loading it into a register and untagging it by doing the comparison directly. 
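(* The three mutex operations above compile to single atomic instructions:
   LockMutex adds one with an atomic exchange-and-add and succeeds if the old
   value was zero; TryLockMutex does a compare-and-exchange of 0 with 1; and
   UnlockMutex atomically exchanges the cell with 0 and reports whether the old
   value was exactly 1, i.e. no other thread was waiting.  A sketch of how a
   caller might combine them with CPUPause for a bounded spin before blocking -
   the function names here are hypothetical, not the ML library API: *)
fun spinThenLock (tryLock: unit -> bool, pause: unit -> unit, block: unit -> unit) =
    let fun spin 0 = block ()                 (* give up spinning and block *)
          | spin n = if tryLock () then () else (pause (); spin (n - 1))
    in spin 100 end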
*) case arg1 of BICLoadOperation{kind=LoadStoreUntaggedUnsigned, address} => let val (codeBaseIndex, codeUntag, memLoc) = codeAddressRev(address, false, context, tailCode) val literal = Word.toLargeIntX(toShort arg2Value) in BlockSimple(CompareLiteral{arg1=MemoryLocation memLoc, arg2=literal, opSize=polyWordOpSize, ccRef=ccRef}) :: codeUntag @ codeBaseIndex end | BICLoadOperation{kind=LoadStoreMLByte _, address} => let val (codeBaseIndex, codeUntag, {base, index, offset, ...}) = codeAddressRev(address, true, context, tailCode) val _ = toShort arg2Value >= 0w0 andalso toShort arg2Value < 0w256 orelse raise InternalError "Compare byte not a byte" val literal = Word8.fromLargeWord(Word.toLargeWord(toShort arg2Value)) in BlockSimple(CompareByteMem{arg1={base=base, index=index, offset=offset}, arg2=literal, ccRef=ccRef}) :: codeUntag @ codeBaseIndex end | BICUnary({oper=BuiltIns.MemoryCellFlags, arg1}) => (* This occurs particularly in arbitrary precision comparisons. *) let val (baseCode, baseReg) = codeToPRegRev(arg1, context, tailCode) val _ = toShort arg2Value >= 0w0 andalso toShort arg2Value < 0w256 orelse raise InternalError "Compare memory cell not a byte" val literal = Word8.fromLargeWord(Word.toLargeWord(toShort arg2Value)) in BlockSimple(CompareByteMem{arg1={base=baseReg, index=memIndexOrObject, offset= ~1}, arg2=literal, ccRef=ccRef}) :: baseCode end | _ => let (* TODO: We could include rarer cases of tagging by looking at the code and seeing if it's a TagValue. *) val (testCode, testDest, _) = codeToICodeRev(arg1, context, false, allow, tailCode) val literal = tag(Word.toLargeIntX(toShort arg2Value)) in BlockSimple(CompareLiteral{arg1=testDest, arg2=literal, opSize=polyWordOpSize, ccRef=ccRef}) :: testCode end end else (* Addresses or larger values. We need to use a register comparison. *) let val (testCode, testReg) = codeToPRegRev(arg1, context, tailCode) val arg2Arg = constantAsArgument arg2Value in BlockSimple(WordComparison{arg1=testReg, arg2=arg2Arg, ccRef=ccRef, opSize=polyWordOpSize}) :: testCode end val target = asTarget destination in (makeBoolResultRev(testAsBranch(test, isSigned, true), ccRef, target, comparison), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordComparison{test, isSigned}, arg1=BICConstnt(arg1Value, _), arg2}, context, _, destination, tailCode) = let (* If we have the constant first we need to reverse the test so the first argument is a register. *) val ccRef = newCCRef() val comparison = if isShort arg1Value andalso is32bit(tag(Word.toLargeIntX(toShort arg1Value))) then let val allow = Allowed {anyConstant=false, const32s=false, memAddr=true, existingPreg=true} val (testCode, testDest, _) = codeToICodeRev(arg2, context, false, allow, tailCode) val literal = tag(Word.toLargeIntX(toShort arg1Value)) in BlockSimple(CompareLiteral{arg1=testDest, arg2=literal, opSize=polyWordOpSize, ccRef=ccRef}) :: testCode end else (* Addresses or larger values. We need to use a register comparison. 
*) let val (testCode, testReg) = codeToPRegRev(arg2, context, tailCode) val arg1Arg = constantAsArgument arg1Value in BlockSimple(WordComparison{arg1=testReg, arg2=arg1Arg, ccRef=ccRef, opSize=polyWordOpSize}) :: testCode end val target = asTarget destination in (makeBoolResultRev(testAsBranch(leftRightTest test, isSigned, true), ccRef, target, comparison), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordComparison {test, isSigned}, arg1, arg2}, context, _, destination, tailCode) = let val ccRef = newCCRef() val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (arg1Code, arg1Result, _) = codeToICodeRev(arg1, context, false, Allowed memOrReg, tailCode) val (arg2Code, arg2Result, _) = codeToICodeRev(arg2, context, false, Allowed memOrReg, arg1Code) val target = asTarget destination val code = case (arg1Result, arg2Result) of (RegisterArgument arg1Reg, arg2Result) => makeBoolResultRev(testAsBranch(test, isSigned, true), ccRef, target, BlockSimple(WordComparison{arg1=arg1Reg, arg2=arg2Result, ccRef=ccRef, opSize=polyWordOpSize}) :: arg2Code) | (arg1Result, RegisterArgument arg2Reg) => (* The second argument is in a register - switch the sense of the test. *) makeBoolResultRev(testAsBranch(leftRightTest test, isSigned, true), ccRef, target, BlockSimple(WordComparison{arg1=arg2Reg, arg2=arg1Result, ccRef=ccRef, opSize=polyWordOpSize}) :: arg2Code) | (arg1Result, arg2Result) => let (* Have to load an argument - pick the first. *) val arg1Reg = newPReg() in makeBoolResultRev(testAsBranch(test, isSigned, true), ccRef, target, BlockSimple(WordComparison{arg1=arg1Reg, arg2=arg2Result, ccRef=ccRef, opSize=polyWordOpSize}) :: BlockSimple(LoadArgument{source=arg1Result, dest=arg1Reg, kind=movePolyWord}) :: arg2Code) end in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.PointerEq, arg1, arg2}, context, isTail, destination, tailCode) = (* Equality of general values which can include pointers. This can be treated exactly as a word equality. It has to be analysed differently for indexed cases. *) codeToICodeBinaryRev({oper=BuiltIns.WordComparison{test=BuiltIns.TestEqual, isSigned=false}, arg1=arg1, arg2=arg2}, context, isTail, destination, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.FixedPrecisionArith oper, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val code = codeFixedPrecisionArith(oper, arg1, arg2, context, target, checkOverflow context) in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithAdd, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination (* If the argument is a constant we can subtract the tag beforehand. N.B. it is possible to have type-incorrect values in dead code. i.e. code that will never be executed because of a run-time check. 
*) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef = newCCRef(), opSize=polyWordOpSize}) :: arg1Code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithAdd, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg2Code, aReg2) = codeToPRegRev(arg2, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg2, operand2=IntegerConstant constVal, ccRef = newCCRef(), opSize=polyWordOpSize}) :: arg2Code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) (* Use LEA to do the addition since we're not concerned with overflow. This is shorter than subtracting the tag and adding the values and also moves the result into the appropriate register. *) val code = arg1Code @ arg2Code @ [BlockSimple(LoadEffectiveAddress{base=SOME aReg1, offset= ~1, index=MemIndex1 aReg2, dest=target, opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithSub, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPRegRev(arg1, context, tailCode) in (BlockSimple(ArithmeticFunction{oper=SUB, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef=newCCRef(), opSize=polyWordOpSize}) :: arg1Code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val aReg3 = newPReg() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val code = arg1Code @ arg2Code @ (* Do the subtraction and add in the tag bit. This could be reordered if we have cascaded operations since we don't need to check for overflow. 
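(* For tagged words tag(a) = 2*a+1, so the LEA form of addition above computes
   tag(a) + tag(b) - 1 = 2*(a+b) + 1 = tag(a+b) in a single instruction, and
   subtraction computes tag(a) - tag(b) + 1 = tag(a-b).  Spelled out on plain
   words (an illustration of the identities, not the generated code): *)
fun tagW (a: word) = 0w2 * a + 0w1
fun taggedAdd (ta: word, tb: word) = ta + tb - 0w1   (* = tagW(a+b) *)
fun taggedSub (ta: word, tb: word) = ta - tb + 0w1   (* = tagW(a-b) *)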
*) [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=aReg1, operand2=RegisterArgument aReg2, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg3, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMult, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = codeMultiplyConstantWordRev(arg1, context, destination, if isShort value then toShort value else 0w0, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMult, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = codeMultiplyConstantWordRev(arg2, context, destination, if isShort value then toShort value else 0w0, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val arg1Untagged = newUReg() and arg2Untagged = newUReg() and resUntagged = newUReg() val code = arg1Code @ arg2Code @ (* Shift one argument and subtract the tag from the other. It's possible this could be reordered if we have a value that is already untagged. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=arg2Untagged, operand1=aReg2, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=arg1Untagged, operand2=RegisterArgument arg2Untagged, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() val code = arg1Code @ arg2Code @ (* Shift both of the arguments to remove the tags. We don't test for zero here - that's done explicitly. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = false, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=quotient, dest=target, isSigned=false, opSize=polyWordOpSize })] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith BuiltIns.ArithMod, arg1, arg2}, context, _, destination, tailCode) = let (* Identical to Quot except that the result is the remainder. 
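(* Multiplication above untags one operand fully (giving a), merely strips the
   tag bit from the other (giving 2*b), multiplies to get 2*a*b and adds the
   tag back, so only one shift is needed.  Division and Mod untag both
   operands, use the hardware divide and re-tag the quotient or remainder.
   The same identities on plain words (illustration only; the zero-divisor
   check is done separately in the compiled code): *)
fun taggedMul (ta: word, tb: word) =
    (ta div 0w2) * (tb - 0w1) + 0w1                 (* = tag(a*b) *)
fun taggedDiv (ta: word, tb: word) =
    0w2 * ((ta div 0w2) div (tb div 0w2)) + 0w1     (* = tag(a div b) *)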
*) val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() val code = arg1Code @ arg2Code @ (* Shift both of the arguments to remove the tags. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = false, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=remainder, dest=target, isSigned=false, opSize=polyWordOpSize })] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordArith _, ...}, _, _, _, _) = raise InternalError "codeToICodeNonRev: WordArith - unimplemented operation" | codeToICodeBinaryRev({oper=BuiltIns.WordLogical logOp, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) (* Use a semitagged value for XOR. This preserves the tag bit. Use toLargeIntX here because the operations will sign-extend 32-bit values. *) val constVal = if isShort value then (case logOp of BuiltIns.LogicalXor => semitag | _ => tag) (Word.toLargeIntX(toShort value)) else 0 val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. *) val opSize = if logOp = BuiltIns.LogicalAnd andalso constVal <= 0xffffffff andalso constVal >= 0 then OpSize32 else polyWordOpSize val code = arg1Code @ [BlockSimple(ArithmeticFunction{oper=oper, resultReg=target, operand1=arg1Reg, operand2=IntegerConstant constVal, ccRef=newCCRef(), opSize=opSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical logOp, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg2Code, arg2Reg) = codeToPReg(arg2, context) (* Use a semitagged value for XOR. This preserves the tag bit. *) val constVal = if isShort value then (case logOp of BuiltIns.LogicalXor => semitag | _ => tag) (Word.toLargeIntX(toShort value)) else 0 val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. *) val opSize = if logOp = BuiltIns.LogicalAnd andalso constVal <= 0xffffffff andalso constVal >= 0 then OpSize32 else polyWordOpSize val code = arg2Code @ [BlockSimple(ArithmeticFunction{oper=oper, resultReg=target, operand1=arg2Reg, operand2=IntegerConstant constVal, ccRef=newCCRef(), opSize=opSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical BuiltIns.LogicalOr, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val (arg2Code, arg2Reg) = codeToPReg(arg2, context) val code = arg1Code @ arg2Code @ (* Or-ing preserves the tag bit. 
*) [BlockSimple(ArithmeticFunction{oper=OR, resultReg=target, operand1=arg1Reg, operand2=RegisterArgument arg2Reg, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical BuiltIns.LogicalAnd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val (arg2Code, arg2Reg) = codeToPReg(arg2, context) val code = arg1Code @ arg2Code @ (* Since they're both tagged the result will be tagged. *) [BlockSimple(ArithmeticFunction{oper=AND, resultReg=target, operand1=arg1Reg, operand2=RegisterArgument arg2Reg, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordLogical BuiltIns.LogicalXor, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val (arg2Code, arg2Reg) = codeToPReg(arg2, context) val aReg3 = newPReg() val code = arg1Code @ arg2Code @ (* We need to restore the tag bit after the operation. *) [BlockSimple(ArithmeticFunction{oper=XOR, resultReg=aReg3, operand1=arg1Reg, operand2=RegisterArgument arg2Reg, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=OR, resultReg=target, operand1=aReg3, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.WordShift BuiltIns.ShiftLeft, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = (* Use the general case multiplication code. This will use a shift except for small values. It does detect special cases such as multiplication by 4 and 8 which can be implemented with LEA. *) codeMultiplyConstantWordRev(arg1, context, destination, if isShort value then Word.<<(0w1, toShort value) else 0w1, tailCode) | codeToICodeBinaryRev({oper=BuiltIns.WordShift shift, arg1, arg2}, context, _, destination, tailCode) = (* N.B. X86 shifts of greater than the word length mask the higher bits. That isn't what ML wants but that is dealt with at a higher level *) let open BuiltIns val target = asTarget destination (* Load the value into an untagged register. If this is a left shift we need to clear the tag bit. We don't need to do that for right shifts. *) val argRegUntagged = newUReg() val arg1Code = case arg1 of BICConstnt(value, _) => let (* Remove the tag bit. This isn't required for right shifts. *) val cnstntVal = if isShort value then semitag(Word.toLargeInt(toShort value)) else 1 in [BlockSimple(LoadArgument{source=IntegerConstant cnstntVal, dest=argRegUntagged, kind=movePolyWord})] end | _ => let val (arg1Code, arg1Reg) = codeToPReg(arg1, context) val removeTag = case shift of ShiftLeft => ArithmeticFunction{oper=SUB, resultReg=argRegUntagged, operand1=arg1Reg, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize} | _ => LoadArgument{source=RegisterArgument arg1Reg, dest=argRegUntagged, kind=movePolyWord} in arg1Code @ [BlockSimple removeTag] end (* The shift amount can usefully be a constant. 
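(* On tagged values OR and AND both leave the tag bit set (1 or 1 = 1,
   1 and 1 = 1), but XOR clears it, which is why the XOR case above ORs the tag
   back in afterwards.  Left shifts likewise strip the tag first and OR it back
   in at the end, while right shifts can leave it and rely on the final OR.  On
   plain words (illustration of the identities, not the generated code): *)
fun taggedXor (ta: word, tb: word) = Word.orb(Word.xorb(ta, tb), 0w1)
fun taggedLsh (ta: word, by: Word.word) = Word.orb(Word.<<(ta - 0w1, by), 0w1)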
*) val (arg2Code, untag2Code, arg2Arg) = codeAsUntaggedByte(arg2, false, context) val resRegUntagged = newUReg() val shiftOp = case shift of ShiftLeft => SHL | ShiftRightLogical => SHR | ShiftRightArithmetic => SAR val code = arg1Code @ arg2Code @ untag2Code @ [BlockSimple(ShiftOperation{ shift=shiftOp, resultReg=resRegUntagged, operand=argRegUntagged, shiftAmount=arg2Arg, ccRef=newCCRef(), opSize=polyWordOpSize }), (* Set the tag by ORing it in. This will work whether or not a right shift has shifted a 1 into this position. *) BlockSimple( ArithmeticFunction{oper=OR, resultReg=target, operand1=resRegUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.AllocateByteMemory, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val sizeReg = newPReg() and baseReg = newPReg() val sizeCode = codeToICodeTarget(arg1, context, false, sizeReg) val (flagsCode, flagUntag, flagArg) = codeAsUntaggedByte(arg2, false, context) val code =sizeCode @ flagsCode @ [BlockSimple(AllocateMemoryVariable{size=sizeReg, dest=baseReg, saveRegs=[]})] @ flagUntag @ [BlockSimple(StoreArgument{ source=flagArg, base=baseReg, offset= ~1, index=memIndexOrObject, kind=MoveByte, isMutable=false}), BlockSimple InitialisationComplete, BlockSimple(LoadArgument{ source=RegisterArgument baseReg, dest=target, kind=movePolyWord})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordComparison test, arg1, arg2}, context, _, destination, tailCode) = let val ccRef = newCCRef() val (arg1Code, arg1Reg) = codeToPRegRev(arg1, context, tailCode) (* In X64 we can extract the word from a constant and do the comparison directly. That can't be done in X86/32 because the value isn't tagged and might look like an address. The RTS scans for comparisons with inline constant addresses. *) val (arg2Code, arg2Operand) = if targetArch <> Native32Bit then (* Native 64-bit or 32-in-64. 
*) ( case arg2 of BICConstnt(value, _) => (arg1Code, IntegerConstant(largeWordConstant value)) | _ => let val (code, reg) = codeToPRegRev(arg2, context, arg1Code) in (code, wordAt reg) end ) else let val (code, reg) = codeToPRegRev(arg2, context, arg1Code) in (code, wordAt reg) end val argReg = newUReg() val target = asTarget destination val code = makeBoolResultRev(testAsBranch(test, false, true), ccRef, target, BlockSimple(WordComparison{arg1=argReg, arg2=arg2Operand, ccRef=ccRef, opSize=nativeWordOpSize}) :: BlockSimple(LoadArgument{source=wordAt arg1Reg, dest=argReg, kind=moveNativeWord}) :: arg2Code) in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithAdd, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val code =arg1Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithAdd, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val code = arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg2, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithAdd, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=aReg3, operand1=argReg, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithSub, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val code = arg1Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument 
target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithSub, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=argReg, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithMult, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val resValue = newUReg() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val argReg1 = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg1, kind=moveNativeWord}), BlockSimple(Multiplication{resultReg=resValue, operand1=argReg1, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=resValue, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithDiv, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val quotient = newUReg() and remainder = newUReg() val dividendReg = newUReg() and divisorReg = newUReg() val code = arg1Code @ arg2Code @ (* We don't test for zero here - that's done explicitly. *) [BlockSimple(LoadArgument{source=wordAt aReg1, dest=dividendReg, kind=moveNativeWord}), BlockSimple(LoadArgument{source=wordAt aReg2, dest=divisorReg, kind=moveNativeWord}), BlockSimple(Division { isSigned = false, dividend=dividendReg, divisor=RegisterArgument divisorReg, quotient=quotient, remainder=remainder, opSize=nativeWordOpSize }), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=quotient, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith BuiltIns.ArithMod, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val quotient = newUReg() and remainder = newUReg() val dividendReg = newUReg() and divisorReg = newUReg() val code = arg1Code @ arg2Code @ (* We don't test for zero here - that's done explicitly. 
*) [BlockSimple(LoadArgument{source=wordAt aReg1, dest=dividendReg, kind=moveNativeWord}), BlockSimple(LoadArgument{source=wordAt aReg2, dest=divisorReg, kind=moveNativeWord}), BlockSimple(Division { isSigned = false, dividend=dividendReg, divisor=RegisterArgument divisorReg, quotient=quotient, remainder=remainder, opSize=nativeWordOpSize }), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=remainder, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordArith _, ...}, _, _, _, _) = raise InternalError "codeToICodeNonRev: LargeWordArith - unimplemented operation" | codeToICodeBinaryRev({oper=BuiltIns.LargeWordLogical logOp, arg1, arg2=BICConstnt(value, _)}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. *) val opSize = if logOp = BuiltIns.LogicalAnd andalso constantValue <= 0xffffffff andalso constantValue >= 0 then OpSize32 else nativeWordOpSize val code = arg1Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=oper, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=opSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordLogical logOp, arg1=BICConstnt(value, _), arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val constantValue = largeWordConstant value val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR (* If we AND with a value that fits in 32-bits we can use a 32-bit operation. 
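The constant has no bits set above bit 31 so neither can the result, and a 32-bit operation on X86/64 zero-extends its result into the full register, giving the same value with a shorter encoding.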
*) val opSize = if logOp = BuiltIns.LogicalAnd andalso constantValue <= 0xffffffff andalso constantValue >= 0 then OpSize32 else nativeWordOpSize val code = arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg2, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=oper, resultReg=aReg3, operand1=argReg, operand2=IntegerConstant constantValue, ccRef=newCCRef(), opSize=opSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordLogical logOp, arg1, arg2}, context, _, destination, tailCode) = let val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) val aReg3 = newUReg() val argReg = newUReg() val oper = case logOp of BuiltIns.LogicalOr => OR | BuiltIns.LogicalAnd => AND | BuiltIns.LogicalXor => XOR val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord}), BlockSimple(ArithmeticFunction{oper=oper, resultReg=aReg3, operand1=argReg, operand2=wordAt aReg2, ccRef=newCCRef(), opSize=nativeWordOpSize}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.LargeWordShift shift, arg1, arg2}, context, _, destination, tailCode) = (* The shift is always a Word.word value i.e. tagged. There is a check at the higher level that the shift does not exceed 32/64 bits. *) let open BuiltIns val target = asTarget destination val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, untag2Code, arg2Arg) = codeAsUntaggedByte(arg2, false, context) val aReg3 = newUReg() val shiftOp = case shift of ShiftLeft => SHL | ShiftRightLogical => SHR | ShiftRightArithmetic => SAR val argReg = newUReg() val code = arg1Code @ arg2Code @ [BlockSimple(LoadArgument{source=wordAt aReg1, dest=argReg, kind=moveNativeWord})] @ untag2Code @ [BlockSimple(ShiftOperation{ shift=shiftOp, resultReg=aReg3, operand=argReg, shiftAmount=arg2Arg, ccRef=newCCRef(), opSize=nativeWordOpSize }), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=aReg3, dest=target, saveRegs=[]})] in (revApp(code, tailCode), RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.RealArith(fpOpPrec as (fpOp, fpPrec)), arg1, arg2}, context, _, destination, tailCode) = let open BuiltIns val commutative = case fpOp of ArithSub => NonCommutative | ArithDiv => NonCommutative | ArithAdd => Commutative | ArithMult => Commutative | _ => raise InternalError "codeToICodeNonRev: RealArith - unimplemented operation" val (argCodeRev, fpRegSrc, arg2Value) = codeFPBinaryArgsRev(arg1, arg2, fpPrec, commutative, context, []) val argCode = List.rev argCodeRev val target = asTarget destination val fpRegDest = newUReg() val arith = case fpMode of FPModeX87 => let val fpOp = case fpOp of ArithAdd => FADD | ArithSub => FSUB | ArithMult => FMUL | ArithDiv => FDIV | _ => raise InternalError "codeToICodeNonRev: RealArith - unimplemented operation" val isDouble = case fpPrec of PrecSingle => false | PrecDouble => true in [BlockSimple(X87FPArith{ opc=fpOp, resultReg=fpRegDest, arg1=fpRegSrc, arg2=arg2Value, isDouble=isDouble})] end | FPModeSSE2 => let val fpOp = case fpOpPrec of (ArithAdd, PrecSingle) => SSE2BAddSingle | (ArithSub, PrecSingle) => SSE2BSubSingle | (ArithMult, PrecSingle) => SSE2BMulSingle | (ArithDiv, PrecSingle) => 
SSE2BDivSingle | (ArithAdd, PrecDouble) => SSE2BAddDouble | (ArithSub, PrecDouble) => SSE2BSubDouble | (ArithMult, PrecDouble) => SSE2BMulDouble | (ArithDiv, PrecDouble) => SSE2BDivDouble | _ => raise InternalError "codeToICodeNonRev: RealArith - unimplemented operation" in [BlockSimple(SSE2FPBinary{ opc=fpOp, resultReg=fpRegDest, arg1=fpRegSrc, arg2=arg2Value})] end (* Box or tag the result. *) val result = boxOrTagReal(fpRegDest, target, fpPrec) in (revApp(argCode @ arith @ result, tailCode), RegisterArgument target, false) end (* Floating point comparison. This is complicated because we have different instruction sequences for SSE2 and X87. We also have to get the handling of unordered (NaN) values right. All the tests are treated as false if either argument is a NaN. To combine that test with the other tests we sometimes have to reverse the comparison. *) | codeToICodeBinaryRev({oper=BuiltIns.RealComparison(BuiltIns.TestEqual, precision), arg1, arg2}, context, _, destination, tailCode) = let (* Get the arguments. It's commutative. *) val (arg2Code, fpReg, arg2Val) = codeFPBinaryArgsRev(arg1, arg2, precision, Commutative, context, tailCode) val ccRef1 = newCCRef() and ccRef2 = newCCRef() val testReg1 = newUReg() and testReg2 = newUReg() and testReg3 = newUReg() (* If this is X87 we get the condition into RAX and test it there. If it is SSE2 we have to treat the unordered result (parity set) specially. *) val isDouble = precision = BuiltIns.PrecDouble val target = asTarget destination val code = case fpMode of FPModeX87 => makeBoolResultRev(JE, ccRef2, target, BlockSimple(ArithmeticFunction{ oper=XOR, resultReg=testReg3, operand1=testReg2, operand2=IntegerConstant 0x4000, ccRef=ccRef2, opSize=OpSize32 }) :: BlockSimple(ArithmeticFunction{ oper=AND, resultReg=testReg2, operand1=testReg1, operand2=IntegerConstant 0x4400, ccRef=newCCRef(), opSize=OpSize32 }) :: BlockSimple(X87FPGetCondition { ccRef=ccRef1, dest=testReg1 }) :: BlockSimple(X87Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code) | FPModeSSE2 => let val noParityLabel = newLabel() val resultLabel = newLabel() val falseLabel = newLabel() val trueLabel = newLabel() val mergeReg = newMergeReg() in BlockSimple(LoadArgument{ source=RegisterArgument mergeReg, dest=target, kind=Move32Bit }) :: BlockLabel resultLabel :: BlockFlow(Unconditional resultLabel) :: (* Result is false if parity is set i.e. unordered or if unequal. *) BlockSimple(LoadArgument{ source=IntegerConstant(tag 0), dest=mergeReg, kind=Move32Bit }) :: BlockLabel falseLabel :: BlockFlow(Unconditional resultLabel) :: (* Result is true if it's ordered and equal. *) BlockSimple(LoadArgument{ source=IntegerConstant(tag 1), dest=mergeReg, kind=Move32Bit }) :: BlockLabel trueLabel :: (* Not unordered - test the equality *) BlockFlow(Conditional{ccRef=ccRef1, condition=JE, trueJump=trueLabel, falseJump=falseLabel}) :: BlockLabel noParityLabel :: (* Go to falseLabel if unordered and therefore not equal. *) BlockFlow(Conditional{ccRef=ccRef1, condition=JP, trueJump=falseLabel, falseJump=noParityLabel}) :: BlockSimple(SSE2Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code end in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.RealComparison(BuiltIns.TestUnordered, precision), arg1, arg2}, context, _, destination, tailCode) = let (* The unordered test is really included because it is easy to implement and is the simplest way of implementing isNan. *) (* Get the arguments. It's commutative. 
*) val (arg2Code, fpReg, arg2Val) = codeFPBinaryArgsRev(arg1, arg2, precision, Commutative, context, tailCode) val ccRef1 = newCCRef() and ccRef2 = newCCRef() val testReg1 = newUReg() and testReg2 = newUReg() and testReg3 = newUReg() (* If this is X87 we get the condition into RAX and test it there. If it is SSE2 we have to treat the unordered result (parity set) specially. *) val isDouble = precision = BuiltIns.PrecDouble val target = asTarget destination val code = case fpMode of FPModeX87 => (* And with 0x4500. We have to use XOR rather than CMP to avoid having an untagged constant comparison. *) makeBoolResultRev(JE, ccRef2, target, BlockSimple(ArithmeticFunction{ oper=XOR, resultReg=testReg3, operand1=testReg2, operand2=IntegerConstant 0x4500, ccRef=ccRef2, opSize=OpSize32 }) :: BlockSimple(ArithmeticFunction{ oper=AND, resultReg=testReg2, operand1=testReg1, operand2=IntegerConstant 0x4500, ccRef=newCCRef(), opSize=OpSize32 }) :: BlockSimple(X87FPGetCondition { ccRef=ccRef1, dest=testReg1 }) :: BlockSimple(X87Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code) | FPModeSSE2 => makeBoolResultRev(JP, ccRef1, target, BlockSimple(SSE2Compare{arg1=fpReg, arg2=arg2Val, ccRef=ccRef1, isDouble = isDouble}) :: arg2Code) in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.RealComparison(comparison, precision), arg1, arg2}, context, _, destination, tailCode) = let (* Ordered comparisons are complicated because they are all defined to be false if either argument is a NaN. We have two different tests for a > b and a >= b and implement a < b and a <= b by changing the order of the arguments. *) val (arg1Code, arg1Value) = codeFPArgument(arg1, precision, context, tailCode) val (arg2Code, arg2Value) = codeFPArgument(arg2, precision, context, arg1Code) val (regArg, opArg, isGeq) = case comparison of BuiltIns.TestGreater => (arg1Value, arg2Value, false) | BuiltIns.TestLess => (arg2Value, arg1Value, false) (* Reversed: a<b is b>a. *) | BuiltIns.TestGreaterEqual => (arg1Value, arg2Value, true) | BuiltIns.TestLessEqual => (arg2Value, arg1Value, true) (* Reversed: a<=b is b>=a. *) | _ => raise InternalError "RealComparison: unimplemented operation" (* Load the first operand into a register.
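The comparison needs its first operand in a floating point register, so regArg is loaded into one here if necessary; opArg can be left as a register or memory location.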
*) val (fpReg, loadCode) = case regArg of RegisterArgument fpReg => (fpReg, arg2Code) | regArg => let val fpReg = newUReg() val moveOp = case precision of BuiltIns.PrecDouble => MoveDouble | BuiltIns.PrecSingle => MoveFloat in (fpReg, BlockSimple(LoadArgument{source=regArg, dest=fpReg, kind=moveOp}) :: arg2Code) end val isDouble = precision = BuiltIns.PrecDouble val target = asTarget destination val code = case fpMode of FPModeX87 => let val testReg1 = newUReg() and testReg2 = newUReg() val ccRef1 = newCCRef() and ccRef2 = newCCRef() val testBits = if isGeq then 0x500 else 0x4500 in makeBoolResultRev(JE, ccRef2, target, BlockSimple(ArithmeticFunction{ oper=AND, resultReg=testReg2, operand1=testReg1, operand2=IntegerConstant testBits, ccRef=ccRef2, opSize=OpSize32 }) :: BlockSimple(X87FPGetCondition { ccRef=ccRef1, dest=testReg1 }) :: BlockSimple(X87Compare{arg1=fpReg, arg2=opArg, ccRef=ccRef1, isDouble = isDouble}) :: loadCode) end | FPModeSSE2 => let val ccRef1 = newCCRef() val condition = if isGeq then JNB (* >=, <= *) else JA (* >, < *) in makeBoolResultRev(condition, ccRef1, target, BlockSimple(SSE2Compare{arg1=fpReg, arg2=opArg, ccRef=ccRef1, isDouble = isDouble}) :: loadCode) end in (code, RegisterArgument target, false) end | codeToICodeBinaryRev({oper=BuiltIns.FreeCStack, arg1, arg2}, context, _, destination, tailCode) = (* Free space on the C stack by storing the address in the argument into the "memory register". This is a binary operation that takes the base address and the size. The base address isn't used in this version. *) let val (arg2Code, untaggedLength) = case arg2 of BICConstnt(value, _) => (tailCode, IntegerConstant(Word.toLargeInt(toShort value)) (* Leave untagged *)) | _ => let val (arg2Code, lengthReg) = codeToPRegRev(arg2, context, tailCode) val lengthUntagged = newUReg() in ( BlockSimple(UntagValue{source=lengthReg, dest=lengthUntagged, isSigned=false, cache=NONE, opSize=polyWordOpSize}) :: arg2Code, RegisterArgument lengthUntagged ) end (* Evaluate the first argument for side-effects but discard it. *) val (arg1Code, _, _) = codeToICodeRev(arg1, context, false, Allowed allowDefer, arg2Code) val addrReg = newUReg() and resAddrReg = newUReg() val code = BlockSimple(StoreMemReg{offset=memRegCStackPtr, source=resAddrReg, kind=moveNativeWord}) :: BlockSimple(ArithmeticFunction{oper=ADD, resultReg=resAddrReg, operand1=addrReg, operand2=untaggedLength, ccRef=newCCRef(), opSize=nativeWordOpSize}) :: BlockSimple(LoadMemReg{offset=memRegCStackPtr, dest=addrReg, kind=moveNativeWord}) :: arg1Code in moveIfNotAllowedRev(destination, code, (* Unit result *) IntegerConstant(tag 0)) end (* Multiply tagged word by a constant. We're not concerned with overflow so it's possible to use various short cuts. 
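A tagged value t represents n with t = 2*n+1, so t*c = 2*n*c + c and subtracting c-1 gives 2*(n*c)+1, the correctly tagged product. The LEA cases build the same result directly, e.g. for c=3 the address t + 2*t - 2 = 6*n+1 is the tagged value of 3*n.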
*) and codeMultiplyConstantWordRev(arg, context, destination, multiplier, tailCode) = let val target = asTarget destination val (argCode, aReg) = codeToPReg(arg, context) val doMultiply = case multiplier of 0w0 => [BlockSimple(LoadArgument{source=IntegerConstant 1, dest=target, kind=movePolyWord})] | 0w1 => [BlockSimple(LoadArgument{source=RegisterArgument aReg, dest=target, kind=movePolyWord})] | 0w2 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~1, index=MemIndex1 aReg, dest=target, opSize=polyWordOpSize})] | 0w3 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~2, index=MemIndex2 aReg, dest=target, opSize=polyWordOpSize})] | 0w4 => [BlockSimple(LoadEffectiveAddress{base=NONE, offset= ~3, index=MemIndex4 aReg, dest=target, opSize=polyWordOpSize})] | 0w5 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~4, index=MemIndex4 aReg, dest=target, opSize=polyWordOpSize})] | 0w8 => [BlockSimple(LoadEffectiveAddress{base=NONE, offset= ~7, index=MemIndex8 aReg, dest=target, opSize=polyWordOpSize})] | 0w9 => [BlockSimple(LoadEffectiveAddress{base=SOME aReg, offset= ~8, index=MemIndex8 aReg, dest=target, opSize=polyWordOpSize})] | _ => let val tReg = newUReg() val tagCorrection = Word.toLargeInt multiplier - 1 fun getPower2 n = let fun p2 (n, l) = if n = 0w1 then SOME l else if Word.andb(n, 0w1) = 0w1 then NONE else p2(Word.>>(n, 0w1), l+0w1) in if n = 0w0 then NONE else p2(n,0w0) end val multiply = case getPower2 multiplier of SOME power => (* Shift it including the tag. *) BlockSimple(ShiftOperation{ shift=SHL, resultReg=tReg, operand=aReg, shiftAmount=IntegerConstant(Word.toLargeInt power), ccRef=newCCRef(), opSize=polyWordOpSize }) | NONE => (* Multiply including the tag. *) BlockSimple(Multiplication{resultReg=tReg, operand1=aReg, operand2=IntegerConstant(Word.toLargeInt multiplier), ccRef=newCCRef(), opSize=polyWordOpSize}) in [multiply, BlockSimple(ArithmeticFunction{oper=SUB, resultReg=target, operand1=tReg, operand2=IntegerConstant tagCorrection, ccRef=newCCRef(), opSize=polyWordOpSize})] end in (revApp(argCode @ doMultiply, tailCode), RegisterArgument target, false) end and codeToICodeAllocate({numWords as BICConstnt(length, _), flags as BICConstnt(flagValue, _), initial}, context, _, destination) = (* Constant length and flags is used for ref. We could handle other cases. *) if isShort length andalso isShort flagValue andalso toShort length = 0w1 then let val target = asTarget destination (* Force a different register. *) val vecLength = Word.toInt(toShort length) val flagByte = Word8.fromLargeWord(Word.toLargeWord(toShort flagValue)) val memAddr = newPReg() and valueReg = newPReg() fun initialise n = BlockSimple(StoreArgument{ source=RegisterArgument valueReg, offset=n*Word.toInt wordSize, base=memAddr, index=memIndexOrObject, kind=movePolyWord, isMutable=false}) val code = codeToICodeTarget(initial, context, false, valueReg) @ [BlockSimple(AllocateMemoryOperation{size=vecLength, flags=flagByte, dest=memAddr, saveRegs=[]})] @ List.tabulate(vecLength, initialise) @ [BlockSimple InitialisationComplete, BlockSimple(LoadArgument{source=RegisterArgument memAddr, dest=target, kind=movePolyWord})] in (code, RegisterArgument target, false) end else (* If it's longer use the full run-time form. 
*) allocateMemoryVariable(numWords, flags, initial, context, destination) | codeToICodeAllocate({numWords, flags, initial}, context, _, destination) = allocateMemoryVariable(numWords, flags, initial, context, destination) and codeToICodeLoad({kind=LoadStoreMLWord _, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeAddress(address, false, context) in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument {source=MemoryLocation memLoc, dest=target, kind=movePolyWord})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreMLByte _, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeAddress(address, true, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveByte}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize32})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC8, address}, context, _, destination) = let (* Load a byte from C memory. This is almost exactly the same as LoadStoreMLByte except that the base address is a LargeWord.word value. *) val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w1, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveByte}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize32})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC16, address}, context, _, destination) = let (* Load a 16-bit value from C memory. *) val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w2, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=Move16Bit}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize32})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC32, address}, context, _, destination) = let (* Load a 32-bit value from C memory. If this is 64-bit mode we can tag it but if this is 32-bit mode we need to box it. *) val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w4, context) val untaggedResReg = newUReg() val boxTagCode = if targetArch = Native64Bit then BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=OpSize64 (* It becomes 33 bits *)}) else BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untaggedResReg, dest=target, saveRegs=[]}) in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=Move32Bit}), boxTagCode], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreC64, address}, context, _, destination) = let (* Load a 64-bit value from C memory. This is only allowed in 64-bit mode. The result is a boxed value. 
*) val _ = targetArch <> Native32Bit orelse raise InternalError "codeToICodeNonRev: BICLoadOperation LoadStoreC64 in 32-bit" val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w8, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=Move64Bit}), BlockSimple(BoxValue{boxKind=BoxLargeWord, source=untaggedResReg, dest=target, saveRegs=[]})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreCFloat, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w4, context) val untaggedResReg = newUReg() val boxFloat = case fpMode of FPModeX87 => BoxX87Double | FPModeSSE2 => BoxSSE2Double (* We need to convert the float into a double. *) val loadArg = case fpMode of FPModeX87 => BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveFloat}) | FPModeSSE2 => BlockSimple(SSE2FPUnary { source=MemoryLocation memLoc, resultReg=untaggedResReg, opc=SSE2UFloatToDouble}) in (codeBaseIndex @ codeUntag @ [loadArg, BlockSimple(BoxValue{boxKind=boxFloat, source=untaggedResReg, dest=target, saveRegs=[]})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreCDouble, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeCAddress(address, 0w8, context) val untaggedResReg = newUReg() val boxFloat = case fpMode of FPModeX87 => BoxX87Double | FPModeSSE2 => BoxSSE2Double in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=MoveDouble}), BlockSimple(BoxValue{boxKind=boxFloat, source=untaggedResReg, dest=target, saveRegs=[]})], RegisterArgument target, false) end | codeToICodeLoad({kind=LoadStoreUntaggedUnsigned, address}, context, _, destination) = let val target = asTarget destination val (codeBaseIndex, codeUntag, memLoc) = codeAddress(address, false, context) val untaggedResReg = newUReg() in (codeBaseIndex @ codeUntag @ [BlockSimple(LoadArgument { source=MemoryLocation memLoc, dest=untaggedResReg, kind=movePolyWord}), BlockSimple(TagValue {source=untaggedResReg, dest=target, isSigned=false, opSize=polyWordOpSize})], RegisterArgument target, false) end and codeToICodeStore({kind=LoadStoreMLWord _, address, value}, context, _, destination) = let val (sourceCode, source, _) = codeToICode(value, context, false, Allowed allowInMemMove) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeAddress(address, false, context) val code = codeBaseIndex @ sourceCode @ codeUntag @ [BlockSimple(StoreArgument {source=source, base=base, offset=offset, index=index, kind=movePolyWord, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreMLByte _, address, value}, context, _, destination) = let val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeAddress(address, true, context) (* We have to untag the value to store. 
*) val (valueCode, untagValue, valueArg) = codeAsUntaggedByte(value, false, context) val code = codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=valueArg, base=base, offset=offset, index=index, kind=MoveByte, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC8, address, value}, context, _, destination) = let (* Store a byte to C memory. Almost exactly the same as LoadStoreMLByte. *) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w1, context) val (valueCode, untagValue, valueArg) = codeAsUntaggedByte(value, false, context) val code = codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=valueArg, base=base, offset=offset, index=index, kind=MoveByte, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC16, address, value}, context, _, destination) = let (* Store a 16-bit value to C memory. *) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w2, context) (* We don't currently implement 16-bit constant moves so this must always be in a reg. *) val (valueCode, untagValue, valueArg) = codeAsUntaggedToReg(value, false, context) val code = codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=RegisterArgument valueArg, base=base, offset=offset, index=index, kind=Move16Bit, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC32, address, value}, context, _, destination) = (* Store a 32-bit value. If this is 64-bit mode we untag it but if this is 32-bit mode we unbox it. *) let val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w4, context) val code = if targetArch = Native64Bit then let (* We don't currently implement 32-bit constant moves so this must always be in a reg. *) val (valueCode, untagValue, valueArg) = codeAsUntaggedToReg(value, false, context) in codeBaseIndex @ valueCode @ untagValue @ codeUntag @ [BlockSimple(StoreArgument {source=RegisterArgument valueArg, base=base, offset=offset, index=index, kind=Move32Bit, isMutable=true})] end else let val (valueCode, valueReg) = codeToPReg(value, context) val valueReg1 = newUReg() in codeBaseIndex @ valueCode @ BlockSimple(LoadArgument{source=wordAt valueReg, dest=valueReg1, kind=Move32Bit}) :: codeUntag @ [BlockSimple(StoreArgument {source=RegisterArgument valueReg1, base=base, offset=offset, index=index, kind=Move32Bit, isMutable=true})] end in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreC64, address, value}, context, _, destination) = let (* Store a 64-bit value. 
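As with the corresponding load this is only possible in 64-bit mode; the value is unboxed from its LargeWord cell into an untagged register before the store.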
*) val _ = targetArch <> Native32Bit orelse raise InternalError "codeToICodeNonRev: BICStoreOperation LoadStoreC64 in 32-bit" val (valueCode, valueReg) = codeToPReg(value, context) val valueReg1 = newUReg() val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w8, context) val code = codeBaseIndex @ valueCode @ codeUntag @ [BlockSimple(LoadArgument{source=wordAt valueReg, dest=valueReg1, kind=Move64Bit}), BlockSimple(StoreArgument {source=RegisterArgument valueReg1, base=base, offset=offset, index=index, kind=Move64Bit, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreCFloat, address, value}, context, _, destination) = let val floatReg = newUReg() and float2Reg = newUReg() val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w4, context) val (valueCode, valueReg) = codeToPReg(value, context) (* If we're using an SSE2 reg we have to convert it from double to single precision. *) val (storeReg, cvtCode) = case fpMode of FPModeSSE2 => (float2Reg, [BlockSimple(SSE2FPUnary{opc=SSE2UDoubleToFloat, resultReg=float2Reg, source=RegisterArgument floatReg})]) | FPModeX87 => (floatReg, []) val code = codeBaseIndex @ valueCode @ codeUntag @ BlockSimple(LoadArgument{source=wordAt valueReg, dest=floatReg, kind=MoveDouble}) :: cvtCode @ [BlockSimple(StoreArgument {source=RegisterArgument storeReg, base=base, offset=offset, index=index, kind=MoveFloat, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreCDouble, address, value}, context, _, destination) = let val floatReg = newUReg() val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeCAddress(address, 0w8, context) val (valueCode, valueReg) = codeToPReg(value, context) val code = codeBaseIndex @ valueCode @ codeUntag @ [BlockSimple(LoadArgument{source=wordAt valueReg, dest=floatReg, kind=MoveDouble}), BlockSimple(StoreArgument {source=RegisterArgument floatReg, base=base, offset=offset, index=index, kind=MoveDouble, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | codeToICodeStore({kind=LoadStoreUntaggedUnsigned, address, value}, context, _, destination) = let (* We have to untag the value to store. *) val (codeBaseIndex, codeUntag, {base, offset, index, ...}) = codeAddress(address, false, context) (* See if it's a constant. This is frequently used to set the last word of a string to zero. *) (* We have to be a bit more careful on the X86. We use moves to store constants that can include addresses. To avoid problems we only use a move if the value is zero or odd and so looks like a tagged value. 
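When the constant cannot be stored as an immediate the value is evaluated into a register and untagged with UntagValue before the store.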
*) val storeAble = case value of BICConstnt(value, _) => if not(isShort value) then NONE else let val ival = Word.toLargeIntX(toShort value) in if targetArch = Native64Bit then if is32bit ival then SOME ival else NONE else if ival = 0 orelse ival mod 2 = 1 then SOME ival else NONE end | _ => NONE val (storeVal, valCode) = case storeAble of SOME value => (IntegerConstant value (* Leave untagged *), []) | NONE => let val valueReg = newPReg() and valueReg1 = newUReg() in (RegisterArgument valueReg1, codeToICodeTarget(value, context, false, valueReg) @ [BlockSimple(UntagValue{dest=valueReg1, source=valueReg, isSigned=false, cache=NONE, opSize=polyWordOpSize})]) end val code = codeBaseIndex @ valCode @ codeUntag @ [BlockSimple(StoreArgument {source=storeVal, base=base, offset=offset, index=index, kind=movePolyWord, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end and codeToICodeBlock({kind=BlockOpCompareByte, sourceLeft, destRight, length}, context, _, destination) = let (* This is effectively a big-endian comparison since we compare the bytes until we find an inequality. *) val target = asTarget destination val mergeResult = newMergeReg() val vec1Reg = newUReg() and vec2Reg = newUReg() val (leftCode, leftUntag, {base=leftBase, offset=leftOffset, index=leftIndex, ...}) = codeAddress(sourceLeft, true, context) val (rightCode, rightUntag, {base=rightBase, offset=rightOffset, index=rightIndex, ...}) = codeAddress(destRight, true, context) val ccRef = newCCRef() val labLess = newLabel() and labGreater = newLabel() and exitLab = newLabel() val labNotLess = newLabel() and labNotGreater = newLabel() val (lengthCode, lengthUntag, lengthArg) = codeAsUntaggedToReg(length, false (* unsigned *), context) val code = leftCode @ rightCode @ lengthCode @ leftUntag @ [BlockSimple(loadAddress{base=leftBase, offset=leftOffset, index=leftIndex, dest=vec1Reg})] @ rightUntag @ [BlockSimple(loadAddress{base=rightBase, offset=rightOffset, index=rightIndex, dest=vec2Reg})] @ lengthUntag @ [BlockSimple(CompareByteVectors{ vec1Addr=vec1Reg, vec2Addr=vec2Reg, length=lengthArg, ccRef=ccRef }), (* N.B. These are unsigned comparisons. *) BlockFlow(Conditional{ ccRef=ccRef, condition=JB, trueJump=labLess, falseJump=labNotLess }), BlockLabel labNotLess, BlockFlow(Conditional{ ccRef=ccRef, condition=JA, trueJump=labGreater, falseJump=labNotGreater }), BlockLabel labNotGreater, BlockSimple(LoadArgument{ source=IntegerConstant(tag 0), dest=mergeResult, kind=movePolyWord }), BlockFlow(Unconditional exitLab), BlockLabel labLess, BlockSimple(LoadArgument{ source=IntegerConstant(tag ~1), dest=mergeResult, kind=movePolyWord }), BlockFlow(Unconditional exitLab), BlockLabel labGreater, BlockSimple(LoadArgument{ source=IntegerConstant(tag 1), dest=mergeResult, kind=movePolyWord }), BlockLabel exitLab, BlockSimple(LoadArgument{ source=RegisterArgument mergeResult, dest=target, kind=movePolyWord })] in (code, RegisterArgument target, false) end | codeToICodeBlock({kind=BlockOpMove {isByteMove}, sourceLeft, destRight, length}, context, _, destination) = let (* Moves of 4 or 8 bytes can be done as word moves provided the alignment is correct. Although this will work for strings it is really to handle moves between SysWord and volatileRef in Foreign.Memory. Moves of 1, 2 or 3 bytes or words are converted into a sequence of byte or word moves. 
*) val isWordMove = case (isByteMove, length) of (true, BICConstnt(l, _)) => if not (isShort l) orelse (toShort l <> 0w4 andalso toShort l <> nativeWordSize) then NONE else let val leng = Word.toInt(toShort l) val moveKind = if toShort l = nativeWordSize then moveNativeWord else Move32Bit val isLeftAligned = case sourceLeft of {index=NONE, offset:int, ...} => offset mod leng = 0 | _ => false val isRightAligned = case destRight of {index=NONE, offset: int, ...} => offset mod leng = 0 | _ => false in if isLeftAligned andalso isRightAligned then SOME moveKind else NONE end | _ => NONE in case isWordMove of SOME moveKind => let val (leftCode, leftUntag, leftMem) = codeAddress(sourceLeft, isByteMove, context) val (rightCode, rightUntag, {base, offset, index, ...}) = codeAddress(destRight, isByteMove, context) val untaggedResReg = newUReg() val code = leftCode @ rightCode @ leftUntag @ rightUntag @ [BlockSimple(LoadArgument { source=MemoryLocation leftMem, dest=untaggedResReg, kind=moveKind}), BlockSimple(StoreArgument {source=RegisterArgument untaggedResReg, base=base, offset=offset, index=index, kind=moveKind, isMutable=true})] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end | _ => let val vec1Reg = newUReg() and vec2Reg = newUReg() val (leftCode, leftUntag, {base=leftBase, offset=leftOffset, index=leftIndex, ...}) = codeAddress(sourceLeft, isByteMove, context) val (rightCode, rightUntag, {base=rightBase, offset=rightOffset, index=rightIndex, ...}) = codeAddress(destRight, isByteMove, context) val (lengthCode, lengthUntag, lengthArg) = codeAsUntaggedToReg(length, false (* unsigned *), context) val code = leftCode @ rightCode @ lengthCode @ leftUntag @ [BlockSimple(loadAddress{base=leftBase, offset=leftOffset, index=leftIndex, dest=vec1Reg})] @ rightUntag @ [BlockSimple(loadAddress{base=rightBase, offset=rightOffset, index=rightIndex, dest=vec2Reg})] @ lengthUntag @ [BlockSimple(BlockMove{ srcAddr=vec1Reg, destAddr=vec2Reg, length=lengthArg, isByteMove=isByteMove })] in moveIfNotAllowed(destination, code, (* Unit result *) IntegerConstant(tag 0)) end end | codeToICodeBlock({kind=BlockOpEqualByte, ...}, _, _, _) = (* TODO: Move the code from codeToICodeRev. However, that is already reversed. *) raise InternalError "codeToICodeBlock - BlockOpEqualByte" (* Already done *) and codeConditionRev(condition, context, jumpOn, jumpLabel, tailCode) = (* General case. Load the value into a register and compare it with 1 (true) *) let val ccRef = newCCRef() val (testCode, testReg) = codeToPRegRev(condition, context, tailCode) val noJumpLabel = newLabel() in BlockLabel noJumpLabel :: BlockFlow(Conditional{ccRef=ccRef, condition=if jumpOn then JE else JNE, trueJump=jumpLabel, falseJump=noJumpLabel}) :: BlockSimple(CompareLiteral{arg1=RegisterArgument testReg, arg2=tag 1, opSize=OpSize32, ccRef=ccRef}) :: testCode end (* The fixed precision functions are also used for arbitrary precision but instead of raising Overflow we need to jump to the code that handles the long format. *) and codeFixedPrecisionArith(BuiltIns.ArithAdd, arg1, BICConstnt(value, _), context, target, onOverflow) = let val ccRef = newCCRef() (* If the argument is a constant we can subtract the tag beforehand. This should always be a tagged value if the type is correct. However it's possible for it not to be if we have an arbitrary precision value. There will be a run-time check that the value is short and so this code will never be executed. 
It will generally be edited out by the higher level but we can't rely on that. Because it's never executed we can just put in zero. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPReg(arg1, context) in arg1Code @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end | codeFixedPrecisionArith(BuiltIns.ArithAdd, BICConstnt(value, _), arg2, context, target, onOverflow) = let val ccRef = newCCRef() (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg2Code, aReg2) = codeToPReg(arg2, context) in arg2Code @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg2, operand2=IntegerConstant constVal, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end | codeFixedPrecisionArith(BuiltIns.ArithAdd, arg1, arg2, context, target, onOverflow) = let val aReg3 = newPReg() and ccRef = newCCRef() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) in arg1Code @ arg2Code @ (* Subtract the tag bit from the second argument, do the addition and check for overflow. *) (* TODO: We should really do the detagging in the transform phase. It can make a better choice of the argument if one of the arguments is already untagged or if we have a constant argument. *) [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=aReg1, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg3, operand2=RegisterArgument aReg2, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end (* Subtraction. We can handle the special case of the second argument being a constant but not the first. *) | codeFixedPrecisionArith(BuiltIns.ArithSub, arg1, BICConstnt(value, _), context, target, onOverflow) = let val ccRef = newCCRef() (* If the argument is a constant we can subtract the tag beforehand. Check for short - see comment above. *) val constVal = if isShort value then semitag(Word.toLargeIntX(toShort value)) else 0 val (arg1Code, aReg1) = codeToPReg(arg1, context) in arg1Code @ [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=target, operand1=aReg1, operand2=IntegerConstant constVal, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef end | codeFixedPrecisionArith(BuiltIns.ArithSub, arg1, arg2, context, target, onOverflow) = let val aReg3 = newPReg() val ccRef = newCCRef() val (arg1Code, aReg1) = codeToPReg(arg1, context) val (arg2Code, aReg2) = codeToPReg(arg2, context) in arg1Code @ arg2Code @ (* Do the subtraction, test for overflow and afterwards add in the tag bit. *) [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=aReg3, operand1=aReg1, operand2=RegisterArgument aReg2, ccRef=ccRef, opSize=polyWordOpSize})] @ onOverflow ccRef @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=aReg3, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithMult, arg1, BICConstnt(value, _), context, target, onOverflow) = let val aReg = newPReg() and argUntagged = newUReg() and resUntagged = newUReg() val mulCC = newCCRef() (* Is it better to untag the constant or the register argument?
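Here the register argument has its tag subtracted, giving 2*n, and is multiplied by the untagged constant c to give 2*n*c; adding 1 afterwards restores the tag so the result is the tagged product.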
*) val constVal = if isShort value then Word.toLargeIntX(toShort value) else 0 in codeToICodeTarget(arg1, context, false, aReg) @ [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=argUntagged, operand1=aReg, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=argUntagged, operand2=IntegerConstant constVal, ccRef=mulCC, opSize=polyWordOpSize} )] @ onOverflow mulCC @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithMult, BICConstnt(value, _), arg2, context, target, onOverflow) = let val aReg = newPReg() and argUntagged = newUReg() and resUntagged = newUReg() val mulCC = newCCRef() (* Is it better to untag the constant or the register argument? *) val constVal = if isShort value then Word.toLargeIntX(toShort value) else 0 in codeToICodeTarget(arg2, context, false, aReg) @ [BlockSimple(ArithmeticFunction{oper=SUB, resultReg=argUntagged, operand1=aReg, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=argUntagged, operand2=IntegerConstant constVal, ccRef=mulCC, opSize=polyWordOpSize} )] @ onOverflow mulCC @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithMult, arg1, arg2, context, target, onOverflow) = let val aReg1 = newPReg() and aReg2 = newPReg() and arg1Untagged = newUReg() and arg2Untagged = newUReg() and resUntagged = newUReg() val mulCC = newCCRef() (* This is almost the same as the word operation except we use a signed shift and check for overflow. *) in codeToICodeTarget(arg1, context, false, aReg1) @ codeToICodeTarget(arg2, context, false, aReg2) @ (* Shift one argument and subtract the tag from the other. It's possible this could be reordered if we have a value that is already untagged. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=true (* Signed shift here. *), cache=NONE, opSize=polyWordOpSize}), BlockSimple(ArithmeticFunction{oper=SUB, resultReg=arg2Untagged, operand1=aReg2, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize}), BlockSimple(Multiplication{resultReg=resUntagged, operand1=arg1Untagged, operand2=RegisterArgument arg2Untagged, ccRef=mulCC, opSize=polyWordOpSize} )] @ onOverflow mulCC @ [BlockSimple(ArithmeticFunction{oper=ADD, resultReg=target, operand1=resUntagged, operand2=IntegerConstant 1, ccRef=newCCRef(), opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithQuot, arg1, arg2, context, target, _) = let val aReg1 = newPReg() and aReg2 = newPReg() val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() in codeToICodeTarget(arg1, context, false, aReg1) @ codeToICodeTarget(arg2, context, false, aReg2) @ (* Shift both of the arguments to remove the tags. We don't test for zero here - that's done explicitly. 
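The untagging uses an arithmetic (signed) shift because the operands may be negative, and the quotient is retagged with TagValue afterwards.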
*) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = true, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=quotient, dest=target, isSigned=true, opSize=polyWordOpSize})] end | codeFixedPrecisionArith(BuiltIns.ArithRem, arg1, arg2, context, target, _) = let (* Identical to Quot except that the result is the remainder. *) val aReg1 = newPReg() and aReg2 = newPReg() val arg1Untagged = newUReg() and arg2Untagged = newUReg() val quotient = newUReg() and remainder = newUReg() in codeToICodeTarget(arg1, context, false, aReg1) @ codeToICodeTarget(arg2, context, false, aReg2) @ (* Shift both of the arguments to remove the tags. *) [BlockSimple(UntagValue{source=aReg1, dest=arg1Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(UntagValue{source=aReg2, dest=arg2Untagged, isSigned=true, cache=NONE, opSize=polyWordOpSize}), BlockSimple(Division { isSigned = true, dividend=arg1Untagged, divisor=RegisterArgument arg2Untagged, quotient=quotient, remainder=remainder, opSize=polyWordOpSize }), BlockSimple(TagValue { source=remainder, dest=target, isSigned=true, opSize=polyWordOpSize})] end | codeFixedPrecisionArith(_, _, _, _, _, _) = raise InternalError "codeToICode: FixedPrecisionArith - unimplemented operation" (* Generate code for floating point arguments where one of the arguments must be in a register. If the first argument is in a register use that, if the second is in a register and it's commutative use that otherwise load the first argument into a register. *) and codeFPBinaryArgsRev(arg1, arg2, precision, commutative, context, tailCode) = let val (arg1Code, arg1Value) = codeFPArgument(arg1, precision, context, tailCode) val (arg2Code, arg2Value) = codeFPArgument(arg2, precision, context, arg1Code) in case (arg1Value, arg2Value, commutative) of (RegisterArgument fpReg, _, _) => (arg2Code, fpReg, arg2Value) | (_, RegisterArgument fpReg, Commutative) => (arg2Code, fpReg, arg1Value) | (arg1Val, _, _) => let val fpReg = newUReg() val moveOp = case precision of BuiltIns.PrecDouble => MoveDouble | BuiltIns.PrecSingle => MoveFloat in (BlockSimple(LoadArgument{source=arg1Val, dest=fpReg, kind=moveOp}) :: arg2Code, fpReg, arg2Value) end end (* Generate code to evaluate a floating point argument. The aim of this code is to avoid the overhead of untagging a short-precision floating point value in memory. *) and codeFPArgument(BICConstnt(value, _), _, _, tailCode) = let val argVal = (* Single precision constants in 64-bit mode are represented by the value shifted left 32 bits. A word is shifted left one bit so the result is 0w31. *) if isShort value then IntegerConstant(Word.toLargeInt(Word.>>(toShort value, 0w31))) else AddressConstant value in (tailCode, argVal) end | codeFPArgument(arg, precision, context, tailCode) = ( case (precision, wordSize) of (BuiltIns.PrecSingle, 0w8) => (* If this is a single precision value and the word size is 8 the values are tagged. If it is memory we can load the value directly from the high-order word. 
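On little-endian X86 the high-order 32 bits of the tagged word are at byte offset 4, hence the offset+4 in the memory case below.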
*) let val memOrReg = { anyConstant=false, const32s=false, memAddr=true, existingPreg=true } val (code, result, _) = codeToICodeRev(arg, context, false, Allowed memOrReg, tailCode) in case result of RegisterArgument argReg => let val fpReg = newUReg() in (BlockSimple(UntagFloat{source=RegisterArgument argReg, dest=fpReg, cache=NONE}) :: code, RegisterArgument fpReg) end | MemoryLocation{offset, base, index, ...} => (code, MemoryLocation{offset=offset+4, base=base, index=index, cache=NONE}) | _ => raise InternalError "codeFPArgument" end | _ => (* Otherwise the value is boxed. *) let val (argCode, argReg) = codeToPRegRev(arg, context, tailCode) in (argCode, wordAt argReg) end ) (* Code an address. The index is optional. *) and codeAddressRev({base, index=SOME index, offset}, true (* byte move *), context, tailCode) = let (* Byte address with index. The index needs to be untagged. *) val indexReg1 = newUReg() val (codeBase, baseReg) = codeToPRegRev(base, context, tailCode) val (codeIndex, indexReg) = codeToPRegRev(index, context, codeBase) val untagCode = [BlockSimple(UntagValue{dest=indexReg1, source=indexReg, isSigned=false, cache=NONE, opSize=polyWordOpSize})] val (codeLoadAddr, realBase) = if targetArch = ObjectId32Bit then let val addrReg = newUReg() in ([BlockSimple(LoadEffectiveAddress{ base=SOME baseReg, offset=0, index=ObjectIndex, dest=addrReg, opSize=nativeWordOpSize})], addrReg) end else ([], baseReg) val memResult = {base=realBase, offset=offset, index=MemIndex1 indexReg1, cache=NONE} in (codeLoadAddr @ codeIndex, untagCode, memResult) end | codeAddressRev({base, index=SOME index, offset}, false (* word move *), context, tailCode) = let (* Word address with index. We can avoid untagging the index by adjusting the multiplier and offset *) val (codeBase, baseReg) = codeToPRegRev(base, context, tailCode) val (codeIndex, indexReg) = codeToPRegRev(index, context, codeBase) val (codeLoadAddr, realBase) = if targetArch = ObjectId32Bit then let val addrReg = newUReg() in ([BlockSimple(LoadEffectiveAddress{ base=SOME baseReg, offset=0, index=ObjectIndex, dest=addrReg, opSize=nativeWordOpSize})], addrReg) end else ([], baseReg) val memResult = if wordSize = 0w8 then {base=realBase, offset=offset-4, index=MemIndex4 indexReg, cache=NONE} else {base=realBase, offset=offset-2, index=MemIndex2 indexReg, cache=NONE} in (codeLoadAddr @ codeIndex, [], memResult) end | codeAddressRev({base, index=NONE, offset}, _, context, tailCode) = let val (codeBase, baseReg) = codeToPRegRev(base, context, tailCode) val memResult = {offset=offset, base=baseReg, index=memIndexOrObject, cache=NONE} in (codeBase, [], memResult) end and codeAddress(addr, isByte, context) = let val (code, untag, res) = codeAddressRev(addr, isByte, context, []) in (List.rev code, untag, res) end (* C-memory operations are slightly different. The base address is a LargeWord.word value. The index is a byte index so may have to be untagged. *) and codeCAddress({base, index=SOME index, offset}, 0w1, context) = let (* Byte address with index. The index needs to be untagged. *) val untaggedBaseReg = newUReg() val (codeBase, baseReg) = codeToPReg(base, context) and (codeIndex, indexReg) = codeToPReg(index, context) (* The index needs to be untagged and, if necessary, sign-extended to the native word size.
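On 32-in-64 the tagged index is a 32-bit value so it is first sign-extended to 64 bits and then untagged with a signed shift; on the native targets a single signed untag over the native word is sufficient.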
*) val (untagCode, sxReg) = if targetArch = ObjectId32Bit then let val sReg1 = newUReg() and sReg2 = newUReg() in ([BlockSimple(SignExtend32To64{dest=sReg1, source=RegisterArgument indexReg}), BlockSimple(UntagValue{dest=sReg2, source=sReg1, isSigned=true, cache=NONE, opSize=nativeWordOpSize})], sReg2) end else let val sReg = newUReg() in ([BlockSimple(UntagValue{dest=sReg, source=indexReg, isSigned=true, cache=NONE, opSize=nativeWordOpSize})], sReg) end val loadCode = [BlockSimple(LoadArgument{source=wordAt baseReg, dest=untaggedBaseReg, kind=moveNativeWord})] val memResult = {base=untaggedBaseReg, offset=offset, index=MemIndex1 sxReg, cache=NONE} in (codeBase @ codeIndex, loadCode @ untagCode, memResult) end | codeCAddress({base, index=SOME index, offset}, size, context) = let (* Non-byte address with index. By using an appropriate multiplier we can avoid having to untag the index. *) val untaggedBaseReg = newUReg() val (codeBase, baseReg) = codeToPReg(base, context) and (codeIndex, indexReg) = codeToPReg(index, context) (* The index is signed i.e. negative index values are legal. We don't have to do anything special on the native code versions but on 32-in-64 we need to sign extend. *) val (untagCode, sxReg) = if targetArch = ObjectId32Bit then let val sReg = newUReg() in ([BlockSimple(SignExtend32To64{source=RegisterArgument indexReg, dest=sReg})], sReg) end else ([], indexReg) val loadCode = [BlockSimple(LoadArgument{source=wordAt baseReg, dest=untaggedBaseReg, kind=moveNativeWord})] val memResult = case size of 0w2 => {base=untaggedBaseReg, offset=offset-1, index=MemIndex1 sxReg, cache=NONE} | 0w4 => {base=untaggedBaseReg, offset=offset-2, index=MemIndex2 sxReg, cache=NONE} | 0w8 => {base=untaggedBaseReg, offset=offset-4, index=MemIndex4 sxReg, cache=NONE} | _ => raise InternalError "codeCAddress: unknown size" in (codeBase @ codeIndex, loadCode @ untagCode, memResult) end | codeCAddress({base, index=NONE, offset}, _, context) = let val untaggedBaseReg = newUReg() val (codeBase, baseReg) = codeToPReg(base, context) val untagCode = [BlockSimple(LoadArgument{source=wordAt baseReg, dest=untaggedBaseReg, kind=moveNativeWord})] val memResult = {offset=offset, base=untaggedBaseReg, index=NoMemIndex, cache=NONE} in (codeBase, untagCode, memResult) end (* Return an untagged value. If we have a constant just return it. Otherwise return the code to evaluate the argument, the code to untag it and the reference to the untagged register. *) and codeAsUntaggedToRegRev(BICConstnt(value, _), isSigned, _, tailCode) = let (* Should always be short except for unreachable code. *) val untagReg = newUReg() val cval = if isShort value then toShort value else 0w0 val cArg = IntegerConstant(if isSigned then Word.toLargeIntX cval else Word.toLargeInt cval) (* Don't tag *) val untag = [BlockSimple(LoadArgument{source=cArg, dest=untagReg, kind=movePolyWord})] in (tailCode, untag, untagReg) (* Don't tag. *) end | codeAsUntaggedToRegRev(arg, isSigned, context, tailCode) = let val untagReg = newUReg() val (code, srcReg) = codeToPRegRev(arg, context, tailCode) val untag = [BlockSimple(UntagValue{source=srcReg, dest=untagReg, isSigned=isSigned, cache=NONE, opSize=polyWordOpSize})] in (code, untag, untagReg) end and codeAsUntaggedToReg(arg, isSigned, context) = let val (code, untag, untagReg) = codeAsUntaggedToRegRev(arg, isSigned, context, []) in (List.rev code, untag, untagReg) end (* Return the argument as an untagged value. 
We separate evaluating the argument from untagging because we may have to evaluate other arguments and that could involve a function call and we can't save the value to the stack after we've untagged it. Currently this is only used for byte values but we may have to be careful if we use it for word values on the X86. Moving an untagged value into a register might look like loading a constant address. *) and codeAsUntaggedByte(BICConstnt(value, _), isSigned, _) = let val cval = if isShort value then toShort value else 0w0 val cArg = IntegerConstant(if isSigned then Word.toLargeIntX cval else Word.toLargeInt cval) (* Don't tag *) in ([], [], cArg) end | codeAsUntaggedByte(arg, isSigned, context) = let val untagReg = newUReg() val (code, argReg) = codeToPReg(arg, context) val untag = [BlockSimple(UntagValue{source=argReg, dest=untagReg, isSigned=isSigned, cache=NONE, opSize=OpSize32})] in (code, untag, RegisterArgument untagReg) end (* Allocate memory. This is used both for true variable length cells and also for longer constant length cells. *) and allocateMemoryVariable(numWords, flags, initial, context, destination) = let val target = asTarget destination (* With the exception of flagReg all these registers are modified by the code. So, we have to copy the size value into a new register. *) val sizeReg = newPReg() and initReg = newPReg() val sizeReg2 = newPReg() val untagSizeReg = newUReg() and initAddrReg = newPReg() and allocReg = newPReg() val sizeCode = codeToICodeTarget(numWords, context, false, sizeReg) and (flagsCode, flagUntag, flagArg) = codeAsUntaggedByte(flags, false, context) (* We're better off deferring the initialiser if possible. If the value is a constant we don't have to save it. *) val (initCode, initResult, _) = codeToICode(initial, context, false, Allowed allowDefer) in (sizeCode @ flagsCode @ initCode @ [(* We need to copy the size here because AllocateMemoryVariable modifies the size in order to store the length word. This is unfortunate especially as we're going to untag it anyway. *) BlockSimple(LoadArgument{source=RegisterArgument sizeReg, dest=sizeReg2, kind=movePolyWord}), BlockSimple(AllocateMemoryVariable{size=sizeReg, dest=allocReg, saveRegs=[]})] @ flagUntag @ [BlockSimple(StoreArgument{ source=flagArg, base=allocReg, offset= ~1, index=memIndexOrObject, kind=MoveByte, isMutable=false}), (* We need to copy the address here because InitialiseMem modifies all its arguments. *) BlockSimple( if targetArch = ObjectId32Bit then LoadEffectiveAddress{ base=SOME allocReg, offset=0, index=ObjectIndex, dest=initAddrReg, opSize=nativeWordOpSize} else LoadArgument{source=RegisterArgument allocReg, dest=initAddrReg, kind=movePolyWord}), BlockSimple(UntagValue{source=sizeReg2, dest=untagSizeReg, isSigned=false, cache=NONE, opSize=polyWordOpSize}), BlockSimple(LoadArgument{source=initResult, dest=initReg, kind=movePolyWord}), BlockSimple(InitialiseMem{size=untagSizeReg, init=initReg, addr=initAddrReg}), BlockSimple InitialisationComplete, BlockSimple(LoadArgument{source=RegisterArgument allocReg, dest=target, kind=movePolyWord})], RegisterArgument target, false) end (*Turn the codetree structure into icode. *) val bodyContext = {loopArgs=NONE, stackPtr=0, currHandler=NONE, overflowBlock=ref NONE} val (bodyCode, _, bodyExited) = codeToICodeRev(body, bodyContext, true, SpecificPReg resultTarget, beginInstructions) val icode = if bodyExited then bodyCode else returnInstruction(bodyContext, resultTarget, bodyCode) (* Turn the icode list into basic blocks. 
The input list is in reverse so as part of this we reverse the list. *) local val resArray = Array.array(!labelCounter, BasicBlock{ block=[], flow=ExitCode }) fun createEntry (blockNo, block, flow) = Array.update(resArray, blockNo, BasicBlock{ block=block, flow=flow}) fun splitCode([], _, _) = (* End of code. We should have had a BeginFunction. *) raise InternalError "splitCode - no begin" | splitCode(BlockBegin args :: _, sinceLabel, flow) = (* Final instruction. Create the initial block and exit. *) createEntry(0, BeginFunction args ::sinceLabel, flow) | splitCode(BlockSimple instr :: rest, sinceLabel, flow) = splitCode(rest, instr :: sinceLabel, flow) | splitCode(BlockLabel label :: rest, sinceLabel, flow) = (* Label - finish this block and start another. *) ( createEntry(label, sinceLabel, flow); (* Default to a jump to this label. That is used if we have assumed a drop-through. *) splitCode(rest, [], Unconditional label) ) | splitCode(BlockExit instr :: rest, _, _) = splitCode(rest, [instr], ExitCode) | splitCode(BlockFlow flow :: rest, _, _) = splitCode(rest, [], flow) | splitCode(BlockRaiseAndHandle(instr, handler) :: rest, _, _) = splitCode(rest, [instr], UnconditionalHandle handler) | splitCode(BlockOptionalHandle{call, handler, label} :: rest, sinceLabel, flow) = let (* A function call within a handler. This could go to the handler but if there is no exception will go to the next instruction. Also includes JumpLoop since the stack check could result in an Interrupt exception. *) in createEntry(label, sinceLabel, flow); splitCode(rest, [call], ConditionalHandle{handler=handler, continue=label}) end in val () = splitCode(icode, [], ExitCode) val resultVector = Array.vector resArray end open ICodeTransform val pregProperties = Vector.fromList(List.rev(! pregPropList)) in codeICodeFunctionToX86{blocks = resultVector, functionName = name, pregProps = pregProperties, ccCount= ! ccRefCounter, debugSwitches = debugSwitches, resultClosure = resultClosure, profileObject = profileObject} end fun gencodeLambda(lambda, debugSwitches, closure) = let open Debug Universal (*val debugSwitches = [tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), tagInject assemblyCodeTag true] @ debugSwitches*) in codeFunctionToX86(lambda, debugSwitches, closure) end structure Foreign = X86Foreign structure Sharing = struct type backendIC = backendIC and bicLoadForm = bicLoadForm and argumentType = argumentType and closureRef = closureRef end end; diff --git a/mlsource/MLCompiler/INITIALISE_.ML b/mlsource/MLCompiler/INITIALISE_.ML index e9d87c18..f6c7ffac 100644 --- a/mlsource/MLCompiler/INITIALISE_.ML +++ b/mlsource/MLCompiler/INITIALISE_.ML @@ -1,2079 +1,2074 @@ (* Copyright (c) 2000 Cambridge University Technical Services Limited Updated David C.J. Matthews 2008-9, 2012, 2013, 2015-21 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Title: Initialise ML Global Declarations. 
Author: Dave Matthews,Cambridge University Computer Laboratory Copyright Cambridge University 1985 *) functor INITIALISE_ ( structure LEX: LEXSIG structure TYPETREE : TYPETREESIG structure STRUCTVALS : STRUCTVALSIG structure VALUEOPS : VALUEOPSSIG structure CODETREE : CODETREE structure EXPORTTREE: EXPORTTREESIG structure DATATYPEREP: DATATYPEREPSIG structure TYPEIDCODE: TYPEIDCODESIG structure MAKE: MAKESIG structure ADDRESS : AddressSig structure DEBUG: DEBUG structure DEBUGGER : DEBUGGER structure PRETTY : PRETTY structure PRINTTABLE: PRINTTABLESIG structure MISC : sig val unescapeString : string -> string exception Conversion of string; (* string to int conversion failure *) end structure VERSION: sig val compilerVersion: string val versionNumber: int end structure UNIVERSALTABLE: sig type universal = Universal.universal type univTable type 'a tag = 'a Universal.tag val univLookup: univTable * 'a tag * string -> 'a option val fold: (string * universal * 'a -> 'a) -> 'a -> univTable -> 'a end sharing STRUCTVALS.Sharing = VALUEOPS.Sharing = TYPETREE.Sharing = EXPORTTREE.Sharing = PRETTY.Sharing = CODETREE.Sharing = MAKE.Sharing = ADDRESS = DATATYPEREP.Sharing = TYPEIDCODE.Sharing = DEBUGGER.Sharing = LEX.Sharing = PRINTTABLE.Sharing sharing STRUCTVALS.Sharing = VALUEOPS.Sharing = TYPETREE.Sharing = EXPORTTREE.Sharing = PRETTY.Sharing = CODETREE.Sharing = MAKE.Sharing = ADDRESS = DATATYPEREP.Sharing = TYPEIDCODE.Sharing = DEBUGGER.Sharing = LEX.Sharing = PRINTTABLE.Sharing = UNIVERSALTABLE ) : sig type gEnv val initGlobalEnv : {globalTable : gEnv, intIsArbitraryPrecision: bool } -> unit end = struct open STRUCTVALS; open TYPETREE open VALUEOPS; open CODETREE; open ADDRESS; open MAKE; open MISC; open EXPORTTREE open DATATYPEREP val intInfType = mkTypeConstruction ("int", intInfConstr, [], []) and realType = mkTypeConstruction ("real", realConstr, [], []) and charType = mkTypeConstruction ("char", charConstr, [], []) and wordType = mkTypeConstruction ("word", wordConstr, [], []) val declInBasis = [DeclaredAt inBasis] fun applyList _ [] = () | applyList f (h :: t) = (f h : unit; applyList f t); fun initGlobalEnv{globalTable : gEnv, intIsArbitraryPrecision: bool } = let val Env globalEnv = MAKE.gEnvAsEnv globalTable val enterGlobalValue = #enterVal globalEnv; val enterGlobalType = #enterType globalEnv; (* Some routines to help make the types. *) local (* careful - STRUCTVALS.intType differs from TYPETREE.intType *) open TYPETREE; in (* Make some type variables *) fun makeEqTV () = mkTypeVar (generalisable, true, false, false) fun makeTV () = mkTypeVar (generalisable, false, false, false) fun makePrintTV() = mkTypeVar (generalisable, false, false, true) fun makeTypeVariable() = makeTv {value=emptyType, level=generalisable, equality=false, nonunifiable=false, printable=false} (* Make some functions *) infixr 5 ->> fun a ->> b = mkFunctionType (a, b); infix 7 **; fun a ** b = mkProductType [a, b]; (* Type identifiers for the types of the declarations. 
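These are built with the ** and ->> combinators defined just above. A small stand-alone model of how they read, using an illustrative datatype rather than the compiler's own types: ** (infix 7) binds tighter than the right-associative ->> (infixr 5), so a ** b ->> c denotes a function from a pair:

           datatype ty = Base of string | Fun of ty * ty | Prod of ty * ty
           infixr 5 ->>
           infix 7 **
           fun a ->> b = Fun (a, b)
           fun a ** b = Prod (a, b)
           val example = Base "int" ** Base "int" ->> Base "bool"
           (* example = Fun (Prod (Base "int", Base "int"), Base "bool"), i.e. int * int -> bool *)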
*) val Int = if intIsArbitraryPrecision then intInfType else fixedIntType val String = stringType; val Bool = boolType; val Unit = unitType; val Char = charType; val Word = wordType; val Real = realType val Exn = exnType val mkTypeConstruction = mkTypeConstruction; val () = setPreferredInt(if intIsArbitraryPrecision then intInfConstr else fixedIntConstr) end; fun makePolymorphic(tvs, c) = let open TYPEIDCODE val tvs = List.filter(fn TypeVar tv => not justForEqualityTypes orelse tvEquality tv | _ => false) tvs in if null tvs then c else mkInlproc(c, List.length tvs, "", [], 0) end (* Function to make a type identifier with a pretty printer that just prints "?". None of the types are equality types so the equality function is empty. *) local fun monotypePrinter _ = PRETTY.PrettyString "?" in fun defaultEqAndPrintCode () = let open TypeValue val code = createTypeValue{ eqCode = CodeZero, printCode = mkConst (toMachineWord (ref monotypePrinter)), boxedCode = boxedNever, sizeCode = singleWord } in Global (genCode(code, [], 0) ()) end end fun makeTypeAbbreviation(name, fullName, typeVars, typeResult, locations) = makeTypeConstructor( name, typeVars, makeTypeFunction(basisDescription fullName, (typeVars, typeResult)), locations) (* Make an opaque type and add it to an environment. *) fun makeAndDeclareOpaqueType(typeName, fullName, env) = let val typeconstr = makeTypeConstructor(typeName, [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription fullName), declInBasis); in #enterType env (typeName, TypeConstrSet(typeconstr, [])); mkTypeConstruction (typeName, typeconstr, [], declInBasis) end; (* List of something *) fun List (base : types) : types = mkTypeConstruction ("list", tsConstr listConstr, [base], declInBasis); (* ref something *) fun Ref (base : types) : types = mkTypeConstruction ("ref", refConstr, [base], declInBasis); fun Option (base : types) : types = mkTypeConstruction ("option", tsConstr optionConstr, [base], declInBasis); (* Type-dependent functions. *) fun mkSpecialFun (name:string, typeof:types, opn: typeDependent) : values = makeOverloaded (name, typeof, opn); (* Overloaded functions. *) fun mkOverloaded (name:string) (typeof: types) : values = mkSpecialFun(name, typeof, TypeDep) (* Make a structure. Returns the table as an environment so that entries can be added to the structure. 
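In other words the caller gets back a record of enter functions closed over the structure's table. A minimal stand-alone sketch of that pattern (the type and names are illustrative, not the compiler's):

           type 'a env = { enter: string * 'a -> unit, lookup: string -> 'a option }
           fun newEnv () : 'a env =
               let
                   val table = ref ([]: (string * 'a) list)
               in
                   { enter = fn (name, v) => table := (name, v) :: !table,
                     lookup = fn name =>
                         Option.map #2 (List.find (fn (n, _) => n = name) (!table)) }
               end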
*) fun makeStructure(parentEnv, name) = let val str as Struct{signat=Signatures{tab, ...}, ...} = makeEmptyGlobal name val () = #enterStruct parentEnv (name, str) val Env env = makeEnv tab in env end val () = enterGlobalType ("unit", TypeConstrSet(unitConstr, [])); local val falseCons = mkGconstr ("false", Bool, createNullaryConstructor(EnumForm{tag=0w0, maxTag=0w1}, [], "false"), true, 2, declInBasis) val trueCons = mkGconstr ("true", Bool, createNullaryConstructor(EnumForm{tag=0w1, maxTag=0w1}, [], "true"), true, 2, declInBasis) val boolEnv = makeStructure(globalEnv, "Bool") (* Bool structure *) val notFn = mkGvar("not", Bool ->> Bool, mkUnaryFn BuiltIns.NotBoolean, declInBasis) in val () = #enterType boolEnv ("bool", TypeConstrSet(boolConstr, [trueCons, falseCons])) val () = #enterVal boolEnv ("true", trueCons) val () = #enterVal boolEnv ("false", falseCons) val () = #enterVal boolEnv ("not", notFn) end; val () = enterGlobalType ("int", TypeConstrSet(if intIsArbitraryPrecision then intInfConstr else fixedIntConstr, [])) val () = enterGlobalType ("char", TypeConstrSet(charConstr, [])) val () = enterGlobalType ("string", TypeConstrSet(stringConstr, [])) val () = enterGlobalType ("real", TypeConstrSet(realConstr, [])) val () = (* Enter :: and nil. *) List.app(fn(tv as Value{name, ...}) => enterGlobalValue(name, tv)) (tsConstructors listConstr) val () = enterGlobalType ("list", listConstr); val () = (* Enter NONE and SOME. *) List.app(fn(tv as Value{name, ...}) => enterGlobalValue(name, tv)) (tsConstructors optionConstr) val () = enterGlobalType ("option", optionConstr); local val refCons = let val a = mkTypeVar(generalisable, false, false, false) in mkGconstr ("ref", a ->> Ref a, createUnaryConstructor(RefForm, [a], "ref"), false, 1, declInBasis) end in val () = enterGlobalType ("ref", TypeConstrSet(refConstr, [refCons])); val () = enterGlobalValue ("ref", refCons); end local open BuiltIns fun monoTypePrinter _ = PRETTY.PrettyString "?" val idCode = let open TypeValue val equalLongWordFn = mkInlproc( mkBinary(LargeWordComparison TestEqual, mkLoadArgument 0, mkLoadArgument 1), 2, "EqualLargeWord()", [], 0) val code = createTypeValue{ eqCode=equalLongWordFn, printCode=mkConst (toMachineWord (ref monoTypePrinter)), boxedCode = boxedNever, sizeCode = singleWord } in Global (genCode(code, [], 0) ()) end in val largeWordType = makeTypeConstructor("word", [], makeFreeId(0, idCode, true, basisDescription "LargeWord.word"), declInBasis) val LargeWord = mkTypeConstruction ("LargeWord.word", largeWordType, [], declInBasis) end val () = enterGlobalType ("exn", TypeConstrSet(exnConstr, [])); val () = enterGlobalType ("word", TypeConstrSet(wordConstr, [])); val runCallEnv = makeStructure(globalEnv, "RunCall") fun enterRunCall (name : string, entry : codetree, typ : types) : unit = let val value = mkGvar (name, typ, entry, declInBasis); in #enterVal runCallEnv (name, value) end local (* unsafeCast. Can be used to convert any value to any type. *) val a = makeTV () val b = makeTV () val unsafeCastEntry = mkInlproc (mkLoadArgument 0 (* just the parameter *), 1, "unsafeCast(1)", [], 0) in val () = enterRunCall ("unsafeCast", makePolymorphic([a, b], unsafeCastEntry), a ->> b) end local val a = makeTV() and b = makeTV() open BuiltIns in (* isShort - test if a value is tagged rather than being an address. 
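A usage sketch; the expected results rest on the usual representation assumption that small integers are tagged while heap cells such as multi-character strings are not:

           val shortExample = RunCall.isShort 42        (* expected: true *)
           val boxedExample = RunCall.isShort "a box"   (* expected: false *)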
*) val () = enterRunCall ("isShort", makePolymorphic([a], mkUnaryFn IsTaggedValue), a ->> Bool) (* pointer equality *) val () = enterRunCall ("pointerEq", makePolymorphic([a], mkBinaryFn PointerEq), a ** a ->> Bool) (* load a word. The index is in words and is always zero or positive. *) val () = enterRunCall ("loadWord", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLWord{isImmutable=false})), a ** Word ->> b) (* Load a word from an immutable. *) val () = enterRunCall ("loadWordFromImmutable", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLWord{isImmutable=true})), a ** Word ->> b) (* load a byte. The index is in bytes and is always zero or positive. Probably the result should be a Word8.word value or a char. *) val () = enterRunCall ("loadByte", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLByte{isImmutable=false})), a ** Word ->> b) (* Load a byte from an immutable. *) val () = enterRunCall ("loadByteFromImmutable", makePolymorphic([a, b], mkLoadOperationFn(LoadStoreMLByte{isImmutable=true})), a ** Word ->> b) (* Get the length of a heap cell. *) val () = enterRunCall ("memoryCellLength", makePolymorphic([a], mkUnaryFn MemoryCellLength), a ->> Word) (* Return the flags. Perhaps this could return a Word8.word value instead of a word. *) val () = enterRunCall ("memoryCellFlags", makePolymorphic([a], mkUnaryFn MemoryCellFlags), a ->> Word) (* Return the number of bytes per word. This is a constant since we have separate pre-built compilers for 32-bit and 64-bit. N.B. The byte order is not a constant since we only have a single pre-built compiler for little-endian and big-endian interpreted code. *) val () = enterRunCall ("bytesPerWord", mkConst(toMachineWord wordSize), Word) (* Store a word *) val () = enterRunCall ("storeWord", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLWord{isImmutable=false})), mkProductType[a, Word, b] ->> Unit) (* Store a byte *) val () = enterRunCall ("storeByte", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLByte{isImmutable=false})), mkProductType[a, Word, b] ->> Unit) (* Store a word without release semantics. *) val () = enterRunCall ("storeWordInitialise", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLWord{isImmutable=true})), mkProductType[a, Word, b] ->> Unit) (* Store a byte without release semantics *) val () = enterRunCall ("storeByteInitialise", makePolymorphic([a, b], mkStoreOperationFn(LoadStoreMLByte{isImmutable=true})), mkProductType[a, Word, b] ->> Unit) (* Lock a mutable cell. *) val () = enterRunCall ("clearMutableBit", makePolymorphic([a], mkUnaryFn ClearMutableFlag), a ->> Unit) (* Allocate a byte cell. The second argument is the flags byte. It might be better if this were a Word8.word value. *) val () = enterRunCall ("allocateByteMemory", makePolymorphic([a], mkBinaryFn AllocateByteMemory), Word ** Word ->> a) (* Allocate a word cell. *) val () = enterRunCall ("allocateWordMemory", makePolymorphic([a, b], mkAllocateWordMemoryFn), mkProductType[Word, Word, a] ->> b) (* Byte vector operations. *) val () = enterRunCall ("byteVectorEqual", makePolymorphic([a], mkBlockOperationFn BlockOpEqualByte), mkProductType[a, a, Word, Word, Word] ->> Bool) val () = enterRunCall ("byteVectorCompare", makePolymorphic([a], mkBlockOperationFn BlockOpCompareByte), mkProductType[a, a, Word, Word, Word] ->> Int) (* Block moves. 
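i.e. bulk byte or word copies between heap cells. A plain-ML model of the byte case, with an assumed argument order of (source, destination, source index, destination index, length), written against Word8Array purely for illustration:

           fun moveBytesModel (src: Word8Array.array, dst: Word8Array.array,
                               srcIndex: int, dstIndex: int, length: int) =
               Word8ArraySlice.copy
                   { src = Word8ArraySlice.slice (src, srcIndex, SOME length),
                     dst = dst, di = dstIndex }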
*) val () = enterRunCall ("moveBytes", makePolymorphic([a], mkBlockOperationFn (BlockOpMove{isByteMove=true})), mkProductType[a, a, Word, Word, Word] ->> Unit) val () = enterRunCall ("moveWords", makePolymorphic([a], mkBlockOperationFn (BlockOpMove{isByteMove=false})), mkProductType[a, a, Word, Word, Word] ->> Unit) (* Untagged loads and stores. *) val () = enterRunCall ("loadUntagged", mkLoadOperationFn LoadStoreUntaggedUnsigned, String ** Word ->> Word) val () = enterRunCall ("storeUntagged", mkStoreOperationFn LoadStoreUntaggedUnsigned, mkProductType[String, Word, Word] ->> Unit) val () = enterRunCall ("touch", makePolymorphic([a], mkUnaryFn TouchAddress), a ->> Unit) end local val debugOpts = [] (* Place to add debugging if necessary. *) (* [tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), tagInject assemblyCodeTag true] *) fun makeCall rtsCall n entryName = rtsCall (entryName, n, debugOpts) val makeFastCall = makeCall CODETREE.Foreign.rtsCallFast (* We need to wrap this so that the arguments are passed in registers. *) fun makeRunCallTupled (argTypes, resultType, callN) : codetree = let val width = List.length argTypes val name = "rtsCall" ^ Int.toString width; local val f = mkLoadClosure 0 (* first item from enclosing scope *) val tuple = mkLoadArgument 0 (* the inner parameter *) val args = case argTypes of [singleType] => [(tuple, singleType)] | argTypes => let val argVals = List.tabulate(width, fn n => mkInd (n, tuple)) in ListPair.zipEq(argVals, argTypes) end in val innerBody = mkCall (f, args, resultType) end local (* The closure contains the address of the RTS call. *) val f = mkEval(mkConst callN, [mkLoadArgument 0]) val innerLambda = mkInlproc (innerBody, 1, name ^ "(1)", [mkLoadLocal 0], 0) in val outerBody = mkEnv([mkDec (0, f)], innerLambda) end val outerLambda = mkInlproc (outerBody, 1, name, [], 1) in outerLambda end (* "Full" calls pass the thread Id as the first parameter. *) fun makeRunCallTupledFull (argTypes, resultType) = let val width = List.length argTypes val callN = toMachineWord(makeFastCall(width + 1)) val name = "rtsCall" ^ Int.toString width; local val f = mkLoadClosure 0 (* first item from enclosing scope *) val tuple = mkLoadArgument 0 (* the inner parameter *) val args = case argTypes of [singleType] => [(tuple, singleType)] | argTypes => let val argVals = List.tabulate(width, fn n => mkInd (n, tuple)) in ListPair.zipEq(argVals, argTypes) end in - val innerBody = - mkEnv( - [ - mkDec(0, mkCall (f, (getCurrentThreadId, GeneralType) :: args, resultType)), - mkNullDec checkRTSException - ], mkLoadLocal 0) + val innerBody = mkCall (f, (getCurrentThreadId, GeneralType) :: args, resultType) end local (* The closure contains the address of the RTS call. *) val f = mkEval(mkConst callN, [mkLoadArgument 0]) (* This creates the actual call. 
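At the ML level the effect of the "full" wrapper is simply to prepend the current thread id to the tuple of arguments. A stand-alone model of that convention (all names illustrative):

           fun fullCall2Model (entry: 'thread * 'a * 'b -> 'c) (currentThread: unit -> 'thread) =
               fn (x, y) => entry (currentThread (), x, y)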
*) - val innerLambda = mkInlproc (innerBody, 1, name ^ "(1)", [mkLoadLocal 0], 1) + val innerLambda = mkInlproc (innerBody, 1, name ^ "(1)", [mkLoadLocal 0], 0) in val outerBody = mkEnv([mkDec (0, f)], innerLambda) end val outerLambda = mkInlproc (outerBody, 1, name, [], 1) in outerLambda end local val a = makeTV () and b = makeTV () fun makeInlCode(makeCall, name) = let val call1 = toMachineWord(makeCall 1) val body = mkEval(mkConst call1, [mkLoadArgument 0]) val proc = mkInlproc (body, 1, name, [], 0) in makePolymorphic([a, b], proc) end in val rtsCallFast1Entry = makeInlCode(makeFastCall, "rtsCallFast1") end local val a = makeTV () and b = makeTV () and c = makeTV () and d = makeTV () and e = makeTV () and f = makeTV () fun makeRtsCall(n, makeCall) = makeRunCallTupled(List.tabulate(n, fn _ => GeneralType), GeneralType, toMachineWord(makeCall n)) fun makeFullRtsCall n = makeRunCallTupledFull(List.tabulate(n, fn _ => GeneralType), GeneralType) in val rtsCallFull0Entry = makePolymorphic([a], makeFullRtsCall 0) and rtsCallFast0Entry = makePolymorphic([a], makeRtsCall(0, makeFastCall)) val rtsCall0Type = String ->> Unit ->> a val rtsCall1Type = String ->> a ->> b val rtsCallFull1Entry = makePolymorphic([a, b], makeFullRtsCall 1) val rtsCallFull2Entry = makePolymorphic([a, b, c], makeFullRtsCall 2) and rtsCallFast2Entry = makePolymorphic([a, b, c], makeRtsCall(2, makeFastCall)) val rtsCall2Type = String ->> TYPETREE.mkProductType [a,b] ->> c val rtsCallFull3Entry = makePolymorphic([a, b, c, d], makeFullRtsCall 3) val rtsCallFast3Entry = makePolymorphic([a, b, c, d], makeRtsCall(3, makeFastCall)) val rtsCall3Type = String ->> TYPETREE.mkProductType [a,b,c] ->> d val rtsCallFull4Entry = makePolymorphic([a, b, c, d, e], makeFullRtsCall 4) val rtsCallFast4Entry = makePolymorphic([a, b, c, d, e], makeRtsCall(4, makeFastCall)) val rtsCall4Type = String ->> TYPETREE.mkProductType [a,b,c,d] ->> e val rtsCallFull5Entry = makePolymorphic([a, b, c, d, e, f], makeFullRtsCall 5) val rtsCall5Type = String ->> TYPETREE.mkProductType [a,b,c,d,e] ->> f end in val () = enterRunCall ("rtsCallFull0", rtsCallFull0Entry, rtsCall0Type) val () = enterRunCall ("rtsCallFast0", rtsCallFast0Entry, rtsCall0Type) val () = enterRunCall ("rtsCallFull1", rtsCallFull1Entry, rtsCall1Type) val () = enterRunCall ("rtsCallFast1", rtsCallFast1Entry, rtsCall1Type) val () = enterRunCall ("rtsCallFull2", rtsCallFull2Entry, rtsCall2Type) val () = enterRunCall ("rtsCallFast2", rtsCallFast2Entry, rtsCall2Type) val () = enterRunCall ("rtsCallFull3", rtsCallFull3Entry, rtsCall3Type) val () = enterRunCall ("rtsCallFast3", rtsCallFast3Entry, rtsCall3Type) val () = enterRunCall ("rtsCallFast4", rtsCallFast4Entry, rtsCall4Type) val () = enterRunCall ("rtsCallFull4", rtsCallFull4Entry, rtsCall4Type) val () = enterRunCall ("rtsCallFull5", rtsCallFull5Entry, rtsCall5Type) val makeRunCallTupled = makeRunCallTupled (* Needed for reals. *) end local (* Create nullary exception. *) fun makeException0(name, id) = let val exc = Value{ name = name, typeOf = TYPETREE.exnType, access = Global(mkConst(toMachineWord id)), class = Exception, locations = declInBasis, references = NONE, instanceTypes=NONE } in #enterVal runCallEnv (name, exc) end (* Create exception with parameter. 
*) and makeException1(name, id, exType) = let val exc = Value{ name = name, typeOf = exType ->> TYPETREE.exnType, access = Global(mkConst(toMachineWord id)), class = Exception, locations = declInBasis, references = NONE, instanceTypes=NONE } in #enterVal runCallEnv (name, exc) end (* Exception numbers. Most of these are hard-coded in the RTS. *) val EXC_interrupt = 1 val EXC_syserr = 2 val EXC_size = 4 val EXC_overflow = 5 val EXC_divide = 7 val EXC_conversion = 8 val EXC_XWindows = 10 val EXC_subscript = 11 val EXC_thread = 12 val EXC_Bind = 100 (* In Match compiler. *) val EXC_Match = 101 val EXC_Fail = 103 in val () = List.app makeException0 [ ("Interrupt", EXC_interrupt), ("Size", EXC_size), ("Bind", EXC_Bind), ("Div", EXC_divide), ("Match", EXC_Match), ("Overflow", EXC_overflow), ("Subscript", EXC_subscript) ] val () = List.app makeException1 [ ("Fail", EXC_Fail, String), ("Conversion", EXC_conversion, String), ("XWindows", EXC_XWindows, String), ("Thread", EXC_thread, String), ("SysErr", EXC_syserr, String ** Option LargeWord) ] end (* Standard Basis structures for basic types. These contain the definitions of the basic types and operations on them. The structures are extended in the basis library and overloaded functions are extracted from them. *) local val largeIntEnv = makeStructure(globalEnv, "LargeInt") (* The comparison operations take two arbitrary precision ints and a general "compare" function that returns a fixed precision int. *) val compareType = mkProductType[intInfType, intInfType, intInfType ** intInfType ->> fixedIntType] ->> Bool val arithType = mkProductType[intInfType, intInfType, intInfType ** intInfType ->> intInfType] ->> intInfType fun enterArbitrary(name, oper, typ) = let val value = mkGvar (name, typ, mkArbitraryFn oper, declInBasis) in #enterVal largeIntEnv (name, value) end in val () = #enterType largeIntEnv ("int", TypeConstrSet(intInfConstr, [])) (* These functions are used internally. 
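A sketch of the calling convention these appear to follow; the contract of the third, fall-back argument is an assumption here (a conventional negative/zero/positive comparison, consulted only when the short-value fast path does not apply):

           fun lessModel (x: IntInf.int, y: IntInf.int,
                          longCompare: IntInf.int * IntInf.int -> int) : bool =
               if RunCall.isShort x andalso RunCall.isShort y
               then IntInf.< (x, y)              (* cheap comparison on tagged values *)
               else longCompare (x, y) < 0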
*) val () = enterArbitrary("less", ArbCompare BuiltIns.TestLess, compareType) val () = enterArbitrary("greater", ArbCompare BuiltIns.TestGreater, compareType) val () = enterArbitrary("lessEq", ArbCompare BuiltIns.TestLessEqual, compareType) val () = enterArbitrary("greaterEq", ArbCompare BuiltIns.TestGreaterEqual, compareType) val () = enterArbitrary("add", ArbArith BuiltIns.ArithAdd, arithType) val () = enterArbitrary("subtract", ArbArith BuiltIns.ArithSub, arithType) val () = enterArbitrary("multiply", ArbArith BuiltIns.ArithMult, arithType) end local val fixedIntEnv = makeStructure(globalEnv, "FixedInt") open BuiltIns fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal fixedIntEnv (name, value) end val compareType = fixedIntType ** fixedIntType ->> Bool and binaryType = fixedIntType ** fixedIntType ->> fixedIntType fun enterComparison(name, test) = enterBinary(name, WordComparison{test=test, isSigned=true}, compareType) and enterBinaryOp(name, oper) = enterBinary(name, FixedPrecisionArith oper, binaryType) in val () = #enterType fixedIntEnv ("int", TypeConstrSet(fixedIntConstr, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("quot", ArithQuot) val () = enterBinaryOp("rem", ArithRem) end local open BuiltIns val largeWordEnv = makeStructure(globalEnv, "LargeWord") fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal largeWordEnv (name, value) end val compareType = LargeWord ** LargeWord ->> Bool and binaryType = LargeWord ** LargeWord ->> LargeWord and shiftType = LargeWord ** Word ->> LargeWord (* The shift amount is a Word. 
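Concretely, whatever the width of the word being shifted, the amount is a plain Word.word, matching the Basis WORD signature:

           val shifted: LargeWord.word = LargeWord.<< (0w1, 0w12)   (* 0wx1000 *)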
*) fun enterComparison(name, test) = enterBinary(name, LargeWordComparison test, compareType) and enterBinaryOp(name, oper) = enterBinary(name, LargeWordArith oper, binaryType) and enterBinaryLogical(name, oper) = enterBinary(name, LargeWordLogical oper, binaryType) and enterBinaryShift(name, oper) = enterBinary(name, LargeWordShift oper, shiftType) in val () = #enterType largeWordEnv ("word", TypeConstrSet(largeWordType, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("div", ArithDiv) val () = enterBinaryOp("mod", ArithMod) val () = enterBinaryLogical("orb", LogicalOr) val () = enterBinaryLogical("andb", LogicalAnd) val () = enterBinaryLogical("xorb", LogicalXor) val () = enterBinaryShift("<<", ShiftLeft) val () = enterBinaryShift(">>", ShiftRightLogical) val () = enterBinaryShift("~>>", ShiftRightArithmetic) val LargeWord = LargeWord end local val wordStructEnv = makeStructure(globalEnv, "Word") open BuiltIns fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal wordStructEnv (name, value) end val compareType = Word ** Word ->> Bool and binaryType = Word ** Word ->> Word fun enterComparison(name, test) = enterBinary(name, WordComparison{test=test, isSigned=false}, compareType) and enterBinaryOp(name, oper) = enterBinary(name, WordArith oper, binaryType) and enterBinaryLogical(name, oper) = enterBinary(name, WordLogical oper, binaryType) and enterBinaryShift(name, oper) = enterBinary(name, WordShift oper, binaryType) val toLargeWordFn = mkGvar ("toLargeWord", Word ->> LargeWord, mkUnaryFn UnsignedToLongWord, declInBasis) and toLargeWordXFn = mkGvar ("toLargeWordX", Word ->> LargeWord, mkUnaryFn SignedToLongWord, declInBasis) and fromLargeWordFn = mkGvar ("fromLargeWord", LargeWord ->> Word, mkUnaryFn LongWordToTagged, declInBasis) in val () = #enterType wordStructEnv ("word", TypeConstrSet(wordConstr, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("div", ArithDiv) val () = enterBinaryOp("mod", ArithMod) val () = enterBinaryLogical("orb", LogicalOr) val () = enterBinaryLogical("andb", LogicalAnd) val () = enterBinaryLogical("xorb", LogicalXor) val () = enterBinaryShift("<<", ShiftLeft) val () = enterBinaryShift(">>", ShiftRightLogical) val () = enterBinaryShift("~>>", ShiftRightArithmetic) val () = #enterVal wordStructEnv ("toLargeWord", toLargeWordFn) val () = #enterVal wordStructEnv ("toLargeWordX", toLargeWordXFn) val () = #enterVal wordStructEnv ("fromLargeWord", fromLargeWordFn) end local val charEnv = makeStructure(globalEnv, "Char") open BuiltIns (* Comparison functions are the same as Word. 
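so they coincide with comparing the character codes; a quick illustrative check:

           val true = (#"a" < #"b") = (ord #"a" < ord #"b")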
*) fun enterComparison(name, test) = let val typ = Char ** Char ->> Bool val entry = mkBinaryFn(WordComparison{test=test, isSigned=false}) val value = mkGvar (name, typ, entry, declInBasis) in #enterVal charEnv (name, value) end in val () = #enterType charEnv ("char", TypeConstrSet(charConstr, [])) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) end local val stringEnv = makeStructure(globalEnv, "String") in val () = #enterType stringEnv ("string", TypeConstrSet(stringConstr, [])) end local val realEnv = makeStructure(globalEnv, "Real") (* These are only used in Real so are included here rather than in RunCall. rtsCallFastRealtoReal is used for functions such as sqrt. rtsCallFastGeneraltoReal is used for Real.fromLargeInt. *) val debugOpts = [] (* Place to add debugging if necessary. *) fun makeFastRealRealCall entryName = CODETREE.Foreign.rtsCallFastRealtoReal (entryName, debugOpts) and makeFastRealRealRealCall entryName = CODETREE.Foreign.rtsCallFastRealRealtoReal (entryName, debugOpts) and makeFastIntInfRealCall entryName = CODETREE.Foreign.rtsCallFastGeneraltoReal (entryName, debugOpts) and makeFastRealGeneralRealCall entryName = CODETREE.Foreign.rtsCallFastRealGeneraltoReal (entryName, debugOpts) val rtsCallFastR_REntry = makeRunCallTupled([DoubleFloatType], DoubleFloatType, toMachineWord makeFastRealRealCall) (* This needs to be tupled. *) val rtsCallFastRR_REntry = makeRunCallTupled([DoubleFloatType, DoubleFloatType], DoubleFloatType, toMachineWord makeFastRealRealRealCall) and rtsCallFastRI_REntry = makeRunCallTupled([DoubleFloatType, GeneralType], DoubleFloatType, toMachineWord makeFastRealGeneralRealCall) val rtsCallFastI_REntry = makeRunCallTupled([GeneralType], DoubleFloatType, toMachineWord makeFastIntInfRealCall) val rtsCallFastF_F = mkGvar ("rtsCallFastR_R", String ->> Real ->> Real, rtsCallFastR_REntry, declInBasis) val rtsCallFastFF_F = mkGvar ("rtsCallFastRR_R", String ->> Real ** Real ->> Real, rtsCallFastRR_REntry, declInBasis) val rtsCallFastFG_F = mkGvar ("rtsCallFastRI_R", String ->> Real ** Int ->> Real, rtsCallFastRI_REntry, declInBasis) val rtsCallFastG_F = mkGvar ("rtsCallFastI_R", String ->> intInfType ->> Real, rtsCallFastI_REntry, declInBasis) fun enterUnary(name, oper, typ) = let val value = mkGvar (name, typ, mkUnaryFn oper, declInBasis) in #enterVal realEnv (name, value) end fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal realEnv (name, value) end val compareType = Real ** Real ->> Bool and binaryType = Real ** Real ->> Real and unaryType = Real ->> Real and realToFixType = Real ->> fixedIntType open BuiltIns IEEEReal fun enterComparison(name, test) = enterBinary(name, RealComparison(test, PrecDouble), compareType) and enterBinaryOp(name, oper) = enterBinary(name, RealArith(oper, PrecDouble), binaryType) in val () = #enterType realEnv ("real", TypeConstrSet(realConstr, [])) val () = #enterVal realEnv ("rtsCallFastR_R", rtsCallFastF_F) val () = #enterVal realEnv ("rtsCallFastRR_R", rtsCallFastFF_F) val () = #enterVal realEnv ("rtsCallFastRI_R", rtsCallFastFG_F) val () = #enterVal realEnv ("rtsCallFastI_R", rtsCallFastG_F) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterComparison("==", TestEqual) (* real is not an eqtype. 
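which is why a distinct IEEE equality is provided rather than relying on structural equality. For instance a NaN never compares equal to itself while the two zeros do:

           val nan = 0.0 / 0.0
           val notReflexive = Real.== (nan, nan)     (* false *)
           val sameZeros    = Real.== (0.0, ~0.0)    (* true *)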
*) (* Included unordered mainly because it's easy to implement isNan. *) val () = enterComparison("unordered", TestUnordered) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("/", ArithDiv) val () = enterUnary("~", RealNeg PrecDouble, unaryType) val () = enterUnary("abs", RealAbs PrecDouble, unaryType) val () = enterUnary("fromFixedInt", RealFixedInt PrecDouble, fixedIntType ->> Real) val () = enterUnary("truncFix", RealToInt(PrecDouble, TO_ZERO), realToFixType) val () = enterUnary("roundFix", RealToInt(PrecDouble, TO_NEAREST), realToFixType) val () = enterUnary("ceilFix", RealToInt(PrecDouble, TO_POSINF), realToFixType) val () = enterUnary("floorFix", RealToInt(PrecDouble, TO_NEGINF), realToFixType) end local val real32Env = makeStructure(globalEnv, "Real32") val floatType = mkTypeConstruction ("real", floatConstr, [], []) val Float = floatType val debugOpts = [] (* Place to add debugging if necessary. *) fun makeFastFloatFloatCall entryName = CODETREE.Foreign.rtsCallFastFloattoFloat (entryName, debugOpts) and makeFastFloatFloatFloatCall entryName = CODETREE.Foreign.rtsCallFastFloatFloattoFloat (entryName, debugOpts) and makeFastIntInfFloatCall entryName = CODETREE.Foreign.rtsCallFastGeneraltoFloat (entryName, debugOpts) and makeFastFloatGeneralFloatCall entryName = CODETREE.Foreign.rtsCallFastFloatGeneraltoFloat (entryName, debugOpts) val rtsCallFastR_REntry = makeRunCallTupled([SingleFloatType], SingleFloatType, toMachineWord makeFastFloatFloatCall) (* This needs to be tupled. *) val rtsCallFastRR_REntry = makeRunCallTupled([SingleFloatType, SingleFloatType], SingleFloatType, toMachineWord makeFastFloatFloatFloatCall) and rtsCallFastRI_REntry = makeRunCallTupled([SingleFloatType, GeneralType], SingleFloatType, toMachineWord makeFastFloatGeneralFloatCall) val rtsCallFastI_REntry = makeRunCallTupled([GeneralType], SingleFloatType, toMachineWord makeFastIntInfFloatCall) val rtsCallFastF_F = mkGvar ("rtsCallFastF_F", String ->> Float ->> Float, rtsCallFastR_REntry, declInBasis) val rtsCallFastFF_F = mkGvar ("rtsCallFastFF_F", String ->> Float ** Float ->> Float, rtsCallFastRR_REntry, declInBasis) val rtsCallFastFG_F = mkGvar ("rtsCallFastFI_F", String ->> Float ** Int ->> Float, rtsCallFastRI_REntry, declInBasis) val rtsCallFastG_F = mkGvar ("rtsCallFastI_F", String ->> intInfType ->> Float, rtsCallFastI_REntry, declInBasis) fun enterUnary(name, oper, typ) = let val value = mkGvar (name, typ, mkUnaryFn oper, declInBasis) in #enterVal real32Env (name, value) end fun enterBinary(name, oper, typ) = let val value = mkGvar (name, typ, mkBinaryFn oper, declInBasis) in #enterVal real32Env (name, value) end val compareType = Float ** Float ->> Bool and binaryType = Float ** Float ->> Float and unaryType = Float ->> Float and floatToFixType = Float ->> fixedIntType open BuiltIns IEEEReal fun enterComparison(name, test) = enterBinary(name, RealComparison(test, PrecSingle), compareType) and enterBinaryOp(name, oper) = enterBinary(name, RealArith(oper, PrecSingle), binaryType) in val () = #enterType real32Env ("real", TypeConstrSet(floatConstr, [])) val () = enterUnary("toLarge", BuiltIns.FloatToDouble, floatType ->> Real) (* Conversion with the current rounding mode. 
*) and () = enterUnary("fromReal", BuiltIns.DoubleToFloat, Real ->> floatType) val () = #enterVal real32Env ("rtsCallFastR_R", rtsCallFastF_F) val () = #enterVal real32Env ("rtsCallFastRR_R", rtsCallFastFF_F) val () = #enterVal real32Env ("rtsCallFastRI_R", rtsCallFastFG_F) val () = #enterVal real32Env ("rtsCallFastI_R", rtsCallFastG_F) val () = enterComparison("<", TestLess) val () = enterComparison("<=", TestLessEqual) val () = enterComparison(">", TestGreater) val () = enterComparison(">=", TestGreaterEqual) val () = enterComparison("==", TestEqual) (* Real32.real is not an eqtype. *) val () = enterComparison("unordered", TestUnordered) val () = enterBinaryOp("+", ArithAdd) val () = enterBinaryOp("-", ArithSub) val () = enterBinaryOp("*", ArithMult) val () = enterBinaryOp("/", ArithDiv) val () = enterUnary("~", RealNeg PrecSingle, unaryType) val () = enterUnary("abs", RealAbs PrecSingle, unaryType) val () = enterUnary("fromFixedInt", RealFixedInt PrecSingle, fixedIntType ->> floatType) val () = enterUnary("truncFix", RealToInt(PrecSingle, TO_ZERO), floatToFixType) val () = enterUnary("roundFix", RealToInt(PrecSingle, TO_NEAREST), floatToFixType) val () = enterUnary("ceilFix", RealToInt(PrecSingle, TO_POSINF), floatToFixType) val () = enterUnary("floorFix", RealToInt(PrecSingle, TO_NEGINF), floatToFixType) end val bootstrapEnv = makeStructure(globalEnv, "Bootstrap") fun enterBootstrap (name : string, entry : codetree, typ : types) : unit = let val value = mkGvar (name, typ, entry, declInBasis) in #enterVal bootstrapEnv (name, value) end local val threadEnv = makeStructure(globalEnv, "Thread") open TypeValue fun monoTypePrinter _ = PRETTY.PrettyString "?" val code = createTypeValue{ eqCode=equalPointerOrWordFn, printCode=mkConst (toMachineWord (ref monoTypePrinter)), boxedCode=boxedAlways, sizeCode=singleWord } (* Thread.thread type. This is an equality type with pointer equality. *) val threadConstr= makeTypeConstructor ( "thread", [], makeFreeId(0, Global (genCode(code, [], 0) ()), true, basisDescription "thread"), [DeclaredAt inBasis]) val threadType = mkTypeConstruction ("thread", threadConstr, [], []); val selfFunction = mkGvar ("self", Unit ->> threadType, getCurrentThreadIdFn, declInBasis) val createMutexFunction = mkGvar("createMutex", Unit ->> Ref Word, createMutexFn, declInBasis) and lockMutexFunction = mkGvar("lockMutex", Ref Word ->> Bool, mkUnaryFn BuiltIns.LockMutex, declInBasis) and tryLockMutexFunction = mkGvar("tryLockMutex", Ref Word ->> Bool, mkUnaryFn BuiltIns.TryLockMutex, declInBasis) and unlockMutexFunction = mkGvar("unlockMutex", Ref Word ->> Bool, mkUnaryFn BuiltIns.UnlockMutex, declInBasis) in val () = #enterType threadEnv ("thread", TypeConstrSet(threadConstr, [])) val () = #enterVal threadEnv ("self", selfFunction) val () = #enterVal threadEnv ("createMutex", createMutexFunction) val () = #enterVal threadEnv ("lockMutex", lockMutexFunction) val () = #enterVal threadEnv ("tryLockMutex", tryLockMutexFunction) val () = #enterVal threadEnv ("unlockMutex", unlockMutexFunction) end local val fmemEnv = makeStructure(globalEnv, "ForeignMemory") val a = makeTV() (* We don't have Word8.word or Word32.word at this point so the easiest way to deal with this is to make them polymorphic. 
*) val get8Function = mkGvar("get8", LargeWord ** Word ->> a, makePolymorphic([a], mkLoadOperationFn LoadStoreC8), declInBasis) val get16Function = mkGvar("get16", LargeWord ** Word ->> Word, mkLoadOperationFn LoadStoreC16, declInBasis) val get32Function = mkGvar("get32", LargeWord ** Word ->> a, makePolymorphic([a], mkLoadOperationFn LoadStoreC32), declInBasis) val get64Function = mkGvar("get64", LargeWord ** Word ->> LargeWord, mkLoadOperationFn LoadStoreC64, declInBasis) val getFloatFunction = mkGvar("getFloat", LargeWord ** Word ->> Real, mkLoadOperationFn LoadStoreCFloat, declInBasis) val getDoubleFunction = mkGvar("getDouble", LargeWord ** Word ->> Real, mkLoadOperationFn LoadStoreCDouble, declInBasis) val set8Function = mkGvar("set8", mkProductType[LargeWord, Word, a] ->> Unit, makePolymorphic([a], mkStoreOperationFn LoadStoreC8), declInBasis) val set16Function = mkGvar("set16", mkProductType[LargeWord, Word, Word] ->> Unit, mkStoreOperationFn LoadStoreC16, declInBasis) val set32Function = mkGvar("set32", mkProductType[LargeWord, Word, a] ->> Unit, makePolymorphic([a], mkStoreOperationFn LoadStoreC32), declInBasis) val set64Function = mkGvar("set64", mkProductType[LargeWord, Word, LargeWord] ->> Unit, mkStoreOperationFn LoadStoreC64, declInBasis) val setFloatFunction = mkGvar("setFloat", mkProductType[LargeWord, Word, Real] ->> Unit, mkStoreOperationFn LoadStoreCFloat, declInBasis) val setDoubleFunction = mkGvar("setDouble", mkProductType[LargeWord, Word, Real] ->> Unit, mkStoreOperationFn LoadStoreCDouble, declInBasis) val allocCStackFn = mkGvar("allocCStack", Word ->> LargeWord, mkUnaryFn BuiltIns.AllocCStack, declInBasis) val freeCStackFn = mkGvar("freeCStack", LargeWord ** Word ->> Unit, mkBinaryFn BuiltIns.FreeCStack, declInBasis) in val () = #enterVal fmemEnv ("get8", get8Function) val () = #enterVal fmemEnv ("get16", get16Function) val () = #enterVal fmemEnv ("get32", get32Function) val () = #enterVal fmemEnv ("get64", get64Function) val () = #enterVal fmemEnv ("getFloat", getFloatFunction) val () = #enterVal fmemEnv ("getDouble", getDoubleFunction) val () = #enterVal fmemEnv ("set8", set8Function) val () = #enterVal fmemEnv ("set16", set16Function) val () = #enterVal fmemEnv ("set32", set32Function) val () = #enterVal fmemEnv ("set64", set64Function) val () = #enterVal fmemEnv ("setFloat", setFloatFunction) val () = #enterVal fmemEnv ("setDouble", setDoubleFunction) val () = #enterVal fmemEnv ("allocCStack", allocCStackFn) (* Free is a binary operation that takes both the allocated address and the size. The size is used by the compiled code where this is implemented using the C-stack. The allocated address is intended for possible use by the interpreter where so that it can be implemented as malloc/free. 
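A usage sketch implied by the two types declared here: the caller keeps the size and hands back both the address and that same size when freeing:

           val addr = ForeignMemory.allocCStack 0w16
           val () = ForeignMemory.freeCStack (addr, 0w16)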
*) val () = #enterVal fmemEnv ("freeCStack", freeCStackFn) end local val foreignEnv = makeStructure(globalEnv, "Foreign") local val EXC_foreign = 23 val foreignException = Value{ name = "Foreign", typeOf = String ->> TYPETREE.exnType, access = Global(mkConst(toMachineWord EXC_foreign)), class = Exception, locations = declInBasis, references = NONE, instanceTypes=NONE } in val () = #enterVal foreignEnv ("Foreign", foreignException) end val arg0 = mkLoadArgument 0 val arg1 = mkLoadArgument 1 local val callForeignCall = mkEval(mkConst (toMachineWord CODETREE.Foreign.foreignCall), [arg0]) val innerBody = mkEval(mkLoadClosure 0, [mkInd(0, arg0), mkInd(1, arg0), mkInd(2, arg0)]) val outerBody = mkEnv([mkDec(0, callForeignCall)], mkInlproc(innerBody, 1, "foreignCall(1)(1)", [mkLoadLocal 0], 0)) in val foreignCallEntry = mkInlproc(outerBody, 1, "foreignCall(1)", [], 1) end local (* Build a callback. First apply the compiler to the abi/argtype/restype values. Then apply the result to a function to generate the final C callback code. The C callback code calls the function with two arguments. Here we have to pass it a function that expects a tuple and unwrap it. *) val innerMost = mkInlproc(mkEval(mkLoadClosure 0, [mkTuple[arg0, arg1]]), 2, "buildCallBack(1)(1)2", [mkLoadArgument 0], 0) val resultFn = mkInlproc(mkEval(mkLoadClosure 0, [innerMost]), 1, "buildCallBack(1)(1)", [mkLoadLocal 0], 0) val firstBuild = mkEval(mkConst (toMachineWord CODETREE.Foreign.buildCallBack), [arg0]) val outerBody = mkEnv([mkDec(0, firstBuild)], resultFn) in val buildCallBackEntry = mkInlproc(outerBody, 1, "buildCallBack(1)", [], 1) end (* Abi - an eqtype. An enumerated type or short int. *) local open TypeValue fun monotypePrinter _ = PRETTY.PrettyString "?" val code = createTypeValue{ eqCode = equalTaggedWordFn, printCode = mkConst (toMachineWord (ref monotypePrinter)), boxedCode = boxedNever, sizeCode = singleWord } val abiEqAndPrint = Global (genCode(code, [], 0) ()) in val abiConstr = makeTypeConstructor("abi", [], makeFreeId(0, abiEqAndPrint, true, basisDescription "Foreign.LowLevel.abi"), declInBasis) end val () = #enterType foreignEnv ("abi", TypeConstrSet(abiConstr, [])) val abiType = mkTypeConstruction ("abi", abiConstr, [], declInBasis) (* It would be possible to put the definition of cType in here but it's complicated. It's easier to use an opaque type and put in a cast later. *) val ctypeConstr = makeTypeConstructor("ctype", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "Foreign.LowLevel.ctype"), declInBasis) val () = #enterType foreignEnv ("ctype", TypeConstrSet(ctypeConstr, [])) val ffiType = mkTypeConstruction ("ctype", ctypeConstr, [], declInBasis) val foreignCallType = mkProductType[abiType, List ffiType, ffiType] ->> mkProductType[LargeWord, LargeWord, LargeWord] ->> Unit val buildCallBackType = mkProductType[abiType, List ffiType, ffiType] ->> (mkProductType[LargeWord, LargeWord] ->> Unit) ->> LargeWord fun enterForeign (name, entry, typ) = #enterVal foreignEnv (name, mkGvar (name, typ, entry, declInBasis)) in val () = enterForeign("foreignCall", foreignCallEntry, foreignCallType) val () = enterForeign("buildCallBack", buildCallBackEntry, buildCallBackType) (* Apply the abiList function here. The ABIs depend on the platform in the interpreted version. 
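Since the function is applied once here, at ML level abiList is simply a list of (name, abi) pairs for the current platform; for example:

           val abiNames: string list = map #1 Foreign.abiList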
*) val () = enterForeign("abiList", mkConst(toMachineWord(CODETREE.Foreign.abiList())), List (String ** abiType)) end local fun addVal (name : string, value : 'a, typ : types) : unit = enterBootstrap (name, mkConst (toMachineWord value), typ) (* These are only used during the bootstrap phase. Replacements are installed once the appropriate modules of the basis library are compiled. *) fun intOfString s = let val radix = if String.size s >= 3 andalso String.substring(s, 0, 2) = "0x" orelse String.size s >= 4 andalso String.substring(s, 0, 3) = "~0x" then StringCvt.HEX else StringCvt.DEC in case StringCvt.scanString (Int.scan radix) s of NONE => raise Conversion "Invalid integer constant" | SOME res => res end fun wordOfString s = let val radix = if String.size s > 2 andalso String.sub(s, 2) = #"x" then StringCvt.HEX else StringCvt.DEC in case StringCvt.scanString (Word.scan radix) s of NONE => raise Conversion "Invalid word constant" | SOME res => res end fun unescapeChar (s: string) : char = let fun rdr i = if i = size s then NONE else SOME(String.sub(s, i), i+1) in case Char.scan rdr 0 of NONE => (* Bad conversion *) raise Conversion "Invalid string constant" | SOME(res, _) => res end open PRINTTABLE val convstringCode = genCode(mkConst(toMachineWord unescapeString), [], 0) () val convintCode = genCode(mkConst(toMachineWord intOfString), [], 0) () val convwordCode = genCode(mkConst(toMachineWord wordOfString), [], 0) () val convcharCode = genCode(mkConst(toMachineWord unescapeChar), [], 0) () in (* We need this for compatibility with the 5.8.2 bootstrap. *) val () = addVal ("convString", unescapeString: string -> string, String ->> String) (* Flag to indicate which version of Int to compile *) val () = addVal ("intIsArbitraryPrecision", intIsArbitraryPrecision, Bool) (* Install the overloads now. *) val () = addOverload("convString", stringConstr, convstringCode) val () = addOverload("convInt", fixedIntConstr, convintCode) val () = addOverload("convInt", intInfConstr, convintCode) val () = addOverload("convWord", wordConstr, convwordCode) val () = addOverload("convChar", charConstr, convcharCode) end (* The only reason we have vector here is to get equality right. We need vector to be an equality type and to have a specific equality function. *) local fun polyTypePrinter _ _ = PRETTY.PrettyString "?" (* The equality function takes the base equality type as an argument. The inner function takes two arguments which are the two vectors to compare, checks the lengths and if they're equal applies the base equality to each field. *) val eqCode = mkInlproc( mkProc( mkEnv([ (* Length of the items. *) mkDec(0, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 0)), mkDec(1, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 1)), mkMutualDecs[(2, (* Loop function. *) mkProc( mkIf( (* Finished? *) mkEqualTaggedWord(mkLoadClosure 0, mkLoadArgument 0), CodeTrue, (* Yes, all equal. *) mkIf( mkEval( TypeValue.extractEquality(mkLoadClosure 2), (* Base equality fn *) [ mkLoadOperation(LoadStoreMLWord{isImmutable=true}, mkLoadClosure 3, mkLoadArgument 0), mkLoadOperation(LoadStoreMLWord{isImmutable=true}, mkLoadClosure 4, mkLoadArgument 0) ]), mkEval(mkLoadClosure 1, (* Recursive call with index+1. 
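The loop being assembled here corresponds to this plain-ML model of polymorphic vector equality (illustrative only): compare the lengths first, then apply the element equality field by field:

           fun vectorEqModel (elemEq: 'a * 'a -> bool) (v1: 'a vector, v2: 'a vector) =
               let
                   val len = Vector.length v1
                   fun loop i =
                       i = len orelse
                       (elemEq (Vector.sub (v1, i), Vector.sub (v2, i)) andalso loop (i + 1))
               in
                   len = Vector.length v2 andalso loop 0
               end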
*) [ mkBinary(BuiltIns.WordArith BuiltIns.ArithAdd, mkLoadArgument 0, mkConst(toMachineWord 1)) ]), CodeFalse (* Not equal elements - result false *) ) ), 1, "vector-loop", [mkLoadLocal 0 (* Length *), mkLoadLocal 2 (* Loop function *), mkLoadClosure 0 (* Base equality function *), mkLoadArgument 0 (* Vector 0 *), mkLoadArgument 1 (* Vector 1 *)], 0))] ], mkIf( (* Test the lengths. *) mkEqualTaggedWord(mkLoadLocal 0, mkLoadLocal 1), (* Equal - test the contents. *) mkEval(mkLoadLocal 2, [CodeZero]), CodeFalse (* Not same length- result false *) ) ), 2, "vector-eq", [mkLoadArgument 0], 3), 1, "vector-eq()", [], 0) val idCode = (* Polytype *) let open TypeValue val code = createTypeValue{ eqCode=eqCode, printCode=mkConst (toMachineWord (ref polyTypePrinter)), boxedCode=mkInlproc(boxedAlways, 1, "boxed-vector", [], 0), sizeCode=mkInlproc(singleWord, 1, "size-vector", [], 0)} in Global (genCode(code, [], 0) ()) end in val vectorType = makeTypeConstructor("vector", [makeTypeVariable()], makeFreeId(1, idCode, true, basisDescription "vector"), declInBasis) val () = enterGlobalType ("vector", TypeConstrSet(vectorType, [])) end (* We also need a type with byte-wise equality. *) local fun monoTypePrinter _ = PRETTY.PrettyString "?" (* This is a monotype equality function that takes two byte vectors and compares them byte-by-byte for equality. Because they are vectors of bytes it's unsafe to load the whole words which could look like addresses if the bottom bit happens to be zero. *) val eqCode = mkProc( mkEnv([ (* Length of the items. *) mkDec(0, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 0)), mkDec(1, mkUnary(BuiltIns.MemoryCellLength, mkLoadArgument 1)) ], mkIf( (* Test the lengths. *) mkEqualTaggedWord(mkLoadLocal 0, mkLoadLocal 1), (* Equal - test the contents. *) mkEnv([ (* ByteVecEqual takes a byte length so we have to multiply by the number of bytes per word. *) mkDec(2, mkBinary(BuiltIns.WordArith BuiltIns.ArithMult, mkConst(toMachineWord RunCall.bytesPerWord), mkLoadLocal 0)) ], mkBlockOperation{kind=BlockOpEqualByte, leftBase=mkLoadArgument 0, rightBase=mkLoadArgument 1, leftIndex=CodeZero, rightIndex=CodeZero, length=mkLoadLocal 2}), CodeFalse (* Not same length- result false *) ) ), 2, "byteVector-eq", [], 3) val idCode = (* Polytype *) let open TypeValue val code = createTypeValue{ eqCode=eqCode, printCode=mkConst (toMachineWord (ref monoTypePrinter)), boxedCode=boxedAlways, sizeCode=singleWord} in Global (genCode(code, [], 0) ()) end in val byteVectorType = makeTypeConstructor("byteVector", [], makeFreeId(0, idCode, true, basisDescription "byteVector"), declInBasis) val () = #enterType bootstrapEnv ("byteVector", TypeConstrSet(byteVectorType, [])) end (* We also need array and Array2.array to be passed through here so that they have the special property of being eqtypes even if their argument is not. "array" is defined to be in the global environment. *) val () = enterGlobalType ("array", TypeConstrSet(arrayConstr, [])) val () = #enterType bootstrapEnv ("array", TypeConstrSet(array2Constr, [])) val () = #enterType bootstrapEnv ("byteArray", TypeConstrSet(byteArrayConstr, [])) (* "=', '<>', PolyML.print etc are type-specific function which appear to be polymorphic. The compiler recognises these and treats them specially. For (in)equality that means generating type-specific versions of the equality operations; for print etc that means printing in a type-specific way. They can become true polymorphic functions and lose their type-specificity. 
For (in)equality that means defaulting to structure equality which is normal and expected behaviour. For print etc that means losing the ability to print and just printing "?" so it's important to avoid that happening. "open" treats type-specific functions specially and retains the type-specificity. That's important to allow the prelude code to expand the PolyML structure. *) local val eqType = let val a = makeEqTV () in a ** a ->> Bool end val eqVal = mkSpecialFun("=", eqType, Equal) in val () = enterGlobalValue ("=", eqVal) end local val neqType = let val a = makeEqTV () in a ** a ->> Bool end val neqVal = mkSpecialFun("<>", neqType, NotEqual) in val () = enterGlobalValue ("<>", neqVal) end val polyMLEnv = makeStructure(globalEnv, "PolyML") val enterPolyMLVal = #enterVal polyMLEnv local (* This version of the environment must match that used in the NameSpace structure. *) open TYPETREE (* Create a new structure for them. *) val nameSpaceEnv = makeStructure(polyMLEnv, "NameSpace") (* Substructures. *) val valuesEnv = makeStructure(nameSpaceEnv, "Values") and typesEnv = makeStructure(nameSpaceEnv, "TypeConstrs") and fixesEnv = makeStructure(nameSpaceEnv, "Infixes") and structsEnv = makeStructure(nameSpaceEnv, "Structures") and sigsEnv = makeStructure(nameSpaceEnv, "Signatures") and functsEnv = makeStructure(nameSpaceEnv, "Functors") (* Types for the basic values. These are opaque. *) val valueVal = makeAndDeclareOpaqueType("value", "PolyML.NameSpace.Values.value", valuesEnv) (* Representation of the type of a value. *) val Types = makeAndDeclareOpaqueType("typeExpression", "PolyML.NameSpace.Values.typeExpression", valuesEnv) val typeVal = makeAndDeclareOpaqueType("typeConstr", "PolyML.NameSpace.TypeConstrs.typeConstr", typesEnv) val fixityVal = makeAndDeclareOpaqueType("fixity", "PolyML.NameSpace.Infixes.fixity", fixesEnv) val signatureVal = makeAndDeclareOpaqueType("signatureVal", "PolyML.NameSpace.Signatures.signatureVal", sigsEnv) val structureVal = makeAndDeclareOpaqueType("structureVal", "PolyML.NameSpace.Structures.structureVal", structsEnv) val functorVal = makeAndDeclareOpaqueType("functorVal", "PolyML.NameSpace.Functors.functorVal", functsEnv) (* nameSpace type. Labelled record. *) fun createFields(name, vType): { name: string, typeof: types} list = let val enterFun = String ** vType ->> Unit val lookupFun = String ->> Option vType val allFun = Unit ->> List (String ** vType) in [mkLabelEntry("enter" ^ name, enterFun), mkLabelEntry("lookup" ^ name, lookupFun), mkLabelEntry("all" ^ name, allFun)] end (* We have to use the same names as we use in the env type because we're passing "env" values through the bootstrap. *) val valTypes = [("Val", valueVal), ("Type", typeVal), ("Fix", fixityVal), ("Struct", structureVal), ("Sig", signatureVal), ("Funct", functorVal)] val fields = List.foldl (fn (p,l) => createFields p @ l) [] valTypes val recordType = makeTypeAbbreviation("nameSpace", "PolyML.NameSpace.nameSpace", [], mkLabelled(sortLabels fields, true), declInBasis); val () = #enterType nameSpaceEnv ("nameSpace", TypeConstrSet(recordType, [])); (* The result type of the compiler includes valueVal etc. 
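(* As a rough sketch, assuming the form this takes in the final basis library:
   the labelled record built by createFields above is seen from ML as
   PolyML.NameSpace.nameSpace, with an enter/lookup/all triple for each class
   of name.

       type nameSpace =
           { enterVal:   string * Values.value -> unit,
             lookupVal:  string -> Values.value option,
             allVal:     unit -> (string * Values.value) list,
             enterType:  string * TypeConstrs.typeConstr -> unit,
             lookupType: string -> TypeConstrs.typeConstr option,
             allType:    unit -> (string * TypeConstrs.typeConstr) list,
             (* ... and likewise for Fix, Struct, Sig and Funct *) ... }
*)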
*) val resultFields = List.map TYPETREE.mkLabelEntry [("values", List(String ** valueVal)), ("fixes", List(String ** fixityVal)), ("types", List(String ** typeVal)), ("structures", List(String ** structureVal)), ("signatures", List(String ** signatureVal)), ("functors", List(String ** functorVal))] in val nameSpaceType = mkTypeConstruction ("nameSpace", recordType, [], declInBasis) val execResult = mkLabelled(sortLabels resultFields, true) type execResult = { fixes: (string * fixStatus) list, values: (string * values) list, structures: (string * structVals) list, signatures: (string * signatures) list, functors: (string * functors) list, types: (string * typeConstrSet) list } val valueVal = valueVal val typeVal = typeVal val fixityVal = fixityVal val signatureVal = signatureVal val structureVal = structureVal val functorVal = functorVal val Types = Types val valuesEnv = valuesEnv and typesEnv = typesEnv and fixesEnv = fixesEnv and structsEnv = structsEnv and sigsEnv = sigsEnv and functsEnv = functsEnv end local val typeconstr = locationConstr val () = #enterType polyMLEnv ("location", typeconstr); in val Location = mkTypeConstruction ("location", tsConstr typeconstr, [], declInBasis) end (* Interface to the debugger. *) local open TYPETREE val debuggerEnv = makeStructure(polyMLEnv, "DebuggerInterface") (* Make these opaque at this level. *) val locationPropList = makeAndDeclareOpaqueType("locationPropList", "PolyML.DebuggerInterface.locationPropList", debuggerEnv) val typeId = makeAndDeclareOpaqueType("typeId", "PolyML.DebuggerInterface.typeId", debuggerEnv) val machineWordType = makeAndDeclareOpaqueType("machineWord", "PolyML.DebuggerInterface.machineWord", debuggerEnv) (* For long term security keep these as different from global types and sigs. Values in the static environment need to be copied before they are global. *) val localType = makeAndDeclareOpaqueType("localType", "PolyML.DebuggerInterface.localType", debuggerEnv) val localTypeConstr = makeAndDeclareOpaqueType("localTypeConstr", "PolyML.DebuggerInterface.localTypeConstr", debuggerEnv) val localSig = makeAndDeclareOpaqueType("localSig", "PolyML.DebuggerInterface.localSig", debuggerEnv) open DEBUGGER (* Entries in the static list. This type is only used within the implementation of DebuggerInterface in the basis library and does not appear in the final signature. *) val environEntryConstr = makeTypeConstructor("environEntry", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "PolyML.DebuggerInterface.environEntry"), declInBasis) val environEntryType = mkTypeConstruction ("environEntry", environEntryConstr, [], declInBasis) val constrs = (* Order is significant. *) [ ("EnvEndFunction", mkProductType[String, Location, localType]), ("EnvException", mkProductType[String, localType, locationPropList]), ("EnvStartFunction", mkProductType[String, Location, localType]), ("EnvStructure", mkProductType[String, localSig, locationPropList]), ("EnvTConstr", String ** localTypeConstr), ("EnvTypeid", typeId ** typeId), ("EnvVConstr", mkProductType[String, localType, Bool, Int, locationPropList]), ("EnvValue", mkProductType[String, localType, locationPropList]) ] (* This representation must match the representation defined in DEBUGGER_.sml. 
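(* A sketch of the datatype these constructors model (the real declaration is
   in DEBUGGER_.sml; the names and argument types are taken from the list
   above, and the alphabetical ordering matters because chooseConstrRepr
   assigns representations positionally):

       datatype environEntry =
           EnvEndFunction   of string * location * localType
         | EnvException     of string * localType * locationProp list
         | EnvStartFunction of string * location * localType
         | EnvStructure     of string * localSig * locationProp list
         | EnvTConstr       of string * localTypeConstr
         | EnvTypeid        of typeId * typeId
         | EnvVConstr       of string * localType * bool * int * locationProp list
         | EnvValue         of string * localType * locationProp list
*)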
*) val numConstrs = List.length constrs val {constrs=constrReps, ...} = chooseConstrRepr(constrs, []) val constructors = ListPair.map (fn ((s,t), code) => mkGconstr(s, t ->> environEntryType, code, false, numConstrs, declInBasis)) (constrs, constrReps) val () = List.app (fn c => #enterVal debuggerEnv(valName c, c)) constructors (* Put these constructors onto the type. *) val () = #enterType debuggerEnv ("environEntry", TypeConstrSet(environEntryConstr, constructors)) (* Debug state type. *) val debugStateConstr = makeTypeAbbreviation("debugState", "PolyML.DebuggerInterface.debugState", [], mkProductType[List environEntryType, List machineWordType, Location], declInBasis) val () = #enterType debuggerEnv ("debugState", TypeConstrSet(debugStateConstr, [])) val debugStateType = mkTypeConstruction ("debugState", debugStateConstr, [], declInBasis) in val () = applyList (fn (name, v, t) => #enterVal debuggerEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("makeValue", toMachineWord(makeValue: debugState -> string * types * locationProp list * machineWord -> values), debugStateType ->> mkProductType[String, localType, locationPropList, machineWordType] ->> valueVal), ("makeException", toMachineWord(makeException: debugState -> string * types * locationProp list * machineWord -> values), debugStateType ->> mkProductType[String, localType, locationPropList, machineWordType] ->> valueVal), ("makeConstructor", toMachineWord(makeConstructor: debugState -> string * types * bool * int * locationProp list * machineWord -> values), debugStateType ->> mkProductType[String, localType, Bool, Int, locationPropList, machineWordType] ->> valueVal), ("makeAnonymousValue", toMachineWord(makeAnonymousValue: debugState -> types * machineWord -> values), debugStateType ->> mkProductType[localType, machineWordType] ->> valueVal), ("makeStructure", toMachineWord(makeStructure: debugState -> string * signatures * locationProp list * machineWord -> structVals), debugStateType ->> mkProductType[String, localSig, locationPropList, machineWordType] ->> structureVal), ("makeTypeConstr", toMachineWord(makeTypeConstr: debugState -> typeConstrSet -> typeConstrSet), debugStateType ->> localTypeConstr ->> typeVal), ("unitValue", toMachineWord(mkGvar("", unitType, CodeZero, []): values), valueVal), (* Used as a default *) ("setOnEntry", toMachineWord(setOnEntry: (string * PolyML.location -> unit) option -> unit), Option (String ** Location ->> Unit) ->> Unit), ("setOnExit", toMachineWord(setOnExit: (string * PolyML.location -> unit) option -> unit), Option (String ** Location ->> Unit) ->> Unit), ("setOnExitException", toMachineWord(setOnExitException: (string * PolyML.location -> exn -> unit) option -> unit), Option (String ** Location ->> Exn ->> Unit) ->> Unit), ("setOnBreakPoint", toMachineWord(setOnBreakPoint: (PolyML.location * bool ref -> unit) option -> unit), Option (Location ** Ref Bool ->> Unit) ->> Unit) ] end local val typeconstr = contextConstr in val () = #enterType polyMLEnv ("context", typeconstr); val () = List.app(fn(tv as Value{name, ...}) => #enterVal polyMLEnv(name, tv)) (tsConstructors typeconstr) end local val typeconstr = prettyConstr in val () = #enterType polyMLEnv ("pretty", typeconstr); val () = List.app(fn(tv as Value{name, ...}) => #enterVal polyMLEnv(name, tv)) (tsConstructors typeconstr) val PrettyType = mkTypeConstruction ("pretty", tsConstr typeconstr, [], declInBasis) end local val printType = let val a = makePrintTV () in a ->> a end; val printVal = mkSpecialFun("print", printType, 
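(* Usage sketch, hypothetical client code rather than part of this module: the
   DebuggerInterface hooks registered above are what the basis-library
   debugger uses to be told about function entry, exit and breakpoints.  With
   the types given above:

       PolyML.DebuggerInterface.setOnEntry
           (SOME (fn (name, _) => TextIO.print ("enter " ^ name ^ "\n")));
       PolyML.DebuggerInterface.setOnBreakPoint
           (SOME (fn (_, breakFlag) => breakFlag := true));

   Passing NONE uninstalls a hook again. *)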
Print); in val () = enterPolyMLVal ("print", printVal); end; local val makeStringType = let val a = makePrintTV () in a ->> String end; val makeStringVal = mkSpecialFun("makestring", makeStringType, MakeString); in val () = enterPolyMLVal ("makestring", makeStringVal); end; local val prettyType = let val a = makePrintTV () in a ** fixedIntType ->> PrettyType end; val prettyVal = mkSpecialFun("prettyRepresentation", prettyType, GetPretty); in val () = enterPolyMLVal ("prettyRepresentation", prettyVal); end; local (* addPrettyPrinter is the new function to install a pretty printer. *) val a = makeTV () val b = makeTV () val addPrettyType = (TYPETREE.fixedIntType ->> b ->> a ->> PrettyType) ->> Unit; val addPrettyVal = mkSpecialFun("addPrettyPrinter", addPrettyType, AddPretty); in val () = enterPolyMLVal ("addPrettyPrinter", addPrettyVal); end; (* This goes in RunCall since it's only for the basis library. *) local val addOverloadType = let val a = makeTV () and b = makeTV () in (a ->> b) ->> String ->> Unit end; val addOverloadVal = mkSpecialFun("addOverload", addOverloadType, AddOverload); in val () = #enterVal runCallEnv ("addOverload", addOverloadVal); end local (* Add a function to switch the default integer type. *) fun setType isArbitrary = setPreferredInt(if isArbitrary then intInfConstr else fixedIntConstr) in val () = #enterVal runCallEnv ("setDefaultIntTypeArbitrary", mkGvar ("setDefaultIntTypeArbitrary", Bool ->> Unit, mkConst (toMachineWord setType), declInBasis)) end local val sourceLocVal = mkSpecialFun("sourceLocation", Unit ->> Location, GetLocation); in val () = enterPolyMLVal ("sourceLocation", sourceLocVal); end; local (* This is used as one of the arguments to the compiler function. *) open TYPETREE val uniStructEnv = makeStructure(bootstrapEnv, "Universal") fun enterUniversal (name : string, entry : codetree, typ : types) : unit = let val value = mkGvar (name, typ, entry, declInBasis); in #enterVal uniStructEnv (name, value) end; local fun polyTypePrinter _ _ = PRETTY.PrettyString "?" open TypeValue val idCode = let val code = createTypeValue{ eqCode=CodeZero, (* Not an equality type *) printCode=mkConst (toMachineWord (ref polyTypePrinter)), boxedCode=mkInlproc(boxedEither(* Assume worst case *), 1, "boxed-tag", [], 0), sizeCode=mkInlproc(singleWord, 1, "size-tag", [], 0)} in Global (genCode(code, [], 0) ()) end in (* type 'a tag *) val tagConstr = makeTypeConstructor("tag", [makeTypeVariable()], makeFreeId(1, idCode, false, basisDescription "tag"), declInBasis); val () = #enterType uniStructEnv ("tag", TypeConstrSet(tagConstr, [])) end (* type universal *) val univConstr = makeTypeConstructor("universal", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "universal"), declInBasis); val () = #enterType uniStructEnv ("universal", TypeConstrSet(univConstr, [])); fun Tag base = mkTypeConstruction ("tag", tagConstr, [base], declInBasis) val Universal = mkTypeConstruction ("universal", univConstr, [], declInBasis) val a = makeTV() (* val tagInject : 'a tag -> 'a -> universal *) val injectType = Tag a ->> a ->> Universal val () = enterUniversal ("tagInject", makePolymorphic([a], mkConst (toMachineWord (Universal.tagInject: 'a Universal.tag -> 'a -> Universal.universal))), injectType) (* We don't actually need tagIs and tagProject since this is only used for the compiler. Universal is redefined in the basis library. 
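(* Illustrative use of the operations passed through here, assuming the
   Universal structure of the compiler's own library (Universal.tag creates a
   fresh tag):

       val intTag : int Universal.tag = Universal.tag ()
       val u  = Universal.tagInject intTag 42     (* : Universal.universal *)
       val ok = Universal.tagIs intTag u          (* true *)
       val n  = Universal.tagProject intTag u     (* 42; only valid when tagIs is true *)
*)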
*) val projectType = Tag a ->> Universal ->> a val () = enterUniversal ("tagProject", makePolymorphic([a], mkConst (toMachineWord(Universal.tagProject: 'a Universal.tag -> Universal.universal -> 'a))), projectType) val testType = Tag a ->> Universal ->> Bool val () = enterUniversal ("tagIs", makePolymorphic([a], mkConst (toMachineWord(Universal.tagIs: 'a Universal.tag -> Universal.universal -> bool))), testType) in val Tag = Tag and Universal = Universal end local open TYPETREE (* Parsetree properties datatype. *) val propConstr = makeTypeConstructor("ptProperties", [], makeFreeId(0, defaultEqAndPrintCode(), false, basisDescription "PolyML.ptProperties"), declInBasis); val PtProperties = mkTypeConstruction ("ptProperties", propConstr, [], declInBasis) (* Parsetree type. *) val parseTreeConstr = makeTypeAbbreviation("parseTree", "PolyML.parseTree", [], Location ** List PtProperties, declInBasis); val ParseTree = mkTypeConstruction ("parseTree", parseTreeConstr, [], declInBasis) val () = #enterType polyMLEnv ("parseTree", TypeConstrSet(parseTreeConstr, [])); val constrs = (* Order is significant. *) [ ("PTbreakPoint", Ref Bool), ("PTcompletions", List String), ("PTdeclaredAt", Location), ("PTdefId", fixedIntType), ("PTfirstChild", Unit ->> ParseTree), ("PTnextSibling", Unit ->> ParseTree), ("PTopenedAt", Location), ("PTparent", Unit ->> ParseTree), ("PTpreviousSibling", Unit ->> ParseTree), ("PTprint", fixedIntType ->> PrettyType), ("PTreferences", Bool ** List Location), ("PTrefId", fixedIntType), ("PTstructureAt", Location), ("PTtype", Types) ]; (* This representation must match the representation defined in ExportTree.sml. *) val numConstrs = List.length constrs val {constrs=constrReps, ...} = chooseConstrRepr(constrs, []) val constructors = ListPair.map (fn ((s,t), code) => mkGconstr(s, t ->> PtProperties, code, false, numConstrs, declInBasis)) (constrs, constrReps) val () = List.app (fn c => #enterVal polyMLEnv(valName c, c)) constructors (* Put these constructors onto the type. *) val () = #enterType polyMLEnv ("ptProperties", TypeConstrSet(propConstr, constructors)); in val ParseTree = ParseTree and PtProperties = PtProperties end local open TYPETREE val compilerType : types = mkProductType[nameSpaceType, Unit ->> Option Char, List Universal] ->> mkProductType[Option ParseTree, Option (Unit ->> execResult)] type compilerType = nameSpace * (unit -> char option) * Universal.universal list -> exportTree option * (unit->execResult) option in val () = enterBootstrap ("use", mkConst (toMachineWord ((useIntoEnv globalTable []): string -> unit)), String ->> Unit) val () = enterBootstrap ("useWithParms", mkConst (toMachineWord ((useIntoEnv globalTable): Universal.universal list -> string -> unit)), List Universal ->> String ->> Unit) val () = enterPolyMLVal("compiler", mkGvar ("compiler", compilerType, mkConst (toMachineWord (compiler: compilerType)), declInBasis)); val () = enterBootstrap("globalSpace", mkConst (toMachineWord(gEnvAsNameSpace globalTable: nameSpace)), nameSpaceType) (* Add a print function so we can print a message at the start of a bootstrap phase. 
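(* Sketch of how the compiler value exported just above is driven; this is a
   hypothetical caller written against the type given here.  The reader
   supplies one character at a time and the result pairs an optional parse
   tree with an optional executor.

       fun compileString (space: nameSpace) (s: string) =
       let
           val pos = ref 0
           fun reader () =
               if !pos >= String.size s then NONE
               else SOME (String.sub (s, !pos)) before pos := !pos + 1
       in
           case compiler (space, reader, []) of
               (_, SOME run) => ignore (run ())    (* execute the compiled code *)
             | (_, NONE)     => ()                 (* nothing to execute *)
       end
*)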
*) val () = enterBootstrap("print", mkConst (toMachineWord TextIO.print), String ->> Unit) end; local val ty = TYPETREE.mkOverloadSet[] val addType = ty ** ty ->> ty; val negType = ty ->> ty; val cmpType = ty ** ty ->> Bool; in val () = enterGlobalValue ("+", mkOverloaded "+" addType); val () = enterGlobalValue ("-", mkOverloaded "-" addType); val () = enterGlobalValue ("*", mkOverloaded "*" addType); val () = enterGlobalValue ("~", mkOverloaded "~" negType); val () = enterGlobalValue ("abs", mkOverloaded "abs" negType); val () = enterGlobalValue (">=", mkOverloaded ">=" cmpType); val () = enterGlobalValue ("<=", mkOverloaded "<=" cmpType); val () = enterGlobalValue (">", mkOverloaded ">" cmpType); val () = enterGlobalValue ("<", mkOverloaded "<" cmpType); (* The following overloads are added in ML97 *) val () = enterGlobalValue ("div", mkOverloaded "div" addType); val () = enterGlobalValue ("mod", mkOverloaded "mod" addType); val () = enterGlobalValue ("/", mkOverloaded "/" addType); end; local open DEBUG; local open TYPETREE val fields = [ mkLabelEntry("location", Location), mkLabelEntry("hard", Bool), mkLabelEntry("message", PrettyType), mkLabelEntry("context", Option PrettyType) ] in val errorMessageProcType = mkLabelled(sortLabels fields, true) ->> Unit type errorMessageProcType = { location: location, hard: bool, message: pretty, context: pretty option } -> unit end local open TYPETREE val optNav = Option(Unit->>ParseTree) val fields = [ mkLabelEntry("parent", optNav), mkLabelEntry("next", optNav), mkLabelEntry("previous", optNav) ] in val navigationType = mkLabelled(sortLabels fields, true) type navigationType = { parent: (unit->exportTree) option, next: (unit->exportTree) option, previous: (unit->exportTree) option } end type 'a tag = 'a Universal.tag in val () = applyList (fn (name, v, t) => enterBootstrap(name, mkConst v, t)) [ ("compilerVersion", toMachineWord (VERSION.compilerVersion: string), String), ("compilerVersionNumber", toMachineWord (VERSION.versionNumber: int), Int), ("lineNumberTag", toMachineWord (lineNumberTag : (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("offsetTag", toMachineWord (offsetTag: (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("fileNameTag", toMachineWord (fileNameTag: string tag), Tag String), ("bindingCounterTag", toMachineWord (bindingCounterTag: (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("maxInlineSizeTag", toMachineWord (maxInlineSizeTag: FixedInt.int tag), Tag fixedIntType), ("assemblyCodeTag", toMachineWord (assemblyCodeTag: bool tag), Tag Bool), ("parsetreeTag", toMachineWord (parsetreeTag: bool tag), Tag Bool), ("codetreeTag", toMachineWord (codetreeTag: bool tag), Tag Bool), ("icodeTag", toMachineWord (icodeTag: bool tag), Tag Bool), ("lowlevelOptimiseTag", toMachineWord (lowlevelOptimiseTag: bool tag), Tag Bool), ("codetreeAfterOptTag", toMachineWord (codetreeAfterOptTag: bool tag), Tag Bool), ("inlineFunctorsTag", toMachineWord (inlineFunctorsTag: bool tag), Tag Bool), ("compilerDebugTag", toMachineWord (compilerDebugTag: int tag), Tag Int), ("debugTag", toMachineWord (debugTag: bool tag), Tag Bool), ("printDepthFunTag", toMachineWord (DEBUG.printDepthFunTag: (unit->FixedInt.int) tag), Tag (Unit->>fixedIntType)), ("errorDepthTag", toMachineWord (DEBUG.errorDepthTag: FixedInt.int tag), Tag fixedIntType), ("lineLengthTag", toMachineWord (DEBUG.lineLengthTag: FixedInt.int tag), Tag fixedIntType), ("profileAllocationTag", toMachineWord (DEBUG.profileAllocationTag: FixedInt.int tag), Tag fixedIntType), 
("printOutputTag", toMachineWord (PRETTY.printOutputTag: (pretty->unit) tag), Tag (PrettyType->>Unit)) , ("compilerOutputTag", toMachineWord (PRETTY.compilerOutputTag: (pretty->unit) tag), Tag (PrettyType->>Unit)), ("errorMessageProcTag", toMachineWord (LEX.errorMessageProcTag: errorMessageProcType tag), Tag errorMessageProcType), ("rootTreeTag", toMachineWord (EXPORTTREE.rootTreeTag: navigation tag), Tag navigationType), ("reportUnreferencedIdsTag", toMachineWord (reportUnreferencedIdsTag: bool tag), Tag Bool), ("reportExhaustiveHandlersTag", toMachineWord (reportExhaustiveHandlersTag: bool tag), Tag Bool), ("narrowOverloadFlexRecordTag", toMachineWord (narrowOverloadFlexRecordTag: bool tag), Tag Bool), ("createPrintFunctionsTag", toMachineWord (createPrintFunctionsTag: bool tag), Tag Bool), ("reportDiscardedValuesTag", toMachineWord (reportDiscardedValuesTag: FixedInt.int tag), Tag fixedIntType) ] end; (* PolyML.CodeTree structure. This exports the CodeTree structure into the ML space. *) local open CODETREE val codetreeEnv = makeStructure(polyMLEnv, "CodeTree") fun createType typeName = makeAndDeclareOpaqueType(typeName, "PolyML.CodeTree." ^ typeName, codetreeEnv) val CodeTree = createType "codetree" and MachineWord = createType "machineWord" and CodeBinding = createType "codeBinding" (* For the moment export these only for the general argument and result types. *) fun simpleFn (code, nArgs, name, closure, nLocals) = mkFunction{body=code, argTypes=List.tabulate(nArgs, fn _ => GeneralType), resultType=GeneralType, name=name, closure=closure, numLocals=nLocals} and simpleInlineFn (code, nArgs, name, closure, nLocals) = mkInlineFunction{body=code, argTypes=List.tabulate(nArgs, fn _ => GeneralType), resultType=GeneralType, name=name, closure=closure, numLocals=nLocals} and simpleCall(func, args) = mkCall(func, List.map (fn c => (c, GeneralType)) args, GeneralType) in val CodeTree = CodeTree val () = applyList (fn (name, v, t) => #enterVal codetreeEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("pretty", toMachineWord (CODETREE.pretty: codetree -> pretty), CodeTree ->> PrettyType), ("mkConstant", toMachineWord(mkConst: machineWord -> codetree), MachineWord ->> CodeTree), ("genCode", toMachineWord (genCode: codetree * Universal.universal list * int -> (unit->codetree)), mkProductType[CodeTree, List Universal, Int] ->> (Unit ->> CodeTree)), ("evalue", toMachineWord (evalue: codetree -> machineWord option), CodeTree ->> Option MachineWord), ("mkFunction", toMachineWord (simpleFn: codetree * int * string * codetree list * int -> codetree), mkProductType[CodeTree, Int, String, List CodeTree, Int] ->> CodeTree), ("mkInlineFunction", toMachineWord (simpleInlineFn: codetree * int * string * codetree list * int -> codetree), mkProductType[CodeTree, Int, String, List CodeTree, Int] ->> CodeTree), ("mkCall", toMachineWord (simpleCall: codetree * codetree list -> codetree), CodeTree ** List CodeTree ->> CodeTree), ("mkLoadLocal", toMachineWord (mkLoadLocal: int -> codetree), Int ->> CodeTree), ("mkLoadArgument", toMachineWord (mkLoadArgument: int -> codetree), Int ->> CodeTree), ("mkLoadClosure", toMachineWord (mkLoadClosure: int -> codetree), Int ->> CodeTree), ("mkDec", toMachineWord (mkDec: int * codetree -> codeBinding), Int ** CodeTree ->> CodeBinding), ("mkInd", toMachineWord (mkInd: int * codetree -> codetree), Int ** CodeTree ->> CodeTree), ("mkIf", toMachineWord (mkIf: codetree * codetree * codetree -> codetree), mkProductType[CodeTree, CodeTree, CodeTree] ->> CodeTree), ("mkWhile", 
toMachineWord (mkWhile: codetree * codetree -> codetree), CodeTree ** CodeTree ->> CodeTree), ("mkLoop", toMachineWord (mkLoop: codetree list -> codetree), List CodeTree ->> CodeTree), ("mkBeginLoop", toMachineWord (mkBeginLoop: codetree * (int * codetree) list -> codetree), CodeTree ** List(Int ** CodeTree) ->> CodeTree), ("mkEnv", toMachineWord (mkEnv: codeBinding list * codetree -> codetree), List CodeBinding ** CodeTree ->> CodeTree), ("mkMutualDecs", toMachineWord (mkMutualDecs: (int * codetree) list -> codeBinding), List(Int ** CodeTree) ->> CodeBinding), ("mkTuple", toMachineWord (mkTuple: codetree list -> codetree), List CodeTree ->> CodeTree), ("mkRaise", toMachineWord (mkRaise: codetree -> codetree), CodeTree ->> CodeTree), ("mkHandle", toMachineWord (mkHandle: codetree * codetree * int -> codetree), mkProductType[CodeTree, CodeTree, Int] ->> CodeTree), ("mkNullDec", toMachineWord (mkNullDec: codetree -> codeBinding), CodeTree ->> CodeBinding) ] end local (* Finish off the NameSpace structure now we have types such as pretty. *) open TYPETREE (* The exported versions expect full name spaces as arguments. Because we convert the exported versions to machineWord and give them types as data structures the compiler can't actually check that the type we give matched the internal type. *) fun makeTypeEnv NONE = { lookupType = fn _ => NONE, lookupStruct = fn _ => NONE } | makeTypeEnv(SOME(nameSpace: nameSpace)): printTypeEnv = { lookupType = fn s => case #lookupType nameSpace s of NONE => NONE | SOME t => SOME(t, NONE), lookupStruct = fn s => case #lookupStruct nameSpace s of NONE => NONE | SOME t => SOME(t, NONE) } local (* Values substructure. This also has operations related to type expressions. *) fun codeForValue (Value{access = Global code, class = ValBound, ...}) = code | codeForValue _ = raise Fail "Not a global value" and exportedDisplayTypeExp(ty, depth, nameSpace: nameSpace option) = TYPETREE.display(ty, depth, makeTypeEnv nameSpace) and exportedDisplayValues(valu, depth, nameSpace: nameSpace option) = displayValues(valu, depth, makeTypeEnv nameSpace) and propsForValue (Value {locations, typeOf, ...}) = PTtype typeOf :: mapLocationProps locations fun isConstructor (Value{class = Exception, ...}) = true | isConstructor (Value{class = Constructor _, ...}) = true | isConstructor _ = false fun isException (Value{class = Exception, ...}) = true | isException _ = false in val () = applyList (fn (name, v, t) => #enterVal valuesEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord (valName: values -> string), valueVal ->> String), ("print", toMachineWord (printValues: values * FixedInt.int -> pretty), mkProductType[valueVal, fixedIntType] ->> PrettyType), ("printWithType", toMachineWord (exportedDisplayValues: values * FixedInt.int * nameSpace option -> pretty), mkProductType[valueVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("printType", toMachineWord(exportedDisplayTypeExp: types * FixedInt.int * nameSpace option -> pretty), mkProductType[Types, fixedIntType, Option nameSpaceType] ->> PrettyType), ("typeof", toMachineWord (valTypeOf: values -> types), valueVal ->> Types), ("code", toMachineWord (codeForValue: values -> codetree), valueVal ->> CodeTree), ("properties", toMachineWord (propsForValue: values ->ptProperties list), valueVal ->> List PtProperties), ("isConstructor", toMachineWord(isConstructor: values -> bool), valueVal ->> Bool), ("isException", toMachineWord(isException: values -> bool), valueVal ->> Bool) ] end local (* TypeConstrs 
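(* Example of how the Values operations entered just above are used from ML;
   hypothetical, via the global name space that "globalSpace" exposes:

       case #lookupVal PolyML.globalNameSpace "rev" of
           NONE   => PolyML.PrettyString "not found"
         | SOME v =>
               PolyML.NameSpace.Values.printWithType (v, 50, SOME PolyML.globalNameSpace)
*)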
substructure. *) fun exportedDisplayTypeConstr(tyCons, depth, nameSpace: nameSpace option) = TYPETREE.displayTypeConstrs(tyCons, depth, makeTypeEnv nameSpace) and propsForTypeConstr (TypeConstrSet(TypeConstrs {locations, ...}, _)) = mapLocationProps locations and nameForType (TypeConstrSet(TypeConstrs{name, ...}, _)) = name in val () = applyList (fn (name, v, t) => #enterVal typesEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForType: typeConstrSet -> string), typeVal ->> String), ("print", toMachineWord (exportedDisplayTypeConstr: typeConstrSet * FixedInt.int * nameSpace option -> pretty), mkProductType[typeVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("properties", toMachineWord (propsForTypeConstr: typeConstrSet ->ptProperties list), typeVal ->> List PtProperties) ] end local (* Structures substructure *) fun exportedDisplayStructs(str, depth, nameSpace: nameSpace option) = displayStructures(str, depth, makeTypeEnv nameSpace) and codeForStruct (Struct{access = Global code, ...}) = code | codeForStruct _ = raise Fail "Not a global structure" and propsForStruct (Struct {locations, ...}) = mapLocationProps locations and nameForStruct (Struct{name, ...}) = name fun nameSpaceForStruct(baseStruct as Struct{signat=Signatures { tab, ...}, ...}): nameSpace = let open UNIVERSALTABLE fun lookupVal s = case univLookup (tab, valueVar, s) of NONE => NONE | SOME v => SOME(makeSelectedValue(v, baseStruct)) and lookupType s = case univLookup (tab, typeConstrVar, s) of NONE => NONE | SOME t => SOME(makeSelectedType(t, baseStruct)) and lookupStruct s = case univLookup (tab, structVar, s) of NONE => NONE | SOME s => SOME(makeSelectedStructure(s, baseStruct)) local fun extractItems t tab = UNIVERSALTABLE.fold (fn (s, u, l) => if Universal.tagIs t u then (s, Universal.tagProject t u) :: l else l ) [] tab in fun allValues() = map(fn (s, v) => (s, makeSelectedValue(v, baseStruct))) (extractItems valueVar tab) and allTypes() = map(fn (s, t) => (s, makeSelectedType(t, baseStruct))) (extractItems typeConstrVar tab) and allStructs() = map(fn (s, v) => (s, makeSelectedStructure(v, baseStruct))) (extractItems structVar tab) end fun enterFunction _ = raise Fail "updating a structure is not possible." (* Raise an exception for any attempt to enter a new value. Return empty for the classes that can't exist in a structure. 
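(* For example, hypothetically, the "contents" entry built from
   nameSpaceForStruct lets ML code enumerate the values declared in a compiled
   structure:

       case #lookupStruct PolyML.globalNameSpace "List" of
           NONE   => []
         | SOME s => map #1 (#allVal (PolyML.NameSpace.Structures.contents s) ())
*)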
*) in { lookupVal = lookupVal, lookupType = lookupType, lookupStruct = lookupStruct, lookupFix = fn _ => NONE, lookupSig = fn _ => NONE, lookupFunct = fn _ => NONE, enterVal = enterFunction, enterType = enterFunction, enterFix = enterFunction, enterStruct = enterFunction, enterSig = enterFunction, enterFunct = enterFunction, allVal = allValues, allType = allTypes, allStruct = allStructs, allFix = fn () => [], allSig = fn () => [], allFunct = fn () => [] } end in val () = applyList (fn (name, v, t) => #enterVal structsEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForStruct: structVals -> string), structureVal ->> String), ("print", toMachineWord (exportedDisplayStructs: structVals * FixedInt.int * nameSpace option -> pretty), mkProductType[structureVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("code", toMachineWord (codeForStruct: structVals -> codetree), structureVal ->> CodeTree), ("properties", toMachineWord (propsForStruct: structVals ->ptProperties list), structureVal ->> List PtProperties), ("contents", toMachineWord(nameSpaceForStruct: structVals -> nameSpace), structureVal ->> nameSpaceType) ] end local (* Signatures substructure *) fun exportedDisplaySigs(sign, depth, nameSpace: nameSpace option) = displaySignatures(sign, depth, makeTypeEnv nameSpace) and propsForSig (Signatures {locations, ...}) = mapLocationProps locations and nameForSig (Signatures{name, ...}) = name in val () = applyList (fn (name, v, t) => #enterVal sigsEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForSig: signatures -> string), signatureVal ->> String), ("print", toMachineWord (exportedDisplaySigs: signatures * FixedInt.int * nameSpace option -> pretty), mkProductType[signatureVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("properties", toMachineWord (propsForSig: signatures ->ptProperties list), signatureVal ->> List PtProperties) ] end local (* Functors substructure *) fun exportedDisplayFunctors(funct, depth, nameSpace: nameSpace option) = displayFunctors(funct, depth, makeTypeEnv nameSpace) and codeForFunct (Functor{access = Global code, ...}) = code | codeForFunct _ = raise Fail "Not a global functor" and propsForFunctor (Functor {locations, ...}) = mapLocationProps locations and nameForFunctor (Functor{name, ...}) = name in val () = applyList (fn (name, v, t) => #enterVal functsEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForFunctor: functors -> string), functorVal ->> String), ("print", toMachineWord (exportedDisplayFunctors: functors * FixedInt.int * nameSpace option -> pretty), mkProductType[functorVal, fixedIntType, Option nameSpaceType] ->> PrettyType), ("code", toMachineWord (codeForFunct: functors -> codetree), functorVal ->> CodeTree), ("properties", toMachineWord (propsForFunctor: functors ->ptProperties list), functorVal ->> List PtProperties) ] end local (* Infixes substructure *) fun nameForFix(FixStatus(s, _)) = s in val () = applyList (fn (name, v, t) => #enterVal fixesEnv (name, mkGvar (name, t, mkConst v, declInBasis))) [ ("name", toMachineWord(nameForFix: fixStatus -> string), fixityVal ->> String), ("print", toMachineWord (displayFixStatus: fixStatus -> pretty), fixityVal ->> PrettyType) ] end in end in () end (* initGlobalEnv *); end;