diff --git a/RootArm64.ML b/RootArm64.ML index 4c94d26c..14c5c7c5 100644 --- a/RootArm64.ML +++ b/RootArm64.ML @@ -1,153 +1,150 @@ (* Copyright (c) 2021 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Compiler root file for Arm64. This gives the "use" instructions necessary to build the compiler and suitable for use with an IDE project file. It was constructed from the Poly/ML make files. *) PolyML.print_depth 1; PolyML.Compiler.reportUnreferencedIds := true; use "mlsource/MLCompiler/Address.ML"; use "mlsource/MLCompiler/Misc.ML"; use "mlsource/MLCompiler/HashTable.ML"; use "mlsource/MLCompiler/UniversalTable.ML"; use "mlsource/MLCompiler/StronglyConnected.sml"; use "mlsource/MLCompiler/StretchArray.ML"; use "mlsource/MLCompiler/STRUCTVALSIG.sml"; use "mlsource/MLCompiler/PRETTY.sig"; use "mlsource/MLCompiler/LEXSIG.sml"; use "mlsource/MLCompiler/SymbolsSig.sml"; use "mlsource/MLCompiler/COMPILERBODYSIG.sml"; use "mlsource/MLCompiler/DEBUG.sig"; use "mlsource/MLCompiler/MAKESIG.sml"; use "mlsource/MLCompiler/MAKE_.ML"; use "mlsource/MLCompiler/FOREIGNCALL.sig"; use "mlsource/MLCompiler/BUILTINS.sml"; use "mlsource/MLCompiler/CODETREE.sig"; use "mlsource/MLCompiler/STRUCT_VALS.ML"; use "mlsource/MLCompiler/CodeTree/BACKENDINTERMEDIATECODE.sig"; use "mlsource/MLCompiler/CodeTree/BASECODETREE.sig"; use "mlsource/MLCompiler/CodeTree/CODETREEFUNCTIONS.sig"; use "mlsource/MLCompiler/CodeTree/CODEARRAY.sig"; use "mlsource/MLCompiler/CodeTree/CODEGENTREE.sig"; use "mlsource/MLCompiler/CodeTree/GENCODE.sig"; use "mlsource/MLCompiler/CodeTree/CodetreeFunctions.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeStaticLinkAndCases.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeCodegenConstantFunctions.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeLambdaLift.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeRemoveRedundant.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeSimplifier.ML"; use "mlsource/MLCompiler/CodeTree/CodetreeOptimiser.ML"; use "mlsource/MLCompiler/CodeTree/CodeTreeConstruction.ML"; use "mlsource/MLCompiler/Pretty.sml"; use "mlsource/MLCompiler/CodeTree/CodeArray.ML"; use "mlsource/MLCompiler/Debug.ML"; use "mlsource/MLCompiler/CodeTree/BackendIntermediateCode.sml"; use "mlsource/MLCompiler/CodeTree/BaseCodeTree.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ASSEMBLY.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Assembly.sml"; -use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64SEQUENCES.sig"; -use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Sequences.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML"; -use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODE.sig"; use "mlsource/MLCompiler/CodeTree/INTSET.sig"; use "mlsource/MLCompiler/CodeTree/IntSet.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICode.ML"; use 
"mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64IDENTIFYREFERENCES.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64IdentifyReferences.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PUSHREGISTERS.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PushRegisters.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEOPTIMISE.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeOptimise.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ALLOCATEREGISTERS.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64AllocateRegisters.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODEGENERATE.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeToArm64Code.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ARM64ICODETRANSFORM.sig"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ICodeTransform.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/Arm64CodetreeToICode.ML"; use "mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML"; use "mlsource/MLCompiler/CodeTree/GCode.arm64.ML"; use "mlsource/MLCompiler/CodeTree/ml_bind.ML"; use "mlsource/MLCompiler/StructVals.ML"; use "mlsource/MLCompiler/LEX_.ML"; use "mlsource/MLCompiler/Symbols.ML"; use "mlsource/MLCompiler/Lex.ML"; use "mlsource/MLCompiler/SymsetSig.sml"; use "mlsource/MLCompiler/DATATYPEREPSIG.sml"; use "mlsource/MLCompiler/VALUEOPSSIG.sml"; use "mlsource/MLCompiler/EXPORTTREESIG.sml"; use "mlsource/MLCompiler/STRUCTURESSIG.sml"; use "mlsource/MLCompiler/COMPILER_BODY.ML"; use "mlsource/MLCompiler/SymSet.ML"; use "mlsource/MLCompiler/TYPETREESIG.sml"; use "mlsource/MLCompiler/COPIERSIG.sml"; use "mlsource/MLCompiler/TYPEIDCODESIG.sml"; use "mlsource/MLCompiler/DATATYPE_REP.ML"; use "mlsource/MLCompiler/PRINTTABLESIG.sml"; use "mlsource/MLCompiler/VALUE_OPS.ML"; use "mlsource/MLCompiler/TYPE_TREE.ML"; use "mlsource/MLCompiler/UTILITIES_.ML"; use "mlsource/MLCompiler/Utilities.ML"; use "mlsource/MLCompiler/PRINT_TABLE.ML"; use "mlsource/MLCompiler/PrintTable.ML"; use "mlsource/MLCompiler/ExportTree.sml"; use "mlsource/MLCompiler/ExportTreeStruct.sml"; use "mlsource/MLCompiler/TypeTree.ML"; use "mlsource/MLCompiler/COPIER.sml"; use "mlsource/MLCompiler/CopierStruct.sml"; use "mlsource/MLCompiler/TYPEIDCODE.sml"; use "mlsource/MLCompiler/TypeIDCodeStruct.sml"; use "mlsource/MLCompiler/DatatypeRep.ML"; use "mlsource/MLCompiler/ValueOps.ML"; use "mlsource/MLCompiler/PARSETREESIG.sml"; use "mlsource/MLCompiler/SIGNATURESSIG.sml"; use "mlsource/MLCompiler/DEBUGGER.sig"; use "mlsource/MLCompiler/STRUCTURES_.ML"; use "mlsource/MLCompiler/DEBUGGER_.sml"; use "mlsource/MLCompiler/Debugger.sml"; use "mlsource/MLCompiler/ParseTree/BaseParseTreeSig.sml"; use "mlsource/MLCompiler/ParseTree/BASE_PARSE_TREE.sml"; use "mlsource/MLCompiler/ParseTree/PrintParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/PRINT_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/ExportParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/EXPORT_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/TypeCheckParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/TYPECHECK_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/MatchCompilerSig.sml"; use "mlsource/MLCompiler/ParseTree/MATCH_COMPILER.sml"; use "mlsource/MLCompiler/ParseTree/CodegenParsetreeSig.sml"; use "mlsource/MLCompiler/ParseTree/CODEGEN_PARSETREE.sml"; use "mlsource/MLCompiler/ParseTree/PARSE_TREE.ML"; use "mlsource/MLCompiler/ParseTree/ml_bind.ML"; use "mlsource/MLCompiler/SIGNATURES.sml"; use 
"mlsource/MLCompiler/SignaturesStruct.sml"; use "mlsource/MLCompiler/Structures.ML"; use "mlsource/MLCompiler/PARSE_DEC.ML"; use "mlsource/MLCompiler/SKIPS_.ML"; use "mlsource/MLCompiler/Skips.ML"; use "mlsource/MLCompiler/PARSE_TYPE.ML"; use "mlsource/MLCompiler/ParseType.ML"; use "mlsource/MLCompiler/ParseDec.ML"; use "mlsource/MLCompiler/CompilerBody.ML"; use "mlsource/MLCompiler/CompilerVersion.sml"; use "mlsource/MLCompiler/Make.ML"; use "mlsource/MLCompiler/INITIALISE_.ML"; use "mlsource/MLCompiler/Initialise.ML"; use "mlsource/MLCompiler/ml_bind.ML"; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig index 9b09ce91..998feaee 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64PREASSEMBLY.sig @@ -1,264 +1,266 @@ (* Copyright (c) 2021-2 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *) signature ARM64PREASSEMBLY = sig type closureRef type machineWord = Address.machineWord (* XZero and XSP are both encoded as 31 but the interpretation depends on the instruction The datatype definition is included here to allow for pattern matching on XSP and XZero. *) datatype xReg = XReg of Word8.word | XZero | XSP and vReg = VReg of Word8.word val X0: xReg and X1: xReg and X2: xReg and X3: xReg and X4: xReg and X5: xReg and X6: xReg and X7: xReg and X8: xReg and X9: xReg and X10: xReg and X11: xReg and X12: xReg and X13: xReg and X14: xReg and X15: xReg and X16: xReg and X17: xReg and X18: xReg and X19: xReg and X20: xReg and X21: xReg and X22: xReg and X23: xReg and X24: xReg and X25: xReg and X26: xReg and X27: xReg and X28: xReg and X29: xReg and X30: xReg val X_MLHeapLimit: xReg (* ML Heap limit pointer *) and X_MLAssemblyInt: xReg (* ML assembly interface pointer. *) and X_MLHeapAllocPtr: xReg (* ML Heap allocation pointer. *) and X_MLStackPtr: xReg (* ML Stack pointer. *) and X_LinkReg: xReg (* Link reg - return address *) and X_Base32in64: xReg (* X24 is used for the heap base in 32-in-64. *) val V0: vReg and V1: vReg and V2: vReg and V3: vReg and V4: vReg and V5: vReg and V6: vReg and V7: vReg (* Condition for conditional branches etc. *) datatype condition = CondEqual (* Z=1 *) | CondNotEqual (* Z=0 *) | CondCarrySet (* C=1 *) | CondCarryClear (* C=0 *) | CondNegative (* N=1 *) | CondPositive (* N=0 imcludes zero *) | CondOverflow (* V=1 *) | CondNoOverflow (* V=0 *) | CondUnsignedHigher (* C=1 && Z=0 *) | CondUnsignedLowOrEq (* ! (C=1 && Z=0) *) | CondSignedGreaterEq (* N=V *) | CondSignedLess (* N<>V *) | CondSignedGreater (* Z==0 && N=V *) | CondSignedLessEq (* !(Z==0 && N=V) *) val invertTest: condition -> condition (* i.e. jump when the condition is not true. 
*) val condToString: condition -> string datatype shiftType = ShiftLSL of Word8.word | ShiftLSR of Word8.word | ShiftASR of Word8.word | ShiftNone datatype wordSize = WordSize32 | WordSize64 datatype 'a extend = ExtUXTB of 'a (* Unsigned extend byte *) | ExtUXTH of 'a (* Unsigned extend byte *) | ExtUXTW of 'a (* Unsigned extend byte *) | ExtUXTX of 'a (* Left shift *) | ExtSXTB of 'a (* Sign extend byte *) | ExtSXTH of 'a (* Sign extend halfword *) | ExtSXTW of 'a (* Sign extend word *) | ExtSXTX of 'a (* Left shift *) (* Load/store instructions have only a single bit for the shift. For byte operations this is one bit shift; for others it scales by the size of the operand if set. *) datatype scale = ScaleOrShift | NoScale datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP and unscaledType = NoUpdate | PreIndex | PostIndex and condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate and bitfieldKind = BFUnsigned | BFSigned | BFInsert and brRegType = BRRBranch | BRRAndLink | BRRReturn type precodeLabel val createLabel: unit -> precodeLabel datatype precode = (* Basic instructions *) AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind} | DivideRegs of {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize} | LogicalShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool} | LoadRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | LoadFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | StoreRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | StoreFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | LoadRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | 
LoadFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} (* LoadAcquire and StoreRelease are used for mutables. *) | LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType} | StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType} (* LoadAcquireExclusiveRegister and StoreReleaseExclusiveRegister are used for mutexes. *) | LoadAcquireExclusiveRegister of {regN: xReg, regT: xReg} | StoreReleaseExclusiveRegister of {regS: xReg, regT: xReg, regN: xReg} | MemBarrier | LoadRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | ConditionalSet of {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet, opSize: opSize} | BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind} | ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection} | BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp} (* Floating point *) | MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize} | MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize} | CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize} | CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize} | FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary} | FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize} | FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary} (* Branches and Labels. *) | SetLabel of precodeLabel | ConditionalBranch of condition * precodeLabel | UnconditionalBranch of precodeLabel | BranchAndLink of precodeLabel | BranchReg of {regD: xReg, brRegType: brRegType } | LoadLabelAddress of xReg * precodeLabel | TestBitBranch of { test: xReg, bit: Word8.word, label: precodeLabel, onZero: bool } | CompareBranch of { test: xReg, label: precodeLabel, onZero: bool, opSize: opSize } (* Composite instructions *) | MoveXRegToXReg of {sReg: xReg, dReg: xReg} | LoadNonAddr of xReg * Word64.word | LoadAddr of xReg * machineWord | RTSTrap of { rtsEntry: int, work: xReg, save: xReg list } (* Allocate memory - bytes includes the length word and rounding. *) | AllocateMemoryFixedSize of { bytes: word, dest: xReg, save: xReg list, work: xReg } (* Allocate memory - sizeReg is number of ML words needed for cell. *) | AllocateMemoryVariableSize of { sizeReg: xReg, dest: xReg, save: xReg list, work: xReg } (* Branch table for indexed case. startLabel is the address of the first label in the list. The branch table is a sequence of unconditional branches. *) | BranchTable of { startLabel: precodeLabel, brTable: precodeLabel list } + | LoadGlobalHeapBaseInCallback of xReg (* Wrapper for BitField *) val shiftConstant: { direction: shiftDirection, regD: xReg, regN: xReg, shift: word, opSize: opSize } -> precode (* Convenient sequences. N.B. 
These are in reverse order. *) val boxDouble: {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list and boxFloat: {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list and boxSysWord: {source: xReg, destination: xReg, workReg: xReg, saveRegs: xReg list} * precode list -> precode list (* Create the vector of code from the list of instructions and update the closure reference to point to it. *) val generateFinalCode: {instrs: precode list, name: string, parameters: Universal.universal list, resultClosure: closureRef, profileObject: machineWord, labelCount: int} -> unit (* Temporarily for development. *) type instr val toInstr: precode -> instr (* Take a forward order sequence of instructions and generate a forward order output sequence. *) val toInstrs: precode list -> instr list (* Offsets in the assembly code interface pointed at by X26 These are in units of 64-bits NOT bytes. *) val heapOverflowCallOffset: int and stackOverflowCallOffset: int and stackOverflowXCallOffset: int and exceptionHandlerOffset: int and stackLimitOffset: int and threadIdOffset: int and heapLimitPtrOffset: int and heapAllocPtrOffset: int and mlStackPtrOffset: int + and exceptionPacketOffset: int val is32in64: bool and isBigEndian: bool val isEncodableBitPattern: Word64.word * wordSize -> bool structure Sharing: sig type closureRef = closureRef type loadType = loadType type opSize = opSize type logicalOp = logicalOp type floatSize = floatSize type shiftDirection = shiftDirection type multKind = multKind type fpUnary = fpUnary type fpBinary = fpBinary type unscaledType = unscaledType type condSet = condSet type bitfieldKind = bitfieldKind type brRegType = brRegType type precode = precode type xReg = xReg type vReg = vReg type precodeLabel = precodeLabel type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale type instr = instr end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64SEQUENCES.sig b/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64SEQUENCES.sig deleted file mode 100644 index 767a0fe9..00000000 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ARM64SEQUENCES.sig +++ /dev/null @@ -1,47 +0,0 @@ -(* - Copyright (c) 2021 David C. J. Matthews - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - Licence version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public Licence for more details. - - You should have received a copy of the GNU Lesser General Public - Licence along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*) - -signature ARM64SEQUENCES = -sig - type instr and xReg and vReg - - (* Sequences used in both the main code-generator and the FFI code*) - - (* Copy a value to another register. *) - val moveRegToReg: {sReg: xReg, dReg: xReg} -> instr - - (* Load a non-address constant. Tries to use movz/movn/movk if - that can be done easily, othewise uses loadNonAddressConstant to - load the value from the non-address constant area. *) - val loadNonAddress: xReg * Word64.word -> instr list - - (* Boxing operations. Floats are tagged rather than boxed in native 64-bit. 
- The destination is a poly address. *) - val boxDouble: - {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} -> instr list - and boxFloat: - {source: vReg, destination: xReg, workReg: xReg, saveRegs: xReg list} -> instr list - and boxSysWord: - {source: xReg, destination: xReg, workReg: xReg, saveRegs: xReg list} -> instr list - - structure Sharing: - sig - type instr = instr - type xReg = xReg - type vReg = vReg - end -end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml index 43adacf4..0b791ac9 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64ForeignCall.sml @@ -1,860 +1,854 @@ (* Copyright (c) 2021-2 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) functor Arm64ForeignCall( structure CodeArray: CODEARRAY and Arm64PreAssembly: ARM64PREASSEMBLY - and Arm64Assembly: ARM64ASSEMBLY and Debug: DEBUG - and Arm64Sequences: ARM64SEQUENCES - sharing CodeArray.Sharing = Arm64Assembly.Sharing = Arm64Sequences.Sharing + sharing CodeArray.Sharing = Arm64PreAssembly.Sharing ): FOREIGNCALL = struct - open CodeArray Arm64Assembly Arm64Sequences + open CodeArray Arm64PreAssembly exception InternalError = Misc.InternalError and Foreign = Foreign.Foreign datatype fastArgs = FastArgFixed | FastArgDouble | FastArgFloat val makeEntryPoint: string -> machineWord = RunCall.rtsCallFull1 "PolyCreateEntryPointObject" (* Turn an index into an absolute address. 
*) fun indexToAbsoluteAddress(iReg, absReg) = if is32in64 - then [addShiftedReg{regM=iReg, regN=X_Base32in64, regD=absReg, shift=ShiftLSL 0w2}] + then [AddShiftedReg{regM=iReg, regN=X_Base32in64, regD=absReg, shift=ShiftLSL 0w2, opSize=OpSize64, setFlags=false}] else if iReg = absReg then [] - else [moveRegToReg{sReg=iReg, dReg=absReg}] + else [MoveXRegToXReg{sReg=iReg, dReg=absReg}] fun unboxDouble(addrReg, workReg, valReg) = if is32in64 then indexToAbsoluteAddress(addrReg, workReg) @ - [loadRegScaledDouble{regT=valReg, regN=workReg, unitOffset=0}] - else [loadRegScaledDouble{regT=valReg, regN=addrReg, unitOffset=0}] + [LoadFPRegScaled{regT=valReg, regN=workReg, unitOffset=0, floatSize=Double64}] + else [LoadFPRegScaled{regT=valReg, regN=addrReg, unitOffset=0, floatSize=Double64}] fun unboxOrUntagSingle(addrReg, workReg, valReg) = if is32in64 - then [loadRegIndexedFloat{regN=X_Base32in64, regM=addrReg, regT=valReg, option=ExtUXTX ScaleOrShift}] + then [LoadFPRegIndexed{regN=X_Base32in64, regM=addrReg, regT=valReg, option=ExtUXTX ScaleOrShift, floatSize=Float32}] else [ - logicalShiftRight{shift=0w32, regN=addrReg, regD=workReg}, - moveGeneralToFloat{regN=workReg, regD=valReg} + shiftConstant{direction=ShiftRightLogical, shift=0w32, regN=addrReg, regD=workReg, opSize=OpSize64}, + MoveGeneralToFP{regN=workReg, regD=valReg, floatSize=Float32} ] fun boxOrTagFloat{floatReg, fixedReg, workReg, saveRegs} = if is32in64 - then boxFloat{source=floatReg, destination=fixedReg, workReg=workReg, saveRegs=saveRegs} + then List.rev(boxFloat({source=floatReg, destination=fixedReg, workReg=workReg, saveRegs=saveRegs}, [])) else [ - moveFloatToGeneral{regN=floatReg, regD=fixedReg}, - logicalShiftLeft{shift=0w32, regN=fixedReg, regD=fixedReg}, - bitwiseOrImmediate{regN=fixedReg, regD=fixedReg, bits=0w1} + MoveFPToGeneral{regN=floatReg, regD=fixedReg, floatSize=Float32}, + shiftConstant{direction=ShiftLeft, shift=0w32, regN=fixedReg, regD=fixedReg, opSize=OpSize64}, + BitwiseLogical{regN=fixedReg, regD=fixedReg, bits=0w1, logOp=LogOr, setFlags=false, opSize=OpSize64} ] (* Call the RTS. Previously this did not check for exceptions raised in the RTS and instead there was code added after each call. Doing it after the call doesn't affect the time taken but makes the code larger especially as this is needed in every arbitrary precision operation. Currently we clear the RTS exception packet field before the call. The field is cleared in "full" calls that may raise an exception but not in fast calls. They may not raise an exception but the packet may not have been cleared from a previous call. *) fun rtsCallFastGeneral (functionName, argFormats, resultFormat, debugSwitches) = let val entryPointAddr = makeEntryPoint functionName (* The maximum we currently have is five so we don't need to worry about stack args. *) fun loadArgs([], _, _, _) = [] | loadArgs(FastArgFixed :: argTypes, srcReg :: srcRegs, fixed :: fixedRegs, fpRegs) = if srcReg = fixed then loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) (* Already in the right reg *) - else moveRegToReg{sReg=srcReg, dReg=fixed} :: + else MoveXRegToXReg{sReg=srcReg, dReg=fixed} :: loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) | loadArgs(FastArgDouble :: argTypes, srcReg :: srcRegs, fixedRegs, fp :: fpRegs) = (* Unbox the value into a fp reg. 
*) unboxDouble(srcReg, srcReg, fp) @ loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) | loadArgs(FastArgFloat :: argTypes, srcReg :: srcRegs, fixedRegs, fp :: fpRegs) = (* Untag and move into the fp reg *) unboxOrUntagSingle(srcReg, srcReg, fp) @ loadArgs(argTypes, srcRegs, fixedRegs, fpRegs) | loadArgs _ = raise InternalError "rtsCall: Too many arguments" val noRTSException = createLabel() val instructions = loadArgs(argFormats, (* ML Arguments *) [X0, X1, X2, X3, X4, X5, X6, X7], (* C fixed pt args *) [X0, X1, X2, X3, X4, X5, X6, X7], (* C floating pt args *) [V0, V1, V2, V3, V4, V5, V6, V7]) @ (* Clear the RTS exception state. *) - loadNonAddress(X16, 0w1) @ + LoadNonAddr(X16, 0w1) :: [ - storeRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=exceptionPacketOffset}, + StoreRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=exceptionPacketOffset, loadType=Load64}, (* Move X30 to X23, a callee-save register. *) (* Note: maybe we should push X24 just in case this is the only reachable reference to the code. *) - orrShiftedReg{regN=XZero, regM=X_LinkReg, regD=X23, shift=ShiftNone}, - loadAddressConstant(X16, entryPointAddr) (* Load entry point *) + LogicalShiftedReg{regN=XZero, regM=X_LinkReg, regD=X23, shift=ShiftNone, logOp=LogOr, opSize=OpSize64, setFlags=false}, + LoadAddr(X16, entryPointAddr) (* Load entry point *) ] @ indexToAbsoluteAddress(X16, X16) @ [ - loadRegScaled{regT=X16, regN=X16, unitOffset=0}, (* Load the actual address. *) + LoadRegScaled{regT=X16, regN=X16, unitOffset=0, loadType=Load64}, (* Load the actual address. *) (* Store the current heap allocation pointer. *) - storeRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, + StoreRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset, loadType=Load64}, (* For the moment save and restore the ML stack pointer. No RTS call should change it and it's callee-save but just in case... *) - storeRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, - branchAndLinkReg X16, (* Call the function. *) + StoreRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset, loadType=Load64}, + BranchReg{regD=X16, brRegType=BRRAndLink}, (* Call the function. *) (* Restore the ML stack pointer. *) - loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, + LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset, loadType=Load64}, (* Load the heap allocation ptr and limit. We could have GCed in the RTS call. *) - loadRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, - loadRegScaled{regT=X_MLHeapLimit, regN=X_MLAssemblyInt, unitOffset=heapLimitPtrOffset}, + LoadRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset, loadType=Load64}, + LoadRegScaled{regT=X_MLHeapLimit, regN=X_MLAssemblyInt, unitOffset=heapLimitPtrOffset, loadType=Load64}, (* Check the RTS exception. *) - loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=exceptionPacketOffset}, - subSImmediate{regN=X16, regD=XZero, immed=0w1, shifted=false}, - conditionalBranch(CondEqual, noRTSException), + LoadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=exceptionPacketOffset, loadType=Load64}, + SubImmediate{regN=X16, regD=XZero, immed=0w1, shifted=false, setFlags=true, opSize=OpSize64}, + ConditionalBranch(CondEqual, noRTSException), (* If it isn't then raise the exception. 
*) - moveRegToReg{sReg=X16, dReg=X0}, - loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, - loadRegScaled{regT=X16, regN=X_MLStackPtr, unitOffset=0}, - branchRegister X16, - setLabel noRTSException + MoveXRegToXReg{sReg=X16, dReg=X0}, + LoadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset, loadType=Load64}, + LoadRegScaled{regT=X16, regN=X_MLStackPtr, unitOffset=0, loadType=Load64}, + BranchReg{regD=X16, brRegType=BRRBranch}, + SetLabel noRTSException ] @ ( case resultFormat of FastArgFixed => [] - | FastArgDouble => (* This must be boxed. *) boxDouble{source=V0, destination=X0, workReg=X1, saveRegs=[]} + | FastArgDouble => (* This must be boxed. *) List.rev(boxDouble({source=V0, destination=X0, workReg=X1, saveRegs=[]}, [])) | FastArgFloat => (* This must be tagged or boxed *) boxOrTagFloat{floatReg=V0, fixedReg=X0, workReg=X1, saveRegs=[]} ) @ [ - returnRegister X23 + BranchReg{regD=X23, brRegType=BRRReturn} ] val closure = makeConstantClosure() - val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure, - profileObject=createProfileObject()} + val () = generateFinalCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure, + profileObject=createProfileObject(), labelCount=1} in closureAsAddress closure end fun rtsCallFast (functionName, nArgs, debugSwitches) = rtsCallFastGeneral (functionName, List.tabulate(nArgs, fn _ => FastArgFixed), FastArgFixed, debugSwitches) (* RTS call with one double-precision floating point argument and a floating point result. *) fun rtsCallFastRealtoReal (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgDouble], FastArgDouble, debugSwitches) (* RTS call with two double-precision floating point arguments and a floating point result. *) fun rtsCallFastRealRealtoReal (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgDouble, FastArgDouble], FastArgDouble, debugSwitches) (* RTS call with one double-precision floating point argument, one fixed point argument and a floating point result. *) fun rtsCallFastRealGeneraltoReal (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgDouble, FastArgFixed], FastArgDouble, debugSwitches) (* RTS call with one general (i.e. ML word) argument and a floating point result. This is used only to convert arbitrary precision values to floats. *) fun rtsCallFastGeneraltoReal (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgFixed], FastArgDouble, debugSwitches) (* Operations on Real32.real values. *) fun rtsCallFastFloattoFloat (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgFloat], FastArgFloat, debugSwitches) fun rtsCallFastFloatFloattoFloat (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgFloat, FastArgFloat], FastArgFloat, debugSwitches) (* RTS call with one double-precision floating point argument, one fixed point argument and a floating point result. *) fun rtsCallFastFloatGeneraltoFloat (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgFloat, FastArgFixed], FastArgFloat, debugSwitches) (* RTS call with one general (i.e. ML word) argument and a floating point result. This is used only to convert arbitrary precision values to floats. 
*) fun rtsCallFastGeneraltoFloat (functionName, debugSwitches) = rtsCallFastGeneral (functionName, [FastArgFixed], FastArgFloat, debugSwitches) (* There is only one ABI value. *) datatype abi = ARM64Abi fun abiList () = [("default", ARM64Abi)] fun alignUp(s, align) = Word.andb(s + align-0w1, ~ align) val getThreadDataCall = makeEntryPoint "PolyArm64GetThreadData" (* This must match the type in Foreign.LowLevel. Once this is bootstrapped we could use that type but note that this is the type we use within the compiler and we build Foreign.LowLevel AFTER compiling this. *) datatype cTypeForm = CTypeFloatingPt | CTypePointer | CTypeSignedInt | CTypeUnsignedInt | CTypeStruct of cType list | CTypeVoid withtype cType = { typeForm: cTypeForm, align: word, size: word } (* Load a byte, halfword, word or long *) fun loadAlignedValue(reg, base, offset, size) = let val _ = offset mod size = 0w0 orelse raise InternalError "loadAlignedValue: not aligned" val loadOp = case size of - 0w8 => loadRegScaled - | 0w4 => loadRegScaled32 - | 0w2 => loadRegScaled16 - | 0w1 => loadRegScaledByte + 0w8 => Load64 + | 0w4 => Load32 + | 0w2 => Load16 + | 0w1 => Load8 | _ => raise InternalError "loadAlignedValue: invalid length" in - loadOp{regT=reg, regN=base, unitOffset=Word.toInt(offset div size)} + LoadRegScaled{regT=reg, regN=base, unitOffset=Word.toInt(offset div size), loadType=loadOp} end (* Store a register into upto 8 bytes. Most values will involve a single store but odd-sized structs can require shifts and multiple stores. N.B. May modify the source register. *) and storeUpTo8(reg, base, offset, size) = let val storeOp = - if size = 0w8 then storeRegUnscaled else if size >= 0w4 then storeRegUnscaled32 - else if size >= 0w2 then storeRegUnscaled16 else storeRegUnscaledByte + if size = 0w8 then Load64 else if size >= 0w4 then Load32 + else if size >= 0w2 then Load16 else Load8 in - [storeOp{regT=reg, regN=base, byteOffset=offset}] + [StoreRegUnscaled{regT=reg, regN=base, byteOffset=offset, loadType=storeOp, unscaledType=NoUpdate}] end @ ( if size = 0w6 orelse size = 0w7 then [ - logicalShiftRight{regN=reg, regD=reg, shift=0w32 }, - storeRegUnscaled16{regT=reg, regN=base, byteOffset=offset+4} + shiftConstant{direction=ShiftRightLogical, regN=reg, regD=reg, shift=0w32, opSize=OpSize64 }, + StoreRegUnscaled{regT=reg, regN=base, byteOffset=offset+4, loadType=Load16, unscaledType=NoUpdate} ] else [] ) @ ( if size = 0w3 orelse size = 0w5 orelse size = 0w7 then [ - logicalShiftRight{regN=reg, regD=reg, shift=(size-0w1)*0w8 }, - storeRegUnscaledByte{regT=reg, regN=base, byteOffset=offset+Word.toInt(size-0w1)} + shiftConstant{direction=ShiftRightLogical, regN=reg, regD=reg, shift=(size-0w1)*0w8, opSize=OpSize64 }, + StoreRegUnscaled{regT=reg, regN=base, byteOffset=offset+Word.toInt(size-0w1), loadType=Load8, unscaledType=NoUpdate} ] else [] ) (* Extract the elements of structures. *) fun unwrap(CTypeStruct ctypes, _) = List.foldr(fn({typeForm, size, ...}, l) => unwrap(typeForm, size) @ l) [] ctypes | unwrap (ctype, size) = [(ctype, size)] (* Structures of up to four floating point values of the same precision are treated specially. 
*) datatype argClass = ArgClassHFA of Word8.word * bool (* 1 - 4 floating pt values *) | ArgLargeStruct (* > 16 bytes and not an HFA *) | ArgSmall (* Scalars or small structures *) fun classifyArg(ctype, size) = case unwrap (ctype, size) of [(CTypeFloatingPt, 0w4)] => ArgClassHFA(0w1, false) | [(CTypeFloatingPt, 0w4), (CTypeFloatingPt, 0w4)] => ArgClassHFA(0w2, false) | [(CTypeFloatingPt, 0w4), (CTypeFloatingPt, 0w4), (CTypeFloatingPt, 0w4)] => ArgClassHFA(0w3, false) | [(CTypeFloatingPt, 0w4), (CTypeFloatingPt, 0w4), (CTypeFloatingPt, 0w4), (CTypeFloatingPt, 0w4)] => ArgClassHFA(0w4, false) | [(CTypeFloatingPt, 0w8)] => ArgClassHFA(0w1, true) | [(CTypeFloatingPt, 0w8), (CTypeFloatingPt, 0w8)] => ArgClassHFA(0w2, true) | [(CTypeFloatingPt, 0w8), (CTypeFloatingPt, 0w8), (CTypeFloatingPt, 0w8)] => ArgClassHFA(0w3, true) | [(CTypeFloatingPt, 0w8), (CTypeFloatingPt, 0w8), (CTypeFloatingPt, 0w8), (CTypeFloatingPt, 0w8)] => ArgClassHFA(0w4, true) | _ => if size > 0w16 then ArgLargeStruct else ArgSmall (* Can we load this in a single instruction? *) fun alignedLoadStore(_, 0w1) = true | alignedLoadStore(addr, 0w2) = addr mod 0w2 = 0w0 | alignedLoadStore(addr, 0w4) = addr mod 0w4 = 0w0 | alignedLoadStore(addr, 0w8) = addr mod 0w8 = 0w0 | alignedLoadStore(addr, 0w16) = addr mod 0w8 = 0w0 (* Can use load-pair. *) | alignedLoadStore _ = false (* This builds a piece of code that takes three arguments and returns a unit result. All three arguments are SysWord.word values i.e. ML addresses containing the address of the actual C value. The first argument (X0) is the address of the function to call. The second argument (X1) points to a struct that contains the argument(s) for the function. The arguments have to be unpacked from the struct into the appropriate registers or to the C stack. The third argument (X2) points to a piece of memory to receive the result of the call. It may be empty if the function returns void. It may only be as big as required for the result type. *) fun foreignCall(_: abi, args: cType list, result: cType): Address.machineWord = let val resultAreaPtr = X19 (* Unboxed value from X2 - This is callee save. *) val argPtrReg = X9 (* A scratch register that isn't used for arguments. *) val entryPtReg = X16 (* Contains the address of the function to call. *) val argWorkReg = X10 (* Used in loading arguments if necessary. *) and argWorkReg2 = X11 and structSpacePtr = X12 and argWorkReg3 = X13 and argWorkReg4 = X14 fun loadArgs([], stackOffset, _, _, _, code, largeStructSpace) = (code, stackOffset, largeStructSpace) | loadArgs(arg::args, stackOffset, argOffset, gRegNo, fpRegNo, code, largeStructSpace) = let val {size, align, typeForm, ...} = arg val newArgOffset = alignUp(argOffset, align) in case classifyArg(typeForm, size) of ArgClassHFA(numItems, isDouble) => if fpRegNo + numItems <= 0w8 then let val scale = if isDouble then 0w8 else 0w4 (* Load the values to the floating point registers. 
*) fun loadFPRegs(0w0, _, _) = [] | loadFPRegs(0w1, fpRegNo, offset) = - [(if isDouble then loadRegScaledDouble else loadRegScaledFloat) - {regT=VReg fpRegNo, regN=argPtrReg, unitOffset=offset}] + [LoadFPRegScaled{regT=VReg fpRegNo, regN=argPtrReg, unitOffset=offset, floatSize=if isDouble then Double64 else Float32}] | loadFPRegs(n, fpRegNo, offset) = - (if isDouble then loadPairOffsetDouble else loadPairOffsetFloat) - {regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=argPtrReg, unitOffset=offset} :: - loadFPRegs(n-0w2, fpRegNo+0w2, offset+2) + (LoadFPRegPair{regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=argPtrReg, unitOffset=offset, + floatSize=if isDouble then Double64 else Float32, unscaledType=NoUpdate} :: + loadFPRegs(n-0w2, fpRegNo+0w2, offset+2)) in loadArgs(args, stackOffset, newArgOffset+size, gRegNo, fpRegNo+numItems, loadFPRegs(numItems, fpRegNo, Word.toInt(newArgOffset div scale)) @ code, largeStructSpace) end else let (* If we have insufficient number of registers we discard any that are left and push the argument to the stack. *) (* The floating point value or structure is copied to the stack as a contiguous area. Use general registers to copy the data. It could be on a 4-byte alignment. In the typical case of a single floating point value this will just be a single load and store. *) fun copyData(0w0, _, _) = [] | copyData(n, srcOffset, stackOffset) = if isDouble - then loadRegScaled{regT=argWorkReg2, regN=argPtrReg, unitOffset=srcOffset} :: - storeRegScaled{regT=argWorkReg2, regN=XSP, unitOffset=stackOffset} :: + then LoadRegScaled{loadType=Load64, regT=argWorkReg2, regN=argPtrReg, unitOffset=srcOffset} :: + StoreRegScaled{loadType=Load64, regT=argWorkReg2, regN=XSP, unitOffset=stackOffset} :: copyData(n-0w1, srcOffset+1, stackOffset+1) - else loadRegScaled32{regT=argWorkReg2, regN=argPtrReg, unitOffset=srcOffset} :: - storeRegScaled32{regT=argWorkReg2, regN=XSP, unitOffset=stackOffset} :: + else LoadRegScaled{loadType=Load32, regT=argWorkReg2, regN=argPtrReg, unitOffset=srcOffset} :: + StoreRegScaled{loadType=Load32, regT=argWorkReg2, regN=XSP, unitOffset=stackOffset} :: copyData(n-0w1, srcOffset+1, stackOffset+1) val copyToStack = if isDouble then copyData(numItems, Word.toInt(newArgOffset div 0w8), stackOffset) else copyData(numItems, Word.toInt(newArgOffset div 0w4), stackOffset*2) (* The overall size is rounded up to a multiple of 8 *) val newStackOffset = stackOffset + Word.toInt(alignUp(size, 0w8) div 0w8) in loadArgs(args, newStackOffset, newArgOffset+size, gRegNo, 0w8, copyToStack @ code, largeStructSpace) end | _ => let (* Load an aligned argument into one or two registers or copy it to the stack. 
*) fun loadArgumentValues(argSize, sourceOffset, sourceBase, newStructSpace, preCode) = if gRegNo <= 0w6 orelse (size <= 0w8 andalso gRegNo <= 0w7) then (* There are sufficient registers *) let val (loadInstr, nextGReg) = if argSize = 0w16 - then ([loadPairOffset{regT1=XReg gRegNo, regT2=XReg(gRegNo+0w1), + then ([LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=XReg gRegNo, regT2=XReg(gRegNo+0w1), regN=sourceBase, unitOffset=Word.toInt(sourceOffset div 0w8)}], gRegNo+0w2) else ([loadAlignedValue(XReg gRegNo, sourceBase, sourceOffset, size)], gRegNo+0w1) in loadArgs(args, stackOffset, newArgOffset+size, nextGReg, fpRegNo, preCode @ loadInstr @ code, newStructSpace) end else if argSize = 0w16 then loadArgs(args, stackOffset+2, newArgOffset+size, 0w8, fpRegNo, preCode @ - loadPairOffset{regT1=argWorkReg2, regT2=argWorkReg3, regN=sourceBase, unitOffset=Word.toInt(sourceOffset div 0w8)} :: - storePairOffset{regT1=argWorkReg2, regT2=argWorkReg3, regN=XSP, unitOffset=stackOffset} :: code, newStructSpace) + LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=argWorkReg2, regT2=argWorkReg3, regN=sourceBase, unitOffset=Word.toInt(sourceOffset div 0w8)} :: + StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=argWorkReg2, regT2=argWorkReg3, regN=XSP, unitOffset=stackOffset} :: code, newStructSpace) else loadArgs(args, stackOffset+1, newArgOffset+size, 0w8, fpRegNo, preCode @ loadAlignedValue(argWorkReg2, sourceBase, sourceOffset, argSize) :: - storeRegScaled{regT=argWorkReg2, regN=XSP, unitOffset=stackOffset} :: code, newStructSpace) + StoreRegScaled{loadType=Load64, regT=argWorkReg2, regN=XSP, unitOffset=stackOffset} :: code, newStructSpace) in if alignedLoadStore(newArgOffset, size) then loadArgumentValues(size, newArgOffset, argPtrReg, largeStructSpace, []) else (* General case. Either a large structure or a small structure that can't easily be loaded, First copy it to the stack, and either pass the address or load it once it's aligned. *) let val newStructSpace = alignUp(largeStructSpace + size, 0w16) val loopLabel = createLabel() (* The address of the area we're copying to is in argRegNo. *) val argRegNo = if gRegNo < 0w8 then XReg gRegNo else argWorkReg (* Copy from the end back to the start. *) val copyToStructSpace = [ - addImmediate{regN=structSpacePtr, regD=argRegNo, immed=largeStructSpace, shifted=false}, - addImmediate{regN=argRegNo, regD=argWorkReg2, immed=size, shifted=false}, (* End of dest area *) - addImmediate{regN=argPtrReg, regD=argWorkReg3, immed=newArgOffset+size, shifted=false}, (* end of source *) - setLabel loopLabel, - loadRegPreIndexByte{regT=argWorkReg4, regN=argWorkReg3, byteOffset= ~1}, - storeRegPreIndexByte{regT=argWorkReg4, regN=argWorkReg2, byteOffset= ~1}, - subSShiftedReg{regM=argWorkReg2, regN=argRegNo, regD=XZero, shift=ShiftNone}, (* At start? 
*) - conditionalBranch(CondNotEqual, loopLabel) + AddImmediate{opSize=OpSize64, setFlags=false, regN=structSpacePtr, regD=argRegNo, immed=largeStructSpace, shifted=false}, + AddImmediate{opSize=OpSize64, setFlags=false, regN=argRegNo, regD=argWorkReg2, immed=size, shifted=false}, (* End of dest area *) + AddImmediate{opSize=OpSize64, setFlags=false, regN=argPtrReg, regD=argWorkReg3, immed=newArgOffset+size, shifted=false}, (* end of source *) + SetLabel loopLabel, + LoadRegUnscaled{loadType=Load8, unscaledType=PreIndex, regT=argWorkReg4, regN=argWorkReg3, byteOffset= ~1}, + StoreRegUnscaled{loadType=Load8, unscaledType=PreIndex, regT=argWorkReg4, regN=argWorkReg2, byteOffset= ~1}, + SubShiftedReg{opSize=OpSize64, setFlags=true, regM=argWorkReg2, regN=argRegNo, regD=XZero, shift=ShiftNone}, (* At start? *) + ConditionalBranch(CondNotEqual, loopLabel) ] in if size > 0w16 then (* Large struct - pass by reference *) ( if gRegNo < 0w8 then loadArgs(args, stackOffset, newArgOffset+size, gRegNo+0w1, fpRegNo, copyToStructSpace @ code, newStructSpace) else loadArgs(args, stackOffset+1, newArgOffset+size, 0w8, fpRegNo, - copyToStructSpace @ storeRegScaled{regT=argWorkReg, regN=XSP, unitOffset=stackOffset} :: code, + copyToStructSpace @ StoreRegScaled{loadType=Load64, regT=argWorkReg, regN=XSP, unitOffset=stackOffset} :: code, newStructSpace) ) else (* Small struct. Since it's now in an area at least 16 bytes and properly aligned we can load it. *) (* argRegNo points to where we copied it *) loadArgumentValues(if size > 0w8 then 0w16 else 0w8, 0w0, argRegNo, newStructSpace, copyToStructSpace) end end end local val {size, typeForm, ...} = result (* Store a result register into the result area. In almost all cases this is very simple: the only complication is with structs of odd sizes. *) fun storeResult(reg, offset, size) = storeUpTo8(reg, resultAreaPtr, offset, size) in val (getResult, passArgAddress) = if typeForm = CTypeVoid then ([], false) else case classifyArg(typeForm, size) of (* Floating point values are returned in s0-sn, d0-dn. *) ArgClassHFA(numItems, isDouble) => let fun storeFPRegs(0w0, _, _) = [] | storeFPRegs(0w1, fpRegNo, offset) = - [(if isDouble then storeRegScaledDouble else storeRegScaledFloat) - {regT=VReg fpRegNo, regN=resultAreaPtr, unitOffset=offset}] + [StoreFPRegScaled{regT=VReg fpRegNo, regN=resultAreaPtr, unitOffset=offset, floatSize=if isDouble then Double64 else Float32}] | storeFPRegs(n, fpRegNo, offset) = - (if isDouble then storePairOffsetDouble else storePairOffsetFloat) - {regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=resultAreaPtr, unitOffset=offset} :: - storeFPRegs(n-0w2, fpRegNo+0w2, offset+2) + StoreFPRegPair{regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=resultAreaPtr, unitOffset=offset, + floatSize=if isDouble then Double64 else Float32, unscaledType=NoUpdate} :: + storeFPRegs(n-0w2, fpRegNo+0w2, offset+2) in (storeFPRegs(numItems, 0w0 (* V0-Vn*), 0), false) end | ArgLargeStruct => ([], true) (* Structures larger than 16 bytes are passed by reference. 
*) | _ => if size = 0w16 - then ([storePairOffset{regT1=X0, regT2=X1, regN=resultAreaPtr, unitOffset=0}], false) + then ([StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X0, regT2=X1, regN=resultAreaPtr, unitOffset=0}], false) else if size > 0w8 - then (storeRegScaled{regT=X0, regN=resultAreaPtr, unitOffset=0} :: storeResult(X1, 8, size-0w8), false) + then (StoreRegScaled{loadType=Load64, regT=X0, regN=resultAreaPtr, unitOffset=0} :: storeResult(X1, 8, size-0w8), false) else (storeResult(X0, 0, size), false) end val (argCode, argStack, largeStructSpace) = loadArgs(args, 0, 0w0, 0w0, 0w0, if passArgAddress (* If we have to pass the address of the result struct it goes in X8. *) - then [moveRegToReg{sReg=resultAreaPtr, dReg=X8}] + then [MoveXRegToXReg{sReg=resultAreaPtr, dReg=X8}] else [], 0w0) val stackSpaceRequired = alignUp(Word.fromInt argStack * 0w8, 0w16) + largeStructSpace val instructions = [(* Push the return address to the stack. We could put it in a callee-save register but there's a very small chance that this could be the last reference to a piece of code. *) - storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}, + StoreRegUnscaled{loadType=Load64, unscaledType=PreIndex, regT=X30, regN=X_MLStackPtr, byteOffset= ~8}, (* Save heap ptr. Needed in case we have a callback. *) - storeRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset} + StoreRegScaled{loadType=Load64, regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset} ] @ indexToAbsoluteAddress(X0, X0) @ (* Load the entry point address. *) - loadRegScaled{regT=entryPtReg, regN=X0, unitOffset=0} :: + LoadRegScaled{loadType=Load64, regT=entryPtReg, regN=X0, unitOffset=0} :: ( (* Unbox the address of the result area into a callee save resgister. This is where the result will be stored on return if it is anything other than a struct. We have to put the C address in there now because an ML address wouldn't be updated by a possible GC in a callback. *) if #typeForm(result) <> CTypeVoid - then indexToAbsoluteAddress(X2, X2) @ [loadRegScaled{regT=resultAreaPtr, regN=X2, unitOffset=0}] + then indexToAbsoluteAddress(X2, X2) @ [LoadRegScaled{loadType=Load64, regT=resultAreaPtr, regN=X2, unitOffset=0}] else [] ) @ - [storeRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}] @ (* Save the stack pointer. *) + [StoreRegScaled{loadType=Load64, regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}] @ (* Save the stack pointer. *) ( if stackSpaceRequired = 0w0 then [] - else [subImmediate{regN=XSP, regD=XSP, immed=stackSpaceRequired, shifted=false}] + else [SubImmediate{opSize=OpSize64, setFlags=false, regN=XSP, regD=XSP, immed=stackSpaceRequired, shifted=false}] ) @ ( (* If we need to copy a struct load a register with a pointer to the area for it. *) if largeStructSpace = 0w0 then [] - else [addImmediate{regN=XSP, regD=structSpacePtr, immed=stackSpaceRequired-largeStructSpace, shifted=false}] + else [AddImmediate{opSize=OpSize64, setFlags=false, regN=XSP, regD=structSpacePtr, immed=stackSpaceRequired-largeStructSpace, shifted=false}] ) @ ( (* The second argument is a SysWord containing the address of a malloced area of memory with the actual arguments in it. 
*) if null args then [] - else indexToAbsoluteAddress(X1, X1) @ [loadRegScaled{regT=argPtrReg, regN=X1, unitOffset=0}] + else indexToAbsoluteAddress(X1, X1) @ [LoadRegScaled{loadType=Load64, regT=argPtrReg, regN=X1, unitOffset=0}] ) @ argCode @ - [branchAndLinkReg X16] @ (* Call the function. *) + [BranchReg{regD=X16, brRegType=BRRAndLink}] @ (* Call the function. *) (* Restore the C stack value in case it's been changed by a callback. *) ( if stackSpaceRequired = 0w0 then [] - else [addImmediate{regN=XSP, regD=XSP, immed=stackSpaceRequired, shifted=false}] + else [AddImmediate{opSize=OpSize64, setFlags=false, regN=XSP, regD=XSP, immed=stackSpaceRequired, shifted=false}] ) @ [ (* Reload the ML stack pointer even though it's callee save. If we've made a callback the ML stack could have grown and so moved to a different address. *) - loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, + LoadRegScaled{loadType=Load64, regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, (* Load the heap allocation ptr and limit in case of a callback. *) - loadRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, - loadRegScaled{regT=X_MLHeapLimit, regN=X_MLAssemblyInt, unitOffset=heapLimitPtrOffset} + LoadRegScaled{loadType=Load64, regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, + LoadRegScaled{loadType=Load64, regT=X_MLHeapLimit, regN=X_MLAssemblyInt, unitOffset=heapLimitPtrOffset} ] @ (* Store the result in the destination. *) getResult @ (* Pop the return address and return. *) - [ loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}, returnRegister X30 ] + [ LoadRegUnscaled{regT=X30, regN=X_MLStackPtr, byteOffset= 8, loadType=Load64, unscaledType=PostIndex}, BranchReg{regD=X30,brRegType=BRRReturn} ] val functionName = "foreignCall" val debugSwitches = [(*Universal.tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), Universal.tagInject Debug.assemblyCodeTag true*)] val closure = makeConstantClosure() - val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure, - profileObject=createProfileObject()} + val () = generateFinalCode{instrs=instructions, name=functionName, parameters=debugSwitches, resultClosure=closure, + profileObject=createProfileObject(), labelCount=0} in closureAsAddress closure end (* Build a callback function. The arguments are the abi, the list of argument types and the result type. The result is an ML function that takes an ML function, f, as its argument, registers it as a callback and returns the C function as its result. When the C function is called the arguments are copied into temporary memory and the vector passed to f along with the address of the memory for the result. "f" stores the result in it when it returns and the result is then passed back as the result of the callback. N.B. This returns a closure cell which contains the address of the code. It can be used as a SysWord.word value except that while it exists the code will not be GCed. *) fun buildCallBack(_: abi, args: cType list, result: cType): Address.machineWord = let val argWorkReg = X10 (* Used in loading arguments if necessary. *) and argWorkReg2 = X11 and argWorkReg3 = X13 and argWorkReg4 = X14 (* The stack contains a 32-byte result area then an aligned area for the arguments. *) (* Store the argument values to the structure that will be passed to the ML callback function. *) (* Note. 
We've loaded the frame pointer with the original stack ptr-96 so we can access any stack arguments from that. *) fun moveArgs([], _, _, _, _, moveFromStack) = moveFromStack | moveArgs(arg::args, stackSpace, argOffset, gRegNo, fpRegNo, moveFromStack) = let val {size, align, typeForm, ...} = arg val newArgOffset = alignUp(argOffset, align) in case classifyArg(typeForm, size) of ArgClassHFA(numItems, isDouble) => if fpRegNo + numItems <= 0w8 then let val scale = if isDouble then 0w8 else 0w4 (* Store the values from the FP registers. *) fun storeFPRegs(0w0, _, _) = [] | storeFPRegs(0w1, fpRegNo, offset) = - [(if isDouble then storeRegScaledDouble else storeRegScaledFloat) - {regT=VReg fpRegNo, regN=XSP, unitOffset=offset}] + [StoreFPRegScaled{regT=VReg fpRegNo, regN=XSP, unitOffset=offset, floatSize=if isDouble then Double64 else Float32}] | storeFPRegs(n, fpRegNo, offset) = - (if isDouble then storePairOffsetDouble else storePairOffsetFloat) - {regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=XSP, unitOffset=offset} :: + StoreFPRegPair{regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=XSP, unitOffset=offset, + floatSize=if isDouble then Double64 else Float32, unscaledType=NoUpdate} :: storeFPRegs(n-0w2, fpRegNo+0w2, offset+2) in moveArgs(args, stackSpace, newArgOffset+size, gRegNo, fpRegNo+numItems, storeFPRegs(numItems, fpRegNo, Word.toInt(newArgOffset div scale)) @ moveFromStack) end else let (* Load the arguments from the stack and store into the result area. *) fun copyData(0w0, _, _) = [] | copyData(n, dstOffset, stackOffset) = if isDouble - then loadRegScaled{regT=argWorkReg2, regN=X29, unitOffset=stackOffset} :: - storeRegScaled{regT=argWorkReg2, regN=XSP, unitOffset=dstOffset} :: + then LoadRegScaled{loadType=Load64, regT=argWorkReg2, regN=X29, unitOffset=stackOffset} :: + StoreRegScaled{loadType=Load64, regT=argWorkReg2, regN=XSP, unitOffset=dstOffset} :: copyData(n-0w1, dstOffset+1, stackOffset+1) - else loadRegScaled32{regT=argWorkReg2, regN=X29, unitOffset=stackOffset} :: - storeRegScaled32{regT=argWorkReg2, regN=XSP, unitOffset=dstOffset} :: + else LoadRegScaled{loadType=Load32, regT=argWorkReg2, regN=X29, unitOffset=stackOffset} :: + StoreRegScaled{loadType=Load32, regT=argWorkReg2, regN=XSP, unitOffset=dstOffset} :: copyData(n-0w1, dstOffset+1, stackOffset+1) val copyFromStack = if isDouble then copyData(numItems, Word.toInt(newArgOffset div 0w8), stackSpace) else copyData(numItems, Word.toInt(newArgOffset div 0w4), stackSpace*2) (* The overall size is rounded up to a multiple of 8 *) val newStackOffset = stackSpace + Word.toInt(alignUp(size, 0w8) div 0w8) in moveArgs(args, newStackOffset, newArgOffset+size, gRegNo, 0w8, copyFromStack @ moveFromStack) end | _ => if alignedLoadStore(newArgOffset, size) andalso (gRegNo <= 0w6 orelse gRegNo = 0w7 andalso size <= 0w8) then (* Usual case: argument passed in one or two registers. *) ( if size > 0w8 then moveArgs(args, stackSpace, newArgOffset+size, gRegNo + 0w2, fpRegNo, - storePairOffset{regT1=XReg gRegNo, regT2=XReg(gRegNo+0w1), regN=XSP, + StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=XReg gRegNo, regT2=XReg(gRegNo+0w1), regN=XSP, unitOffset=Word.toInt(newArgOffset div 0w8)} :: moveFromStack) else moveArgs(args, stackSpace, newArgOffset+size, gRegNo + 0w1, fpRegNo, storeUpTo8(XReg gRegNo, XSP, Word.toInt newArgOffset, size) @ moveFromStack) ) else (* General case. Store the argument registers if necessary and then use a byte copy to copy into the argument area. This sorts out any odd alignments or lengths. 
In some cases the source will be in memory already. *) let (* The source is either the register value or the value on the stack. *) val (argRegNo, nextGReg, newStack, loadArg) = if size > 0w16 then ( if gRegNo < 0w8 then (XReg gRegNo, gRegNo + 0w1, stackSpace, []) - else (argWorkReg, 0w8, stackSpace+1, [loadRegScaled{regT=argWorkReg, regN=X29, unitOffset=stackSpace}]) + else (argWorkReg, 0w8, stackSpace+1, [LoadRegScaled{loadType=Load64, regT=argWorkReg, regN=X29, unitOffset=stackSpace}]) ) else let val regsNeeded = if size > 0w8 then 0w2 else 0w1 in if gRegNo + regsNeeded <= 0w8 then (XReg gRegNo, gRegNo+regsNeeded, stackSpace, [if size > 0w8 - then storePairOffset{regT1=XReg gRegNo, regT2=XReg(gRegNo+0w1), regN=XSP, unitOffset=2} - else storeRegScaled{regT=XReg gRegNo, regN=XSP, unitOffset=2}, - addImmediate{regD=XReg gRegNo, regN=XSP, immed=0w16, shifted=false}]) + then StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=XReg gRegNo, regT2=XReg(gRegNo+0w1), regN=XSP, unitOffset=2} + else StoreRegScaled{loadType=Load64, regT=XReg gRegNo, regN=XSP, unitOffset=2}, + AddImmediate{opSize=OpSize64, setFlags=false, regD=XReg gRegNo, regN=XSP, immed=0w16, shifted=false}]) else (* Being passed on the stack *) (argWorkReg, 0w8, stackSpace+Word8.toInt regsNeeded, - [addImmediate{regD=argWorkReg, regN=X29, immed=Word.fromInt stackSpace*0w8, shifted=false}]) + [AddImmediate{opSize=OpSize64, setFlags=false, regD=argWorkReg, regN=X29, immed=Word.fromInt stackSpace*0w8, shifted=false}]) end val loopLabel = createLabel() val copyCode = [ - addImmediate{regN=argRegNo, regD=argWorkReg3, immed=size, shifted=false}, (* End of source area *) - addImmediate{regN=XSP, regD=argWorkReg2, immed=newArgOffset+size, shifted=false}, (* end of dest *) - setLabel loopLabel, - loadRegPreIndexByte{regT=argWorkReg4, regN=argWorkReg3, byteOffset= ~1}, - storeRegPreIndexByte{regT=argWorkReg4, regN=argWorkReg2, byteOffset= ~1}, - subSShiftedReg{regM=argWorkReg3, regN=argRegNo, regD=XZero, shift=ShiftNone}, (* At start? *) - conditionalBranch(CondNotEqual, loopLabel) + AddImmediate{opSize=OpSize64, setFlags=false, regN=argRegNo, regD=argWorkReg3, immed=size, shifted=false}, (* End of source area *) + AddImmediate{opSize=OpSize64, setFlags=false, regN=XSP, regD=argWorkReg2, immed=newArgOffset+size, shifted=false}, (* end of dest *) + SetLabel loopLabel, + LoadRegUnscaled{loadType=Load8, unscaledType=PreIndex, regT=argWorkReg4, regN=argWorkReg3, byteOffset= ~1}, + StoreRegUnscaled{loadType=Load8, unscaledType=PreIndex, regT=argWorkReg4, regN=argWorkReg2, byteOffset= ~1}, + SubShiftedReg{opSize=OpSize64, setFlags=true, regM=argWorkReg3, regN=argRegNo, regD=XZero, shift=ShiftNone}, (* At start? *) + ConditionalBranch(CondNotEqual, loopLabel) ] in moveArgs(args, newStack, newArgOffset+size, nextGReg, fpRegNo, loadArg @ copyCode @ moveFromStack) end end val copyArgsFromRegsAndStack = moveArgs(args, 12 (* Offset to first stack arg *), 0w32 (* Size of result area *), 0w0, 0w0, []) local fun getNextSize (arg, argOffset) = let val {size, align, ...} = arg in alignUp(argOffset, align) + size end in val argumentSpace = alignUp(List.foldl getNextSize 0w0 args, 0w16) end local val {size, typeForm, ...} = result in (* Load the results from the result area except that if we're passing the result structure by reference this is done by the caller. Generally similar to how arguments are passed in a call. 
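As an illustration, not something in this patch: assuming classifyArg treats a C struct of three floats as ArgClassHFA(0w3, false), loadFPRegs(0w3, 0w0, 0) below would build the instruction-list fragment

    LoadFPRegPair{regT1=VReg 0w0, regT2=VReg 0w1, regN=XSP, unitOffset=0,
                  unscaledType=NoUpdate, floatSize=Float32} ::
    LoadFPRegScaled{regT=VReg 0w2, regN=XSP, unitOffset=2, floatSize=Float32} :: []

i.e. S0 and S1 are reloaded as a pair from the start of the 32-byte result area and S2 from the following float slot.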
*) val (loadResults, resultByReference) = if typeForm = CTypeVoid then ([], false) else case classifyArg(typeForm, size) of ArgClassHFA(numItems, isDouble) => let (* Load the values to the floating point registers. *) fun loadFPRegs(0w0, _, _) = [] | loadFPRegs(0w1, fpRegNo, offset) = - [(if isDouble then loadRegScaledDouble else loadRegScaledFloat) - {regT=VReg fpRegNo, regN=XSP, unitOffset=offset}] + [LoadFPRegScaled{regT=VReg fpRegNo, regN=XSP, unitOffset=offset, floatSize=if isDouble then Double64 else Float32}] | loadFPRegs(n, fpRegNo, offset) = - (if isDouble then loadPairOffsetDouble else loadPairOffsetFloat) - {regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=XSP, unitOffset=offset} :: + LoadFPRegPair{regT1=VReg fpRegNo, regT2=VReg(fpRegNo+0w1), regN=XSP, unitOffset=offset, unscaledType=NoUpdate, + floatSize=if isDouble then Double64 else Float32} :: loadFPRegs(n-0w2, fpRegNo+0w2, offset+2) in (loadFPRegs(numItems, 0w0, 0 (* result area *)), false) end | ArgLargeStruct => ([], true) (* Structures larger than 16 bytes are passed by reference. *) | _ => (* We've allocated a 32-byte area aligned onto a 16-byte boundary so we can simply load one or two registers. *) if size > 0w8 - then ([loadPairOffset{regT1=X0, regT2=X1, regN=XSP, unitOffset=0}], false) - else ([loadRegScaled{regT=X0, regN=XSP, unitOffset=0}], false) + then ([LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X0, regT2=X1, regN=XSP, unitOffset=0}], false) + else ([LoadRegScaled{loadType=Load64, regT=X0, regN=XSP, unitOffset=0}], false) end val instructions = [ (* Push LR, FP and the callee-save registers. *) - storePairPreIndexed{regT1=X29, regT2=X30, regN=XSP, unitOffset= ~12}, - moveRegToReg{sReg=XSP, dReg=X29}, - storePairOffset{regT1=X19, regT2=X20, regN=X29, unitOffset=2}, - storePairOffset{regT1=X21, regT2=X22, regN=X29, unitOffset=4}, - storePairOffset{regT1=X23, regT2=X24, regN=X29, unitOffset=6}, - storePairOffset{regT1=X25, regT2=X26, regN=X29, unitOffset=8}, - storePairOffset{regT1=X27, regT2=X28, regN=X29, unitOffset=10}, + StoreRegPair{loadType=Load64, unscaledType=PreIndex, regT1=X29, regT2=X30, regN=XSP, unitOffset= ~12}, + MoveXRegToXReg{sReg=XSP, dReg=X29}, + StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X19, regT2=X20, regN=X29, unitOffset=2}, + StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X21, regT2=X22, regN=X29, unitOffset=4}, + StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X23, regT2=X24, regN=X29, unitOffset=6}, + StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X25, regT2=X26, regN=X29, unitOffset=8}, + StoreRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X27, regT2=X28, regN=X29, unitOffset=10}, (* Reserve space for the arguments and results. *) - subImmediate{regN=XSP, regD=XSP, immed=argumentSpace+0w32, shifted=false}, + SubImmediate{opSize=OpSize64, setFlags=false, regN=XSP, regD=XSP, immed=argumentSpace+0w32, shifted=false}, (* We passed the function we're calling in X9 but we need to move it to a callee-save register before we call the RTS. *) - moveRegToReg{sReg=X9, dReg=X20} + MoveXRegToXReg{sReg=X9, dReg=X20} ] @ (* Save X8 if we're going to need it. *) - (if resultByReference then [storeRegScaled{regT=X8, regN=XSP, unitOffset=0}] else []) @ + (if resultByReference then [StoreRegScaled{loadType=Load64, regT=X8, regN=XSP, unitOffset=0}] else []) @ (* Now we've saved X24 we can move the global heap base into it. 
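By this point the whole frame is in place; to spell out what the offsets above imply: the pre-indexed StoreRegPair of X29/X30 with unitOffset= ~12 claims twelve 64-bit words, i.e. 96 bytes, and X29 is then set to the new stack pointer, so X19/X20 through X27/X28 sit at byte offsets 16 to 80 from X29 and the caller's original stack pointer is at X29+96. That is why moveArgs starts its stack-argument offsets at 12 words from X29.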
*) - (if is32in64 then [moveRegToReg{sReg=X10, dReg=X_Base32in64}] else []) @ + (if is32in64 then [MoveXRegToXReg{sReg=X10, dReg=X_Base32in64}] else []) @ copyArgsFromRegsAndStack @ - [loadAddressConstant(X0, getThreadDataCall)] @ + [LoadAddr(X0, getThreadDataCall)] @ ( if is32in64 - then [addShiftedReg{regM=X0, regN=X_Base32in64, regD=X0, shift=ShiftLSL 0w2}] + then [AddShiftedReg{setFlags=false, opSize=OpSize64, regM=X0, regN=X_Base32in64, regD=X0, shift=ShiftLSL 0w2}] else [] ) @ [ (* Call into the RTS to get the thread data ptr. *) - loadRegScaled{regT=X0, regN=X0, unitOffset=0}, - branchAndLinkReg X0, - moveRegToReg{sReg=X0, dReg=X_MLAssemblyInt}, + LoadRegScaled{loadType=Load64, regT=X0, regN=X0, unitOffset=0}, + BranchReg{regD=X0, brRegType=BRRAndLink}, + MoveXRegToXReg{sReg=X0, dReg=X_MLAssemblyInt}, (* Load the ML regs. *) - loadRegScaled{regT=X_MLHeapLimit, regN=X_MLAssemblyInt, unitOffset=heapLimitPtrOffset}, - loadRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, - loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, + LoadRegScaled{loadType=Load64, regT=X_MLHeapLimit, regN=X_MLAssemblyInt, unitOffset=heapLimitPtrOffset}, + LoadRegScaled{loadType=Load64, regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, + LoadRegScaled{loadType=Load64, regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset}, (* Prepare the arguments. They are both syswords so have to be boxed. First load the address of the argument area which is after the 32-byte result area. *) - addImmediate{regN=XSP, regD=X2, immed=0w32, shifted=false} - ] @ boxSysWord{source=X2, destination=X0, workReg=X3, saveRegs=[]} @ (* Address of arguments. *) + AddImmediate{opSize=OpSize64, setFlags=false, regN=XSP, regD=X2, immed=0w32, shifted=false} + ] @ List.rev(boxSysWord({source=X2, destination=X0, workReg=X3, saveRegs=[]}, [])) @ (* Address of arguments. *) ( (* Result area pointer. If we're returning by reference this is the original value of X8 otherwise it's the address of the 32 bytes we've reserved. *) if resultByReference - then [loadRegScaled{regT=X2, regN=XSP, unitOffset=0}] - else [moveRegToReg{sReg=XSP, dReg=X2}] - ) @ boxSysWord{source=X2, destination=X1, workReg=X3, saveRegs=[]} @ + then [LoadRegScaled{loadType=Load64, regT=X2, regN=XSP, unitOffset=0}] + else [MoveXRegToXReg{sReg=XSP, dReg=X2}] + ) @ List.rev(boxSysWord({source=X2, destination=X1, workReg=X3, saveRegs=[]}, [])) @ (* Put the ML closure pointer, originally in X9 now in X20, into the ML closure pointer register, X8. Then call the ML code. *) - [moveRegToReg{sReg=X20, dReg=X8}] @ + [MoveXRegToXReg{sReg=X20, dReg=X8}] @ ( if is32in64 then [ - addShiftedReg{regM=X8, regN=X_Base32in64, regD=X16, shift=ShiftLSL 0w2}, - loadRegScaled{regT=X16, regN=X16, unitOffset=0} + AddShiftedReg{regM=X8, regN=X_Base32in64, regD=X16, shift=ShiftLSL 0w2, opSize=OpSize64, setFlags=false}, + LoadRegScaled{loadType=Load64, regT=X16, regN=X16, unitOffset=0} ] - else [loadRegScaled{regT=X16, regN=X8, unitOffset=0}] + else [LoadRegScaled{loadType=Load64, regT=X16, regN=X8, unitOffset=0}] ) @ [ - branchAndLinkReg X16, + BranchReg{regD=X16, brRegType=BRRAndLink}, (* Save the ML stack and heap pointers. We could have allocated or grown the stack. The limit pointer is maintained by the RTS. 
*) - storeRegScaled{regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, - storeRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset} + StoreRegScaled{loadType=Load64, regT=X_MLHeapAllocPtr, regN=X_MLAssemblyInt, unitOffset=heapAllocPtrOffset}, + StoreRegScaled{loadType=Load64, regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=mlStackPtrOffset} ] @ loadResults @ (* Load the return values *) [ (* Restore the callee-save registers and return. *) - moveRegToReg{sReg=X29, dReg=XSP}, - loadPairOffset{regT1=X19, regT2=X20, regN=X29, unitOffset=2}, - loadPairOffset{regT1=X21, regT2=X22, regN=X29, unitOffset=4}, - loadPairOffset{regT1=X23, regT2=X24, regN=X29, unitOffset=6}, - loadPairOffset{regT1=X25, regT2=X26, regN=X29, unitOffset=8}, - loadPairOffset{regT1=X27, regT2=X28, regN=X29, unitOffset=10}, - loadPairPostIndexed{regT1=X29, regT2=X30, regN=XSP, unitOffset=12}, - returnRegister X30 + MoveXRegToXReg{sReg=X29, dReg=XSP}, + LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X19, regT2=X20, regN=X29, unitOffset=2}, + LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X21, regT2=X22, regN=X29, unitOffset=4}, + LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X23, regT2=X24, regN=X29, unitOffset=6}, + LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X25, regT2=X26, regN=X29, unitOffset=8}, + LoadRegPair{loadType=Load64, unscaledType=NoUpdate, regT1=X27, regT2=X28, regN=X29, unitOffset=10}, + LoadRegPair{loadType=Load64, unscaledType=PostIndex, regT1=X29, regT2=X30, regN=XSP, unitOffset=12}, + BranchReg{regD=X30, brRegType=BRRReturn} ] val functionName = "foreignCallBack(2)" val debugSwitches = [(*Universal.tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), Universal.tagInject Debug.assemblyCodeTag true*)] val closure = makeConstantClosure() - val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, - resultClosure=closure, profileObject=createProfileObject()} + val () = generateFinalCode{instrs=instructions, name=functionName, parameters=debugSwitches, + resultClosure=closure, profileObject=createProfileObject(), labelCount=0} val stage2Code = closureAsAddress closure fun resultFunction f = let (* Generate a small function to load the address of f into a register and then jump to stage2. The idea is that it should be possible to generate this eventually in a single RTS call. That could be done by using a version of this as a model. *) val instructions = if is32in64 then (* Get the global heap base into X10. *) - loadGlobalHeapBaseInCallback X10 @ [ - loadAddressConstant(X9, Address.toMachineWord f), + LoadGlobalHeapBaseInCallback X10, + LoadAddr(X9, Address.toMachineWord f), (* Have to load the actual address at run-time. *) - loadAddressConstant(X16, stage2Code), - addShiftedReg{regM=X16, regN=X10, regD=X16, shift=ShiftLSL 0w2}, - loadRegScaled{regT=X16, regN=X16, unitOffset=0}, - branchRegister X16 + LoadAddr(X16, stage2Code), + AddShiftedReg{setFlags=false, opSize=OpSize64, regM=X16, regN=X10, regD=X16, shift=ShiftLSL 0w2}, + LoadRegScaled{loadType=Load64, regT=X16, regN=X16, unitOffset=0}, + BranchReg{regD=X16, brRegType=BRRBranch} ] else let (* We can extract the actual code address in the native address version. 
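Put another way: in native mode the first word of the stage2 closure already holds an absolute code address, so it can be read once with Address.loadWord when this stub is built and planted as a constant. In 32-in-64 the stage2Code value is an object index, so the branch above has to form the absolute closure address at run time as X_Base32in64 plus the index shifted left by two, and only then load the code address from its first word.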
*) val codeAddress = Address.loadWord(Address.toAddress stage2Code, 0w0) in [ - loadAddressConstant(X9, Address.toMachineWord f), - loadAddressConstant(X16, codeAddress), - branchRegister X16 + LoadAddr(X9, Address.toMachineWord f), + LoadAddr(X16, codeAddress), + BranchReg{regD=X16, brRegType=BRRBranch} ] end val functionName = "foreignCallBack(1)" val debugSwitches = [(*Universal.tagInject Pretty.compilerOutputTag (Pretty.prettyPrint(print, 70)), Universal.tagInject Debug.assemblyCodeTag true*)] val closure = makeConstantClosure() - val () = generateCode{instrs=instructions, name=functionName, parameters=debugSwitches, - resultClosure=closure, profileObject=createProfileObject()} + val () = generateFinalCode{instrs=instructions, name=functionName, parameters=debugSwitches, + resultClosure=closure, profileObject=createProfileObject(), labelCount=0} val res = closureAsAddress closure (*val _ = print("Address is " ^ (LargeWord.toString(RunCall.unsafeCast res)) ^ "\n")*) in res end in Address.toMachineWord resultFunction end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml deleted file mode 100644 index f062ad5f..00000000 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64GenCode.sml +++ /dev/null @@ -1,2950 +0,0 @@ -(* - Copyright (c) 2021 David C. J. Matthews - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - Licence version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public Licence for more details. - - You should have received a copy of the GNU Lesser General Public - Licence along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*) - -functor Arm64GenCode ( - structure BackendTree: BACKENDINTERMEDIATECODE - and CodeArray: CODEARRAY - and Arm64Assembly: ARM64ASSEMBLY - and Debug: DEBUG - and Arm64Foreign: FOREIGNCALL - and Arm64Sequences: ARM64SEQUENCES - - sharing BackendTree.Sharing = CodeArray.Sharing = Arm64Assembly.Sharing = Arm64Sequences.Sharing - -) : GENCODE = -struct - - open BackendTree CodeArray Arm64Assembly Address Arm64Sequences - - open BuiltIns - - exception InternalError = Misc.InternalError - - (* tag a short constant *) - fun tag c = 2 * c + 1 - and semitag c = 2*c - - fun taggedWord w: word = w * 0w2 + 0w1 - and taggedWord64 w: Word64.word = w * 0w2 + 0w1 - - val tagBitMask = Word64.<<(Word64.fromInt ~1, 0w1) - val tagBitMask32 = Word64.andb(tagBitMask, 0wxffffffff) - - fun gen(instr, code) = code := instr :: !code - - fun genList([], _) = () - | genList(instr :: instrs, code) = (gen(instr, code); genList(instrs, code)) - - fun genPushReg(reg, code) = gen(storeRegPreIndex{regT=reg, regN=X_MLStackPtr, byteOffset= ~8}, code) - and genPopReg(reg, code) = gen(loadRegPostIndex{regT=reg, regN=X_MLStackPtr, byteOffset= 8}, code) - - (* Load a value using a scaled offset. This uses a normal scaled load if - the offset is in the range and an indexed offset if it is not. 
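A hedged illustration of the two paths, with made-up register choices and offsets picked for the example: loadScaledWord{dest=X1, base=X0, work=X16, offset=3} yields just

    [loadRegScaled{regT=X1, regN=X0, unitOffset=3}]

whereas an offset of 5000 is outside the 12-bit scaled range (0x1000), so the offset is first built in the work register and then used as an index:

    loadNonAddress(X16, 0w5000) @
    [loadRegIndexed{regN=X0, regM=X16, regT=X1, option=ExtUXTX ScaleOrShift}]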
*) - fun loadScaledOffset(scale, loadScaled, loadIndexed) {base, dest, work, offset} = - if offset < 0 then raise InternalError "loadScaledOffset: negative offset" - else if offset < 0x1000 - then [loadScaled{regT=dest, regN=base, unitOffset=offset}] - else - loadNonAddress(work, Word64.fromInt offset) @ - [loadIndexed{regN=base, regM=work, regT=dest, - option=ExtUXTX(if scale = 1 then NoScale else ScaleOrShift)}] - - (* Similar store. *) - and storeScaledOffset(scale, storeScaled, storeIndexed) {base, store, work, offset} = - if offset < 0 then raise InternalError "storeScaledOffset: negative offset" - else if offset < 0x1000 - then [storeScaled{regT=store, regN=base, unitOffset=offset}] - else - loadNonAddress(work, Word64.fromInt offset) @ - [storeIndexed{regN=base, regM=work, regT=store, - option=ExtUXTX(if scale = 1 then NoScale else ScaleOrShift)}] - - val loadScaledWord = loadScaledOffset(8, loadRegScaled, loadRegIndexed) - and storeScaledWord = storeScaledOffset(8, storeRegScaled, storeRegIndexed) - - val loadScaledPolyWord = - if is32in64 - then loadScaledOffset(4, loadRegScaled32, loadRegIndexed32) - else loadScaledOffset(8, loadRegScaled, loadRegIndexed) - - and storeScaledPolyWord = - if is32in64 - then storeScaledOffset(4, storeRegScaled32, storeRegIndexed32) - else storeScaledOffset(8, storeRegScaled, storeRegIndexed) - - (* Add a constant word to the source register and put the result in the - destination. regW is used as a work register if necessary. This is used - both for addition and subtraction. *) - fun addConstantWord({regS, regD, value=0w0, ...}, code) = - if regS = regD then () else gen(moveRegToReg{sReg=regS, dReg=regD}, code) - - | addConstantWord({regS, regD, regW, value}, code) = - let - (* If we have to load the constant it's better if the top 32-bits are - zero if possible. *) - val (isSub, unsigned) = - if value > Word64.<<(0w1, 0w63) - then (true, ~ value) - else (false, value) - in - if unsigned < Word64.<<(0w1, 0w24) - then (* We can put up to 24 in a shifted and an unshifted constant. *) - let - val w = Word.fromLarge(Word64.toLarge unsigned) - val high = Word.andb(Word.>>(w, 0w12), 0wxfff) - val low = Word.andb(w, 0wxfff) - val addSub = if isSub then subImmediate else addImmediate - in - if high <> 0w0 - then - ( - gen(addSub{regN=regS, regD=regD, immed=high, shifted=true}, code); - if low <> 0w0 - then gen(addSub{regN=regD, regD=regD, immed=low, shifted=false}, code) - else () - ) - else gen(addSub{regN=regS, regD=regD, immed=low, shifted=false}, code) - end - else - let - (* To minimise the constant and increase the chances that it - will fit in a single word look to see if we can shift it. *) - fun getShift(value, shift) = - if Word64.andb(value, 0w1) = 0w0 - then getShift(Word64.>>(value, 0w1), shift+0w1) - else (value, shift) - val (shifted, shift) = getShift(unsigned, 0w0) - in - genList(loadNonAddress(regW, shifted), code); - gen((if isSub then subShiftedReg else addShiftedReg) - {regM=regW, regN=regS, regD=regD, shift=ShiftLSL shift}, code) - end - end - - (* Remove items from the stack. If the second argument is true the value - on the top of the stack has to be moved. 
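For example, resetStack(2, true, code) pops the current top into X0, steps X_MLStackPtr up by two native words and pushes X0 back, so the two slots underneath the preserved top value are discarded.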
*) - fun resetStack(0, _, _) = () - | resetStack(nItems, true, code) = - ( - genPopReg(X0, code); - resetStack(nItems, false, code); - genPushReg(X0, code) - ) - | resetStack(nItems, false, code) = - addConstantWord({regS=X_MLStackPtr, regD=X_MLStackPtr, regW=X3, - value=Word64.fromLarge(Word.toLarge nativeWordSize) * Word64.fromInt nItems}, code) - - fun compareRegs(reg1, reg2, code) = - gen(subSShiftedReg{regM=reg2, regN=reg1, regD=XZero, shift=ShiftNone}, code) - - (* Compare registers using 32-bit operation on 323-in-64. *) - fun comparePolyRegs(reg1, reg2, code) = - gen((if is32in64 then subSShiftedReg32 else subSShiftedReg){regM=reg2, regN=reg1, regD=XZero, shift=ShiftNone}, code) - - (* Turn an absolute address into an object index. Does nothing in - native mode. *) - fun absoluteAddressToIndex(reg, cvec) = - if is32in64 - then - ( - gen(subShiftedReg{regM=X_Base32in64, regN=reg, regD=reg, shift=ShiftNone}, cvec); - gen(logicalShiftRight{shift=0w2, regN=reg, regD=reg}, cvec) - ) - else () - - (* Turn an index into an absolute address. *) - and indexToAbsoluteAddress(iReg, absReg, cvec) = - if is32in64 - then gen(addShiftedReg{regM=iReg, regN=X_Base32in64, regD=absReg, shift=ShiftLSL 0w2}, cvec) - else if iReg = absReg - then () - else gen(moveRegToReg{sReg=iReg, dReg=absReg}, cvec) - - (* Get a large word value from a "box". *) - fun unboxLargeWord(addrReg, valReg, cvec) = - if is32in64 - then - ( - indexToAbsoluteAddress(addrReg, valReg, cvec); - gen(loadRegScaled{regT=valReg, regN=valReg, unitOffset=0}, cvec) - ) - else gen(loadRegScaled{regT=valReg, regN=addrReg, unitOffset=0}, cvec) - - fun unboxDouble(addrReg, workReg, valReg, cvec) = - if is32in64 - then - ( - indexToAbsoluteAddress(addrReg, workReg, cvec); - gen(loadRegScaledDouble{regT=valReg, regN=workReg, unitOffset=0}, cvec) - ) - else gen(loadRegScaledDouble{regT=valReg, regN=addrReg, unitOffset=0}, cvec) - - fun unboxOrUntagSingle(addrReg, workReg, valReg, cvec) = - if is32in64 - then gen(loadRegIndexedFloat{regN=X_Base32in64, regM=addrReg, regT=valReg, option=ExtUXTX ScaleOrShift}, cvec) - else - ( - gen(logicalShiftRight{shift=0w32, regN=addrReg, regD=workReg}, cvec); - gen(moveGeneralToFloat{regN=workReg, regD=valReg}, cvec) - ) - - (* Sequence to allocate on the heap. The words are not initialised - apart from the length word. Returns the absolute address. *) - fun genAllocateFixedSize(words, flags, resultReg, workReg, code) = - let - val label = createLabel() - val wordsRequired = - if is32in64 - then (* Have to round this up to 8 bytes *) - Word64.andb(Word64.fromInt(words+2), ~ 0w2) - else Word64.fromInt(words+1) - in - (* Subtract the number of bytes required from the heap pointer and put in X0. *) - addConstantWord({regS=X_MLHeapAllocPtr, regD=X0, regW=X3, - value= ~ (Word64.fromLarge(Word.toLarge wordSize)) * wordsRequired}, code); - compareRegs(resultReg, X_MLHeapLimit, code); - gen(conditionalBranch(CondCarrySet, label), code); - gen(loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset}, code); - gen(branchAndLinkReg X16, code); - gen(registerMask [], code); (* Not used at the moment. *) - gen(setLabel label, code); - gen(moveRegToReg{sReg=resultReg, dReg=X_MLHeapAllocPtr}, code); - genList(loadNonAddress(workReg, - Word64.orb(Word64.fromInt words, Word64.<<(Word64.fromLarge(Word8.toLarge flags), - if is32in64 then 0w24 else 0w56))), code); - (* Store the length word. Have to use the unaligned version because offset is -ve. 
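As a concrete reading of the header layout, not a value taken from this patch: for a one-word mutable byte cell in native 64-bit mode the length word is Word64.orb(0w1, Word64.<<(Word64.fromLarge(Word8.toLarge flags), 0w56)), the length in the low bits and the flags byte at the top, written at byte offset ~8 immediately below the address that is returned; in 32-in-64 the flags are shifted by 0w24 and the word is written at offset ~4.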
*) - if is32in64 - then gen(storeRegUnscaled32{regT=workReg, regN=resultReg, byteOffset= ~4}, code) - else gen(storeRegUnscaled{regT=workReg, regN=resultReg, byteOffset= ~8}, code) - end - - (* Allocate space on the heap for a vector, string etc. sizeReg and flagsReg - contain the size and flags as untagged values. sizeReg is unchanged, flagsReg - is modified. The result address is in resultReg. All the registers must - be different. *) - fun allocateVariableSize({sizeReg, flagsReg, resultReg}, code) = - let - val trapLabel = createLabel() and noTrapLabel = createLabel() - in - (* Subtract the size as a number of bytes from the allocation ptr. *) - if is32in64 - then - ( - gen(subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=resultReg, shift=ShiftLSL 0w2}, code); - (* Subtract another 4 to allow for the length word. *) - gen(subImmediate{regN=resultReg, regD=resultReg, immed=0w4, shifted=false}, code); - (* Round this down to an 8-byte boundary. *) - gen(bitwiseAndImmediate{bits= ~ 0w8, regN=resultReg, regD=resultReg}, code) - ) - else - ( - gen(subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=resultReg, shift=ShiftLSL 0w3}, code); - (* Subtract another 8 to allow for the length word. *) - gen(subImmediate{regN=resultReg, regD=resultReg, immed=0w8, shifted=false}, code) - ); - (* If the size is large enough it is possible that this could wrap round. To check for that - we trap if either the result is less than the limit or if it is now greater than - the allocation pointer. *) - compareRegs(resultReg, X_MLHeapLimit, code); - gen(conditionalBranch(CondCarryClear, trapLabel), code); - compareRegs(resultReg, X_MLHeapAllocPtr, code); - gen(conditionalBranch(CondCarryClear, noTrapLabel), code); - gen(setLabel trapLabel, code); - gen(loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset}, code); - gen(branchAndLinkReg X16, code); - gen(registerMask [], code); (* Not used at the moment. *) - gen(setLabel noTrapLabel, code); - gen(moveRegToReg{sReg=resultReg, dReg=X_MLHeapAllocPtr}, code); - (* Combine the size with the flags in the top byte. *) - gen(orrShiftedReg{regM=flagsReg, regN=sizeReg, regD=flagsReg, - shift=ShiftLSL(if is32in64 then 0w24 else 0w56)}, code); - (* Store the length word. Have to use the unaligned version because offset is -ve. *) - if is32in64 - then gen(storeRegUnscaled32{regT=flagsReg, regN=resultReg, byteOffset= ~4}, code) - else gen(storeRegUnscaled{regT=flagsReg, regN=resultReg, byteOffset= ~8}, code) - end - - (* Set a register to either tagged(1) i.e. true or tagged(0) i.e. false. *) - fun setBooleanCondition(reg, condition, code) = - ( - genList(loadNonAddress(reg, Word64.fromInt(tag 1)), code); - (* If the condition is false the value used is the XZero incremented by 1 i.e. 1 *) - gen(conditionalSetIncrement{regD=reg, regTrue=reg, regFalse=XZero, cond=condition}, code) - ) - - (* Raise the overflow exception if the overflow bit has been set. *) - fun checkOverflow code = - let - val noOverflow = createLabel() - in - gen(conditionalBranch(CondNoOverflow, noOverflow), code); - gen(loadAddressConstant(X0, toMachineWord Overflow), code); - gen(loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, code); - gen(loadRegScaled{regT=X1, regN=X_MLStackPtr, unitOffset=0}, code); - gen(branchRegister X1, code); - gen(setLabel noOverflow, code) - end - - (* Stack check code: this is inserted at the start of a function to check that there - is sufficient ML stack available. 
It is also inserted, with a zero space value, - in a loop to ensure that the RTS can interrupt a function. - debugTrapAlways can be used to set a sort of breakpoint during debugging. *) - fun checkStackCode(regW, space, debugTrapAlways, code) = - let - val skipCheck = createLabel() - val defaultWords = 10 (* This is wired into the RTS. *) - val (testReg, entryPt) = - if space <= defaultWords - then (X_MLStackPtr, stackOverflowCallOffset) - else - ( - (* This is only used at the start of the code. X9 is wired into the RTS. *) - addConstantWord({regS=X_MLStackPtr, regD=X9, regW=regW, - value= ~ (Word64.fromLarge(Word.toLarge nativeWordSize)) * Word64.fromInt space}, code); - (X9, stackOverflowXCallOffset) - ) - in - gen(loadRegScaled{regT=regW, regN=X_MLAssemblyInt, unitOffset=stackLimitOffset}, code); - if debugTrapAlways - then () - else - ( - compareRegs(testReg, regW, code); - gen(conditionalBranch(CondCarrySet, skipCheck), code) - ); - gen(loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=entryPt}, code); - gen(branchAndLinkReg X16, code); - gen(registerMask [], code); (* Not used at the moment. *) - gen(setLabel skipCheck, code) - end - - (* Allocate a single byte cell and store the register into it. The result is - in X0 so reg must not be X0. *) - fun boxLargeWord(reg, code) = - ( - reg <> X0 orelse raise InternalError "boxLargeWord: X0"; - genAllocateFixedSize(Word.toInt(nativeWordSize div wordSize), F_bytes, X0, X5, code); - gen(storeRegScaled{regT=reg, regN=X0, unitOffset=0}, code); - absoluteAddressToIndex(X0, code) - ) - - (* Allocate a single byte cell for a "real" i.e. double-precision floating - point number. *) - fun boxDouble(reg, code) = - ( - genAllocateFixedSize(Word.toInt(0w8 div wordSize), F_bytes, X0, X5, code); - gen(storeRegScaledDouble{regT=reg, regN=X0, unitOffset=0}, code); - absoluteAddressToIndex(X0, code) - ) - - (* Single precision floats are shifted and tagged in native 64-bit but - boxed in 32-in-64. *) - fun boxOrTagFloat(reg, code) = - if is32in64 - then - ( - genAllocateFixedSize(1, F_bytes, X0, X5, code); - gen(storeRegScaledFloat{regT=reg, regN=X0, unitOffset=0}, code); - absoluteAddressToIndex(X0, code) - ) - else - ( - gen(moveFloatToGeneral{regN=V0, regD=X0}, code); - gen(logicalShiftLeft{shift=0w32, regN=X0, regD=X0}, code); - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, code) - ) - - type caseForm = - { - cases : (backendIC * word) list, - test : backendIC, - caseType: caseType, - default : backendIC - } - - (* Where the result, if any, should go *) - datatype whereto = - NoResult (* discard result *) - | ToStack (* Need a result but it can stay on the pseudo-stack *) - | ToX0 (* Need a result in X0. *) - - (* Are we at the end of the function. *) - datatype tail = - EndOfProc - | NotEnd - - - (* The flags byte is the high-order byte of length word. *) - val flagsByteOffset = if isBigEndian then ~ (Word.toInt wordSize) else ~1 - - (* Code generate a function or global declaration *) - fun codegen (pt, name, resultClosure, numOfArgs, localCount, parameters, hasClosure) = - let - val cvec = ref [] - - datatype decEntry = - StackAddr of int - | Empty - - val decVec = Array.array (localCount, Empty) - - (* Count of number of items on the stack. This excludes the arguments and - the return address. *) - val realstackptr = ref 1 (* The closure ptr is already there *) - - (* Maximum size of the stack. *) - val maxStack = ref 1 - - (* Whether the top of the stack is actually in X0. 
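Keeping this flag lets the generator avoid a push followed by an immediate pop in the common case where a value is computed into X0 and consumed straight away; ensureX0 below spills the cached value onto the real stack before anything else needs to be pushed.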
*) - val topInX0 = ref false - - (* Push a value onto the stack. *) - fun incsp () = - ( - realstackptr := !realstackptr + 1; - if !realstackptr > !maxStack - then maxStack := !realstackptr - else () - ) - - (* An entry has been removed from the stack. *) - fun decsp () = realstackptr := !realstackptr - 1 - - fun ensureX0 () = if ! topInX0 then (genPushReg(X0, cvec); incsp(); topInX0 := false) else () - - (* generates code from the tree *) - fun gencde (pt : backendIC, whereto : whereto, tailKind : tail, loopAddr) : unit = - let - val _ = !topInX0 andalso raise InternalError "topInX0 true at start" - - (* Save the stack pointer value here. We may want to reset the stack. *) - val oldsp = !realstackptr; - - (* Operations on ML memory always have the base as an ML address. - Word operations are always word aligned. The higher level will - have extracted any constant offset and scaled it if necessary. - That's helpful for the X86 but not for the ARM. We - have to turn them back into indexes. *) - (* This pushes two values to the stack: the base address and the index. *) - fun genMLAddress({base, index, offset}, scale) = - ( - gencde (base, ToStack, NotEnd, loopAddr); - offset mod scale = 0 orelse raise InternalError "genMLAddress"; - case (index, offset div scale) of - (NONE, soffset) => - (genList(loadNonAddress(X0, Word64.fromInt(tag soffset)), cvec); genPushReg(X0, cvec); incsp()) - | (SOME indexVal, 0) => gencde (indexVal, ToStack, NotEnd, loopAddr) - | (SOME indexVal, soffset) => - ( - gencde (indexVal, ToX0, NotEnd, loopAddr); - (* Add the offset as a shifted but not tagged value. *) - addConstantWord({regS=X0, regD=X0, regW=X1, value=Word64.fromInt(semitag soffset)}, cvec); - genPushReg(X0, cvec); - incsp(); - topInX0 := false - ) - ) - - val genCAddress = genMLAddress - - datatype mlLoadKind = MLLoadOffset of int | MLLoadReg of xReg - - fun genMLLoadAddress({base, index=NONE, offset}, scale) = - (* The index, if any, is a constant. *) - ( - gencde (base, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - (X0, MLLoadOffset(offset div scale)) - ) - - | genMLLoadAddress({base, index=SOME indexVal, offset}, scale) = - ( - gencde (base, ToStack, NotEnd, loopAddr); (* Push base addr to stack. *) - gencde (indexVal, ToX0, NotEnd, loopAddr); - (* Shift right to remove the tag. N.B. Indexes into ML memory are - unsigned. Unlike on the X86 we can't remove the tag by providing - a displacement and the only options are to scale by either 1 or 8. *) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - (* Add any constant offset. Does nothing if it's zero. *) - addConstantWord({regS=X0, regD=X0, regW=X3, - value=Word64.fromInt (* unsigned *)(offset div scale)}, cvec); - genPopReg(X1, cvec); (* Pop base reg into X1. *) - decsp(); - indexToAbsoluteAddress(X1, X1, cvec); - (X1, MLLoadReg X0) - ) - - (* Similar to genMLLoadAddress but for C addresses. There are two - differences. The index is signed so we use an arithmetic shift and - the base address is a LargeWord value i.e. the actual - address is held in the word pointed at by "base", unlike with - ML addresses. *) - fun genCLoadAddress({base, index=NONE, offset}, scale) = - ( - gencde (base, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - gen(loadRegScaled{regT=X0, regN=X0, unitOffset=0}, cvec); - (* In principle we could have a problem if the index is out of the - range for the offset field. However the simplifier only - converts index offsets into constant offsets for small positive - offsets. 
*) - (X0, MLLoadOffset(offset div scale)) - ) - | genCLoadAddress({base, index=SOME indexVal, offset}, scale) = - ( - gencde (base, ToStack, NotEnd, loopAddr); (* Push base addr to stack. *) - gencde (indexVal, ToX0, NotEnd, loopAddr); - (* Shift right to remove the tag. C indexes are SIGNED. *) - gen((if is32in64 then arithmeticShiftRight32 else arithmeticShiftRight) - {regN=X0, regD=X0, shift=0w1}, cvec); - (* Add any constant offset. Does nothing if it's zero. *) - addConstantWord({regS=X0, regD=X0, regW=X3, - value=Word64.fromInt (* unsigned *)(offset div scale)}, cvec); - genPopReg(X1, cvec); (* Pop base reg into X1. *) - indexToAbsoluteAddress(X1, X1, cvec); - gen(loadRegScaled{regT=X1, regN=X1, unitOffset=0}, cvec); - decsp(); - (X1, MLLoadReg X0) - ) - - (* Have to sign-extend 32-bit signed index in 32-in64. *) - val cIndexFactor = (if is32in64 then ExtSXTW else ExtUXTX) - - (* Compare a block of bytes. Jumps to labelEqual if all the bytes are - equal up to the length. Otherwise it drops through with the condition - code set to the last byte comparison that tested unequal. *) - fun blockCompareBytes(leftArg, rightArg, length, labelEqual, setZeroCC) = - let - val loopLabel = createLabel() - in - genMLAddress(leftArg, 1); - genMLAddress(rightArg, 1); - gencde (length, ToX0, NotEnd, loopAddr); - genPopReg(X2, cvec); (* right arg index - tagged value. *) - genPopReg(X1, cvec); (* right arg base address. *) - indexToAbsoluteAddress(X1, X1, cvec); - (* Add in the index N.B. ML index values are unsigned. *) - gen(addShiftedReg{regM=X2, regN=X1, regD=X1, shift=ShiftLSR 0w1}, cvec); - genPopReg(X3, cvec); (* left index *) - genPopReg(X2, cvec); - indexToAbsoluteAddress(X2, X2, cvec); - decsp(); decsp(); decsp(); decsp(); - gen(addShiftedReg{regM=X3, regN=X2, regD=X2, shift=ShiftLSR 0w1}, cvec); - (* Untag the length *) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - (* If necessary set the cc for the case where the length is zero. *) - if setZeroCC then compareRegs(X0, X0, cvec) else (); - gen(setLabel loopLabel, cvec); - gen(compareBranchZero(X0, labelEqual), cvec); - (* X2 is left arg addr, X1 is right arg addr. *) - gen(loadRegPostIndexByte{regT=X4, regN=X2, byteOffset=1}, cvec); - gen(loadRegPostIndexByte{regT=X3, regN=X1, byteOffset=1}, cvec); - compareRegs(X4, X3, cvec); - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - (* Loop if they're equal. *) - gen(conditionalBranch(CondEqual, loopLabel), cvec) - end - - val () = - case pt of - BICEval evl => genEval (evl, tailKind) - - | BICExtract ext => - (* This may just be being used to discard a value which isn't - used on this branch. N.B. genProc for mutual closures - assumes that this does not affect X1. *) - if whereto = NoResult then () - else - let - fun loadLocalStackValue addr = - ( - genList (loadScaledWord{dest=X0, base=X_MLStackPtr, work=X16, offset= !realstackptr + addr}, cvec); - topInX0 := true - ) - in - case ext of - BICLoadArgument locn => - (* The register arguments appear in order on the - stack, followed by the stack argumens in reverse - order. *) - if locn < 8 - then loadLocalStackValue (locn+1) - else loadLocalStackValue (numOfArgs-locn+8) - | BICLoadLocal locn => - ( - case Array.sub (decVec, locn) of - StackAddr n => loadLocalStackValue (~ n) - | _ => (* Should be on the stack, not a function. *) - raise InternalError "locaddr: bad stack address" - ) - | BICLoadClosure locn => - ( - loadLocalStackValue ~1; (* The closure itself. 
*) - indexToAbsoluteAddress(X0, X0, cvec); - genList(loadScaledPolyWord{dest=X0, base=X0, work=X16, - (* The first word is the code. This is two poly words in 32-in-64. *) - offset=if is32in64 then locn+2 else locn+1}, cvec) - ) - | BICLoadRecursive => - loadLocalStackValue ~1 (* The closure itself - first value on the stack. *) - end - - | BICField {base, offset} => - ( - gencde (base, ToX0, NotEnd, loopAddr); - if is32in64 - then - ( - (* Can use an indexed load if the offset is zero otherwise it takes two instrs. *) - if offset = 0 - then gen(loadRegIndexed32{regN=X_Base32in64, regM=X0, regT=X0, option=ExtUXTX ScaleOrShift}, cvec) - else - ( - indexToAbsoluteAddress(X0, X0, cvec); - genList (loadScaledPolyWord{dest=X0, base=X0, work=X16, offset=offset}, cvec) - ) - ) - else genList (loadScaledPolyWord{dest=X0, base=X0, work=X16, offset=offset}, cvec) - ) - - | BICLoadContainer {base, offset} => - ( - gencde (base, ToX0, NotEnd, loopAddr); - genList (loadScaledWord{dest=X0, base=X0, work=X16, offset=offset}, cvec) - ) - - | BICLambda lam => genProc (lam, false, fn () => ()) - - | BICConstnt(w, _) => - ( - if isShort w - then - let - val taggedValue = taggedWord64(Word64.fromLarge(Word.toLarge(toShort w))) - in - genList(loadNonAddress(X0, - (* We have sign extended this to a 64-bit value but the high-order bits - should always be zero. *) - if is32in64 then LargeWord.andb(taggedValue, 0wxffffffff) else taggedValue), cvec) - end - else gen(loadAddressConstant(X0, w), cvec); - topInX0 := true - ) - - | BICCond (testPart, thenPart, elsePart) => - genCond (testPart, thenPart, elsePart, whereto, tailKind, loopAddr) - - | BICNewenv(decls, exp) => - let - (* Processes a list of entries. *) - - (* Mutually recursive declarations. May be either lambdas or constants. Recurse down - the list pushing the addresses of the closure vectors, then unwind the - recursion and fill them in. *) - fun genMutualDecs [] = () - - | genMutualDecs ({lambda, addr, ...} :: otherDecs) = - genProc (lambda, true, - fn() => - ( - Array.update (decVec, addr, StackAddr (! realstackptr)); - genMutualDecs (otherDecs) - )) - - fun codeDecls(BICRecDecs dl) = genMutualDecs dl - - | codeDecls(BICDecContainer{size, addr}) = - ( - (* If this is a container we have to process it here otherwise it - will be removed in the stack adjustment code. *) - (* The stack entries have to be initialised. Set them to tagged(0). *) - genList(loadNonAddress(X0, Word64.fromInt(tag 0)), cvec); - let fun pushN 0 = () | pushN n = (genPushReg(X0, cvec); pushN (n-1)) in pushN size end; - gen(moveRegToReg{sReg=X_MLStackPtr, dReg=X0}, cvec); - genPushReg(X0, cvec); (* Push the address of this container. *) - realstackptr := !realstackptr + size + 1; (* Pushes N words plus the address. *) - Array.update (decVec, addr, StackAddr(!realstackptr)) - ) - - | codeDecls(BICDeclar{value, addr, ...}) = - ( - gencde (value, ToStack, NotEnd, loopAddr); - Array.update (decVec, addr, StackAddr(!realstackptr)) - ) - | codeDecls(BICNullBinding exp) = gencde (exp, NoResult, NotEnd, loopAddr) - in - List.app codeDecls decls; - gencde (exp, whereto, tailKind, loopAddr) - end - - | BICBeginLoop {loop=body, arguments} => - (* Execute the body which will contain at least one Loop instruction. - There will also be path(s) which don't contain Loops and these - will drop through. *) - let - val args = List.map #1 arguments - (* Evaluate each of the arguments, pushing the result onto the stack. 
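Each argument therefore ends up in a fixed stack slot, recorded both in decVec and in argIndexList; the matching Loop code further down evaluates the new values onto the top of the stack first, so later arguments can still see the old values of earlier ones, and only then copies them down into those recorded slots before branching back to startLoop.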
*) - fun genLoopArg ({addr, value, ...}) = - ( - gencde (value, ToStack, NotEnd, loopAddr); - Array.update (decVec, addr, StackAddr (!realstackptr)); - !realstackptr (* Return the posn on the stack. *) - ) - val argIndexList = map genLoopArg args; - - val startSp = ! realstackptr; (* Remember the current top of stack. *) - val startLoop = createLabel () - val () = gen(setLabel startLoop, cvec) (* Start of loop *) - in - (* Process the body, passing the jump-back address down for the Loop instruction(s). *) - gencde (body, whereto, tailKind, SOME(startLoop, startSp, argIndexList)) - (* Leave the arguments on the stack. They can be cleared later if needed. *) - end - - | BICLoop argList => (* Jump back to the enclosing BeginLoop. *) - let - val (startLoop, startSp, argIndexList) = - case loopAddr of - SOME l => l - | NONE => raise InternalError "No BeginLoop for Loop instr" - (* Evaluate the arguments. First push them to the stack because evaluating - an argument may depend on the current value of others. Only when we've - evaluated all of them can we overwrite the original argument positions. *) - fun loadArgs ([], []) = !realstackptr - startSp (* The offset of all the args. *) - | loadArgs (arg:: argList, _ :: argIndexList) = - let - (* Evaluate all the arguments. *) - val () = gencde (arg, ToStack, NotEnd, NONE); - val argOffset = loadArgs(argList, argIndexList); - in - genPopReg(X0, cvec); - genList(storeScaledWord{store=X0, base=X_MLStackPtr, work=X16, offset=argOffset-1}, cvec); - decsp(); (* The argument has now been popped. *) - argOffset - end - | loadArgs _ = raise InternalError "loadArgs: Mismatched arguments"; - - val _: int = loadArgs(List.map #1 argList, argIndexList) - in - if !realstackptr <> startSp - then resetStack (!realstackptr - startSp, false, cvec) (* Remove any local variables. *) - else (); - - (* Jump back to the start of the loop. *) - checkStackCode(X10, 0, false, cvec); - - gen(unconditionalBranch startLoop, cvec) - end - - | BICRaise exp => - ( - gencde (exp, ToStack, NotEnd, loopAddr); - genPopReg(X0, cvec); - (* Copy the handler "register" into the stack pointer. Then - jump to the address in the first word. The second word is - the next handler. This is set up in the handler. We have a lot - more raises than handlers since most raises are exceptional conditions - such as overflow so it makes sense to minimise the code in each raise. *) - gen(loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec); - gen(loadRegScaled{regT=X1, regN=X_MLStackPtr, unitOffset=0}, cvec); - gen(branchRegister X1, cvec) - ) - - | BICHandle {exp, handler, exPacketAddr} => - let - (* Save old handler *) - val () = gen(loadRegScaled{regT=X0, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec) - val () = genPushReg(X0, cvec) - val () = incsp () - val handlerLabel = createLabel() - (* Push address of handler. *) - val () = gen(loadLabelAddress(X0, handlerLabel), cvec) - val () = genPushReg(X0, cvec) - val () = incsp() - (* Store the address of the stack pointer into the handler register. *) - val () = gen(storeRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec) - - (* Code generate the body; "NotEnd" because we have to come back - to remove the handler; "ToStack" because delHandler needs - a result to carry down. *) - val () = gencde (exp, ToStack, NotEnd, loopAddr) - - (* Now get out of the handler and restore the old one. *) - val () = genPopReg(X0, cvec) (* Pop the result. 
*) - val () = genPopReg(X1, cvec) (* Pop and discard the handler address. *) - val () = genPopReg(X1, cvec) (* Pop the old handler. *) - val () = gen(storeRegScaled{regT=X1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec) - val () = genPushReg(X0, cvec) (* Push the result. *) - - val skipHandler = createLabel() - val () = gen(unconditionalBranch skipHandler, cvec) - val () = realstackptr := oldsp - val () = gen(setLabel handlerLabel, cvec) - (* The exception raise code resets the stack pointer to the value in the exception handler - so this is probably redundant. Leave it for the moment, *) - val () = gen(loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec) - (* We must, though, restore the old handler. *) - val () = genPopReg(X1, cvec) (* Pop and discard the handler address. *) - val () = genPopReg(X1, cvec) (* Pop the old handler. *) - val () = gen(storeRegScaled{regT=X1, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec) - (* Push the exception packet which is in X0 and set the address. *) - val () = genPushReg(X0, cvec) - val () = incsp () - val () = Array.update (decVec, exPacketAddr, StackAddr(!realstackptr)) - val () = gencde (handler, ToStack, NotEnd, loopAddr) - (* Have to remove the exception packet. *) - val () = resetStack(1, true, cvec) - val () = decsp() - - (* Finally fix-up the jump around the handler *) - val () = gen(setLabel skipHandler, cvec) - in - () - end - - | BICCase ({cases, test, default, firstIndex, ...}) => - let - val () = gencde (test, ToStack, NotEnd, loopAddr) - (* Label to jump to at the end of each case. *) - val exitJump = createLabel() - val () = genPopReg(X0, cvec) - val () = decsp () - - (* Subtract the minimum even if it is zero to remove the tag. - This leaves us with a shifted but untagged value. Don't check for overflow. - Instead allow large values to wrap around and check later.*) - val () = addConstantWord({regS=X0, regD=X0, regW=X1, - value= ~(taggedWord64(Word64.fromLarge(Word.toLargeX firstIndex)))}, cvec) - - (* Create the case labels. *) - val nCases = List.length cases - val caseLabels = List.tabulate(nCases, fn _ => createLabel()) - val defaultLabel = createLabel() - - (* Compare with the number of cases and go to the default if it is - not less. We use an unsigned comparison and compare with - the semitagged value because we've removed the tag bit. *) - (* TODO: Not necessary if it exhaustive. *) - (* For the moment load the value into a register and compare. *) - val () = genList(loadNonAddress(X1, Word64.fromInt nCases * 0w2), cvec) - val () = compareRegs(X0, X1, cvec) - val () = gen(conditionalBranch(CondCarrySet, defaultLabel), cvec) - (* Load the address of the jump table. *) - val tableLabel = createLabel() - val () = gen(loadLabelAddress(X1, tableLabel), cvec) - (* Add the value shifted by one since it's already shifted. *) - val () = gen(addShiftedReg{regM=X0, regN=X1, regD=X0, shift=ShiftLSL 0w1}, cvec) - val () = gen(branchRegister X0, cvec) - (* Put in the branch table. *) - val () = gen(setLabel tableLabel, cvec) - val () = List.app(fn label => gen(unconditionalBranch label, cvec)) caseLabels - - (* The default case, if any, follows the case statement. *) - (* If we have a jump to the default set it to jump here. 
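A worked note on the dispatch sequence above: after the tagged firstIndex has been subtracted, X0 holds 2*i for case i, so adding it to the table address shifted left by one more bit lands 4*i bytes past tableLabel, which is exactly the i-th unconditionalBranch since each planted branch is a single 4-byte instruction.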
*) - local - fun fixDefault(NONE, defCase) = gen(setLabel defCase, cvec) - | fixDefault(SOME _, _) = () - in - val () = ListPair.appEq fixDefault (cases, caseLabels) - end - val () = gen(setLabel defaultLabel, cvec) - val () = gencde (default, whereto, tailKind, loopAddr) - - fun genCases(SOME body, label) = - ( - (* First exit from the previous case or the default if - this is the first. *) - gen(unconditionalBranch exitJump, cvec); - (* Remove the result - the last case will leave it. *) - case whereto of ToStack => decsp () | NoResult => () | ToX0 => (); - topInX0 := false; - (* Fix up the jump to come here. *) - gen(setLabel label, cvec); - gencde (body, whereto, tailKind, loopAddr) - ) - | genCases(NONE, _) = () - - val () = ListPair.appEq genCases (cases, caseLabels) - - (* Finally set the exit jump to come here. *) - val () = gen(setLabel exitJump, cvec) - in - () - end - - | BICTuple recList => - let - val size = List.length recList - in - (* Get the fields and push them to the stack. *) - List.app(fn v => gencde (v, ToStack, NotEnd, loopAddr)) recList; - genAllocateFixedSize(size, 0w0, X0, X1, cvec); - List.foldl(fn (_, w) => - ( - genPopReg(X1, cvec); - genList(storeScaledPolyWord{store=X1, base=X0, work=X16, offset=w-1}, cvec); - w-1) - ) - size recList; - (* If it's 32-in-64 this has to be converted to an object pointer. *) - absoluteAddressToIndex(X0, cvec); - topInX0 := true; - realstackptr := !realstackptr - size - end - - | BICSetContainer{container, tuple, filter} => - (* Copy the contents of a tuple into a container. If the tuple is a - Tuple instruction we can avoid generating the tuple and then - unpacking it and simply copy the fields that make up the tuple - directly into the container. *) - ( - case tuple of - BICTuple cl => - (* Simply set the container from the values. *) - let - (* Push the address of the container to the stack. *) - val _ = gencde (container, ToStack, NotEnd, loopAddr) - - fun setValues([], _, _) = () - - | setValues(v::tl, sourceOffset, destOffset) = - if sourceOffset < BoolVector.length filter andalso BoolVector.sub(filter, sourceOffset) - then - ( - (* Get the value to store into X0. *) - gencde (v, ToX0, NotEnd, loopAddr); - (* Load the address of the container from the stack and store - the value into the container. *) - gen(loadRegScaled{regT=X1, regN=X_MLStackPtr, unitOffset=0}, cvec); - genList(storeScaledWord{store=X0, base=X1, work=X16, offset=destOffset}, cvec); - topInX0 := false; (* We've used it. *) - setValues(tl, sourceOffset+1, destOffset+1) - ) - else setValues(tl, sourceOffset+1, destOffset) - in - setValues(cl, 0, 0) - (* The container address is still on the stack. *) - end - - | _ => - let (* General case: copy values from the source tuple. *) - (* First the target tuple, then the container. *) - val () = gencde (tuple, ToStack, NotEnd, loopAddr) - val () = gencde (container, ToX0, NotEnd, loopAddr) - val () = genPopReg(X1, cvec) - val () = indexToAbsoluteAddress(X1, X1, cvec) - val () = decsp() - (* Container address is in X0, tuple in X1. *) - - val last = BoolVector.foldli(fn (i, true, _) => i | (_, false, n) => n) ~1 filter - - fun copy (sourceOffset, destOffset) = - if BoolVector.sub(filter, sourceOffset) - then - ( - (* Load the value in the tuple. *) - genList(loadScaledPolyWord{dest=X2, base=X1, work=X16, offset=sourceOffset}, cvec); - (* Store into the container. 
*) - genList(storeScaledWord{store=X2, base=X0, work=X16, offset=destOffset}, cvec); - if sourceOffset = last - then () - else copy (sourceOffset+1, destOffset+1) - ) - else copy(sourceOffset+1, destOffset) - in - copy (0, 0); - topInX0 := true (* Container address is in X0 *) - end - ) - - | BICTagTest { test, tag=tagValue, ... } => - ( - gencde (test, ToStack, NotEnd, loopAddr); - genPopReg(X0, cvec); - gen(subSImmediate{regN=X0, regD=XZero, immed=taggedWord tagValue, shifted=false}, cvec); - setBooleanCondition(X0, CondEqual, cvec); - genPushReg(X0, cvec) - ) - - | BICNullary {oper=GetCurrentThreadId} => - ( - gen(loadRegScaled{regT=X0, regN=X_MLAssemblyInt, unitOffset=threadIdOffset}, cvec); - topInX0 := true - ) - - | BICNullary {oper=CheckRTSException} => - (* Raise an exception in ML if the last RTS call set the exception packet. *) - let (* It may be better to do this in all RTS calls. *) - val noException = createLabel() - in - (* Load the packet and see if it is nil (tagged 0) *) - gen(loadRegScaled{regT=X0, regN=X_MLAssemblyInt, unitOffset=exceptionPacketOffset}, cvec); - gen(subSImmediate{regN=X0, regD=XZero, immed=taggedWord 0w0, shifted=false}, cvec); - gen(conditionalBranch(CondEqual, noException), cvec); - (* If it isn't then raise the exception. *) - gen(loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec); - gen(loadRegScaled{regT=X1, regN=X_MLStackPtr, unitOffset=0}, cvec); - gen(branchRegister X1, cvec); - gen(setLabel noException, cvec) - end - - | BICNullary {oper=CreateMutex} => - let - (* Allocate memory for a mutex. Use a native word as a mutable, weak, no-overwrite, byte cell - which is the same as a volatileRef. This ensures that it will always be cleared when it is - loaded even if it was locked when it was saved. *) - val flags = Word8.orb(F_mutable, Word8.orb(F_weak, Word8.orb(F_noOverwrite, F_bytes))) (* 0wx69 *) - in - genAllocateFixedSize(Word.toInt(nativeWordSize div wordSize), flags, X0, X5, cvec); - gen(storeRegScaled{regT=XZero, regN=X0, unitOffset=0}, cvec); - absoluteAddressToIndex(X0, cvec); - topInX0 := true - end - - | BICUnary { oper, arg1 } => genUnary(oper, arg1, loopAddr) - - | BICBinary { oper, arg1, arg2 } => genBinary(oper, arg1, arg2, loopAddr) - - | BICAllocateWordMemory {numWords, flags, initial } => - let - fun doAllocateAndInit() = - let - val () = gencde (numWords, ToStack, NotEnd, loopAddr) - val () = gencde (flags, ToStack, NotEnd, loopAddr) - val () = gencde (initial, ToX0, NotEnd, loopAddr) - val exitLabel = createLabel() and loopLabel = createLabel() - in - genPopReg(X2, cvec); (* Flags as tagged value. *) - gen(logicalShiftRight32(*byte*){regN=X2, regD=X2, shift=0w1}, cvec); - genPopReg(X1, cvec); (* Length as tagged value. *) - gen(logicalShiftRight{regN=X1, regD=X1, shift=0w1}, cvec); - genPushReg(X0, cvec); (* Save initialiser - TODO: Add it to save set. *) - allocateVariableSize({sizeReg=X1, flagsReg=X2, resultReg=X0}, cvec); - genPopReg(X3, cvec); (* Pop initialiser. *) - (* Add the length in bytes so we point at the end. *) - gen(addShiftedReg{regM=X1, regN=X0, regD=X1, - shift=ShiftLSL(if is32in64 then 0w2 else 0w3)}, cvec); - (* Loop to initialise. *) - gen(setLabel loopLabel, cvec); - compareRegs(X1, X0, cvec); (* Are we at the start? 
*) - gen(conditionalBranch(CondEqual, exitLabel), cvec); - if is32in64 - then gen(storeRegPreIndex32{regT=X3, regN=X1, byteOffset= ~4}, cvec) - else gen(storeRegPreIndex{regT=X3, regN=X1, byteOffset= ~8}, cvec); - gen(unconditionalBranch loopLabel, cvec); - gen(setLabel exitLabel, cvec); - absoluteAddressToIndex(X0, cvec); - decsp(); decsp() - end - in - case (numWords, flags) of - (BICConstnt(length, _), BICConstnt(flagValue, _)) => - if isShort length andalso toShort length = 0w1 andalso isShort flagValue - then (* This is a very common case for refs. *) - let - val flagByte = Word8.fromLargeWord(Word.toLargeWord(toShort flagValue)) - in - gencde (initial, ToStack, NotEnd, loopAddr); (* Initialiser. *) - genAllocateFixedSize(1, flagByte, X0, X1, cvec); - genPopReg(X1, cvec); - gen((if is32in64 then storeRegScaled32 else storeRegScaled){regT=X1, regN=X0, unitOffset=0}, cvec); - absoluteAddressToIndex(X0, cvec); - decsp(); topInX0 := true - end - else (* Constant but not a single. *) doAllocateAndInit() - | _ => (* Not constant. *) doAllocateAndInit() - end - - | BICLoadOperation { kind=LoadStoreMLWord {isImmutable=false}, address={base, index=NONE, offset=0}} => - ( - gencde (base, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - gen((if is32in64 then loadAcquire32 else loadAcquire){regN=X0, regT=X0}, cvec) - ) - - | BICLoadOperation { kind=LoadStoreMLWord _, address} => - ( - case genMLLoadAddress(address, Word.toInt wordSize) of - (base, MLLoadOffset offset) => - genList(loadScaledPolyWord{dest=X0, base=base, work=X16, offset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen((if is32in64 then loadRegIndexed32 else loadRegIndexed) - {regN=base, regM=indexR, regT=X0, option=ExtUXTX ScaleOrShift}, cvec) - ) - - | BICLoadOperation { kind=LoadStoreMLByte _, address} => - ( - case genMLLoadAddress(address, 1) of - (base, MLLoadOffset offset) => - gen(loadRegScaledByte{regT=X0, regN=base, unitOffset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen(loadRegIndexedByte{regN=base, regM=indexR, regT=X0, option=ExtUXTX NoScale}, cvec); - - (* Have to tag the result. *) - gen(logicalShiftLeft32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate32{regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | BICLoadOperation { kind=LoadStoreC8, address} => - ( - case genCLoadAddress(address, 1) of - (base, MLLoadOffset offset) => - if offset < 0 (* C offsets can be negative. *) - then gen(loadRegUnscaledByte{regT=X0, regN=base, byteOffset=offset}, cvec) - else gen(loadRegScaledByte{regT=X0, regN=base, unitOffset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen(loadRegIndexedByte{regN=base, regM=indexR, regT=X0, option=cIndexFactor NoScale}, cvec); - - (* Have to tag the result. *) - gen(logicalShiftLeft32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate32{regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | BICLoadOperation { kind=LoadStoreC16, address} => - ( - case genCLoadAddress(address, 2) of - (base, MLLoadOffset offset) => - if offset < 0 (* C offsets can be negative. *) - then gen(loadRegUnscaled16{regT=X0, regN=base, byteOffset=offset*2}, cvec) - else gen(loadRegScaled16{regT=X0, regN=base, unitOffset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen(loadRegIndexed16{regN=base, regM=indexR, regT=X0, option=cIndexFactor ScaleOrShift}, cvec); - - (* Have to tag the result. 
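Short ML values are represented as 2*v+1, so an untagged byte b read here has to become the word 2*b+1; the shift left by one followed by the OR with 1 below does exactly that, e.g. the byte 0x41 is pushed as 0x83.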
*) - gen(logicalShiftLeft32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate32{regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | BICLoadOperation { kind=LoadStoreC32, address} => - ( - case genCLoadAddress(address, 4) of - (base, MLLoadOffset offset) => - if offset < 0 (* C offsets can be negative. *) - then gen(loadRegUnscaled32{regT=X1, regN=base, byteOffset=offset*4}, cvec) - else gen(loadRegScaled32{regT=X1, regN=base, unitOffset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen(loadRegIndexed32{regN=base, regM=indexR, regT=X1, option=cIndexFactor ScaleOrShift}, cvec); - - (* Tag this in native 64-bits but box it in 32-in-64 *) - if is32in64 - then boxLargeWord(X1, cvec) - else - ( - gen(logicalShiftLeft{regN=X1, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec) - ) - ) - - | BICLoadOperation { kind=LoadStoreC64, address} => - ( - case genCLoadAddress(address, 8) of - (base, MLLoadOffset offset) => - if offset < 0 (* C offsets can be negative. *) - then gen(loadRegUnscaled{regT=X1, regN=base, byteOffset=offset*8}, cvec) - else gen(loadRegScaled{regT=X1, regN=base, unitOffset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen(loadRegIndexed{regN=base, regM=indexR, regT=X1, option=cIndexFactor ScaleOrShift}, cvec); - - (* Load the value at the address and box it. *) - boxLargeWord(X1, cvec) - ) - - | BICLoadOperation { kind=LoadStoreCFloat, address} => - ( - case genCLoadAddress(address, 4) of - (base, MLLoadOffset offset) => - if offset < 0 (* C offsets can be negative. *) - then gen(loadRegUnscaledFloat{regT=V0, regN=base, byteOffset=offset*4}, cvec) - else gen(loadRegScaledFloat{regT=V0, regN=base, unitOffset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen(loadRegIndexedFloat{regN=base, regM=indexR, regT=V0, option=cIndexFactor ScaleOrShift}, cvec); - (* This is defined to return a "real" i.e. a double so we need to convert it - to a double and then box the result. *) - gen(convertFloatToDouble{regN=V0, regD=V0}, cvec); - boxDouble(V0, cvec) - ) - - | BICLoadOperation { kind=LoadStoreCDouble, address} => - ( - case genCLoadAddress(address, 8) of - (base, MLLoadOffset offset) => - if offset < 0 (* C offsets can be negative. *) - then gen(loadRegUnscaledDouble{regT=V0, regN=base, byteOffset=offset*8}, cvec) - else gen(loadRegScaledDouble{regT=V0, regN=base, unitOffset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen(loadRegIndexedDouble{regN=base, regM=indexR, regT=V0, option=cIndexFactor ScaleOrShift}, cvec); - (* Box the result. *) - boxDouble(V0, cvec) - ) - - | BICLoadOperation { kind=LoadStoreUntaggedUnsigned, address} => - ( - case genMLLoadAddress(address, Word.toInt wordSize) of - (base, MLLoadOffset offset) => - genList(loadScaledPolyWord{dest=X0, base=base, work=X16, offset=offset}, cvec) - | (base, MLLoadReg indexR) => - gen((if is32in64 then loadRegIndexed32 else loadRegIndexed) - {regN=base, regM=indexR, regT=X0, option=ExtUXTX ScaleOrShift}, cvec); - - (* Have to tag the result. 
*) - gen(logicalShiftLeft{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | BICStoreOperation { kind=LoadStoreMLWord _, address={base, index=NONE, offset=0}, value } => - ( - gencde (base, ToStack, NotEnd, loopAddr); - gencde (value, ToX0, NotEnd, loopAddr); - genPopReg(X1, cvec); (* Address *) - indexToAbsoluteAddress(X1, X1, cvec); - decsp(); - gen((if is32in64 then storeRelease32 else storeRelease){regN=X1, regT=X0}, cvec) - ) - - | BICStoreOperation { kind=LoadStoreMLWord _, address, value } => - ( - genMLAddress(address, Word.toInt wordSize); - gencde (value, ToStack, NotEnd, loopAddr); - genPopReg(X0, cvec); (* Value to store *) - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Shift right to remove the tag. N.B. Indexes into ML memory are - unsigned. Unlike on the X86 we can't remove the tag by providing - a displacement and the only options are to scale by either 1 or 8. *) - gen(logicalShiftRight{regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address. *) - indexToAbsoluteAddress(X2, X2, cvec); - gen((if is32in64 then storeRegIndexed32 else storeRegIndexed) - {regN=X2, regM=X1, regT=X0, option=ExtUXTX ScaleOrShift}, cvec); - (* Don't put the unit result in; it probably isn't needed, *) - decsp(); decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreMLByte _, address, value } => - ( - (* Untag the value and store the byte. *) - genMLAddress(address, 1); - gencde (value, ToStack, NotEnd, loopAddr); - genPopReg(X0, cvec); (* Value to store *) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); (* Untag it *) - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Shift right to remove the tag. N.B. Indexes into ML memory are - unsigned. Unlike on the X86 we can't remove the tag by providing - a displacement and the only options are to scale by either 1 or 8. *) - gen(logicalShiftRight{regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address. *) - indexToAbsoluteAddress(X2, X2, cvec); - gen(storeRegIndexedByte{regN=X2, regM=X1, regT=X0, option=ExtUXTX NoScale}, cvec); - (* Don't put the unit result in; it probably isn't needed, *) - decsp(); decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreC8, address, value} => - ( - genCAddress(address, 1); - gencde (value, ToX0, NotEnd, loopAddr); (* Value to store *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); (* Untag it *) - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Untag. C indexes are signed. *) - gen((if is32in64 then arithmeticShiftRight32 else arithmeticShiftRight) - {regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address as a SysWord.word value. *) - indexToAbsoluteAddress(X2, X2, cvec); - gen(loadRegScaled{regT=X2, regN=X2, unitOffset=0}, cvec); (* Actual address *) - gen(storeRegIndexedByte{regN=X2, regM=X1, regT=X0, option=cIndexFactor NoScale}, cvec); - topInX0 := false; - decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreC16, address, value} => - ( - genCAddress(address, 2); - gencde (value, ToX0, NotEnd, loopAddr); (* Value to store *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); (* Untag it *) - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Untag. C indexes are signed. *) - gen((if is32in64 then arithmeticShiftRight32 else arithmeticShiftRight) - {regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address as a SysWord.word value. 
*) - indexToAbsoluteAddress(X2, X2, cvec); - gen(loadRegScaled{regT=X2, regN=X2, unitOffset=0}, cvec); (* Actual address *) - gen(storeRegIndexed16{regN=X2, regM=X1, regT=X0, option=cIndexFactor ScaleOrShift}, cvec); - topInX0 := false; - decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreC32, address, value} => - ( - genCAddress(address, 4); - gencde (value, ToX0, NotEnd, loopAddr); (* Value to store *) - (* This is tagged in 64-bit but boxed in 32-bit *) - if is32in64 - then - ( - indexToAbsoluteAddress(X0, X0, cvec); - gen(loadRegScaled32{regT=X0, regN=X0, unitOffset=0}, cvec) - ) - else gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); (* Untag it *) - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Untag. C indexes are signed. *) - gen((if is32in64 then arithmeticShiftRight32 else arithmeticShiftRight) - {regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address as a SysWord.word value. *) - indexToAbsoluteAddress(X2, X2, cvec); - gen(loadRegScaled{regT=X2, regN=X2, unitOffset=0}, cvec); (* Actual address *) - gen(storeRegIndexed32{regN=X2, regM=X1, regT=X0, option=cIndexFactor ScaleOrShift}, cvec); - topInX0 := false; - decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreC64, address, value} => - ( - genCAddress(address, 8); - gencde (value, ToX0, NotEnd, loopAddr); (* Value to store. This is boxed. *) - indexToAbsoluteAddress(X0, X0, cvec); - gen(loadRegScaled{regT=X0, regN=X0, unitOffset=0}, cvec); - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Untag. C indexes are signed. *) - gen((if is32in64 then arithmeticShiftRight32 else arithmeticShiftRight) - {regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address as a SysWord.word value. *) - indexToAbsoluteAddress(X2, X2, cvec); - gen(loadRegScaled{regT=X2, regN=X2, unitOffset=0}, cvec); (* Actual address *) - gen(storeRegIndexed{regN=X2, regM=X1, regT=X0, option=cIndexFactor ScaleOrShift}, cvec); - topInX0 := false; - decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreCFloat, address, value} => - ( - genCAddress(address, 4); - gencde (value, ToX0, NotEnd, loopAddr); (* Value to store *) - (* This is a boxed double. It needs to be converted to a float. *) - indexToAbsoluteAddress(X0, X0, cvec); - gen(loadRegScaledDouble{regT=V0, regN=X0, unitOffset=0}, cvec); - gen(convertDoubleToFloat{regN=V0, regD=V0}, cvec); - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Untag. C indexes are signed. *) - gen((if is32in64 then arithmeticShiftRight32 else arithmeticShiftRight) - {regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address as a SysWord.word value. *) - indexToAbsoluteAddress(X2, X2, cvec); - gen(loadRegScaled{regT=X2, regN=X2, unitOffset=0}, cvec); (* Actual address *) - gen(storeRegIndexedFloat{regN=X2, regM=X1, regT=V0, option=cIndexFactor ScaleOrShift}, cvec); - topInX0 := false; - decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreCDouble, address, value} => - ( - genCAddress(address, 8); - gencde (value, ToX0, NotEnd, loopAddr); (* Value to store *) - (* This is a boxed double. *) - indexToAbsoluteAddress(X0, X0, cvec); - gen(loadRegScaledDouble{regT=V0, regN=X0, unitOffset=0}, cvec); - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Untag. C indexes are signed. *) - gen((if is32in64 then arithmeticShiftRight32 else arithmeticShiftRight) - {regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address as a SysWord.word value. 
*) - indexToAbsoluteAddress(X2, X2, cvec); - gen(loadRegScaled{regT=X2, regN=X2, unitOffset=0}, cvec); (* Actual address *) - gen(storeRegIndexedDouble{regN=X2, regM=X1, regT=V0, option=cIndexFactor ScaleOrShift}, cvec); - topInX0 := false; - decsp(); decsp() - ) - - | BICStoreOperation { kind=LoadStoreUntaggedUnsigned, address, value} => - ( - (* Almost the same as LoadStoreMLWord except that the value to be stored - must be untagged before it is stored. This is used primarily to set - the length word on a string. *) - genMLAddress(address, Word.toInt wordSize); - gencde (value, ToStack, NotEnd, loopAddr); - genPopReg(X0, cvec); (* Value to store *) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - genPopReg(X1, cvec); (* Index: a tagged value. *) - (* Shift right to remove the tag. N.B. Indexes into ML memory are - unsigned. Unlike on the X86 we can't remove the tag by providing - a displacement and the only options are to scale by either 1 or 8. *) - gen(logicalShiftRight{regN=X1, regD=X1, shift=0w1}, cvec); - genPopReg(X2, cvec); (* Base address. *) - indexToAbsoluteAddress(X2, X2, cvec); - gen((if is32in64 then storeRegIndexed32 else storeRegIndexed) - {regN=X2, regM=X1, regT=X0, option=ExtUXTX ScaleOrShift}, cvec); - (* Don't put the unit result in; it probably isn't needed, *) - decsp(); decsp(); decsp() - ) - - | BICBlockOperation { kind=BlockOpMove{isByteMove}, sourceLeft, destRight, length } => - let - val exitLabel = createLabel() and loopLabel = createLabel() - (* Add the index, an unsigned, tagged value, to the base to create the - effective address. *) - fun addIndex(baseReg, indexReg) = - if isByteMove - then gen(addShiftedReg{regM=indexReg, regN=baseReg, regD=baseReg, shift=ShiftLSR 0w1}, cvec) - else - ( - gen(logicalShiftRight{regN=indexReg, regD=indexReg, shift=0w1}, cvec); (* Untag *) - gen(addShiftedReg{regM=indexReg, regN=baseReg, regD=baseReg, - shift=ShiftLSL (if is32in64 then 0w2 else 0w3)}, cvec) - ) - val scale = if isByteMove then 1 else if is32in64 then 4 else 8 - in - genMLAddress(sourceLeft, scale); - genMLAddress(destRight, scale); - gencde (length, ToX0, NotEnd, loopAddr); (* Length *) - genPopReg(X2, cvec); (* Dest index - tagged value. *) - genPopReg(X1, cvec); (* Dest base address. *) - indexToAbsoluteAddress(X1, X1, cvec); - (* Add in the index N.B. ML index values are unsigned. *) - addIndex(X1, X2); - (*gen(addShiftedReg{regM=X2, regN=X1, regD=X1, shift=ShiftLSR 0w1}, cvec);*) - genPopReg(X3, cvec); (* Source index *) - genPopReg(X2, cvec); - indexToAbsoluteAddress(X2, X2, cvec); - addIndex(X2, X3); - (*gen(addShiftedReg{regM=X3, regN=X2, regD=X2, shift=ShiftLSR 0w1}, cvec);*) - (* Untag the length *) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - (* Test the loop value at the top in case it's already zero. *) - compareRegs(X0, X0, cvec); - gen(setLabel loopLabel, cvec); - gen(compareBranchZero(X0, exitLabel), cvec); - if isByteMove - then - ( - gen(loadRegPostIndexByte{regT=X3, regN=X2, byteOffset=1}, cvec); - gen(storeRegPostIndexByte{regT=X3, regN=X1, byteOffset=1}, cvec) - ) - else if is32in64 - then - ( - gen(loadRegPostIndex32{regT=X3, regN=X2, byteOffset=4}, cvec); - gen(storeRegPostIndex32{regT=X3, regN=X1, byteOffset=4}, cvec) - ) - else - ( - gen(loadRegPostIndex{regT=X3, regN=X2, byteOffset=8}, cvec); - gen(storeRegPostIndex{regT=X3, regN=X1, byteOffset=8}, cvec) - ); - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - (* Back to the start. 
*) - gen(unconditionalBranch loopLabel, cvec); - gen(setLabel exitLabel, cvec); - topInX0 := false; (* X0 does not contain "unit" *) - decsp(); decsp(); decsp(); decsp() - end - - | BICBlockOperation { kind=BlockOpEqualByte, sourceLeft, destRight, length } => - (* Compare byte vectors for equality - returns a boolean result. *) - let - val equalLabel = createLabel() - in - blockCompareBytes(sourceLeft, destRight, length, equalLabel, true); - gen(setLabel equalLabel, cvec); - (* Set the result condition. *) - setBooleanCondition(X0, CondEqual, cvec) - end - - | BICBlockOperation { kind=BlockOpCompareByte, sourceLeft, destRight, length } => - (* Compare byte vectors for ordering - return tagged -1, 0, +1. *) - let - val equalLabel = createLabel() and resultLabel = createLabel() - in - blockCompareBytes(sourceLeft, destRight, length, equalLabel, false); - (* We drop through if we have found unequal bytes. *) - genList(loadNonAddress(X0, Word64.fromInt(tag 1)), cvec); - (* Set X0 to either 1 or -1 depending on whether it's greater or less. *) - gen(conditionalSetInverted{regD=X0, regTrue=X0, regFalse=XZero, cond=CondUnsignedHigher}, cvec); - gen(unconditionalBranch resultLabel, cvec); - gen(setLabel equalLabel, cvec); - (* Equal case - set it to zero. *) - genList(loadNonAddress(X0, Word64.fromInt(tag 0)), cvec); - gen(setLabel resultLabel, cvec) - end - - | BICArbitrary { oper=ArithAdd, shortCond, arg1, arg2, longCall } => - let - val startLong = createLabel() and resultLabel = createLabel() - in - (* Check tag bits *) - gencde (shortCond, ToX0, NotEnd, loopAddr); - gen(subSImmediate{regN=X0, regD=XZero, immed=taggedWord 0w1, shifted=false}, cvec); - (* Go to the long case if it's not short. *) - gen(conditionalBranch(CondNotEqual, startLong), cvec); - topInX0 := false; - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - genPopReg(X1, cvec); - decsp(); - (* Add and set the flag bits *) - gen((if is32in64 then addSShiftedReg32 else addSShiftedReg) - {regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec); - (* If there's no overflow skip to the end otherwise drop into - the call to the RTS. *) - gen(conditionalBranch(CondNoOverflow, resultLabel), cvec); - gen(setLabel startLong, cvec); - topInX0 := false; - gencde (longCall, ToX0, tailKind, loopAddr); - gen(setLabel resultLabel, cvec) - end - - | BICArbitrary { oper=ArithSub, shortCond, arg1, arg2, longCall } => - let - val startLong = createLabel() and resultLabel = createLabel() - in - (* Check tag bits *) - gencde (shortCond, ToX0, NotEnd, loopAddr); - gen(subSImmediate{regN=X0, regD=XZero, immed=taggedWord 0w1, shifted=false}, cvec); - (* Go to the long case if it's not short. *) - gen(conditionalBranch(CondNotEqual, startLong), cvec); - topInX0 := false; - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - genPopReg(X1, cvec); - decsp(); - (* Subtract and set the flag bits *) - gen((if is32in64 then subSShiftedReg32 else subSShiftedReg) - {regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec); - gen(conditionalBranch(CondNoOverflow, resultLabel), cvec); - gen(setLabel startLong, cvec); - topInX0 := false; - gencde (longCall, ToX0, tailKind, loopAddr); - gen(setLabel resultLabel, cvec) - end - - | BICArbitrary { oper=ArithMult, longCall, ... } => - (* Multiply - Just implement as a call to the long-precision case. 
*) - ( - gencde (longCall, whereto, tailKind, loopAddr) - ) - - | BICArbitrary _ => - raise InternalError "BICArbitrary: unimplemented operation" - - in (* body of gencde *) - - (* This ensures that there is precisely one item on the stack if - whereto = ToStack and no items if whereto = NoResult. *) - case whereto of - ToStack => - let - val () = ensureX0() - val newsp = oldsp + 1; - val adjustment = !realstackptr - newsp - - val () = - if adjustment = 0 - then () - else if adjustment < ~1 - then raise InternalError ("gencde: bad adjustment " ^ Int.toString adjustment) - (* Hack for declarations that should push values, but don't *) - else if adjustment = ~1 - then - ( - genList(loadNonAddress(X0, Word64.fromInt(tag 0)), cvec); - genPushReg(X0, cvec) - ) - else resetStack (adjustment, true, cvec) - in - realstackptr := newsp - end - - | NoResult => - let - val () = topInX0 := false - val adjustment = !realstackptr - oldsp - - val () = - if adjustment = 0 - then () - else if adjustment < 0 - then raise InternalError ("gencde: bad adjustment " ^ Int.toString adjustment) - else resetStack (adjustment, false, cvec) - in - realstackptr := oldsp - end - - | ToX0 => - let - (* If we have not pushed anything we have to push a unit result. *) - val () = - if !topInX0 then () - else if !realstackptr = oldsp - then genList(loadNonAddress(X0, Word64.fromInt(tag 0)), cvec) - else - ( - genPopReg(X0, cvec); - decsp() - ) - val () = topInX0 := true - - val adjustment = !realstackptr - oldsp - - val () = - if adjustment = 0 - then () - else if adjustment < 0 - then raise InternalError ("gencde: bad adjustment " ^ Int.toString adjustment) - else resetStack (adjustment, false, cvec) - in - realstackptr := oldsp - end - end (* gencde *) - - - and genUnary(NotBoolean, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - (* Flip true to false and the reverse. *) - gen(bitwiseXorImmediate32{bits=0w2, regN=X0, regD=X0}, cvec) - ) - - | genUnary(IsTaggedValue, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - gen(bitwiseAndSImmediate32{bits=0w1, regN=X0, regD=XZero}, cvec); - setBooleanCondition(X0, CondNotEqual (*Non-zero*), cvec) - ) - - | genUnary(MemoryCellLength, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - (* Load the length word. *) - if is32in64 - then - ( - gen(loadRegUnscaled32{regT=X0, regN=X0, byteOffset= ~4}, cvec); - (* Extract the length, excluding the flag bytes and shift by one bit. *) - gen(unsignedBitfieldInsertinZeros32{lsb=0w1, width=0w24, regN=X0, regD=X0}, cvec) - ) - else - ( - gen(loadRegUnscaled{regT=X0, regN=X0, byteOffset= ~8}, cvec); - (* Extract the length, excluding the flag bytes and shift by one bit. *) - gen(unsignedBitfieldInsertinZeros{lsb=0w1, width=0w56, regN=X0, regD=X0}, cvec) - ); - (* Set the tag bit. *) - gen(bitwiseOrImmediate{bits=0w1, regN=X0, regD=X0}, cvec) - ) - - | genUnary(MemoryCellFlags, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - (* Load the flags byte. *) - gen(loadRegUnscaledByte{regT=X0, regN=X0, byteOffset=flagsByteOffset}, cvec); - (* Tag the result. 
*) - gen(logicalShiftLeft{shift=0w1, regN=X0, regD=X0}, cvec); - gen(bitwiseOrImmediate{bits=0w1, regN=X0, regD=X0}, cvec) - ) - - | genUnary(ClearMutableFlag, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - gen(loadRegUnscaledByte{regT=X1, regN=X0, byteOffset=flagsByteOffset}, cvec); - gen(bitwiseAndImmediate32{bits=Word64.xorb(0wxffffffff, 0wx40), regN=X1, regD=X1}, cvec); - gen(storeRegUnscaledByte{regT=X1, regN=X0, byteOffset=flagsByteOffset}, cvec) - ) - - | genUnary(LongWordToTagged, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - (* Load the value and tag it. *) - gen(loadRegScaled{regT=X0, regN=X0, unitOffset=0}, cvec); - (* Tag the result. *) - gen(logicalShiftLeft{shift=0w1, regN=X0, regD=X0}, cvec); - gen((if is32in64 then bitwiseOrImmediate32 else bitwiseOrImmediate) - {bits=0w1, regN=X0, regD=X0}, cvec) - ) - - | genUnary(SignedToLongWord, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - (* We want to generate a 64-bit signed value. In 32-in-64 we use a - 64-bit SBFX to do the right shift and set the sign bits. *) - if is32in64 - then gen(signedBitfieldExtract{lsb=0w1, width=0w31, regN=X0, regD=X1}, cvec) - else gen(arithmeticShiftRight{shift=0w1, regN=X0, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - | genUnary(UnsignedToLongWord, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - gen(logicalShiftRight{shift=0w1, regN=X0, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - | genUnary(RealAbs PrecDouble, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - unboxDouble(X0, X0, V0, cvec); - gen(loadRegScaledDouble{regT=V0, regN=X0, unitOffset=0}, cvec); - gen(absDouble{regN=V0, regD=V0}, cvec); - boxDouble(V0, cvec) - ) - - | genUnary(RealNeg PrecDouble, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - unboxDouble(X0, X0, V0, cvec); - gen(loadRegScaledDouble{regT=V0, regN=X0, unitOffset=0}, cvec); - gen(negDouble{regN=V0, regD=V0}, cvec); - boxDouble(V0, cvec) - ) - - | genUnary(RealFixedInt PrecDouble, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - (* Shift to remove the tag. *) - gen(arithmeticShiftRight{shift=0w1, regN=X0, regD=X0}, cvec); - gen(convertIntToDouble{regN=X0, regD=V0}, cvec); - boxDouble(V0, cvec) - ) - - | genUnary(RealAbs PrecSingle, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - unboxOrUntagSingle(X0, X0, V0, cvec); - gen(absFloat{regN=V0, regD=V0}, cvec); - boxOrTagFloat(V0, cvec) - ) - - | genUnary(RealNeg PrecSingle, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - unboxOrUntagSingle(X0, X0, V0, cvec); - gen(negFloat{regN=V0, regD=V0}, cvec); - boxOrTagFloat(V0, cvec) - ) - - | genUnary(RealFixedInt PrecSingle, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - (* Shift to remove the tag. *) - gen(arithmeticShiftRight{shift=0w1, regN=X0, regD=X0}, cvec); - gen((if is32in64 then convertInt32ToFloat else convertIntToFloat){regN=X0, regD=V0}, cvec); - boxOrTagFloat(V0, cvec) - ) - - | genUnary(FloatToDouble, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - unboxOrUntagSingle(X0, X0, V0, cvec); - gen(convertFloatToDouble{regN=V0, regD=V0}, cvec); - boxDouble(V0, cvec) - ) - - | genUnary(DoubleToFloat, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - (* Convert double to float using current rounding mode. 
*) - unboxDouble(X0, X0, V0, cvec); - gen(loadRegScaledDouble{regT=V0, regN=X0, unitOffset=0}, cvec); - gen(convertDoubleToFloat{regN=V0, regD=V0}, cvec); - boxOrTagFloat(V0, cvec) - ) - - | genUnary(RealToInt (PrecDouble, rnding), arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - (* We could get an overflow in either the conversion to integer - or in the conversion to a tagged value. Fortunately if the - conversion detects an overflow it sets the result to a - value that will cause an overflow in the addition. *) - unboxDouble(X0, X0, V0, cvec); - gen(loadRegScaledDouble{regT=V0, regN=X0, unitOffset=0}, cvec); - if is32in64 - then - ( - gen(convertDoubleToInt32 rnding {regN=V0, regD=X0}, cvec); - gen(addSShiftedReg32{regM=X0, regN=X0, regD=X0, shift=ShiftNone}, cvec) - ) - else - ( - gen(convertDoubleToInt rnding {regN=V0, regD=X0}, cvec); - gen(addSShiftedReg{regM=X0, regN=X0, regD=X0, shift=ShiftNone}, cvec) - ); - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec); - checkOverflow cvec - ) - - | genUnary(RealToInt (PrecSingle, rnding), arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - unboxOrUntagSingle(X0, X0, V0, cvec); - if is32in64 - then - ( - gen(convertFloatToInt32 rnding {regN=V0, regD=X0}, cvec); - gen(addSShiftedReg32{regM=X0, regN=X0, regD=X0, shift=ShiftNone}, cvec) - ) - else - ( - gen(convertFloatToInt rnding {regN=V0, regD=X0}, cvec); - gen(addSShiftedReg{regM=X0, regN=X0, regD=X0, shift=ShiftNone}, cvec) - ); - gen(addSShiftedReg{regM=X0, regN=X0, regD=X0, shift=ShiftNone}, cvec); - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec); - checkOverflow cvec - ) - - | genUnary(TouchAddress, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - topInX0 := false (* Discard this *) - ) - - | genUnary(AllocCStack, arg1, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - (* Allocate space on the stack. The higher levels have already aligned - the size to a multiple of 16. *) - (* Remove the tag and then use add-extended. This can use SP unlike - the shifted case. *) - gen(logicalShiftRight{shift=0w1, regN=X0, regD=X0}, cvec); - gen(subExtendedReg{regM=X0, regN=XSP, regD=XSP, extend=ExtUXTX 0w0}, cvec); - (* The result is a large-word. We can't box SP directly. - We have to use add here to get the SP into X1 instead of the usual move. *) - gen(addImmediate{regN=XSP, regD=X1, immed=0w0, shifted=false}, cvec); - boxLargeWord(X1, cvec) - ) - - | genUnary(LockMutex, arg1, loopAddr) = - (* The earliest versions of the Arm8 do not have the LDADD instruction which - will do this directly. To preserve compatibility we use LDAXR/STLXR - which require a loop. *) - let - val loopLabel = createLabel() - (* For the moment don't try to use a spin lock. *) - in - gencde (arg1, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - gen(setLabel loopLabel, cvec); - (* Get the original value into X1. *) - gen(loadAcquireExclusiveRegister{regN=X0, regT=X1}, cvec); - (* Add and put the result into X2 *) - gen(addImmediate{regN=X1, regD=X2, immed=0w1, shifted=false}, cvec); - (* Store the result of the addition. W4 will be zero if this succeeded. *) - gen(storeReleaseExclusiveRegister{regS=X4, regT=X2, regN=X0}, cvec); - gen(compareBranchNonZero32(X4, loopLabel), cvec); - (* Put in the memory barrier. *) - gen(dmbIsh, cvec); - (* The result is true if the old value was zero. 
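Ignoring atomicity, the operation implemented by the load-exclusive/store-exclusive loop is roughly
       fun lockMutex m = let val old = !m in m := old + 1; old = 0 end
with m an int ref: the lock is acquired only when the previous contents were zero, and the comparison that follows simply tests the value that the exclusive load put in X1.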
*) - gen(subSImmediate{regN=X1, regD=XZero, immed=0w0, shifted=false}, cvec); - setBooleanCondition(X0, CondEqual, cvec) - end - - | genUnary(TryLockMutex, arg1, loopAddr) = - (* Could use LDUMAXAL to set it the greater of the current value or 1. *) - let - val loopLabel = createLabel() and exitLabel = createLabel() - in - gencde (arg1, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - gen(setLabel loopLabel, cvec); - (* Get the original value into X1. *) - gen(loadAcquireExclusiveRegister{regN=X0, regT=X1}, cvec); - (* If it is non-zero the lock is already taken. *) - gen(compareBranchNonZero(X1, exitLabel), cvec); - genList(loadNonAddress(X2, 0w1), cvec); - (* Store zero into the memory. W4 will be zero if this succeeded. *) - gen(storeReleaseExclusiveRegister{regS=X4, regT=X2, regN=X0}, cvec); - gen(compareBranchNonZero32(X4, loopLabel), cvec); - gen(setLabel exitLabel, cvec); - gen(dmbIsh, cvec); - (* The result is true if the old value was zero. *) - gen(subSImmediate{regN=X1, regD=XZero, immed=0w0, shifted=false}, cvec); - setBooleanCondition(X0, CondEqual, cvec) - end - - | genUnary(UnlockMutex, arg1, loopAddr) = - (* Could use SWAPAL *) - let - val loopLabel = createLabel() - in - gencde (arg1, ToX0, NotEnd, loopAddr); - indexToAbsoluteAddress(X0, X0, cvec); - gen(setLabel loopLabel, cvec); - (* Get the original value into X1. *) - gen(loadAcquireExclusiveRegister{regN=X0, regT=X1}, cvec); - (* Store zero into the memory. W4 will be zero if this succeeded. *) - gen(storeReleaseExclusiveRegister{regS=X4, regT=XZero, regN=X0}, cvec); - gen(compareBranchNonZero32(X4, loopLabel), cvec); - (* Put in the memory barrier. *) - gen(dmbIsh, cvec); - (* The result is true if the old value was 1. *) - gen(subSImmediate{regN=X1, regD=XZero, immed=0w1, shifted=false}, cvec); - setBooleanCondition(X0, CondEqual, cvec) - end - - and genBinary(WordComparison{test, isSigned}, arg1, arg2, loopAddr) = - let - val cond = - case (test, isSigned) of - (TestEqual, _) => CondEqual - | (TestLess, true) => CondSignedLess - | (TestLessEqual, true) => CondSignedLessEq - | (TestGreater, true) => CondSignedGreater - | (TestGreaterEqual, true) => CondSignedGreaterEq - | (TestLess, false) => CondCarryClear - | (TestLessEqual, false) => CondUnsignedLowOrEq - | (TestGreater, false) => CondUnsignedHigher - | (TestGreaterEqual, false) => CondCarrySet - | (TestUnordered, _) => raise InternalError "WordComparison: TestUnordered" - in - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); (* First argument. *) - comparePolyRegs(X1, X0, cvec); - setBooleanCondition(X0, cond, cvec) - end - - | genBinary(PointerEq, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); (* First argument. *) - comparePolyRegs(X1, X0, cvec); - setBooleanCondition(X0, CondEqual, cvec) - ) - - | genBinary(FixedPrecisionArith ArithAdd, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* Subtract the tag bit. 
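With the usual representation a short value n is held as 2 * n + 1, so adding two tagged values directly would give 2 * (a + b) + 2; removing one tag first keeps the invariant, since ta + (tb - 1) = (2a + 1) + 2b = 2 * (a + b) + 1. A minimal sketch of that identity, assuming this tagging:
       fun tag n = 2 * n + 1
       fun taggedAdd (ta, tb) = ta + (tb - 1)
The addS form that follows also sets the condition flags so that checkOverflow can detect any signed overflow of the untagged result.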
*) - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - genPopReg(X1, cvec); - (* Add and set the flag bits *) - gen((if is32in64 then addSShiftedReg32 else addSShiftedReg) - {regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec); - checkOverflow cvec - ) - - | genBinary(FixedPrecisionArith ArithSub, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* Subtract the tag bit. *) - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - genPopReg(X1, cvec); - (* Subtract and set the flag bits *) - gen((if is32in64 then subSShiftedReg32 else subSShiftedReg) - {regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec); - checkOverflow cvec - ) - - | genBinary(FixedPrecisionArith ArithMult, arg1, arg2, loopAddr) = - let - (* There's no simple way of detecting overflow. We have to compute the - high-order word and then check that it is either all zeros with - the sign bit zero or all ones with the sign bit one. *) - val noOverflow = createLabel() - in - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* Compute the result in the same way as for Word.* apart from the - arithmetic shift. *) - genPopReg(X1, cvec); - - if is32in64 - then - ( - (* Shift to remove the tags on one argument. *) - gen(arithmeticShiftRight32{regN=X0, regD=X2, shift=0w1}, cvec); - (* Remove the tag on the other. *) - gen(bitwiseAndImmediate32{regN=X1, regD=X1, bits=tagBitMask32}, cvec); - (* Multiply two 32-bit quantities. *) - gen(signedMultiplyAndAddLong{regM=X1, regN=X2, regA=XZero, regD=X0}, cvec); - (* Get the top word which should be either all ones or all zeros. *) - gen(arithmeticShiftRight{shift=0w32, regN=X0, regD=X2}, cvec); - (* The result is in X0. Use a 32-bit operation to clear the top word. *) - gen(bitwiseOrImmediate32{regN=X0, regD=X0, bits=0w1}, cvec); - (* Compare with the sign bit of the result. *) - gen(subSShiftedReg32{regD=XZero, regN=X2, regM=X0, shift=ShiftASR 0w31}, cvec) - ) - else - ( - (* Shift to remove the tags on one argument. *) - gen(arithmeticShiftRight{regN=X0, regD=X2, shift=0w1}, cvec); - (* Remove the tag on the other. *) - gen(bitwiseAndImmediate{regN=X1, regD=X1, bits=tagBitMask}, cvec); - gen(multiplyAndAdd{regM=X1, regN=X2, regA=XZero, regD=X0}, cvec); - (* Put back the tag. *) - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec); - (* Compute the high order part into X2 *) - gen(signedMultiplyHigh{regM=X1, regN=X2, regD=X2}, cvec); - (* Compare with the sign bit of the result. *) - gen(subSShiftedReg{regD=XZero, regN=X2, regM=X0, shift=ShiftASR 0w63}, cvec) - ); - gen(conditionalBranch(CondEqual, noOverflow), cvec); - gen(loadAddressConstant(X0, toMachineWord Overflow), cvec); - gen(loadRegScaled{regT=X_MLStackPtr, regN=X_MLAssemblyInt, unitOffset=exceptionHandlerOffset}, cvec); - gen(loadRegScaled{regT=X1, regN=X_MLStackPtr, unitOffset=0}, cvec); - gen(branchRegister X1, cvec); - gen(setLabel noOverflow, cvec) - end - - | genBinary(FixedPrecisionArith ArithQuot, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* The word version avoids an extra shift. Don't do that here at least - for the moment. Division by zero and overflow are checked for at - the higher level. 
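Both operands are untagged with an arithmetic shift (shifting 2n + 1 right by one gives n, including for negative n), the division is done with a signed divide, and the quotient is retagged. A minimal SML sketch of the arithmetic, assuming the 2n + 1 tagging:
       fun untag t = t div 2
       fun tag n = 2 * n + 1
       fun taggedQuot (ta, tb) = tag (Int.quot (untag ta, untag tb))
Int.quot rather than div is the right model here because the hardware divide truncates towards zero.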
*) - genPopReg(X1, cvec); - if is32in64 - then - ( - (* Shift to remove the tags on the arguments *) - gen(arithmeticShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(arithmeticShiftRight32{regN=X1, regD=X1, shift=0w1}, cvec); - gen(signedDivide32{regM=X0, regN=X1, regD=X0}, cvec); - (* Restore the tag. *) - gen(logicalShiftLeft32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate32{regN=X0, regD=X0, bits=0w1}, cvec) - ) - else - ( - (* Shift to remove the tags on the arguments *) - gen(arithmeticShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - gen(arithmeticShiftRight{regN=X1, regD=X1, shift=0w1}, cvec); - gen(signedDivide{regM=X0, regN=X1, regD=X0}, cvec); - (* Restore the tag. *) - gen(logicalShiftLeft{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec) - ) - ) - - | genBinary(FixedPrecisionArith ArithRem, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* For the moment we remove the tags and then retag afterwards. The word - version avoids this but at least for the moment we do it the longer way. *) - (* There's no direct way to get the remainder - have to use divide and multiply. *) - genPopReg(X1, cvec); - (* Shift to remove the tags on the arguments *) - if is32in64 - then - ( - gen(arithmeticShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(arithmeticShiftRight32{regN=X1, regD=X1, shift=0w1}, cvec); - gen(signedDivide32{regM=X0, regN=X1, regD=X2}, cvec); - (* X0 = X1 - (X2/X0)*X0 *) - gen(multiplyAndSub32{regM=X2, regN=X0, regA=X1, regD=X0}, cvec); - (* Restore the tag. *) - gen(logicalShiftLeft32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate32{regN=X0, regD=X0, bits=0w1}, cvec) - ) - else - ( - gen(arithmeticShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - gen(arithmeticShiftRight{regN=X1, regD=X1, shift=0w1}, cvec); - gen(signedDivide{regM=X0, regN=X1, regD=X2}, cvec); - (* X0 = X1 - (X2/X0)*X0 *) - gen(multiplyAndSub{regM=X2, regN=X0, regA=X1, regD=X0}, cvec); - (* Restore the tag. *) - gen(logicalShiftLeft{regN=X0, regD=X0, shift=0w1}, cvec); - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec) - ) - ) - - | genBinary(FixedPrecisionArith ArithDiv, _, _, _) = - raise InternalError "unimplemented operation: FixedPrecisionArith ArithDiv" - - | genBinary(FixedPrecisionArith ArithMod, _, _, _) = - raise InternalError "unimplemented operation: FixedPrecisionArith ArithMod" - - | genBinary(WordArith ArithAdd, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* Subtract the tag bit. *) - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - genPopReg(X1, cvec); - gen((if is32in64 then addShiftedReg32 else addShiftedReg){regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec) - ) - - | genBinary(WordArith ArithSub, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* Subtract the tag bit. *) - gen(subImmediate{regN=X0, regD=X0, immed=0w1, shifted=false}, cvec); - genPopReg(X1, cvec); - gen((if is32in64 then subShiftedReg32 else subShiftedReg){regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec) - ) - - | genBinary(WordArith ArithMult, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Shift to remove the tags on one argument. 
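Only one operand needs a full untag: if ta = 2a + 1 and tb = 2b + 1 then shifting ta right gives a, clearing just the tag bit of tb gives 2b, and their product 2ab is even, so or-ing the tag back in produces the tagged result. As a small sketch under that representation, on the default word type:
       fun taggedMulW (ta, tb) = Word.orb (Word.>> (ta, 0w1) * Word.andb (tb, Word.notb 0w1), 0w1)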
*) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - (* Remove the tag on the other. *) - gen(bitwiseAndImmediate{regN=X1, regD=X1, bits=tagBitMask}, cvec); - gen((if is32in64 then multiplyAndAdd32 else multiplyAndAdd) - {regM=X1, regN=X0, regA=XZero, regD=X0}, cvec); - (* Put back the tag. *) - gen((if is32in64 then bitwiseOrImmediate32 else bitwiseOrImmediate) - {regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | genBinary(WordArith ArithDiv, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Shift to remove the tag on the divisor *) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - (* Untag but don't shift the dividend. *) - gen(bitwiseAndImmediate{regN=X1, regD=X1, bits=tagBitMask}, cvec); - gen((if is32in64 then unsignedDivide32 else unsignedDivide){regM=X0, regN=X1, regD=X0}, cvec); - (* Restore the tag: Note: it may already be set depending on the result of - the division. *) - gen((if is32in64 then bitwiseOrImmediate32 else bitwiseOrImmediate) - {regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | genBinary(WordArith ArithMod, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* There's no direct way to get the remainder - have to use divide and multiply. *) - genPopReg(X1, cvec); - (* Shift to remove the tag on the divisor *) - gen(logicalShiftRight{regN=X0, regD=X0, shift=0w1}, cvec); - (* Untag but don't shift the dividend. *) - gen(bitwiseAndImmediate{regN=X1, regD=X2, bits=tagBitMask}, cvec); - gen((if is32in64 then unsignedDivide32 else unsignedDivide){regM=X0, regN=X2, regD=X2}, cvec); - (* Clear the bottom bit before the multiplication. *) - gen(bitwiseAndImmediate{regN=X2, regD=X2, bits=tagBitMask}, cvec); - (* X0 = X1 - (X2/X0)*X0 *) - gen((if is32in64 then multiplyAndSub32 else multiplyAndSub){regM=X2, regN=X0, regA=X1, regD=X0}, cvec) - (* Because we're subtracting from the original, tagged, dividend - the result is tagged. *) - ) - - | genBinary(WordArith _, _, _, _) = raise InternalError "WordArith - unimplemented instruction" - - | genBinary(WordLogical LogicalAnd, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Since they're both tagged the tag bit is preserved. *) - gen(andShiftedReg{regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec) - ) - - | genBinary(WordLogical LogicalOr, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Since they're both tagged the tag bit is preserved. *) - gen(orrShiftedReg{regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec) - ) - - | genBinary(WordLogical LogicalXor, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Have to restore the tag bit because that will be cleared. *) - gen(eorShiftedReg{regN=X1, regM=X0, regD=X0, shift=ShiftNone}, cvec); - gen((if is32in64 then bitwiseOrImmediate32 else bitwiseOrImmediate) - {regN=X0, regD=X0, bits=0w1}, cvec) - ) - - (* Shifts: ARM64 shifts are taken modulo the word length but that's - dealt with at a higher level. 
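For a tagged shift left the tag bit is first cleared from the value, the shift amount is untagged, the variable shift is applied and the tag is put back; the zeros shifted in at the bottom make room for it. A small sketch on the default word type, assuming the 2n + 1 tagging:
       fun taggedLsl (tv, tshift) = Word.orb (Word.<< (Word.andb (tv, Word.notb 0w1), Word.>> (tshift, 0w1)), 0w1)
The logical and arithmetic right shifts that follow use the same pattern except that the tag bit can be left in place on the value before shifting right.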
*) - | genBinary(WordShift ShiftLeft, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Remove the tag from value we're shifting. *) - gen(bitwiseAndImmediate{regN=X1, regD=X1, bits=tagBitMask}, cvec); - (* Untag the shift amount. Can use 32-bit op here. *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - gen((if is32in64 then logicalShiftLeftVariable32 else logicalShiftLeftVariable) - {regM=X0, regN=X1, regD=X0}, cvec); - (* Put back the tag. *) - gen((if is32in64 then bitwiseOrImmediate32 else bitwiseOrImmediate) - {regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | genBinary(WordShift ShiftRightLogical, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Don't need to remove the tag. *) - (* Untag the shift amount. Can use 32-bit op here. *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(logicalShiftRightVariable{regM=X0, regN=X1, regD=X0}, cvec); - (* Put back the tag. *) - gen((if is32in64 then bitwiseOrImmediate32 else bitwiseOrImmediate) - {regN=X0, regD=X0, bits=0w1}, cvec) - ) - - | genBinary(WordShift ShiftRightArithmetic, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - (* Don't need to remove the tag. *) - (* Untag the shift amount. Can use 32-bit op here. *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - if is32in64 - then - ( - (* Must use a 32 bit shift here. *) - gen(arithmeticShiftRightVariable32{regM=X0, regN=X1, regD=X0}, cvec); - (* Put back the tag. *) - gen(bitwiseOrImmediate32{regN=X0, regD=X0, bits=0w1}, cvec) - ) - else - ( - gen(arithmeticShiftRightVariable{regM=X0, regN=X1, regD=X0}, cvec); - (* Put back the tag. *) - gen(bitwiseOrImmediate{regN=X0, regD=X0, bits=0w1}, cvec) - ) - ) - - | genBinary(AllocateByteMemory, arg1, arg2, loopAddr) = - (* Allocate memory for byte data. Unlike for word data it is not necessary to - initialise it before any further allocation provided it has the mutable bit - set. *) - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* Load and untag the size and flags. The size is the number of words even - though this is byte data. *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - genPopReg(X1, cvec); - gen(logicalShiftRight{regN=X1, regD=X1, shift=0w1}, cvec); - allocateVariableSize({sizeReg=X1, flagsReg=X0, resultReg=X2}, cvec); - gen(moveRegToReg{sReg=X2, dReg=X0}, cvec); - absoluteAddressToIndex(X0, cvec) - ) - - | genBinary(LargeWordComparison test, arg1, arg2, loopAddr) = - let - val cond = - case test of - TestEqual => CondEqual - | TestLess => CondCarryClear - | TestLessEqual => CondUnsignedLowOrEq - | TestGreater => CondUnsignedHigher - | TestGreaterEqual => CondCarrySet - | TestUnordered => raise InternalError "LargeWordComparison: TestUnordered" - in - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - (* The values are boxed so have to be loaded first. 
*) - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - compareRegs(X1, X0, cvec); - setBooleanCondition(X0, cond, cvec) - end - - | genBinary(LargeWordArith ArithAdd, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(addShiftedReg{regN=X1, regM=X0, regD=X1, shift=ShiftNone}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordArith ArithSub, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(subShiftedReg{regN=X1, regM=X0, regD=X1, shift=ShiftNone}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordArith ArithMult, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(multiplyAndAdd{regM=X1, regN=X0, regA=XZero, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordArith ArithDiv, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(unsignedDivide{regM=X0, regN=X1, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordArith ArithMod, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(unsignedDivide{regM=X0, regN=X1, regD=X2}, cvec); - gen(multiplyAndSub{regM=X2, regN=X0, regA=X1, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordArith _, _, _, _) = - raise InternalError "LargeWordArith - unimplemented instruction" - - | genBinary(LargeWordLogical LogicalAnd, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(andShiftedReg{regN=X1, regM=X0, regD=X1, shift=ShiftNone}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordLogical LogicalOr, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(orrShiftedReg{regN=X1, regM=X0, regD=X1, shift=ShiftNone}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordLogical LogicalXor, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X0, X0, cvec); - unboxLargeWord(X1, X1, cvec); - gen(eorShiftedReg{regN=X1, regM=X0, regD=X1, shift=ShiftNone}, cvec); - boxLargeWord(X1, cvec) - ) - - (* The shift is always a Word.word value i.e. tagged. There is a check at the higher level - that the shift does not exceed 32/64 bits. *) - | genBinary(LargeWordShift ShiftLeft, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X1, X1, cvec); - (* Untag the shift amount. Can use 32-bit op here. 
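The boxed LargeWord value is already untagged inside its box, so only the shift amount, which arrives as a tagged Word.word, needs untagging; on the unboxed value the operation is simply LargeWord.<< (v, Word.>> (tshift, 0w1)) before the result is boxed again.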
*) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(logicalShiftLeftVariable{regM=X0, regN=X1, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordShift ShiftRightLogical, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X1, X1, cvec); - (* Untag the shift amount. Can use 32-bit op here. *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(logicalShiftRightVariable{regM=X0, regN=X1, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - | genBinary(LargeWordShift ShiftRightArithmetic, arg1, arg2, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxLargeWord(X1, X1, cvec); - (* Untag the shift amount. Can use 32-bit op here. *) - gen(logicalShiftRight32{regN=X0, regD=X0, shift=0w1}, cvec); - gen(arithmeticShiftRightVariable{regM=X0, regN=X1, regD=X1}, cvec); - boxLargeWord(X1, cvec) - ) - - (* Floating point comparisons. - The fcmp instruction differs from integer comparison. If either - argument is a NaN the overflow bit is set and the other bits are - cleared. That means that in order to get a true result only - if the values are not NaNs we have to test that at least one of - C, N, or Z are set. We use unsigned tests for < and <= - and signed tests for > and >=. *) - | genBinary(RealComparison (test, PrecDouble), arg1, arg2, loopAddr) = - let - val cond = - case test of - TestEqual => CondEqual - | TestLess => CondCarryClear - | TestLessEqual => CondUnsignedLowOrEq - | TestGreater => CondSignedGreater - | TestGreaterEqual => CondSignedGreaterEq - | TestUnordered => CondOverflow - in - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxDouble(X0, X0, V0, cvec); - unboxDouble(X1, X1, V1, cvec); - gen(compareDouble{regM=V0, regN=V1}, cvec); - setBooleanCondition(X0, cond, cvec) - end - - | genBinary(RealComparison (test, PrecSingle), arg1, arg2, loopAddr) = - let - val cond = - case test of - TestEqual => CondEqual - | TestLess => CondCarryClear - | TestLessEqual => CondUnsignedLowOrEq - | TestGreater => CondSignedGreater - | TestGreaterEqual => CondSignedGreaterEq - | TestUnordered => CondOverflow - in - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxOrUntagSingle(X0, X0, V0, cvec); - unboxOrUntagSingle(X1, X1, V1, cvec); - gen(compareFloat{regM=V0, regN=V1}, cvec); - setBooleanCondition(X0, cond, cvec) - end - - | genBinary(RealArith (oper, PrecDouble), arg1, arg2, loopAddr) = - let - val operation = - case oper of - ArithAdd => addDouble - | ArithSub => subtractDouble - | ArithMult => multiplyDouble - | ArithDiv => divideDouble - | _ => raise InternalError "RealArith - unimplemented instruction" - in - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxDouble(X0, X0, V0, cvec); - unboxDouble(X1, X1, V1, cvec); - gen(operation{regM=V0, regN=V1, regD=V0}, cvec); - boxDouble(V0, cvec) - end - - | genBinary(RealArith (oper, PrecSingle), arg1, arg2, loopAddr) = - let - val operation = - case oper of - ArithAdd => addFloat - | ArithSub => subtractFloat - | ArithMult => multiplyFloat - | ArithDiv => divideFloat - | _ => raise InternalError "RealArith - unimplemented instruction" - in - (* 32-bit floats are represented as 
the value in the top 32-bits of - a general register with the low-order word containing all zeros - except the bottom bit which is one. *) - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); - unboxOrUntagSingle(X0, X0, V0, cvec); - unboxOrUntagSingle(X1, X1, V1, cvec); - gen(operation{regM=V0, regN=V1, regD=V0}, cvec); - boxOrTagFloat(V0, cvec) - end - - | genBinary(FreeCStack, arg1, arg2, loopAddr) = - (* Free space on the C stack. This is a binary operation that takes the base address - and the size. The base address isn't used in this version. *) - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - genPopReg(X1, cvec); (* Pop and discard the address *) - (* Can't use the shifted addition which would remove the tag as part of the add. *) - gen(logicalShiftRight{shift=0w1, regN=X0, regD=X0}, cvec); - gen(addExtendedReg{regM=X0, regN=XSP, regD=XSP, extend=ExtUXTX 0w0}, cvec) - ) - - (* doNext is only used for mutually recursive functions where a - function may not be able to fill in its closure if it does not have - all the remaining declarations. *) - (* TODO: This always creates the closure on the heap even when makeClosure is false. *) - and genProc ({ closure=[], localCount, body, argTypes, name, ...}: bicLambdaForm, mutualDecs, doNext: unit -> unit) : unit = - let - (* Create a one word item for the closure. This is returned for recursive references - and filled in with the address of the code when we've finished. *) - val closure = makeConstantClosure() - (* Code-gen function. No non-local references. *) - val () = codegen (body, name, closure, List.length argTypes, localCount, parameters, false) - val () = gen(loadAddressConstant(X0, closureAsAddress closure), cvec) - val () = genPushReg(X0, cvec) - val () = incsp(); - in - if mutualDecs then doNext () else () - end - - | genProc ({ localCount, body, name, argTypes, closure, ...}, mutualDecs, doNext) = - let (* Full closure required. *) - val resClosure = makeConstantClosure() - (* Code-gen function. *) - val () = codegen (body, name, resClosure, List.length argTypes, localCount, parameters, true) - val closureVars = List.length closure (* Size excluding the code address *) - (* The first native word is the code address. *) - val firstEntry = Word.toInt(nativeWordSize div wordSize) - in - if mutualDecs - then - let (* Have to make the closure now and fill it in later. *) - val () = genAllocateFixedSize(closureVars+firstEntry, Word8.orb(F_mutable, F_closure), X0, X1, cvec) - val () = - if is32in64 - then (* Have to get the code address at run-time. *) - ( - gen(loadAddressConstant(X1, toMachineWord resClosure), cvec); - indexToAbsoluteAddress(X1, X1, cvec); - gen(loadRegScaled{regT=X1, regN=X1, unitOffset=0}, cvec) - ) - else gen(loadAddressConstant(X1, codeAddressFromClosure resClosure), cvec) - val () = gen(storeRegScaled{regT=X1, regN=X0, unitOffset=0}, cvec) - val () = absoluteAddressToIndex(X0, cvec) - val () = genPushReg(X0, cvec) - val () = incsp () - - val entryAddr : int = !realstackptr - (* Set the address of this entry in the declaration table and - then process any other mutual-recursive functions. *) - val () = doNext () - - (* Reload the address of the vector - If we have processed other - closures the closure will no longer be on the top of the stack. 
*) - val () = genList(loadScaledWord{dest=X1, base=X_MLStackPtr, work=X16, offset= !realstackptr - entryAddr}, cvec) - val () = indexToAbsoluteAddress(X1, X1, cvec) - - (* Load items for the closure. *) - fun loadItems ([], _) = () - | loadItems (v :: vs, addr : int) = - ( - (* Generate an item and move it into the closure *) - gencde (BICExtract v, ToX0, NotEnd, NONE); - (* The closure "address" excludes the code address. *) - genList(storeScaledPolyWord{store=X0, base=X1, work=X16, offset=addr+firstEntry}, cvec); - topInX0 := false; - loadItems (vs, addr + 1) - ) - - val () = loadItems (closure, 0) - - (* Lock it by setting the top byte to the zero or the closure bit. *) - val () = - if is32in64 - then - ( - genList(loadNonAddress(X16, Word64.fromLargeWord(Word8.toLargeWord F_closure)), cvec); - gen(storeRegUnscaledByte{regT=X16, regN=X1, byteOffset=flagsByteOffset}, cvec) - ) - else gen(storeRegUnscaledByte{regT=XZero, regN=X1, byteOffset=flagsByteOffset}, cvec) - in - () (* Don't need to do anything now. *) - end - - else - let - val () = List.app (fn pt => gencde (BICExtract pt, ToStack, NotEnd, NONE)) closure - in - genAllocateFixedSize(closureVars+firstEntry, if is32in64 then F_closure else 0w0, X0, X1, cvec); - List.foldl(fn (_, w) => - ( - genPopReg(X1, cvec); - genList(storeScaledPolyWord{store=X1, base=X0, work=X16, offset=w-1}, cvec); - w-1 - ) - ) (closureVars+firstEntry) closure; - if is32in64 - then (* Have to get the code address at run-time. *) - ( - gen(loadAddressConstant(X1, toMachineWord resClosure), cvec); - indexToAbsoluteAddress(X1, X1, cvec); - gen(loadRegScaled{regT=X1, regN=X1, unitOffset=0}, cvec) - ) - else gen(loadAddressConstant(X1, codeAddressFromClosure resClosure), cvec); - gen(storeRegScaled{regT=X1, regN=X0, unitOffset=0}, cvec); - absoluteAddressToIndex(X0, cvec); - genPushReg(X0, cvec); - realstackptr := !realstackptr - closureVars + 1 (* Popped the closure vars and pushed the address. *) - end - end - - and genCond (testCode, thenCode, elseCode, whereto, tailKind, loopAddr) = - let - val toElse = createLabel() and exitJump = createLabel() - val () = genTest(testCode, false, toElse, loopAddr) - val () = gencde (thenCode, whereto, tailKind, loopAddr) - (* Get rid of the result from the stack. If there is a result then the - ``else-part'' will push it. *) - val () = case whereto of ToStack => decsp () | NoResult => () | ToX0 => () - val () = topInX0 := false - - val () = gen(unconditionalBranch exitJump, cvec) - - (* start of "else part" *) - val () = gen(setLabel toElse, cvec) - val () = gencde (elseCode, whereto, tailKind, loopAddr) - val () = gen(setLabel exitJump, cvec) - in - () - end (* genCond *) - - (* andalso and orelse are turned into conditionals with constants. - Convert this into a series of tests. 
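In source terms the front end has already rewritten
       a andalso b   as   if a then b else false
       a orelse b    as   if a then true else b
so the cases that follow only have to handle boolean constants, NotBoolean, IsTaggedValue, word and pointer comparisons, tag tests and nested conditionals; anything else is evaluated and compared against the tagged representation of true.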
*) - and genTest(BICConstnt(w, _), jumpOn, targetLabel, _) = - let - val cVal = case toShort w of 0w0 => false | 0w1 => true | _ => raise InternalError "genTest" - in - if cVal = jumpOn - then gen(unconditionalBranch targetLabel, cvec) - else () - end - - | genTest(BICUnary { oper=NotBoolean, arg1 }, jumpOn, targetLabel, loopAddr) = - genTest(arg1, not jumpOn, targetLabel, loopAddr) - - | genTest(BICUnary { oper=IsTaggedValue, arg1 }, jumpOn, targetLabel, loopAddr) = - ( - gencde (arg1, ToX0, NotEnd, loopAddr); - topInX0 := false; - gen((if jumpOn then testBitBranchNonZero else testBitBranchZero) - (X0, 0w0, targetLabel), cvec) - ) - - | genTest(BICBinary{oper=WordComparison{test, isSigned}, arg1, arg2}, jumpOn, targetLabel, loopAddr) = - let - val (cond, condNot) = - case (test, isSigned) of - (TestEqual, _) => (CondEqual, CondNotEqual) - | (TestLess, true) => (CondSignedLess, CondSignedGreaterEq) - | (TestLessEqual, true) => (CondSignedLessEq, CondSignedGreater) - | (TestGreater, true) => (CondSignedGreater, CondSignedLessEq) - | (TestGreaterEqual, true) => (CondSignedGreaterEq, CondSignedLess) - | (TestLess, false) => (CondCarryClear, CondCarrySet) - | (TestLessEqual, false) => (CondUnsignedLowOrEq, CondUnsignedHigher) - | (TestGreater, false) => (CondUnsignedHigher, CondUnsignedLowOrEq) - | (TestGreaterEqual, false) => (CondCarrySet, CondCarryClear) - | (TestUnordered, _) => raise InternalError "WordComparison: TestUnordered" - in - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - topInX0 := false; - genPopReg(X1, cvec); (* First argument. *) - comparePolyRegs(X1, X0, cvec); - gen(conditionalBranch(if jumpOn then cond else condNot, targetLabel), cvec) - end - - | genTest(BICBinary{oper=PointerEq, arg1, arg2}, jumpOn, targetLabel, loopAddr) = - ( - gencde (arg1, ToStack, NotEnd, loopAddr); - gencde (arg2, ToX0, NotEnd, loopAddr); - decsp(); - topInX0 := false; - genPopReg(X1, cvec); (* First argument. *) - comparePolyRegs(X1, X0, cvec); - gen(conditionalBranch(if jumpOn then CondEqual else CondNotEqual, targetLabel), cvec) - ) - - | genTest(BICTagTest { test, tag=tagValue, ... }, jumpOn, targetLabel, loopAddr) = - ( - gencde (test, ToX0, NotEnd, loopAddr); - topInX0 := false; - gen((if is32in64 then subSImmediate32 else subSImmediate) - {regN=X0, regD=XZero, immed=taggedWord tagValue, shifted=false}, cvec); - gen(conditionalBranch(if jumpOn then CondEqual else CondNotEqual, targetLabel), cvec) - ) - - | genTest(BICCond (testPart, thenPart, elsePart), jumpOn, targetLabel, loopAddr) = - let - val toElse = createLabel() and exitJump = createLabel() - in - genTest(testPart, false, toElse, loopAddr); - genTest(thenPart, jumpOn, targetLabel, loopAddr); - gen(unconditionalBranch exitJump, cvec); - gen(setLabel toElse, cvec); - genTest(elsePart, jumpOn, targetLabel, loopAddr); - gen(setLabel exitJump, cvec) - end - - | genTest(testCode, jumpOn, targetLabel, loopAddr) = - ( - gencde (testCode, ToStack, NotEnd, loopAddr); - genPopReg(X0, cvec); - gen((if is32in64 then subSImmediate32 else subSImmediate) - {regN=X0, regD=XZero, immed=taggedWord 0w1, shifted=false}, cvec); - gen(conditionalBranch(if jumpOn then CondEqual else CondNotEqual, targetLabel), cvec); - decsp() (* conditional branch pops a value. 
*) - ) - - and genEval (eval, tailKind : tail) : unit = - let - val argList : backendIC list = List.map #1 (#argList eval) - val argsToPass : int = List.length argList; - - (* Load arguments *) - fun loadArgs [] = () - | loadArgs (v :: vs) = - let (* Push each expression onto the stack. *) - val () = gencde(v, ToStack, NotEnd, NONE) - in - loadArgs vs - end; - - (* Have to guarantee that the expression to return the function - is evaluated before the arguments. *) - - (* Returns true if evaluating it later is safe. *) - fun safeToLeave (BICConstnt _) = true - | safeToLeave (BICLambda _) = true - | safeToLeave (BICExtract _) = true - | safeToLeave (BICField {base, ...}) = safeToLeave base - | safeToLeave (BICLoadContainer {base, ...}) = safeToLeave base - | safeToLeave _ = false - - val () = - if (case argList of [] => true | _ => safeToLeave (#function eval)) - then - let - (* Can load the args first. *) - val () = loadArgs argList - in - gencde (#function eval, ToStack, NotEnd, NONE) - end - - else - let - (* The expression for the function is too complicated to - risk leaving. It might have a side-effect and we must - ensure that any side-effects it has are done before the - arguments are loaded. *) - val () = gencde(#function eval, ToStack, NotEnd, NONE); - val () = loadArgs(argList); - (* Load the function again. *) - val () = genList(loadScaledWord{dest=X0, base=X_MLStackPtr, work=X16, offset=argsToPass}, cvec) - val () = genPushReg(X0, cvec) - in - incsp () - end - - in (* body of genEval *) - case tailKind of - NotEnd => (* Normal call. *) - let - val () = genPopReg(X8, cvec) (* Pop the closure pointer. *) - (* We need to put the first 8 arguments into registers and - leave the rest on the stack. *) - fun loadArg(n, reg) = - if argsToPass > n - then genList(loadScaledWord{dest=reg, base=X_MLStackPtr, work=X16, offset=argsToPass-n-1}, cvec) - else () - val () = loadArg(0, X0) - val () = loadArg(1, X1) - val () = loadArg(2, X2) - val () = loadArg(3, X3) - val () = loadArg(4, X4) - val () = loadArg(5, X5) - val () = loadArg(6, X6) - val () = loadArg(7, X7) - in - if is32in64 - then - ( - (* Can't use an indexed load because the only options for scaling are either 1 or 8. *) - indexToAbsoluteAddress(X8, X9, cvec); - gen(loadRegScaled{regT=X9, regN=X9, unitOffset=0}, cvec) (* Entry point *) - ) - else gen(loadRegScaled{regT=X9, regN=X8, unitOffset=0}, cvec); (* Entry point *) - gen(branchAndLinkReg X9, cvec); - (* We have popped the closure pointer. The caller has popped the stack - arguments and we have pushed the result value. The register arguments - are still on the stack. *) - topInX0 := true; - realstackptr := !realstackptr - Int.max(argsToPass-8, 0) - 1 (* Args popped by caller. *) - end - - | EndOfProc => (* Tail recursive call. *) - let - val () = genPopReg(X8, cvec) (* Pop the closure pointer. *) - val () = decsp() - (* Get the return address into X30. *) - val () = genList(loadScaledWord{dest=X30, base=X_MLStackPtr, work=X16, offset= !realstackptr}, cvec) - - (* Load the register arguments *) - fun loadArg(n, reg) = - if argsToPass > n - then genList(loadScaledWord{dest=reg, base=X_MLStackPtr, work=X16, offset=argsToPass-n-1}, cvec) - else () - val () = loadArg(0, X0) - val () = loadArg(1, X1) - val () = loadArg(2, X2) - val () = loadArg(3, X3) - val () = loadArg(4, X4) - val () = loadArg(5, X5) - val () = loadArg(6, X6) - val () = loadArg(7, X7) - (* We need to move the stack arguments into the original argument area. 
*) - - (* This is the total number of words that this function is responsible for. - It includes the stack arguments that the caller expects to be removed. *) - val itemsOnStack = !realstackptr + 1 + numOfArgs - - (* Stack arguments are moved using X9. *) - fun moveStackArg n = - if n >= argsToPass - then () - else - let - val () = loadArg(n, X9) - val destOffset = itemsOnStack - (n-8) - 1 - val () = genList(storeScaledWord{store=X9, base=X_MLStackPtr, work=X16, offset=destOffset}, cvec) - in - moveStackArg(n+1) - end - - val () = moveStackArg 8 - in - resetStack(itemsOnStack - Int.max(argsToPass-8, 0), false, cvec); - if is32in64 - then - ( - indexToAbsoluteAddress(X8, X9, cvec); - gen(loadRegScaled{regT=X9, regN=X9, unitOffset=0}, cvec) (* Entry point *) - ) - else gen(loadRegScaled{regT=X9, regN=X8, unitOffset=0}, cvec); (* Entry point *) - gen(branchRegister X9, cvec) - (* Since we're not returning we can ignore the stack pointer value. *) - end - end (* genEval *) - - (* Begin generating the code for the function. *) - val prefix = ref [] - (* Push the arguments passed in registers. *) - val () = if numOfArgs >= 8 then genPushReg (X7, prefix) else () - val () = if numOfArgs >= 7 then genPushReg (X6, prefix) else () - val () = if numOfArgs >= 6 then genPushReg (X5, prefix) else () - val () = if numOfArgs >= 5 then genPushReg (X4, prefix) else () - val () = if numOfArgs >= 4 then genPushReg (X3, prefix) else () - val () = if numOfArgs >= 3 then genPushReg (X2, prefix) else () - val () = if numOfArgs >= 2 then genPushReg (X1, prefix) else () - val () = if numOfArgs >= 1 then genPushReg (X0, prefix) else () - val () = genPushReg (X30, prefix) - (* The new code generator does not pass X8 if it is calling a constant function with no - closure. For simplicity we set X8 to the current closure here. *) - val () = if hasClosure then () else gen(loadAddressConstant(X8, closureAsAddress resultClosure), prefix) - val () = genPushReg (X8, prefix) (* Push closure pointer *) - - (* Generate the function. *) - (* Assume we always want a result. There is otherwise a problem if the - called routine returns a result of type void (i.e. no result) but the - caller wants a result (e.g. the identity function). *) - val () = gencde (pt, ToX0, EndOfProc, NONE) - val () = resetStack(1, false, cvec) (* Skip over the pushed closure *) - val () = genPopReg(X30, cvec) (* Return address => pop into X30 *) - val () = resetStack(numOfArgs, false, cvec) (* Remove the arguments *) - val () = gen(returnRegister X30, cvec) (* Jump to X30 *) - - (* Now we know the maximum stack size we can code-gen the stack check. - This needs to go in after we have saved X30. *) - val () = checkStackCode(X10, !maxStack, false, prefix) - val instructions = List.rev(!prefix) @ List.rev(!cvec) - - in (* body of codegen *) - (* Having code-generated the body of the function, it is copied - into a new data segment. 
*) - generateCode{instrs=instructions, name=name, parameters=parameters, resultClosure=resultClosure, profileObject=createProfileObject()} - end (* codegen *) - - fun gencodeLambda({ name, body, argTypes, localCount, closure, ...}:bicLambdaForm, parameters, closureRef) = - codegen (body, name, closureRef, List.length argTypes, localCount, parameters, not(null closure)) - - - structure Foreign = Arm64Foreign - - structure Sharing = - struct - open BackendTree.Sharing - type closureRef = closureRef - end - -end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML index 554583bb..2d2bef55 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64PreAssembly.ML @@ -1,898 +1,904 @@ (* Copyright (c) 2021-2 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* The pre-assembly layer goes below the icode and allows peep-hole optimisation. *) functor Arm64PreAssembly( structure Arm64Assembly: ARM64ASSEMBLY structure Debug: DEBUG structure Pretty: PRETTY ): ARM64PREASSEMBLY = struct open Arm64Assembly exception InternalError = Misc.InternalError (* Reversed cons and append to make the code easier to read. 
*) infix 5 <::> <@> fun tl <::> hd = hd :: tl and snd <@> fst = fst @ snd (* Many of the datatypes are inherited from Arm64Assembly *) datatype loadType = Load64 | Load32 | Load16 | Load8 and opSize = OpSize32 | OpSize64 and logicalOp = LogAnd | LogOr | LogXor and floatSize = Float32 | Double64 and shiftDirection = ShiftLeft | ShiftRightLogical | ShiftRightArithmetic and multKind = MultAdd32 | MultSub32 | MultAdd64 | MultSub64 | SignedMultAddLong (* 32bit*32bit + 64bit => 64Bit *) | SignedMultHigh (* High order part of 64bit*64Bit *) and fpUnary = NegFloat | NegDouble | AbsFloat | AbsDouble | ConvFloatToDble | ConvDbleToFloat and fpBinary = MultiplyFP | DivideFP | AddFP | SubtractFP and unscaledType = NoUpdate | PreIndex | PostIndex and condSet = CondSet | CondSetIncr | CondSetInvert | CondSetNegate and bitfieldKind = BFUnsigned | BFSigned | BFInsert and brRegType = BRRBranch | BRRAndLink | BRRReturn type precodeLabel = labels datatype precode = (* Basic instructions *) AddImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | SubImmediate of {regN: xReg, regD: xReg, immed: word, shifted: bool, opSize: opSize, setFlags: bool} | AddShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | SubShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, opSize: opSize, setFlags: bool} | AddExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | SubExtendedReg of {regM: xReg, regN: xReg, regD: xReg, extend: Word8.word extend, opSize: opSize, setFlags: bool} | MultiplyAndAddSub of {regM: xReg, regN: xReg, regA: xReg, regD: xReg, multKind: multKind} | DivideRegs of {regM: xReg, regN: xReg, regD: xReg, isSigned: bool, opSize: opSize} | LogicalShiftedReg of {regM: xReg, regN: xReg, regD: xReg, shift: shiftType, logOp: logicalOp, opSize: opSize, setFlags: bool} | LoadRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | LoadFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | StoreRegScaled of {regT: xReg, regN: xReg, unitOffset: int, loadType: loadType} | StoreFPRegScaled of {regT: vReg, regN: xReg, unitOffset: int, floatSize: floatSize} | LoadRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegUnscaled of {regT: xReg, regN: xReg, byteOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegUnscaled of {regT: vReg, regN: xReg, byteOffset: int, floatSize: floatSize, unscaledType: unscaledType} | LoadRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | StoreRegIndexed of {regT: xReg, regN: xReg, regM: xReg, loadType: loadType, option: scale extend} | LoadFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} | StoreFPRegIndexed of {regT: vReg, regN: xReg, regM: xReg, floatSize: floatSize, option: scale extend} (* LoadAcquire and StoreRelease are used for mutables. *) | LoadAcquireReg of {regN: xReg, regT: xReg, loadType: loadType} | StoreReleaseReg of {regN: xReg, regT: xReg, loadType: loadType} (* LoadAcquireExclusiveRegister and StoreReleaseExclusiveRegister are used for mutexes. 
*) | LoadAcquireExclusiveRegister of {regN: xReg, regT: xReg} | StoreReleaseExclusiveRegister of {regS: xReg, regT: xReg, regN: xReg} | MemBarrier | LoadRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | StoreRegPair of { regT1: xReg, regT2: xReg, regN: xReg, unitOffset: int, loadType: loadType, unscaledType: unscaledType} | LoadFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | StoreFPRegPair of { regT1: vReg, regT2: vReg, regN: xReg, unitOffset: int, floatSize: floatSize, unscaledType: unscaledType} | ConditionalSet of {regD: xReg, regTrue: xReg, regFalse: xReg, cond: condition, condSet: condSet, opSize: opSize} | BitField of {immr: word, imms: word, regN: xReg, regD: xReg, opSize: opSize, bitfieldKind: bitfieldKind} | ShiftRegisterVariable of {regM: xReg, regN: xReg, regD: xReg, opSize: opSize, shiftDirection: shiftDirection} | BitwiseLogical of { bits: Word64.word, regN: xReg, regD: xReg, opSize: opSize, setFlags: bool, logOp: logicalOp} (* Floating point *) | MoveGeneralToFP of { regN: xReg, regD: vReg, floatSize: floatSize} | MoveFPToGeneral of {regN: vReg, regD: xReg, floatSize: floatSize} | CvtIntToFP of { regN: xReg, regD: vReg, floatSize: floatSize, opSize: opSize} | CvtFloatToInt of { round: IEEEReal.rounding_mode, regN: vReg, regD: xReg, floatSize: floatSize, opSize: opSize} | FPBinaryOp of { regM: vReg, regN: vReg, regD: vReg, floatSize: floatSize, fpOp: fpBinary} | FPComparison of { regM: vReg, regN: vReg, floatSize: floatSize} | FPUnaryOp of {regN: vReg, regD: vReg, fpOp: fpUnary} (* Branches and Labels. *) | SetLabel of precodeLabel | ConditionalBranch of condition * precodeLabel | UnconditionalBranch of precodeLabel | BranchAndLink of precodeLabel | BranchReg of {regD: xReg, brRegType: brRegType } | LoadLabelAddress of xReg * precodeLabel | TestBitBranch of { test: xReg, bit: Word8.word, label: precodeLabel, onZero: bool } | CompareBranch of { test: xReg, label: precodeLabel, onZero: bool, opSize: opSize } (* Composite instructions *) | MoveXRegToXReg of {sReg: xReg, dReg: xReg} | LoadNonAddr of xReg * Word64.word | LoadAddr of xReg * machineWord | RTSTrap of { rtsEntry: int, work: xReg, save: xReg list } | AllocateMemoryFixedSize of { bytes: word, dest: xReg, save: xReg list, work: xReg } | AllocateMemoryVariableSize of { sizeReg: xReg, dest: xReg, save: xReg list, work: xReg } (* Branch table for indexed case. startLabel is the address of the first label in the list. The branch table is a sequence of unconditional branches. 
*) | BranchTable of { startLabel: precodeLabel, brTable: precodeLabel list } + | LoadGlobalHeapBaseInCallback of xReg fun toAssembler([], code) = code | toAssembler(AddImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addImmediate | (OpSize32, false) => addImmediate32 | (OpSize64, true) => addSImmediate | (OpSize32, true) => addSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(SubImmediate{regN, regD, immed, shifted, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subImmediate | (OpSize32, false) => subImmediate32 | (OpSize64, true) => subSImmediate | (OpSize32, true) => subSImmediate32 in toAssembler(rest, code <::> instr{regN=regN, regD=regD, immed=immed, shifted=shifted}) end | toAssembler(AddShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => addShiftedReg | (OpSize32, false) => addShiftedReg32 | (OpSize64, true) => addSShiftedReg | (OpSize32, true) => addSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(SubShiftedReg{regM, regN, regD, shift, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subShiftedReg | (OpSize32, false) => subShiftedReg32 | (OpSize64, true) => subSShiftedReg | (OpSize32, true) => subSShiftedReg32 in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, shift=shift}) end | toAssembler(AddExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = (* Add/SubExtended are only used to access XSP. *) let val instr = case (opSize, setFlags) of (OpSize64, false) => addExtendedReg | (OpSize32, false) => raise InternalError "AddExtendedReg; 32" | (OpSize64, true) => addSExtendedReg | (OpSize32, true) => raise InternalError "AddExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(SubExtendedReg{regM, regN, regD, extend, opSize, setFlags} :: rest, code) = let val instr = case (opSize, setFlags) of (OpSize64, false) => subExtendedReg | (OpSize32, false) => raise InternalError "AddExtendedReg; 32" | (OpSize64, true) => subSExtendedReg | (OpSize32, true) => raise InternalError "AddExtendedReg; 32" in toAssembler(rest, code <::> instr{regM=regM, regN=regN, regD=regD, extend=extend}) end | toAssembler(MultiplyAndAddSub{regM, regN, regA, regD, multKind} :: rest, code) = let val instr = case multKind of MultAdd32 => multiplyAndAdd32{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub32 => multiplyAndSub32{regM=regM, regN=regN, regA=regA, regD=regD} | MultAdd64 => multiplyAndAdd{regM=regM, regN=regN, regA=regA, regD=regD} | MultSub64 => multiplyAndSub{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultAddLong => signedMultiplyAndAddLong{regM=regM, regN=regN, regA=regA, regD=regD} | SignedMultHigh => signedMultiplyHigh{regM=regM, regN=regN, regD=regD} in toAssembler(rest, code <::> instr) end | toAssembler(DivideRegs{regM, regN, regD, isSigned, opSize} :: rest, code) = let val instr = case (isSigned, opSize) of (true, OpSize64) => signedDivide | (true, OpSize32) => signedDivide32 | (false, OpSize64) => unsignedDivide | (false, OpSize32) => unsignedDivide32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(LogicalShiftedReg{regM, regN, regD, shift, 
logOp, opSize, setFlags} :: rest, code) = let val instr = case (logOp, setFlags, opSize) of (LogAnd, false, OpSize64) => andShiftedReg | (LogAnd, true, OpSize64) => andsShiftedReg | (LogOr, false, OpSize64) => orrShiftedReg | (LogXor, false, OpSize64) => eorShiftedReg | (LogAnd, false, OpSize32) => andShiftedReg32 | (LogAnd, true, OpSize32) => andsShiftedReg32 | (LogOr, false, OpSize32) => orrShiftedReg32 | (LogXor, false, OpSize32) => eorShiftedReg32 | _ => raise InternalError "setFlags not valid with OR or XOR" (* There are also versions of AND/OR/XOR which operate on a complement (NOT) of the shifted register. It's probably not worth looking for a use for them. *) in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD, shift=shift}) end | toAssembler(LoadRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => loadRegScaled | Load32 => loadRegScaled32 | Load16 => loadRegScaled16 | Load8 => loadRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreRegScaled{regT, regN, unitOffset, loadType} :: rest, code) = let val instr = case loadType of Load64 => storeRegScaled | Load32 => storeRegScaled32 | Load16 => storeRegScaled16 | Load8 => storeRegScaledByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => loadRegScaledFloat | Double64 => loadRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreFPRegScaled{regT, regN, unitOffset, floatSize} :: rest, code) = let val instr = case floatSize of Float32 => storeRegScaledFloat | Double64 => storeRegScaledDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => loadRegUnscaled | (Load32, NoUpdate) => loadRegUnscaled32 | (Load16, NoUpdate) => loadRegUnscaled16 | (Load8, NoUpdate) => loadRegUnscaledByte | (Load64, PreIndex) => loadRegPreIndex | (Load32, PreIndex) => loadRegPreIndex32 | (Load16, PreIndex) => raise InternalError "loadRegPreIndex16" | (Load8, PreIndex) => loadRegPreIndexByte | (Load64, PostIndex) => loadRegPostIndex | (Load32, PostIndex) => loadRegPostIndex32 | (Load16, PostIndex) => raise InternalError "loadRegPostIndex16" | (Load8, PostIndex) => loadRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => loadRegUnscaledFloat | (Double64, NoUpdate) => loadRegUnscaledDouble | _ => raise InternalError "LoadFPRegUnscaled: pre/post indexed" in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreRegUnscaled{regT, regN, byteOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => storeRegUnscaled | (Load32, NoUpdate) => storeRegUnscaled32 | (Load16, NoUpdate) => storeRegUnscaled16 | (Load8, NoUpdate) => storeRegUnscaledByte | (Load64, PreIndex) => storeRegPreIndex | (Load32, PreIndex) => storeRegPreIndex32 | (Load16, PreIndex) => raise InternalError 
"storeRegPreIndex16" | (Load8, PreIndex) => storeRegPreIndexByte | (Load64, PostIndex) => storeRegPostIndex | (Load32, PostIndex) => storeRegPostIndex32 | (Load16, PostIndex) => raise InternalError "storeRegPostIndex16" | (Load8, PostIndex) => storeRegPostIndexByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(StoreFPRegUnscaled{regT, regN, byteOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Float32, NoUpdate) => storeRegUnscaledFloat | (Double64, NoUpdate) => storeRegUnscaledDouble | _ => raise InternalError "StoreFPRegUnscaled: pre/post indexed" in toAssembler(rest, code <::> instr{regT=regT, regN=regN, byteOffset=byteOffset}) end | toAssembler(LoadRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => loadRegIndexed | Load32 => loadRegIndexed32 | Load16 => loadRegIndexed16 | Load8 => loadRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreRegIndexed{regT, regN, regM, loadType, option} :: rest, code) = let val instr = case loadType of Load64 => storeRegIndexed | Load32 => storeRegIndexed32 | Load16 => storeRegIndexed16 | Load8 => storeRegIndexedByte in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(LoadFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => loadRegIndexedFloat | Double64 => loadRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(StoreFPRegIndexed{regT, regN, regM, floatSize, option} :: rest, code) = let val instr = case floatSize of Float32 => storeRegIndexedFloat | Double64 => storeRegIndexedDouble in toAssembler(rest, code <::> instr{regT=regT, regN=regN, regM=regM, option=option}) end | toAssembler(LoadAcquireReg{regN, regT, loadType} :: rest, code) = let val loadInstr = case loadType of Load64 => loadAcquire | Load32 => loadAcquire32 | Load8 => loadAcquireByte | _ => raise InternalError "LoadAcquire: Unsupported size" (* Not used *) in toAssembler(rest, code <::> loadInstr{regT=regT, regN=regN}) end | toAssembler(StoreReleaseReg{regN, regT, loadType} :: rest, code) = let val storeInstr = case loadType of Load64 => storeRelease | Load32 => storeRelease32 | Load8 => storeReleaseByte | _ => raise InternalError "StoreRelease: Unsupported size" (* Not used *) in toAssembler(rest, code <::> storeInstr{regT=regT, regN=regN}) end | toAssembler(LoadAcquireExclusiveRegister{regN, regT} :: rest, code) = toAssembler(rest, code <::> loadAcquireExclusiveRegister{regN=regN, regT=regT}) | toAssembler(StoreReleaseExclusiveRegister{regN, regT, regS} :: rest, code) = toAssembler(rest, code <::> storeReleaseExclusiveRegister{regN=regN, regT=regT, regS=regS}) | toAssembler(MemBarrier :: rest, code) = toAssembler(rest, code <::> dmbIsh) | toAssembler(LoadRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => loadPairOffset | (Load64, PreIndex) => loadPairPreIndexed | (Load64, PostIndex) => loadPairPostIndexed | (Load32, NoUpdate) => loadPairOffset32 | (Load32, PreIndex) => loadPairPreIndexed32 | (Load32, PostIndex) => loadPairPostIndexed32 | _ => raise InternalError "LoadRegPair: unimplemented" in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, 
unitOffset=unitOffset}) end | toAssembler(StoreRegPair{ regT1, regT2, regN, unitOffset, loadType, unscaledType} :: rest, code) = let val instr = case (loadType, unscaledType) of (Load64, NoUpdate) => storePairOffset | (Load64, PreIndex) => storePairPreIndexed | (Load64, PostIndex) => storePairPostIndexed | (Load32, NoUpdate) => storePairOffset32 | (Load32, PreIndex) => storePairPreIndexed32 | (Load32, PostIndex) => storePairPostIndexed32 | _ => raise InternalError "StoreRegPair: unimplemented" in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(LoadFPRegPair{ regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Double64, NoUpdate) => loadPairOffsetDouble | (Double64, PreIndex) => loadPairPreIndexedDouble | (Double64, PostIndex) => loadPairPostIndexedDouble | (Float32, NoUpdate) => loadPairOffsetFloat | (Float32, PreIndex) => loadPairPreIndexedFloat | (Float32, PostIndex) => loadPairPostIndexedFloat in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(StoreFPRegPair{ regT1, regT2, regN, unitOffset, floatSize, unscaledType} :: rest, code) = let val instr = case (floatSize, unscaledType) of (Double64, NoUpdate) => storePairOffsetDouble | (Double64, PreIndex) => storePairPreIndexedDouble | (Double64, PostIndex) => storePairPostIndexedDouble | (Float32, NoUpdate) => storePairOffsetFloat | (Float32, PreIndex) => storePairPreIndexedFloat | (Float32, PostIndex) => storePairPostIndexedFloat in toAssembler(rest, code <::> instr{regT1=regT1, regT2=regT2, regN=regN, unitOffset=unitOffset}) end | toAssembler(ConditionalSet{regD, regTrue, regFalse, cond, condSet, opSize} :: rest, code) = let val instr = case (condSet, opSize) of (CondSet, OpSize64) => conditionalSet | (CondSetIncr, OpSize64) => conditionalSetIncrement | (CondSetInvert, OpSize64) => conditionalSetInverted | (CondSetNegate, OpSize64) => conditionalSetNegated | (CondSet, OpSize32) => conditionalSet32 | (CondSetIncr, OpSize32) => conditionalSetIncrement32 | (CondSetInvert, OpSize32) => conditionalSetInverted32 | (CondSetNegate, OpSize32) => conditionalSetNegated32 in toAssembler(rest, code <::> instr{regD=regD, regTrue=regTrue, regFalse=regFalse, cond=cond}) end | toAssembler(BitField{immr, imms, regN, regD, opSize, bitfieldKind} :: rest, code) = let val bfInstr = case (bitfieldKind, opSize) of (BFSigned, OpSize64) => signedBitfieldMove64 | (BFUnsigned, OpSize64) => unsignedBitfieldMove64 | (BFInsert, OpSize64) => bitfieldMove64 | (BFSigned, OpSize32) => signedBitfieldMove32 | (BFUnsigned, OpSize32) => unsignedBitfieldMove32 | (BFInsert, OpSize32) => bitfieldMove32 in toAssembler(rest, code <::> bfInstr{immr=immr, imms=imms, regN=regN, regD=regD}) end | toAssembler(ShiftRegisterVariable{regM, regN, regD, opSize, shiftDirection} :: rest, code) = let val instr = case (shiftDirection, opSize) of (ShiftLeft, OpSize64) => logicalShiftLeftVariable | (ShiftLeft, OpSize32) => logicalShiftLeftVariable32 | (ShiftRightLogical, OpSize64) => logicalShiftRightVariable | (ShiftRightLogical, OpSize32) => logicalShiftRightVariable32 | (ShiftRightArithmetic, OpSize64) => arithmeticShiftRightVariable | (ShiftRightArithmetic, OpSize32) => arithmeticShiftRightVariable32 in toAssembler(rest, code <::> instr{regN=regN, regM=regM, regD=regD}) end | toAssembler(BitwiseLogical{ bits, regN, regD, opSize, setFlags, logOp} :: rest, code) = let val instr = case (logOp, 
setFlags, opSize) of (LogAnd, false, OpSize64) => bitwiseAndImmediate | (LogAnd, true, OpSize64) => bitwiseAndSImmediate | (LogOr, false, OpSize64) => bitwiseOrImmediate | (LogXor, false, OpSize64) => bitwiseXorImmediate | (LogAnd, false, OpSize32) => bitwiseAndImmediate32 | (LogAnd, true, OpSize32) => bitwiseAndSImmediate32 | (LogOr, false, OpSize32) => bitwiseOrImmediate32 | (LogXor, false, OpSize32) => bitwiseXorImmediate32 | _ => raise InternalError "flags not valid with OR or XOR" in toAssembler(rest, code <::> instr{regN=regN, regD=regD, bits=bits}) end | toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Float32} :: rest, code) = toAssembler(rest, code <::> moveGeneralToFloat{regN=regN, regD=regD}) | toAssembler(MoveGeneralToFP{ regN, regD, floatSize=Double64} :: rest, code) = toAssembler(rest, code <::> moveGeneralToDouble{regN=regN, regD=regD}) | toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Float32} :: rest, code) = toAssembler(rest, code <::> moveFloatToGeneral{regN=regN, regD=regD}) | toAssembler(MoveFPToGeneral{ regN, regD, floatSize=Double64} :: rest, code) = toAssembler(rest, code <::> moveDoubleToGeneral{regN=regN, regD=regD}) | toAssembler(CvtIntToFP{ regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (opSize, floatSize) of (OpSize32, Float32) => convertInt32ToFloat | (OpSize64, Float32) => convertIntToFloat | (OpSize32, Double64) => convertInt32ToDouble | (OpSize64, Double64) => convertIntToDouble in toAssembler(rest, code <::> instr{regN=regN, regD=regD}) end | toAssembler(CvtFloatToInt{ round, regN, regD, floatSize, opSize} :: rest, code) = let val instr = case (floatSize, opSize) of (Float32, OpSize32) => convertFloatToInt32 | (Float32, OpSize64) => convertFloatToInt | (Double64, OpSize32) => convertDoubleToInt32 | (Double64, OpSize64) => convertDoubleToInt in toAssembler(rest, code <::> instr round {regN=regN, regD=regD}) end | toAssembler(FPBinaryOp{ regM, regN, regD, floatSize, fpOp} :: rest, code) = let val instr = case (fpOp, floatSize) of (MultiplyFP, Float32) => multiplyFloat | (DivideFP, Float32) => divideFloat | (AddFP, Float32) => addFloat | (SubtractFP, Float32) => subtractFloat | (MultiplyFP, Double64) => multiplyDouble | (DivideFP, Double64) => divideDouble | (AddFP, Double64) => addDouble | (SubtractFP, Double64) => subtractDouble in toAssembler(rest, code <::> instr {regN=regN, regM=regM, regD=regD}) end | toAssembler(FPComparison{ regM, regN, floatSize} :: rest, code) = toAssembler(rest, code <::> (case floatSize of Float32 => compareFloat | Double64 => compareDouble){regN=regN, regM=regM}) | toAssembler(FPUnaryOp{ regN, regD, fpOp} :: rest, code) = let val instr = case fpOp of NegFloat => negFloat | NegDouble => negDouble | AbsFloat => absFloat | AbsDouble => absDouble | ConvFloatToDble => convertFloatToDouble | ConvDbleToFloat => convertDoubleToFloat in toAssembler(rest, code <::> instr {regN=regN, regD=regD}) end | toAssembler(SetLabel label :: rest, code) = toAssembler(rest, code <::> setLabel label) | toAssembler(ConditionalBranch(cond, label) :: rest, code) = toAssembler(rest, code <::> conditionalBranch(cond, label)) | toAssembler(UnconditionalBranch label :: rest, code) = toAssembler(rest, code <::> unconditionalBranch label) | toAssembler(BranchAndLink label :: rest, code) = toAssembler(rest, code <::> branchAndLink label) | toAssembler(BranchReg{regD, brRegType=BRRBranch} :: rest, code) = toAssembler(rest, code <::> branchRegister regD) | toAssembler(BranchReg{regD, brRegType=BRRAndLink} :: rest, code) = 
toAssembler(rest, code <::> branchAndLinkReg regD) | toAssembler(BranchReg{regD, brRegType=BRRReturn} :: rest, code) = toAssembler(rest, code <::> returnRegister regD) | toAssembler(LoadLabelAddress(reg, label) :: rest, code) = toAssembler(rest, code <::> loadLabelAddress(reg, label)) | toAssembler(TestBitBranch{ test, bit, label, onZero } :: rest, code) = toAssembler(rest, code <::> (if onZero then testBitBranchZero else testBitBranchNonZero)(test, bit, label)) | toAssembler(CompareBranch{ test, label, onZero, opSize } :: rest, code) = let val instr = case (onZero, opSize) of (true, OpSize64) => compareBranchZero | (false, OpSize64) => compareBranchNonZero | (true, OpSize32) => compareBranchZero32 | (false, OpSize32) => compareBranchNonZero32 in toAssembler(rest, code <::> instr(test, label)) end (* Register-register moves - special case for XSP. *) | toAssembler(MoveXRegToXReg{sReg=XSP, dReg} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=XSP, regD=dReg, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg=XSP} :: rest, code) = toAssembler(rest, code <::> addImmediate{regN=sReg, regD=XSP, immed=0w0, shifted=false}) | toAssembler(MoveXRegToXReg{sReg, dReg} :: rest, code) = toAssembler(rest, code <::> orrShiftedReg{regN=XZero, regM=sReg, regD=dReg, shift=ShiftNone}) | toAssembler(LoadNonAddr(xReg, value) :: rest, code) = let (* Load a non-address constant. Tries to use movz/movn/movk if that can be done easily, othewise uses loadNonAddressConstant to load the value from the non-address constant area. *) fun extW (v, h) = Word.andb(Word.fromLarge(LargeWord.>>(Word64.toLarge v, h*0w16)), 0wxffff) val hw0 = extW(value, 0w3) and hw1 = extW(value, 0w2) and hw2 = extW(value, 0w1) and hw3 = extW(value, 0w0) val nextCode = if value < 0wx100000000 then let (* 32-bit constants can be loaded using at most a movz and movk but various cases can be reduced since all 32-bit operations set the top word to zero. *) val hi = hw2 and lo = hw3 in (* 32-bit constants can be loaded with at most a movz and a movk but it may be that there is something shorter. *) if hi = 0w0 then code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} else if hi = 0wxffff then code <::> moveNot32{regD=xReg, immediate=Word.xorb(0wxffff, lo), shift=0w0} else if lo = 0w0 then code <::> moveZero32{regD=xReg, immediate=hi, shift=0w16} else if isEncodableBitPattern(value, WordSize32) then code <::> bitwiseOrImmediate32{bits=value, regN=XZero, regD=xReg} else (* Have to use two instructions *) code <::> moveZero32{regD=xReg, immediate=lo, shift=0w0} <::> moveKeep{regD=xReg, immediate=hi, shift=0w16} end else if hw0 = 0wxffff andalso hw1 = 0wxffff andalso hw2 = 0wxffff then code <::> moveNot{regD=xReg, immediate=Word.xorb(0wxffff, hw3), shift=0w0} else if hw1 = 0w0 andalso hw2 = 0w0 then (* This is common for length words with a flags byte *) code <::> moveZero32{regD=xReg, immediate=hw3, shift=0w0} <::> moveKeep{regD=xReg, immediate=hw0, shift=0w48} else code <::> loadNonAddressConstant(xReg, value) in toAssembler(rest, nextCode) end | toAssembler(LoadAddr(dReg, source) :: rest, code) = toAssembler(rest, loadAddressConstant(dReg, source) :: code) | toAssembler(RTSTrap{ rtsEntry, work, save } :: rest, code) = let (* Because X30 is used in the branchAndLink it has to be pushed across any trap. 
*) val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save in toAssembler(rest, code <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=rtsEntry} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) ) end | toAssembler(AllocateMemoryFixedSize{ bytes, dest, save, work } :: rest, code) = let val label = createLabel() val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save val allocCode = code <::> (* Subtract the number of bytes required from the heap pointer. *) subImmediate{regN=X_MLHeapAllocPtr, regD=dest, immed=bytes, shifted=false} <::> (* Compare the result with the heap limit. *) subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarrySet, label) <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::> setLabel label <::> (* Update the heap pointer. *) orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone} in toAssembler(rest, allocCode) end | toAssembler(AllocateMemoryVariableSize{ sizeReg, dest, save, work } :: rest, code) = let val trapLabel = createLabel() and noTrapLabel = createLabel() val saveX30 = List.exists (fn r => r = X30) save val preserve = List.filter (fn r => r <> X30) save val allocCode = ( (* Subtract the size into the result register. Subtract a further word for the length word and round down in 32-in-64. *) if is32in64 then code <::> subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w2} <::> subImmediate{regN=dest, regD=dest, immed=0w4, shifted=false} <::> bitwiseAndImmediate{bits= ~ 0w8, regN=dest, regD=dest} else code <::> subShiftedReg{regM=sizeReg, regN=X_MLHeapAllocPtr, regD=dest, shift=ShiftLSL 0w3} <::> subImmediate{regN=dest, regD=dest, immed=0w8, shifted=false} ) <::> (* Check against the limit. If the size is large enough it is possible that this could wrap round. To check for that we trap if either the result is less than the limit or if it is now greater than the allocation pointer. *) subSShiftedReg{regM=X_MLHeapLimit, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarryClear, trapLabel) <::> subSShiftedReg{regM=X_MLHeapAllocPtr, regN=dest, regD=XZero, shift=ShiftNone} <::> conditionalBranch(CondCarryClear, noTrapLabel) <::> setLabel trapLabel <@> (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) <::> loadRegScaled{regT=work, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset} <::> branchAndLinkReg work <::> registerMask preserve <@> (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) <::> setLabel noTrapLabel <::> (* Update the heap pointer. 
*) orrShiftedReg{regN=XZero, regM=dest, regD=X_MLHeapAllocPtr, shift=ShiftNone} in toAssembler(rest, allocCode) end | toAssembler(BranchTable{ startLabel, brTable } :: rest, code) = toAssembler(rest, List.foldl (fn (label, code) => (unconditionalBranch label) :: code) (code <::> setLabel startLabel) brTable) + | toAssembler(LoadGlobalHeapBaseInCallback dest :: rest, code) = + toAssembler(rest, + code <@> List.rev(loadGlobalHeapBaseInCallback dest)) + + fun toInstr precode = case toAssembler([precode], []) of [single] => single | _ => raise InternalError "toInstr" (* Take a forward order sequence of instructions and generate a forward order output sequence. *) fun toInstrs precode = List.rev(toAssembler(precode, [])) (* Constant shifts are encoded in the immr and imms fields of the bit-field instruction. *) fun shiftConstant{ direction, regD, regN, shift, opSize } = let val (bitfieldKind, immr, imms) = case (direction, opSize) of (ShiftLeft, OpSize64) => (BFUnsigned, Word.~ shift mod 0w64, 0w64-0w1-shift) | (ShiftLeft, OpSize32) => (BFUnsigned, Word.~ shift mod 0w32, 0w32-0w1-shift) | (ShiftRightLogical, OpSize64) => (BFUnsigned, shift, 0wx3f) | (ShiftRightLogical, OpSize32) => (BFUnsigned, shift, 0wx1f) | (ShiftRightArithmetic, OpSize64) => (BFSigned, shift, 0wx3f) | (ShiftRightArithmetic, OpSize32) => (BFSigned, shift, 0wx1f) in BitField{ regN=regN, regD=regD, opSize=opSize, immr=immr, imms=imms, bitfieldKind=bitfieldKind } end (* These sequences are used both in the ML code-generator and in the FFI code so it is convenient to have them here and share the code. *) local fun allocateWords(fixedReg, workReg, words, bytes, regMask, code) = let val (lengthWord, setLength, flagShift) = if is32in64 then (~4, Load32, 0w24) else (~8, Load64, 0w56) in code <::> AllocateMemoryFixedSize{ bytes=bytes, dest=fixedReg, save=regMask, work=X16 } <::> LoadNonAddr(workReg, Word64.orb(words, Word64.<<(Word64.fromLarge(Word8.toLarge Address.F_bytes), flagShift))) <::> (* Store the length word. Have to use the unaligned version because offset is -ve. *) StoreRegUnscaled{regT=workReg, regN=fixedReg, byteOffset= lengthWord, loadType=setLength, unscaledType=NoUpdate} end fun absoluteAddressToIndex(reg, code) = if is32in64 then code <::> SubShiftedReg{regM=X_Base32in64, regN=reg, regD=reg, shift=ShiftNone, opSize=OpSize64, setFlags=false} <::> shiftConstant{direction=ShiftRightLogical, regN=reg, regD=reg, shift=0w2, opSize=OpSize64} else code in fun boxDouble({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Double64}) and boxSysWord({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs, code) <::> StoreRegScaled{regT=source, regN=destination, unitOffset=0, loadType=Load64}) and boxFloat({source, destination, workReg, saveRegs}, code) = absoluteAddressToIndex(destination, allocateWords(destination, workReg, 0w1, 0w8, saveRegs, code) <::> StoreFPRegScaled{regT=source, regN=destination, unitOffset=0, floatSize=Float32}) end (* Optimise the pre-assembler code and then generate the final code. 
*) fun generateFinalCode {instrs, name, parameters, resultClosure, profileObject, labelCount=_} = let in generateCode{instrs=List.rev(toAssembler(instrs, [])), name=name, parameters=parameters, resultClosure=resultClosure, profileObject=profileObject} end structure Sharing = struct type closureRef = closureRef type loadType = loadType type opSize = opSize type logicalOp = logicalOp type floatSize = floatSize type shiftDirection = shiftDirection type multKind = multKind type fpUnary = fpUnary type fpBinary = fpBinary type unscaledType = unscaledType type condSet = condSet type bitfieldKind = bitfieldKind type brRegType = brRegType type precode = precode type xReg = xReg type vReg = vReg type precodeLabel = precodeLabel type condition = condition type shiftType = shiftType type wordSize = wordSize type 'a extend = 'a extend type scale = scale type instr = instr end end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Sequences.sml b/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Sequences.sml deleted file mode 100644 index 46303d15..00000000 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/Arm64Sequences.sml +++ /dev/null @@ -1,181 +0,0 @@ -(* - Copyright (c) 2021 David C. J. Matthews - - This library is free software; you can redistribute it and/or - modify it under the terms of the GNU Lesser General Public - Licence version 2.1 as published by the Free Software Foundation. - - This library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public Licence for more details. - - You should have received a copy of the GNU Lesser General Public - Licence along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*) - -(* Sequences of instructions for various purposes that are - used both in the main code-generator and the FFI code. *) - -functor Arm64Sequences ( - structure Arm64Assembly: ARM64ASSEMBLY -) : ARM64SEQUENCES = - -struct - open Arm64Assembly - - (* Move register. If we're moving to/from SP we have to use addImmediate *) - fun moveRegToReg{sReg=XSP, dReg} = - addImmediate{regN=XSP, regD=dReg, immed=0w0, shifted=false} - | moveRegToReg{sReg, dReg=XSP} = - addImmediate{regN=sReg, regD=XSP, immed=0w0, shifted=false} - | moveRegToReg{sReg, dReg} = - orrShiftedReg{regN=XZero, regM=sReg, regD=dReg, shift=ShiftNone} - - (* Load a non-address constant. Tries to use movz/movn/movk if - that can be done easily, othewise uses loadNonAddressConstant to - load the value from the non-address constant area. *) - local - fun extW (v, h) = Word.andb(Word.fromLarge(LargeWord.>>(Word64.toLarge v, h*0w16)), 0wxffff) - (* Use one or more movk instructions to change the current value into - the target value. 
*) - (*fun modifyValue(xReg, current, target) = - if current = target - then [] - else - let - val curr0 = extW(current, 0w3) and curr1 = extW(current, 0w2) - and curr2 = extW(current, 0w1) - and targ0 = extW(target, 0w3) and targ1 = extW(target, 0w2) - and targ2 = extW(target, 0w1) and targ3 = extW(target, 0w0) - fun replace(v, s) = - Word64.orb(Word64.<<(Word64.fromLarge(Word.toLarge v), s), - Word64.andb(current, Word64.notb(Word64.<<(0wxffff, s)))) - in - if curr0 <> targ0 - then moveKeep{regD=xReg, immediate=targ0, shift=0w48} :: - modifyValue(xReg, replace(targ0, 0w48), target) - else if curr1 <> targ1 - then moveKeep{regD=xReg, immediate=targ1, shift=0w32} :: - modifyValue(xReg, replace(targ1, 0w32), target) - else if curr2 <> targ2 - then moveKeep{regD=xReg, immediate=targ2, shift=0w16} :: - modifyValue(xReg, replace(targ2, 0w16), target) - else [moveKeep{regD=xReg, immediate=targ3, shift=0w0}] - end*) - in - fun loadNonAddress(xReg, value) = - let - val hw0 = extW(value, 0w3) and hw1 = extW(value, 0w2) - and hw2 = extW(value, 0w1) and hw3 = extW(value, 0w0) - in - if value < 0wx100000000 - then - let - (* 32-bit constants can be loaded using at most a movz and movk but - various cases can be reduced since all 32-bit operations set - the top word to zero. *) - val hi = hw2 - and lo = hw3 - in - (* 32-bit constants can be loaded with at most a movz and a movk but - it may be that there is something shorter. *) - if hi = 0w0 - then [moveZero32{regD=xReg, immediate=lo, shift=0w0}] - else if hi = 0wxffff - then [moveNot32{regD=xReg, immediate=Word.xorb(0wxffff, lo), shift=0w0}] - else if lo = 0w0 - then [moveZero32{regD=xReg, immediate=hi, shift=0w16}] - else if isEncodableBitPattern(value, WordSize32) - then [bitwiseOrImmediate32{bits=value, regN=XZero, regD=xReg}] - else (* Have to use two instructions *) - [ - moveZero32{regD=xReg, immediate=lo, shift=0w0}, - moveKeep{regD=xReg, immediate=hi, shift=0w16} - ] - end - else if hw0 = 0wxffff andalso hw1 = 0wxffff andalso hw2 = 0wxffff - then [moveNot{regD=xReg, immediate=Word.xorb(0wxffff, hw3), shift=0w0}] - else if hw1 = 0w0 andalso hw2 = 0w0 - then (* This is common for length words with a flags byte *) - [ - moveZero32{regD=xReg, immediate=hw3, shift=0w0}, - moveKeep{regD=xReg, immediate=hw0, shift=0w48} - ] - else [loadNonAddressConstant(xReg, value)] - - end - end - - local - fun allocateWords(fixedReg, workReg, words, bytes, regMask) = - let - val label = createLabel() - (* Because X30 is used in the branchAndLink it has to be pushed - across any trap. *) - val saveX30 = List.exists (fn r => r = X30) regMask - val preserve = List.filter (fn r => r <> X30) regMask - in - [ - (* Subtract the number of bytes required from the heap pointer. *) - subImmediate{regN=X_MLHeapAllocPtr, regD=fixedReg, immed=bytes, shifted=false}, - (* Compare the result with the heap limit. *) - subSShiftedReg{regM=X_MLHeapLimit, regN=fixedReg, regD=XZero, shift=ShiftNone}, - conditionalBranch(CondCarrySet, label) - ] @ - (if saveX30 then [storeRegPreIndex{regT=X30, regN=X_MLStackPtr, byteOffset= ~8}] else []) @ - [ - loadRegScaled{regT=X16, regN=X_MLAssemblyInt, unitOffset=heapOverflowCallOffset}, - branchAndLinkReg X16, - registerMask preserve - ] @ - (if saveX30 then [loadRegPostIndex{regT=X30, regN=X_MLStackPtr, byteOffset= 8}] else []) @ - [ - setLabel label, - (* Update the heap pointer. 
*) - moveRegToReg{sReg=fixedReg, dReg=X_MLHeapAllocPtr} - ] @ - loadNonAddress(workReg, - Word64.orb(words, Word64.<<(Word64.fromLarge(Word8.toLarge Address.F_bytes), - if is32in64 then 0w24 else 0w56))) - @ - [ - (* Store the length word. Have to use the unaligned version because offset is -ve. *) - if is32in64 - then storeRegUnscaled32{regT=workReg, regN=fixedReg, byteOffset= ~4} - else storeRegUnscaled{regT=workReg, regN=fixedReg, byteOffset= ~8} - ] - end - - fun absoluteAddressToIndex reg = - if is32in64 - then - [ - subShiftedReg{regM=X_Base32in64, regN=reg, regD=reg, shift=ShiftNone}, - logicalShiftRight{shift=0w2, regN=reg, regD=reg} - ] - else [] - - in - fun boxDouble{source, destination, workReg, saveRegs} = - allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs) @ - storeRegScaledDouble{regT=source, regN=destination, unitOffset=0} :: - absoluteAddressToIndex destination - and boxSysWord{source, destination, workReg, saveRegs} = - allocateWords(destination, workReg, if is32in64 then 0w2 else 0w1, 0w16, saveRegs) @ - storeRegScaled{regT=source, regN=destination, unitOffset=0} :: - absoluteAddressToIndex destination - and boxFloat{source, destination, workReg, saveRegs} = - allocateWords(destination, workReg, 0w1, 0w8, saveRegs) @ - storeRegScaledFloat{regT=source, regN=destination, unitOffset=0} :: - absoluteAddressToIndex destination - end - - structure Sharing = - struct - type instr = instr - type xReg = xReg - type vReg = vReg - end -end; diff --git a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML b/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML index 8a58b0d2..53af1a62 100644 --- a/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML +++ b/mlsource/MLCompiler/CodeTree/Arm64Code/ml_bind.ML @@ -1,121 +1,113 @@ (* Copyright (c) 2021-2 David C. J. Matthews This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public Licence version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public Licence for more details. 
You should have received a copy of the GNU Lesser General Public Licence along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) local structure Arm64Assembly = Arm64Assembly ( structure Debug = Debug and Pretty = Pretty and CodeArray = CodeArray ) - - structure Arm64Sequences = - Arm64Sequences ( - structure Arm64Assembly = Arm64Assembly - ) structure Arm64PreAssembly = Arm64PreAssembly ( structure Arm64Assembly = Arm64Assembly and Debug = Debug and Pretty = Pretty ) structure Arm64Foreign = Arm64ForeignCall ( structure CodeArray = CodeArray and Arm64PreAssembly = Arm64PreAssembly - and Arm64Assembly = Arm64Assembly - and Arm64Sequences = Arm64Sequences and Debug = Debug ) structure Arm64ICode = Arm64ICode ( structure Arm64Code = Arm64PreAssembly ) structure Arm64ICodeIdentify = Arm64IdentifyReferences ( structure Debug = Debug structure Arm64ICode = Arm64ICode structure IntSet = IntSet ) structure Arm64PushRegs = Arm64PushRegisters ( structure Arm64ICode = Arm64ICode structure IntSet = IntSet structure Identify = Arm64ICodeIdentify ) structure Arm64Opt = Arm64ICodeOptimise ( structure Arm64ICode = Arm64ICode structure IntSet = IntSet structure Identify = Arm64ICodeIdentify structure Debug = Debug structure Pretty = Pretty ) structure Arm64IAllocate = Arm64AllocateRegisters ( structure Arm64ICode = Arm64ICode structure Identify = Arm64ICodeIdentify structure IntSet = IntSet ) structure Arm64ICodeGenerate = Arm64ICodeToArm64Code ( structure Debug = Debug structure Arm64ICode = Arm64ICode structure Identify = Arm64ICodeIdentify structure Pretty = Pretty structure IntSet = IntSet structure Arm64PreAssembly = Arm64PreAssembly structure Arm64Assembly = Arm64Assembly - structure Arm64Sequences = Arm64Sequences structure Strongly = StronglyConnected ) structure Arm64ICodeTransform = Arm64ICodeTransform ( structure Debug = Debug structure Arm64ICode = Arm64ICode structure Identify = Arm64ICodeIdentify structure Allocate = Arm64IAllocate structure PushRegisters = Arm64PushRegs structure Optimise = Arm64Opt structure Pretty = Pretty structure IntSet = IntSet structure Codegen = Arm64ICodeGenerate ) in structure Arm64Code = Arm64CodetreeToICode ( structure BackendTree = BackendIntermediateCode structure Debug = Debug - structure Arm64ICode = Arm64ICode + structure Arm64ICode = Arm64ICode structure Arm64Foreign = Arm64Foreign structure ICodeTransform = Arm64ICodeTransform structure CodeArray = CodeArray and Pretty = Pretty ) end; diff --git a/polymlArm64.pyp b/polymlArm64.pyp index f8c402f9..28ef5ae8 100644 --- a/polymlArm64.pyp +++ b/polymlArm64.pyp @@ -1,243 +1,240 @@ - - -