diff --git a/Tests/Succeed/Test201.ML b/Tests/Succeed/Test201.ML
new file mode 100644
index 00000000..b1c767a2
--- /dev/null
+++ b/Tests/Succeed/Test201.ML
@@ -0,0 +1,184 @@
+(* This version of the Ryu floating point conversion contained a
+   code-generator bug when compiled on X86/32. *)
+
+
+val floatBias = 127 (* This is the exponent value for 1.0 *)
+val floatExponentBits = 8
+val floatMantissaBits = Real32.precision - 1 (* One bit is implicit *)
+val floatImplicitBit = FixedInt.fromInt(Word.toInt(Word.<<(0w1, Word.fromInt floatMantissaBits)))
+
+(* Returns floor(log10(2^e)) for values of e between 0 and 1650. *)
+fun log10Pow2 e =
+    if e < 0 orelse e > 1650 then raise General.Domain
+    else Int.quot(e * 78913, 0x40000) (* >> 18 *)
+(* Returns floor(log10(5^e)) for values of e between 0 and 2620 *)
+and log10Pow5 e =
+    if e < 0 orelse e > 2620 then raise General.Domain
+    else Int.quot(e * 732923, 0x100000) (* >> 20 *)
+
+fun pow5bits e =
+    if e < 0 orelse e > 3528 then raise General.Domain
+    else Int.quot(e * 1217359, 0x80000) (* >> 19 *) + 1
+
+local
+    (* Keep dividing by 5 while the remainder is zero *)
+    fun p5 count value =
+        if Int.rem(value, 5) <> 0
+        then count
+        else p5 (count+1) (Int.quot(value, 5))
+in
+    (* Returns whether value is divisible by 5 to the power p. *)
+    fun multipleOfPow5(value, e5) =
+        p5 0 value >= e5
+end
+
+fun multipleOfPowerOf2(value, p) =
+    Word.andb(Word.fromInt value, Word.<<(0w1, Word.fromInt p) - 0w1) = 0w0;
+
+local
+    val posTableSize = 47 and invTableSize = 55
+    val pow5BitCount = 61 and pow5InvBitCount = 59
+
+    fun createInvSplit i =
+    let
+        val pow = IntInf.pow(5, i)
+        val pow5len = IntInf.log2 pow + 1 (* Bit length *)
+        val j = pow5len - 1 + pow5InvBitCount
+        val pow5inv = IntInf.<<(1, Word.fromInt j) div pow + 1
+    in
+        pow5inv
+    end
+
+    and createSplit i =
+    let
+        val pow = IntInf.pow(5, i)
+        val pow5len = IntInf.log2 pow + 1 (* Bit length *)
+        val shift = pow5len-pow5BitCount
+        val pow5 =
+            if shift < 0
+            then IntInf.<<(pow, Word.fromInt(~shift))
+            else IntInf.~>>(pow, Word.fromInt shift)
+    in
+        pow5
+    end
+
+    val floatPow5InvSplit = Vector.tabulate(invTableSize, createInvSplit)
+    and floatPow5Split = Vector.tabulate(posTableSize, createSplit)
+
+    (* We don't have 64-bit arithmetic on 32-bit platforms so this uses arbitrary precision
+       arithmetic.  It might be possible to select different versions depending on the
+       word length.
+       The Java version uses two tables of 31 bit values which would be an
+       alternative. *)
+    fun mulShift32(m: int, factor, shift: int): int =
+        if shift <= 32 then raise Fail "mulShift32"
+        else LargeInt.toInt(IntInf.~>>(factor*LargeInt.fromInt m, Word.fromInt shift))
+in
+    fun mulPow5InvDivPow2(m, q, j) = mulShift32(m, Vector.sub(floatPow5InvSplit, q), j)
+    and mulPow5DivPow2(m, i, j) = mulShift32(m, Vector.sub(floatPow5Split, i), j)
+
+    val floatPow5InvBitCount = pow5InvBitCount
+    and floatPow5BitCount = pow5BitCount
+end;
+
+fun f2d(ieeeMantissa, ieeeExponent) =
+let
+    (* Step 1: Normalise the value.  Normalised values, with exponent non-zero,
+       have an implicit one in the top bit position. *)
+    val (e2, m2) =
+        if ieeeExponent = 0
+        then (1-floatBias-floatMantissaBits-2, ieeeMantissa)
+        else (ieeeExponent-floatBias-floatMantissaBits-2, ieeeMantissa + floatImplicitBit)
+
+    val isEven = Int.rem(m2, 2) = 0
+    val acceptBounds = isEven
+
+    (* Step 2: Determine the interval of valid decimal representations (??) *)
+    val mmShift = if ieeeMantissa <> 0 orelse ieeeExponent <= 1 then 1 else 0
+    (* Presumably this is 4* because we've subtracted 2 from e2. *)
+    val mm = 4 * m2 - 1 - mmShift
+    val mv = 4 * m2
+    val mp = 4 * m2 + 2
+
+    (* Step 3: Convert to a decimal power base *)
+    val (e10, vr, vp, vm, lastRemovedDigit, vrIsTrailingZeros, vmIsTrailingZeros) =
+        if e2 >= 0
+        then
+        let
+            val q = log10Pow2 e2
+            val e10 = q
+            val k = floatPow5InvBitCount + pow5bits q - 1
+            val i = ~e2 + q + k
+            val vr = mulPow5InvDivPow2(mv, q, i)
+            and vp = mulPow5InvDivPow2(mp, q, i)
+            and vm = mulPow5InvDivPow2(mm, q, i)
+        in
+            if q > 9
+            then (e10, vr, vp, vm, 0, false, false) (* Too large to be power of 5. *)
+            else if Int.rem(mv, 5) = 0
+            then (e10, vr, vp, vm, 0, multipleOfPow5(mv, q), false)
+            else if acceptBounds
+            then (e10, vr, vp, vm, 0, false, multipleOfPow5(mm, q))
+            else (e10, vr, vp - (if multipleOfPow5(mp, q) then 1 else 0), vm, 0, false, false)
+        end
+        else raise Bind
+
+    (* Step 4: Find the shortest decimal representation in the interval *)
+    val (output, removed) =
+        if vmIsTrailingZeros orelse vrIsTrailingZeros
+        then
+        let
+            fun removeVrDigits(vr, vp, vm, removed, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros) =
+            let
+                val vpDiv10 = Int.quot(vp, 10)
+                val vmDiv10 = Int.quot(vm, 10)
+            in
+                if vpDiv10 > vmDiv10
+                then removeVrDigits(Int.quot(vr, 10), vpDiv10, vmDiv10, removed+1, Int.rem(vr, 10),
+                                    vmIsTrailingZeros andalso Int.rem(vm, 10) = 0,
+                                    vrIsTrailingZeros andalso lastRemovedDigit = 0)
+                else removeVmDigits(vr, vp, vm, removed, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros)
+            end
+
+            and removeVmDigits(vr, vp, vm, removed, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros) =
+            let
+            in
+                if vmIsTrailingZeros andalso Int.rem(vm, 10) = 0
+                then removeVmDigits(Int.quot(vr, 10), Int.quot(vp, 10), Int.quot(vm, 10), removed+1, Int.rem(vr, 10),
+                                    vmIsTrailingZeros, vrIsTrailingZeros andalso lastRemovedDigit = 0)
+                else
+                let
+                    val lastRemovedDigit2 =
+                        if vrIsTrailingZeros andalso lastRemovedDigit = 5 andalso Int.rem(vr, 2) = 0
+                        then 4 (* ??? *)
+                        else lastRemovedDigit
+                    val vrCorrect =
+                        (vr = vm andalso (not acceptBounds orelse not vmIsTrailingZeros)) orelse lastRemovedDigit2 >= 5
+                in
+                    (vr + (if vrCorrect then 1 else 0), removed)
+                end
+            end
+        in
+            removeVrDigits(vr, vp, vm, 0, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros)
+        end
+        else
+        let
+            fun removeDigits(vr, vp, vm, removed, lastRemovedDigit) =
+            let
+                val vpDiv10 = Int.quot(vp, 10)
+                and vmDiv10 = Int.quot(vm, 10)
+            in
+                if vpDiv10 > vmDiv10
+                then removeDigits(Int.quot(vr, 10), vpDiv10, vmDiv10, removed+1, Int.rem(vr, 10))
+                else (vr + (if vr = vm orelse lastRemovedDigit >= 5 then 1 else 0), removed)
+            end
+        in
+            removeDigits(vr, vp, vm, 0, lastRemovedDigit)
+        end
+in
+    {mantissa=output, e10=e10, removed=removed}
+end
+;
+val result = f2d (7221787, 155);
+
+if #removed result = #mantissa result then raise Fail "It's a bug" else ();
diff --git a/mlsource/MLCompiler/CodeTree/X86Code/X86ICodeGetConflictSets.ML b/mlsource/MLCompiler/CodeTree/X86Code/X86ICodeGetConflictSets.ML
index 289dc3ac..1e81ac8f 100644
--- a/mlsource/MLCompiler/CodeTree/X86Code/X86ICodeGetConflictSets.ML
+++ b/mlsource/MLCompiler/CodeTree/X86Code/X86ICodeGetConflictSets.ML
@@ -1,291 +1,296 @@
 (*
    Copyright (c) 2016-18, 2021 David C.J. Matthews
 
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License version 2.1 as published by the Free Software Foundation.
 
    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.
 
    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *)
 
 functor X86ICodeGetConflictSets(
     structure X86ICode: X86ICODE
     structure IntSet: INTSET
     structure Identify: X86IDENTIFYREFERENCES
     sharing X86ICode.Sharing = Identify.Sharing = IntSet
 ): X86GETCONFLICTSET =
 struct
 
     open X86ICode
     open IntSet
     open Identify
 
     type conflictState =
     {
         conflicts: intSet, realConflicts: reg list
     }
 
     type triple = {instr: x86ICode, current: intSet, active: intSet}
 
     exception InternalError = Misc.InternalError
 
     (* Get the conflict sets.  This code was originally part of
        identifyRegisterState and was split off. *)
     fun getConflictStates (blocks: extendedBasicBlock vector, maxPRegs) =
     let
         (* Other registers that conflict with this i.e. cannot share the same real register. *)
         val regConflicts = Array.array(maxPRegs, emptySet)
         (* Real registers that cannot be used for this because they are needed for an
            instruction e.g. shift or block move, that requires these. *)
         and regRealConflicts = Array.array(maxPRegs, []: reg list)
 
         fun addConflictsTo(addTo, conflicts) =
             List.app(fn aReg => Array.update(regConflicts, aReg, union(Array.sub(regConflicts, aReg), conflicts))) addTo
 
         (* To reserve a register we need to add the real register to the real conflict
            sets of all the abstract conflicts. *)
         local
             fun isInset reg set = List.exists (fn r => r = reg) set
         in
             fun reserveRegister(reserveFor, reg) =
             let
                 fun reserveAReg r =
                 let
                     val absConflicts = Array.sub(regConflicts, r)
                     fun addConflict i = if isInset i reserveFor then () else addRealConflict (i, reg)
                 in
                     List.app addConflict (setToList absConflicts)
                 end
             in
                 List.app reserveAReg reserveFor
             end
 
             and addRealConflict (i, reg) =
             let
                 val currentConflicts = Array.sub(regRealConflicts, i)
             in
                 if isInset reg currentConflicts
                 then ()
                 else Array.update(regRealConflicts, i, reg :: currentConflicts)
             end
         end
 
         fun conflictsForInstr passThrough {instr, current, ...} =
         let
             val {sources, dests} = getInstructionRegisters instr
             fun regNo(PReg i) = i
             val destRegNos = map regNo dests
             and sourceRegNos = map regNo sources
             val destSet = listToSet destRegNos
             val afterRemoveDests = minus(current, destSet)
 
             local
                 (* In almost all circumstances the destination and sources don't conflict and
                    the same register can be used as a destination and a source.  The exceptions
                    are AllocateMemoryVariable and BoxValues which can only store the size or
                    the contents after the memory has been allocated.  We also have to make sure
                    that the test and work registers are different in IndexedCase. *)
                 val postInstruction =
                     case instr of
                         AllocateMemoryVariable _ => destRegNos @ sourceRegNos
                     |   BoxValue _ => destRegNos @ sourceRegNos
                     |   IndexedCaseOperation _ => destRegNos @ sourceRegNos
                     |   ArithmeticFunction{oper=SUB, operand2, ...} =>
                             (* Special case for subtraction - we can't use the same register
                                for the result and the second operand. *)
                             destRegNos @ map regNo (argRegs operand2)
                     |   ArithmeticFunction{operand2 as MemoryLocation _, ...} =>
                             (* If operand1 is not in the destination register we will move it there
                                before the instruction.  That means that we must not have the
                                destination register as either a base or index register. *)
                            destRegNos @ map regNo (argRegs operand2)
                     |   Multiplication{operand2 as MemoryLocation _, ...} =>
                             (* Likewise for multiplication. *)
                             destRegNos @ map regNo (argRegs operand2)
                     |   AtomicExchangeAndAdd{base, ...} =>
                             (* and for atomic operations.  The base is always a memory address. *)
                             regNo base :: destRegNos
                     |   AtomicExchange{base, ...} => regNo base :: destRegNos
                     |   AtomicCompareAndExchange{base, ...} => regNo base :: destRegNos
                         (* TailRecursiveCall and JumpLoop may require a work register.  This is the only
                            destination but if present it must be distinct from the arguments. *)
                     |   TailRecursiveCall _ => destRegNos @ sourceRegNos
                     |   JumpLoop _ => destRegNos @ sourceRegNos
                     |   _ => destRegNos
             in
                 (* If there is more than one destination they conflict with each other. *)
                 val () = addConflictsTo(postInstruction, listToSet postInstruction);
                 (* Mark conflicts for the destinations, i.e. after the instruction.
                    The destinations conflict with the registers that are used subsequently. *)
                 val () = addConflictsTo(postInstruction, current);
                 val () = addConflictsTo(postInstruction, passThrough);
                 (* Mark conflicts for the sources i.e. before the instruction. *)
                 (* Sources must be set up as conflicts with each other i.e. when we come
                    to allocate registers we must choose different real registers for
                    different abstract registers. *)
                 val () = addConflictsTo(sourceRegNos, listToSet sourceRegNos)
                 val () = addConflictsTo(sourceRegNos, afterRemoveDests);
                 val () = addConflictsTo(sourceRegNos, passThrough)
             end
 
             (* I'm not sure if this is needed.  There was a check in the old code to ensure that
                different registers were used for loop variables even if they were actually unused.
                This may happen anyway. *)
+            (* However, we need to ensure that if there is a work register it is distinct from any
+               of the registers being used for arguments.  So at the least we need to add a
+               conflict between these. *)
             val () =
                 case instr of
-                    JumpLoop{regArgs, ...} =>
+                    JumpLoop{regArgs, workReg, ...} =>
                     let
                         val destRegs = List.foldl(fn ((_, PReg loopReg), dests) => loopReg :: dests) [] regArgs
+                        val workDest = case workReg of SOME(PReg reg) => [reg] | NONE => []
+                        val destRegs = destRegs @ workDest
                     in
                         addConflictsTo(destRegs, listToSet destRegs);
                         addConflictsTo(destRegs, current);
                         addConflictsTo(destRegs, passThrough)
                     end
                 |   _ => ()
 
             (* Certain instructions are specific as to the real registers. *)
             val () =
                 case instr of
                     (* Storing a byte value.  This is messy on X86/32 because we can't use edi or esi
                        as the register to store.  To get round this we reserve ecx as a possible
                        register as with shifts.  We don't actually need to use this but it is
                        available if necessary. *)
                     StoreArgument { source=RegisterArgument sReg, kind=MoveByte, ...} =>
                         if targetArch <> Native32Bit then ()
                         else reserveRegister([regNo sReg], GenReg ecx)
                 |   InitialiseMem{size, addr, init} =>
                     (
                         (* We are going to use rep stosl/q to set the memory.  That requires the
                            length to be in ecx, the initialiser to be in eax and the address to
                            be edi. *)
                         reserveRegister([regNo addr], GenReg edi);
                         reserveRegister([regNo init], GenReg eax);
                         reserveRegister([regNo size], GenReg ecx)
                     )
                 |   ShiftOperation{shiftAmount=RegisterArgument shiftAmount, ...} =>
                     (
                         (* Shift with by amount specified in a register.  This must be ecx. *)
                         reserveRegister([regNo shiftAmount], GenReg ecx);
                         (* reserveRegister only sets a conflict between the args.  We need to include
                            the result because that will be allocated first. *)
                        List.app(fn r => addRealConflict (r, GenReg ecx)) destRegNos
                     )
                 |   Division{dividend, quotient, remainder, ...} =>
                     (
                         (* Division is specific as to the registers.  The dividend must be eax,
                            quotient is eax and the remainder is edx.  The divisor must not be in
                            either edx or eax because we need to sign extend the dividend before
                            the division. *)
                         reserveRegister([regNo quotient, regNo dividend], GenReg eax);
                         (* In addition, we need to register conflicts with the divisor, at least for edx.
                            The remainder is a result and may well not be in the conflict set with
                            the divisor. *)
                         List.app(fn r => addRealConflict (r, GenReg edx)) sourceRegNos;
                         reserveRegister([regNo remainder], GenReg edx)
                     )
                 |   CompareByteVectors{vec1Addr, vec2Addr, length, ...} =>
                     (
                         (* We have to use specific registers. *)
                         reserveRegister([regNo vec1Addr], GenReg esi);
                         reserveRegister([regNo vec2Addr], GenReg edi);
                         reserveRegister([regNo length], GenReg ecx)
                     )
                 |   BlockMove{srcAddr, destAddr, length, ...} =>
                     (
                         (* We have to use specific registers. *)
                         reserveRegister([regNo srcAddr], GenReg esi);
                         reserveRegister([regNo destAddr], GenReg edi);
                         reserveRegister([regNo length], GenReg ecx)
                     )
                 |   X87FPGetCondition{dest, ...} =>
                         (* This can only put the result in rax. *)
                         reserveRegister([regNo dest], GenReg eax)
                 |   RaiseExceptionPacket{ packetReg } =>
                         (* This wasn't needed previously because we always pushed the registers
                            across an exception. *)
                         reserveRegister([regNo packetReg], GenReg eax)
                 |   BeginHandler { packetReg, ...} =>
                         reserveRegister([regNo packetReg], GenReg eax)
                 |   FunctionCall { dest, realDest, regArgs, ...} =>
                         (* This is only needed if we are saving the registers rather than marking
                            them as "must push". *)
                     (
                         reserveRegister([regNo dest], realDest);
                         (* The argument registers also conflict.  In order to execute this call we
                            need to load the arguments into specific registers so we can't use them
                            for values that we want after the call.  We use regNo dest here because
                            that will conflict with everything immediately afterwards. *)
                         List.app(fn (_, r) => reserveRegister([regNo dest], r)) regArgs
                     )
                 |   TailRecursiveCall {workReg=PReg wReg, regArgs, ...} =>
                         (* Prevent the work reg from using any of the real register args. *)
                         List.app(fn (_, r) => addRealConflict (wReg, r)) regArgs
                 |   _ => ()
         in
             ()
         end
 
         (* Process the block. *)
         fun conflictsForBlock(ExtendedBasicBlock{block, passThrough, exports, ...}) =
         let
             (* We need to establish conflicts between all the registers active at the end of
                the block since they may not be established elsewhere.  This isn't necessary
                for an unconditional branch since the same registers will be included in the
                block that is the target of the branch, possibly along with others.  However
                if this is a conditional or indexed branch we may have different sets at each
                of the targets and we have to ensure that all the registers differ. *)
             val united = union(exports, passThrough)
             val () = addConflictsTo(setToList united, united)
             val () = List.app (conflictsForInstr passThrough) block
         in
             ()
         end
 
         val () = Vector.app conflictsForBlock blocks
 
         val conflictState: conflictState vector =
             Vector.tabulate(maxPRegs, fn i =>
                 {
                     conflicts = Array.sub(regConflicts, i),
                     realConflicts = Array.sub(regRealConflicts, i)
                 }
             )
     in
         conflictState
     end
 
     structure Sharing =
     struct
         type x86ICode = x86ICode
         and  reg = reg
         and  preg = preg
         and  intSet = intSet
         and  extendedBasicBlock = extendedBasicBlock
     end
 end;