diff --git a/Tests/Succeed/Test199.ML b/Tests/Succeed/Test199.ML index a9fe87f4..463edb22 100644 --- a/Tests/Succeed/Test199.ML +++ b/Tests/Succeed/Test199.ML @@ -1,98 +1,104 @@ (* Test for exact conversion. *) (* We don't have a random number generator in Poly/ML so this will do. *) functor Check(structure RealStr: REAL and Pack: PACK_REAL sharing type RealStr.real = Pack.real) = struct local (* Random number generator. From Isabelle. *) local fun rmod x y = x - y * Real.realFloor (x / y); val a = 16807.0; val m = 2147483647.0; val random_seed = ref 1.0; in fun randByte _ = let val r = rmod (a * ! random_seed) m in random_seed := r; Word8.fromLargeInt(Real.toLargeInt IEEEReal.TO_NEGINF r) end end fun repeat _ 0 = () | repeat f n = (f(); repeat f (n-1)); open Pack fun checkAFloat r = let val exact = RealStr.fmt StringCvt.EXACT r val fromExact = valOf (RealStr.fromString exact) (* The values must be equal or both nans. *) val _ = RealStr.==(r, fromExact) orelse RealStr.isNan r andalso RealStr.isNan fromExact orelse raise Fail ("Exact does not match:" ^ RealStr.toString r) val toDec = RealStr.toDecimal r val fromDec = valOf (RealStr.fromDecimal toDec) (* The conversion back from decimal should produce the same result. *) val _ = RealStr.==(r, fromDec) orelse RealStr.isNan r andalso RealStr.isNan fromDec orelse raise Fail ("Decimal does not match:" ^ RealStr.toString r) (* The last digit should not be zero. Trailing zeros should have been removed. *) val _ = case List.rev(#digits toDec) of 0::_ => raise Fail ("Decimal has trailing zero:" ^ RealStr.toString r) | _ => () (* The result should be minimal. Dropping the last digit should change the value. *) val { class, digits, exp, sign } = toDec val numDigits = List.length digits val _ = if numDigits <= 1 then () else let val dec2 = {class=class, digits=List.take(digits, numDigits-1), exp=exp, sign=sign} val fromDec2 = valOf (RealStr.fromDecimal dec2) in RealStr.!=(fromDec2, r) orelse raise Fail ("Decimal is not minimal:" ^ RealStr.toString r); () end + (* Test a long string. Double values have at most 16 digits of precision. *) + val long = RealStr.fmt(StringCvt.SCI(SOME 22)) r + val fromLong = valOf (RealStr.fromString long) + val _ = + RealStr.==(r, fromLong) orelse RealStr.isNan r andalso RealStr.isNan fromLong + orelse raise Fail ("Long string does not match:" ^ long) in () end fun checkRandom () = let val v = Word8Vector.tabulate(bytesPerElem, randByte) val r = subVec(v, 0) in checkAFloat r end open RealStr val zero = fromInt 0 val pInf = fromInt 1 / zero val nInf = fromInt ~1 / zero val aNan = zero / zero in fun runCheck() = ( repeat checkRandom 1000; checkAFloat zero; checkAFloat pInf; checkAFloat nInf; checkAFloat aNan ) end end; structure CheckReal = Check(structure RealStr=Real and Pack=PackRealBig); CheckReal.runCheck(); structure CheckReal32 = Check(structure RealStr=Real32 and Pack=PackReal32Big); CheckReal32.runCheck(); diff --git a/basis/Real.sml b/basis/Real.sml index 76a9fc94..67e9fb44 100644 --- a/basis/Real.sml +++ b/basis/Real.sml @@ -1,865 +1,859 @@ (* Title: Standard Basis Library: Real and Real32 structures. Author: David Matthews Copyright David Matthews 2000, 2005, 2008, 2016-18, 2023 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) local open RealNumbersAsBits - val floatMaxFiniteExp: FixedInt.int = 254 - and doubleMaxFiniteExp: FixedInt.int = 2046 + val floatMaxFiniteExp = 254 + and doubleMaxFiniteExp = 2046 (* Functions common to Real and Real32 *) local open StringCvt in (* How many digits will the mantissa take? It's possible to unroll this loop since the maximum for float is 9 and for double is 17. We want to avoid long-format arbitrary precision arithmetic as much as possible so we stop at 10 digits which is the maximum short-format power of ten in 32-bit mode. *) fun ndigits (i: LargeInt.int) = if i >= 1000000000 then ndigits(i div 1000000000) + 9 else if i >= 100000000 then 9 else if i >= 10000000 then 8 else if i >= 1000000 then 7 else if i >= 100000 then 6 else if i >= 10000 then 5 else if i >= 1000 then 4 else if i >= 100 then 3 else if i >= 10 then 2 else 1 local (* PolyRealDoubleToDecimal returns an arbitrary precision number for the mantissa because it could be greater than 31 bits. In 64-bit mode it will always be short and we want to use fixed int arithmetic if we can. *) fun fixedDigitList (0, r) = r | fixedDigitList (n, r) = - fixedDigitList (FixedInt.quot(n, 10), FixedInt.toInt(FixedInt.rem(n, 10)) :: r) + fixedDigitList (Int.quot(n, 10), Int.rem(n, 10) :: r) in fun digitList(n, r) = - if LibrarySupport.largeIntIsSmall n then fixedDigitList (FixedInt.fromLarge n, r) + if LibrarySupport.largeIntIsSmall n then fixedDigitList (Int.fromLarge n, r) else let val (qu, rm) = IntInf.quotRem(n, 10) in digitList (qu, (Int.fromLarge rm) :: r) end end (* Common functions to convert real/Real32.real to strings *) (* "ndigs" is the number of digits after the decimal point. For sciFmt that means that "ndigs+1" digits are produced. For fixFmt at least "ndigs+1" digits are produced. If "ndigs" is 0 the value is printed as an integer without any ".0". In both cases ndigs > 0. *) fun exactFmt convert r = let val {sign, exponent, mantissa, class} = convert r open IEEEReal in case class of NAN => "nan" | INF => if sign then "~inf" else "inf" | _ (* NORMAL, ZERO, SUBNORMAL *) => let val s = if sign then "~" else "" val exponent = exponent + ndigits mantissa val (e, exp) = if exponent = 0 then ("", "") else ("E", Int.toString exponent) in String.concat[s, "0.", LargeInt.toString mantissa, e, exp] end end local val dble2str = RunCall.rtsCallFull3 "PolyRealDoubleToString" : real * char * int -> string in fun nonExactFmt (toDble, fmt, ndigs) r = let val dr = toDble r val exp = doubleExponent dr in if exp > doubleMaxFiniteExp then (* Non-finite *) ( if doubleMantissa dr <> 0 then "nan" else if doubleSignBit dr then "~inf" else "inf" ) else let (* Use snprintf to do the conversion. *) val str = dble2str(dr, fmt, ndigs) (* G-format does not always put in a decimal point so we may need to add .0 to the end to make it look like an ML real. *) fun hasDecOrE(_, true) = true | hasDecOrE(#".", _) = true | hasDecOrE(#"E", _) = true | hasDecOrE _ = false in if fmt = #"G" andalso ndigs > 1 andalso not(CharVector.foldl hasDecOrE false str) then str ^ ".0" else str end end end (* Note: The definition says, reasonably, that negative values for the number of digits raises Size. The tests also check for a very large value for the number of digits and seem to expect Size to be raised in that case. Note that the exception is raised when fmt spec is evaluated and before it is applied to an actual real argument. *) fun fmtFunction (_, toDble) (SCI NONE) = nonExactFmt(toDble, #"E", 6) | fmtFunction (_, toDble) (SCI (SOME d) ) = if d < 0 orelse d > 200 then raise General.Size else nonExactFmt(toDble, #"E", d) | fmtFunction (_, toDble) (FIX NONE) = nonExactFmt(toDble, #"F", 6) | fmtFunction (_, toDble) (FIX (SOME d) ) = if d < 0 orelse d > 200 then raise General.Size else nonExactFmt(toDble, #"F", d) | fmtFunction (_, toDble) (GEN NONE) = nonExactFmt(toDble, #"G", 12) | fmtFunction (_, toDble) (GEN (SOME d) ) = if d < 1 orelse d > 200 then raise General.Size else nonExactFmt(toDble, #"G", d) | fmtFunction (toExact, _) EXACT = exactFmt toExact end in structure Real: REAL = struct open IEEEReal val fromLargeInt: LargeInt.int -> real = Real.rtsCallFastI_R "PolyFloatArbitraryPrecision" val fromInt: int -> real = (* We have to select the appropriate conversion. This will be reduced down to the appropriate function but has to be type-correct whether int is arbitrary precision or fixed precision. Hence the "o Large/FixedInt.fromInt". *) if Bootstrap.intIsArbitraryPrecision then fromLargeInt o LargeInt.fromInt else Real.fromFixedInt o FixedInt.fromInt (* These are needed because we don't yet have conversion from string to real. They are filtered out by the signature. *) val zero = fromInt 0 and one = fromInt 1 and four = fromInt 4 type real = real (* Pick up from globals. *) structure Math: MATH = struct type real = real (* Pick up from globals. *) val sqrt = Real.rtsCallFastR_R "PolyRealSqrt" and sin = Real.rtsCallFastR_R "PolyRealSin" and cos = Real.rtsCallFastR_R "PolyRealCos" and atan = Real.rtsCallFastR_R "PolyRealArctan" and exp = Real.rtsCallFastR_R "PolyRealExp" and ln = Real.rtsCallFastR_R "PolyRealLog" and tan = Real.rtsCallFastR_R "PolyRealTan" and asin = Real.rtsCallFastR_R "PolyRealArcSin" and acos = Real.rtsCallFastR_R "PolyRealArcCos" and log10 = Real.rtsCallFastR_R "PolyRealLog10" and sinh = Real.rtsCallFastR_R "PolyRealSinh" and cosh = Real.rtsCallFastR_R "PolyRealCosh" and tanh = Real.rtsCallFastR_R "PolyRealTanh" val atan2 = Real.rtsCallFastRR_R "PolyRealAtan2" val pow = Real.rtsCallFastRR_R "PolyRealPow" (* Derived values. *) val e = exp one val pi = four * atan one end; infix 4 == != ?=; val op == = Real.== val op != : real * real -> bool = not o op == val radix = 2 val precision = 53 val maxFinite = doubleFromBinary{sign=false, exp=doubleMaxFiniteExp, mantissa = 0xFFFFFFFFFFFFF} val minNormalPos = doubleFromBinary{sign=false, exp=1, mantissa = 0} val minPos = doubleFromBinary{sign=false, exp=0, mantissa = 1} val posInf : real = one/zero val negInf : real = ~one/zero (* Real is LargeReal. *) fun toLarge (x: real) : (*LargeReal.*)real =x fun fromLarge (_ : IEEEReal.rounding_mode) (x: (*LargeReal.*)real): real = x (* isNan can be defined in terms of unordered. *) fun isNan x = Real.unordered(x, x) fun isFinite x = doubleExponent x <= doubleMaxFiniteExp (* This could be implemented using signBit and doubleFromBinary *) val copySign : (real * real) -> real = Real.rtsCallFastRR_R "PolyRealCopySign" val signBit = doubleSignBit fun isNormal x = let val exp = doubleExponent x in exp > 0 andalso exp <= doubleMaxFiniteExp end fun class x = let val exp = doubleExponent x in if exp > doubleMaxFiniteExp then ( if doubleMantissa x <> 0 then NAN else INF ) else if exp = 0 then ( if doubleMantissa x = 0 then ZERO else SUBNORMAL ) else NORMAL end fun sign x = if isNan x then raise General.Domain else if x == zero then 0 else if x < zero then ~1 else 1 fun sameSign (x, y) = signBit x = signBit y (* Returns the minimum. In the case where one is a NaN it returns the other. In that case the comparison will be false. *) fun min (a: real, b: real): real = if a < b orelse isNan b then a else b (* Similarly for max. *) fun max (a: real, b: real): real = if a > b orelse isNan b then a else b fun checkFloat x = if isFinite x then x else if isNan x then raise General.Div else raise General.Overflow local val frExp: real -> int * real = RunCall.rtsCallFull1 "PolyRealFrexp" val fromManAndExp: real*int -> real = Real.rtsCallFastRI_R "PolyRealLdexp" open Real in fun toManExp r = if not (isFinite r) orelse r == zero (* Nan, infinities and +/-0 all return r in the mantissa. We include 0 to preserve its sign. *) then {man=r, exp=0} else let val (exp, man) = frExp r in {man=man, exp=exp} end fun fromManExp {man, exp} = if not (isFinite man) orelse man == zero (* Nan, infinities and +/-0 in the mantissa all return their argument. *) then man else if LibrarySupport.isShortInt exp then fromManAndExp(man, exp) else (* Long arbitrary precision *) copySign(if Int.>(exp, 0) then posInf else zero, man) end (* Convert to integer. *) local (* These are defined to raise Domain rather than Overflow on Nans. *) fun checkNan x = if isNan x then raise Domain else x in val realFloor = Real.rtsCallFastR_R "PolyRealFloor" and realCeil = Real.rtsCallFastR_R "PolyRealCeil" and realTrunc = Real.rtsCallFastR_R "PolyRealTrunc" and realRound = Real.rtsCallFastR_R "PolyRealRound" local val doubleBias = 1023 (* This is the exponent value for 1.0 *) val doubleMantissaBits = precision - 1 (* One bit is implicit *) - val doubleImplicitBit = IntInf.<<(1, Word.fromInt(FixedInt.toInt doubleMantissaBits)) + val doubleImplicitBit = IntInf.<<(1, Word.fromInt doubleMantissaBits) in (* Convert a real number to arbitrary precision. It might be possible to include the rounding/truncation here. *) fun toArbitrary x = let open RealNumbersAsBits val ieeeExp = doubleExponent x and ieeeMant = doubleMantissa x and ieeeSign = doubleSignBit x in if ieeeExp = 2047 then (* Non-finite *) if ieeeMant <> 0 then raise General.Domain else raise General.Overflow else if ieeeExp < doubleBias then 0 (* less than 1 *) else let (* Add the implicit bit to the mantissa and set the sign. *) val m2a = ieeeMant + doubleImplicitBit val m2s = if ieeeSign then ~m2a else m2a val shift = ieeeExp - doubleBias - doubleMantissaBits in if shift < 0 then IntInf.~>>(m2s, Word.fromInt(~shift)) else IntInf.<<(m2s, Word.fromInt shift) end end end fun toLargeInt IEEEReal.TO_NEGINF = toArbitrary o realFloor | toLargeInt IEEEReal.TO_POSINF = toArbitrary o realCeil | toLargeInt IEEEReal.TO_ZERO = toArbitrary o realTrunc | toLargeInt IEEEReal.TO_NEAREST = toArbitrary o realRound (* Conversions to FixedInt are put in by the compiler. If int is fixed we can use them otherwise we use the long versions. N.B. FixedInt.toInt is a no-op but is needed so this is type-correct when int is arbitrary. *) val floor = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realFloor else FixedInt.toInt o Real.floorFix o checkNan and ceil = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realCeil else FixedInt.toInt o Real.ceilFix o checkNan and trunc = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realTrunc else FixedInt.toInt o Real.truncFix o checkNan and round = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realRound else FixedInt.toInt o Real.roundFix o checkNan fun toInt IEEEReal.TO_NEGINF = floor | toInt IEEEReal.TO_POSINF = ceil | toInt IEEEReal.TO_ZERO = trunc | toInt IEEEReal.TO_NEAREST = round end; local - val realConv: string->real = RunCall.rtsCallFull1 "PolyRealBoxedFromString" - val posNan = abs(zero / zero) val negNan = ~posNan in fun fromDecimal { class = INF, sign=true, ...} = SOME negInf | fromDecimal { class = INF, sign=false, ...} = SOME posInf | fromDecimal { class = ZERO, sign=true, ...} = SOME (~ zero) | fromDecimal { class = ZERO, sign=false, ...} = SOME zero (* Generate signed Nans ignoring the digits and mantissa. There was code here to set the mantissa but there's no reference to that in the current version of the Basis library. *) | fromDecimal { class = NAN, sign=true, ... } = SOME negNan | fromDecimal { class = NAN, sign=false, ... } = SOME posNan | fromDecimal { class = _ (* NORMAL or SUBNORMAL *), sign, digits, exp} = - (let - fun toChar x = - if x < 0 orelse x > 9 then raise General.Domain - else Char.chr (x + Char.ord #"0") - (* Turn the number into a string. *) - val str = "0." ^ String.implode(List.map toChar digits) ^"E" ^ - Int.toString exp - (* Convert it to a real using the RTS conversion function. - Change any Conversion exceptions into Domain. *) - val result = realConv str handle RunCall.Conversion _ => raise General.Domain - in - if sign then SOME (~result) else SOME result - end - handle General.Domain => NONE - ) + (SOME(RealToDecimalConversion.decimalToDouble(sign, exp, digits)) handle General.Domain => NONE) end fun toDecimal r = let val {sign, exponent, mantissa, class} = RealToDecimalConversion.doubleToMinimal r in { class = class, sign = sign, exp = exponent + ndigits mantissa, digits = digitList(mantissa, []) } end val fmt = fmtFunction (RealToDecimalConversion.doubleToMinimal, toLarge) val toString = fmt (StringCvt.GEN NONE) (* Define these in terms of IEEEReal.scan since that deals with all the special cases. *) fun scan getc src = case IEEEReal.scan getc src of NONE => NONE | SOME (ieer, rest) => ( case fromDecimal ieer of NONE => NONE | SOME r => SOME(r, rest) ) val fromString = Option.composePartial (fromDecimal, IEEEReal.fromString) (* Converter to real values. This replaces the basic conversion function for reals installed in the bootstrap process. For more information see convInt in Int. *) local fun convReal (s: string) : real = let + (* Conversion doesn't involve any real rounding so this should + no longer be relevant. *) (* Set the rounding mode to TO_NEAREST whatever the current rounding mode. Otherwise the result of compiling a piece of code with a literal constant could depend on what the rounding mode was set to. We should always support TO_NEAREST. *) val oldRounding = IEEEReal.getRoundingMode() val () = IEEEReal.setRoundingMode IEEEReal.TO_NEAREST val scanResult = StringCvt.scanString scan s val () = IEEEReal.setRoundingMode oldRounding in case scanResult of NONE => raise RunCall.Conversion "Invalid real constant" | SOME res => res end in (* Install this as a conversion function for real literals. *) val (): unit = RunCall.addOverload convReal "convReal" end open Real (* Get the other definitions. *) fun compare (r1, r2) = if r1 == r2 then General.EQUAL else if r1 < r2 then General.LESS else if r1 > r2 then General.GREATER else raise Unordered fun compareReal (r1, r2) = if r1 == r2 then EQUAL else if r1 < r2 then LESS else if r1 > r2 then GREATER else UNORDERED (* This seems to be similar to == except that where == always returns false if either argument is a NaN this returns true. The implementation of == treats the unordered case specially so it would be possible to implement this in the same way. *) fun op ?= (x, y) = unordered(x, y) orelse x == y (* Although these may be built in in some architectures it's probably not worth treating them specially at the moment. *) fun *+ (x: real, y: real, z: real): real = x*y+z and *- (x: real, y: real, z: real): real = x*y-z val rem = Real.rtsCallFastRR_R "PolyRealRem" (* Split a real into whole and fractional parts. The fractional part must have the same sign as the number even if it is zero. *) fun split r = let val whole = realTrunc r val frac = r - whole in { whole = whole, frac = if not (isFinite r) then if isNan r then r else (* Infinity *) if r < zero then ~zero else zero else if frac == zero then if signBit r then ~zero else zero else frac } end (* Get the fractional part of a real. *) fun realMod r = #frac(split r) (* nextAfter: This was previously implemented in ML but, at the very least, needed to work with rounding to something other than TO_NEAREST. *) val nextAfter = Real.rtsCallFastRR_R "PolyRealNextAfter" end (* Real *) structure Real32: REAL where type real = Real32.real = (* Real32 uses some definitions from the Real structure above. *) struct open IEEEReal (* On both the X86 and ARM there is only a single conversion from double to float using the current rounding mode. If we want a specific rounding mode we need to set the rounding. *) fun fromLarge mode value = let val current = getRoundingMode() val () = setRoundingMode mode val result = Real32.fromReal value val () = setRoundingMode current in result end val fromRealRound = fromLarge TO_NEAREST (* Defined to use the current rounding mode. *) val fromLargeInt = Real32.fromReal o Real.fromLargeInt val fromInt: int -> Real32.real = (* We have to select the appropriate conversion. This will be reduced down to the appropriate function but has to be type-correct whether int is arbitrary precision or fixed precision. Hence the "o Large/FixedInt.fromInt". *) if Bootstrap.intIsArbitraryPrecision then fromLargeInt o LargeInt.fromInt else Real32.fromFixedInt o FixedInt.fromInt val zero = fromInt 0 and one = fromInt 1 and four = fromInt 4 val radix = 2 val precision = 24 val maxFinite = floatFromBinary{sign=false, exp=floatMaxFiniteExp, mantissa = 0x7FFFFF} val minNormalPos = floatFromBinary{sign=false, exp=1, mantissa = 0} val minPos = floatFromBinary{sign=false, exp=0, mantissa = 1} local open Real32 in val posInf : real = one/zero val negInf : real = ~one/zero val op != : real * real -> bool = not o op == end infix 4 == != ?=; (* isNan can be defined in terms of unordered. *) fun isNan x = Real32.unordered(x, x) fun isFinite x = floatExponent x <= floatMaxFiniteExp local open Real32 in val copySign : (real * real) -> real = rtsCallFastFF_F "PolyRealFCopySign" end val signBit = floatSignBit fun isNormal x = let val exp = floatExponent x in exp > 0 andalso exp <= floatMaxFiniteExp end fun class x = let val exp = floatExponent x in if exp > floatMaxFiniteExp then ( if floatMantissa x <> 0 then NAN else INF ) else if exp = 0 then ( if floatMantissa x = 0 then ZERO else SUBNORMAL ) else NORMAL end local open Real32 in fun sign x = if isNan x then raise General.Domain else if x == zero then 0 else if x < zero then ~1 else 1 end fun sameSign (x, y) = signBit x = signBit y local local val floatBias = 127 (* This is the exponent value for 1.0 *) val floatMantissaBits = precision - 1 (* One bit is implicit *) - val floatImplicitBit = IntInf.<<(1, Word.fromInt(FixedInt.toInt floatMantissaBits)) + val floatImplicitBit = IntInf.<<(1, Word.fromInt floatMantissaBits) in (* Convert a real number to arbitrary precision. It might be possible to include the rounding/truncation here. *) fun toArbitrary x = let open RealNumbersAsBits val ieeeExp = floatExponent x and ieeeMant = floatMantissa x and ieeeSign = floatSignBit x in if ieeeExp = 255 then (* Non-finite *) if ieeeMant <> 0 then raise General.Domain else raise General.Overflow else if ieeeExp < floatBias then 0 (* less than 1 *) else let (* Add the implicit bit to the mantissa and set the sign. *) - val m2a = FixedInt.toLarge ieeeMant + floatImplicitBit + val m2a = Int.toLarge ieeeMant + floatImplicitBit val m2s = if ieeeSign then ~m2a else m2a val shift = ieeeExp - floatBias - floatMantissaBits in if shift < 0 then IntInf.~>>(m2s, Word.fromInt(~shift)) else IntInf.<<(m2s, Word.fromInt shift) end end end open Real32 in (* Returns the minimum. In the case where one is a NaN it returns the other. In that case the comparison will be false. *) fun min (a: real, b: real): real = if a < b orelse isNan b then a else b (* Similarly for max. *) fun max (a: real, b: real): real = if a > b orelse isNan b then a else b fun checkFloat x = if isFinite x then x else if isNan x then raise General.Div else raise General.Overflow (* On certain platforms e.g. mips, toLarge does not preserve the sign on nans. We deal with the non-finite cases here. *) (* Use the Real versions for the moment. *) fun toManExp r = if not (isFinite r) orelse r == zero (* Nan, infinities and +/-0 all return r in the mantissa. We include 0 to preserve its sign. *) then {man=r, exp=0} else let val {man, exp} = Real.toManExp(toLarge r) in {man = fromRealRound man, exp = exp } end and fromManExp {man, exp} = if not (isFinite man) orelse man == zero (* Nan, infinities and +/-0 in the mantissa all return their argument. *) then man else fromRealRound(Real.fromManExp{man=toLarge man, exp=exp}) fun compare (r1, r2) = if r1 == r2 then General.EQUAL else if r1 < r2 then General.LESS else if r1 > r2 then General.GREATER else raise Unordered fun compareReal (r1, r2) = if r1 == r2 then EQUAL else if r1 < r2 then LESS else if r1 > r2 then GREATER else UNORDERED fun op ?= (x, y) = unordered(x, y) orelse x == y (* Although these may be built in in some architectures it's probably not worth treating them specially at the moment. *) fun *+ (x: real, y: real, z: real): real = x*y+z and *- (x: real, y: real, z: real): real = x*y-z val realFloor = rtsCallFastF_F "PolyRealFFloor" and realCeil = rtsCallFastF_F "PolyRealFCeil" and realTrunc = rtsCallFastF_F "PolyRealFTrunc" and realRound = rtsCallFastF_F "PolyRealFRound" val rem = rtsCallFastFF_F "PolyRealFRem" (* Split a real into whole and fractional parts. The fractional part must have the same sign as the number even if it is zero. *) fun split r = let val whole = realTrunc r val frac = r - whole in { whole = whole, frac = if not (isFinite r) then if isNan r then r else (* Infinity *) if r < zero then ~zero else zero else if frac == zero then if signBit r then ~zero else zero else frac } end (* Get the fractional part of a real. *) fun realMod r = #frac(split r) val nextAfter = rtsCallFastFF_F "PolyRealFNextAfter" fun toLargeInt IEEEReal.TO_NEGINF = toArbitrary o realFloor | toLargeInt IEEEReal.TO_POSINF = toArbitrary o realCeil | toLargeInt IEEEReal.TO_ZERO = toArbitrary o realTrunc | toLargeInt IEEEReal.TO_NEAREST = toArbitrary o realRound (* These are defined to raise Domain rather than Overflow on Nans. *) fun checkNan x = if isNan x then raise Domain else x (* If int is fixed we use the hardware conversions otherwise we use the real to arbitrary conversions. *) val floor = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realFloor else FixedInt.toInt o floorFix o checkNan and ceil = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realCeil else FixedInt.toInt o ceilFix o checkNan and trunc = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realTrunc else FixedInt.toInt o truncFix o checkNan and round = if Bootstrap.intIsArbitraryPrecision then LargeInt.toInt o toArbitrary o realRound else FixedInt.toInt o roundFix o checkNan fun toInt IEEEReal.TO_NEGINF = floor | toInt IEEEReal.TO_POSINF = ceil | toInt IEEEReal.TO_ZERO = trunc | toInt IEEEReal.TO_NEAREST = round end - - (* Scan input source for a valid number. The format is the same as - for double precision. Convert it using the current rounding mode. *) - fun scan getc src = - case Real.scan getc src of - NONE => NONE - | SOME (r, a) => SOME(Real32.fromReal r, a) - - val fromString = StringCvt.scanString scan - + fun toDecimal r = let val {sign, exponent, mantissa, class} = RealToDecimalConversion.floatToMinimal r in { class = class, sign = sign, exp = exponent + ndigits mantissa, digits = digitList(mantissa, []) } end val fmt = fmtFunction (RealToDecimalConversion.floatToMinimal, Real32.toLarge) val toString = fmt (StringCvt.GEN NONE) open Real32 (* Inherit the type and the built-in functions. *) (* Convert from decimal. This is defined to use TO_NEAREST. We need to handle NaNs specially because fromRealRound loses the sign on a NaN. *) local val posNan = abs(zero / zero) val negNan = ~posNan in fun fromDecimal { class = INF, sign=true, ...} = SOME negInf | fromDecimal { class = INF, sign=false, ...} = SOME posInf | fromDecimal { class = NAN, sign=true, ... } = SOME negNan | fromDecimal { class = NAN, sign=false, ... } = SOME posNan - | fromDecimal arg = Option.map fromRealRound (Real.fromDecimal arg) + | fromDecimal { sign, exp, digits, ... } = + (SOME(RealToDecimalConversion.decimalToFloat(sign, exp, digits)) handle General.Domain => NONE) end + (* Define these in terms of IEEEReal.scan since that deals with all the special cases. *) + fun scan getc src = + case IEEEReal.scan getc src of + NONE => NONE + | SOME (ieer, rest) => + ( + case fromDecimal ieer of + NONE => NONE + | SOME r => SOME(r, rest) + ) + + val fromString = Option.composePartial (fromDecimal, IEEEReal.fromString) + + structure Math = struct type real = real val sqrt = rtsCallFastF_F "PolyRealFSqrt" and sin = rtsCallFastF_F "PolyRealFSin" and cos = rtsCallFastF_F "PolyRealFCos" and atan = rtsCallFastF_F "PolyRealFArctan" and exp = rtsCallFastF_F "PolyRealFExp" and ln = rtsCallFastF_F "PolyRealFLog" and tan = rtsCallFastF_F "PolyRealFTan" and asin = rtsCallFastF_F "PolyRealFArcSin" and acos = rtsCallFastF_F "PolyRealFArcCos" and log10 = rtsCallFastF_F "PolyRealFLog10" and sinh = rtsCallFastF_F "PolyRealFSinh" and cosh = rtsCallFastF_F "PolyRealFCosh" and tanh = rtsCallFastF_F "PolyRealFTanh" val atan2 = rtsCallFastFF_F "PolyRealFAtan2" val pow = rtsCallFastFF_F "PolyRealFPow" (* Derived values. *) val e = exp one val pi = four * atan one end (* Converter for literal constants. Copied from Real. *) local fun convReal (s: string) : real = let + (* Conversion doesn't involve any real rounding so this should + no longer be relevant. *) (* Set the rounding mode to TO_NEAREST whatever the current rounding mode. Otherwise the result of compiling a piece of code with a literal constant could depend on what the rounding mode was set to. We should always support TO_NEAREST. *) val oldRounding = IEEEReal.getRoundingMode() val () = IEEEReal.setRoundingMode IEEEReal.TO_NEAREST val scanResult = StringCvt.scanString scan s val () = IEEEReal.setRoundingMode oldRounding in case scanResult of NONE => raise RunCall.Conversion "Invalid real constant" | SOME res => res end in (* Install this as a conversion function for real literals. *) val (): unit = RunCall.addOverload convReal "convReal" end end (* Real32 *) end; structure Math = Real.Math; structure LargeReal: REAL = Real; (* Values available unqualified at the top-level. *) val real : int -> real = Real.fromInt val trunc : real -> int = Real.trunc val floor : real -> int = Real.floor val ceil : real -> int = Real.ceil val round : real -> int =Real.round; (* Overloads for Real32.real. The overloads for real were added in InitialBasis. *) val () = RunCall.addOverload Real32.>= ">=" and () = RunCall.addOverload Real32.<= "<=" and () = RunCall.addOverload Real32.> ">" and () = RunCall.addOverload Real32.< "<" and () = RunCall.addOverload Real32.+ "+" and () = RunCall.addOverload Real32.- "-" and () = RunCall.addOverload Real32.* "*" and () = RunCall.addOverload Real32.~ "~" and () = RunCall.addOverload Real32.abs "abs" and () = RunCall.addOverload Real32./ "/"; (* Install print functions. *) local fun print_real32 _ _ (r: Real32.real) = PolyML.PrettyString(Real32.fmt (StringCvt.GEN(SOME 7)) r) in val () = PolyML.addPrettyPrinter print_real32 end; local fun print_real _ _ (r: real) = PolyML.PrettyString(Real.fmt (StringCvt.GEN(SOME 10)) r) in val () = PolyML.addPrettyPrinter print_real; end; diff --git a/basis/RealNumbersAsBits.ML b/basis/RealNumbersAsBits.ML index 34a3d7ca..655047af 100644 --- a/basis/RealNumbersAsBits.ML +++ b/basis/RealNumbersAsBits.ML @@ -1,176 +1,173 @@ (* Title: Standard Basis Library: Real number support Author: David Matthews Copyright David Matthews 2023 This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License version 2.1 as published by the Free Software Foundation. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA *) (* Extract the components of Real.real (double) and Real32.real (float). The formats, particularly of Real32.real, differ between 32-bit and 64-bit systems. *) structure RealNumbersAsBits: sig (* Floats. The mantissa will fit in a short int in both 32- and 64-bits . *) val floatSignBit: Real32.real -> bool - and floatExponent: Real32.real -> FixedInt.int - and floatMantissa: Real32.real -> FixedInt.int - and floatFromBinary: {sign: bool, exp: FixedInt.int, mantissa: FixedInt.int} -> Real32.real + and floatExponent: Real32.real -> int + and floatMantissa: Real32.real -> int + and floatFromBinary: {sign: bool, exp: int, mantissa: int} -> Real32.real (* Doubles. The mantissa is too large for a fixed int in 32-bit mode. *) val doubleSignBit: real -> bool - and doubleExponent: real -> FixedInt.int + and doubleExponent: real -> int and doubleMantissa: real -> LargeInt.int - and doubleFromBinary: {sign: bool, exp: FixedInt.int, mantissa: LargeInt.int} -> real + and doubleFromBinary: {sign: bool, exp: int, mantissa: LargeInt.int} -> real end = struct local (* IEEE-754 format. Float: 1 sign bit, 8 exponent bits, 23 mantissa bits. *) (* In native 64-bit a float is implemented as a tagged value. The top 32 bits contain the float, the lower 32-bits are zeros except for the tag. Because word values are tagged this is equivalent to a shift of 31 bits in ML. In native 32-bit and 32-in-64 a float is implemented as a boxed value. *) open LibrarySupport val floatIsTagged = wordSize = 0w8 val floatAsWord: Real32.real->word = RunCall.unsafeCast - val floatAsInt: Real32.real->FixedInt.int = RunCall.unsafeCast + val floatAsInt: Real32.real->int = RunCall.unsafeCast val wordAsFloat: word->Real32.real = RunCall.unsafeCast fun byte(r: Real32.real, b: word): Word8.word = RunCall.loadByteFromImmutable(r, if bigEndian then b else 0w3-b) and setByte(r: Real32.real, b: word, v: Word8.word) = RunCall.storeByte(r, if bigEndian then b else 0w3-b, v) in fun floatSignBit (r: Real32.real): bool = if floatIsTagged then floatAsInt r < 0 else byte(r, 0w0) >= 0w128 - fun floatExponent (r: Real32.real): FixedInt.int = + fun floatExponent (r: Real32.real): int = if floatIsTagged - then FixedInt.fromInt(Word.toInt(Word.andb(Word.>>(floatAsWord r, 0w23+0w31), 0wxff))) - else FixedInt.fromInt( - Word8.toInt(Word8.andb(byte(r, 0w0), 0wx7f)) * 2 + - Word8.toInt(Word8.>>(byte(r, 0w1), 0w7))) + then Word.toInt(Word.andb(Word.>>(floatAsWord r, 0w23+0w31), 0wxff)) + else Word8.toInt(Word8.andb(byte(r, 0w0), 0wx7f)) * 2 + + Word8.toInt(Word8.>>(byte(r, 0w1), 0w7)) - fun floatMantissa (r: Real32.real): FixedInt.int = + fun floatMantissa (r: Real32.real): int = if floatIsTagged - then FixedInt.fromInt(Word.toInt(Word.andb(Word.>>(floatAsWord r, 0w31), 0wx7fffff))) - else - (FixedInt.fromInt(Word8.toInt(Word8.andb(byte(r, 0w1), 0wx7f))) * 256 + - FixedInt.fromInt(Word8.toInt(byte(r, 0w2)))) * 256 + - FixedInt.fromInt(Word8.toInt(byte(r, 0w3))) + then Word.toInt(Word.andb(Word.>>(floatAsWord r, 0w31), 0wx7fffff)) + else (Word8.toInt(Word8.andb(byte(r, 0w1), 0wx7f)) * 256 + + Word8.toInt(byte(r, 0w2))) * 256 + + Word8.toInt(byte(r, 0w3)) - fun floatFromBinary{sign: bool, exp: FixedInt.int, mantissa: FixedInt.int}: Real32.real = + fun floatFromBinary{sign: bool, exp: int, mantissa: int}: Real32.real = if floatIsTagged then let val signBit = if sign then Word.<<(0w1, 0w31+0w31) else 0w0 - val expo = Word.<<(Word.fromInt(FixedInt.toInt exp), 0w23+0w31) + val expo = Word.<<(Word.fromInt exp, 0w23+0w31) (* This assumes that the mantissa value is not too large. *) - val mant = Word.<<(Word.fromInt(FixedInt.toInt mantissa), 0w31) + val mant = Word.<<(Word.fromInt mantissa, 0w31) in wordAsFloat(Word.orb(signBit, Word.orb(expo, mant))) end else let val r: Real32.real = RunCall.allocateByteMemory(0w1, 0wx41) - val b0 = Word8.orb(if sign then 0wx80 else 0w0, Word8.fromInt(FixedInt.toInt(FixedInt.quot(exp, 2)))) + val b0 = Word8.orb(if sign then 0wx80 else 0w0, Word8.fromInt(Int.toInt(Int.quot(exp, 2)))) val () = setByte(r, 0w0, b0) (* The low order 24 bits will always fit in a word. *) - val b = Word.orb(Word.<<(Word.fromInt(FixedInt.toInt exp), 0w23), - Word.fromInt(FixedInt.toInt mantissa)) + val b = Word.orb(Word.<<(Word.fromInt exp, 0w23), Word.fromInt mantissa) fun w8fromW x = Word8.fromLarge(Word.toLarge x) val () = setByte(r, 0w1, w8fromW(Word.>>(b, 0w16))) val () = setByte(r, 0w2, w8fromW(Word.>>(b, 0w8))) val () = setByte(r, 0w3, w8fromW b) val () = RunCall.clearMutableBit r in r end end local (* IEEE-754 format. Double: 1 sign bit, 11 exponent bits, 52 mantissa bits. *) open LibrarySupport (* In native 64-bit and 32-in-64 Real.real and LargeWord.word are both boxed 64-bit quantities. *) val realAsWord64: real -> LargeWord.word = RunCall.unsafeCast and word64AsReal: LargeWord.word -> real = RunCall.unsafeCast val realIsWord64 = sysWordSize = 0w8 fun byte(r: real, b: word): Word8.word = RunCall.loadByteFromImmutable(r, if bigEndian then b else 0w7-b) and setByte(r: real, b: word, v: Word8.word) = RunCall.storeByte(r, if bigEndian then b else 0w7-b, v) (* We use this mask when LargeWord.word is 64-bits. We don't write out the constant directly because it would cause an overflow when compiled in 32-bit mode even though it's not used. *) val doubleMantissaMask = LargeWord.>>(LargeWord.fromInt ~1, 0w12) in fun doubleSignBit (r: real) : bool = byte(r, 0w0) >= 0w128 - fun doubleExponent (r: real): FixedInt.int = - FixedInt.fromInt( + fun doubleExponent (r: real): int = + Int.fromInt( Word8.toInt(Word8.andb(byte(r, 0w0), 0wx7f)) * 16 + Word8.toInt(Word8.>>(byte(r, 0w1), 0w4))) fun doubleMantissa (r: real): LargeInt.int = if realIsWord64 then LargeWord.toLargeInt(LargeWord.andb(realAsWord64 r, doubleMantissaMask)) else (((((Word8.toLargeInt(Word8.andb(byte(r, 0w1), 0wxf)) * 256 + Word8.toLargeInt(byte(r, 0w2))) * 256 + Word8.toLargeInt(byte(r, 0w3))) * 256 + Word8.toLargeInt(byte(r, 0w4))) * 256 + Word8.toLargeInt(byte(r, 0w5))) * 256 + Word8.toLargeInt(byte(r, 0w6))) * 256 + Word8.toLargeInt(byte(r, 0w7)) - fun doubleFromBinary{sign: bool, exp: FixedInt.int, mantissa: LargeInt.int}: real = + fun doubleFromBinary{sign: bool, exp: int, mantissa: LargeInt.int}: real = if realIsWord64 then (* We can construct the value as a LargeWord.word and then cast it as a real. *) let val signBit = if sign then LargeWord.<<(0w1, 0w63) else 0w0 - val expo = LargeWord.<<(LargeWord.fromInt(FixedInt.toInt exp), 0w52) + val expo = LargeWord.<<(LargeWord.fromInt exp, 0w52) (* This assumes that the mantissa value is not too large. *) val mant = LargeWord.fromLargeInt mantissa in word64AsReal(LargeWord.orb(signBit, LargeWord.orb(expo, mant))) end else let val r: real = RunCall.allocateByteMemory(0w8 div wordSize, 0wx41) - val b0 = Word8.orb(if sign then 0wx80 else 0w0, Word8.fromInt(FixedInt.toInt(FixedInt.quot(exp, 16)))) + val b0 = Word8.orb(if sign then 0wx80 else 0w0, Word8.fromInt(Int.toInt(Int.quot(exp, 16)))) val () = setByte(r, 0w0, b0) val b1 = Word8.orb( - Word8.<<(Word8.fromInt(FixedInt.toInt(FixedInt.rem(exp, 16))), 0w4), + Word8.<<(Word8.fromInt(Int.rem(exp, 16)), 0w4), Word8.andb(Word8.fromLargeInt(IntInf.~>>(mantissa, 0w48)), 0wxf)) val () = setByte(r, 0w1, b1) val () = setByte(r, 0w2, Word8.fromLargeInt(IntInf.~>>(mantissa, 0w40))) val () = setByte(r, 0w3, Word8.fromLargeInt(IntInf.~>>(mantissa, 0w32))) val () = setByte(r, 0w4, Word8.fromLargeInt(IntInf.~>>(mantissa, 0w24))) val () = setByte(r, 0w5, Word8.fromLargeInt(IntInf.~>>(mantissa, 0w16))) val () = setByte(r, 0w6, Word8.fromLargeInt(IntInf.~>>(mantissa, 0w8))) val () = setByte(r, 0w7, Word8.fromLargeInt mantissa) val () = RunCall.clearMutableBit r in r end end end; \ No newline at end of file diff --git a/basis/RealToDecimalConversion.ML b/basis/RealToDecimalConversion.ML index 133d176f..54246650 100644 --- a/basis/RealToDecimalConversion.ML +++ b/basis/RealToDecimalConversion.ML @@ -1,382 +1,560 @@ (* Title: Standard Basis Library: Conversion from floating point to decimal Author: David Matthews The underlying conversion code was translated from the C version of Ryu. That code is Copyright 2018 Ulf Adams and is licensed under the terms of the Apache License version 2.0 or Boost Software License, Version 1.0 Boost Software License - Version 1.0 - August 17th, 2003 Boost Licence Permission is hereby granted, free of charge, to any person or organization obtaining a copy of the software and accompanying documentation covered by this license (the "Software") to use, reproduce, display, distribute, execute, and transmit the Software, and to prepare derivative works of the Software, and to permit third-parties to whom the Software is furnished to do so, all subject to the following: The copyright notices in the Software and this entire statement, including the above license grant, this restriction and the following disclaimer, must be included in all copies of the Software, in whole or in part, and all derivative works of the Software, unless such copies or derivative works are solely in the form of machine-executable object code generated by a source language processor. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. The ML translation and related code is copyright David Matthews 2023 and is released under the Boost Licence. The remainder of the Poly/ML system is licensed under LGPL. *) (* This uses arbitrary precision arithmetic even for Real32.real (float) values. It would be possible to do some of the arithmetic using fixed precision integers but that's difficult on 32-bit platforms even for float. *) (* RealNumbersAsBits is defined in Poly/ML using a faster version. This is provided as an example. *) (* structure RealNumbersAsBits: sig (* Floats. The mantissa will fit in a short int in both 32- and 64-bits . *) val floatSignBit: Real32.real -> bool - and floatExponent: Real32.real -> FixedInt.int - and floatMantissa: Real32.real -> FixedInt.int + and floatExponent: Real32.real -> int + and floatMantissa: Real32.real -> int + and floatFromBinary: {sign: bool, exp: int, mantissa: int} -> Real32.real (* Doubles. The mantissa is too large for a fixed int in 32-bit mode. *) val doubleSignBit: real -> bool - and doubleExponent: real -> FixedInt.int + and doubleExponent: real -> int and doubleMantissa: real -> LargeInt.int + and doubleFromBinary: {sign: bool, exp: int, mantissa: LargeInt.int} -> real end = struct local open PackReal32Big Word8Vector - val word8ToFix = FixedInt.fromInt o Word8.toInt in fun floatSignBit r = sub(toBytes r, 0) >= 0w128 fun floatExponent r = let val v = toBytes r in - word8ToFix(Word8.andb(sub(v, 0), 0wx7f)) * 2 + - word8ToFix(Word8.>>(sub(v, 1), 0w7)) + Word8.toInt(Word8.andb(sub(v, 0), 0wx7f)) * 2 + + Word8.toInt(Word8.>>(sub(v, 1), 0w7)) end fun floatMantissa r = let val v = toBytes r in - (word8ToFix(Word8.andb(sub(v, 1), 0wx7f)) * 256 + - word8ToFix(sub(v, 2))) * 256 + - word8ToFix(sub(v, 3)) + (Word8.toInt(Word8.andb(sub(v, 1), 0wx7f)) * 256 + + Word8.toInt(sub(v, 2))) * 256 + + Word8.toInt(sub(v, 3)) + end + + fun floatFromBinary{sign, exp, mantissa} = + let + val _ = if mantissa < 0 orelse mantissa > 0x800000 then raise General.Domain else () + val mw = Word.fromInt mantissa + val wordToWord8 = Word8.fromLarge o Word.toLarge + val b0 = Word8.orb(if sign then 0wx80 else 0w0, Word8.>>(Word8.fromInt exp, 0w1)) + val b1 = Word8.orb(Word8.<<(Word8.andb(Word8.fromInt exp, 0w1), 0w7), + wordToWord8(Word.>>(mw, 0w16))) + val b2 = wordToWord8(Word.>>(mw, 0w8)) + val b3 = wordToWord8 mw + in + subVec(Word8Vector.fromList[b0, b1, b2, b3], 0) end end local open PackRealBig Word8Vector - val word8ToFix = FixedInt.fromInt o Word8.toInt in fun doubleSignBit r = sub(toBytes r, 0) >= 0w128 fun doubleExponent r = let val v = toBytes r in - word8ToFix(Word8.andb(sub(v, 0), 0wx7f)) * 16 + - word8ToFix(Word8.>>(sub(v, 1), 0w4)) + Word8.toInt(Word8.andb(sub(v, 0), 0wx7f)) * 16 + + Word8.toInt(Word8.>>(sub(v, 1), 0w4)) end fun doubleMantissa r = let fun fromBytes (i, byte, acc) = if i = 0 then 0 else if i = 1 then Word8.toLargeInt(Word8.andb(byte, 0wx0f)) else acc*256 + Word8.toLargeInt byte in foldli fromBytes 0 (toBytes r) end + + fun doubleFromBinary{sign, exp, mantissa} = + let + val _ = if mantissa < 0 orelse mantissa > 0x10000000000000 then raise General.Domain else () + fun toDigits(n, l, mant) = + if n = 1 + then Word8.orb(if sign then 0wx80 else 0w0, Word8.fromInt(Int.toInt(Int.quot(exp, 16)))) :: + Word8.orb(Word8.<<(Word8.fromInt(Int.toInt exp), 0w4), + Word8.andb(Word8.fromLargeInt mant, 0wxf)) :: l + else + let + val (q, r) = IntInf.quotRem(mant, 256) + in + toDigits(n-1, Word8.fromLargeInt r :: l, q) + end + + in + subVec(Word8Vector.fromList(toDigits(7, [], mantissa)), 0) + end end end; *) structure RealToDecimalConversion: sig val floatToMinimal: Real32.real -> {sign:bool, exponent: int, mantissa: LargeInt.int, class: IEEEReal.float_class} val doubleToMinimal: real -> {sign:bool, exponent: int, mantissa: LargeInt.int, class: IEEEReal.float_class} + val decimalToFloat: bool * int * int list -> Real32.real + and decimalToDouble: bool * int * int list -> real end = struct (* Common functions *) (* Returns floor(log10(2^e)) for values of e between 0 and 1650. *) fun log10Pow2 e = if e < 0 orelse e > 1650 then raise General.Domain else Int.quot(e * 78913, 0x40000) (* >> 18 *) (* Returns floor(log10(5^e)) for values of e between 0 and 2620 *) and log10Pow5 e = if e < 0 orelse e > 2620 then raise General.Domain else Int.quot(e * 732923, 0x100000) (* >> 20 *) - fun pow5bits e = + fun log2Pow5 e = if e < 0 orelse e > 3528 then raise General.Domain - else Int.quot(e * 1217359, 0x80000) (* >> 19 *) + 1 + else Int.quot(e * 1217359, 0x80000) (* >> 19 *) + + fun pow5bits e = log2Pow5 e + 1 local (* Keep dividing by 5 while the remainder is zero *) fun p5 count value = if LargeInt.rem(value, 5) <> 0 then count else p5 (count+1) (LargeInt.quot(value, 5)) in (* Returns whether value is divisible by 5 to the power p. *) fun multipleOfPow5(value, e5) = p5 0 value >= e5 end fun multipleOfPowerOf2(value, p) = IntInf.andb(value, IntInf.<<(1, Word.fromInt p) - 1) = 0 local val posTableSize = 326 and invTableSize = 342 val pow5BitCount = 125 and pow5InvBitCount = 125 fun createInvSplit i = let val pow = IntInf.pow(5, i) val pow5len = IntInf.log2 pow + 1 (* Bit length *) val j = pow5len - 1 + pow5InvBitCount val pow5inv = IntInf.<<(1, Word.fromInt j) div pow + 1 in pow5inv end and createSplit i = let val pow = IntInf.pow(5, i) val pow5len = IntInf.log2 pow + 1 (* Bit length *) val shift = pow5len-pow5BitCount val pow5 = if shift < 0 then IntInf.<<(pow, Word.fromInt(~shift)) else IntInf.~>>(pow, Word.fromInt shift) in pow5 end val doublePow5InvSplit = Vector.tabulate(invTableSize, createInvSplit) and doublePow5Split = Vector.tabulate(posTableSize, createSplit) (* The original C code uses 128-bit arithmetic here. We don't have that so this uses arbitrary precision arithmetic. The result will usually (always?) be short on 64-bit platforms. *) fun mulShift(m, factor, shift: int) = IntInf.~>>(factor*m, Word.fromInt shift) in fun mulPow5InvDivPow2(m, i, j) = mulShift(m, Vector.sub(doublePow5InvSplit, i), j) and mulPow5DivPow2(m, i, j) = mulShift(m, Vector.sub(doublePow5Split, i), j) val doublePow5InvBitCount = pow5InvBitCount and doublePow5BitCount = pow5BitCount end (* Apart from the first step the remainder is common. *) fun computeDecimal(e2: int, m2: LargeInt.int, mmShift, acceptBounds) = let (* Step 2: Determine the interval of valid decimal representations *) val mm = 4 * m2 - 1 - mmShift val mv = 4 * m2 val mp = 4 * m2 + 2 (* Step 3: Convert to a decimal power base *) val (e10, vr, vp, vm, lastRemovedDigit, vrIsTrailingZeros, vmIsTrailingZeros) = if e2 >= 0 then let val q = log10Pow2 e2 val e10 = q val k = doublePow5InvBitCount + pow5bits q - 1 val i = ~e2 + q + k val vr = mulPow5InvDivPow2(mv, q, i) and vp = mulPow5InvDivPow2(mp, q, i) and vm = mulPow5InvDivPow2(mm, q, i) in if q > 21 then (e10, vr, vp, vm, 0, false, false) (* Too large to be power of 5. *) else if LargeInt.rem(mv, 5) = 0 then (e10, vr, vp, vm, 0, multipleOfPow5(mv, q), false) else if acceptBounds then (e10, vr, vp, vm, 0, false, multipleOfPow5(mm, q)) else (e10, vr, vp - (if multipleOfPow5(mp, q) then 1 else 0), vm, 0, false, false) end else let val q = log10Pow5(~ e2) val e10 = q + e2 val i = ~e2 - q val k = pow5bits i - doublePow5BitCount val j = q - k val vr = mulPow5DivPow2(mv, i, j) and vp = mulPow5DivPow2(mp, i, j) and vm = mulPow5DivPow2(mm, i, j) val lastRemovedDigit = if q <> 0 andalso LargeInt.quot(vp-1, 10) <= LargeInt.quot(vm, 10) then let val j' = q-1-(pow5bits(i+1)-doublePow5BitCount) val lrm = LargeInt.rem(mulPow5DivPow2(mv, i+1, j'), 10) in lrm end else 0 in if q <= 1 then if acceptBounds then (e10, vr, vp, vm, lastRemovedDigit, true, mmShift = 1) else (e10, vr, vp-1, vm, lastRemovedDigit, true, false) else if q < 31 then (e10, vr, vp, vm, lastRemovedDigit, multipleOfPowerOf2(mv, q-1), false) else (e10, vr, vp, vm, lastRemovedDigit, false, false) end (* Step 4: Find the shortest decimal representation in the interval *) val (output, removed) = if vmIsTrailingZeros orelse vrIsTrailingZeros then let fun removeVrDigits(vr, vp, vm, removed, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros) = let val vpDiv10 = LargeInt.quot(vp, 10) and vmDiv10 = LargeInt.quot(vm, 10) in if vpDiv10 > vmDiv10 then removeVrDigits(LargeInt.quot(vr, 10), vpDiv10, vmDiv10, removed+1, LargeInt.rem(vr, 10), vmIsTrailingZeros andalso LargeInt.rem(vm, 10) = 0, vrIsTrailingZeros andalso lastRemovedDigit = 0) else removeVmDigits(vr, vp, vm, removed, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros) end and removeVmDigits(vr, vp, vm, removed, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros) = let in if vmIsTrailingZeros andalso LargeInt.rem(vm, 10) = 0 then removeVmDigits(LargeInt.quot(vr, 10), LargeInt.quot(vp, 10), LargeInt.quot(vm, 10), removed+1, LargeInt.rem(vr, 10), vmIsTrailingZeros, vrIsTrailingZeros andalso lastRemovedDigit = 0) else let val lastRemovedDigit2 = if vrIsTrailingZeros andalso lastRemovedDigit = 5 andalso LargeInt.rem(vr, 2) = 0 then 4 (* Don't round up *) else lastRemovedDigit val vrCorrect = (vr = vm andalso (not acceptBounds orelse not vmIsTrailingZeros)) orelse lastRemovedDigit2 >= 5 in (vr + (if vrCorrect then 1 else 0), removed) end end in removeVrDigits(vr, vp, vm, 0, lastRemovedDigit, vmIsTrailingZeros, vrIsTrailingZeros) end else let fun removeDigits(vr, vp, vm, removed, lastRemovedDigit) = let val vpDiv10 = LargeInt.quot(vp, 10) and vmDiv10 = LargeInt.quot(vm, 10) in if vpDiv10 > vmDiv10 then removeDigits(LargeInt.quot(vr, 10), vpDiv10, vmDiv10, removed+1, LargeInt.rem(vr, 10)) else (vr + (if vr = vm orelse lastRemovedDigit >= 5 then 1 else 0), removed) end in removeDigits(vr, vp, vm, 0, lastRemovedDigit) end in {mantissa=output, exponent=e10+removed} end val doubleBias = 1023 (* This is the exponent value for 1.0 *) val doubleMantissaBits = 53 - 1 (* One bit is implicit *) - val doubleImplicitBit = IntInf.<<(1, Word.fromInt(FixedInt.toInt doubleMantissaBits)) + val doubleImplicitBit = IntInf.<<(1, Word.fromInt(Int.toInt doubleMantissaBits)) val floatBias = 127 (* This is the exponent value for 1.0 *) val floatMantissaBits = 24 - 1 (* One bit is implicit *) - val floatImplicitBit = FixedInt.fromLarge(IntInf.<<(1, Word.fromInt(FixedInt.toInt floatMantissaBits))) + val floatImplicitBit = Int.fromLarge(IntInf.<<(1, Word.fromInt(Int.toInt floatMantissaBits))) fun doubleToMinimal(r: real) = let open RealNumbersAsBits IEEEReal val ieeeSign = doubleSignBit r and ieeeExponent = doubleExponent r and ieeeMantissa = doubleMantissa r in if ieeeExponent = 2047 then (* Non-finite *) {sign=ieeeSign, exponent=0, mantissa=0, class=if ieeeMantissa = 0 then INF else NAN} else if ieeeExponent = 0 andalso ieeeMantissa = 0 then {sign=ieeeSign, exponent=0, mantissa=0, class=ZERO} else let (* Step 1: Normalise the value. Normalised values, with exponent non-zero, have an implicit one in the top bit position. *) val (e2, m2) = if ieeeExponent = 0 then (1-doubleBias-doubleMantissaBits-2, ieeeMantissa) else (ieeeExponent-doubleBias-doubleMantissaBits-2, ieeeMantissa + doubleImplicitBit) val acceptBounds = LargeInt.rem(m2, 2) = 0 val mmShift = if ieeeMantissa <> 0 orelse ieeeExponent <= 1 then 1 else 0 - val {mantissa, exponent} = computeDecimal(FixedInt.toInt e2, m2, mmShift, acceptBounds) + val {mantissa, exponent} = computeDecimal(Int.toInt e2, m2, mmShift, acceptBounds) in {sign=ieeeSign, exponent=exponent, mantissa=mantissa, class=if ieeeExponent = 0 then SUBNORMAL else NORMAL} end end and floatToMinimal(f: Real32.real) = let open RealNumbersAsBits IEEEReal val ieeeSign = floatSignBit f and ieeeExponent = floatExponent f and ieeeMantissa = floatMantissa f in if ieeeExponent = 255 then (* Non-finite *) {sign=ieeeSign, exponent=0, mantissa=0, class=if ieeeMantissa = 0 then INF else NAN} else if ieeeExponent = 0 andalso ieeeMantissa = 0 then {sign=ieeeSign, exponent=0, mantissa=0, class=ZERO} else let (* Step 1: Normalise the value. Normalised values, with exponent non-zero, have an implicit one in the top bit position. *) val (e2, m2) = if ieeeExponent = 0 then (1-floatBias-floatMantissaBits-2, ieeeMantissa) else (ieeeExponent-floatBias-floatMantissaBits-2, ieeeMantissa + floatImplicitBit) - val isEven = FixedInt.rem(m2, 2) = 0 + val isEven = Int.rem(m2, 2) = 0 val acceptBounds = isEven (* Step 2: Determine the interval of valid decimal representations (??) *) val mmShift = if ieeeMantissa <> 0 orelse ieeeExponent <= 1 then 1 else 0 - val {mantissa, exponent} = computeDecimal(FixedInt.toInt e2, FixedInt.toLarge m2, mmShift, acceptBounds) + val {mantissa, exponent} = computeDecimal(Int.toInt e2, Int.toLarge m2, mmShift, acceptBounds) in {sign=ieeeSign, exponent=exponent, mantissa=mantissa, class=if ieeeExponent = 0 then SUBNORMAL else NORMAL} end end - + + val floatPosInf = RealNumbersAsBits.floatFromBinary{sign=false, exp=255, mantissa = 0} + and floatNegInf = RealNumbersAsBits.floatFromBinary{sign=true, exp=255, mantissa = 0} + and floatPosZero = RealNumbersAsBits.floatFromBinary{sign=false, exp=0, mantissa = 0} (* We haven't defined constants yet. *) + and floatNegZero = RealNumbersAsBits.floatFromBinary{sign=true, exp=0, mantissa = 0} + + fun decimalToFloat (sign, e10, digits) = + let + (* At most 8 digits are significant. When we encounter the 9th we use it for rounding. *) + fun digitsToMantissa([], acc, nDigs) = (acc, nDigs) + | digitsToMantissa(d :: _, acc, 9) = + (if d >= 5 then acc+1 else acc, 9) + | digitsToMantissa(d :: l, acc, nDigs) = + if d < 0 orelse d > 9 then raise General.Domain + else digitsToMantissa(l, acc*10 + Int.fromInt d, nDigs+1) + val (m10, m10digits) = digitsToMantissa(digits, 0, 0) + in + if e10 <= ~46 orelse m10 = 0 (* Number is less than 1e~46. Return +/- 0.0 *) + then if sign then floatNegZero else floatPosZero + else if e10 >= 40 + then if sign then floatNegInf else floatPosInf + else + let + val e10 = e10-m10digits + val m10L = Int.toLarge m10 + val (e2, m2, trailingZeros) = + if e10 >= 0 + then + let + val l2p5 = log2Pow5 e10 + val e2 = IntInf.log2 m10L + e10 + l2p5 - (floatMantissaBits + 1) + val j = e2 - e10 - l2p5 - 1 + doublePow5BitCount + val m2 = mulPow5DivPow2(m10L, e10, j) + val trailingZeros = e2 < e10 orelse e2 - e10 < 32 andalso multipleOfPowerOf2(m10L, e2 - e10) + in + (e2, m2, trailingZeros) + end + else + let + val cl2p5 = log2Pow5(~e10)+1 + val e2 = IntInf.log2 m10L + e10 - cl2p5 - (floatMantissaBits + 1) + val j = e2 - e10 + cl2p5 - 1 + doublePow5InvBitCount + val m2 = mulPow5InvDivPow2(m10L, ~e10, j) + val trailingZeros = + (e2 < e10 orelse e2 - e10 < 32 andalso multipleOfPowerOf2(m10L, e2 - e10)) andalso + multipleOfPow5(m10L, ~e10) + in + (e2, m2, trailingZeros) + end + val ieeeExp = Int.max(0, e2+floatBias+IntInf.log2 m2) + (* The shift should put the top bit into the implicit bit position + so it is then masked out except when the exponent is zero + when we have a denormal number. *) + val shift = + (if ieeeExp = 0 then 1 else ieeeExp) - e2 - floatBias - floatMantissaBits + val trailingZeros = trailingZeros andalso + Word.andb(Word.fromLargeInt m2, Word.<<(0w1, Word.fromInt shift - 0w1) - 0w1) = 0w0 + val lastRemovedBit = Word.andb(Word.>>(Word.fromLargeInt m2, Word.fromInt shift - 0w1), 0w1) + val roundup = + if lastRemovedBit <> 0w0 andalso (not trailingZeros orelse + Word.andb(Word.>>(Word.fromLargeInt m2, Word.fromInt shift), 0w1) <> 0w0) + then 0w1 else 0w0 + val ieeeM2 = Word.toInt(Word.andb(Word.fromInt(floatImplicitBit-1), Word.>>(Word.fromLargeInt m2, Word.fromInt shift) + roundup)) + in + if ieeeExp > 254 (* Exponent is too large - return infinity *) + then if sign then floatNegInf else floatPosInf + else RealNumbersAsBits.floatFromBinary{sign=sign, exp=ieeeExp, mantissa=ieeeM2} + end + end + + val doublePosInf = RealNumbersAsBits.doubleFromBinary{sign=false, exp=2047, mantissa = 0} + and doubleNegInf = RealNumbersAsBits.doubleFromBinary{sign=true, exp=2047, mantissa = 0} + and doublePosZero = RealNumbersAsBits.doubleFromBinary{sign=false, exp=0, mantissa = 0} + and doubleNegZero = RealNumbersAsBits.doubleFromBinary{sign=true, exp=0, mantissa = 0} + + fun decimalToDouble (sign, e10, digits) = + let + (* At most 16 digits are significant. We need to limit the mantissa to this + number of digits to avoid a potential subscript exception when accessing the + pre-built tables. When we encounter the 17th we use it for rounding. *) + fun digitsToMantissa([], acc: LargeInt.int, nDigs) = (acc, nDigs) + | digitsToMantissa(d :: _, acc, 17) = + (if d >= 5 then acc+1 else acc, 17) + | digitsToMantissa(d :: l, acc, nDigs) = + if d < 0 orelse d > 9 then raise General.Domain + else digitsToMantissa(l, acc*10 + LargeInt.fromInt d, nDigs+1) + val (m10, m10digits) = digitsToMantissa(digits, 0, 0) + in + if e10 <= ~324 orelse m10 = 0 (* Number is less than 1e~324. Return +/- 0.0 *) + then if sign then doubleNegZero else doublePosZero + else if e10 >= 310 + then if sign then doubleNegInf else doublePosInf + else + let + val e10 = e10-m10digits + val m10L = m10 + val (e2, m2, trailingZeros) = + if e10 >= 0 + then + let + val l2p5 = log2Pow5 e10 + val e2 = IntInf.log2 m10L + e10 + l2p5 - (doubleMantissaBits + 1) + val j = e2 - e10 - l2p5 - 1 + doublePow5BitCount + val m2 = mulPow5DivPow2(m10L, e10, j) + val trailingZeros = e2 < e10 orelse e2 - e10 < 32 andalso multipleOfPowerOf2(m10L, e2 - e10) + in + (e2, m2, trailingZeros) + end + else + let + val cl2p5 = log2Pow5(~e10)+1 + val e2 = IntInf.log2 m10L + e10 - cl2p5 - (doubleMantissaBits + 1) + val j = e2 - e10 + cl2p5 - 1 + doublePow5InvBitCount + val m2 = mulPow5InvDivPow2(m10L, ~e10, j) + val trailingZeros = + (e2 < e10 orelse e2 - e10 < 32 andalso multipleOfPowerOf2(m10L, e2 - e10)) andalso + multipleOfPow5(m10L, ~e10) + in + (e2, m2, trailingZeros) + end + val ieeeExp = Int.max(0, e2 + doubleBias + IntInf.log2 m2) + (* The shift should put the top bit into the implicit bit position + so it is then masked out except when the exponent is zero + when we have a denormal number. *) + val shift = + (if ieeeExp = 0 then 1 else ieeeExp) - e2 - doubleBias - doubleMantissaBits + val trailingZeros = trailingZeros andalso + IntInf.andb(m2, IntInf.<<(1, Word.fromInt shift - 0w1) - 1) = 0 + val lastRemovedBit = IntInf.andb(IntInf.~>>(m2, Word.fromInt shift - 0w1), 1) + val roundup = + if lastRemovedBit <> 0 andalso (not trailingZeros orelse + IntInf.andb(IntInf.~>>(m2, Word.fromInt shift), 1) <> 0) + then 1 else 0 + val ieeeM2 = IntInf.andb(doubleImplicitBit-1, IntInf.~>>(m2, Word.fromInt shift) + roundup) + in + if ieeeExp > 2046 (* Exponent is too large - return infinity *) + then if sign then doubleNegInf else doublePosInf + else RealNumbersAsBits.doubleFromBinary{sign=sign, exp=Int.fromInt ieeeExp, mantissa=ieeeM2} + end + end + + end;