diff --git a/src/Pure/General/set.ML b/src/Pure/General/set.ML --- a/src/Pure/General/set.ML +++ b/src/Pure/General/set.ML @@ -1,421 +1,417 @@ (* Title: Pure/General/set.ML Author: Makarius Efficient representation of sets (see also Pure/General/table.ML). *) signature SET = sig structure Key: KEY type elem type T val size: T -> int val empty: T val build: (T -> T) -> T val is_empty: T -> bool - val is_single: T -> bool val fold: (elem -> 'a -> 'a) -> T -> 'a -> 'a val fold_rev: (elem -> 'a -> 'a) -> T -> 'a -> 'a val dest: T -> elem list val exists: (elem -> bool) -> T -> bool val forall: (elem -> bool) -> T -> bool val get_first: (elem -> 'a option) -> T -> 'a option val member: T -> elem -> bool val subset: T * T -> bool val ord: T ord val insert: elem -> T -> T val make: elem list -> T val merge: T * T -> T val remove: elem -> T -> T val subtract: T -> T -> T end; functor Set(Key: KEY): SET = struct (* keys *) structure Key = Key; type elem = Key.key; val eq_key = is_equal o Key.ord; (* datatype *) datatype T = Empty | Leaf1 of elem | Leaf2 of elem * elem | Leaf3 of elem * elem * elem | Branch2 of T * elem * T | Branch3 of T * elem * T * elem * T; (*literal copy from table.ML*) fun make2 (Empty, e, Empty) = Leaf1 e | make2 (Branch2 (Empty, e1, Empty), e2, right) = make2 (Leaf1 e1, e2, right) | make2 (left, e1, Branch2 (Empty, e2, Empty)) = make2 (left, e1, Leaf1 e2) | make2 (Branch3 (Empty, e1, Empty, e2, Empty), e3, right) = make2 (Leaf2 (e1, e2), e3, right) | make2 (left, e1, Branch3 (Empty, e2, Empty, e3, Empty)) = make2 (left, e1, Leaf2 (e2, e3)) | make2 (Leaf1 e1, e2, Empty) = Leaf2 (e1, e2) | make2 (Empty, e1, Leaf1 e2) = Leaf2 (e1, e2) | make2 (Leaf1 e1, e2, Leaf1 e3) = Leaf3 (e1, e2, e3) | make2 (Leaf2 (e1, e2), e3, Empty) = Leaf3 (e1, e2, e3) | make2 (Empty, e1, Leaf2 (e2, e3)) = Leaf3 (e1, e2, e3) | make2 arg = Branch2 arg; (*literal copy from table.ML*) fun make3 (Empty, e1, Empty, e2, Empty) = Leaf2 (e1, e2) | make3 (Branch2 (Empty, e1, Empty), e2, mid, e3, right) = make3 (Leaf1 e1, e2, mid, e3, right) | make3 (left, e1, Branch2 (Empty, e2, Empty), e3, right) = make3 (left, e1, Leaf1 e2, e3, right) | make3 (left, e1, mid, e2, Branch2 (Empty, e3, Empty)) = make3 (left, e1, mid, e2, Leaf1 e3) | make3 (Leaf1 e1, e2, Empty, e3, Empty) = Leaf3 (e1, e2, e3) | make3 (Empty, e1, Leaf1 e2, e3, Empty) = Leaf3 (e1, e2, e3) | make3 (Empty, e1, Empty, e2, Leaf1 e3) = Leaf3 (e1, e2, e3) | make3 arg = Branch3 arg; (*literal copy from table.ML*) fun unmake (Leaf1 e) = Branch2 (Empty, e, Empty) | unmake (Leaf2 (e1, e2)) = Branch3 (Empty, e1, Empty, e2, Empty) | unmake (Leaf3 (e1, e2, e3)) = Branch2 (Branch2 (Empty, e1, Empty), e2, Branch2 (Empty, e3, Empty)) | unmake arg = arg; (* size *) local fun add_size Empty n = n | add_size (Leaf1 _) n = n + 1 | add_size (Leaf2 _) n = n + 2 | add_size (Leaf3 _) n = n + 3 | add_size (Branch2 (left, _, right)) n = n + 1 |> add_size left |> add_size right | add_size (Branch3 (left, _, mid, _, right)) n = n + 2 |> add_size left |> add_size mid |> add_size right; in fun size set = add_size set 0; end; (* empty and single *) val empty = Empty; fun build (f: T -> T) = f empty; fun is_empty Empty = true | is_empty _ = false; -fun is_single (Leaf1 _) = true - | is_single _ = false; - (* fold combinators *) fun fold_set f = let fun fold Empty x = x | fold (Leaf1 e) x = f e x | fold (Leaf2 (e1, e2)) x = f e2 (f e1 x) | fold (Leaf3 (e1, e2, e3)) x = f e3 (f e2 (f e1 x)) | fold (Branch2 (left, e, right)) x = fold right (f e (fold left x)) | fold (Branch3 (left, e1, mid, e2, right)) x = fold right (f e2 (fold mid (f e1 (fold left x)))); in fold end; fun fold_rev_set f = let fun fold_rev Empty x = x | fold_rev (Leaf1 e) x = f e x | fold_rev (Leaf2 (e1, e2)) x = f e1 (f e2 x) | fold_rev (Leaf3 (e1, e2, e3)) x = f e1 (f e2 (f e3 x)) | fold_rev (Branch2 (left, e, right)) x = fold_rev left (f e (fold_rev right x)) | fold_rev (Branch3 (left, e1, mid, e2, right)) x = fold_rev left (f e1 (fold_rev mid (f e2 (fold_rev right x)))); in fold_rev end; fun dest tab = fold_rev_set cons tab []; (* exists and forall *) fun exists pred = let fun ex Empty = false | ex (Leaf1 e) = pred e | ex (Leaf2 (e1, e2)) = pred e1 orelse pred e2 | ex (Leaf3 (e1, e2, e3)) = pred e1 orelse pred e2 orelse pred e3 | ex (Branch2 (left, e, right)) = ex left orelse pred e orelse ex right | ex (Branch3 (left, e1, mid, e2, right)) = ex left orelse pred e1 orelse ex mid orelse pred e2 orelse ex right; in ex end; fun forall pred = not o exists (not o pred); (* get_first *) fun get_first f = let fun get Empty = NONE | get (Leaf1 e) = f e | get (Leaf2 (e1, e2)) = (case f e1 of NONE => f e2 | some => some) | get (Leaf3 (e1, e2, e3)) = (case f e1 of NONE => (case f e2 of NONE => f e3 | some => some) | some => some) | get (Branch2 (left, e, right)) = (case get left of NONE => (case f e of NONE => get right | some => some) | some => some) | get (Branch3 (left, e1, mid, e2, right)) = (case get left of NONE => (case f e1 of NONE => (case get mid of NONE => (case f e2 of NONE => get right | some => some) | some => some) | some => some) | some => some); in get end; (* member *) fun member set elem = let fun mem Empty = false | mem (Leaf1 e) = eq_key (elem, e) | mem (Leaf2 (e1, e2)) = (case Key.ord (elem, e1) of LESS => false | EQUAL => true | GREATER => eq_key (elem, e2)) | mem (Leaf3 (e1, e2, e3)) = (case Key.ord (elem, e2) of LESS => eq_key (elem, e1) | EQUAL => true | GREATER => eq_key (elem, e3)) | mem (Branch2 (left, e, right)) = (case Key.ord (elem, e) of LESS => mem left | EQUAL => true | GREATER => mem right) | mem (Branch3 (left, e1, mid, e2, right)) = (case Key.ord (elem, e1) of LESS => mem left | EQUAL => true | GREATER => (case Key.ord (elem, e2) of LESS => mem mid | EQUAL => true | GREATER => mem right)); in mem set end; (* subset and order *) fun subset (set1, set2) = forall (member set2) set1; val ord = pointer_eq_ord (fn sets => (case int_ord (apply2 size sets) of EQUAL => if subset sets then EQUAL else dict_ord Key.ord (apply2 dest sets) | ord => ord)); (* insert *) datatype growth = Stay of T | Sprout of T * elem * T; fun insert elem set = if member set elem then set else let fun ins Empty = Sprout (Empty, elem, Empty) | ins (t as Leaf1 _) = ins (unmake t) | ins (t as Leaf2 _) = ins (unmake t) | ins (t as Leaf3 _) = ins (unmake t) | ins (Branch2 (left, e, right)) = (case Key.ord (elem, e) of LESS => (case ins left of Stay left' => Stay (make2 (left', e, right)) | Sprout (left1, e', left2) => Stay (make3 (left1, e', left2, e, right))) | EQUAL => Stay (make2 (left, e, right)) | GREATER => (case ins right of Stay right' => Stay (make2 (left, e, right')) | Sprout (right1, e', right2) => Stay (make3 (left, e, right1, e', right2)))) | ins (Branch3 (left, e1, mid, e2, right)) = (case Key.ord (elem, e1) of LESS => (case ins left of Stay left' => Stay (make3 (left', e1, mid, e2, right)) | Sprout (left1, e', left2) => Sprout (make2 (left1, e', left2), e1, make2 (mid, e2, right))) | EQUAL => Stay (make3 (left, e1, mid, e2, right)) | GREATER => (case Key.ord (elem, e2) of LESS => (case ins mid of Stay mid' => Stay (make3 (left, e1, mid', e2, right)) | Sprout (mid1, e', mid2) => Sprout (make2 (left, e1, mid1), e', make2 (mid2, e2, right))) | EQUAL => Stay (make3 (left, e1, mid, e2, right)) | GREATER => (case ins right of Stay right' => Stay (make3 (left, e1, mid, e2, right')) | Sprout (right1, e', right2) => Sprout (make2 (left, e1, mid), e2, make2 (right1, e', right2))))); in (case ins set of Stay set' => set' | Sprout br => make2 br) end; fun make elems = build (fold insert elems); fun merge (set1, set2) = if pointer_eq (set1, set2) then set1 else if is_empty set1 then set2 else if is_empty set2 then set1 else if size set1 >= size set2 then fold_set insert set2 set1 else fold_set insert set1 set2; (* remove *) local fun compare NONE _ = LESS | compare (SOME e1) e2 = Key.ord (e1, e2); fun if_equal ord x y = if is_equal ord then x else y; exception UNDEF of elem; (*literal copy from table.ML*) fun del (SOME k) Empty = raise UNDEF k | del NONE Empty = raise Match | del NONE (Leaf1 p) = (p, (true, Empty)) | del NONE (Leaf2 (p, q)) = (p, (false, Leaf1 q)) | del k (Leaf1 p) = (case compare k p of EQUAL => (p, (true, Empty)) | _ => raise UNDEF (the k)) | del k (Leaf2 (p, q)) = (case compare k p of EQUAL => (p, (false, Leaf1 q)) | _ => (case compare k q of EQUAL => (q, (false, Leaf1 p)) | _ => raise UNDEF (the k))) | del k (Leaf3 (p, q, r)) = del k (Branch2 (Leaf1 p, q, Leaf1 r)) | del k (Branch2 (l, p, r)) = (case compare k p of LESS => (case del k l of (p', (false, l')) => (p', (false, make2 (l', p, r))) | (p', (true, l')) => (p', case unmake r of Branch2 (rl, rp, rr) => (true, make3 (l', p, rl, rp, rr)) | Branch3 (rl, rp, rm, rq, rr) => (false, make2 (make2 (l', p, rl), rp, make2 (rm, rq, rr))))) | ord => (case del (if_equal ord NONE k) r of (p', (false, r')) => (p', (false, make2 (l, if_equal ord p' p, r'))) | (p', (true, r')) => (p', case unmake l of Branch2 (ll, lp, lr) => (true, make3 (ll, lp, lr, if_equal ord p' p, r')) | Branch3 (ll, lp, lm, lq, lr) => (false, make2 (make2 (ll, lp, lm), lq, make2 (lr, if_equal ord p' p, r')))))) | del k (Branch3 (l, p, m, q, r)) = (case compare k q of LESS => (case compare k p of LESS => (case del k l of (p', (false, l')) => (p', (false, make3 (l', p, m, q, r))) | (p', (true, l')) => (p', (false, case (unmake m, unmake r) of (Branch2 (ml, mp, mr), Branch2 _) => make2 (make3 (l', p, ml, mp, mr), q, r) | (Branch3 (ml, mp, mm, mq, mr), _) => make3 (make2 (l', p, ml), mp, make2 (mm, mq, mr), q, r) | (Branch2 (ml, mp, mr), Branch3 (rl, rp, rm, rq, rr)) => make3 (make2 (l', p, ml), mp, make2 (mr, q, rl), rp, make2 (rm, rq, rr))))) | ord => (case del (if_equal ord NONE k) m of (p', (false, m')) => (p', (false, make3 (l, if_equal ord p' p, m', q, r))) | (p', (true, m')) => (p', (false, case (unmake l, unmake r) of (Branch2 (ll, lp, lr), Branch2 _) => make2 (make3 (ll, lp, lr, if_equal ord p' p, m'), q, r) | (Branch3 (ll, lp, lm, lq, lr), _) => make3 (make2 (ll, lp, lm), lq, make2 (lr, if_equal ord p' p, m'), q, r) | (_, Branch3 (rl, rp, rm, rq, rr)) => make3 (l, if_equal ord p' p, make2 (m', q, rl), rp, make2 (rm, rq, rr)))))) | ord => (case del (if_equal ord NONE k) r of (q', (false, r')) => (q', (false, make3 (l, p, m, if_equal ord q' q, r'))) | (q', (true, r')) => (q', (false, case (unmake l, unmake m) of (Branch2 _, Branch2 (ml, mp, mr)) => make2 (l, p, make3 (ml, mp, mr, if_equal ord q' q, r')) | (_, Branch3 (ml, mp, mm, mq, mr)) => make3 (l, p, make2 (ml, mp, mm), mq, make2 (mr, if_equal ord q' q, r')) | (Branch3 (ll, lp, lm, lq, lr), Branch2 (ml, mp, mr)) => make3 (make2 (ll, lp, lm), lq, make2 (lr, p, ml), mp, make2 (mr, if_equal ord q' q, r')))))); in fun remove elem set = if member set elem then snd (snd (del (SOME elem) set)) else set; val subtract = fold_set remove; end; (* ML pretty-printing *) val _ = ML_system_pp (fn depth => fn _ => fn set => ML_Pretty.to_polyml (ML_Pretty.enum "," "{" "}" (ML_Pretty.from_polyml o ML_system_pretty) (dest set, depth))); (*final declarations of this structure!*) val fold = fold_set; val fold_rev = fold_rev_set; end; structure Intset = Set(Inttab.Key); structure Symset = Set(Symtab.Key); diff --git a/src/Pure/General/table.ML b/src/Pure/General/table.ML --- a/src/Pure/General/table.ML +++ b/src/Pure/General/table.ML @@ -1,593 +1,589 @@ (* Title: Pure/General/table.ML Author: Markus Wenzel and Stefan Berghofer, TU Muenchen Generic tables. Efficient purely functional implementation using balanced 2-3 trees. *) signature KEY = sig type key val ord: key ord end; signature TABLE = sig structure Key: KEY type key type 'a table exception DUP of key exception SAME exception UNDEF of key val empty: 'a table val build: ('a table -> 'a table) -> 'a table val is_empty: 'a table -> bool - val is_single: 'a table -> bool val map: (key -> 'a -> 'b) -> 'a table -> 'b table val fold: (key * 'b -> 'a -> 'a) -> 'b table -> 'a -> 'a val fold_rev: (key * 'b -> 'a -> 'a) -> 'b table -> 'a -> 'a val dest: 'a table -> (key * 'a) list val keys: 'a table -> key list val min: 'a table -> (key * 'a) option val max: 'a table -> (key * 'a) option val exists: (key * 'a -> bool) -> 'a table -> bool val forall: (key * 'a -> bool) -> 'a table -> bool val get_first: (key * 'a -> 'b option) -> 'a table -> 'b option val lookup_key: 'a table -> key -> (key * 'a) option val lookup: 'a table -> key -> 'a option val defined: 'a table -> key -> bool val update: key * 'a -> 'a table -> 'a table val update_new: key * 'a -> 'a table -> 'a table (*exception DUP*) val default: key * 'a -> 'a table -> 'a table val map_entry: key -> ('a -> 'a) (*exception SAME*) -> 'a table -> 'a table val map_default: key * 'a -> ('a -> 'a) -> 'a table -> 'a table val make: (key * 'a) list -> 'a table (*exception DUP*) val join: (key -> 'a * 'a -> 'a) (*exception SAME*) -> 'a table * 'a table -> 'a table (*exception DUP*) val merge: ('a * 'a -> bool) -> 'a table * 'a table -> 'a table (*exception DUP*) val delete: key -> 'a table -> 'a table (*exception UNDEF*) val delete_safe: key -> 'a table -> 'a table val member: ('b * 'a -> bool) -> 'a table -> key * 'b -> bool val insert: ('a * 'a -> bool) -> key * 'a -> 'a table -> 'a table (*exception DUP*) val remove: ('b * 'a -> bool) -> key * 'b -> 'a table -> 'a table val lookup_list: 'a list table -> key -> 'a list val cons_list: key * 'a -> 'a list table -> 'a list table val insert_list: ('a * 'a -> bool) -> key * 'a -> 'a list table -> 'a list table val remove_list: ('b * 'a -> bool) -> key * 'b -> 'a list table -> 'a list table val update_list: ('a * 'a -> bool) -> key * 'a -> 'a list table -> 'a list table val make_list: (key * 'a) list -> 'a list table val dest_list: 'a list table -> (key * 'a) list val merge_list: ('a * 'a -> bool) -> 'a list table * 'a list table -> 'a list table type set = unit table val insert_set: key -> set -> set val remove_set: key -> set -> set val make_set: key list -> set end; functor Table(Key: KEY): TABLE = struct (* keys *) structure Key = Key; type key = Key.key; val eq_key = is_equal o Key.ord; exception DUP of key; (* datatype *) datatype 'a table = Empty | Leaf1 of key * 'a | Leaf2 of (key * 'a) * (key * 'a) | Leaf3 of (key * 'a) * (key * 'a) * (key * 'a) | Branch2 of 'a table * (key * 'a) * 'a table | Branch3 of 'a table * (key * 'a) * 'a table * (key * 'a) * 'a table; (*literal copy from set.ML*) fun make2 (Empty, e, Empty) = Leaf1 e | make2 (Branch2 (Empty, e1, Empty), e2, right) = make2 (Leaf1 e1, e2, right) | make2 (left, e1, Branch2 (Empty, e2, Empty)) = make2 (left, e1, Leaf1 e2) | make2 (Branch3 (Empty, e1, Empty, e2, Empty), e3, right) = make2 (Leaf2 (e1, e2), e3, right) | make2 (left, e1, Branch3 (Empty, e2, Empty, e3, Empty)) = make2 (left, e1, Leaf2 (e2, e3)) | make2 (Leaf1 e1, e2, Empty) = Leaf2 (e1, e2) | make2 (Empty, e1, Leaf1 e2) = Leaf2 (e1, e2) | make2 (Leaf1 e1, e2, Leaf1 e3) = Leaf3 (e1, e2, e3) | make2 (Leaf2 (e1, e2), e3, Empty) = Leaf3 (e1, e2, e3) | make2 (Empty, e1, Leaf2 (e2, e3)) = Leaf3 (e1, e2, e3) | make2 arg = Branch2 arg; (*literal copy from set.ML*) fun make3 (Empty, e1, Empty, e2, Empty) = Leaf2 (e1, e2) | make3 (Branch2 (Empty, e1, Empty), e2, mid, e3, right) = make3 (Leaf1 e1, e2, mid, e3, right) | make3 (left, e1, Branch2 (Empty, e2, Empty), e3, right) = make3 (left, e1, Leaf1 e2, e3, right) | make3 (left, e1, mid, e2, Branch2 (Empty, e3, Empty)) = make3 (left, e1, mid, e2, Leaf1 e3) | make3 (Leaf1 e1, e2, Empty, e3, Empty) = Leaf3 (e1, e2, e3) | make3 (Empty, e1, Leaf1 e2, e3, Empty) = Leaf3 (e1, e2, e3) | make3 (Empty, e1, Empty, e2, Leaf1 e3) = Leaf3 (e1, e2, e3) | make3 arg = Branch3 arg; (*literal copy from set.ML*) fun unmake (Leaf1 e) = Branch2 (Empty, e, Empty) | unmake (Leaf2 (e1, e2)) = Branch3 (Empty, e1, Empty, e2, Empty) | unmake (Leaf3 (e1, e2, e3)) = Branch2 (Branch2 (Empty, e1, Empty), e2, Branch2 (Empty, e3, Empty)) | unmake arg = arg; (* empty and single *) val empty = Empty; fun build (f: 'a table -> 'a table) = f empty; fun is_empty Empty = true | is_empty _ = false; -fun is_single (Leaf1 _) = true - | is_single _ = false; - (* map and fold combinators *) fun map_table f = let fun map Empty = Empty | map (Leaf1 (k, x)) = Leaf1 (k, f k x) | map (Leaf2 ((k1, x1), (k2, x2))) = Leaf2 ((k1, f k1 x1), (k2, f k2 x2)) | map (Leaf3 ((k1, x1), (k2, x2), (k3, x3))) = Leaf3 ((k1, f k1 x1), (k2, f k2 x2), (k3, f k3 x3)) | map (Branch2 (left, (k, x), right)) = Branch2 (map left, (k, f k x), map right) | map (Branch3 (left, (k1, x1), mid, (k2, x2), right)) = Branch3 (map left, (k1, f k1 x1), map mid, (k2, f k2 x2), map right); in map end; fun fold_table f = let fun fold Empty x = x | fold (Leaf1 e) x = f e x | fold (Leaf2 (e1, e2)) x = f e2 (f e1 x) | fold (Leaf3 (e1, e2, e3)) x = f e3 (f e2 (f e1 x)) | fold (Branch2 (left, e, right)) x = fold right (f e (fold left x)) | fold (Branch3 (left, e1, mid, e2, right)) x = fold right (f e2 (fold mid (f e1 (fold left x)))); in fold end; fun fold_rev_table f = let fun fold_rev Empty x = x | fold_rev (Leaf1 e) x = f e x | fold_rev (Leaf2 (e1, e2)) x = f e1 (f e2 x) | fold_rev (Leaf3 (e1, e2, e3)) x = f e1 (f e2 (f e3 x)) | fold_rev (Branch2 (left, e, right)) x = fold_rev left (f e (fold_rev right x)) | fold_rev (Branch3 (left, e1, mid, e2, right)) x = fold_rev left (f e1 (fold_rev mid (f e2 (fold_rev right x)))); in fold_rev end; fun dest tab = fold_rev_table cons tab []; fun keys tab = fold_rev_table (cons o #1) tab []; (* min/max entries *) fun min Empty = NONE | min (Leaf1 e) = SOME e | min (Leaf2 (e, _)) = SOME e | min (Leaf3 (e, _, _)) = SOME e | min (Branch2 (Empty, e, _)) = SOME e | min (Branch3 (Empty, e, _, _, _)) = SOME e | min (Branch2 (left, _, _)) = min left | min (Branch3 (left, _, _, _, _)) = min left; fun max Empty = NONE | max (Leaf1 e) = SOME e | max (Leaf2 (_, e)) = SOME e | max (Leaf3 (_, _, e)) = SOME e | max (Branch2 (_, e, Empty)) = SOME e | max (Branch3 (_, _, _, e, Empty)) = SOME e | max (Branch2 (_, _, right)) = max right | max (Branch3 (_, _, _, _, right)) = max right; (* exists and forall *) fun exists pred = let fun ex Empty = false | ex (Leaf1 e) = pred e | ex (Leaf2 (e1, e2)) = pred e1 orelse pred e2 | ex (Leaf3 (e1, e2, e3)) = pred e1 orelse pred e2 orelse pred e3 | ex (Branch2 (left, e, right)) = ex left orelse pred e orelse ex right | ex (Branch3 (left, e1, mid, e2, right)) = ex left orelse pred e1 orelse ex mid orelse pred e2 orelse ex right; in ex end; fun forall pred = not o exists (not o pred); (* get_first *) fun get_first f = let fun get Empty = NONE | get (Leaf1 e) = f e | get (Leaf2 (e1, e2)) = (case f e1 of NONE => f e2 | some => some) | get (Leaf3 (e1, e2, e3)) = (case f e1 of NONE => (case f e2 of NONE => f e3 | some => some) | some => some) | get (Branch2 (left, e, right)) = (case get left of NONE => (case f e of NONE => get right | some => some) | some => some) | get (Branch3 (left, e1, mid, e2, right)) = (case get left of NONE => (case f e1 of NONE => (case get mid of NONE => (case f e2 of NONE => get right | some => some) | some => some) | some => some) | some => some); in get end; (* lookup *) fun lookup tab key = let fun look Empty = NONE | look (Leaf1 (k, x)) = if eq_key (key, k) then SOME x else NONE | look (Leaf2 ((k1, x1), (k2, x2))) = (case Key.ord (key, k1) of LESS => NONE | EQUAL => SOME x1 | GREATER => if eq_key (key, k2) then SOME x2 else NONE) | look (Leaf3 ((k1, x1), (k2, x2), (k3, x3))) = (case Key.ord (key, k2) of LESS => if eq_key (key, k1) then SOME x1 else NONE | EQUAL => SOME x2 | GREATER => if eq_key (key, k3) then SOME x3 else NONE) | look (Branch2 (left, (k, x), right)) = (case Key.ord (key, k) of LESS => look left | EQUAL => SOME x | GREATER => look right) | look (Branch3 (left, (k1, x1), mid, (k2, x2), right)) = (case Key.ord (key, k1) of LESS => look left | EQUAL => SOME x1 | GREATER => (case Key.ord (key, k2) of LESS => look mid | EQUAL => SOME x2 | GREATER => look right)); in look tab end; fun lookup_key tab key = let fun look Empty = NONE | look (Leaf1 (k, x)) = if eq_key (key, k) then SOME (k, x) else NONE | look (Leaf2 ((k1, x1), (k2, x2))) = (case Key.ord (key, k1) of LESS => NONE | EQUAL => SOME (k1, x1) | GREATER => if eq_key (key, k2) then SOME (k2, x2) else NONE) | look (Leaf3 ((k1, x1), (k2, x2), (k3, x3))) = (case Key.ord (key, k2) of LESS => if eq_key (key, k1) then SOME (k1, x1) else NONE | EQUAL => SOME (k2, x2) | GREATER => if eq_key (key, k3) then SOME (k3, x3) else NONE) | look (Branch2 (left, (k, x), right)) = (case Key.ord (key, k) of LESS => look left | EQUAL => SOME (k, x) | GREATER => look right) | look (Branch3 (left, (k1, x1), mid, (k2, x2), right)) = (case Key.ord (key, k1) of LESS => look left | EQUAL => SOME (k1, x1) | GREATER => (case Key.ord (key, k2) of LESS => look mid | EQUAL => SOME (k2, x2) | GREATER => look right)); in look tab end; fun defined tab key = let fun def Empty = false | def (Leaf1 (k, _)) = eq_key (key, k) | def (Leaf2 ((k1, _), (k2, _))) = (case Key.ord (key, k1) of LESS => false | EQUAL => true | GREATER => eq_key (key, k2)) | def (Leaf3 ((k1, _), (k2, _), (k3, _))) = (case Key.ord (key, k2) of LESS => eq_key (key, k1) | EQUAL => true | GREATER => eq_key (key, k3)) | def (Branch2 (left, (k, _), right)) = (case Key.ord (key, k) of LESS => def left | EQUAL => true | GREATER => def right) | def (Branch3 (left, (k1, _), mid, (k2, _), right)) = (case Key.ord (key, k1) of LESS => def left | EQUAL => true | GREATER => (case Key.ord (key, k2) of LESS => def mid | EQUAL => true | GREATER => def right)); in def tab end; (* modify *) datatype 'a growth = Stay of 'a table | Sprout of 'a table * (key * 'a) * 'a table; exception SAME; fun modify key f tab = let fun modfy Empty = Sprout (Empty, (key, f NONE), Empty) | modfy (t as Leaf1 _) = modfy (unmake t) | modfy (t as Leaf2 _) = modfy (unmake t) | modfy (t as Leaf3 _) = modfy (unmake t) | modfy (Branch2 (left, p as (k, x), right)) = (case Key.ord (key, k) of LESS => (case modfy left of Stay left' => Stay (make2 (left', p, right)) | Sprout (left1, q, left2) => Stay (make3 (left1, q, left2, p, right))) | EQUAL => Stay (make2 (left, (k, f (SOME x)), right)) | GREATER => (case modfy right of Stay right' => Stay (make2 (left, p, right')) | Sprout (right1, q, right2) => Stay (make3 (left, p, right1, q, right2)))) | modfy (Branch3 (left, p1 as (k1, x1), mid, p2 as (k2, x2), right)) = (case Key.ord (key, k1) of LESS => (case modfy left of Stay left' => Stay (make3 (left', p1, mid, p2, right)) | Sprout (left1, q, left2) => Sprout (make2 (left1, q, left2), p1, make2 (mid, p2, right))) | EQUAL => Stay (make3 (left, (k1, f (SOME x1)), mid, p2, right)) | GREATER => (case Key.ord (key, k2) of LESS => (case modfy mid of Stay mid' => Stay (make3 (left, p1, mid', p2, right)) | Sprout (mid1, q, mid2) => Sprout (make2 (left, p1, mid1), q, make2 (mid2, p2, right))) | EQUAL => Stay (make3 (left, p1, mid, (k2, f (SOME x2)), right)) | GREATER => (case modfy right of Stay right' => Stay (make3 (left, p1, mid, p2, right')) | Sprout (right1, q, right2) => Sprout (make2 (left, p1, mid), p2, make2 (right1, q, right2))))); in (case modfy tab of Stay tab' => tab' | Sprout br => make2 br) handle SAME => tab end; fun update (key, x) tab = modify key (fn _ => x) tab; fun update_new (key, x) tab = modify key (fn NONE => x | SOME _ => raise DUP key) tab; fun default (key, x) tab = modify key (fn NONE => x | SOME _ => raise SAME) tab; fun map_entry key f = modify key (fn NONE => raise SAME | SOME x => f x); fun map_default (key, x) f = modify key (fn NONE => f x | SOME y => f y); (* delete *) exception UNDEF of key; local fun compare NONE _ = LESS | compare (SOME k1) (k2, _) = Key.ord (k1, k2); fun if_equal ord x y = if is_equal ord then x else y; (*literal copy from set.ML*) fun del (SOME k) Empty = raise UNDEF k | del NONE Empty = raise Match | del NONE (Leaf1 p) = (p, (true, Empty)) | del NONE (Leaf2 (p, q)) = (p, (false, Leaf1 q)) | del k (Leaf1 p) = (case compare k p of EQUAL => (p, (true, Empty)) | _ => raise UNDEF (the k)) | del k (Leaf2 (p, q)) = (case compare k p of EQUAL => (p, (false, Leaf1 q)) | _ => (case compare k q of EQUAL => (q, (false, Leaf1 p)) | _ => raise UNDEF (the k))) | del k (Leaf3 (p, q, r)) = del k (Branch2 (Leaf1 p, q, Leaf1 r)) | del k (Branch2 (l, p, r)) = (case compare k p of LESS => (case del k l of (p', (false, l')) => (p', (false, make2 (l', p, r))) | (p', (true, l')) => (p', case unmake r of Branch2 (rl, rp, rr) => (true, make3 (l', p, rl, rp, rr)) | Branch3 (rl, rp, rm, rq, rr) => (false, make2 (make2 (l', p, rl), rp, make2 (rm, rq, rr))))) | ord => (case del (if_equal ord NONE k) r of (p', (false, r')) => (p', (false, make2 (l, if_equal ord p' p, r'))) | (p', (true, r')) => (p', case unmake l of Branch2 (ll, lp, lr) => (true, make3 (ll, lp, lr, if_equal ord p' p, r')) | Branch3 (ll, lp, lm, lq, lr) => (false, make2 (make2 (ll, lp, lm), lq, make2 (lr, if_equal ord p' p, r')))))) | del k (Branch3 (l, p, m, q, r)) = (case compare k q of LESS => (case compare k p of LESS => (case del k l of (p', (false, l')) => (p', (false, make3 (l', p, m, q, r))) | (p', (true, l')) => (p', (false, case (unmake m, unmake r) of (Branch2 (ml, mp, mr), Branch2 _) => make2 (make3 (l', p, ml, mp, mr), q, r) | (Branch3 (ml, mp, mm, mq, mr), _) => make3 (make2 (l', p, ml), mp, make2 (mm, mq, mr), q, r) | (Branch2 (ml, mp, mr), Branch3 (rl, rp, rm, rq, rr)) => make3 (make2 (l', p, ml), mp, make2 (mr, q, rl), rp, make2 (rm, rq, rr))))) | ord => (case del (if_equal ord NONE k) m of (p', (false, m')) => (p', (false, make3 (l, if_equal ord p' p, m', q, r))) | (p', (true, m')) => (p', (false, case (unmake l, unmake r) of (Branch2 (ll, lp, lr), Branch2 _) => make2 (make3 (ll, lp, lr, if_equal ord p' p, m'), q, r) | (Branch3 (ll, lp, lm, lq, lr), _) => make3 (make2 (ll, lp, lm), lq, make2 (lr, if_equal ord p' p, m'), q, r) | (_, Branch3 (rl, rp, rm, rq, rr)) => make3 (l, if_equal ord p' p, make2 (m', q, rl), rp, make2 (rm, rq, rr)))))) | ord => (case del (if_equal ord NONE k) r of (q', (false, r')) => (q', (false, make3 (l, p, m, if_equal ord q' q, r'))) | (q', (true, r')) => (q', (false, case (unmake l, unmake m) of (Branch2 _, Branch2 (ml, mp, mr)) => make2 (l, p, make3 (ml, mp, mr, if_equal ord q' q, r')) | (_, Branch3 (ml, mp, mm, mq, mr)) => make3 (l, p, make2 (ml, mp, mm), mq, make2 (mr, if_equal ord q' q, r')) | (Branch3 (ll, lp, lm, lq, lr), Branch2 (ml, mp, mr)) => make3 (make2 (ll, lp, lm), lq, make2 (lr, p, ml), mp, make2 (mr, if_equal ord q' q, r')))))); in fun delete key tab = snd (snd (del (SOME key) tab)); fun delete_safe key tab = if defined tab key then delete key tab else tab; end; (* membership operations *) fun member eq tab (key, x) = (case lookup tab key of NONE => false | SOME y => eq (x, y)); fun insert eq (key, x) = modify key (fn NONE => x | SOME y => if eq (x, y) then raise SAME else raise DUP key); fun remove eq (key, x) tab = (case lookup tab key of NONE => tab | SOME y => if eq (x, y) then delete key tab else tab); (* simultaneous modifications *) fun make entries = build (fold update_new entries); fun join f (table1, table2) = let fun add (key, y) tab = modify key (fn NONE => y | SOME x => f key (x, y)) tab; in if pointer_eq (table1, table2) then table1 else if is_empty table1 then table2 else fold_table add table2 table1 end; fun merge eq = join (fn key => fn xy => if eq xy then raise SAME else raise DUP key); (* list tables *) fun lookup_list tab key = these (lookup tab key); fun cons_list (key, x) tab = modify key (fn NONE => [x] | SOME xs => x :: xs) tab; fun insert_list eq (key, x) = modify key (fn NONE => [x] | SOME xs => if Library.member eq xs x then raise SAME else x :: xs); fun remove_list eq (key, x) tab = map_entry key (fn xs => (case Library.remove eq x xs of [] => raise UNDEF key | ys => ys)) tab handle UNDEF _ => delete key tab; fun update_list eq (key, x) = modify key (fn NONE => [x] | SOME [] => [x] | SOME (xs as y :: _) => if eq (x, y) then raise SAME else Library.update eq x xs); fun make_list args = build (fold_rev cons_list args); fun dest_list tab = maps (fn (key, xs) => map (pair key) xs) (dest tab); fun merge_list eq = join (fn _ => Library.merge eq); (* set operations *) type set = unit table; fun insert_set x = default (x, ()); fun remove_set x : set -> set = delete_safe x; fun make_set xs = build (fold insert_set xs); (* ML pretty-printing *) val _ = ML_system_pp (fn depth => fn pretty => fn tab => ML_Pretty.to_polyml (ML_Pretty.enum "," "{" "}" (ML_Pretty.pair (ML_Pretty.from_polyml o ML_system_pretty) (ML_Pretty.from_polyml o pretty)) (dest tab, depth))); (*final declarations of this structure!*) val map = map_table; val fold = fold_table; val fold_rev = fold_rev_table; end; structure Inttab = Table(type key = int val ord = int_ord); structure Symtab = Table(type key = string val ord = fast_string_ord); structure Symreltab = Table(type key = string * string val ord = prod_ord fast_string_ord fast_string_ord); diff --git a/src/Pure/Syntax/parser.ML b/src/Pure/Syntax/parser.ML --- a/src/Pure/Syntax/parser.ML +++ b/src/Pure/Syntax/parser.ML @@ -1,724 +1,724 @@ (* Title: Pure/Syntax/parser.ML Author: Carsten Clasohm, Sonia Mahjoub Author: Makarius General context-free parser for the inner syntax of terms and types. *) signature PARSER = sig type gram val empty_gram: gram val extend_gram: Syntax_Ext.xprod list -> gram -> gram val make_gram: Syntax_Ext.xprod list -> gram val pretty_gram: gram -> Pretty.T list datatype parsetree = Node of string * parsetree list | Tip of Lexicon.token exception PARSETREE of parsetree val pretty_parsetree: parsetree -> Pretty.T val parse: gram -> string -> Lexicon.token list -> parsetree list val branching_level: int Config.T end; structure Parser: PARSER = struct (** datatype gram **) (* nonterminals *) (*production for the NTs are stored in a vector, indexed by the NT tag*) type nt = int; type tags = nt Symtab.table; val tags_empty: tags = Symtab.empty; fun tags_content (tags: tags) = sort_by #1 (Symtab.dest tags); fun tags_lookup (tags: tags) = Symtab.lookup tags; fun tags_insert tag (tags: tags) = Symtab.update_new tag tags; fun tags_name (tags: tags) = the o Inttab.lookup (Inttab.make (map swap (Symtab.dest tags))); type nts = Intset.T; val nts_empty: nts = Intset.empty; val nts_merge: nts * nts -> nts = Intset.merge; fun nts_insert nt : nts -> nts = Intset.insert nt; fun nts_member (nts: nts) = Intset.member nts; fun nts_fold f (nts: nts) = Intset.fold f nts; fun nts_subset (nts1: nts, nts2: nts) = Intset.forall (nts_member nts2) nts1; fun nts_is_empty (nts: nts) = Intset.is_empty nts; -fun nts_is_unique (nts: nts) = nts_is_empty nts orelse Intset.is_single nts; +fun nts_is_unique (nts: nts) = Intset.size nts <= 1; (* tokens *) structure Tokens = Table(type key = Lexicon.token val ord = Lexicon.tokens_match_ord); type tokens = Tokens.set; val tokens_empty: tokens = Tokens.empty; val tokens_merge: tokens * tokens -> tokens = Tokens.merge (K true); fun tokens_insert tk : tokens -> tokens = Tokens.insert_set tk; fun tokens_remove tk : tokens -> tokens = Tokens.remove_set tk; fun tokens_member (tokens: tokens) = Tokens.defined tokens; fun tokens_is_empty (tokens: tokens) = Tokens.is_empty tokens; fun tokens_fold f (tokens: tokens) = Tokens.fold (f o #1) tokens; val tokens_union = tokens_fold tokens_insert; val tokens_subtract = tokens_fold tokens_remove; fun tokens_find P (tokens: tokens) = Tokens.get_first (fn (tok, ()) => if P tok then SOME tok else NONE) tokens; fun tokens_add (nt: nt, tokens) = if tokens_is_empty tokens then I else cons (nt, tokens); (* productions *) datatype symb = Terminal of Lexicon.token | Nonterminal of nt * int; (*(tag, prio)*) type prods = (symb list * string * int) list Tokens.table; (*start_token ~> [(rhs, name, prio)]*) val prods_empty: prods = Tokens.empty; fun prods_lookup (prods: prods) = Tokens.lookup_list prods; fun prods_update entry : prods -> prods = Tokens.update entry; fun prods_content (prods: prods) = distinct (op =) (maps #2 (Tokens.dest prods)); type nt_gram = (nts * tokens) * prods; (*dependent_nts, start_tokens, prods*) (*depent_nts is a set of all NTs whose lookahead depends on this NT's lookahead*) val nt_gram_empty: nt_gram = ((nts_empty, tokens_empty), prods_empty); type chains = unit Int_Graph.T; fun chains_preds (chains: chains) = Int_Graph.immediate_preds chains; fun chains_all_preds (chains: chains) = Int_Graph.all_preds chains; fun chains_all_succs (chains: chains) = Int_Graph.all_succs chains; val chains_empty: chains = Int_Graph.empty; fun chains_member (chains: chains) = Int_Graph.is_edge chains; fun chains_declare nt : chains -> chains = Int_Graph.default_node (nt, ()); fun chains_insert (from, to) = chains_declare from #> chains_declare to #> Int_Graph.add_edge (from, to); datatype gram = Gram of {nt_count: int, prod_count: int, tags: tags, chains: chains, lambdas: nts, prods: nt_gram Vector.vector}; (*"tags" is used to map NT names (i.e. strings) to tags; chain productions are not stored as normal productions but instead as an entry in "chains": from -> to; lambda productions are stored as normal productions and also as an entry in "lambdas"*) (*productions for which no starting token is known yet are associated with this token*) val unknown_start = Lexicon.eof; fun get_start tks = (case Tokens.min tks of SOME (tk, _) => tk | NONE => unknown_start); fun add_production prods (lhs, new_prod as (rhs, _, pri)) (chains, lambdas) = let (*store chain if it does not already exist*) val (chain, new_chain, chains') = (case (pri, rhs) of (~1, [Nonterminal (from, ~1)]) => if chains_member chains (from, lhs) then (SOME from, false, chains) else (SOME from, true, chains_insert (from, lhs) chains) | _ => let val chains' = chains |> chains_declare lhs |> fold (fn Nonterminal (nt, _) => chains_declare nt | _ => I) rhs; in (NONE, false, chains') end); (*propagate new chain in lookahead and lambdas; added_starts is used later to associate existing productions with new starting tokens*) val (added_starts, lambdas') = if not new_chain then ([], lambdas) else let (*lookahead of chain's source*) val ((_, from_tks), _) = Array.sub (prods, the chain); (*copy from's lookahead to chain's destinations*) fun copy_lookahead to = let val ((to_nts, to_tks), ps) = Array.sub (prods, to); val new_tks = tokens_subtract to_tks from_tks; (*added lookahead tokens*) val to_tks' = tokens_merge (to_tks, new_tks); val _ = Array.update (prods, to, ((to_nts, to_tks'), ps)); in tokens_add (to, new_tks) end; val tos = chains_all_succs chains' [lhs]; in (fold copy_lookahead tos [], lambdas |> nts_member lambdas lhs ? fold nts_insert tos) end; (*test if new production can produce lambda (rhs must either be empty or only consist of lambda NTs)*) val new_lambdas = if forall (fn Nonterminal (id, _) => nts_member lambdas' id | Terminal _ => false) rhs then SOME (filter_out (nts_member lambdas') (chains_all_succs chains' [lhs])) else NONE; val lambdas'' = fold nts_insert (these new_lambdas) lambdas'; (*list optional terminal and all nonterminals on which the lookahead of a production depends*) fun lookahead_dependency _ [] nts = (NONE, nts) | lookahead_dependency _ (Terminal tk :: _) nts = (SOME tk, nts) | lookahead_dependency lambdas (Nonterminal (nt, _) :: symbs) nts = if nts_member lambdas nt then lookahead_dependency lambdas symbs (nts_insert nt nts) else (NONE, nts_insert nt nts); (*get all known starting tokens for a nonterminal*) fun starts_for_nt nt = snd (fst (Array.sub (prods, nt))); (*update prods, lookaheads, and lambdas according to new lambda NTs*) val (added_starts', lambdas') = let (*propagate added lambda NT*) fun propagate_lambda [] added_starts lambdas = (added_starts, lambdas) | propagate_lambda (l :: ls) added_starts lambdas = let (*get lookahead for lambda NT*) val ((dependent, l_starts), _) = Array.sub (prods, l); (*check productions whose lookahead may depend on lambda NT*) fun examine_prods [] add_lambda nt_dependencies added_tks nt_prods = (add_lambda, nt_dependencies, added_tks, nt_prods) | examine_prods ((p as (rhs, _, _)) :: ps) add_lambda nt_dependencies added_tks nt_prods = let val (tk, nts) = lookahead_dependency lambdas rhs nts_empty in if nts_member nts l then (*update production's lookahead*) let val new_lambda = is_none tk andalso nts_subset (nts, lambdas); val new_tks = tokens_empty |> fold tokens_insert (the_list tk) |> nts_fold (tokens_union o starts_for_nt) nts |> tokens_subtract l_starts; val added_tks' = tokens_merge (added_tks, new_tks); val nt_dependencies' = nts_merge (nt_dependencies, nts); (*associate production with new starting tokens*) fun copy tk nt_prods = prods_update (tk, p :: prods_lookup nt_prods tk) nt_prods; val nt_prods' = nt_prods |> tokens_fold copy new_tks |> new_lambda ? copy Lexicon.dummy; in examine_prods ps (add_lambda orelse new_lambda) nt_dependencies' added_tks' nt_prods' end else (*skip production*) examine_prods ps add_lambda nt_dependencies added_tks nt_prods end; (*check each NT whose lookahead depends on new lambda NT*) fun process_nts nt (added_lambdas, added_starts) = let val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); (*existing productions whose lookahead may depend on l*) val tk_prods = prods_lookup nt_prods (get_start l_starts); (*add_lambda is true if an existing production of the nt produces lambda due to the new lambda NT l*) val (add_lambda, nt_dependencies, added_tks, nt_prods') = examine_prods tk_prods false nts_empty tokens_empty nt_prods; val new_nts = nts_merge (old_nts, nt_dependencies); val new_tks = tokens_merge (old_tks, added_tks); val added_lambdas' = added_lambdas |> add_lambda ? cons nt; val _ = Array.update (prods, nt, ((new_nts, new_tks), nt_prods')); (*N.B. that because the tks component is used to access existing productions we have to add new tokens at the _end_ of the list*) val added_starts' = tokens_add (nt, added_tks) added_starts; in (added_lambdas', added_starts') end; val (added_lambdas, added_starts') = nts_fold process_nts dependent ([], added_starts); val added_lambdas' = filter_out (nts_member lambdas) added_lambdas; in propagate_lambda (ls @ added_lambdas') added_starts' (fold nts_insert added_lambdas' lambdas) end; in propagate_lambda (nts_fold (fn l => not (nts_member lambdas l) ? cons l) lambdas'' []) added_starts lambdas'' end; (*insert production into grammar*) val added_starts' = if is_some chain then added_starts' (*don't store chain production*) else let (*lookahead tokens of new production and on which NTs lookahead depends*) val (start_tk, start_nts) = lookahead_dependency lambdas' rhs nts_empty; val start_tks = tokens_empty |> fold tokens_insert (the_list start_tk) |> nts_fold (tokens_union o starts_for_nt) start_nts; val start_tks' = start_tks |> (if is_some new_lambdas then tokens_insert Lexicon.dummy else if tokens_is_empty start_tks then tokens_insert unknown_start else I); (*add lhs NT to list of dependent NTs in lookahead*) fun add_nts nt initial = (if initial then let val ((old_nts, old_tks), ps) = Array.sub (prods, nt) in if nts_member old_nts lhs then () else Array.update (prods, nt, ((nts_insert lhs old_nts, old_tks), ps)) end else (); false); (*add new start tokens to chained NTs' lookahead list; also store new production for lhs NT*) fun add_tks [] added = added | add_tks (nt :: nts) added = let val ((old_nts, old_tks), nt_prods) = Array.sub (prods, nt); val new_tks = tokens_subtract old_tks start_tks; (*store new production*) fun store tk (prods, _) = let val tk_prods = prods_lookup prods tk; val tk_prods' = new_prod :: tk_prods; val prods' = prods_update (tk, tk_prods') prods; in (prods', true) end; val (nt_prods', changed) = (nt_prods, false) |> nt = lhs ? tokens_fold store start_tks'; val _ = if not changed andalso tokens_is_empty new_tks then () else Array.update (prods, nt, ((old_nts, tokens_merge (old_tks, new_tks)), nt_prods')); in add_tks nts (tokens_add (nt, new_tks) added) end; val _ = nts_fold add_nts start_nts true; in add_tks (chains_all_succs chains' [lhs]) [] end; (*associate productions with new lookaheads*) val _ = let (*propagate added start tokens*) fun add_starts [] = () | add_starts ((changed_nt, new_tks) :: starts) = let (*token under which old productions which depend on changed_nt could be stored*) val key = tokens_find (not o tokens_member new_tks) (starts_for_nt changed_nt) |> the_default unknown_start; (*copy productions whose lookahead depends on changed_nt; if key = SOME unknown_start then tk_prods is used to hold the productions not copied*) fun update_prods [] result = result | update_prods ((p as (rhs, _: string, _: nt)) :: ps) (tk_prods, nt_prods) = let (*lookahead dependency for production*) val (tk, depends) = lookahead_dependency lambdas' rhs nts_empty; (*test if this production has to be copied*) val update = nts_member depends changed_nt; (*test if production could already be associated with a member of new_tks*) val lambda = not (nts_is_unique depends) orelse not (nts_is_empty depends) andalso is_some tk andalso tokens_member new_tks (the tk); (*associate production with new starting tokens*) fun copy tk nt_prods = let val tk_prods = prods_lookup nt_prods tk; val tk_prods' = if not lambda then p :: tk_prods else insert (op =) p tk_prods; (*if production depends on lambda NT we have to look for duplicates*) in prods_update (tk, tk_prods') nt_prods end; val result = if update then (tk_prods, tokens_fold copy new_tks nt_prods) else if key = unknown_start then (p :: tk_prods, nt_prods) else (tk_prods, nt_prods); in update_prods ps result end; (*copy existing productions for new starting tokens*) fun process_nts nt = let val ((nts, tks), nt_prods) = Array.sub (prods, nt); val tk_prods = prods_lookup nt_prods key; (*associate productions with new lookahead tokens*) val (tk_prods', nt_prods') = update_prods tk_prods ([], nt_prods); val nt_prods'' = if key = unknown_start then prods_update (key, tk_prods') nt_prods' else nt_prods'; val added_tks = tokens_subtract tks new_tks; val tks' = tokens_merge (tks, added_tks); val _ = Array.update (prods, nt, ((nts, tks'), nt_prods'')); in tokens_add (nt, added_tks) end; val ((dependent, _), _) = Array.sub (prods, changed_nt); in add_starts (starts @ nts_fold process_nts dependent []) end; in add_starts added_starts' end; in (chains', lambdas') end; (* pretty_gram *) fun pretty_gram (Gram {tags, prods, chains, ...}) = let val print_nt = tags_name tags; fun print_pri p = if p < 0 then "" else Symbol.make_sup ("(" ^ string_of_int p ^ ")"); fun pretty_symb (Terminal tok) = if Lexicon.is_literal tok then Pretty.quote (Pretty.keyword1 (Lexicon.str_of_token tok)) else Pretty.str (Lexicon.str_of_token tok) | pretty_symb (Nonterminal (tag, p)) = Pretty.str (print_nt tag ^ print_pri p); fun pretty_const "" = [] | pretty_const c = [Pretty.str ("\<^bold>\ " ^ quote c)]; fun prod_of_chain from = ([Nonterminal (from, ~1)], "", ~1); fun pretty_prod (name, tag) = (prods_content (#2 (Vector.sub (prods, tag))) @ map prod_of_chain (chains_preds chains tag)) |> map (fn (symbs, const, p) => Pretty.block (Pretty.breaks (Pretty.str (name ^ print_pri p ^ " =") :: map pretty_symb symbs @ pretty_const const))); in maps pretty_prod (tags_content tags) end; (** operations on grammars **) val empty_gram = Gram {nt_count = 0, prod_count = 0, tags = tags_empty, chains = chains_empty, lambdas = nts_empty, prods = Vector.fromList [nt_gram_empty]}; (*Add productions to a grammar*) fun extend_gram [] gram = gram | extend_gram xprods (Gram {nt_count, prod_count, tags, chains, lambdas, prods}) = let (*Get tag for existing nonterminal or create a new one*) fun get_tag nt_count tags nt = (case tags_lookup tags nt of SOME tag => (nt_count, tags, tag) | NONE => (nt_count + 1, tags_insert (nt, nt_count) tags, nt_count)); (*Convert symbols to the form used by the parser; delimiters and predefined terms are stored as terminals, nonterminals are converted to integer tags*) fun symb_of [] nt_count tags result = (nt_count, tags, rev result) | symb_of (Syntax_Ext.Delim s :: ss) nt_count tags result = symb_of ss nt_count tags (Terminal (Lexicon.literal s) :: result) | symb_of (Syntax_Ext.Argument (s, p) :: ss) nt_count tags result = let val (nt_count', tags', new_symb) = (case Lexicon.get_terminal s of NONE => let val (nt_count', tags', s_tag) = get_tag nt_count tags s; in (nt_count', tags', Nonterminal (s_tag, p)) end | SOME tk => (nt_count, tags, Terminal tk)); in symb_of ss nt_count' tags' (new_symb :: result) end | symb_of (_ :: ss) nt_count tags result = symb_of ss nt_count tags result; (*Convert list of productions by invoking symb_of for each of them*) fun prod_of [] nt_count prod_count tags result = (nt_count, prod_count, tags, result) | prod_of (Syntax_Ext.XProd (lhs, xsymbs, const, pri) :: ps) nt_count prod_count tags result = let val (nt_count', tags', lhs_tag) = get_tag nt_count tags lhs; val (nt_count'', tags'', prods) = symb_of xsymbs nt_count' tags' []; in prod_of ps nt_count'' (prod_count + 1) tags'' ((lhs_tag, (prods, const, pri)) :: result) end; val (nt_count', prod_count', tags', xprods') = prod_of xprods nt_count prod_count tags []; (*Copy array containing productions of old grammar; this has to be done to preserve the old grammar while being able to change the array's content*) val prods' = let fun get_prod i = if i < nt_count then Vector.sub (prods, i) else nt_gram_empty; in Array.tabulate (nt_count', get_prod) end; val (chains', lambdas') = (chains, lambdas) |> fold (add_production prods') xprods'; in Gram {nt_count = nt_count', prod_count = prod_count', tags = tags', chains = chains', lambdas = lambdas', prods = Array.vector prods'} end; fun make_gram xprods = extend_gram xprods empty_gram; (** parser **) (* parsetree *) datatype parsetree = Node of string * parsetree list | Tip of Lexicon.token; exception PARSETREE of parsetree; fun pretty_parsetree parsetree = let fun pretty (Node (c, pts)) = [Pretty.enclose "(" ")" (Pretty.breaks (Pretty.quote (Pretty.str c) :: maps pretty pts))] | pretty (Tip tok) = if Lexicon.valued_token tok then [Pretty.str (Lexicon.str_of_token tok)] else []; in (case pretty parsetree of [prt] => prt | _ => raise PARSETREE parsetree) end; (* parser state *) type state = nt * int * (*identification and production precedence*) parsetree list * (*already parsed nonterminals on rhs*) symb list * (*rest of rhs*) string * (*name of production*) int; (*index for previous state list*) (*Get all rhss with precedence >= min_prec*) fun get_RHS min_prec = filter (fn (_, _, prec: int) => prec >= min_prec); (*Get all rhss with precedence >= min_prec and < max_prec*) fun get_RHS' min_prec max_prec = filter (fn (_, _, prec: int) => prec >= min_prec andalso prec < max_prec); (*Make states using a list of rhss*) fun mk_states i lhs_ID rhss = let fun mk_state (rhs, id, prod_prec) = (lhs_ID, prod_prec, [], rhs, id, i); in map mk_state rhss end; (*Add parse tree to list and eliminate duplicates saving the maximum precedence*) fun conc (t: parsetree list, prec: int) [] = (NONE, [(t, prec)]) | conc (t, prec) ((t', prec') :: ts) = if t = t' then (SOME prec', if prec' >= prec then (t', prec') :: ts else (t, prec) :: ts) else let val (n, ts') = conc (t, prec) ts in (n, (t', prec') :: ts') end; (*Update entry in used*) fun update_trees (A, t) used = let val (i, ts) = the (Inttab.lookup used A); val (n, ts') = conc t ts; in (n, Inttab.update (A, (i, ts')) used) end; (*Replace entry in used*) fun update_prec (A, prec) = Inttab.map_entry A (fn (_, ts) => (prec, ts)); fun getS A max_prec NONE Si = filter (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => A = B andalso prec <= max_prec | _ => false) Si | getS A max_prec (SOME min_prec) Si = filter (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => A = B andalso prec > min_prec andalso prec <= max_prec | _ => false) Si; fun get_states Estate j A max_prec = filter (fn (_, _, _, Nonterminal (B, prec) :: _, _, _) => A = B andalso prec <= max_prec | _ => false) (Array.sub (Estate, j)); fun movedot_term c (A, j, ts, Terminal a :: sa, id, i) = if Lexicon.valued_token c orelse id <> "" then (A, j, Tip c :: ts, sa, id, i) else (A, j, ts, sa, id, i); fun movedot_nonterm tt (A, j, ts, Nonterminal _ :: sa, id, i) = (A, j, tt @ ts, sa, id, i); fun movedot_lambda [] _ = [] | movedot_lambda ((t, ki) :: ts) (state as (B, j, tss, Nonterminal (A, k) :: sa, id, i)) = if k <= ki then (B, j, t @ tss, sa, id, i) :: movedot_lambda ts state else movedot_lambda ts state; (*trigger value for warnings*) val branching_level = Config.declare_int ("syntax_branching_level", \<^here>) (K 600); (*get all productions of a NT and NTs chained to it which can be started by specified token*) fun prods_for (Gram {prods, chains, ...}) tk nt = let fun token_prods prods = fold cons (prods_lookup prods tk) #> fold cons (prods_lookup prods Lexicon.dummy); fun nt_prods nt = #2 (Vector.sub (prods, nt)); in fold (token_prods o nt_prods) (chains_all_preds chains [nt]) [] end; fun PROCESSS gram Estate i c states = let fun processS _ [] (Si, Sii) = (Si, Sii) | processS used (S :: States) (Si, Sii) = (case S of (_, _, _, Nonterminal (nt, min_prec) :: _, _, _) => let (*predictor operation*) val (used', new_states) = (case Inttab.lookup used nt of SOME (used_prec, l) => (*nonterminal has been processed*) if used_prec <= min_prec then (*wanted precedence has been processed*) (used, movedot_lambda l S) else (*wanted precedence hasn't been parsed yet*) let val tk_prods = prods_for gram c nt; val States' = mk_states i nt (get_RHS' min_prec used_prec tk_prods); in (update_prec (nt, min_prec) used, movedot_lambda l S @ States') end | NONE => (*nonterminal is parsed for the first time*) let val tk_prods = prods_for gram c nt; val States' = mk_states i nt (get_RHS min_prec tk_prods); in (Inttab.update (nt, (min_prec, [])) used, States') end); in processS used' (new_states @ States) (S :: Si, Sii) end | (_, _, _, Terminal a :: _, _, _) => (*scanner operation*) processS used States (S :: Si, if Lexicon.tokens_match_ord (a, c) = EQUAL then movedot_term c S :: Sii else Sii) | (A, prec, ts, [], id, j) => (*completer operation*) let val tt = if id = "" then ts else [Node (id, rev ts)] in if j = i then (*lambda production?*) let val (prec', used') = update_trees (A, (tt, prec)) used; val Slist = getS A prec prec' Si; val States' = map (movedot_nonterm tt) Slist; in processS used' (States' @ States) (S :: Si, Sii) end else let val Slist = get_states Estate j A prec in processS used (map (movedot_nonterm tt) Slist @ States) (S :: Si, Sii) end end) in processS Inttab.empty states ([], []) end; fun produce gram stateset i indata prev_token = (case Array.sub (stateset, i) of [] => let val toks = if Lexicon.is_eof prev_token then indata else prev_token :: indata; val pos = Position.here (Lexicon.pos_of_token prev_token); in if null toks then error ("Inner syntax error: unexpected end of input" ^ pos) else error ("Inner syntax error" ^ pos ^ Markup.markup Markup.no_report ("\n" ^ Pretty.string_of (Pretty.block [ Pretty.str "at", Pretty.brk 1, Pretty.block (Pretty.str "\"" :: Pretty.breaks (map (Pretty.str o Lexicon.str_of_token) (#1 (split_last toks))) @ [Pretty.str "\""])]))) end | s => (case indata of [] => s | c :: cs => let val (si, sii) = PROCESSS gram stateset i c s; val _ = Array.update (stateset, i, si); val _ = Array.update (stateset, i + 1, sii); in produce gram stateset (i + 1) cs c end)); fun get_trees states = map_filter (fn (_, _, [pt], _, _, _) => SOME pt | _ => NONE) states; fun earley (gram as Gram {tags, ...}) startsymbol indata = let val start_tag = (case tags_lookup tags startsymbol of SOME tag => tag | NONE => error ("Inner syntax: bad grammar root symbol " ^ quote startsymbol)); val S0 = [(~1, 0, [], [Nonterminal (start_tag, 0), Terminal Lexicon.eof], "", 0)]; val s = length indata + 1; val Estate = Array.array (s, []); val _ = Array.update (Estate, 0, S0); in get_trees (produce gram Estate 0 indata Lexicon.eof) end; fun parse gram start toks = let val end_pos = (case try List.last toks of NONE => Position.none | SOME tok => Lexicon.end_pos_of_token tok); val r = (case earley gram start (toks @ [Lexicon.mk_eof end_pos]) of [] => raise Fail "Inner syntax: no parse trees" | pts => pts); in r end; end;