diff --git a/src/Pure/ML/ml_syntax.ML b/src/Pure/ML/ml_syntax.ML --- a/src/Pure/ML/ml_syntax.ML +++ b/src/Pure/ML/ml_syntax.ML @@ -1,142 +1,143 @@ (* Title: Pure/ML/ml_syntax.ML Author: Makarius Concrete ML syntax for basic values. *) signature ML_SYNTAX = sig val reserved_names: string list val reserved: Name.context val is_reserved: string -> bool val is_identifier: string -> bool val atomic: string -> string val print_int: int -> string val print_pair: ('a -> string) -> ('b -> string) -> 'a * 'b -> string val print_list: ('a -> string) -> 'a list -> string val print_option: ('a -> string) -> 'a option -> string val print_symbol_char: Symbol.symbol -> string val print_symbol: Symbol.symbol -> string val print_string: string -> string val print_strings: string list -> string val print_path: Path.T -> string val print_properties: Properties.T -> string val print_position: Position.T -> string val print_range: Position.range -> string val print_path_binding: Path.binding -> string val make_binding: string * Position.T -> string val print_indexname: indexname -> string val print_class: class -> string val print_sort: sort -> string val print_typ: typ -> string val print_term: term -> string val pretty_string: int -> string -> Pretty.T end; structure ML_Syntax: ML_SYNTAX = struct (* reserved words *) val reserved_names = filter Symbol.is_ascii_identifier ML_Lex.keywords; val reserved = Name.make_context reserved_names; val is_reserved = Name.is_declared reserved; (* identifiers *) fun is_identifier name = not (is_reserved name) andalso Symbol.is_ascii_identifier name; (* literal output -- unformatted *) val atomic = enclose "(" ")"; val print_int = string_of_int; fun print_pair f1 f2 (x, y) = "(" ^ f1 x ^ ", " ^ f2 y ^ ")"; fun print_list f = enclose "[" "]" o commas o map f; fun print_option f NONE = "NONE" | print_option f (SOME x) = "SOME (" ^ f x ^ ")"; fun print_symbol_char s = if Symbol.is_char s then (case ord s of 34 => "\\\"" | 92 => "\\\\" | 7 => "\\a" | 8 => "\\b" | 9 => "\\t" | 10 => "\\n" | 11 => "\\v" | 12 => "\\f" | 13 => "\\r" | c => if c < 32 then "\\^" ^ chr (c + 64) else if c < 127 then s else "\\" ^ string_of_int c) else error ("Bad character: " ^ quote s); fun print_symbol s = if Symbol.is_char s then print_symbol_char s else if Symbol.is_utf8 s then translate_string print_symbol_char s else s; val print_string = quote o implode o map print_symbol o Symbol.explode; val print_strings = print_list print_string; fun print_path path = "Path.explode " ^ print_string (Path.implode path); val print_properties = print_list (print_pair print_string print_string); fun print_position pos = "Position.of_properties " ^ print_properties (Position.properties_of pos); fun print_range range = "Position.range_of_properties " ^ print_properties (Position.properties_of_range range); fun print_path_binding binding = "Path.binding " ^ print_pair print_path print_position (Path.dest_binding binding); fun make_binding (name, pos) = "Binding.make " ^ print_pair print_string print_position (name, pos); val print_indexname = print_pair print_string print_int; val print_class = print_string; val print_sort = print_list print_class; fun print_typ (Type arg) = "Term.Type " ^ print_pair print_string (print_list print_typ) arg | print_typ (TFree arg) = "Term.TFree " ^ print_pair print_string print_sort arg | print_typ (TVar arg) = "Term.TVar " ^ print_pair print_indexname print_sort arg; fun print_term (Const arg) = "Term.Const " ^ print_pair print_string print_typ arg | print_term (Free arg) = "Term.Free " ^ print_pair print_string print_typ arg | print_term (Var arg) = "Term.Var " ^ print_pair print_indexname print_typ arg | print_term (Bound i) = "Term.Bound " ^ print_int i | print_term (Abs (s, T, t)) = "Term.Abs (" ^ print_string s ^ ", " ^ print_typ T ^ ", " ^ print_term t ^ ")" | print_term (t1 $ t2) = "Term.$ " ^ print_pair print_term print_term (t1, t2); (* toplevel pretty printing *) fun pretty_string max_len str = let val body = - maps (fn XML.Elem _ => [""] | XML.Text s => Symbol.explode s) (YXML.parse_body str) - handle Fail _ => Symbol.explode str; + if YXML.is_wellformed str then + maps (fn XML.Elem _ => [""] | XML.Text s => Symbol.explode s) (YXML.parse_body str) + else Symbol.explode str; val body' = if length body <= max_len then body else take (Int.max (max_len, 0)) body @ ["..."]; in Pretty.str (quote (implode (map print_symbol body'))) end; val _ = ML_system_pp (fn depth => fn _ => fn str => Pretty.to_polyml (pretty_string (FixedInt.toInt (depth * 100)) str)); end; diff --git a/src/Pure/PIDE/yxml.ML b/src/Pure/PIDE/yxml.ML --- a/src/Pure/PIDE/yxml.ML +++ b/src/Pure/PIDE/yxml.ML @@ -1,162 +1,165 @@ (* Title: Pure/PIDE/yxml.ML Author: Makarius Efficient text representation of XML trees using extra characters X and Y -- no escaping, may nest marked text verbatim. Suitable for direct inlining into plain text. Markup ...body... is encoded as: X Y name Y att=val ... X ... body ... X Y X *) signature YXML = sig val X: Symbol.symbol val Y: Symbol.symbol val embed_controls: string -> string val detect: string -> bool val traverse: (string -> 'a -> 'a) -> XML.tree -> 'a -> 'a val string_of_body: XML.body -> string val string_of: XML.tree -> string val write_body: Path.T -> XML.body -> unit val output_markup: Markup.T -> string * string val output_markup_elem: Markup.T -> (string * string) * string val parse_body: string -> XML.body val parse: string -> XML.tree val content_of: string -> string + val is_wellformed: string -> bool end; structure YXML: YXML = struct (** string representation **) (* idempotent recoding of certain low ASCII control characters *) fun pseudo_utf8 c = if Symbol.is_ascii_control c then chr 192 ^ chr (128 + ord c) else c; fun embed_controls str = if exists_string Symbol.is_ascii_control str then translate_string pseudo_utf8 str else str; (* markers *) val X_char = Char.chr 5; val Y_char = Char.chr 6; val X = String.str X_char; val Y = String.str Y_char; val XY = X ^ Y; val XYX = XY ^ X; fun detect s = Char.contains s X_char orelse Char.contains s Y_char; (* output *) fun traverse string = let fun attrib (a, x) = string Y #> string a #> string "=" #> string x; fun tree (XML.Elem ((name, atts), ts)) = string XY #> string name #> fold attrib atts #> string X #> fold tree ts #> string XYX | tree (XML.Text s) = string s; in tree end; fun string_of_body body = Buffer.empty |> fold (traverse Buffer.add) body |> Buffer.content; val string_of = string_of_body o single; fun write_body path body = path |> File.open_output (fn file => fold (traverse (fn s => fn () => File.output file s)) body ()); (* markup elements *) val Z = chr 0; val Z_text = [XML.Text Z]; fun output_markup (markup as (name, atts)) = if Markup.is_empty markup then Markup.no_output else (XY ^ name ^ implode (map (fn (a, x) => Y ^ a ^ "=" ^ x) atts) ^ X, XYX); fun output_markup_elem markup = let val [bg1, bg2, en] = space_explode Z (string_of (XML.wrap_elem ((markup, Z_text), Z_text))) in ((bg1, bg2), en) end; (** efficient YXML parsing **) local (* splitting *) val split_string = Substring.full #> Substring.tokens (fn c => c = X_char) #> map (Substring.fields (fn c => c = Y_char) #> map Substring.string); (* structural errors *) fun err msg = raise Fail ("Malformed YXML: " ^ msg); fun err_attribute () = err "bad attribute"; fun err_element () = err "bad element"; fun err_unbalanced "" = err "unbalanced element" | err_unbalanced name = err ("unbalanced element " ^ quote name); (* stack operations *) fun add x ((elem, body) :: pending) = (elem, x :: body) :: pending; fun push "" _ _ = err_element () | push name atts pending = ((name, atts), []) :: pending; fun pop ((("", _), _) :: _) = err_unbalanced "" | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending; (* parsing *) fun parse_attrib s = (case first_field "=" s of NONE => err_attribute () | SOME ("", _) => err_attribute () | SOME att => att); fun parse_chunk ["", ""] = pop | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts) | parse_chunk txts = fold (add o XML.Text) txts; in fun parse_body source = (case fold parse_chunk (split_string source) [(("", []), [])] of [(("", _), result)] => rev result | ((name, _), _) :: _ => err_unbalanced name); fun parse source = (case parse_body source of [result] => result | [] => XML.Text "" | _ => err "multiple results"); end; val content_of = parse_body #> XML.content_of; +fun is_wellformed s = string_of_body (parse_body s) = s + handle Fail _ => false; + end; -