diff --git a/src/Pure/General/symbol.scala b/src/Pure/General/symbol.scala --- a/src/Pure/General/symbol.scala +++ b/src/Pure/General/symbol.scala @@ -1,664 +1,668 @@ /* Title: Pure/General/symbol.scala Author: Makarius Isabelle text symbols. */ package isabelle import scala.collection.mutable import scala.util.matching.Regex import scala.annotation.tailrec object Symbol { type Symbol = String // counting Isabelle symbols, starting from 1 type Offset = Text.Offset type Range = Text.Range /* spaces */ val space_char = ' ' val space = " " private val static_spaces = space * 4000 def spaces(n: Int): String = { require(n >= 0) if (n < static_spaces.length) static_spaces.substring(0, n) else space * n } /* ASCII characters */ def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~' def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' def is_ascii_hex(c: Char): Boolean = '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) def is_ascii_line_terminator(c: Char): Boolean = "\r\n".contains(c) def is_ascii_letdig(c: Char): Boolean = is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) def is_ascii_identifier(s: String): Boolean = s.length > 0 && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) def ascii(c: Char): Symbol = { if (c > 127) error("Non-ASCII character: " + quote(c.toString)) else char_symbols(c.toInt) } def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128 /* symbol matching */ private val symbol_total = new Regex("""(?xs) [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""") private def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) def is_malformed(s: Symbol): Boolean = s.length match { case 1 => val c = s(0) Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' case 2 => val c1 = s(0) val c2 = s(1) !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>" } def is_newline(s: Symbol): Boolean = s == "\n" || s == "\r" || s == "\r\n" class Matcher(text: CharSequence) { private val matcher = symbol_total.pattern.matcher(text) def apply(start: Int, end: Int): Int = { require(0 <= start && start < end && end <= text.length) if (is_plain(text.charAt(start))) 1 else { matcher.region(start, end).lookingAt matcher.group.length } } } /* iterator */ private val char_symbols: Array[Symbol] = (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray def iterator(text: CharSequence): Iterator[Symbol] = new Iterator[Symbol] { private val matcher = new Matcher(text) private var i = 0 def hasNext: Boolean = i < text.length def next: Symbol = { val n = matcher(i, text.length) val s = if (n == 0) "" else if (n == 1) { val c = text.charAt(i) if (c < char_symbols.length) char_symbols(c) else text.subSequence(i, i + n).toString } else text.subSequence(i, i + n).toString i += n s } } def explode(text: CharSequence): List[Symbol] = iterator(text).toList def length(text: CharSequence): Int = iterator(text).length def trim_blanks(text: CharSequence): String = Library.trim(is_blank, explode(text)).mkString def all_blank(str: String): Boolean = iterator(str).forall(is_blank) def trim_blank_lines(text: String): String = cat_lines(split_lines(text).dropWhile(all_blank).reverse.dropWhile(all_blank).reverse) /* decoding offsets */ object Index { private sealed case class Entry(chr: Int, sym: Int) val empty: Index = new Index(Nil) def apply(text: CharSequence): Index = { val matcher = new Matcher(text) val buf = new mutable.ListBuffer[Entry] var chr = 0 var sym = 0 while (chr < text.length) { val n = matcher(chr, text.length) chr += n sym += 1 if (n > 1) buf += Entry(chr, sym) } if (buf.isEmpty) empty else new Index(buf.toList) } } final class Index private(entries: List[Index.Entry]) { private val hash: Int = entries.hashCode private val index: Array[Index.Entry] = entries.toArray def decode(symbol_offset: Offset): Text.Offset = { val sym = symbol_offset - 1 val end = index.length @tailrec def bisect(a: Int, b: Int): Int = { if (a < b) { val c = (a + b) / 2 if (sym < index(c).sym) bisect(a, c) else if (c + 1 == end || sym < index(c + 1).sym) c else bisect(c + 1, b) } else -1 } val i = bisect(0, end) if (i < 0) sym else index(i).chr + sym - index(i).sym } def decode(symbol_range: Range): Text.Range = symbol_range.map(decode) override def hashCode: Int = hash override def equals(that: Any): Boolean = that match { case other: Index => index.sameElements(other.index) case _ => false } } /* symbolic text chunks -- without actual text */ object Text_Chunk { sealed abstract class Name case object Default extends Name case class Id(id: Document_ID.Generic) extends Name case class File(name: String) extends Name def apply(text: CharSequence): Text_Chunk = new Text_Chunk(Text.Range(0, text.length), Index(text)) } final class Text_Chunk private(val range: Text.Range, private val index: Index) { override def hashCode: Int = (range, index).hashCode override def equals(that: Any): Boolean = that match { case other: Text_Chunk => range == other.range && index == other.index case _ => false } override def toString: String = "Text_Chunk" + range.toString def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) def incorporate(symbol_range: Range): Option[Text.Range] = { def in(r: Range): Option[Text.Range] = range.try_restrict(decode(r)) match { case Some(r1) if !r1.is_singularity => Some(r1) case _ => None } in(symbol_range) orElse in(symbol_range - 1) } } /* recoding text */ private class Recoder(list: List[(String, String)]) { private val (min, max) = { var min = '\uffff' var max = '\u0000' for ((x, _) <- list) { val c = x(0) if (c < min) min = c if (c > max) max = c } (min, max) } private val table = { var tab = Map[String, String]() for ((x, y) <- list) { tab.get(x) match { case None => tab += (x -> y) case Some(z) => error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) } } tab } def recode(text: String): String = { val len = text.length val matcher = symbol_total.pattern.matcher(text) val result = new StringBuilder(len) var i = 0 while (i < len) { val c = text(i) if (min <= c && c <= max) { matcher.region(i, len).lookingAt val x = matcher.group result.append(table.getOrElse(x, x)) i = matcher.end } else { result.append(c); i += 1 } } result.toString } } /** symbol interpretation **/ val ARGUMENT_CARTOUCHE = "cartouche" val ARGUMENT_SPACE_CARTOUCHE = "space_cartouche" private lazy val symbols = { val contents = for (path <- Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) if path.is_file) yield (File.read(path)) new Interpretation(cat_lines(contents)) } private class Interpretation(symbols_spec: String) { /* read symbols */ private val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) private val Key = new Regex("""(?xs) (.+): """) private def read_decl(decl: String): (Symbol, Properties.T) = { def err() = error("Bad symbol declaration: " + decl) def read_props(props: List[String]): Properties.T = { props match { case Nil => Nil case _ :: Nil => err() case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest) case _ => err() } } decl.split("\\s+").toList match { case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props)) case _ => err() } } private val symbols: List[(Symbol, Properties.T)] = (((List.empty[(Symbol, Properties.T)], Set.empty[Symbol]) /: split_lines(symbols_spec).reverse) { case (res, No_Decl()) => res case ((list, known), decl) => val (sym, props) = read_decl(decl) if (known(sym)) (list, known) else ((sym, props) :: list, known + sym) })._1 /* basic properties */ val properties: Map[Symbol, Properties.T] = Map(symbols: _*) val names: Map[Symbol, (String, String)] = { val Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""") val Argument = new Properties.String("argument") def argument(sym: Symbol, props: Properties.T): String = props match { case Argument(arg) => if (arg == ARGUMENT_CARTOUCHE || arg == ARGUMENT_SPACE_CARTOUCHE) arg else error("Bad argument: " + quote(arg) + " for symbol " + quote(sym)) case _ => "" } Map((for ((sym @ Name(a), props) <- symbols) yield sym -> (a, argument(sym, props))): _*) } val groups: List[(String, List[Symbol])] = symbols.flatMap({ case (sym, props) => val gs = for (("group", g) <- props) yield g if (gs.isEmpty) List(sym -> "unsorted") else gs.map(sym -> _) }).groupBy(_._2).toList.map({ case (group, list) => (group, list.map(_._1)) }) .sortBy(_._1) val abbrevs: Multi_Map[Symbol, String] = Multi_Map(( for { (sym, props) <- symbols ("abbrev", a) <- props.reverse } yield sym -> a): _*) val codes: List[(Symbol, Int)] = { val Code = new Properties.String("code") for { (sym, props) <- symbols code <- props match { case Code(s) => try { Some(Integer.decode(s).intValue) } catch { case _: NumberFormatException => error("Bad code for symbol " + sym) } case _ => None } } yield { if (code < 128) error("Illegal ASCII code for symbol " + sym) else (sym, code) } } /* recoding */ private val (decoder, encoder) = { val mapping = for ((sym, code) <- codes) yield (sym, new String(Character.toChars(code))) (new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) } def decode(text: String): String = decoder.recode(text) def encode(text: String): String = encoder.recode(text) private def recode_set(elems: String*): Set[String] = { val content = elems.toList Set((content ::: content.map(decode)): _*) } private def recode_map[A](elems: (String, A)*): Map[String, A] = { val content = elems.toList Map((content ::: content.map({ case (sym, a) => (decode(sym), a) })): _*) } /* user fonts */ private val Font = new Properties.String("font") val fonts: Map[Symbol, String] = recode_map((for ((sym, Font(font)) <- symbols) yield sym -> font): _*) val font_names: List[String] = Set(fonts.toList.map(_._2): _*).toList val font_index: Map[String, Int] = Map((font_names zip font_names.indices.toList): _*) /* classification */ val letters: Set[String] = recode_set( "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\

", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\

", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\
", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\") val blanks: Set[String] = recode_set(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n") val sym_chars = Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~") val symbolic: Set[String] = recode_set((for {(sym, _) <- symbols; if raw_symbolic(sym)} yield sym): _*) /* misc symbols */ val newline_decoded = decode(newline) val comment_decoded = decode(comment) val cancel_decoded = decode(cancel) val latex_decoded = decode(latex) val marker_decoded = decode(marker) val open_decoded = decode(open) val close_decoded = decode(close) /* control symbols */ val control_decoded: Set[Symbol] = Set((for ((sym, _) <- symbols if sym.startsWith("\\<^")) yield decode(sym)): _*) val sub_decoded = decode(sub) val sup_decoded = decode(sup) val bold_decoded = decode(bold) val emph_decoded = decode(emph) val bsub_decoded = decode(bsub) val esub_decoded = decode(esub) val bsup_decoded = decode(bsup) val esup_decoded = decode(esup) } /* tables */ def properties: Map[Symbol, Properties.T] = symbols.properties def names: Map[Symbol, (String, String)] = symbols.names def groups: List[(String, List[Symbol])] = symbols.groups def abbrevs: Multi_Map[Symbol, String] = symbols.abbrevs def codes: List[(Symbol, Int)] = symbols.codes def groups_code: List[(String, List[Symbol])] = { val has_code = codes.iterator.map(_._1).toSet groups.flatMap({ case (group, symbols) => val symbols1 = symbols.filter(has_code) if (symbols1.isEmpty) None else Some((group, symbols1)) }) } lazy val is_code: Int => Boolean = codes.map(_._2).toSet def decode(text: String): String = symbols.decode(text) def encode(text: String): String = symbols.encode(text) - def decode_yxml(text: String): XML.Body = YXML.parse_body(decode(text)) - def decode_yxml_failsafe(text: String): XML.Body = YXML.parse_body_failsafe(decode(text)) + def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = + YXML.parse_body(decode(text), cache = cache) + + def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = + YXML.parse_body_failsafe(decode(text), cache = cache) + def encode_yxml(body: XML.Body): String = encode(YXML.string_of_body(body)) def decode_strict(text: String): String = { val decoded = decode(text) if (encode(decoded) == text) decoded else { val bad = new mutable.ListBuffer[Symbol] for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) bad += s error("Bad Unicode symbols in text: " + commas_quote(bad)) } } def output(unicode_symbols: Boolean, text: String): String = if (unicode_symbols) Symbol.decode(text) else Symbol.encode(text) def fonts: Map[Symbol, String] = symbols.fonts def font_names: List[String] = symbols.font_names def font_index: Map[String, Int] = symbols.font_index def lookup_font(sym: Symbol): Option[Int] = symbols.fonts.get(sym).map(font_index(_)) /* classification */ def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) /* symbolic newline */ val newline: Symbol = "\\" def newline_decoded: Symbol = symbols.newline_decoded def print_newlines(str: String): String = if (str.contains('\n')) (for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString else str /* formal comments */ val comment: Symbol = "\\" val cancel: Symbol = "\\<^cancel>" val latex: Symbol = "\\<^latex>" val marker: Symbol = "\\<^marker>" def comment_decoded: Symbol = symbols.comment_decoded def cancel_decoded: Symbol = symbols.cancel_decoded def latex_decoded: Symbol = symbols.latex_decoded def marker_decoded: Symbol = symbols.marker_decoded /* cartouches */ val open: Symbol = "\\" val close: Symbol = "\\" def open_decoded: Symbol = symbols.open_decoded def close_decoded: Symbol = symbols.close_decoded def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close def cartouche(s: String): String = open + s + close def cartouche_decoded(s: String): String = open_decoded + s + close_decoded /* symbols for symbolic identifiers */ private def raw_symbolic(sym: Symbol): Boolean = sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") def is_symbolic(sym: Symbol): Boolean = !is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) /* control symbols */ val control_prefix = "\\<^" val control_suffix = ">" def control_name(sym: Symbol): Option[String] = if (is_control_encoded(sym)) Some(sym.substring(control_prefix.length, sym.length - control_suffix.length)) else None def is_control_encoded(sym: Symbol): Boolean = sym.startsWith(control_prefix) && sym.endsWith(control_suffix) def is_control(sym: Symbol): Boolean = is_control_encoded(sym) || symbols.control_decoded.contains(sym) def is_controllable(sym: Symbol): Boolean = !is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym) && sym != "\"" val sub = "\\<^sub>" val sup = "\\<^sup>" val bold = "\\<^bold>" val emph = "\\<^emph>" val bsub = "\\<^bsub>" val esub = "\\<^esub>" val bsup = "\\<^bsup>" val esup = "\\<^esup>" def sub_decoded: Symbol = symbols.sub_decoded def sup_decoded: Symbol = symbols.sup_decoded def bold_decoded: Symbol = symbols.bold_decoded def emph_decoded: Symbol = symbols.emph_decoded def bsub_decoded: Symbol = symbols.bsub_decoded def esub_decoded: Symbol = symbols.esub_decoded def bsup_decoded: Symbol = symbols.bsup_decoded def esup_decoded: Symbol = symbols.esup_decoded /* metric */ def is_printable(sym: Symbol): Boolean = if (is_ascii(sym)) is_ascii_printable(sym(0)) else !is_control(sym) object Metric extends Pretty.Metric { val unit = 1.0 def apply(str: String): Double = (for (s <- iterator(str)) yield { val sym = encode(s) if (sym.startsWith("\\ Some(Markup(name, props), body1, body2) case _ => None } } object Root_Elem { def apply(body: Body): XML.Elem = XML.Elem(Markup(XML_ELEM, Nil), body) def unapply(tree: Tree): Option[Body] = tree match { case XML.Elem(Markup(XML_ELEM, Nil), body) => Some(body) case _ => None } } /* traverse text */ def traverse_text[A](body: Body)(a: A)(op: (A, String) => A): A = { def traverse(x: A, t: Tree): A = t match { case XML.Wrapped_Elem(_, _, ts) => (x /: ts)(traverse) case XML.Elem(_, ts) => (x /: ts)(traverse) case XML.Text(s) => op(x, s) } (a /: body)(traverse) } def text_length(body: Body): Int = traverse_text(body)(0) { case (n, s) => n + s.length } /* text content */ def content(body: Body): String = { val text = new StringBuilder(text_length(body)) traverse_text(body)(()) { case (_, s) => text.append(s) } text.toString } def content(tree: Tree): String = content(List(tree)) /** string representation **/ val header: String = "\n" def output_char(c: Char, s: StringBuilder) { c match { case '<' => s ++= "<" case '>' => s ++= ">" case '&' => s ++= "&" case '"' => s ++= """ case '\'' => s ++= "'" case _ => s += c } } def output_string(str: String, s: StringBuilder) { if (str == null) s ++= str else str.iterator.foreach(c => output_char(c, s)) } def string_of_body(body: Body): String = { val s = new StringBuilder def text(txt: String) { output_string(txt, s) } def elem(markup: Markup) { s ++= markup.name for ((a, b) <- markup.properties) { s += ' '; s ++= a; s += '='; s += '"'; text(b); s += '"' } } def tree(t: Tree): Unit = t match { case XML.Elem(markup, Nil) => s += '<'; elem(markup); s ++= "/>" case XML.Elem(markup, ts) => s += '<'; elem(markup); s += '>' ts.foreach(tree) s ++= "' case XML.Text(txt) => text(txt) } body.foreach(tree) s.toString } def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) /** cache **/ object Cache { def make( max_string: Int = isabelle.Cache.default_max_string, initial_size: Int = isabelle.Cache.default_initial_size): Cache = new Cache(max_string, initial_size) val none: Cache = make(max_string = 0) } class Cache private[XML](max_string: Int, initial_size: Int) extends isabelle.Cache(max_string, initial_size) { protected def cache_props(x: Properties.T): Properties.T = { if (x.isEmpty) x else lookup(x) match { case Some(y) => y case None => store(x.map(p => (Library.isolate_substring(p._1).intern, cache_string(p._2)))) } } protected def cache_markup(x: Markup): Markup = { lookup(x) match { case Some(y) => y case None => x match { case Markup(name, props) => store(Markup(cache_string(name), cache_props(props))) } } } protected def cache_tree(x: XML.Tree): XML.Tree = { lookup(x) match { case Some(y) => y case None => x match { case XML.Elem(markup, body) => store(XML.Elem(cache_markup(markup), cache_body(body))) case XML.Text(text) => store(XML.Text(cache_string(text))) } } } protected def cache_body(x: XML.Body): XML.Body = { if (x.isEmpty) x else lookup(x) match { case Some(y) => y case None => x.map(cache_tree) } } + // support hash-consing + def tree0(x: XML.Tree): XML.Tree = + if (no_cache) x else synchronized { lookup(x) getOrElse store(x) } + // main methods def props(x: Properties.T): Properties.T = if (no_cache) x else synchronized { cache_props(x) } def markup(x: Markup): Markup = if (no_cache) x else synchronized { cache_markup(x) } def tree(x: XML.Tree): XML.Tree = if (no_cache) x else synchronized { cache_tree(x) } def body(x: XML.Body): XML.Body = if (no_cache) x else synchronized { cache_body(x) } def elem(x: XML.Elem): XML.Elem = if (no_cache) x else synchronized { cache_tree(x).asInstanceOf[XML.Elem] } } /** XML as data representation language **/ abstract class Error(s: String) extends Exception(s) class XML_Atom(s: String) extends Error(s) class XML_Body(body: XML.Body) extends Error("") object Encode { type T[A] = A => XML.Body type V[A] = PartialFunction[A, (List[String], XML.Body)] type P[A] = PartialFunction[A, List[String]] /* atomic values */ def long_atom(i: Long): String = Library.signed_string_of_long(i) def int_atom(i: Int): String = Library.signed_string_of_int(i) def bool_atom(b: Boolean): String = if (b) "1" else "0" def unit_atom(u: Unit) = "" /* structural nodes */ private def node(ts: XML.Body): XML.Tree = XML.Elem(Markup(":", Nil), ts) private def vector(xs: List[String]): XML.Attributes = xs.zipWithIndex.map({ case (x, i) => (int_atom(i), x) }) private def tagged(tag: Int, data: (List[String], XML.Body)): XML.Tree = XML.Elem(Markup(int_atom(tag), vector(data._1)), data._2) /* representation of standard types */ val tree: T[XML.Tree] = (t => List(t)) val properties: T[Properties.T] = (props => List(XML.Elem(Markup(":", props), Nil))) val string: T[String] = (s => if (s.isEmpty) Nil else List(XML.Text(s))) val long: T[Long] = (x => string(long_atom(x))) val int: T[Int] = (x => string(int_atom(x))) val bool: T[Boolean] = (x => string(bool_atom(x))) val unit: T[Unit] = (x => string(unit_atom(x))) def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = (x => List(node(f(x._1)), node(g(x._2)))) def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = (x => List(node(f(x._1)), node(g(x._2)), node(h(x._3)))) def list[A](f: T[A]): T[List[A]] = (xs => xs.map((x: A) => node(f(x)))) def option[A](f: T[A]): T[Option[A]] = { case None => Nil case Some(x) => List(node(f(x))) } def variant[A](fs: List[V[A]]): T[A] = { case x => val (f, tag) = fs.iterator.zipWithIndex.find(p => p._1.isDefinedAt(x)).get List(tagged(tag, f(x))) } } object Decode { type T[A] = XML.Body => A type V[A] = (List[String], XML.Body) => A type P[A] = PartialFunction[List[String], A] /* atomic values */ def long_atom(s: String): Long = try { java.lang.Long.parseLong(s) } catch { case e: NumberFormatException => throw new XML_Atom(s) } def int_atom(s: String): Int = try { Integer.parseInt(s) } catch { case e: NumberFormatException => throw new XML_Atom(s) } def bool_atom(s: String): Boolean = if (s == "1") true else if (s == "0") false else throw new XML_Atom(s) def unit_atom(s: String): Unit = if (s == "") () else throw new XML_Atom(s) /* structural nodes */ private def node(t: XML.Tree): XML.Body = t match { case XML.Elem(Markup(":", Nil), ts) => ts case _ => throw new XML_Body(List(t)) } private def vector(atts: XML.Attributes): List[String] = atts.iterator.zipWithIndex.map( { case ((a, x), i) => if (int_atom(a) == i) x else throw new XML_Atom(a) }).toList private def tagged(t: XML.Tree): (Int, (List[String], XML.Body)) = t match { case XML.Elem(Markup(name, atts), ts) => (int_atom(name), (vector(atts), ts)) case _ => throw new XML_Body(List(t)) } /* representation of standard types */ val tree: T[XML.Tree] = { case List(t) => t case ts => throw new XML_Body(ts) } val properties: T[Properties.T] = { case List(XML.Elem(Markup(":", props), Nil)) => props case ts => throw new XML_Body(ts) } val string: T[String] = { case Nil => "" case List(XML.Text(s)) => s case ts => throw new XML_Body(ts) } val long: T[Long] = (x => long_atom(string(x))) val int: T[Int] = (x => int_atom(string(x))) val bool: T[Boolean] = (x => bool_atom(string(x))) val unit: T[Unit] = (x => unit_atom(string(x))) def pair[A, B](f: T[A], g: T[B]): T[(A, B)] = { case List(t1, t2) => (f(node(t1)), g(node(t2))) case ts => throw new XML_Body(ts) } def triple[A, B, C](f: T[A], g: T[B], h: T[C]): T[(A, B, C)] = { case List(t1, t2, t3) => (f(node(t1)), g(node(t2)), h(node(t3))) case ts => throw new XML_Body(ts) } def list[A](f: T[A]): T[List[A]] = (ts => ts.map(t => f(node(t)))) def option[A](f: T[A]): T[Option[A]] = { case Nil => None case List(t) => Some(f(node(t))) case ts => throw new XML_Body(ts) } def variant[A](fs: List[V[A]]): T[A] = { case List(t) => val (tag, (xs, ts)) = tagged(t) val f = try { fs(tag) } catch { case _: IndexOutOfBoundsException => throw new XML_Body(List(t)) } f(xs, ts) case ts => throw new XML_Body(ts) } } } diff --git a/src/Pure/PIDE/yxml.scala b/src/Pure/PIDE/yxml.scala --- a/src/Pure/PIDE/yxml.scala +++ b/src/Pure/PIDE/yxml.scala @@ -1,156 +1,156 @@ /* Title: Pure/PIDE/yxml.scala Author: Makarius Efficient text representation of XML trees. Suitable for direct inlining into plain text. */ package isabelle import scala.collection.mutable object YXML { /* chunk markers */ val X = '\u0005' val Y = '\u0006' val is_X: Char => Boolean = _ == X val is_Y: Char => Boolean = _ == Y val X_string: String = X.toString val Y_string: String = Y.toString val XY_string: String = X_string + Y_string val XYX_string: String = XY_string + X_string def detect(s: String): Boolean = s.exists(c => c == X || c == Y) def detect_elem(s: String): Boolean = s.startsWith(XY_string) /* string representation */ def traversal(string: String => Unit, body: XML.Body) { def tree(t: XML.Tree): Unit = t match { case XML.Elem(Markup(name, atts), ts) => string(XY_string) string(name) for ((a, x) <- atts) { string(Y_string); string(a); string("="); string(x) } string(X_string) ts.foreach(tree) string(XYX_string) case XML.Text(text) => string(text) } body.foreach(tree) } def string_of_body(body: XML.Body): String = { val s = new StringBuilder traversal(str => s ++= str, body) s.toString } def string_of_tree(tree: XML.Tree): String = string_of_body(List(tree)) /* parsing */ private def err(msg: String) = error("Malformed YXML: " + msg) private def err_attribute() = err("bad attribute") private def err_element() = err("bad element") private def err_unbalanced(name: String) = if (name == "") err("unbalanced element") else err("unbalanced element " + quote(name)) private def parse_attrib(source: CharSequence): (String, String) = { val s = source.toString val i = s.indexOf('=') if (i <= 0) err_attribute() (s.substring(0, i), s.substring(i + 1)) } - def parse_body(source: CharSequence): XML.Body = + def parse_body(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Body = { /* stack operations */ def buffer(): mutable.ListBuffer[XML.Tree] = new mutable.ListBuffer[XML.Tree] var stack: List[(Markup, mutable.ListBuffer[XML.Tree])] = List((Markup.Empty, buffer())) def add(x: XML.Tree) { (stack: @unchecked) match { case (_, body) :: _ => body += x } } def push(name: String, atts: XML.Attributes) { if (name == "") err_element() - else stack = (Markup(name, atts), buffer()) :: stack + else stack = (cache.markup(Markup(name, atts)), buffer()) :: stack } def pop() { (stack: @unchecked) match { case (Markup.Empty, _) :: _ => err_unbalanced("") case (markup, body) :: pending => stack = pending - add(XML.Elem(markup, body.toList)) + add(cache.tree0(XML.Elem(markup, body.toList))) } } /* parse chunks */ for (chunk <- Library.separated_chunks(is_X, source) if chunk.length != 0) { if (chunk.length == 1 && chunk.charAt(0) == Y) pop() else { Library.separated_chunks(is_Y, chunk).toList match { case ch :: name :: atts if ch.length == 0 => push(name.toString, atts.map(parse_attrib)) - case txts => for (txt <- txts) add(XML.Text(txt.toString)) + case txts => for (txt <- txts) add(cache.tree0(XML.Text(cache.string(txt.toString)))) } } } (stack: @unchecked) match { case List((Markup.Empty, body)) => body.toList case (Markup(name, _), _) :: _ => err_unbalanced(name) } } - def parse(source: CharSequence): XML.Tree = - parse_body(source) match { + def parse(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Tree = + parse_body(source, cache = cache) match { case List(result) => result case Nil => XML.no_text case _ => err("multiple XML trees") } - def parse_elem(source: CharSequence): XML.Tree = - parse_body(source) match { + def parse_elem(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Tree = + parse_body(source, cache = cache) match { case List(elem: XML.Elem) => elem case _ => err("single XML element expected") } /* failsafe parsing */ private def markup_broken(source: CharSequence) = XML.Elem(Markup.Broken, List(XML.Text(source.toString))) - def parse_body_failsafe(source: CharSequence): XML.Body = + def parse_body_failsafe(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Body = { - try { parse_body(source) } + try { parse_body(source, cache = cache) } catch { case ERROR(_) => List(markup_broken(source)) } } - def parse_failsafe(source: CharSequence): XML.Tree = + def parse_failsafe(source: CharSequence, cache: XML.Cache = XML.Cache.none): XML.Tree = { - try { parse(source) } + try { parse(source, cache = cache) } catch { case ERROR(_) => markup_broken(source) } } }

", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\