diff --git a/src/Pure/General/scan.scala b/src/Pure/General/scan.scala --- a/src/Pure/General/scan.scala +++ b/src/Pure/General/scan.scala @@ -1,483 +1,482 @@ /* Title: Pure/General/scan.scala Author: Makarius Efficient scanning of keywords and tokens. */ package isabelle import scala.annotation.tailrec import scala.collection.IndexedSeq import scala.util.matching.Regex import scala.util.parsing.input.{OffsetPosition, Position => InputPosition, Reader, CharSequenceReader, PagedSeq} import scala.util.parsing.combinator.RegexParsers import java.io.{File => JFile, BufferedInputStream, FileInputStream, InputStream} import java.net.URL object Scan { /** context of partial line-oriented scans **/ abstract class Line_Context case object Finished extends Line_Context case class Quoted(quote: String) extends Line_Context case class Cartouche(depth: Int) extends Line_Context case class Comment_Prefix(symbol: Symbol.Symbol) extends Line_Context case class Cartouche_Comment(depth: Int) extends Line_Context case class Comment(depth: Int) extends Line_Context /** parser combinators **/ object Parsers extends Parsers trait Parsers extends RegexParsers { override val whiteSpace: Regex = "".r /* optional termination */ def opt_term[T](p: => Parser[T]): Parser[Option[T]] = p ^^ (x => Some(x)) | """\z""".r ^^ (_ => None) /* repeated symbols */ def repeated(pred: Symbol.Symbol => Boolean, min_count: Int, max_count: Int): Parser[String] = new Parser[String] { def apply(in: Input) = { val start = in.offset val end = in.source.length val matcher = new Symbol.Matcher(in.source) var i = start var count = 0 var finished = false while (!finished && i < end && count < max_count) { - val n = matcher(i, end) - val sym = in.source.subSequence(i, i + n).toString - if (pred(sym)) { i += n; count += 1 } + val sym = matcher.match_symbol(i) + if (pred(sym)) { i += sym.length; count += 1 } else finished = true } if (count < min_count) Failure("bad input", in) else Success(in.source.subSequence(start, i).toString, in.drop(i - start)) } }.named("repeated") def one(pred: Symbol.Symbol => Boolean): Parser[String] = repeated(pred, 1, 1) def maybe(pred: Symbol.Symbol => Boolean): Parser[String] = repeated(pred, 0, 1) def many(pred: Symbol.Symbol => Boolean): Parser[String] = repeated(pred, 0, Integer.MAX_VALUE) def many1(pred: Symbol.Symbol => Boolean): Parser[String] = repeated(pred, 1, Integer.MAX_VALUE) /* character */ def character(pred: Char => Boolean): Symbol.Symbol => Boolean = (s: Symbol. Symbol) => s.length == 1 && pred(s.charAt(0)) /* quoted strings */ private def quoted_body(quote: Symbol.Symbol): Parser[String] = { rep(many1(sym => sym != quote && sym != "\\") | "\\" + quote | "\\\\" | ("""\\\d\d\d""".r ^? { case x if x.substring(1, 4).toInt <= 255 => x })) ^^ (_.mkString) } def quoted(quote: Symbol.Symbol): Parser[String] = { quote ~ quoted_body(quote) ~ quote ^^ { case x ~ y ~ z => x + y + z } }.named("quoted") def quoted_content(quote: Symbol.Symbol, source: String): String = { require(parseAll(quoted(quote), source).successful, "no quoted text") val body = source.substring(1, source.length - 1) if (body.exists(_ == '\\')) { val content = rep(many1(sym => sym != quote && sym != "\\") | "\\" ~> (quote | "\\" | """\d\d\d""".r ^^ { case x => x.toInt.toChar.toString })) parseAll(content ^^ (_.mkString), body).get } else body } def quoted_line(quote: Symbol.Symbol, ctxt: Line_Context): Parser[(String, Line_Context)] = { ctxt match { case Finished => quote ~ quoted_body(quote) ~ opt_term(quote) ^^ { case x ~ y ~ Some(z) => (x + y + z, Finished) case x ~ y ~ None => (x + y, Quoted(quote)) } case Quoted(q) if q == quote => quoted_body(quote) ~ opt_term(quote) ^^ { case x ~ Some(y) => (x + y, Finished) case x ~ None => (x, ctxt) } case _ => failure("") } }.named("quoted_line") def recover_quoted(quote: Symbol.Symbol): Parser[String] = quote ~ quoted_body(quote) ^^ { case x ~ y => x + y } /* nested text cartouches */ def cartouche_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)] { require(depth >= 0, "bad cartouche depth") def apply(in: Input) = { val start = in.offset val end = in.source.length val matcher = new Symbol.Matcher(in.source) var i = start var d = depth var finished = false while (!finished && i < end) { - val n = matcher(i, end) - val sym = in.source.subSequence(i, i + n).toString + val sym = matcher.match_symbol(i) + val n = sym.length if (Symbol.is_open(sym)) { i += n; d += 1 } else if (Symbol.is_close(sym) && d > 0) { i += n; d -= 1; if (d == 0) finished = true } else if (d > 0) i += n else finished = true } if (i == start) Failure("bad input", in) else Success((in.source.subSequence(start, i).toString, d), in.drop(i - start)) } }.named("cartouche_depth") def cartouche: Parser[String] = cartouche_depth(0) ^? { case (x, d) if d == 0 => x } def cartouche_line(ctxt: Line_Context): Parser[(String, Line_Context)] = { def cartouche_context(d: Int): Line_Context = if (d == 0) Finished else Cartouche(d) ctxt match { case Finished => cartouche_depth(0) ^^ { case (c, d) => (c, cartouche_context(d)) } case Cartouche(depth) => cartouche_depth(depth) ^^ { case (c, d) => (c, cartouche_context(d)) } case _ => failure("") } } val recover_cartouche: Parser[String] = cartouche_depth(0) ^^ (_._1) def cartouche_content(source: String): String = { def err(): Nothing = error("Malformed text cartouche: " + quote(source)) val source1 = Library.try_unprefix(Symbol.open_decoded, source) orElse Library.try_unprefix(Symbol.open, source) getOrElse err() Library.try_unsuffix(Symbol.close_decoded, source1) orElse Library.try_unsuffix(Symbol.close, source1) getOrElse err() } /* nested comments */ private def comment_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)] { require(depth >= 0, "bad comment depth") val comment_text: Parser[List[String]] = rep1(many1(sym => sym != "*" && sym != "(") | """\*(?!\))|\((?!\*)""".r) def apply(in: Input) = { var rest = in def try_parse[A](p: Parser[A]): Boolean = { parse(p ^^^ (()), rest) match { case Success(_, next) => { rest = next; true } case _ => false } } var d = depth var finished = false while (!finished) { if (try_parse("(*")) d += 1 else if (d > 0 && try_parse("*)")) { d -= 1; if (d == 0) finished = true } else if (d == 0 || !try_parse(comment_text)) finished = true } if (in.offset < rest.offset) Success((in.source.subSequence(in.offset, rest.offset).toString, d), rest) else Failure("comment expected", in) } }.named("comment_depth") def comment: Parser[String] = comment_depth(0) ^? { case (x, d) if d == 0 => x } def comment_line(ctxt: Line_Context): Parser[(String, Line_Context)] = { val depth = ctxt match { case Finished => 0 case Comment(d) => d case _ => -1 } if (depth >= 0) comment_depth(depth) ^^ { case (x, 0) => (x, Finished) case (x, d) => (x, Comment(d)) } else failure("") } val recover_comment: Parser[String] = comment_depth(0) ^^ (_._1) def comment_content(source: String): String = { require(parseAll(comment, source).successful, "no comment") source.substring(2, source.length - 2) } /* control cartouches */ val control_symbol: Parser[String] = one(Symbol.is_control) val control_cartouche: Parser[String] = control_symbol ~ cartouche ^^ { case a ~ b => a + b } /* keyword */ def literal(lexicon: Lexicon): Parser[String] = new Parser[String] { def apply(in: Input) = { val result = lexicon.scan(in) if (result.isEmpty) Failure("keyword expected", in) else Success(result, in.drop(result.length)) } }.named("keyword") } /** Lexicon -- position tree **/ object Lexicon { /* representation */ private sealed case class Tree(branches: Map[Char, (String, Tree)]) private val empty_tree = Tree(Map()) val empty: Lexicon = new Lexicon(empty_tree) def apply(elems: String*): Lexicon = empty ++ elems } final class Lexicon private(rep: Lexicon.Tree) { /* auxiliary operations */ private def dest(tree: Lexicon.Tree, result: List[String]): List[String] = tree.branches.toList.foldLeft(result) { case (res, (_, (s, tr))) => if (s.isEmpty) dest(tr, res) else dest(tr, s :: res) } private def lookup(str: CharSequence): Option[(Boolean, Lexicon.Tree)] = { val len = str.length @tailrec def look(tree: Lexicon.Tree, tip: Boolean, i: Int): Option[(Boolean, Lexicon.Tree)] = { if (i < len) { tree.branches.get(str.charAt(i)) match { case Some((s, tr)) => look(tr, s.nonEmpty, i + 1) case None => None } } else Some(tip, tree) } look(rep, false, 0) } def completions(str: CharSequence): List[String] = lookup(str) match { case Some((true, tree)) => dest(tree, List(str.toString)) case Some((false, tree)) => dest(tree, Nil) case None => Nil } /* pseudo Set methods */ def raw_iterator: Iterator[String] = dest(rep, Nil).iterator def iterator: Iterator[String] = dest(rep, Nil).sorted.iterator override def toString: String = iterator.mkString("Lexicon(", ", ", ")") def is_empty: Boolean = rep.branches.isEmpty def contains(elem: String): Boolean = lookup(elem) match { case Some((tip, _)) => tip case _ => false } /* build lexicon */ def + (elem: String): Lexicon = if (contains(elem)) this else { val len = elem.length def extend(tree: Lexicon.Tree, i: Int): Lexicon.Tree = if (i < len) { val c = elem.charAt(i) val end = (i + 1 == len) tree.branches.get(c) match { case Some((s, tr)) => Lexicon.Tree(tree.branches + (c -> (if (end) elem else s, extend(tr, i + 1)))) case None => Lexicon.Tree(tree.branches + (c -> (if (end) elem else "", extend(Lexicon.empty_tree, i + 1)))) } } else tree new Lexicon(extend(rep, 0)) } def ++ (elems: IterableOnce[String]): Lexicon = elems.iterator.foldLeft(this)(_ + _) def ++ (other: Lexicon): Lexicon = if (this eq other) this else if (is_empty) other else this ++ other.raw_iterator def -- (remove: Iterable[String]): Lexicon = if (remove.exists(contains)) Lexicon.empty ++ iterator.filterNot(a => remove.exists(b => a == b)) else this /* scan */ def scan(in: Reader[Char]): String = { val source = in.source val offset = in.offset val len = source.length - offset @tailrec def scan_tree(tree: Lexicon.Tree, result: String, i: Int): String = { if (i < len) { tree.branches.get(source.charAt(offset + i)) match { case Some((s, tr)) => scan_tree(tr, if (s.isEmpty) result else s, i + 1) case None => result } } else result } scan_tree(rep, "", 0) } } /** read stream without decoding: efficient length operation **/ private class Restricted_Seq(seq: IndexedSeq[Char], start: Int, end: Int) extends CharSequence { def charAt(i: Int): Char = if (0 <= i && i < length) seq(start + i) else throw new IndexOutOfBoundsException def length: Int = end - start // avoid expensive seq.length def subSequence(i: Int, j: Int): CharSequence = if (0 <= i && i <= j && j <= length) new Restricted_Seq(seq, start + i, start + j) else throw new IndexOutOfBoundsException override def toString: String = { val buf = new StringBuilder(length) for (offset <- start until end) buf.append(seq(offset)) buf.toString } } abstract class Byte_Reader extends Reader[Char] with AutoCloseable private def make_byte_reader(stream: InputStream, stream_length: Int): Byte_Reader = { val buffered_stream = new BufferedInputStream(stream) val seq = new PagedSeq( (buf: Array[Char], offset: Int, length: Int) => { var i = 0 var c = 0 var eof = false while (!eof && i < length) { c = buffered_stream.read if (c == -1) eof = true else { buf(offset + i) = c.toChar; i += 1 } } if (i > 0) i else -1 }) val restricted_seq = new Restricted_Seq(seq, 0, stream_length) class Paged_Reader(override val offset: Int) extends Byte_Reader { override lazy val source: CharSequence = restricted_seq def first: Char = if (seq.isDefinedAt(offset)) seq(offset) else '\u001a' def rest: Paged_Reader = if (seq.isDefinedAt(offset)) new Paged_Reader(offset + 1) else this def pos: InputPosition = new OffsetPosition(source, offset) def atEnd: Boolean = !seq.isDefinedAt(offset) override def drop(n: Int): Paged_Reader = new Paged_Reader(offset + n) def close(): Unit = buffered_stream.close() } new Paged_Reader(0) } def byte_reader(file: JFile): Byte_Reader = make_byte_reader(new FileInputStream(file), file.length.toInt) def byte_reader(url: URL): Byte_Reader = { val connection = url.openConnection val stream = connection.getInputStream val stream_length = connection.getContentLength make_byte_reader(stream, stream_length) } def reader_is_utf8(reader: Reader[Char]): Boolean = reader.isInstanceOf[Byte_Reader] def reader_decode_utf8(is_utf8: Boolean, s: String): String = if (is_utf8) UTF8.decode_permissive(s) else s def reader_decode_utf8(reader: Reader[Char], s: String): String = reader_decode_utf8(reader_is_utf8(reader), s) /* plain text reader */ def char_reader(text: CharSequence): CharSequenceReader = new CharSequenceReader(text) } diff --git a/src/Pure/General/symbol.scala b/src/Pure/General/symbol.scala --- a/src/Pure/General/symbol.scala +++ b/src/Pure/General/symbol.scala @@ -1,692 +1,696 @@ /* Title: Pure/General/symbol.scala Author: Makarius Isabelle text symbols. */ package isabelle import scala.collection.mutable import scala.util.matching.Regex import scala.annotation.tailrec object Symbol { type Symbol = String // counting Isabelle symbols, starting from 1 type Offset = Text.Offset type Range = Text.Range /* spaces */ val space_char = ' ' val space = " " private val static_spaces = space * 4000 def spaces(n: Int): String = { require(n >= 0, "negative spaces") if (n < static_spaces.length) static_spaces.substring(0, n) else space * n } /* ASCII characters */ def is_ascii_printable(c: Char): Boolean = space_char <= c && c <= '~' def is_ascii_letter(c: Char): Boolean = 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z' def is_ascii_digit(c: Char): Boolean = '0' <= c && c <= '9' def is_ascii_hex(c: Char): Boolean = '0' <= c && c <= '9' || 'A' <= c && c <= 'F' || 'a' <= c && c <= 'f' def is_ascii_quasi(c: Char): Boolean = c == '_' || c == '\'' def is_ascii_blank(c: Char): Boolean = " \t\n\u000b\f\r".contains(c) def is_ascii_line_terminator(c: Char): Boolean = "\r\n".contains(c) def is_ascii_letdig(c: Char): Boolean = is_ascii_letter(c) || is_ascii_digit(c) || is_ascii_quasi(c) def is_ascii_identifier(s: String): Boolean = s.nonEmpty && is_ascii_letter(s(0)) && s.forall(is_ascii_letdig) def ascii(c: Char): Symbol = { if (c > 127) error("Non-ASCII character: " + quote(c.toString)) else char_symbols(c.toInt) } def is_ascii(s: Symbol): Boolean = s.length == 1 && s(0) < 128 /* symbol matching */ - private val symbol_total = - new Regex("(?xs) [\ud800-\udbff][\udc00-\udfff] " + - """ | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""") - - private def is_plain(c: Char): Boolean = - !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) - def is_malformed(s: Symbol): Boolean = s.length match { case 1 => val c = s(0) Character.isHighSurrogate(c) || Character.isLowSurrogate(c) || c == '\ufffd' case 2 => val c1 = s(0) val c2 = s(1) !(c1 == '\r' && c2 == '\n' || Character.isSurrogatePair(c1, c2)) case _ => !s.endsWith(">") || s == "\\<>" || s == "\\<^>" } def is_newline(s: Symbol): Boolean = s == "\n" || s == "\r" || s == "\r\n" class Matcher(text: CharSequence) { - private val matcher = symbol_total.pattern.matcher(text) - def apply(start: Int, end: Int): Int = + private def ok(i: Int): Boolean = 0 <= i && i < text.length + private def char(i: Int): Char = if (ok(i)) text.charAt(i) else 0 + private def maybe_char(c: Char, i: Int): Int = if (char(i) == c) i + 1 else i + + @tailrec private def many_ascii_letdig(i: Int): Int = + if (is_ascii_letdig(char(i))) many_ascii_letdig(i + 1) else i + private def maybe_ascii_id(i: Int): Int = + if (is_ascii_letter(char(i))) many_ascii_letdig(i + 1) else i + + def match_length(i: Int): Int = { - require(0 <= start && start < end && end <= text.length, "bad matcher range") - if (is_plain(text.charAt(start))) 1 - else { - matcher.region(start, end).lookingAt - matcher.group.length + val a = char(i) + val b = char(i + 1) + + if (Character.isHighSurrogate(a) && Character.isLowSurrogate(b) || a == '\r' && b == '\n') 2 + else if (a == '\\' && b == '<') maybe_char('>', maybe_ascii_id(maybe_char('^', i + 2))) - i + else if (ok(i)) 1 + else 0 + } + + def match_symbol(i: Int): String = + match_length(i) match { + case 0 => "" + case 1 => + val c = text.charAt(i) + if (c < char_symbols.length) char_symbols(c) + else c.toString + case n => + text.subSequence(i, i + n).toString } - } } /* iterator */ private val char_symbols: Array[Symbol] = (0 until 256).iterator.map(i => new String(Array(i.toChar))).toArray def iterator(text: CharSequence): Iterator[Symbol] = new Iterator[Symbol] { private val matcher = new Matcher(text) private var i = 0 def hasNext: Boolean = i < text.length def next(): Symbol = { - val n = matcher(i, text.length) - val s = - if (n == 0) "" - else if (n == 1) { - val c = text.charAt(i) - if (c < char_symbols.length) char_symbols(c) - else text.subSequence(i, i + n).toString - } - else text.subSequence(i, i + n).toString - i += n + val s = matcher.match_symbol(i) + i += s.length s } } def explode(text: CharSequence): List[Symbol] = iterator(text).toList def length(text: CharSequence): Int = iterator(text).length def trim_blanks(text: CharSequence): String = Library.trim(is_blank, explode(text)).mkString def all_blank(str: String): Boolean = iterator(str).forall(is_blank) def trim_blank_lines(text: String): String = cat_lines(split_lines(text).dropWhile(all_blank).reverse.dropWhile(all_blank).reverse) /* decoding offsets */ object Index { private sealed case class Entry(chr: Int, sym: Int) val empty: Index = new Index(Nil) def apply(text: CharSequence): Index = { val matcher = new Matcher(text) val buf = new mutable.ListBuffer[Entry] var chr = 0 var sym = 0 while (chr < text.length) { - val n = matcher(chr, text.length) + val n = matcher.match_length(chr) chr += n sym += 1 if (n > 1) buf += Entry(chr, sym) } if (buf.isEmpty) empty else new Index(buf.toList) } } final class Index private(entries: List[Index.Entry]) { private val hash: Int = entries.hashCode private val index: Array[Index.Entry] = entries.toArray def decode(symbol_offset: Offset): Text.Offset = { val sym = symbol_offset - 1 val end = index.length @tailrec def bisect(a: Int, b: Int): Int = { if (a < b) { val c = (a + b) / 2 if (sym < index(c).sym) bisect(a, c) else if (c + 1 == end || sym < index(c + 1).sym) c else bisect(c + 1, b) } else -1 } val i = bisect(0, end) if (i < 0) sym else index(i).chr + sym - index(i).sym } def decode(symbol_range: Range): Text.Range = symbol_range.map(decode) override def hashCode: Int = hash override def equals(that: Any): Boolean = that match { case other: Index => index.sameElements(other.index) case _ => false } } /* symbolic text chunks -- without actual text */ object Text_Chunk { sealed abstract class Name case object Default extends Name case class Id(id: Document_ID.Generic) extends Name case class File(name: String) extends Name def apply(text: CharSequence): Text_Chunk = new Text_Chunk(Text.Range(0, text.length), Index(text)) } final class Text_Chunk private(val range: Text.Range, private val index: Index) { override def hashCode: Int = (range, index).hashCode override def equals(that: Any): Boolean = that match { case other: Text_Chunk => range == other.range && index == other.index case _ => false } override def toString: String = "Text_Chunk" + range.toString def decode(symbol_offset: Offset): Text.Offset = index.decode(symbol_offset) def decode(symbol_range: Range): Text.Range = index.decode(symbol_range) def incorporate(symbol_range: Range): Option[Text.Range] = { def in(r: Range): Option[Text.Range] = range.try_restrict(decode(r)) match { case Some(r1) if !r1.is_singularity => Some(r1) case _ => None } in(symbol_range) orElse in(symbol_range - 1) } } /* recoding text */ private class Recoder(list: List[(String, String)]) { private val (min, max) = { var min = '\uffff' var max = '\u0000' for ((x, _) <- list) { val c = x(0) if (c < min) min = c if (c > max) max = c } (min, max) } private val table = { var tab = Map[String, String]() for ((x, y) <- list) { tab.get(x) match { case None => tab += (x -> y) case Some(z) => error("Duplicate symbol mapping of " + quote(x) + " to " + quote(y) + " vs. " + quote(z)) } } tab } def recode(text: String): String = { val len = text.length - val matcher = symbol_total.pattern.matcher(text) + val matcher = new Symbol.Matcher(text) val result = new StringBuilder(len) var i = 0 while (i < len) { val c = text(i) if (min <= c && c <= max) { - matcher.region(i, len).lookingAt - val x = matcher.group - result.append(table.getOrElse(x, x)) - i = matcher.end + val s = matcher.match_symbol(i) + result.append(table.getOrElse(s, s)) + i += s.length } else { result.append(c); i += 1 } } result.toString } } /** defined symbols **/ object Argument extends Enumeration { val none, cartouche, space_cartouche = Value def unapply(s: String): Option[Value] = try { Some(withName(s)) } catch { case _: NoSuchElementException => None} } object Entry { private val Name = new Regex("""\\<\^?([A-Za-z][A-Za-z0-9_']*)>""") private val Argument = new Properties.String("argument") private val Abbrev = new Properties.String("abbrev") private val Code = new Properties.String("code") private val Font = new Properties.String("font") private val Group = new Properties.String("group") def apply(symbol: Symbol, props: Properties.T): Entry = { def err(msg: String): Nothing = error(msg + " for symbol " + quote(symbol)) val name = symbol match { case Name(a) => a case _ => err("Cannot determine name") } val argument = props match { case Argument(arg) => Symbol.Argument.unapply(arg) getOrElse error("Bad argument: " + quote(arg) + " for symbol " + quote(symbol)) case _ => Symbol.Argument.none } val code = props match { case Code(s) => try { val code = Integer.decode(s).intValue if (code >= 128) Some(code) else err("Illegal ASCII code") } catch { case _: NumberFormatException => err("Bad code") } case _ => None } val groups = proper_list(for ((Group.name, a) <- props) yield a).getOrElse(List("unsorted")) val abbrevs = for ((Abbrev.name, a) <- props) yield a new Entry(symbol, name, argument, code, Font.unapply(props), groups, abbrevs) } } class Entry private( val symbol: Symbol, val name: String, val argument: Symbol.Argument.Value, val code: Option[Int], val font: Option[String], val groups: List[String], val abbrevs: List[String]) { override def toString: String = symbol val decode: Option[String] = code.map(c => new String(Character.toChars(c))) } lazy val symbols: Symbols = Symbols.load() object Symbols { def load(): Symbols = { val contents = for (path <- Path.split(Isabelle_System.getenv("ISABELLE_SYMBOLS")) if path.is_file) yield File.read(path) make(cat_lines(contents)) } def make(symbols_spec: String): Symbols = { val No_Decl = new Regex("""(?xs) ^\s* (?: \#.* )? $ """) val Key = new Regex("""(?xs) (.+): """) def read_decl(decl: String): (Symbol, Properties.T) = { def err() = error("Bad symbol declaration: " + decl) def read_props(props: List[String]): Properties.T = { props match { case Nil => Nil case _ :: Nil => err() case Key(x) :: y :: rest => (x -> y.replace('\u2423', ' ')) :: read_props(rest) case _ => err() } } decl.split("\\s+").toList match { case sym :: props if sym.length > 1 && !is_malformed(sym) => (sym, read_props(props)) case _ => err() } } new Symbols( split_lines(symbols_spec).reverse. foldLeft((List.empty[Entry], Set.empty[Symbol])) { case (res, No_Decl()) => res case ((list, known), decl) => val (sym, props) = read_decl(decl) if (known(sym)) (list, known) else (Entry(sym, props) :: list, known + sym) }._1) } } class Symbols(val entries: List[Entry]) { override def toString: String = entries.mkString("Symbols(", ", ", ")") /* basic properties */ private val entries_map: Map[Symbol, Entry] = (for (entry <- entries.iterator) yield entry.symbol -> entry).toMap def get(sym: Symbol): Option[Entry] = entries_map.get(sym) def defined(sym: Symbol): Boolean = entries_map.isDefinedAt(sym) def defined_code(sym: Symbol): Boolean = get(sym) match { case Some(entry) => entry.code.isDefined case _ => false } val code_defined: Int => Boolean = (for (entry <- entries.iterator; code <- entry.code) yield code).toSet val groups_code: List[(String, List[Symbol])] = (for { entry <- entries.iterator if entry.code.isDefined group <- entry.groups.iterator } yield entry.symbol -> group) .toList.groupBy(_._2).toList.map({ case (g, list) => (g, list.map(_._1)) }).sortBy(_._1) def get_abbrevs(sym: Symbol): List[String] = get(sym) match { case Some(entry) => entry.abbrevs case _ => Nil } /* recoding */ private val (decoder, encoder) = { val mapping = for (entry <- entries; s <- entry.decode) yield entry.symbol -> s (new Recoder(mapping), new Recoder(for ((x, y) <- mapping) yield (y, x))) } def decode(text: String): String = decoder.recode(text) def encode(text: String): String = encoder.recode(text) private def recode_set(elems: Iterable[String]): Set[String] = (elems.iterator ++ elems.iterator.map(decode)).toSet private def recode_map[A](elems: Iterable[(String, A)]): Map[String, A] = (elems.iterator ++ elems.iterator.map({ case (sym, a) => (decode(sym), a) })).toMap /* user fonts */ val fonts: Map[Symbol, String] = recode_map(for (entry <- entries; font <- entry.font) yield entry.symbol -> font) val font_names: List[String] = fonts.iterator.map(_._2).toSet.toList val font_index: Map[String, Int] = (font_names zip font_names.indices.toList).toMap def lookup_font(sym: Symbol): Option[Int] = fonts.get(sym).map(font_index(_)) /* classification */ val letters: Set[String] = recode_set(List( "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\

", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\

", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\
", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\")) val blanks: Set[String] = recode_set(List(space, "\t", "\n", "\u000B", "\f", "\r", "\r\n")) val sym_chars = Set("!", "#", "$", "%", "&", "*", "+", "-", "/", "<", "=", ">", "?", "@", "^", "_", "|", "~") val symbolic: Set[String] = recode_set(for (entry <- entries if raw_symbolic(entry.symbol)) yield entry.symbol) /* misc symbols */ val newline_decoded: Symbol = decode(newline) val comment_decoded: Symbol = decode(comment) val cancel_decoded: Symbol = decode(cancel) val latex_decoded: Symbol = decode(latex) val marker_decoded: Symbol = decode(marker) val open_decoded: Symbol = decode(open) val close_decoded: Symbol = decode(close) /* control symbols */ val control_decoded: Set[Symbol] = (for (entry <- entries.iterator if entry.symbol.startsWith("\\<^"); s <- entry.decode) yield s).toSet val sub_decoded: Symbol = decode(sub) val sup_decoded: Symbol = decode(sup) val bold_decoded: Symbol = decode(bold) val emph_decoded: Symbol = decode(emph) val bsub_decoded: Symbol = decode(bsub) val esub_decoded: Symbol = decode(esub) val bsup_decoded: Symbol = decode(bsup) val esup_decoded: Symbol = decode(esup) } /* tables */ def decode(text: String): String = symbols.decode(text) def encode(text: String): String = symbols.encode(text) def decode_yxml(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = YXML.parse_body(decode(text), cache = cache) def decode_yxml_failsafe(text: String, cache: XML.Cache = XML.Cache.none): XML.Body = YXML.parse_body_failsafe(decode(text), cache = cache) def encode_yxml(body: XML.Body): String = encode(YXML.string_of_body(body)) def decode_strict(text: String): String = { val decoded = decode(text) if (encode(decoded) == text) decoded else { val bad = new mutable.ListBuffer[Symbol] for (s <- iterator(text) if encode(decode(s)) != s && !bad.contains(s)) bad += s error("Bad Unicode symbols in text: " + commas_quote(bad)) } } def output(unicode_symbols: Boolean, text: String): String = if (unicode_symbols) Symbol.decode(text) else Symbol.encode(text) /* classification */ def is_letter(sym: Symbol): Boolean = symbols.letters.contains(sym) def is_digit(sym: Symbol): Boolean = sym.length == 1 && '0' <= sym(0) && sym(0) <= '9' def is_quasi(sym: Symbol): Boolean = sym == "_" || sym == "'" def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym) def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym) /* symbolic newline */ val newline: Symbol = "\\" def newline_decoded: Symbol = symbols.newline_decoded def print_newlines(str: String): String = if (str.contains('\n')) (for (s <- iterator(str)) yield { if (s == "\n") newline_decoded else s }).mkString else str /* formal comments */ val comment: Symbol = "\\" val cancel: Symbol = "\\<^cancel>" val latex: Symbol = "\\<^latex>" val marker: Symbol = "\\<^marker>" def comment_decoded: Symbol = symbols.comment_decoded def cancel_decoded: Symbol = symbols.cancel_decoded def latex_decoded: Symbol = symbols.latex_decoded def marker_decoded: Symbol = symbols.marker_decoded /* cartouches */ val open: Symbol = "\\" val close: Symbol = "\\" def open_decoded: Symbol = symbols.open_decoded def close_decoded: Symbol = symbols.close_decoded def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close def cartouche(s: String): String = open + s + close def cartouche_decoded(s: String): String = open_decoded + s + close_decoded /* symbols for symbolic identifiers */ private def raw_symbolic(sym: Symbol): Boolean = sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^") def is_symbolic(sym: Symbol): Boolean = !is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym)) def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym) /* control symbols */ val control_prefix = "\\<^" val control_suffix = ">" def control_name(sym: Symbol): Option[String] = if (is_control_encoded(sym)) Some(sym.substring(control_prefix.length, sym.length - control_suffix.length)) else None def is_control_encoded(sym: Symbol): Boolean = sym.startsWith(control_prefix) && sym.endsWith(control_suffix) def is_control(sym: Symbol): Boolean = is_control_encoded(sym) || symbols.control_decoded.contains(sym) def is_controllable(sym: Symbol): Boolean = !is_blank(sym) && !is_control(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym) && sym != "\"" val sub: Symbol = "\\<^sub>" val sup: Symbol = "\\<^sup>" val bold: Symbol = "\\<^bold>" val emph: Symbol = "\\<^emph>" val bsub: Symbol = "\\<^bsub>" val esub: Symbol = "\\<^esub>" val bsup: Symbol = "\\<^bsup>" val esup: Symbol = "\\<^esup>" def sub_decoded: Symbol = symbols.sub_decoded def sup_decoded: Symbol = symbols.sup_decoded def bold_decoded: Symbol = symbols.bold_decoded def emph_decoded: Symbol = symbols.emph_decoded def bsub_decoded: Symbol = symbols.bsub_decoded def esub_decoded: Symbol = symbols.esub_decoded def bsup_decoded: Symbol = symbols.bsup_decoded def esup_decoded: Symbol = symbols.esup_decoded /* metric */ def is_printable(sym: Symbol): Boolean = if (is_ascii(sym)) is_ascii_printable(sym(0)) else !is_control(sym) object Metric extends Pretty.Metric { val unit = 1.0 def apply(str: String): Double = (for (s <- iterator(str)) yield { val sym = encode(s) if (sym.startsWith("\\

", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\", "\\