diff --git a/src/Pure/Tools/prismjs.scala b/src/Pure/Tools/prismjs.scala --- a/src/Pure/Tools/prismjs.scala +++ b/src/Pure/Tools/prismjs.scala @@ -1,110 +1,110 @@ /* Title: Pure/Tools/prismjs.scala Author: Makarius Support for the Prism.js syntax highlighter -- via external Node.js process. */ package isabelle import scala.collection.mutable object Prismjs { /* component resources */ val HOME: Path = Path.explode("$ISABELLE_PRISMJS_HOME") - lazy val available_languages: List[String] = { + sealed case class Language(names: List[String]) { + require(names.nonEmpty) + + def name: String = names.head + def alias: List[String] = names.tail + override def toString: String = name + } + + lazy val languages: List[Language] = { val components_path = HOME + Path.explode("components.json") val components_json = JSON.parse(File.read(components_path)) JSON.value(components_json, "languages") match { case Some(JSON.Object(langs)) => - (for ((lang, JSON.Object(info)) <- langs.iterator if lang != "meta") yield { + (for ((name, JSON.Object(info)) <- langs.iterator if name != "meta") yield { val alias = JSON.Value.List.unapply(info.getOrElse("alias", Nil), JSON.Value.String.unapply) .getOrElse(Nil) - lang :: alias - }).toList.flatten.sorted + Language(name :: alias) + }).toList case _ => error("Failed to determine languages from " + components_path) } } + def check_language(name: String): Unit = + if (!languages.exists(_.names.contains(name))) error("Unknown language " + quote(name)) - /* JavaScript prelude */ + + /* generate JavaScript */ def prelude(lang: JS.Source): JS.Source = cat_lines(List( Nodejs.require_fs, Nodejs.require_path("const prismjs", HOME), Nodejs.require_path("prismjs.load", HOME + Path.explode("components"), dir = true), JS.function("prismjs.load", lang), """ -function prismjs_content(t) { - if (t instanceof prismjs.Token) { return prismjs_content(t.content) } - else if (Array.isArray(t)) { return t.map(prismjs_content).join("") } +function prismjs_encode(t) { + if (t instanceof prismjs.Token) { + var types = [t.type] + if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } } + else if (t.alias !== undefined) { types.push(t.alias) } + return {"types": types, "content": prismjs_encode(t.content) } + } + else if (Array.isArray(t)) { return t.map(prismjs_encode) } else { return t } } function prismjs_tokenize(text, lang) { - return prismjs.tokenize(text, prismjs.languages[lang]).map(function (t) { - var types = [] - var content = "" - if (t instanceof prismjs.Token) { - types.push(t.type) - if (Array.isArray(t.alias)) { for (a of t.alias) { types.push(a) } } - else if (t.alias !== undefined) { types.push(t.alias) } - content = prismjs_content(t) - } - else { content = t } - return {"types": types, "content": content} - }) + return prismjs.tokenize(text, prismjs.languages[lang]).map(prismjs_encode) } """)) + def main(lang: JS.Source, input: Path, output: Path): JS.Source = + Nodejs.write_file(output, + JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang))) + /* tokenize */ - sealed case class Token( - types: List[String], - content: String, - range: Text.Range) + sealed case class Token(types: List[String], content: Content) { + def string: String = content.string + } + sealed abstract class Content { def string: String } + case class Atom(string: String) extends Content + case class Nested(tokens: List[Token]) extends Content { + def string: String = tokens.map(_.content.string).mkString + } + + def decode(json: JSON.T): Option[Token] = + JSON.Value.String.unapply(json).map(string => Token(Nil, Atom(string))) orElse + (for { + types <- JSON.strings(json, "types") + content_json <- JSON.value(json, "content") + content <- + JSON.Value.String.unapply(content_json).map(Atom.apply) orElse + JSON.Value.List.unapply(content_json, decode).map(Nested.apply) + } yield Token(types, content)) def tokenize(text: String, language: String): List[Token] = { - if (!available_languages.contains(language)) { - error("Unknown language " + quote(language)) - } + check_language(language) val json = Isabelle_System.with_tmp_file("input", ext = "json") { input => Isabelle_System.with_tmp_file("output", ext = "json") { output => File.write(input, text) val lang = JS.value(language) - Nodejs.execute( - prelude(lang) + "\n" + - Nodejs.write_file(output, - JS.json_print(JS.function("prismjs_tokenize", Nodejs.read_file(input), lang)))) + Nodejs.execute(prelude(lang) + "\n" + main(lang, input, output)) JSON.parse(File.read(output)) } } - def parse_token(t: JSON.T): Token = - (for { - types <- JSON.strings(t, "types") - content <- JSON.string(t, "content") - } yield Token(types, content, Text.Range.zero)) - .getOrElse(error("Malformed JSON token: " + t)) - - val tokens0 = - JSON.Value.List.unapply(json, t => Some(parse_token(t))) - .getOrElse(error("Malformed JSON: array expected")) - - var stop = 0 - val tokens = new mutable.ListBuffer[Token] - for (tok <- tokens0) { - val start = stop - stop += tok.content.length - tokens += tok.copy(range = Text.Range(start, stop)) - } - tokens.toList + JSON.Value.List.unapply(json, decode).getOrElse(error("Malformed JSON: " + json)) } }