diff --git a/tools/afp_check_metadata.scala b/tools/afp_check_metadata.scala --- a/tools/afp_check_metadata.scala +++ b/tools/afp_check_metadata.scala @@ -1,149 +1,147 @@ /* Author: Fabian Huch, TU Muenchen Tool to check metadata consistency. */ package afp import isabelle._ import afp.Metadata._ object AFP_Check_Metadata { def diff(t1: afp.TOML.T, t2: afp.TOML.T): List[afp.TOML.Key] = { (t1.keySet diff t2.keySet).toList ++ t1.flatMap { case (k, afp.TOML.T(tr1)) => t2.get(k).map { case afp.TOML.T(tr2) => diff (tr1, tr2) case _ => Nil }.getOrElse(Nil) case _ => Nil } } val isabelle_tool = Isabelle_Tool("afp_check_metadata", "Checks the AFP metadata files", Scala_Project.here, { args => var strict = false val getopts = Getopts(""" Usage: isabelle afp_check_metadata [OPTIONS] Options are: -S strict mode (fail on warnings) Check AFP metadata files for consistency. """, "S" -> (_ => strict = true)) getopts(args) val progress = new Console_Progress() val afp_structure = AFP_Structure() progress.echo("Checking author file...") val authors = afp_structure.load_authors.map(author => author.id -> author).toMap - def sub_topics(topic: Topic): List[Topic] = - topic :: topic.sub_topics.flatMap(sub_topics) progress.echo("Checking topic file...") val root_topics = afp_structure.load_topics - val topics = Utils.grouped_sorted(root_topics.flatMap(sub_topics), (t: Topic) => t.id) + val topics = Utils.grouped_sorted(root_topics.flatMap(_.all_topics), (t: Topic) => t.id) progress.echo("Checking license file....") val licenses = afp_structure.load_licenses.map(license => license.id -> license).toMap progress.echo("Checking release file....") val releases = afp_structure.load_releases.groupBy(_.entry) progress.echo("Checking entry files...") val entries = afp_structure.entries.map(name => afp_structure.load_entry(name, authors, topics, licenses, releases)) /* TOML encoding/decoding */ def check_toml[A](kind: String, a: A, from: A => afp.TOML.T, to: afp.TOML.T => A): Unit = if (to(from(a)) != a) error("Inconsistent toml encode/decode: " + kind) progress.echo("Checking toml conversions...") check_toml("authors", authors.values.toList, TOML.from_authors, TOML.to_authors) check_toml("topics", root_topics, TOML.from_topics, TOML.to_topics) check_toml("licenses", licenses.values.toList, TOML.from_licenses, TOML.to_licenses) check_toml("releases", releases.values.flatten.toList, TOML.from_releases, TOML.to_releases) entries.foreach(entry => check_toml("entry " + entry.name, entry, TOML.from_entry, t => TOML.to_entry(entry.name, t, authors, topics, licenses, releases.getOrElse(entry.name, Nil)))) def warn(msg: String): Unit = if (strict) error(msg) else progress.echo_warning(msg) /* duplicate ids */ var seen_ids = Set.empty[String] def check_id(id: String): Unit = if (seen_ids.contains(id)) error("Duplicate id: " + id) else seen_ids += id progress.echo("Checking for duplicate ids...") authors.values.foreach { author => check_id(author.id) author.emails.map(_.id).foreach(check_id) author.homepages.map(_.id).foreach(check_id) } topics.values.map(_.id).foreach(check_id) licenses.values.map(_.id).foreach(check_id) entries.map(_.name).foreach(check_id) /* unused fields */ progress.echo("Checking for unused fields...") def check_unused_toml[A](file: Path, to: afp.TOML.T => A, from: A => afp.TOML.T): Unit = { val toml = afp.TOML.parse(File.read(file)) val recoded = from(to(toml)) val diff_keys = diff(afp.TOML.parse(File.read(file)), recoded) if (diff_keys.nonEmpty) warn("Unused fields: " + commas_quote(diff_keys)) } 
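    /* Editor's sketch (hypothetical values, not part of the check itself):
       `check_unused_toml` composes the recursive key `diff` from the top of
       this file with a decode/encode roundtrip, so any key that the parser
       accepted but the metadata model silently dropped is reported as unused. */
    locally {
      val parsed = afp.TOML.T("title" -> "A", "meta" -> afp.TOML.T("note" -> "n", "stray" -> "?"))
      val recoded = afp.TOML.T("title" -> "A", "meta" -> afp.TOML.T("note" -> "n"))
      assert(diff(parsed, recoded) == List("stray")) // "stray" survives parsing but not the roundtrip
    }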
check_unused_toml(afp_structure.authors_file, TOML.to_authors, TOML.from_authors) check_unused_toml(afp_structure.topics_file, TOML.to_topics, TOML.from_topics) check_unused_toml(afp_structure.licenses_file, TOML.to_licenses, TOML.from_licenses) check_unused_toml(afp_structure.releases_file, TOML.to_releases, TOML.from_releases) entries.foreach(entry => check_unused_toml(afp_structure.entry_file(entry.name), t => TOML.to_entry(entry.name, t, authors, topics, licenses, releases.getOrElse(entry.name, Nil)), TOML.from_entry)) /* unused values */ def warn_unused(name: String, unused: Set[String]): Unit = if (unused.nonEmpty) warn("Extra (unused) " + name + ": " + commas_quote(unused.toList)) progress.echo("Checking for unused values...") val all_affils = entries.flatMap(entry => entry.authors ++ entry.contributors ++ entry.notifies) warn_unused("authors", authors.keySet diff all_affils.map(_.author).toSet) def author_affil_id(author: Author.ID, affil: String): String = author + " " + affil val affils = authors.values.flatMap(author => (author.emails.map(_.id) ++ author.homepages.map(_.id)).map(author_affil_id(author.id, _))) val used_affils = all_affils.collect { case Email(author, id, _) => author_affil_id(author, id) case Homepage(author, id, _) => author_affil_id(author, id) } warn_unused("affiliations", affils.toSet diff used_affils.toSet) val leaf_topics = topics.values.filter(_.sub_topics.isEmpty).map(_.id) warn_unused("topics", leaf_topics.toSet diff entries.flatMap(_.topics).map(_.id).toSet) warn_unused("licenses", licenses.keySet diff entries.map(_.license.id).toSet) /* extra */ progress.echo("Checking dois...") entries.flatMap(entry => entry.related).collect { case d: DOI => d.formatted() } progress.echo("Checked " + authors.size + " authors with " + affils.size + " affiliations, " + topics.size + " topics, " + releases.values.flatten.size + " releases, " + licenses.size + " licenses, and " + entries.size + " entries.") }) } diff --git a/tools/afp_site_gen.scala b/tools/afp_site_gen.scala --- a/tools/afp_site_gen.scala +++ b/tools/afp_site_gen.scala @@ -1,467 +1,465 @@ /* Author: Fabian Huch, TU Muenchen Generation and compilation of SSG project for the AFP website. 
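   Editor's note: the generator emits JSON data files (authors, topics,
   statistics) plus one content file per entry and session into a Hugo
   project layout, and can then build or watch the site. A typical
   invocation (hypothetical directories) would be:

     isabelle afp_site_gen -H /tmp/afp-hugo -O /tmp/afp-site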
*/ package afp import isabelle._ import afp.Metadata._ object AFP_Site_Gen { /* cache */ class Cache(layout: Hugo.Layout) { private val doi_cache = Path.basic("dois.json") private var dois: Map[String, String] = { val file = layout.cache_dir + doi_cache if (file.file.exists) { val content = File.read(file) val json = try { isabelle.JSON.parse(content) } catch { case ERROR(msg) => error("Could not parse " + file.toString + ": " + msg) } JSON.to_dois(json) } else Map.empty } def resolve_doi(doi: DOI): String = { dois.get(doi.identifier) match { case Some(value) => value case None => val res = doi.formatted() dois += (doi.identifier -> res) layout.write_cache(doi_cache, JSON.from_dois(dois)) res } } } /* json */ object JSON { type T = isabelle.JSON.T object Object { type T = isabelle.JSON.Object.T def apply(entries: isabelle.JSON.Object.Entry*): T = isabelle.JSON.Object.apply(entries: _*) } def opt(k: String, v: String): Object.T = if (v.isEmpty) Object() else Object(k -> v) def opt(k: String, v: Option[T]): Object.T = v.map(v => Object(k -> v)).getOrElse(Object()) def opt[A <: Iterable[_]](k: String, vals: A): Object.T = if (vals.isEmpty) Object() else Object(k -> vals) def from_dois(dois: Map[String, String]): Object.T = dois def to_dois(dois: T): Map[String, String] = dois match { case m: Map[_, _] if m.keySet.forall(_.isInstanceOf[String]) && m.values.forall(_.isInstanceOf[String]) => m.asInstanceOf[Map[String, String]] case _ => error("Could not read dois") } def from_email(email: Email): Object.T = Object( "user" -> email.user.split('.').toList, "host" -> email.host.split('.').toList) def from_authors(authors: List[Author]): Object.T = authors.map(author => author.id -> (Object( "name" -> author.name, "emails" -> author.emails.map(from_email), "homepages" -> author.homepages.map(_.url.toString)) ++ opt("orcid", author.orcid.map(orcid => Object( "id" -> orcid.identifier, "url" -> orcid.url.toString))))).toMap def from_classification(classification: Classification): Object.T = Object( "desc" -> classification.desc, "url" -> classification.url.toString, "type" -> (classification match { case _: ACM => "ACM" case _: AMS => "AMS" })) def from_topics(topics: List[Topic]): Object.T = Object(topics.map(topic => topic.name -> ( opt("classification", topic.classification.map(from_classification)) ++ opt("topics", from_topics(topic.sub_topics)))): _*) def from_affiliations(affiliations: List[Affiliation]): Object.T = { Utils.group_sorted(affiliations, (a: Affiliation) => a.author).view.mapValues( { author_affiliations => val homepage = author_affiliations.collectFirst { case homepage: Homepage => homepage } val email = author_affiliations.collectFirst { case email: Email => email } Object() ++ opt("homepage", homepage.map(_.url.toString)) ++ opt("email", email.map(from_email)) }).toMap } def from_change_history(entry: (Metadata.Date, String)): Object.T = Object( "date" -> entry._1.toString, "value" -> entry._2) def from_release(release: Release): Object.T = Object( "date" -> release.date.toString, "isabelle" -> release.isabelle) def from_related(related: Reference, cache: Cache): T = related match { case d: DOI => val href = d.url.toString cache.resolve_doi(d).replace(href, "" + href + "") case Formatted(text) => text } def from_entry(entry: Entry, cache: Cache): Object.T = ( Object( "title" -> entry.title, "authors" -> entry.authors.map(_.author).distinct, "affiliations" -> from_affiliations(entry.authors ++ entry.contributors), "date" -> entry.date.toString, "topics" -> entry.topics.map(_.id), 
"abstract" -> entry.`abstract`, "license" -> entry.license.name) ++ opt("contributors", entry.contributors.map(_.author).distinct) ++ opt("releases", entry.releases.sortBy(_.isabelle).reverse.map(from_release)) ++ opt("note", entry.note) ++ opt("history", entry.change_history.toList.sortBy(_._1).reverse.map(from_change_history)) ++ opt("extra", entry.extra) ++ opt("related", entry.related.map(from_related(_, cache)))) def from_keywords(keywords: List[String]): T = keywords.zipWithIndex.map { case (keyword, i) => Object( "id" -> i, "keyword" -> keyword) } } /* stats */ def afp_stats(deps: Sessions.Deps, structure: AFP_Structure, entries: List[Entry]): JSON.T = { def round(int: Int): Int = Math.round(int.toFloat / 100) * 100 def nodes(entry: Entry): List[Document.Node.Name] = structure.entry_sessions(entry.name) .flatMap(session => deps(session.name).proper_session_theories) val theorem_commands = List("theorem", "lemma", "corollary", "proposition", "schematic_goal") var entry_lines = Map.empty[Entry, Int] var entry_lemmas = Map.empty[Entry, Int] for { entry <- entries node <- nodes(entry) lines = split_lines(File.read(node.path)).map(_.trim) } { entry_lines += entry -> (entry_lines.getOrElse(entry, 0) + lines.count(_.nonEmpty)) entry_lemmas += entry -> (entry_lemmas.getOrElse(entry, 0) + lines.count(line => theorem_commands.exists(line.startsWith))) } val first_year = entries.flatMap(_.releases).map(_.date.getYear).min def years(upto: Int): List[Int] = Range.inclusive(first_year, upto).toList val current_year = Date.now().rep.getYear val all_years = years(current_year) // per Isabelle release year val by_year = entries.groupBy(_.date.getYear) val size_by_year = by_year.view.mapValues(_.length).toMap val loc_by_year = by_year.view.mapValues(_.map(entry_lines).sum).toMap val authors_by_year = by_year.view.mapValues(_.flatMap(_.authors).map(_.author)).toMap val num_lemmas = entries.map(entry_lemmas).sum val num_lines = entries.map(entry_lines).sum // accumulated def total_articles(year: Int): Int = years(year).map(size_by_year.getOrElse(_, 0)).sum def total_loc(year: Int): Int = round(years(year).map(loc_by_year.getOrElse(_, 0)).sum) def total_authors(year: Int): Int = years(year).flatMap(authors_by_year.getOrElse(_, Nil)).distinct.length def fresh_authors(year: Int): Int = total_authors(year) - total_authors(year - 1) val sorted = entries.sortBy(_.date) def map_repetitions(elems: List[String], to: String): List[String] = elems.foldLeft(("", List.empty[String])) { case((last, acc), s) => (s, acc :+ (if (last == s) to else s)) }._2 isabelle.JSON.Object( "years" -> all_years, "num_lemmas" -> num_lemmas, "num_loc" -> num_lines, "articles_year" -> all_years.map(total_articles), "loc_years" -> all_years.map(total_loc), "author_years" -> all_years.map(fresh_authors), "author_years_cumulative" -> all_years.map(total_authors), "loc_articles" -> sorted.map(entry_lines), "all_articles" -> sorted.map(_.name), "article_years_unique" -> map_repetitions(sorted.map(_.date.getYear.toString), "")) } /* site generation */ def afp_site_gen( layout: Hugo.Layout, status_file: Option[Path], afp_structure: AFP_Structure, clean: Boolean = false, progress: Progress = new Progress() ): Unit = { /* clean old */ if (clean) { progress.echo("Cleaning up generated files...") layout.clean() } /* add topics */ progress.echo("Preparing topics...") val topics = afp_structure.load_topics - def sub_topics(topic: Metadata.Topic): List[Metadata.Topic] = - topic :: topic.sub_topics.flatMap(sub_topics) - - val topics_by_id = 
Utils.grouped_sorted(topics.flatMap(sub_topics), (t: Metadata.Topic) => t.id) + val topics_by_id = + Utils.grouped_sorted(topics.flatMap(_.all_topics), (t: Metadata.Topic) => t.id) layout.write_data(Path.basic("topics.json"), JSON.from_topics(topics)) /* add licenses */ progress.echo("Preparing licenses...") val licenses_by_id = Utils.grouped_sorted(afp_structure.load_licenses, (l: Metadata.License) => l.id) /* add releases */ progress.echo("Preparing releases...") val releases_by_entry = afp_structure.load_releases.groupBy(_.entry) /* prepare authors and entries */ progress.echo("Preparing authors...") val full_authors = afp_structure.load_authors val authors_by_id = Utils.grouped_sorted(full_authors, (a: Metadata.Author) => a.id) var seen_affiliations: List[Affiliation] = Nil val entries = afp_structure.entries.flatMap { name => val entry = afp_structure.load_entry(name, authors_by_id, topics_by_id, licenses_by_id, releases_by_entry) if (entry.sitegen_ignore) None else { seen_affiliations = seen_affiliations :++ entry.authors ++ entry.contributors Some(entry) } } val authors = Utils.group_sorted(seen_affiliations.distinct, (a: Affiliation) => a.author).map { case (id, affiliations) => val seen_emails = affiliations.collect { case e: Email => e } val seen_homepages = affiliations.collect { case h: Homepage => h } authors_by_id(id).copy(emails = seen_emails, homepages = seen_homepages) } layout.write_data(Path.basic("authors.json"), JSON.from_authors(authors.toList)) /* extract keywords */ progress.echo("Extracting keywords...") var seen_keywords = Set.empty[String] val entry_keywords = entries.map { entry => val scored_keywords = Rake.extract_keywords(entry.`abstract`) seen_keywords ++= scored_keywords.map(_._1) entry.name -> scored_keywords.map(_._1) }.toMap seen_keywords = seen_keywords.filter(k => !k.endsWith("s") || !seen_keywords.contains(k.stripSuffix("s"))) layout.write_static(Path.make(List("data", "keywords.json")), JSON.from_keywords(seen_keywords.toList)) def get_keywords(name: Metadata.Entry.Name): List[String] = entry_keywords(name).filter(seen_keywords.contains).take(8) /* add entries and theory listings */ progress.echo("Preparing entries...") val sessions_structure = afp_structure.sessions_structure val sessions_deps = Sessions.deps(sessions_structure) val cache = new Cache(layout) entries.foreach { entry => val deps = for { session <- afp_structure.entry_sessions(entry.name) dep <- sessions_structure.imports_graph.imm_preds(session.name) if session.name != dep && sessions_structure(dep).groups.contains("AFP") } yield dep val theories = afp_structure.entry_sessions(entry.name).map { session => val base = sessions_deps(session.name) val theories = base.proper_session_theories.map(_.theory_base_name) val session_json = isabelle.JSON.Object( "title" -> session.name, "entry" -> entry.name, "url" -> ("/theories/" + session.name.toLowerCase), "theories" -> theories) layout.write_content(Path.make(List("theories", session.name + ".md")), session_json) isabelle.JSON.Object("session" -> session.name, "theories" -> theories) } val entry_json = JSON.from_entry(entry, cache) ++ isabelle.JSON.Object( "dependencies" -> deps.distinct, "sessions" -> theories, "url" -> ("/entries/" + entry.name + ".html"), "keywords" -> get_keywords(entry.name)) layout.write_content(Path.make(List("entries", entry.name + ".md")), entry_json) } /* add statistics */ progress.echo("Preparing statistics...") val statistics_json = afp_stats(sessions_deps, afp_structure, entries) 
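    /* Editor's sketch: inside `afp_stats`, the local `map_repetitions` blanks
       out consecutive duplicates, so "article_years_unique" labels each
       release year only once, e.g. (hypothetical input):
         map_repetitions(List("2004", "2004", "2005"), "")
           == List("2004", "", "2005") */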
layout.write_data(Path.basic("statistics.json"), statistics_json) /* project */ progress.echo("Preparing project files") layout.copy_project() /* status */ status_file match { case Some(status_file) => progress.echo("Preparing devel version...") val status_json = isabelle.JSON.parse(File.read(status_file)) layout.write_data(Path.basic("status.json"), status_json) case None => } progress.echo("Finished sitegen preparation.") } /* build site */ def afp_build_site( out_dir: Path, layout: Hugo.Layout, do_watch: Boolean = false, clean: Boolean = false, progress: Progress = new Progress() ): Unit = { if (clean) { progress.echo("Cleaning output dir...") Hugo.clean(out_dir) } if (do_watch) { Hugo.watch(layout, out_dir, progress).check } else { progress.echo("Building site...") Hugo.build(layout, out_dir).check progress.echo("Build in " + (out_dir + Path.basic("index.html")).absolute.implode) } } /* tool wrapper */ val isabelle_tool = Isabelle_Tool("afp_site_gen", "generates afp website source", Scala_Project.here, { args => var base_dir = Path.explode("$AFP_BASE") var status_file: Option[Path] = None var hugo_dir = base_dir + Path.make(List("web", "hugo")) var out_dir: Path = base_dir + Path.make(List("web", "out")) var build_only = false var devel_mode = false var fresh = false val getopts = Getopts(""" Usage: isabelle afp_site_gen [OPTIONS] Options are: -B DIR afp base dir (default """" + base_dir.implode + """") -D FILE build status file for devel version -H DIR generated hugo project dir (default """" + hugo_dir.implode + """") -O DIR output dir for build (default """ + out_dir.implode + """) -b build only -d devel mode (overrides hugo dir, builds site in watch mode) -f fresh build: clean up existing hugo and build directories Generates the AFP website source. HTML files of entries are dynamically loaded. Providing a status file will build the development version of the archive. Site will be built from generated source if output dir is specified. """, "B:" -> (arg => base_dir = Path.explode(arg)), "D:" -> (arg => status_file = Some(Path.explode(arg))), "H:" -> (arg => hugo_dir = Path.explode(arg)), "O:" -> (arg => out_dir = Path.explode(arg)), "b" -> (_ => build_only = true), "d" -> (_ => devel_mode = true), "f" -> (_ => fresh = true)) getopts(args) status_file.foreach(path => if (!path.is_file || !path.file.exists()) error("Invalid status file: " + path)) if (devel_mode) hugo_dir = base_dir + Path.make(List("admin", "site")) val afp_structure = AFP_Structure(base_dir) val layout = Hugo.Layout(hugo_dir) val progress = new Console_Progress() if (!build_only) { progress.echo("Preparing site generation in " + hugo_dir.implode) afp_site_gen(layout = layout, status_file = status_file, afp_structure = afp_structure, clean = fresh, progress = progress) } afp_build_site(out_dir = out_dir, layout = layout, do_watch = devel_mode, clean = fresh, progress = progress) }) } \ No newline at end of file diff --git a/tools/afp_structure.scala b/tools/afp_structure.scala --- a/tools/afp_structure.scala +++ b/tools/afp_structure.scala @@ -1,127 +1,125 @@ /* Author: Fabian Huch, TU Muenchen AFP Metadata file structure with save and load operations. 
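   Editor's note: the directory layout assumed by the accessors below is
   (reconstructed from this file, shown here for orientation):

     $AFP_BASE/
       metadata/
         authors.toml  releases.toml  licenses.toml  topics.toml
         entries/<name>.toml
       thys/
         ROOTS
         <entry>/ROOT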
*/ package afp import isabelle._ class AFP_Structure private(val base_dir: Path) { /* files */ val metadata_dir = base_dir + Path.basic("metadata") val thys_dir = base_dir + Path.basic("thys") val authors_file = metadata_dir + Path.basic("authors.toml") val releases_file = metadata_dir + Path.basic("releases.toml") val licenses_file = metadata_dir + Path.basic("licenses.toml") val topics_file = metadata_dir + Path.basic("topics.toml") val entries_dir = metadata_dir + Path.basic("entries") def entry_file(name: Metadata.Entry.Name): Path = entries_dir + Path.basic(name + ".toml") /* load */ private def load[A](file: Path, parser: afp.TOML.T => A): A = { val content = File.read(file) val toml = try { TOML.parse(content) } catch { case ERROR(msg) => error("Could not parse " + file.toString + ": " + msg) } parser(toml) } def load_authors: List[Metadata.Author] = load(authors_file, Metadata.TOML.to_authors) def load_releases: List[Metadata.Release] = load(releases_file, Metadata.TOML.to_releases) def load_licenses: List[Metadata.License] = load(licenses_file, Metadata.TOML.to_licenses) def load_topics: List[Metadata.Topic] = load(topics_file, Metadata.TOML.to_topics) def load_entry(name: Metadata.Entry.Name, authors: Map[Metadata.Author.ID, Metadata.Author], topics: Map[Metadata.Topic.ID, Metadata.Topic], licenses: Map[Metadata.License.ID, Metadata.License], releases: Map[Metadata.Entry.Name, List[Metadata.Release]] ): Metadata.Entry = { val entry_releases = releases.getOrElse(name, Nil) load(entry_file(name), toml => Metadata.TOML.to_entry(name, toml, authors, topics, licenses, entry_releases)) } def load(): List[Metadata.Entry] = { val authors = load_authors.map(author => author.id -> author).toMap - def sub_topics(topic: Metadata.Topic): List[Metadata.Topic] = - topic :: topic.sub_topics.flatMap(sub_topics) - val topics = Utils.grouped_sorted(load_topics.flatMap(sub_topics), (t: Metadata.Topic) => t.id) + val topics = Utils.grouped_sorted(load_topics.flatMap(_.all_topics), (t: Metadata.Topic) => t.id) val licenses = load_licenses.map(license => license.id -> license).toMap val releases = load_releases.groupBy(_.entry) entries.map(name => load_entry(name, authors, topics, licenses, releases)) } /* save */ private def save(file: Path, content: afp.TOML.T): Unit = { - file.file.mkdirs() + file.dir.file.mkdirs() File.write(file, TOML.Format(content)) } def save_authors(authors: List[Metadata.Author]): Unit = save(authors_file, Metadata.TOML.from_authors(authors)) def save_releases(releases: List[Metadata.Release]): Unit = save(releases_file, Metadata.TOML.from_releases(releases)) def save_topics(topics: List[Metadata.Topic]): Unit = save(topics_file, Metadata.TOML.from_topics(topics)) def save_entry(entry: Metadata.Entry): Unit = save(entry_file(entry.name), Metadata.TOML.from_entry(entry)) /* sessions */ def entries: List[Metadata.Entry.Name] = { val Entry = """([a-zA-Z0-9+_-]+)\.toml""".r val file_entries = File.read_dir(entries_dir).map { case Entry(name) => name case f => error("Unrecognized file in metadata: " + f) } val session_entries = Sessions.parse_roots(thys_dir + Path.basic("ROOTS")) val session_set = session_entries.toSet val metadata_set = file_entries.toSet if (session_set != metadata_set) { val inter = session_set.intersect(metadata_set) val session_without_metadata = if (session_set.subsetOf(inter)) "" else "No metadata for session in ROOTS: " + commas_quote(session_set -- inter) val metadata_without_session = if (metadata_set.subsetOf(inter)) "" else "Metadata entries missing in 
ROOTS: " + commas_quote(metadata_set -- inter) error("Metadata does not match sessions:\n" + session_without_metadata + metadata_without_session) } else session_entries } def sessions_structure: Sessions.Structure = Sessions.load_structure(options = Options.init(), select_dirs = List(thys_dir)) def entry_sessions(name: Metadata.Entry.Name): List[Sessions.Session_Entry] = Sessions.parse_root(thys_dir + Path.make(List(name, "ROOT"))).collect { case e: Sessions.Session_Entry => e } def hg_id: String = Mercurial.repository(base_dir).id() } object AFP_Structure { def apply(base_dir: Path = Path.explode("$AFP_BASE")): AFP_Structure = new AFP_Structure(base_dir.absolute) } diff --git a/tools/metadata.scala b/tools/metadata.scala --- a/tools/metadata.scala +++ b/tools/metadata.scala @@ -1,430 +1,433 @@ /* Author: Fabian Huch, TU Muenchen AFP metadata model and TOML serialization. */ package afp import isabelle._ import afp.TOML._ import java.time.LocalDate import java.net.{URL, URI} object Metadata { /* affiliations */ sealed trait Affiliation { def author: Author.ID } case class Unaffiliated(override val author: Author.ID) extends Affiliation case class Email(override val author: Author.ID, id: Email.ID, address: String) extends Affiliation { private val Address = "([^@]+)@(.+)".r val (user, host) = address match { case Address(user, host) => (user, host) case _ => error("Invalid address: " + address) } } object Email { type ID = String def apply(author: Author.ID, id: Email.ID, user: String, host: String): Email = Email(author, id, user + "@" + host) } case class Homepage(override val author: Author.ID, id: Homepage.ID, url: URL) extends Affiliation object Homepage { type ID = String } /* authors */ case class Orcid(identifier: String) { require( "^([0-9]{4})-([0-9]{4})-([0-9]{4})-([0-9]{3}[0-9X])$".r.matches(identifier), "Invalid format for orcid: " + quote(identifier)) def url: URL = new URL("https", "orcid.org", "/" + identifier) } case class Author( id: Author.ID, name: String, emails: List[Email] = Nil, homepages: List[Homepage] = Nil, orcid: Option[Orcid] = None ) object Author { type ID = String } /* topics */ sealed trait Classification { def desc: String def url: URL } case class ACM(id: String, override val desc: String) extends Classification { val url = new URL("https", "dl.acm.org", "/topic/ccs2012/" + id) } case class AMS(id: String, hierarchy: List[String]) extends Classification { val code: String = id.length match { case 2 => id + "-XX" case 3 => id + "xx" case 5 => id case _ => error("Invalid ams id:" + id) } override val desc: String = hierarchy.mkString(" / ") override val url: URL = new URL("https", "mathscinet.ams.org", "/mathscinet/msc/msc2020.html?t=" + code) } case class Topic( id: Topic.ID, name: String, classification: List[Classification] = Nil, - sub_topics: List[Topic] = Nil) + sub_topics: List[Topic] = Nil + ) { + def all_topics: List[Topic] = this :: sub_topics.flatMap(_.all_topics) + } object Topic { type ID = String } /* releases */ type Date = LocalDate object Isabelle { type Version = String } case class Release(entry: Entry.Name, date: Date, isabelle: Isabelle.Version) /* license */ case class License(id: License.ID, name: String) object License { type ID = String } /* references */ sealed trait Reference case class DOI(identifier: String) extends Reference { - require("^10.([1-9][0-9]{3})/(.+)".r.matches(identifier), + require("^10.([1-9][0-9]{3,})/(.+)".r.matches(identifier), "invalid format for DOI: " + quote(identifier)) def uri: URI = new URI("doi:" + 
identifier) def url: URL = new URL("https", "doi.org", "/" + identifier) def formatted(style: String = "apa"): String = Utils.fetch_text(url, Map("Accept" -> ("text/x-bibliography; style=" + style))) } case class Formatted(rep: String) extends Reference /* misc */ type Change_History = Map[Date, String] type Extra = Map[String, String] /* entry */ case class Entry( name: Entry.Name, title: String, authors: List[Affiliation], date: Date, topics: List[Topic], `abstract`: String, notifies: List[Email], license: License, note: String, contributors: List[Affiliation] = Nil, change_history: Change_History = Map.empty, extra: Extra = Map.empty, releases: List[Release] = Nil, sitegen_ignore: Boolean = false, related: List[Reference] = Nil) object Entry { type Name = String } /* toml */ object TOML { private def by_id[A](elems: Map[String, A], id: String): A = - elems.getOrElse(id, error("Elem " + quote(id) + " not found in " + commas_quote(elems.map(_.toString)))) + elems.getOrElse(id, error("Elem " + quote(id) + " not found in " + commas_quote(elems.keys))) /* email */ def from_email(email: Email): T = T( "user" -> email.user.split('.').toList, "host" -> email.host.split('.').toList) def to_email(author_id: Author.ID, email_id: Email.ID, email: T): Email = { val user = get_as[List[String]](email, "user") val host = get_as[List[String]](email, "host") Email(author_id, email_id, user.mkString("."), host.mkString(".")) } /* author */ def from_author(author: Author): T = T( "name" -> author.name, "emails" -> T(author.emails.map(email => email.id -> from_email(email))), "homepages" -> T(author.homepages.map(homepage => homepage.id -> homepage.url.toString))) ++ author.orcid.map(orcid => T("orcid" -> orcid.identifier)).getOrElse(T()) def to_author(author_id: Author.ID, author: T): Author = { val emails = split_as[T](get_as[T](author, "emails")) map { case (id, email) => to_email(author_id, id, email) } val homepages = split_as[String](get_as[T](author, "homepages")) map { case (id, url) => Homepage(author = author_id, id = id, url = new URL(url)) } val orcid = author.get("orcid").flatMap { case orcid: String => Some(Orcid(orcid)) case o => error("Could not read oricid: " + quote(o.toString)) } Author( id = author_id, name = get_as[String](author, "name"), orcid = orcid, emails = emails, homepages = homepages) } def from_authors(authors: List[Author]): T = T(authors.map(author => author.id -> from_author(author))) def to_authors(authors: T): List[Author] = split_as[T](authors).map { case (id, author) => to_author(id, author) } /* topics */ def from_acm(acm: ACM): T = T("id" -> acm.id, "desc" -> acm.desc) def to_acm(acm: T): ACM = ACM(get_as[String](acm, "id"), get_as[String](acm, "desc")) def from_ams(ams: AMS): T = T("id" -> ams.id, "hierarchy" -> ams.hierarchy) def to_ams(ams: T): AMS = AMS(get_as[String](ams, "id"), get_as[List[String]](ams, "hierarchy")) def from_classifications(classifications: List[Classification]): T = T(classifications map { case acm: ACM => "acm" -> from_acm(acm) case ams: AMS => "ams" -> from_ams(ams) }) def to_classifications(classifications: T): List[Classification] = { split_as[T](classifications).map { case ("ams", ams) => to_ams(ams) case ("acm", acm) => to_acm(acm) case (c, _) => error("Unknown topic classification: " + quote(c)) } } def from_topics(root_topics: List[Topic]): T = T(root_topics.map { t => t.name -> ( T("classification" -> from_classifications(t.classification)) ++ from_topics(t.sub_topics)) }) def to_topics(root_topics: T): List[Topic] = { def 
to_topics_rec(topics: List[(String, T)], root: Topic.ID): List[Topic] = { topics.map { case (name, data) => val id = (if (root.nonEmpty) root + "/" else "") + name val classifications = data.get("classification").map { case T(t) => to_classifications(t) case o => error("Could not read classifications: " + quote(o.toString)) } getOrElse Nil val sub_topics = split_as[T](data).filterNot { case (name, _ ) => name == "classification" } Topic(id, name, classifications, to_topics_rec(sub_topics, id)) } } to_topics_rec(split_as[T](root_topics), "") } /* releases */ def from_releases(releases: List[Release]): T = T(Utils.group_sorted(releases, (r: Release) => r.entry).view.mapValues { entry_releases => T(entry_releases.map(r => r.date.toString -> r.isabelle)) }.toList) def to_releases(map: T): List[Release] = split_as[T](map).flatMap { case (entry, releases) => split_as[String](releases).map { case (date, version) => Release(entry = entry, date = LocalDate.parse(date), isabelle = version) } } /* affiliation */ def from_affiliations(affiliations: List[Affiliation]): T = T(Utils.group_sorted(affiliations, (a: Affiliation) => a.author).view.mapValues(vs => T(vs.collect { case Email(_, id, _) => "email" -> id case Homepage(_, id, _) => "homepage" -> id })).toList) def to_affiliations(affiliations: T, authors: Map[Author.ID, Author]): List[Affiliation] = { def to_affiliation(affiliation: (String, String), author: Author): Affiliation = { affiliation match { case ("email", id: String) => author.emails.find(_.id == id) getOrElse error("Email not found: " + quote(id)) case ("homepage", id: String) => author.homepages.find(_.id == id) getOrElse error("Homepage not found: " + quote(id)) case e => error("Unknown affiliation type: " + e) } } split_as[T](affiliations).flatMap { case (id, author_affiliations) => val author = by_id(authors, id) if (author_affiliations.isEmpty) List(Unaffiliated(author.id)) else split_as[String](author_affiliations).map(to_affiliation(_, author)) } } def from_emails(emails: List[Email]): T = T(emails.map(email => email.author -> email.id)) def to_emails(emails: T, authors: Map[Author.ID, Author]): List[Email] = emails.toList.map { case (author, id: String) => by_id(authors, author).emails.find(_.id == id) getOrElse error("Email not found: " + quote(id)) case e => error("Unknown email: " + quote(e.toString)) } /* license */ def from_licenses(licenses: List[License]): T = T(licenses.map(license => license.id -> T("name" -> license.name))) def to_licenses(licenses: T): List[License] = { split_as[T](licenses) map { case (id, license) => License(id, get_as[String](license, "name")) } } def to_license(license: String, licenses: Map[License.ID, License]): License = licenses.getOrElse(license, error("No such license: " + quote(license))) /* history */ def from_change_history(change_history: Change_History): T = change_history.map { case (date, str) => date.toString -> str } def to_change_history(change_history: T): Change_History = change_history.map { case (date, entry: String) => LocalDate.parse(date) -> entry case e => error("Unknown history entry: " + quote(e.toString)) } /* references */ def from_related(references: List[Reference]): T = { val dois = references collect { case d: DOI => d } val formatted = references collect { case f: Formatted => f } T( "dois" -> dois.map(_.identifier), "pubs" -> formatted.map(_.rep)) } def to_related(references: T): List[Reference] = { val dois = optional_as[List[String]](references, "dois").getOrElse(Nil) val pubs = 
optional_as[List[String]](references, "pubs").getOrElse(Nil) dois.map(DOI.apply) ++ pubs.map(Formatted.apply) } /* entry */ def from_entry(entry: Entry): T = { T( "title" -> entry.title, "authors" -> from_affiliations(entry.authors), "contributors" -> from_affiliations(entry.contributors), "date" -> entry.date, "topics" -> entry.topics.map(_.id), "abstract" -> entry.`abstract`, "notify" -> from_emails(entry.notifies), "license" -> entry.license.id, "note" -> entry.note, "history" -> from_change_history(entry.change_history), "extra" -> entry.extra, "related" -> from_related(entry.related)) ++ (if (entry.sitegen_ignore) T("sitegen_ignore" -> true) else T()) } def to_entry( name: Entry.Name, entry: T, authors: Map[Author.ID, Author], topics: Map[Topic.ID, Topic], licenses: Map[License.ID, License], releases: List[Release] ): Entry = Entry( name = name, title = get_as[String](entry, "title"), authors = to_affiliations(get_as[T](entry, "authors"), authors), date = get_as[Date](entry, "date"), topics = get_as[List[String]](entry, "topics").map(by_id(topics, _)), `abstract` = get_as[String](entry, "abstract"), notifies = to_emails(get_as[T](entry, "notify"), authors), license = to_license(get_as[String](entry, "license"), licenses), note = get_as[String](entry, "note"), contributors = to_affiliations(get_as[T](entry, "contributors"), authors), change_history = to_change_history(get_as[T](entry, "history")), extra = get_as[Extra](entry, "extra"), releases = releases, sitegen_ignore = optional_as[Boolean](entry, "sitegen_ignore").getOrElse(false), related = to_related(get_as[T](entry, "related"))) } } diff --git a/tools/migration/afp_migrate_metadata.scala b/tools/migration/afp_migrate_metadata.scala --- a/tools/migration/afp_migrate_metadata.scala +++ b/tools/migration/afp_migrate_metadata.scala @@ -1,602 +1,599 @@ package afp.migration import isabelle._ import afp._ import afp.Metadata.{TOML => _, _} import scala.collection.BufferedIterator import java.io.BufferedReader import java.text.Normalizer import java.time.LocalDate import java.net.URL import org.jline.utils.InputStreamReader object AFP_Migrate_Metadata { class Context( names_mapping: Map[String, String], email_names: Map[String, String], dates_mapping: Map[String, String] ) { /* mappings */ def transform_name(name: String): String = names_mapping.getOrElse(name, name) def parse_date(date: String): Metadata.Date = LocalDate.parse(dates_mapping.getOrElse(date, date)) def name(address: String): String = email_names.getOrElse(address, error("No name for address " + address)) /* seen */ private var _seen_authors = Set.empty[Author.ID] private var _seen_emails = Set.empty[Email.ID] private var _seen_homepages = Set.empty[Homepage.ID] private var _seen_licenses = Map.empty[License.ID, License] private var _seen_author_names = Map.empty[String, Author] def seen_authors: Set[Author.ID] = _seen_authors def seen_emails: Set[Email.ID] = _seen_emails def seen_homepages: Set[Homepage.ID] = _seen_homepages def author(name: String): Option[Author] = _seen_author_names.get(name) def authors: List[Author] = _seen_author_names.values.toList def licenses: List[License] = _seen_licenses.values.toList def update_author(author: Author): Unit = { _seen_authors += author.id _seen_author_names = _seen_author_names.updated(author.name, author) } def email(author: Author.ID): Email.ID = { val id = unique_id(author + "_email", this.seen_emails) _seen_emails += id id } def homepage(homepage: Homepage.ID): Homepage.ID = { val id = unique_id(homepage + "_homepage", 
this.seen_homepages) _seen_homepages += id id } def license(license_str: String): License = { val license = License(license_str.toLowerCase, license_str) _seen_licenses += license.id -> license license } } private def is_email(url: String) = url.contains("@") private def as_ascii(str: String) = { var res: String = str List("ö" -> "oe", "ü" -> "ue", "ä" -> "ae", "ß" -> "ss").foreach { case (c, rep) => res = res.replace(c, rep) } res = Normalizer.normalize(res, Normalizer.Form.NFD).replaceAll("[^\\x00-\\x7F]", "") if (res.exists(_ > 127)) error("Contained non convertible non-ascii character: " + str) res } def unique_id(prefix: String, ids: Set[String]): String = { if (!ids.contains(prefix)) prefix else { var num = 1 while (ids.contains(prefix + num)) { num += 1 } prefix + num } } def private_dom(full_dom: String): String = { val stream = getClass.getClassLoader.getResourceAsStream("public_suffix_list.dat") val reader = new BufferedReader(new InputStreamReader(stream)) val public_suffixes = File.read_lines(reader, _ => ()).filterNot(_.startsWith("//")) val stripped = public_suffixes.map(full_dom.stripSuffix(_)).minBy(_.length) if (stripped.endsWith(".")) stripped.dropRight(1) else stripped } def make_author_id(name: String, context: Context): String = { val normalized = as_ascii(name) val Suffix = """^.*?([a-zA-Z]*)$""".r val suffix = normalized match { case Suffix(suffix) => suffix case _ => "" } val Prefix = """^([a-zA-Z]*).*$""".r val prefix = normalized.stripSuffix(suffix) match { case Prefix(prefix) => prefix case _ => "" } var ident = suffix.toLowerCase for { c <- prefix.toLowerCase } { if (context.seen_authors.contains(ident)) { ident += c.toString } else return ident } unique_id(ident, context.seen_authors) } def author_urls(name_urls: String, context: Context): (String, List[String]) = { val name = AFP.trim_mail(name_urls) val urls_string = name_urls.stripPrefix(name).trim val transformed = context.transform_name(name) if (urls_string == "<>") { (transformed, List("")) } else { (transformed, urls_string.split(Array('<', '>')).toList.filterNot(_.isBlank)) } } def add_email(author: Author, address: String, context: Context): (Author, Email) = { val email = Email(author = author.id, id = context.email(author.id), address = address) val update_author = author.copy(emails = author.emails :+ email) context.update_author(update_author) (update_author, email) } def get_affiliations(name_url: String, context: Context): List[Affiliation] = { val (name, urls) = author_urls(name_url, context) val author = context.author(name).getOrElse(error("Author not found: " + name)) urls.map { url => if (is_email(url)) { val address = url.stripPrefix("mailto:") author.emails.find(_.address == address) getOrElse error("Email not found: " + (author, address)) } else if (url.isEmpty) { Unaffiliated(author.id) } else { author.homepages.find(_.url.toString == url) getOrElse error("Url not found for: " + (author, url)) } } } def get_email_affiliation(address: String, context: Context, progress: Progress): Email = { context.authors.flatMap(_.emails).find(_.address == address) match { case Some(email) => email case None => val name = context.name(address) val author = context.author(name) match { case Some(author) => author case None => progress.echo_warning("New author: " + name) Author(make_author_id(name, context), name, Nil, Nil) } add_email(author, address, context)._2 } } def update_author(name_urls: String, context: Context): Unit = { val (name, urls) = author_urls(name_urls, context) var author = 
context.author(name) match { case Some(author) => author case None => Author(make_author_id(name, context), name, Nil, Nil) } urls.foreach { url => if (is_email(url)) { val address = url.stripPrefix("mailto:") if (!author.emails.exists(_.address == address)) { author = add_email(author, address, context)._1 } } else if (url.nonEmpty && !author.homepages.exists(_.url.toString == url)) { val homepage = Homepage(author = author.id, id = context.homepage(author.id), url = new URL(url)) author = author.copy(homepages = author.homepages :+ homepage) } } context.update_author(author) } def map_entry( entry: AFP.Entry, releases: Map[Entry.Name, List[Release]], topics: Map[Topic.ID, Topic], sitegen_ignore: Boolean, context: Context, progress: Progress ): Entry = { val author_affiliations = entry.authors.flatMap(get_affiliations(_, context)) val contributor_affiliations = entry.contributors.flatMap(get_affiliations(_, context)) val notify_emails = entry.get_strings("notify").map(get_email_affiliation(_, context, progress)) val change_history = parse_change_history(entry.get_string("extra-history"), context) val extra = entry.metadata.filter { case (k, _) => k.startsWith("extra-") && k != "extra-history" } Entry( name = entry.name, title = entry.title, authors = author_affiliations, date = LocalDate.from(entry.date.rep), topics = entry.topics.map(topics), `abstract` = entry.`abstract`, notifies = notify_emails, contributors = contributor_affiliations, license = context.license(entry.license), note = entry.get_string("note"), change_history = change_history, extra = extra.toMap, releases = releases.getOrElse(entry.name, Nil), sitegen_ignore = sitegen_ignore ) } def parse_change_history(history: String, context: Context): Change_History = { val matches = """\[(\d{4}-\d{2}-\d{2})]:([^\[]+)""".r.findAllMatchIn(history.stripPrefix("Change history:")) matches.toList.map(_.subgroups match { case date :: content :: Nil => context.parse_date(date) -> content.trim case _ => error("Could not parse change history: " + quote(history)) }).toMap } def parse_topics(lines: List[String]): List[Topic] = { val lines_iterator: BufferedIterator[String] = lines.filterNot(_.isBlank).iterator.buffered def get_indent(line: String) = line.takeWhile(_.isWhitespace).length def parse_level(level: Int, root: Option[Topic.ID]): List[Topic] = { val name = lines_iterator.next().trim val id = root.map(_ + "/").getOrElse("") + name val (sub_topics, next_topics) = lines_iterator.headOption match { case Some(next) if get_indent(next) == level + 2 => val sub = parse_level(level + 2, Some(id)) val next = lines_iterator.headOption match { case Some(next1) if get_indent(next1) == level => parse_level(level, root) case _ => Nil } (sub, next) case Some(next) if get_indent(next) == level => (Nil, parse_level(level, root)) case _ => (Nil, Nil) } Topic(id = id, name = name, sub_topics = sub_topics) :: next_topics } parse_level(0, None) } def parse_releases( releases: List[String], isabelle_releases: List[String], all_entries: List[Entry.Name], context: Context ): List[Release] = { val Isa_Release = """(.+) = (.+)""".r val release_dates = isabelle_releases.filterNot(_.isBlank).map { case Isa_Release(isabelle_version, date) => context.parse_date(date) -> isabelle_version case line => error("Could not parse: " + quote(line)) } def to_release(entry: Entry.Name, release_date: LocalDate): Release = Release(entry, release_date, release_dates.findLast { case (date, _) => date.isBefore(release_date) }.get._2) val Entry_Release = 
"""afp-([a-zA-Z0-9_+-]+)-(\d{4}-\d{2}-\d{2})\.tar\.gz""".r val entry_releases = Utils.group_sorted( releases.filterNot(_.isBlank).map { case Entry_Release(entry, date_string) => val date = context.parse_date(date_string) entry -> to_release(entry, date) case line => error("Could not parse: " + quote(line)) }, (e: (Entry.Name, Release)) => e._1) all_entries.flatMap(e => entry_releases.getOrElse(e, Nil).map(_._2)) } def migrate_metadata( base_dir: Path, overwrite: Boolean, context: Context, options: Options = Options.init(), progress: Progress = new Progress() ): Unit = { val metadata = AFP.init(options, base_dir) val metadata_dir = base_dir + Path.basic("metadata") def read(file: Path): String = File.read(metadata_dir + file) def write(toml: TOML.T, file: Path) = { val path = metadata_dir + file if (!overwrite && path.file.exists) error("File already exists: " + path.file_name) else path.dir.file.mkdirs() File.write(path, TOML.Format(toml)) } /* topics */ progress.echo("Parsing topics...") val root_topics = parse_topics(split_lines(read(Path.basic("topics")))) - def sub_topics(topic: Topic): List[Topic] = - topic :: topic.sub_topics.flatMap(sub_topics) - - val topic_map = root_topics.flatMap(sub_topics).map(topic => topic.id -> topic).toMap + val topic_map = root_topics.flatMap(_.all_topics).map(topic => topic.id -> topic).toMap write(Metadata.TOML.from_topics(root_topics), Path.basic("topics.toml")) /* releases */ progress.echo("Parsing releases...") val releases = parse_releases( split_lines(read(Path.basic("releases"))), split_lines(read(Path.basic("release-dates"))), metadata.entries.map(_.name), context) val releases_map = releases.groupBy(_.entry) write(Metadata.TOML.from_releases(releases), Path.basic("releases.toml")) /* collect authors (without notify affiliations) */ progress.echo("Collecting authors...") for { entry <- metadata.entries name_url <- entry.authors ++ entry.contributors } update_author(name_url, context) /* entries */ progress.echo("Parsing entries...") for (entry_metadata <- metadata.entries) { val ignore_file = base_dir + Path.make(List("thys", entry_metadata.name, ".sitegen-ignore")) val sitegen_ignore = ignore_file.file.exists val entry = map_entry(entry_metadata, releases_map, topic_map, sitegen_ignore, context, progress) write(Metadata.TOML.from_entry(entry), Path.make(List("entries", entry.name + ".toml"))) } /* licenses */ write(Metadata.TOML.from_licenses(context.licenses), Path.basic("licenses.toml")) /* authors */ write(Metadata.TOML.from_authors(context.authors), Path.basic("authors.toml")) } val isabelle_tool = Isabelle_Tool("afp_migrate_metadata", "migrates old sitegen metadata to new format", Scala_Project.here, { args => var base_dir = Path.explode("$AFP_BASE") var names = List( "Lawrence C Paulson" -> "Lawrence C. Paulson", "Lawrence Paulson" -> "Lawrence C. Paulson", "Florian Kammueller" -> "Florian Kammüller", "Jasmin Blanchette" -> "Jasmin Christian Blanchette", "Ognjen Maric" -> "Ognjen Marić", "Maximilian P.L. Haslbeck" -> "Maximilian P. L. Haslbeck", "Maximilian Haslbeck" -> "Max W. Haslbeck", "Sebastiaan Joosten" -> "Sebastiaan J. C. Joosten", "Jacques Fleuriot" -> "Jacques D. 
Fleuriot", "Jose Manuel Rodriguez Caballero" -> "José Manuel Rodríguez Caballero") var emails = List( "a.bentkamp@vu.nl" -> "Alexander Bentkamp", "a.popescu@mdx.ac.uk" -> "Andrei Popescu", "a.popescu@sheffield.ac.uk" -> "Andrei Popescu", "afp@liftm.de" -> "Julius Michaelis", "ahfrom@dtu.dk" -> "Asta Halkjær From", "ak2110@cam.ac.uk" -> "Angeliki Koutsoukou-Argyraki", "akihisayamada@nii.ac.jp" -> "Akihisa Yamada", "aleje@dtu.dk" -> "Alexander Birch Jensen", "alexander.katovsky@cantab.net" -> "Alexander Katovsky", "alexander.maletzky@risc-software.at" -> "Alexander Maletzky", "alexander.maletzky@risc.jku.at" -> "Alexander Maletzky", "andreas.lochbihler@digitalasset.com" -> "Andreas Lochbihler", "andreasvhess@gmail.com" -> "Andreas V. Hess", "andschl@dtu.dk" -> "Anders Schlichtkrull", "apdb3@cam.ac.uk" -> "Anthony Bordg", "avigad@cmu.edu" -> "Jeremy Avigad", "ballarin@in.tum.de" -> "Clemens Ballarin", "bdd@liftm.de" -> "Julius Michaelis", "benedikt1999@freenet.de" -> "Benedikt Stock", "benblumson@gmail.com" -> "Ben Blumson", "berghofe@in.tum.de" -> "Stefan Berghofer", "bjbohrer@gmail.com" -> "Brandon Bohrer", "boehmes@in.tum.de" -> "Sascha Böhme", "brianh@cs.pdx.edu" -> "Brian Huffman", "brunnerj@in.tum.de" -> "Julian Brunner", "bzhan@ios.ac.cn" -> "Bohua Zhan", "c.benzmueller@fu-berlin.de" -> "Christoph Benzmüller", "c.benzmueller@gmail.com" -> "Christoph Benzmüller", "caw77@cam.ac.uk" -> "Conrad Watt", "cezary.kaliszyk@uibk.ac.at" -> "Cezary Kaliszyk", "christian.sternagel@uibk.ac.at" -> "Christian Sternagel", "christian.urban@kcl.ac.uk" -> "Christian Urban", "cle47@cam.ac.uk" -> "Chelsea Edmonds", "coglio@kestrel.edu" -> "Alessandro Coglio", "corey.lewis@data61.csiro.au" -> "Corey Lewis", "d1623001@s.konan-u.ac.jp" -> "Fumiya Iwama", "danijela@matf.bg.ac.rs" -> "Danijela Simić", "denis.lohner@kit.edu" -> "Denis Lohner", "denis.nikif@gmail.com" -> "Denis Nikiforov", "diego.marmsoler@tum.de" -> "Diego Marmsoler", "diekmann@net.in.tum.de" -> "Cornelius Diekmann", "dubut@nii.ac.jp" -> "Jérémy Dubut", "eminkarayel@google.com" -> "Emin Karayel", "f.smola@sms.ed.ac.uk" -> "Filip Smola", "fadouaghourabi@gmail.com" -> "Fadoua Ghourabi", "fimmler@andrew.cmu.edu" -> "Fabian Immler", "fimmler@cs.cmu.edu" -> "Fabian Immler", "fkjac@dtu.dk" -> "Frederik Krogsdal Jacobsen", "florian.haftmann@informatik.tu-muenchen.de" -> "Florian Haftmann", "florian.kammuller@gmail.com" -> "Florian Kammüller", "friedrich.kurz@tum.de" -> "Friedrich Kurz", "ftuong@lri.fr" -> "Frédéric Tuong", "g.struth@dcs.shef.ac.uk" -> "Georg Struth", "ggrov@inf.ed.ac.uk" -> "Gudmund Grov", "giamp@dmi.unict.it" -> "Giampaolo Bella", "giuliano@losa.fr" -> "Giuliano Losa", "Harald.Zankl@uibk.ac.at" -> "Harald Zankl", "haslbecm@in.tum.de" -> "Max W. Haslbeck", "haslbema@in.tum.de" -> "Maximilian P. L. Haslbeck", "heimesl@student.ethz.ch" -> "Lukas Heimes", "hetzl@logic.at" -> "Stefan Hetzl", "hirata.m.ac@m.titech.ac.jp" -> "Michikazu Hirata", "holub@karlin.mff.cuni.cz" -> "Štěpán Holub", "Ian.Hayes@itee.uq.edu.au" -> "Ian J. Hayes", "isabelleopenflow@liftm.de" -> "Julius Michaelis", "Jacques.Fleuriot@ed.ac.uk" -> "Jacques D. Fleuriot", "jason.jaskolka@carleton.ca" -> "Jason Jaskolka", "jdf@ed.ac.uk" -> "Jacques D. 
Fleuriot", "jeremy.sylvestre@ualberta.ca" -> "Jeremy Sylvestre", "jesus-maria.aransay@unirioja.es" -> "Jesús Aransay", "jonas.bayer999@gmail.com" -> "Jonas Bayer", "jonjulian23@gmail.com" -> "Jonathan Julian Huerta y Munive", "jose.divason@unirioja.es" -> "Jose Divasón", "jose.divasonm@unirioja.es" -> "Jose Divasón", "joseph-thommes@gmx.de" -> "Joseph Thommes", "jovi@dtu.dk" -> "Jørgen Villadsen", "jsiek@indiana.edu" -> "Jeremy Siek", "jsylvest@ualberta.ca" -> "Jeremy Sylvestre", "julian.nagele@uibk.ac.at" -> "Julian Nagele", "julian.parsert@uibk.ac.at" -> "Julian Parsert", "kevin.kappelmann@tum.de" -> "Kevin Kappelmann", "kkz@mit.edu" -> "Karen Zee", "kleing@unsw.edu.au" -> "Gerwin Klein", "krauss@in.tum.de" -> "Alexander Krauss", "kreuzerk@in.tum.de" -> "Katharina Kreuzer", "lars@hupel.info" -> "Lars Hupel", "lcp@cl.cam.ac.uk" -> "Lawrence C. Paulson", "lennart.beringer@ifi.lmu.de" -> "Lennart Beringer", "liwenda1990@hotmail.com" -> "Wenda Li", "m.raszyk@gmail.com" -> "Martin Raszyk", "mail@andreas-lochbihler.de" -> "Andreas Lochbihler", "mail@michael-herzberg.de" -> "Michael Herzberg", "maintainafpppt@liftm.de" -> "Julius Michaelis", "maksym.bortin@nicta.com.au" -> "Maksym Bortin", "manuel.eberl@tum.de" -> "Manuel Eberl", "marco.david@hotmail.de" -> "Marco David", "martin.desharnais@unibw.de" -> "Martin Desharnais", "mathias.fleury@jku.at" -> "Mathias Fleury", "matthias.brun@inf.ethz.ch" -> "Matthias Brun", "max.haslbeck@gmx.de" -> "Max W. Haslbeck", "maximilian.haslbeck@uibk.ac.at" -> "Max W. Haslbeck", "me@eminkarayel.de" -> "Emin Karayel", "MichaelNedzelsky@yandex.ru" -> "Michael Nedzelsky", "miguel.pagano@unc.edu.ar" -> "Miguel Pagano", "mihailsmilehins@gmail.com" -> "Mihails Milehins", "minamide@is.titech.ac.jp" -> "Yasuhiko Minamide", "mnacho.echenim@univ-grenoble-alpes.fr" -> "Mnacho Echenim", "mnfrd.krbr@gmail.com" -> "Manfred Kerber", "mohammad.abdulaziz8@gmail.com" -> "Mohammad Abdulaziz", "mohammad.abdulaziz@in.tum.de" -> "Mohammad Abdulaziz", "mr644@cam.ac.uk" -> "Michael Rawson", "mrtnrau@googlemail.com" -> "Martin Rau", "nanjiang@whu.edu.cn" -> "Nan Jiang", "Nicolas.Peltier@imag.fr" -> "Nicolas Peltier", "nipkow@in.tum.de" -> "Tobias Nipkow", "noschinl@gmail.com" -> "Lars Noschinski", "pagano@famaf.unc.edu.ar" -> "Miguel Pagano", "pc@cs.st-andrews.ac.uk" -> "Peter Chapman", "peter.lammich@uni-muenster.de" -> "Peter Lammich", "peter@hoefner-online.de" -> "Peter Höfner", "psterraf@unc.edu.ar" -> "Pedro Sánchez Terraf", "ralph.bottesch@uibk.ac.at" -> "Ralph Bottesch", "reza.sefidgar@inf.ethz.ch" -> "S. Reza Sefidgar", "richter@informatik.rwth-aachen.de" -> "Stefan Richter", "roelofoosterhuis@gmail.com" -> "Roelof Oosterhuis", "rok@strnisa.com" -> "Rok Strniša", "rosskops@in.tum.de" -> "Simon Roßkopf", "s.griebel@tum.de" -> "Simon Griebel", "s.j.c.joosten@utwente.nl" -> "Sebastiaan J. C. Joosten", "samo@dtu.dk" -> "Sebastian Mödersheim", "schirmer@in.tum.de" -> "Norbert Schirmer", "sidney.amani@data61.csiro.au" -> "Sidney Amani", "sjcjoosten@gmail.com" -> "Sebastiaan J. C. 
Joosten", "stepan.starosta@fit.cvut.cz" -> "Štěpán Starosta", "Stephan.Merz@loria.fr" -> "Stephan Merz", "sterraf@famaf.unc.edu.ar" -> "Pedro Sánchez Terraf", "stourret@mpi-inf.mpg.de" -> "Sophie Tourret", "stvienna@gmail.com" -> "Stephan Adelsberger", "susannahej@gmail.com" -> "Susannah Mansky", "tdardini@student.ethz.ch" -> "Thibault Dardinier", "thibault.dardinier@inf.ethz.ch" -> "Thibault Dardinier", "tim@tbrk.org" -> "Timothy Bourke", "toby.murray@unimelb.edu.au" -> "Toby Murray", "traytel@di.ku.dk" -> "Dmitriy Traytel", "traytel@in.tum.de" -> "Dmitriy Traytel", "traytel@inf.ethz.ch" -> "Dmitriy Traytel", "tsato@c.titech.ac.jp" -> "Tetsuya Sato", "uuomul@yahoo.com" -> "Andrei Popescu", "walter.guttman@canterbury.ac.nz" -> "Walter Guttmann", "walter.guttmann@canterbury.ac.nz" -> "Walter Guttmann", "wimmers@in.tum.de" -> "Simon Wimmer", "wl302@cam.ac.uk" -> "Wenda Li", "yongkiat@cs.cmu.edu" -> "Yong Kiam Tan", "Yutaka.Nagashima@data61.csiro.au" -> "Yutaka Nagashima") var dates = List( "2020-14-04" -> "2020-04-14", "2020-15-04" -> "2020-04-15") var overwrite = false val getopts = Getopts(""" Usage: isabelle afp_migrate_metadata [OPTIONS] Options are: -B DIR afp base dir (default "$AFP_BASE") -n FROM,TO names to convert (default: """ + names.mkString("\n ") + """) -e EMAIL,AUTHOR emails to associate (default: """ + emails.mkString("\n ") + """) -d FROM,TO date strings to convert (default: """ + dates.mkString("\n ") + """) -f overwrite existing Migrates old sitegen metadata to new format. """, "B:" -> (arg => base_dir = Path.explode(arg)), "n:" -> (arg => names ::= arg.splitAt(arg.indexOf(","))), "e:" -> (arg => emails ::= arg.splitAt(arg.indexOf(","))), "d:" -> (arg => dates ::= arg.splitAt(arg.indexOf(","))), "f" -> (_ => overwrite = true) ) getopts(args) val progress = new Console_Progress() val context = new Context(names.toMap, emails.toMap, dates.toMap) val options = Options.init() migrate_metadata( base_dir = base_dir, context = context, overwrite = overwrite, options = options, progress = progress) } ) } \ No newline at end of file diff --git a/tools/toml.scala b/tools/toml.scala --- a/tools/toml.scala +++ b/tools/toml.scala @@ -1,393 +1,393 @@ /* Author: Fabian Huch, TU Muenchen Support for TOML: https://toml.io/en/v1.0.0 */ package afp import isabelle._ import scala.collection.immutable.ListMap import scala.reflect.{ClassTag, classTag} import scala.util.Try import scala.util.matching.Regex import scala.util.parsing.combinator import scala.util.parsing.combinator.lexical.Scanners import scala.util.parsing.input.CharArrayReader.EofCh import java.time.{LocalDate, LocalDateTime, LocalTime, OffsetDateTime} object TOML { type Key = String type V = Any type T = Map[Key, V] object T { def apply(entries: (Key, V)*): T = ListMap(entries: _*) def apply(entries: List[(Key, V)]): T = ListMap(entries: _*) def unapply(t: Map[_, V]): Option[T] = { if (t.keys.forall(_.isInstanceOf[Key])) Some(t.asInstanceOf[T]) else None } } /* typed access */ def split_as[A: ClassTag](map: T): List[(Key, A)] = map.keys.toList.map(k => k -> get_as[A](map, k)) def optional_as[A: ClassTag](map: T, name: String): Option[A] = map.get(name) match { case Some(value: A) => Some(value) case Some(value) => error("Value " + quote(value.toString) + " not of type " + classTag[A].runtimeClass.getName) case None => None } def get_as[A: ClassTag](map: T, name: String): A = map.get(name) match { case Some(value: A) => value case Some(value) => error("Value " + quote(value.toString) + " not of type " + 

  /* lexer */

  object Kind extends Enumeration {
    val KEYWORD, IDENT, STRING, ERROR = Value
  }

  sealed case class Token(kind: Kind.Value, text: String) {
    def is_ident: Boolean = kind == Kind.IDENT
    def is_keyword(name: String): Boolean = kind == Kind.KEYWORD && text == name
    def is_string: Boolean = kind == Kind.STRING
    def is_error: Boolean = kind == Kind.ERROR
  }

  object Lexer extends Scanners with Scan.Parsers {
    override type Elem = Char
    type Token = TOML.Token

    def errorToken(msg: String): Token = Token(Kind.ERROR, msg)

    val white_space: String = " \t\n\r"
    override val whiteSpace: Regex = ("[" + white_space + "]+").r

    override def comment: Parser[String] =
      '#' ~>! rep(elem("", c => c != EofCh && c != '\n' && c != '\r')) ^^ (s => s.mkString)

    override def whitespace: Parser[Any] =
      rep(comment | many1(character(white_space.contains(_))))

    def keyword: Parser[Token] =
      ("[[" | "]]" | one(character("{}[],=.".contains(_)))) ^^ (s => Token(Kind.KEYWORD, s))

    def ident: Parser[Token] =
      many1(character(c =>
        Symbol.is_ascii_digit(c) || Symbol.is_ascii_letter(c) || c == '_' || c == '-')) ^^
        (s => Token(Kind.IDENT, s))

    def basic_string: Parser[Token] =
      '\"' ~> rep(string_body) <~ '\"' ^^ (cs => Token(Kind.STRING, cs.mkString))

    def multiline_basic_string: Parser[Token] =
      "\"\"\"" ~>!
        rep(multiline_string_body | ("\"" | "\"\"") ~ multiline_string_body ^^
          { case s ~ t => s + t }) <~
        "\"\"\"" ^^ (cs => Token(Kind.STRING, cs.mkString.stripPrefix("\n")))

    def multiline_string_body: Parser[Char] =
      elem("", c => c == '\t' || c == '\n' || c == '\r' ||
        (c > '\u001f' && c != '\"' && c != '\\' && c != EofCh)) |
        '\\' ~> string_escape

    def string_body: Parser[Char] =
      elem("", c => c > '\u001f' && c != '\"' && c != '\\' && c != EofCh) |
        '\\' ~> string_escape

    def string_escape: Parser[Char] =
      elem("", "\"\\/".contains(_)) |
        elem("", "bfnrt".contains(_)) ^^ {
          case 'b' => '\b'
          case 'f' => '\f'
          case 'n' => '\n'
          case 'r' => '\r'
          case 't' => '\t'
        } |
        'u' ~> repeated(character("0123456789abcdefABCDEF".contains(_)), 4, 4) ^^
          (s => Integer.parseInt(s, 16).toChar)

    def string_failure: Parser[Token] = '\"' ~> failure("Unterminated string")

    def token: Parser[Token] =
      keyword | ident | (multiline_basic_string | basic_string | string_failure) |
        failure("Illegal character")
  }


  /* parser */

  trait Parsers extends combinator.Parsers {
    type Elem = Token

    def keys: Parser[List[Key]] = rep1sep(key, $$$("."))

    def string: Parser[String] = elem("string", _.is_string) ^^ (_.text)
    def integer: Parser[Int] = token("integer", _.is_ident, _.toInt)
    def float: Parser[Double] = token("float", _.is_ident, _.toDouble)
    def boolean: Parser[Boolean] = token("boolean", _.is_ident, _.toBoolean)
    def offset_date_time: Parser[OffsetDateTime] =
      token("offset date-time", _.is_ident, OffsetDateTime.parse)
    def local_date_time: Parser[LocalDateTime] =
      token("local date-time", _.is_ident, LocalDateTime.parse)
    def local_date: Parser[LocalDate] = token("local date", _.is_ident, LocalDate.parse)
    def local_time: Parser[LocalTime] = token("local time", _.is_ident, LocalTime.parse)

    def array: Parser[V] =
      $$$("[") ~>! repsep(toml_value, $$$(",")) <~ opt($$$(",")) ~ $$$("]")

    def table: Parser[T] =
      $$$("[") ~>! (keys <~ $$$("]")) ~! content ^?
        { case base ~ T(map) => to_map(List(base -> map)) }

    def inline_table: Parser[V] =
      $$$("{") ~>! (content ^? to_map) <~ $$$("}")
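
    /* Editor's note -- illustrative sketch, not part of the original patch.
       The value parsers above feed `toml_value` below; for example the
       hypothetical input

         ints = [1, 2,]      # trailing comma accepted by `array`
         point = { x = 1 }   # inline table via `inline_table`

       parses to Map("ints" -> List(1, 2), "point" -> Map("x" -> 1)). Note
       that `content` (below) does not consume "," tokens, so a multi-entry
       inline table such as { x = 1, y = 2 } appears not to parse with this
       grammar; the AFP metadata stays within the supported subset. */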
(keys <~ $$$("]]")) >> { ks => repsep(content ^^ to_map, $$$("[[") ~! (keys ^? { case ks1 if ks1 == ks => }) ~ $$$("]]")) ^^ { list => List(ks -> list) } ^? to_map } def toml: Parser[T] = (content ~ rep(table | array_of_tables)) ^? { case T(map) ~ maps => map :: maps } ^? { case Merge(map) => map } /* auxiliary parsers */ private def $$$(name: String): Parser[Token] = elem(name, _.is_keyword(name)) private def token[A](name: String, p: Token => Boolean, parser: String => A): Parser[A] = elem(name, p) ^^ (tok => Try { parser(tok.text) }) ^? { case util.Success(v) => v } private def key: Parser[Key] = elem("key", e => e.is_ident || e.is_string) ^^ (_.text) private def toml_value: Parser[V] = string | integer | float | boolean | offset_date_time | local_date_time | local_date | local_time | array | inline_table private def content: Parser[List[(List[Key], V)]] = rep((keys <~ $$$("=")) ~! toml_value ^^ { case ks ~ v => ks -> v }) /* extractors */ private object T { def unapply(table: List[(List[Key], V)]): Option[T] = { val by_first_key = table.foldLeft(ListMap.empty[Key, List[(List[Key], V)]]) { case (map, (k :: ks, v)) => map.updatedWith(k) { case Some(value) => Some(value :+ (ks, v)) case None => Some(List((ks, v))) } case _ => return None } val res = by_first_key.map { case (k, (Nil, v) :: Nil) => k -> v case (k, T(map)) => k -> map case _ => return None } Some(res) } } private def to_map: PartialFunction[List[(List[Key], V)], T] = { case T(map) => map } object Merge { private def merge(map1: T, map2: T): Option[T] = { val res2 = map2.map { case (k2, v2) => map1.get(k2) match { case Some(v1) => (v1, v2) match { case (TOML.T(t1), TOML.T(t2)) => k2 -> merge(t1, t2).getOrElse(return None) case x => return None } case None => k2 -> v2 } } Some(map1.filter { case (k, _) => !map2.contains(k) } ++ res2) } def unapply(maps: List[T]): Option[T] = Some(maps.fold(Map.empty)(merge(_, _).getOrElse(return None))) } /* parse */ def parse(input: CharSequence): T = { val scanner = new Lexer.Scanner(Scan.char_reader(input)) val result = phrase(toml)(scanner) (result : @unchecked) match { case Success(toml, _) => toml case NoSuccess(_, next) => error("Malformed TOML input at " + next.pos) } } } object Parsers extends Parsers /* standard format */ def parse(s: String): T = Parsers.parse(s) /* format to a subset of TOML */ object Format { def apply(toml: T): String = { val result = new StringBuilder /* keys */ def key(k: Key): Unit = { val Bare_Key = """[A-Za-z0-9_-]+""".r k match { case Bare_Key() => result ++= k case _ => result += '"' result ++= k result += '"' } } def keys(ks: List[Key]): Unit = Library.separate(None, ks.reverse.map(Some(_))).foreach { case None => result += '.' 

      /* string */

      def basic_string(s: String): Unit = {
        result += '"'
        result ++=
          s.iterator.map {
            case '\b' => "\\b"
            case '\t' => "\\t"
            case '\n' => "\\n"
            case '\f' => "\\f"
            case '\r' => "\\r"
            case '"' => "\\\""
            case '\\' => "\\\\"
            case c => if (c <= '\u001f' || c == '\u007f') "\\u%04x".format(c.toInt) else c
          }.mkString
        result += '"'
      }

      def multiline_basic_string(s: String): Unit = {
        result ++= "\"\"\"\n"
        result ++=
          s.iterator.map {
            case '\b' => "\\b"
            case '\t' => "\t"
            case '\n' => "\n"
            case '\f' => "\\f"
            case '\r' => "\r"
            case '"' => "\\\""
            case '\\' => "\\\\"
            case c => if (c <= '\u001f' || c == '\u007f') "\\u%04x".format(c.toInt) else c
          }.mkString.replace("\"\"\"", "\"\"\\\"")
        result ++= "\"\"\""
      }


      /* integer, boolean, offset date-time, local date-time, local date, local time */

      object To_String {
        def unapply(v: V): Option[String] =
          v match {
            case _: Int | _: Double | _: Boolean | _: OffsetDateTime |
                 _: LocalDateTime | _: LocalDate | _: LocalTime => Some(v.toString)
            case _ => None
          }
      }


      /* inline: string, float, To_String, value array */

      def `inline`(v: V, indent: Int = 0): Unit = {
        def indentation(i: Int): Unit = for (_ <- Range(0, i)) result ++= "  "

        indentation(indent)
        v match {
          case s: String =>
            if (s.contains("\n") && s.length > 20) multiline_basic_string(s)
            else basic_string(s)
          case To_String(s) => result ++= s
          case list: List[V] =>
            if (list.isEmpty) result ++= "[]"
            else {
              result ++= "[\n"
              list.foreach { elem =>
                `inline`(elem, indent + 1)
                result ++= ",\n"
              }
              indentation(indent)
              result += ']'
            }
          case _ => error("Not inline TOML value: " + v.toString)
        }
      }


      /* array */

      def inline_values(path: List[Key], v: V): Unit =
        v match {
          case T(map) => map.foreach { case (k, v1) => inline_values(k :: path, v1) }
          case _ =>
            keys(path)
            result ++= " = "
            `inline`(v)
            result += '\n'
        }

      def is_inline(elem: V): Boolean =
        elem match {
          case To_String(_) | _: Double | _: String => true
          case list: List[V] => list.forall(is_inline)
          case _ => false
        }

      def array(path: List[Key], list: List[V]): Unit = {
        if (list.forall(is_inline)) inline_values(path.take(1), list)
        else {
          list.collect {
            case T(map) =>
              result ++= "\n[["
              keys(path)
              result ++= "]]\n"
              table(path, map, is_table_entry = true)
            case _ => error("Array can only contain either all tables or all non-tables")
          }
        }
      }


      /* table */

      def table(path: List[Key], map: T, is_table_entry: Boolean = false): Unit = {
        val (values, nodes) = map.toList.partition(kv => is_inline(kv._2))

        if (!is_table_entry && path.nonEmpty) {
          result ++= "\n["
          keys(path)
          result ++= "]\n"
        }

        values.foreach { case (k, v) => inline_values(List(k), v) }

        nodes.foreach {
          case (k, T(map1)) => table(k :: path, map1)
          case (k, list: List[V]) => array(k :: path, list)
          case (_, v) => error("Invalid TOML: " + v.toString)
        }
      }

      table(Nil, toml)

      result.toString
    }
  }
}
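
/* Editor's note -- illustrative round-trip sketch, not part of the original
   patch, assuming the usual Isabelle/Scala environment:

     val t = TOML.parse("title = \"Test\"\n[extra]\nkey = 1")
     // t == Map("title" -> "Test", "extra" -> Map("key" -> 1))
     val s = TOML.Format(t)
     // s == "title = \"Test\"\n\n[extra]\nkey = 1\n"
     assert(TOML.parse(s) == t)   // Format emits a parseable subset
*/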