diff --git a/tools/afp_site_gen.scala b/tools/afp_site_gen.scala --- a/tools/afp_site_gen.scala +++ b/tools/afp_site_gen.scala @@ -1,345 +1,349 @@ /* Author: Fabian Huch, TU Muenchen Generation and compilation of SSG project for the AFP website. */ package afp import isabelle._ import afp.Metadata.{Affiliation, Author, Email, Homepage} object AFP_Site_Gen { /* json */ object JSON { type T = isabelle.JSON.T object Object { type T = isabelle.JSON.Object.T } def from_authors(authors: List[Metadata.Author]): T = { authors.map(author => author.id -> isabelle.JSON.Object( "name" -> author.name, "emails" -> author.emails.map(_.address), "homepages" -> author.homepages.map(_.url))).toMap } def from_topics(topics: List[Metadata.Topic]): T = topics.map(topic => isabelle.JSON.Object(topic.name -> from_topics(topic.sub_topics))) def from_affiliations(affiliations: List[Metadata.Affiliation]): Object.T = { Utils.group_sorted(affiliations, (a: Metadata.Affiliation) => a.author).view.mapValues( { author_affiliations => val homepage = author_affiliations.collectFirst { case Metadata.Homepage(_, _, url) => url } val email = author_affiliations.collectFirst { case Metadata.Email(_, _, address) => address } isabelle.JSON.Object( homepage.map(s => List("homepage" -> s)).getOrElse(Nil) ::: email.map(s => List("email" -> s)).getOrElse(Nil): _*) }).toMap } def from_change_history(history: Metadata.Change_History): Object.T = { if (history.isEmpty) { Map.empty } else { Map("Change history" -> history.map { case (date, str) => "[" + date.toString + "] " + str }.mkString("\n")) } } def from_entry(entry: Metadata.Entry): JSON.Object.T = isabelle.JSON.Object( "title" -> entry.title :: "authors" -> entry.authors.map(_.author).distinct :: "affiliations" -> from_affiliations(entry.authors ++ entry.contributors) :: (if (entry.contributors.nonEmpty) "contributors" -> entry.contributors.map(_.author).distinct :: Nil else Nil) ::: "date" -> entry.date.toString :: "topics" -> entry.topics.map(_.id) :: "abstract" -> entry.`abstract` :: "license" -> entry.license.name :: (if (entry.releases.nonEmpty) "releases" -> entry.releases.map(r => r.isabelle -> r.date.toString).toMap :: Nil else Nil) ::: (if (entry.note.nonEmpty) "note" -> entry.note :: Nil else Nil) ::: (if (entry.change_history.nonEmpty || entry.extra.nonEmpty) "extra" -> (from_change_history(entry.change_history) ++ entry.extra) :: Nil else Nil): _*) def from_keywords(keywords: List[String]): T = keywords.zipWithIndex.map { case (keyword, i) => isabelle.JSON.Object("id" -> i, "keyword" -> keyword) } } /* keyword extraction */ private val replacements = List( "<[^>]*>".r -> "", "[^\\w\\s()'.,;:-]".r -> " ", "\\s+".r -> " ") def extract_keywords(text: String): List[String] = { val stripped_text = replacements.foldLeft(text) { case (res, (regex, replacement)) => regex.replaceAllIn(res, replacement) } val arg = quote(stripped_text.replaceAll("\"", "\\\"")) val keyword_cmd = "from keywords import *; print_keywords(" + arg + ")" Python.run(keyword_cmd).check.out_lines } /* site generation */ def afp_site_gen( layout: Hugo.Layout, status_file: Option[Path], afp_structure: AFP_Structure, clean: Boolean = false, progress: Progress = new Progress() ): Unit = { /* clean old */ if (clean) { progress.echo("Cleaning up generated files...") layout.clean() } /* add topics */ progress.echo("Preparing topics...") val topics = afp_structure.load_topics def sub_topics(topic: Metadata.Topic): List[Metadata.Topic] = topic :: topic.sub_topics.flatMap(sub_topics) val topics_by_id = Utils.grouped_sorted(topics.flatMap(sub_topics), (t: Metadata.Topic) => t.id) layout.write_data(Path.basic("topics.json"), JSON.from_topics(topics)) /* add licenses */ progress.echo("Preparing licenses...") val licenses_by_id = Utils.grouped_sorted(afp_structure.load_licenses, (l: Metadata.License) => l.id) /* add releases */ progress.echo("Preparing releases...") val releases_by_entry = afp_structure.load_releases.groupBy(_.entry) /* prepare authors and entries */ progress.echo("Preparing authors...") val full_authors = afp_structure.load_authors val authors_by_id = Utils.grouped_sorted(full_authors, (a: Metadata.Author) => a.id) var seen_affiliations = Set.empty[Affiliation] - val entries = afp_structure.entries.map { name => + val entries = afp_structure.entries.flatMap { name => val entry = afp_structure.load_entry(name, authors_by_id, topics_by_id, licenses_by_id, releases_by_entry) - seen_affiliations ++= entry.authors ++ entry.contributors - entry + + if (entry.sitegen_ignore) None + else { + seen_affiliations ++= entry.authors ++ entry.contributors + Some(entry) + } } val authors = Utils.group_sorted(seen_affiliations.toList, (a: Affiliation) => a.author).map { case (id, affiliations) => val emails = affiliations.collect { case e: Email => e } val homepages = affiliations.collect { case h: Homepage => h } Author(id, authors_by_id(id).name, emails, homepages) } layout.write_data(Path.basic("authors.json"), JSON.from_authors(authors.toList)) /* extract keywords */ progress.echo("Extracting keywords...") var seen_keywords = Set.empty[String] val entry_keywords = entries.map { entry => val Keyword = """\('([^']*)', ([^)]*)\)""".r val scored_keywords = extract_keywords(entry.`abstract`).map { case Keyword(keyword, score) => keyword -> score.toDouble case s => error("Could not parse: " + s) } seen_keywords ++= scored_keywords.map(_._1) entry.name -> scored_keywords.filter(_._2 > 1.0).map(_._1) }.toMap seen_keywords = seen_keywords.filter(k => !k.endsWith("s") || !seen_keywords.contains(k.stripSuffix("s"))) layout.write_static(Path.make(List("data", "keywords.json")), JSON.from_keywords(seen_keywords.toList)) def get_keywords(name: Metadata.Entry.Name): List[String] = entry_keywords(name).filter(seen_keywords.contains).take(8) /* add entries and theory listings */ progress.echo("Preparing entries...") val sessions_structure = afp_structure.sessions_structure val sessions_deps = Sessions.deps(sessions_structure) entries.foreach { entry => val deps = for { session <- afp_structure.entry_sessions(entry.name) dep <- sessions_structure.imports_graph.imm_preds(session.name) if session.name != dep && sessions_structure(dep).groups.contains("AFP") } yield dep val theories = afp_structure.entry_sessions(entry.name).map { session => val base = sessions_deps(session.name) val theories = base.session_theories.map(_.theory_base_name) val session_json = isabelle.JSON.Object( "title" -> session.name, "entry" -> entry.name, "url" -> ("/theories/" + session.name.toLowerCase), "theories" -> theories) layout.write_content(Path.make(List("theories", session.name + ".md")), session_json) isabelle.JSON.Object("session" -> session.name, "theories" -> theories) } val entry_json = JSON.from_entry(entry) ++ isabelle.JSON.Object( "dependencies" -> deps.distinct, "sessions" -> theories, "url" -> ("/entries/" + entry.name + ".html"), "keywords" -> get_keywords(entry.name)) layout.write_content(Path.make(List("entries", entry.name + ".md")), entry_json) } /* add authors */ /* add statistics */ progress.echo("Preparing statistics...") val statistics_cmd = "from statistics import *; add_statistics(" + commas_quote( List( Path.explode("$AFP_BASE").absolute.implode, Path.explode("$AFP").absolute.implode, layout.data_dir.implode)) + ")" Python.run(statistics_cmd).check (layout.data_dir + Path.basic("statistics.html")).file.delete() /* project */ progress.echo("Preparing project files") layout.copy_project() /* status */ status_file match { case Some(status_file) => progress.echo("Preparing devel version...") val status_json = isabelle.JSON.parse(File.read(status_file)) layout.write_data(Path.basic("status.json"), status_json) case None => } progress.echo("Finished sitegen preparation.") } /* build site */ def afp_build_site( out_dir: Path, layout: Hugo.Layout, do_watch: Boolean = false, clean: Boolean = false, progress: Progress = new Progress() ): Unit = { if (clean) { progress.echo("Cleaning output dir...") Hugo.clean(out_dir) } if (do_watch) { Hugo.watch(layout, out_dir, progress).check } else { progress.echo("Building site...") Hugo.build(layout, out_dir).check progress.echo("Build in " + (out_dir + Path.basic("index.html")).absolute.implode) } } /* tool wrapper */ val isabelle_tool = Isabelle_Tool("afp_site_gen", "generates afp website source", Scala_Project.here, { args => var base_dir = Path.explode("$AFP_BASE") var status_file: Option[Path] = None var hugo_dir = base_dir + Path.make(List("web", "hugo")) var out_dir: Path = base_dir + Path.make(List("web", "out")) var build_only = false var devel_mode = false var fresh = false val getopts = Getopts(""" Usage: isabelle afp_site_gen [OPTIONS] Options are: -B DIR afp base dir (default """" + base_dir.implode + """") -D FILE build status file for devel version -H DIR generated hugo project dir (default """" + hugo_dir.implode + """") -O DIR output dir for build (default """ + out_dir.implode + """) -b build only -d devel mode (overrides hugo dir, builds site in watch mode) -f fresh build: clean up existing hugo and build directories Generates the AFP website source. HTML files of entries are dynamically loaded. Providing a status file will build the development version of the archive. Site will be built from generated source if output dir is specified. """, "B:" -> (arg => base_dir = Path.explode(arg)), "D:" -> (arg => status_file = Some(Path.explode(arg))), "H:" -> (arg => hugo_dir = Path.explode(arg)), "O:" -> (arg => out_dir = Path.explode(arg)), "b" -> (_ => build_only = true), "d" -> (_ => devel_mode = true), "f" -> (_ => fresh = true)) getopts(args) if (devel_mode) hugo_dir = base_dir + Path.make(List("admin", "site")) val afp_structure = AFP_Structure(base_dir) val layout = Hugo.Layout(hugo_dir) val progress = new Console_Progress() if (!build_only) { progress.echo("Preparing site generation in " + hugo_dir.implode) afp_site_gen(layout = layout, status_file = status_file, afp_structure = afp_structure, clean = fresh, progress = progress) } afp_build_site(out_dir = out_dir, layout = layout, do_watch = devel_mode, clean = fresh, progress = progress) }) } \ No newline at end of file diff --git a/tools/metadata.scala b/tools/metadata.scala --- a/tools/metadata.scala +++ b/tools/metadata.scala @@ -1,260 +1,264 @@ /* Author: Fabian Huch, TU Muenchen AFP metadata model and TOML serialization. */ package afp import isabelle._ import afp.TOML._ import java.time.LocalDate object Metadata { sealed abstract class Affiliation(val author: Author.ID) case class Unaffiliated(override val author: Author.ID) extends Affiliation(author) case class Email(override val author: Author.ID, id: Email.ID, address: String) extends Affiliation(author) object Email { type ID = String } case class Homepage(override val author: Author.ID, id: Homepage.ID, url: String) extends Affiliation(author) object Homepage { type ID = String } case class Author( id: Author.ID, name: String, emails: List[Email], homepages: List[Homepage] ) object Author { type ID = String } case class Topic(id: Topic.ID, name: String, sub_topics: List[Topic]) object Topic { type ID = String } type Date = LocalDate object Isabelle { type Version = String } case class Release(entry: Entry.Name, date: Date, isabelle: Isabelle.Version) case class License(id: License.ID, name: String) object License { type ID = String } type Change_History = Map[Date, String] type Extra = Map[String, String] case class Entry( name: Entry.Name, title: String, authors: List[Affiliation], date: Date, topics: List[Topic], `abstract`: String, notifies: List[Email], contributors: List[Affiliation], license: License, note: String, change_history: Change_History, extra: Extra, - releases: List[Release]) + releases: List[Release], + sitegen_ignore: Boolean) object Entry { type Name = String } /* toml */ object TOML { private def by_id[A](elems: Map[String, A], id: String): A = elems.getOrElse(id, error("Elem " + quote(id) + " not found in " + commas_quote(elems.map(_.toString)))) /* author */ def from_authors(authors: List[Author]): T = { def from_author(author: Author): T = T( "name" -> author.name, "emails" -> T(author.emails.map(email => email.id -> email.address)), "homepages" -> T(author.homepages.map(homepage => homepage.id -> homepage.url))) T(authors.map(author => author.id -> from_author(author))) } def to_authors(authors: T): List[Author] = { def to_author(author_id: Author.ID, author: T): Author = { val emails = split_as[String](get_as[T](author, "emails")) map { case (id, address) => Email(author = author_id, id = id, address = address) } val homepages = split_as[String](get_as[T](author, "homepages")) map { case (id, url) => Homepage(author = author_id, id = id, url = url) } Author( id = author_id, name = get_as[String](author, "name"), emails = emails, homepages = homepages ) } split_as[T](authors).map { case (id, author) => to_author(id, author) } } /* topics */ def from_topics(root_topics: List[Topic]): T = T(root_topics.map(t => t.name -> from_topics(t.sub_topics))) def to_topics(root_topics: T): List[Topic] = { def to_topics_rec(topics: T, root: Topic.ID): List[Topic] = split_as[T](topics).map { case (name, sub_topics) => val id = (if (root.nonEmpty) root + "/" else "") + name Topic(id, name, to_topics_rec(sub_topics, id)) } to_topics_rec(root_topics, "") } /* releases */ def from_releases(releases: List[Release]): T = T(Utils.group_sorted(releases, (r: Release) => r.entry).view.mapValues { entry_releases => T(entry_releases.map(r => r.isabelle -> r.date)) }.toList) def to_releases(map: T): List[Release] = split_as[T](map).flatMap { case (entry, releases) => split_as[Date](releases).map { case (version, date) => Release(entry = entry, date = date, isabelle = version) } } /* affiliation */ def from_affiliations(affiliations: List[Affiliation]): T = T(Utils.group_sorted(affiliations, (a: Affiliation) => a.author).view.mapValues(vs => T(vs.collect { case Email(_, id, _) => "email" -> id case Homepage(_, id, _) => "homepage" -> id })).toList) def to_affiliations(affiliations: T, authors: Map[Author.ID, Author]): List[Affiliation] = { def to_affiliation(affiliation: (String, String), author: Author): Affiliation = { affiliation match { case ("email", id: String) => author.emails.find(_.id == id) getOrElse error("Email not found: " + quote(id)) case ("homepage", id: String) => author.homepages.find(_.id == id) getOrElse error("Homepage not found: " + quote(id)) case e => error("Unknown affiliation type: " + e) } } split_as[T](affiliations).flatMap { case (id, author_affiliations) => val author = by_id(authors, id) if (author_affiliations.isEmpty) List(Unaffiliated(author.id)) else split_as[String](author_affiliations).map(to_affiliation(_, author)) } } def from_emails(emails: List[Email]): T = T(emails.map(email => email.author -> email.id)) def to_emails(emails: T, authors: Map[Author.ID, Author]): List[Email] = emails.toList.map { case (author, id: String) => by_id(authors, author).emails.find(_.id == id) getOrElse error("Email not found: " + quote(id)) case e => error("Unknown email: " + quote(e.toString)) } /* license */ def from_licenses(licenses: List[License]): T = T(licenses.map(license => license.id -> T("name" -> license.name))) def to_licenses(licenses: T): List[License] = { split_as[T](licenses) map { case (id, license) => License(id, get_as[String](license, "name")) } } def to_license(license: String, licenses: Map[License.ID, License]): License = licenses.getOrElse(license, error("No such license: " + quote(license))) /* history */ def from_change_history(change_history: Change_History): T = change_history.map { case (date, str) => date.toString -> str } def to_change_history(change_history: T): Change_History = change_history.map { case (date, entry: String) => LocalDate.parse(date) -> entry case e => error("Unknown history entry: " + quote(e.toString)) } /* entry */ - def from_entry(entry: Entry): T = + def from_entry(entry: Entry): T = { T( "title" -> entry.title, "authors" -> from_affiliations(entry.authors), "contributors" -> from_affiliations(entry.contributors), "date" -> entry.date, "topics" -> entry.topics.map(_.id), "abstract" -> entry.`abstract`, "notify" -> from_emails(entry.notifies), "license" -> entry.license.id, "note" -> entry.note, "history" -> from_change_history(entry.change_history), - "extra" -> entry.extra) + "extra" -> entry.extra) ++ + (if (entry.sitegen_ignore) T("sitegen_ignore" -> true) else T()) + } def to_entry(entry: T, authors: Map[Author.ID, Author], topics: Map[Topic.ID, Topic], licenses: Map[License.ID, License], releases: List[Release]): Entry = Entry( name = get_as[String](entry, "name"), title = get_as[String](entry, "title"), authors = to_affiliations(get_as[T](entry, "authors"), authors), date = get_as[Date](entry, "date"), topics = get_as[List[String]](entry, "topics").map(by_id(topics, _)), `abstract` = get_as[String](entry, "abstract"), notifies = to_emails(get_as[T](entry, "notify"), authors), contributors = to_affiliations(get_as[T](entry, "contributors"), authors), license = to_license(get_as[String](entry, "license"), licenses), note = get_as[String](entry, "note"), change_history = to_change_history(get_as[T](entry, "history")), extra = get_as[Extra](entry, "extra"), - releases = releases) + releases = releases, + sitegen_ignore = optional_as[Boolean](entry, "sitegen_ignore").getOrElse(false)) } } diff --git a/tools/migration/afp_migrate_metadata.scala b/tools/migration/afp_migrate_metadata.scala --- a/tools/migration/afp_migrate_metadata.scala +++ b/tools/migration/afp_migrate_metadata.scala @@ -1,594 +1,599 @@ package afp.migration import isabelle._ import afp._ import afp.Metadata.{TOML => _, _} import scala.collection.BufferedIterator import java.io.BufferedReader import java.text.Normalizer import java.time.LocalDate import java.net.URI import org.jline.utils.InputStreamReader object AFP_Migrate_Metadata { private class Context( names_mapping: Map[String, String], email_names: Map[String, String], dates_mapping: Map[String, String] ) { /* mappings */ def transform_name(name: String): String = names_mapping.getOrElse(name, name) def parse_date(date: String): Metadata.Date = LocalDate.parse(dates_mapping.getOrElse(date, date)) def name(address: String): String = email_names.getOrElse(address, error("No name for address " + address)) /* seen */ private var _seen_authors = Set.empty[Author.ID] private var _seen_emails = Set.empty[Email.ID] private var _seen_homepages = Set.empty[Homepage.ID] private var _seen_licenses = Map.empty[License.ID, License] private var _seen_author_names = Map.empty[String, Author] def seen_authors: Set[Author.ID] = _seen_authors def seen_emails: Set[Email.ID] = _seen_emails def seen_homepages: Set[Homepage.ID] = _seen_homepages def author(name: String): Option[Author] = _seen_author_names.get(name) def authors: List[Author] = _seen_author_names.values.toList def licenses: List[License] = _seen_licenses.values.toList def update_author(author: Author): Unit = { _seen_authors += author.id _seen_author_names = _seen_author_names.updated(author.name, author) } def email(author: Author.ID): Email.ID = { val id = unique_id(author + "_email", this.seen_emails) _seen_emails += id id } def homepage(homepage: Homepage.ID): Homepage.ID = { val id = unique_id(homepage + "_homepage", this.seen_homepages) _seen_homepages += id id } def license(license_str: String): License = { val license = License(license_str.toLowerCase, license_str) _seen_licenses += license.id -> license license } } private def is_email(url: String) = url.contains("@") private def as_ascii(str: String) = { var res: String = str List("ö" -> "oe", "ü" -> "ue", "ä" -> "ae", "ß" -> "ss").foreach { case (c, rep) => res = res.replace(c, rep) } res = Normalizer.normalize(res, Normalizer.Form.NFD).replaceAll("[^\\x00-\\x7F]", "") if (res.exists(_ > 127)) error("Contained non convertible non-ascii character: " + str) res } def unique_id(prefix: String, ids: Set[String]): String = { if (!ids.contains(prefix)) prefix else { var num = 1 while (ids.contains(prefix + num)) { num += 1 } prefix + num } } def private_dom(full_dom: String): String = { val stream = getClass.getClassLoader.getResourceAsStream("public_suffix_list.dat") val reader = new BufferedReader(new InputStreamReader(stream)) val public_suffixes = File.read_lines(reader, _ => ()).filterNot(_.startsWith("//")) val stripped = public_suffixes.map(full_dom.stripSuffix(_)).minBy(_.length) if (stripped.endsWith(".")) stripped.dropRight(1) else stripped } def make_author_id(name: String, context: Context): String = { val normalized = as_ascii(name) val Suffix = """^.*?([a-zA-Z]*)$""".r val suffix = normalized match { case Suffix(suffix) => suffix case _ => "" } val Prefix = """^([a-zA-Z]*).*$""".r val prefix = normalized.stripSuffix(suffix) match { case Prefix(prefix) => prefix case _ => "" } var ident = suffix.toLowerCase for { c <- prefix.toLowerCase } { if (context.seen_authors.contains(ident)) { ident += c.toString } else return ident } unique_id(ident, context.seen_authors) } def author_urls(name_urls: String, context: Context): (String, List[String]) = { val name = AFP.trim_mail(name_urls) val urls_string = name_urls.stripPrefix(name).trim val transformed = context.transform_name(name) if (urls_string == "<>") { (transformed, List("")) } else { (transformed, urls_string.split(Array('<', '>')).toList.filterNot(_.isBlank)) } } def add_email(author: Author, address: String, context: Context): (Author, Email) = { val email = Email(author = author.id, id = context.email(author.id), address = address) val update_author = author.copy(emails = author.emails :+ email) context.update_author(update_author) (update_author, email) } def get_affiliations(name_url: String, context: Context): List[Affiliation] = { val (name, urls) = author_urls(name_url, context) val author = context.author(name).getOrElse(error("Author not found: " + name)) urls.map { url => if (is_email(url)) { val address = url.stripPrefix("mailto:") author.emails.find(_.address == address) getOrElse error("Email not found: " + (author, address)) } else if (url.isEmpty) { Unaffiliated(author.id) } else { author.homepages.find(_.url == url) getOrElse error("Url not found for: " + (author, url)) } } } def get_email_affiliation(address: String, context: Context, progress: Progress): Email = { context.authors.flatMap(_.emails).find(_.address == address) match { case Some(email) => email case None => val name = context.name(address) val author = context.author(name) match { case Some(author) => author case None => progress.echo_warning("New author: " + name) Author(make_author_id(name, context), name, Nil, Nil) } add_email(author, address, context)._2 } } def update_author(name_urls: String, context: Context): Unit = { val (name, urls) = author_urls(name_urls, context) var author = context.author(name) match { case Some(author) => author case None => Author(make_author_id(name, context), name, Nil, Nil) } urls.foreach { url => if (is_email(url)) { val address = url.stripPrefix("mailto:") if (!author.emails.exists(_.address == address)) { author = add_email(author, address, context)._1 } } else if (url.nonEmpty && !author.homepages.exists(_.url == url)) { val homepage = Homepage(author = author.id, id = context.homepage(author.id), url = url) author = author.copy(homepages = author.homepages :+ homepage) } } context.update_author(author) } def map_entry( entry: AFP.Entry, releases: Map[Entry.Name, List[Release]], topics: Map[Topic.ID, Topic], + sitegen_ignore: Boolean, context: Context, progress: Progress ): Entry = { val author_affiliations = entry.authors.flatMap(get_affiliations(_, context)) val contributor_affiliations = entry.contributors.flatMap(get_affiliations(_, context)) val notify_emails = entry.get_strings("notify").map(get_email_affiliation(_, context, progress)) val change_history = parse_change_history(entry.get_string("extra-history"), context) val extra = entry.metadata.filter { case (k, _) => k.startsWith("extra-") && k != "extra-history" } Entry( name = entry.name, title = entry.title, authors = author_affiliations, date = LocalDate.from(entry.date.rep), topics = entry.topics.map(topics), `abstract` = entry.`abstract`, notifies = notify_emails, contributors = contributor_affiliations, license = context.license(entry.license), note = entry.get_string("note"), change_history = change_history, extra = extra.toMap, - releases = releases(entry.name) + releases = releases(entry.name), + sitegen_ignore = sitegen_ignore ) } def parse_change_history(history: String, context: Context): Change_History = { val matches = """\[(\d{4}-\d{2}-\d{2})]: ([^\[]+)""".r.findAllMatchIn(history.stripPrefix("Change history:")) matches.toList.map(_.subgroups match { case date :: content :: Nil => context.parse_date(date) -> content case _ => error("Could not parse change history: " + quote(history)) }).toMap } def parse_topics(lines: List[String]): List[Topic] = { val lines_iterator: BufferedIterator[String] = lines.filterNot(_.isBlank).iterator.buffered def get_indent(line: String) = line.takeWhile(_.isWhitespace).length def parse_level(level: Int, root: Option[Topic.ID]): List[Topic] = { val name = lines_iterator.next().trim val id = root.map(_ + "/").getOrElse("") + name val (sub_topics, next_topics) = lines_iterator.headOption match { case Some(next) if get_indent(next) == level + 2 => val sub = parse_level(level + 2, Some(id)) val next = lines_iterator.headOption match { case Some(next1) if get_indent(next1) == level => parse_level(level, root) case _ => Nil } (sub, next) case Some(next) if get_indent(next) == level => (Nil, parse_level(level, root)) case _ => (Nil, Nil) } Topic(id = id, name = name, sub_topics = sub_topics) :: next_topics } parse_level(0, None) } def parse_releases( releases: List[String], isabelle_releases: List[String], all_entries: List[Entry.Name], context: Context ): List[Release] = { val Isa_Release = """(.+) = (.+)""".r val release_dates = isabelle_releases.filterNot(_.isBlank).map { case Isa_Release(isabelle_version, date) => context.parse_date(date) -> isabelle_version case line => error("Could not parse: " + quote(line)) } val current = release_dates.last def to_release(entry: Entry.Name, release_date: LocalDate): Release = Release(entry, release_date, release_dates.findLast { case (date, _) => date.isBefore(release_date) }.get._2) val Entry_Release = """afp-([a-zA-Z0-9_+-]+)-(\d{4}-\d{2}-\d{2})\.tar\.gz""".r val entry_releases = Utils.group_sorted( releases.filterNot(_.isBlank).map { case Entry_Release(entry, date_string) => val date = context.parse_date(date_string) entry -> to_release(entry, date) case line => error("Could not parse: " + quote(line)) }, (e: (Entry.Name, Release)) => e._1) all_entries.flatMap(e => entry_releases.getOrElse(e, Nil).map(_._2) :+ to_release(e, current._1)) } def migrate_metadata( base_dir: Path, overwrite: Boolean, context: Context, options: Options = Options.init(), progress: Progress = new Progress() ): Unit = { val metadata = AFP.init(options, base_dir) val metadata_dir = base_dir + Path.basic("metadata") def read(file: Path): String = File.read(metadata_dir + file) def write(toml: TOML.T, file: Path) = { val path = metadata_dir + file if (!overwrite && path.file.exists) error("File already exists: " + path.file_name) else path.dir.file.mkdirs() File.write(path, TOML.Format(toml)) } /* topics */ progress.echo("Parsing topics...") val root_topics = parse_topics(split_lines(read(Path.basic("topics")))) def sub_topics(topic: Topic): List[Topic] = topic :: topic.sub_topics.flatMap(sub_topics) val topic_map = root_topics.flatMap(sub_topics).map(topic => topic.id -> topic).toMap write(Metadata.TOML.from_topics(root_topics), Path.basic("topics.toml")) /* releases */ progress.echo("Parsing releases...") val releases = parse_releases( split_lines(read(Path.basic("releases"))), split_lines(read(Path.basic("release-dates"))), metadata.entries.map(_.name), context) val releases_map = releases.groupBy(_.entry) write(Metadata.TOML.from_releases(releases), Path.basic("releases.toml")) /* collect authors (without notify affiliations) */ progress.echo("Collecting authors...") for { entry <- metadata.entries name_url <- entry.authors ++ entry.contributors } update_author(name_url, context) /* entries */ progress.echo("Parsing entries...") for (entry_metadata <- metadata.entries) { - val entry = map_entry(entry_metadata, releases_map, topic_map, context, progress) + val ignore_file = base_dir + Path.make(List("thys", entry_metadata.name, ".sitegen-ignore")) + val sitegen_ignore = ignore_file.file.exists + + val entry = map_entry(entry_metadata, releases_map, topic_map, sitegen_ignore, context, progress) write(Metadata.TOML.from_entry(entry), Path.make(List("entries", entry.name + ".toml"))) } /* licenses */ write(Metadata.TOML.from_licenses(context.licenses), Path.basic("licenses.toml")) /* authors */ write(Metadata.TOML.from_authors(context.authors), Path.basic("authors.toml")) } val isabelle_tool = Isabelle_Tool("afp_migrate_metadata", "migrates old sitegen metadata to new format", Scala_Project.here, { args => var base_dir = Path.explode("$AFP_BASE") var names = List( "Lawrence C Paulson" -> "Lawrence C. Paulson", "Lawrence Paulson" -> "Lawrence C. Paulson", "Florian Kammueller" -> "Florian Kammüller", "Jasmin Blanchette" -> "Jasmin Christian Blanchette", "Ognjen Maric" -> "Ognjen Marić", "Maximilian P.L. Haslbeck" -> "Maximilian P. L. Haslbeck", "Maximilian Haslbeck" -> "Max W. Haslbeck", "Sebastiaan Joosten" -> "Sebastiaan J. C. Joosten", "Jacques Fleuriot" -> "Jacques D. Fleuriot", "Jose Manuel Rodriguez Caballero" -> "José Manuel Rodríguez Caballero") var emails = List( "a.bentkamp@vu.nl" -> "Alexander Bentkamp", "a.popescu@mdx.ac.uk" -> "Andrei Popescu", "a.popescu@sheffield.ac.uk" -> "Andrei Popescu", "afp@liftm.de" -> "Julius Michaelis", "ahfrom@dtu.dk" -> "Asta Halkjær From", "ak2110@cam.ac.uk" -> "Angeliki Koutsoukou-Argyraki", "akihisayamada@nii.ac.jp" -> "Akihisa Yamada", "aleje@dtu.dk" -> "Alexander Birch Jensen", "alexander.katovsky@cantab.net" -> "Alexander Katovsky", "alexander.maletzky@risc-software.at" -> "Alexander Maletzky", "alexander.maletzky@risc.jku.at" -> "Alexander Maletzky", "andreas.lochbihler@digitalasset.com" -> "Andreas Lochbihler", "andreasvhess@gmail.com" -> "Andreas V. Hess", "andschl@dtu.dk" -> "Anders Schlichtkrull", "apdb3@cam.ac.uk" -> "Anthony Bordg", "avigad@cmu.edu" -> "Jeremy Avigad", "ballarin@in.tum.de" -> "Clemens Ballarin", "bdd@liftm.de" -> "Julius Michaelis", "benblumson@gmail.com" -> "Ben Blumson", "berghofe@in.tum.de" -> "Stefan Berghofer", "bjbohrer@gmail.com" -> "Brandon Bohrer", "boehmes@in.tum.de" -> "Sascha Böhme", "brianh@cs.pdx.edu" -> "Brian Huffman", "brunnerj@in.tum.de" -> "Julian Brunner", "bzhan@ios.ac.cn" -> "Bohua Zhan", "c.benzmueller@fu-berlin.de" -> "Christoph Benzmüller", "c.benzmueller@gmail.com" -> "Christoph Benzmüller", "caw77@cam.ac.uk" -> "Conrad Watt", "cezary.kaliszyk@uibk.ac.at" -> "Cezary Kaliszyk", "christian.sternagel@uibk.ac.at" -> "Christian Sternagel", "christian.urban@kcl.ac.uk" -> "Christian Urban", "cle47@cam.ac.uk" -> "Chelsea Edmonds", "coglio@kestrel.edu" -> "Alessandro Coglio", "corey.lewis@data61.csiro.au" -> "Corey Lewis", "d1623001@s.konan-u.ac.jp" -> "Fumiya Iwama", "danijela@matf.bg.ac.rs" -> "Danijela Simić", "denis.lohner@kit.edu" -> "Denis Lohner", "denis.nikif@gmail.com" -> "Denis Nikiforov", "diego.marmsoler@tum.de" -> "Diego Marmsoler", "diekmann@net.in.tum.de" -> "Cornelius Diekmann", "dubut@nii.ac.jp" -> "Jérémy Dubut", "eminkarayel@google.com" -> "Emin Karayel", "f.smola@sms.ed.ac.uk" -> "Filip Smola", "fadouaghourabi@gmail.com" -> "Fadoua Ghourabi", "fimmler@andrew.cmu.edu" -> "Fabian Immler", "fimmler@cs.cmu.edu" -> "Fabian Immler", "fkjac@dtu.dk" -> "Frederik Krogsdal Jacobsen", "florian.haftmann@informatik.tu-muenchen.de" -> "Florian Haftmann", "florian.kammuller@gmail.com" -> "Florian Kammüller", "friedrich.kurz@tum.de" -> "Friedrich Kurz", "ftuong@lri.fr" -> "Frédéric Tuong", "g.struth@dcs.shef.ac.uk" -> "Georg Struth", "ggrov@inf.ed.ac.uk" -> "Gudmund Grov", "giamp@dmi.unict.it" -> "Giampaolo Bella", "giuliano@losa.fr" -> "Giuliano Losa", "Harald.Zankl@uibk.ac.at" -> "Harald Zankl", "haslbecm@in.tum.de" -> "Max W. Haslbeck", "haslbema@in.tum.de" -> "Maximilian P. L. Haslbeck", "heimesl@student.ethz.ch" -> "Lukas Heimes", "hetzl@logic.at" -> "Stefan Hetzl", "hirata.m.ac@m.titech.ac.jp" -> "Michikazu Hirata", "holub@karlin.mff.cuni.cz" -> "Štěpán Holub", "Ian.Hayes@itee.uq.edu.au" -> "Ian J. Hayes", "isabelleopenflow@liftm.de" -> "Julius Michaelis", "Jacques.Fleuriot@ed.ac.uk" -> "Jacques D. Fleuriot", "jason.jaskolka@carleton.ca" -> "Jason Jaskolka", "jdf@ed.ac.uk" -> "Jacques D. Fleuriot", "jeremy.sylvestre@ualberta.ca" -> "Jeremy Sylvestre", "jesus-maria.aransay@unirioja.es" -> "Jesús Aransay", "jonjulian23@gmail.com" -> "Jonathan Julian Huerta y Munive", "jose.divason@unirioja.es" -> "Jose Divasón", "jose.divasonm@unirioja.es" -> "Jose Divasón", "joseph-thommes@gmx.de" -> "Joseph Thommes", "jovi@dtu.dk" -> "Jørgen Villadsen", "jsiek@indiana.edu" -> "Jeremy Siek", "jsylvest@ualberta.ca" -> "Jeremy Sylvestre", "julian.nagele@uibk.ac.at" -> "Julian Nagele", "julian.parsert@uibk.ac.at" -> "Julian Parsert", "kevin.kappelmann@tum.de" -> "Kevin Kappelmann", "kkz@mit.edu" -> "Karen Zee", "kleing@unsw.edu.au" -> "Gerwin Klein", "krauss@in.tum.de" -> "Alexander Krauss", "kreuzerk@in.tum.de" -> "Katharina Kreuzer", "lars@hupel.info" -> "Lars Hupel", "lcp@cl.cam.ac.uk" -> "Lawrence C. Paulson", "lennart.beringer@ifi.lmu.de" -> "Lennart Beringer", "liwenda1990@hotmail.com" -> "Wenda Li", "m.raszyk@gmail.com" -> "Martin Raszyk", "mail@andreas-lochbihler.de" -> "Andreas Lochbihler", "mail@michael-herzberg.de" -> "Michael Herzberg", "maintainafpppt@liftm.de" -> "Julius Michaelis", "maksym.bortin@nicta.com.au" -> "Maksym Bortin", "manuel.eberl@tum.de" -> "Manuel Eberl", "martin.desharnais@unibw.de" -> "Martin Desharnais", "mathias.fleury@jku.at" -> "Mathias Fleury", "matthias.brun@inf.ethz.ch" -> "Matthias Brun", "max.haslbeck@gmx.de" -> "Max W. Haslbeck", "maximilian.haslbeck@uibk.ac.at" -> "Max W. Haslbeck", "me@eminkarayel.de" -> "Emin Karayel", "MichaelNedzelsky@yandex.ru" -> "Michael Nedzelsky", "miguel.pagano@unc.edu.ar" -> "Miguel Pagano", "mihailsmilehins@gmail.com" -> "Mihails Milehins", "minamide@is.titech.ac.jp" -> "Yasuhiko Minamide", "mnacho.echenim@univ-grenoble-alpes.fr" -> "Mnacho Echenim", "mnfrd.krbr@gmail.com" -> "Manfred Kerber", "mohammad.abdulaziz8@gmail.com" -> "Mohammad Abdulaziz", "mohammad.abdulaziz@in.tum.de" -> "Mohammad Abdulaziz", "mr644@cam.ac.uk" -> "Michael Rawson", "mrtnrau@googlemail.com" -> "Martin Rau", "nanjiang@whu.edu.cn" -> "Nan Jiang", "Nicolas.Peltier@imag.fr" -> "Nicolas Peltier", "nipkow@in.tum.de" -> "Tobias Nipkow", "noschinl@gmail.com" -> "Lars Noschinski", "pagano@famaf.unc.edu.ar" -> "Miguel Pagano", "pc@cs.st-andrews.ac.uk" -> "Peter Chapman", "peter.lammich@uni-muenster.de" -> "Peter Lammich", "peter@hoefner-online.de" -> "Peter Höfner", "psterraf@unc.edu.ar" -> "Pedro Sánchez Terraf", "ralph.bottesch@uibk.ac.at" -> "Ralph Bottesch", "reza.sefidgar@inf.ethz.ch" -> "S. Reza Sefidgar", "richter@informatik.rwth-aachen.de" -> "Stefan Richter", "roelofoosterhuis@gmail.com" -> "Roelof Oosterhuis", "rok@strnisa.com" -> "Rok Strniša", "rosskops@in.tum.de" -> "Simon Roßkopf", "s.griebel@tum.de" -> "Simon Griebel", "s.j.c.joosten@utwente.nl" -> "Sebastiaan J. C. Joosten", "samo@dtu.dk" -> "Sebastian Mödersheim", "schirmer@in.tum.de" -> "Norbert Schirmer", "sidney.amani@data61.csiro.au" -> "Sidney Amani", "sjcjoosten@gmail.com" -> "Sebastiaan J. C. Joosten", "stepan.starosta@fit.cvut.cz" -> "Štěpán Starosta", "Stephan.Merz@loria.fr" -> "Stephan Merz", "sterraf@famaf.unc.edu.ar" -> "Pedro Sánchez Terraf", "stourret@mpi-inf.mpg.de" -> "Sophie Tourret", "stvienna@gmail.com" -> "Stephan Adelsberger", "susannahej@gmail.com" -> "Susannah Mansky", "tdardini@student.ethz.ch" -> "Thibault Dardinier", "tim@tbrk.org" -> "Timothy Bourke", "toby.murray@unimelb.edu.au" -> "Toby Murray", "traytel@di.ku.dk" -> "Dmitriy Traytel", "traytel@in.tum.de" -> "Dmitriy Traytel", "traytel@inf.ethz.ch" -> "Dmitriy Traytel", "tsato@c.titech.ac.jp" -> "Tetsuya Sato", "uuomul@yahoo.com" -> "Andrei Popescu", "walter.guttman@canterbury.ac.nz" -> "Walter Guttmann", "walter.guttmann@canterbury.ac.nz" -> "Walter Guttmann", "wimmers@in.tum.de" -> "Simon Wimmer", "wl302@cam.ac.uk" -> "Wenda Li", "yongkiat@cs.cmu.edu" -> "Yong Kiam Tan", "Yutaka.Nagashima@data61.csiro.au" -> "Yutaka Nagashima") var dates = List( "2020-14-04" -> "2020-04-14", "2020-15-04" -> "2020-04-15") var overwrite = false val getopts = Getopts(""" Usage: isabelle afp_migrate_metadata [OPTIONS] Options are: -B DIR afp base dir (default "$AFP_BASE") -n FROM,TO names to convert (default: """ + names.mkString("\n ") + """) -e EMAIL,AUTHOR emails to associate (default: """ + emails.mkString("\n ") + """) -d FROM,TO date strings to convert (default: """ + dates.mkString("\n ") + """) -f overwrite existing Migrates old sitegen metadata to new format. """, "B:" -> (arg => base_dir = Path.explode(arg)), "n:" -> (arg => names ::= arg.splitAt(arg.indexOf(","))), "e:" -> (arg => emails ::= arg.splitAt(arg.indexOf(","))), "d:" -> (arg => dates ::= arg.splitAt(arg.indexOf(","))), "f" -> (_ => overwrite = true) ) getopts(args) val progress = new Console_Progress() val context = new Context(names.toMap, emails.toMap, dates.toMap) val options = Options.init() migrate_metadata( base_dir = base_dir, context = context, overwrite = overwrite, options = options, progress = progress) } ) } \ No newline at end of file