diff --git a/tools/afp_check_metadata.scala b/tools/afp_check_metadata.scala --- a/tools/afp_check_metadata.scala +++ b/tools/afp_check_metadata.scala @@ -1,179 +1,202 @@ /* Author: Fabian Huch, TU Muenchen Tool to check metadata consistency. */ package afp import isabelle.* import afp.Metadata.{Author, DOI, Email, Homepage, TOML, Topic} import isabelle.TOML.{parse, Format, Key, Table} object AFP_Check_Metadata { def diff(t1: Table, t2: Table): List[Key] = (t1.domain diff t2.domain).toList ++ t1.table.values.flatMap { case (k, tr1) => t2.table.get(k).toList.flatMap(diff(tr1, _)) } def afp_check_metadata( - strict: Boolean, - reformat: Boolean, - slow: Boolean, afp_structure: AFP_Structure, - verbose: Boolean, - progress: Progress + strict: Boolean = false, + reformat: Boolean = false, + format_all: Boolean = false, + slow: Boolean = false, + verbose: Boolean = false, + progress: Progress = new Progress ): Unit = { def warn(msg: String): Unit = if (strict) error(msg) else progress.echo_warning(msg) progress.echo_if(verbose, "Loading metadata...") val orig_authors = afp_structure.load_authors val orig_topics = afp_structure.load_topics val orig_licenses = afp_structure.load_licenses val orig_releases = afp_structure.load_releases val authors = orig_authors.map(author => author.id -> author).toMap val topics = Utils.grouped_sorted(orig_topics.flatMap(_.all_topics), (t: Topic) => t.id) val licenses = orig_licenses.map(license => license.id -> license).toMap val releases = orig_releases.groupBy(_.entry) val entries = afp_structure.entries.map(name => afp_structure.load_entry(name, authors, topics, licenses, releases)) /* TOML encoding/decoding */ def check_toml[A](kind: String, a: A, from: A => Table, to: Table => A): Unit = if (to(from(a)) != a) error("Inconsistent toml encode/decode: " + kind) progress.echo_if(verbose, "Checking toml conversions...") check_toml("authors", authors.values.toList, TOML.from_authors, TOML.to_authors) check_toml("topics", orig_topics, TOML.from_topics, TOML.to_topics) check_toml("licenses", licenses.values.toList, TOML.from_licenses, TOML.to_licenses) check_toml("releases", releases.values.flatten.toList, TOML.from_releases, TOML.to_releases) entries.foreach(entry => check_toml("entry " + entry.name, entry, TOML.from_entry, t => TOML.to_entry(entry.name, t, authors, topics, licenses, releases.getOrElse(entry.name, Nil)))) /* duplicate ids */ var seen_ids = Set.empty[String] def check_id(id: String): Unit = if (seen_ids.contains(id)) error("Duplicate id: " + id) else seen_ids += id progress.echo_if(verbose, "Checking for duplicate ids...") authors.values.foreach { author => check_id(author.id) author.emails.map(_.id).foreach(check_id) author.homepages.map(_.id).foreach(check_id) } topics.values.map(_.id).foreach(check_id) licenses.values.map(_.id).foreach(check_id) entries.map(_.name).foreach(check_id) /* unread fields */ progress.echo_if(verbose, "Checking for unused fields...") def check_unused_toml[A](file: Path, to: Table => A, from: A => Table): Unit = { val toml = parse(File.read(file)) val recoded = from(to(toml)) val diff_keys = diff(parse(File.read(file)), recoded) if (diff_keys.nonEmpty) warn("Unused fields: " + commas_quote(diff_keys)) } check_unused_toml(afp_structure.authors_file, TOML.to_authors, TOML.from_authors) check_unused_toml(afp_structure.topics_file, TOML.to_topics, TOML.from_topics) check_unused_toml(afp_structure.licenses_file, TOML.to_licenses, TOML.from_licenses) check_unused_toml(afp_structure.releases_file, TOML.to_releases, TOML.from_releases) entries.foreach(entry => check_unused_toml(afp_structure.entry_file(entry.name), t => TOML.to_entry(entry.name, t, authors, topics, licenses, releases.getOrElse(entry.name, Nil)), TOML.from_entry)) /* unused values */ def warn_unused(name: String, unused: Set[String]): Unit = if (unused.nonEmpty) warn("Extra (unused) " + name + ": " + commas_quote(unused.toList)) progress.echo_if(verbose, "Checking for unused values...") val all_affils = entries.flatMap(entry => entry.authors ++ entry.contributors ++ entry.notifies) warn_unused("authors", authors.keySet diff all_affils.map(_.author).toSet) def author_affil_id(author: Author.ID, affil: String): String = author + " " + affil val affils = authors.values.flatMap(author => (author.emails.map(_.id) ++ author.homepages.map(_.id)).map(author_affil_id(author.id, _))) val used_affils = all_affils.collect { case Email(author, id, _) => author_affil_id(author, id) case Homepage(author, id, _) => author_affil_id(author, id) } warn_unused("affiliations", affils.toSet diff used_affils.toSet) val leaf_topics = topics.values.filter(_.sub_topics.isEmpty).map(_.id) warn_unused("topics", leaf_topics.toSet diff entries.flatMap(_.topics).map(_.id).toSet) warn_unused("licenses", licenses.keySet diff entries.map(_.license.id).toSet) /* formatting of commonly patched files */ - if (reformat) afp_structure.save_authors(orig_authors) + if (reformat) { + afp_structure.save_authors(orig_authors) + + if (format_all) { + afp_structure.save_topics(orig_topics) + afp_structure.save_licenses(orig_licenses) + afp_structure.save_releases(orig_releases) + entries.foreach(afp_structure.save_entry) + } + } else { def check_toml_format(toml: Table, file: Path): Unit = { val present = File.read(file) val formatted = Format(toml) if (present != formatted) progress.echo_warning("Badly formatted toml: " + file) } progress.echo_if(verbose, "Checking formatting...") check_toml_format(TOML.from_authors(orig_authors), afp_structure.authors_file) + + if (format_all) { + check_toml_format(TOML.from_topics(topics.values.toList), afp_structure.topics_file) + check_toml_format(TOML.from_licenses(licenses.values.toList), afp_structure.licenses_file) + check_toml_format(TOML.from_releases(releases.values.toList.flatten), + afp_structure.releases_file) + entries.foreach(entry => + check_toml_format(TOML.from_entry(entry), afp_structure.entry_file(entry.name))) + } } /* extra */ if (slow) { progress.echo_if(verbose, "Checking DOIs...") entries.flatMap(entry => entry.related).collect { case d: DOI => d.formatted() } } progress.echo_if(verbose, "Checked " + authors.size + " authors with " + affils.size + " affiliations, " + topics.size + " topics, " + releases.values.flatten.size + " releases, " + licenses.size + " licenses, and " + entries.size + " entries.") } val isabelle_tool = Isabelle_Tool("afp_check_metadata", "Checks the AFP metadata files", Scala_Project.here, { args => var slow = false var reformat = false + var format_all = false var strict = false var verbose = false val getopts = Getopts(""" Usage: isabelle afp_check_metadata [OPTIONS] Options are: + -a check formatting of all metadata -s activate slow checks -v verbose -R reformat metadata files -S strict mode (fail on warnings) Check AFP metadata files for consistency. """, + "a" -> (_ => format_all = true), "s" -> (_ => slow = true), "v" -> (_ => verbose = true), "R" -> (_ => reformat = true), "S" -> (_ => strict = true)) getopts(args) val progress = new Console_Progress() val afp_structure = AFP_Structure() - afp_check_metadata(strict, reformat, slow, afp_structure, verbose, progress) + afp_check_metadata(afp_structure, strict = strict, reformat = reformat, format_all = format_all, + slow = slow, verbose = verbose, progress = progress) }) } diff --git a/tools/afp_structure.scala b/tools/afp_structure.scala --- a/tools/afp_structure.scala +++ b/tools/afp_structure.scala @@ -1,126 +1,129 @@ /* Author: Fabian Huch, TU Muenchen AFP Metadata file structure with save and load operations. */ package afp import isabelle.* class AFP_Structure private(val base_dir: Path) { /* files */ val metadata_dir = base_dir + Path.basic("metadata") val thys_dir = AFP.main_dir(base_dir) def entry_thy_dir(name: Metadata.Entry.Name): Path = thys_dir + Path.basic(name) val authors_file = metadata_dir + Path.basic("authors.toml") val releases_file = metadata_dir + Path.basic("releases.toml") val licenses_file = metadata_dir + Path.basic("licenses.toml") val topics_file = metadata_dir + Path.basic("topics.toml") val entries_dir = metadata_dir + Path.basic("entries") def entry_file(name: Metadata.Entry.Name): Path = entries_dir + Path.basic(name + ".toml") /* load */ private def load[A](file: Path, parser: isabelle.TOML.Table => A): A = { val content = File.read(file) val toml = try { TOML.parse(content) } catch { case ERROR(msg) => error("Could not parse " + file.toString + ": " + msg) } parser(toml) } def load_authors: List[Metadata.Author] = load(authors_file, Metadata.TOML.to_authors) def load_releases: List[Metadata.Release] = load(releases_file, Metadata.TOML.to_releases) def load_licenses: List[Metadata.License] = load(licenses_file, Metadata.TOML.to_licenses) def load_topics: List[Metadata.Topic] = load(topics_file, Metadata.TOML.to_topics) def load_entry(name: Metadata.Entry.Name, authors: Map[Metadata.Author.ID, Metadata.Author], topics: Map[Metadata.Topic.ID, Metadata.Topic], licenses: Map[Metadata.License.ID, Metadata.License], releases: Map[Metadata.Entry.Name, List[Metadata.Release]] ): Metadata.Entry = { val entry_releases = releases.getOrElse(name, Nil) load(entry_file(name), toml => Metadata.TOML.to_entry(name, toml, authors, topics, licenses, entry_releases)) } def load(): List[Metadata.Entry] = { val authors = load_authors.map(author => author.id -> author).toMap val topics = Utils.grouped_sorted(load_topics.flatMap(_.all_topics), (t: Metadata.Topic) => t.id) val licenses = load_licenses.map(license => license.id -> license).toMap val releases = load_releases.groupBy(_.entry) entries.map(name => load_entry(name, authors, topics, licenses, releases)) } /* save */ private def save(file: Path, content: isabelle.TOML.Table): Unit = { file.dir.file.mkdirs() File.write(file, TOML.Format(content)) } def save_authors(authors: List[Metadata.Author]): Unit = save(authors_file, Metadata.TOML.from_authors(authors)) def save_releases(releases: List[Metadata.Release]): Unit = save(releases_file, Metadata.TOML.from_releases(releases)) def save_topics(topics: List[Metadata.Topic]): Unit = save(topics_file, Metadata.TOML.from_topics(topics)) + def save_licenses(licenses: List[Metadata.License]): Unit = + save(licenses_file, Metadata.TOML.from_licenses(licenses)) + def save_entry(entry: Metadata.Entry): Unit = save(entry_file(entry.name), Metadata.TOML.from_entry(entry)) /* sessions */ def entries_unchecked: List[Metadata.Entry.Name] = { val Entry = """([a-zA-Z0-9+_-]+)\.toml""".r File.read_dir(entries_dir).map { case Entry(name) => name case f => error("Unrecognized file in metadata: " + f) } } def entries: List[Metadata.Entry.Name] = { val session_entries = Sessions.parse_roots(thys_dir + Path.basic("ROOTS")) val session_set = session_entries.toSet val metadata_set = entries_unchecked.toSet if (session_set != metadata_set) { val inter = session_set.intersect(metadata_set) val session_without_metadata = if (session_set.subsetOf(inter)) "" else "No metadata for session in ROOTS: " + commas_quote(session_set -- inter) val metadata_without_session = if (metadata_set.subsetOf(inter)) "" else "Metadata entries missing in ROOTS: " + commas_quote(metadata_set -- inter) error("Metadata does not match sessions:\n" + session_without_metadata + metadata_without_session) } else session_entries } def sessions_structure: Sessions.Structure = Sessions.load_structure(options = Options.init(), select_dirs = List(thys_dir)) def entry_sessions(name: Metadata.Entry.Name): List[Sessions.Session_Entry] = Sessions.parse_root(thys_dir + Path.make(List(name, "ROOT"))).collect { case e: Sessions.Session_Entry => e } def hg_id: String = Mercurial.repository(base_dir).id() } object AFP_Structure { def apply(base_dir: Path = AFP.BASE): AFP_Structure = new AFP_Structure(base_dir.absolute) }