diff --git a/src/Pure/General/date.scala b/src/Pure/General/date.scala --- a/src/Pure/General/date.scala +++ b/src/Pure/General/date.scala @@ -1,106 +1,122 @@ /* Title: Pure/General/date.scala Author: Makarius Date and time, with time zone. */ package isabelle import java.util.Locale import java.time.{Instant, ZonedDateTime, LocalTime, ZoneId, OffsetDateTime} import java.time.format.{DateTimeFormatter, DateTimeParseException} import java.time.temporal.TemporalAccessor import scala.annotation.tailrec +import scala.math.Ordering object Date { /* format */ object Format { def make(fmts: List[DateTimeFormatter], tune: String => String = s => s): Format = { require(fmts.nonEmpty, "no date formats") new Format { def apply(date: Date): String = fmts.head.format(date.rep) def parse(str: String): Date = new Date(ZonedDateTime.from(Formatter.try_variants(fmts, tune(str)))) } } def apply(pats: String*): Format = make(pats.toList.map(Date.Formatter.pattern)) val default: Format = Format("dd-MMM-uuuu HH:mm:ss xx") val date: Format = Format("dd-MMM-uuuu") val time: Format = Format("HH:mm:ss") val alt_date: Format = Format("uuuuMMdd") } abstract class Format private { def apply(date: Date): String def parse(str: String): Date def unapply(str: String): Option[Date] = try { Some(parse(str)) } catch { case _: DateTimeParseException => None } } object Formatter { def pattern(pat: String): DateTimeFormatter = DateTimeFormatter.ofPattern(pat) def variants(pats: List[String], locs: List[Locale] = Nil): List[DateTimeFormatter] = pats.flatMap(pat => { val fmt = pattern(pat) if (locs.isEmpty) List(fmt) else locs.map(fmt.withLocale) }) @tailrec def try_variants(fmts: List[DateTimeFormatter], str: String, last_exn: Option[DateTimeParseException] = None): TemporalAccessor = { fmts match { case Nil => throw last_exn.getOrElse(new DateTimeParseException("Failed to parse date", str, 0)) case fmt :: rest => try { ZonedDateTime.from(fmt.parse(str)) } catch { case exn: DateTimeParseException => try_variants(rest, str, Some(exn)) } } } } + /* ordering */ + + object Ordering extends scala.math.Ordering[Date] + { + def compare(date1: Date, date2: Date): Int = + date1.instant.compareTo(date2.instant) + } + + object Rev_Ordering extends scala.math.Ordering[Date] + { + def compare(date1: Date, date2: Date): Int = + date2.instant.compareTo(date1.instant) + } + + /* date operations */ def timezone_utc: ZoneId = ZoneId.of("UTC") def timezone_berlin: ZoneId = ZoneId.of("Europe/Berlin") def timezone(): ZoneId = ZoneId.systemDefault def now(zone: ZoneId = timezone()): Date = new Date(ZonedDateTime.now(zone)) def instant(t: Instant, zone: ZoneId = timezone()): Date = new Date(ZonedDateTime.ofInstant(t, zone)) def apply(t: Time, zone: ZoneId = timezone()): Date = instant(t.instant, zone) } sealed case class Date(rep: ZonedDateTime) { def midnight: Date = new Date(ZonedDateTime.of(rep.toLocalDate, LocalTime.MIDNIGHT, rep.getZone)) def to(zone: ZoneId): Date = new Date(rep.withZoneSameInstant(zone)) def unix_epoch: Long = rep.toEpochSecond def instant: Instant = Instant.from(rep) def time: Time = Time.instant(instant) def timezone: ZoneId = rep.getZone def format(fmt: Date.Format = Date.Format.default): String = fmt(this) override def toString: String = format() } diff --git a/src/Pure/General/mailman.scala b/src/Pure/General/mailman.scala --- a/src/Pure/General/mailman.scala +++ b/src/Pure/General/mailman.scala @@ -1,346 +1,393 @@ /* Title: Pure/General/mailman.scala Author: Makarius Support for Mailman list servers. */ package isabelle import java.net.URL import scala.annotation.tailrec import scala.util.matching.Regex import scala.util.matching.Regex.Match object Mailman { /* mailing list messages */ sealed case class Message( name: String, date: Date, title: String, author_name: String, author_address: String, body: String) { + override def toString: String = name def print: String = name + "\n" + date + "\n" + title + "\n" + quote(author_name) + "\n" + "<" + author_address + ">\n" } private class Message_Chunk(bg: String = "", en: String = "") { def unapply(lines: List[String]): Option[List[String]] = { val res1 = if (bg.isEmpty) Some(lines) else { lines.dropWhile(_ != bg) match { case Nil => None case _ :: rest => Some(rest) } } if (en.isEmpty) res1 else { res1 match { case None => None case Some(lines1) => val lines2 = lines1.takeWhile(_ != en) if (lines1.drop(lines2.length).isEmpty) None else Some(lines2) } } } def get(lines: List[String]): List[String] = unapply(lines) getOrElse error("Missing delimiters:" + (if (bg.nonEmpty) " " else "") + bg + (if (en.nonEmpty) " " else "") + en) } + /* integrity check */ + + def check_messages(msgs: List[Message]): Unit = + { + val msgs_sorted = msgs.sortBy(_.date)(Date.Ordering) + val msgs_proper = + msgs_sorted.filter(msg => msg.author_name.nonEmpty && msg.author_address.nonEmpty) + + val address_name = Multi_Map.from(msgs_proper.map(msg => (msg.author_address, msg.author_name))) + val name_address = Multi_Map.from(msgs_proper.map(msg => (msg.author_name, msg.author_address))) + + def print_dup(a: String, bs: List[String]): String = + " * " + a + bs.mkString("\n ", "\n ", "") + + def print_dups(title: String, m: Multi_Map[String, String]): Unit = + { + val dups = m.iterator_list.filter(p => p._2.length > 1).toList + if (dups.nonEmpty) { + Output.writeln(cat_lines(title :: dups.map(p => print_dup(p._1, p._2)))) + } + } + + print_dups("duplicate names:", address_name) + print_dups("duplicate addresses:", name_address) + + def get_name(msg: Message): Option[String] = + proper_string(msg.author_name) orElse address_name.get(msg.author_address) + + val unnamed = + msgs_sorted.flatMap(msg => if (get_name(msg).isEmpty) Some(msg.author_address) else None) + .toSet.toList.sorted + + if (unnamed.nonEmpty) { + Output.writeln(("unnamed:" :: unnamed).mkString("\n", "\n ", "")) + } + } + + /* mailing list archives */ abstract class Archive( url: URL, name: String = "", tag: String = "") { def message_regex: Regex def message_content(name: String, lines: List[String]): Message def message_match(lines: List[String], re: Regex): Option[Match] = lines.flatMap(re.findFirstMatchIn(_)).headOption def make_name(str: String): String = str.replaceAll("""\s+""", " ") def make_address(s: String): String = HTML.input(s).replace(" at ", "@") def make_body(lines: List[String]): String = cat_lines(Library.take_suffix[String](_.isEmpty, lines)._1) private val main_url: URL = Url(Library.take_suffix[Char](_ == '/', Url.trim_index(url).toString.toList)._1.mkString + "/") private val main_html = Url.read(main_url) val list_name: String = { val title = """The ([^</>]*) Archives""".r.findFirstMatchIn(main_html).map(_.group(1)) (proper_string(name) orElse title).getOrElse(error("Failed to determine mailing list name")) } override def toString: String = list_name def full_name(href: String): String = list_name + "/" + href def list_tag: String = proper_string(tag).getOrElse(list_name) def read_text(href: String): String = Url.read(new URL(main_url, href)) def hrefs_text: List[String] = """href="([^"]+\.txt(?:\.gz)?)"""".r.findAllMatchIn(main_html).map(_.group(1)).toList def hrefs_msg: List[String] = (for { href <- """href="([^"]+)/date.html"""".r.findAllMatchIn(main_html).map(_.group(1)) html = read_text(href + "/date.html") msg <- message_regex.findAllMatchIn(html).map(_.group(1)) } yield href + "/" + msg).toList def get(target_dir: Path, href: String, progress: Progress = new Progress): Option[Path] = { val dir = target_dir + Path.basic(list_name) val path = dir + Path.explode(href) val url = new URL(main_url, href) val connection = url.openConnection try { val length = connection.getContentLengthLong val timestamp = connection.getLastModified val file = path.file if (file.isFile && file.length == length && file.lastModified == timestamp) None else { Isabelle_System.make_directory(path.dir) progress.echo("Getting " + url) val bytes = using(connection.getInputStream)(Bytes.read_stream(_, hint = length.toInt max 1024)) Bytes.write(file, bytes) file.setLastModified(timestamp) Some(path) } } finally { connection.getInputStream.close() } } def download_text(target_dir: Path, progress: Progress = new Progress): List[Path] = hrefs_text.flatMap(get(target_dir, _, progress = progress)) def download_msg(target_dir: Path, progress: Progress = new Progress): List[Path] = hrefs_msg.flatMap(get(target_dir, _, progress = progress)) def download(target_dir: Path, progress: Progress = new Progress): List[Path] = download_text(target_dir, progress = progress) ::: download_msg(target_dir, progress = progress) def make_title(str: String): String = { val Trim1 = ("""\s*\Q[""" + list_tag + """]\E\s*(.*)""").r val Trim2 = """(?i:(?:re|fw|fwd)\s*:\s*)(.*)""".r val Trim3 = """\[\s*(.*?)\s*\]""".r @tailrec def trim(s: String): String = s match { case Trim1(s1) => trim(s1) case Trim2(s1) => trim(s1) case Trim3(s1) => trim(s1) case _ => s } trim(str) } def get_messages(): List[Message] = for (href <- hrefs_msg) yield message_content(href, split_lines(read_text(href))) def find_messages(dir: Path): List[Message] = { for { file <- File.find_files(dir.file, file => file.getName.endsWith(".html")) rel_path <- File.relative_path(dir, File.path(file)) } yield { val name = full_name(rel_path.implode) message_content(name, split_lines(File.read(file))) } } } /* Isabelle mailing lists */ object Isabelle_Users extends Archive( Url("https://lists.cam.ac.uk/pipermail/cl-isabelle-users"), name = "isabelle-users", tag = "isabelle") { override def message_regex: Regex = """
  • """.r private object Head extends Message_Chunk(bg = "", en = "") private object Body extends Message_Chunk(bg = "", en = "") private object Date_Format { private val Trim1 = """\w+,\s*(.*)""".r private val Trim2 = """(.*?)\s*\(.*\)""".r private val Format = Date.Format( "d MMM uuuu H:m:s Z", "d MMM uuuu H:m:s z", "d MMM yy H:m:s Z", "d MMM yy H:m:s z") def unapply(s: String): Option[Date] = { val s0 = s.replaceAll("""\s+""", " ") val s1 = s0 match { case Trim1(s1) => s1 case _ => s0 } val s2 = s1 match { case Trim2(s2) => s2 case _ => s1 } Format.unapply(s2) } } override def make_name(str: String): String = { val Trim = """(.*?)\s*via\s+Cl-isabelle-users""".r super.make_name(str) match { case Trim(s) => s case s => s } } + override def make_address(str: String): String = + { + val s = super.make_address(make_name(str)) + if (s == "cl-isabelle-users@lists.cam.ac.uk") "" else s + } + object Address { private def anchor(s: String): String = """""" + s + """""" private def angl(s: String): String = """<""" + s + """>""" private def quot(s: String): String = """"""" + s + """"""" private def paren(s: String): String = """\(""" + s + """\)""" private val adr = """([^<>]*? at [^<>]*?)""" private val any = """([^<>]*?)""" private val spc = """\s*""" private val Name1 = quot(anchor(any)).r private val Name2 = quot(any).r private val Name_Adr1 = (quot(anchor(any)) + spc + angl(anchor(adr))).r private val Name_Adr2 = (quot(any) + spc + angl(anchor(adr))).r private val Name_Adr3 = (anchor(any) + spc + angl(anchor(adr))).r private val Name_Adr4 = (any + spc + angl(anchor(adr))).r private val Adr_Name1 = (angl(anchor(adr)) + spc + paren(any)).r private val Adr_Name2 = (anchor(adr) + spc + paren(any)).r private val Adr1 = angl(anchor(adr)).r private val Adr2 = anchor(adr).r def unapply(s: String): Option[(String, String)] = s match { case Name1(a) => Some((make_address(a), "")) case Name2(a) => Some((make_address(a), "")) case Name_Adr1(a, b) => Some((make_address(a), make_address(b))) case Name_Adr2(a, b) => Some((make_address(a), make_address(b))) case Name_Adr3(a, b) => Some((make_address(a), make_address(b))) case Name_Adr4(a, b) => Some((make_address(a), make_address(b))) case Adr_Name1(b, a) => Some((make_address(a), make_address(b))) case Adr_Name2(b, a) => Some((make_address(a), make_address(b))) case Adr1(a) => Some(("", make_address(a))) case Adr2(a) => Some(("", make_address(a))) case _ => None } } override def message_content(name: String, lines: List[String]): Message = { def err(msg: String = ""): Nothing = error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg)) val (head, body) = try { (Head.get(lines), make_body(Body.get(lines))) } catch { case ERROR(msg) => err(msg) } val date = message_match(head, """
  • Date:\s*(.*?)\s*
  • """.r) .map(m => HTML.input(m.group(1))) match { case Some(Date_Format(d)) => d case Some(s) => err("Malformed Date: " + quote(s)) case None => err("Missing Date") } val title = make_title( HTML.input(message_match(head, """
  • Subject:\s*(.*)
  • """.r) .getOrElse(err("Missing Subject")).group(1))) val (author_name, author_address) = message_match(head, """
  • From:\s*(.*?)\s*
  • """.r) match { case None => err("Missing From") - case Some(m) => Address.unapply(m.group(1)) getOrElse err("Malformed From") + case Some(m) => + val (a, b) = Address.unapply(m.group(1)) getOrElse err("Malformed From") + (if (a == b) "" else a, b) } Message(name, date, title, author_name, author_address, body) } } object Isabelle_Dev extends Archive(Url("https://mailmanbroy.in.tum.de/pipermail/isabelle-dev")) { override def message_regex: Regex = """
  • """.r private object Head extends Message_Chunk(en = "") private object Body extends Message_Chunk(bg = "", en = "") private object Date_Format { val Format = Date.Format("E MMM d H:m:s z uuuu") def unapply(s: String): Option[Date] = Format.unapply(s.replaceAll("""\s+""", " ")) } override def message_content(name: String, lines: List[String]): Message = { def err(msg: String = ""): Nothing = error("Malformed message: " + name + (if (msg.isEmpty) "" else "\n" + msg)) val (head, body) = try { (Head.get(lines), make_body(Body.get(lines))) } catch { case ERROR(msg) => err(msg) } val date = message_match(head, """\s*(.*)""".r).map(m => HTML.input(m.group(1))) match { case Some(Date_Format(d)) => d case Some(s) => err("Malformed Date: " + quote(s)) case None => err("Missing Date") } val (title, author_address) = { message_match(head, """TITLE="([^"]+)">(.*)""".r) match { case Some(m) => (make_title(HTML.input(m.group(1))), make_address(HTML.input(m.group(2)))) case None => err("Missing TITLE") } } val author_name = message_match(head, """\s*(.*)""".r) match { case None => err("Missing author") case Some(m) => make_name(HTML.input(m.group(1))) } Message(name, date, title, author_name, author_address, body) } } }