diff --git a/src/Pure/Tools/build_log.scala b/src/Pure/Tools/build_log.scala --- a/src/Pure/Tools/build_log.scala +++ b/src/Pure/Tools/build_log.scala @@ -1,297 +1,357 @@ /* Title: Pure/Tools/build_log.scala Author: Makarius Build log parsing for historic versions, back to "build_history_base". */ package isabelle import java.time.ZonedDateTime import java.time.format.{DateTimeFormatter, DateTimeParseException} import scala.collection.mutable import scala.util.matching.Regex object Build_Log { /** settings **/ object Settings { val build_settings = List("ISABELLE_BUILD_OPTIONS") val ml_settings = List("ML_PLATFORM", "ML_HOME", "ML_SYSTEM", "ML_OPTIONS") val all_settings = build_settings ::: ml_settings type Entry = (String, String) type T = List[Entry] object Entry { def unapply(s: String): Option[Entry] = s.indexOf('=') match { case -1 => None case i => val a = s.substring(0, i) val b = Library.perhaps_unquote(s.substring(i + 1)) Some((a, b)) } def apply(a: String, b: String): String = a + "=" + quote(b) def getenv(a: String): String = apply(a, Isabelle_System.getenv(a)) } def show(): String = cat_lines( build_settings.map(Entry.getenv(_)) ::: List("") ::: ml_settings.map(Entry.getenv(_))) } /** log file **/ object Log_File { def apply(path: Path): Log_File = { val (path_name, ext) = path.expand.split_ext val text = if (ext == "gz") File.read_gzip(path) else if (ext == "xz") File.read_xz(path) else File.read(path) apply(path_name.base.implode, text) } def apply(name: String, lines: List[String]): Log_File = new Log_File(name, lines) def apply(name: String, text: String): Log_File = Log_File(name, Library.trim_split_lines(text)) } class Log_File private(val name: String, val lines: List[String]) { log_file => override def toString: String = name def text: String = cat_lines(lines) def err(msg: String): Nothing = error("Error in log file " + quote(name) + ": " + msg) /* inlined content */ def find[A](f: String => Option[A]): Option[A] = lines.iterator.map(f).find(_.isDefined).map(_.get) def find_match(regex: Regex): Option[String] = lines.iterator.map(regex.unapplySeq(_)).find(res => res.isDefined && res.get.length == 1). map(res => res.get.head) /* settings */ def get_setting(a: String): Settings.Entry = Settings.Entry.unapply( lines.find(_.startsWith(a + "=")) getOrElse err("missing " + a)).get def get_settings(as: List[String]): Settings.T = as.map(get_setting(_)) /* properties (YXML) */ val xml_cache = new XML.Cache() def parse_props(text: String): Properties.T = xml_cache.props(XML.Decode.properties(YXML.parse_body(text))) def filter_props(prefix: String): List[Properties.T] = for (line <- lines; s <- Library.try_unprefix(prefix, line)) yield parse_props(s) def find_line(prefix: String): Option[String] = find(Library.try_unprefix(prefix, _)) def find_props(prefix: String): Option[Properties.T] = find_line(prefix).map(parse_props(_)) /* parse various formats */ def parse_session_info( - session_name: String, + default_name: String = "", command_timings: Boolean = false, ml_statistics: Boolean = false, task_statistics: Boolean = false): Session_Info = Build_Log.parse_session_info( - log_file, session_name, command_timings, ml_statistics, task_statistics) + log_file, default_name, command_timings, ml_statistics, task_statistics) - def parse_header: Header = Build_Log.parse_header(log_file) + def parse_header(): Header = Build_Log.parse_header(log_file) - def parse_info: Info = Build_Log.parse_info(log_file) + def parse_build_info(): Build_Info = Build_Log.parse_build_info(log_file) } /* session log: produced by "isabelle build" */ sealed case class Session_Info( session_name: String, session_timing: Properties.T, command_timings: List[Properties.T], ml_statistics: List[Properties.T], task_statistics: List[Properties.T]) private def parse_session_info( log_file: Log_File, default_name: String, command_timings: Boolean, ml_statistics: Boolean, task_statistics: Boolean): Session_Info = { val xml_cache = new XML.Cache() val session_name = log_file.find_line("\fSession.name = ") match { case None => default_name case Some(name) if default_name == "" || default_name == name => name case Some(name) => log_file.err("log from different session " + quote(name)) } val session_timing = log_file.find_props("\fTiming = ") getOrElse Nil val command_timings_ = if (command_timings) log_file.filter_props("\fcommand_timing = ") else Nil val ml_statistics_ = if (ml_statistics) log_file.filter_props("\fML_statistics = ") else Nil val task_statistics_ = if (task_statistics) log_file.filter_props("\ftask_statistics = ") else Nil Session_Info(session_name, session_timing, command_timings_, ml_statistics_, task_statistics_) } /* header and meta data */ object Header_Kind extends Enumeration { val ISATEST = Value("isatest") val AFP_TEST = Value("afp-test") val JENKINS = Value("jenkins") } sealed case class Header( kind: Header_Kind.Value, props: Properties.T, settings: List[(String, String)]) object Field { val build_host = "build_host" val build_start = "build_start" val build_end = "build_end" val isabelle_version = "isabelle_version" val afp_version = "afp_version" } object AFP { val Date_Format = Date.Format.make_patterns(List("EEE MMM d HH:mm:ss VV yyyy", "EEE MMM d HH:mm:ss O yyyy"), // workaround for jdk-8u102 s => Word.implode(Word.explode(s).map({ case "CEST" => "GMT+2" case a => a }))) val Test_Start = new Regex("""^Start test for .+ at (.+), (\w+)$""") val Test_End = new Regex("""^End test on (.+), \w+, elapsed time:.*$""") val Isabelle_Version = new Regex("""^Isabelle version: .* -- hg id (\w+)$""") val AFP_Version = new Regex("""^AFP version: .* -- hg id (\w+)$""") } private def parse_header(log_file: Log_File): Header = { log_file.lines match { case AFP.Test_Start(start, hostname) :: _ => (start, log_file.lines.last) match { case (AFP.Date_Format(start_date), AFP.Test_End(AFP.Date_Format(end_date))) => val isabelle_version = log_file.find_match(AFP.Isabelle_Version) getOrElse log_file.err("missing Isabelle version") val afp_version = log_file.find_match(AFP.AFP_Version) getOrElse log_file.err("missing AFP version") Header(Header_Kind.AFP_TEST, List( Field.build_host -> hostname, Field.build_start -> start_date.toString, Field.build_end -> end_date.toString, Field.isabelle_version -> isabelle_version, Field.afp_version -> afp_version), log_file.get_settings(Settings.all_settings)) case _ => log_file.err("cannot detect start/end date in afp-test log") } case _ => log_file.err("cannot detect log header format") } } + + /* build info: produced by isabelle build */ + object Session_Status extends Enumeration { - val UNKNOWN = Value("unknown") + val EXISTING = Value("existing") val FINISHED = Value("finished") val FAILED = Value("failed") val CANCELLED = Value("cancelled") } - - /* main log: produced by isatest, afp-test, jenkins etc. */ - - sealed case class Info( - settings: List[(String, String)], - finished: Map[String, Timing], - timing: Map[String, Timing], - threads: Map[String, Int]) + sealed case class Session_Entry( + chapter: String, + groups: List[String], + threads: Option[Int], + timing: Option[Timing], + ml_timing: Option[Timing], + status: Session_Status.Value) { - val sessions: Set[String] = finished.keySet ++ timing.keySet - - override def toString: String = - sessions.toList.sorted.mkString("Build_Log.Info(", ", ", ")") + def ok: Boolean = status == Session_Status.EXISTING || status == Session_Status.FINISHED + def finished: Boolean = status == Session_Status.FINISHED } - private val Session_Finished1 = - new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time, (\d+):(\d+):(\d+) cpu time.*$""") - private val Session_Finished2 = - new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time.*$""") - private val Session_Timing = - new Regex("""^Timing (\S+) \((\d) threads, (\d+\.\d+)s elapsed time, (\d+\.\d+)s cpu time, (\d+\.\d+)s GC time.*$""") + sealed case class Build_Info(sessions: Map[String, Session_Entry]) + { + def session(name: String): Session_Entry = sessions(name) + def get_session(name: String): Option[Session_Entry] = sessions.get(name) - private def parse_info(log_file: Log_File): Info = + def finished(name: String): Boolean = + get_session(name) match { + case Some(entry) => entry.finished + case None => false + } + + def timing(name: String): Timing = + (for (entry <- get_session(name); t <- entry.timing) yield t) getOrElse Timing.zero + + def ml_timing(name: String): Timing = + (for (entry <- get_session(name); t <- entry.ml_timing) yield t) getOrElse Timing.zero + } + + private def parse_build_info(log_file: Log_File): Build_Info = { - val settings = new mutable.ListBuffer[(String, String)] - var finished = Map.empty[String, Timing] + object Chapter_Name + { + def unapply(s: String): Some[(String, String)] = + space_explode('/', s) match { + case List(chapter, name) => Some((chapter, name)) + case _ => Some(("", s)) + } + } + + val Session_No_Groups = new Regex("""^Session (\S+)$""") + val Session_Groups = new Regex("""^Session (\S+) \((.*)\)$""") + val Session_Finished1 = + new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time, (\d+):(\d+):(\d+) cpu time.*$""") + val Session_Finished2 = + new Regex("""^Finished (\S+) \((\d+):(\d+):(\d+) elapsed time.*$""") + val Session_Timing = + new Regex("""^Timing (\S+) \((\d) threads, (\d+\.\d+)s elapsed time, (\d+\.\d+)s cpu time, (\d+\.\d+)s GC time.*$""") + val Session_Failed = new Regex("""^(\S+) FAILED""") + val Session_Cancelled = new Regex("""^(\S+) CANCELLED""") + + var chapter = Map.empty[String, String] + var groups = Map.empty[String, List[String]] + var threads = Map.empty[String, Int] var timing = Map.empty[String, Timing] - var threads = Map.empty[String, Int] + var ml_timing = Map.empty[String, Timing] + var failed = Set.empty[String] + var cancelled = Set.empty[String] + def all_sessions: Set[String] = + chapter.keySet ++ groups.keySet ++ threads.keySet ++ + timing.keySet ++ ml_timing.keySet ++ failed ++ cancelled + for (line <- log_file.lines) { line match { + case Session_No_Groups(Chapter_Name(chapt, name)) => + chapter += (name -> chapt) + groups += (name -> Nil) + case Session_Groups(Chapter_Name(chapt, name), grps) => + chapter += (name -> chapt) + groups += (name -> Word.explode(grps)) case Session_Finished1(name, Value.Int(e1), Value.Int(e2), Value.Int(e3), Value.Int(c1), Value.Int(c2), Value.Int(c3)) => val elapsed = Time.hms(e1, e2, e3) val cpu = Time.hms(c1, c2, c3) - finished += (name -> Timing(elapsed, cpu, Time.zero)) + timing += (name -> Timing(elapsed, cpu, Time.zero)) case Session_Finished2(name, Value.Int(e1), Value.Int(e2), Value.Int(e3)) => val elapsed = Time.hms(e1, e2, e3) - finished += (name -> Timing(elapsed, Time.zero, Time.zero)) + timing += (name -> Timing(elapsed, Time.zero, Time.zero)) case Session_Timing(name, Value.Int(t), Value.Double(e), Value.Double(c), Value.Double(g)) => val elapsed = Time.seconds(e) val cpu = Time.seconds(c) val gc = Time.seconds(g) - timing += (name -> Timing(elapsed, cpu, gc)) + ml_timing += (name -> Timing(elapsed, cpu, gc)) threads += (name -> t) - case Settings.Entry(a, b) if Settings.all_settings.contains(a) => - settings += (a -> b) case _ => } } - Info(settings.toList, finished, timing, threads) + val sessions = + Map( + (for (name <- all_sessions.toList) yield { + val status = + if (failed(name)) Session_Status.FAILED + else if (cancelled(name)) Session_Status.CANCELLED + else if (timing.isDefinedAt(name)) Session_Status.FINISHED + else Session_Status.EXISTING + val entry = + Session_Entry( + chapter.getOrElse(name, ""), + groups.getOrElse(name, Nil), + threads.get(name), + timing.get(name), + ml_timing.get(name), + status) + (name -> entry) + }):_*) + Build_Info(sessions) } } diff --git a/src/Pure/Tools/build_stats.scala b/src/Pure/Tools/build_stats.scala --- a/src/Pure/Tools/build_stats.scala +++ b/src/Pure/Tools/build_stats.scala @@ -1,187 +1,189 @@ /* Title: Pure/Tools/build_stats.scala Author: Makarius Statistics from session build output. */ package isabelle object Build_Stats { /* presentation */ private val default_history_length = 100 private val default_size = (800, 600) private val default_only_sessions = Set.empty[String] private val default_elapsed_threshold = Time.zero private val default_ml_timing: Option[Boolean] = None def present_job(job: String, dir: Path, history_length: Int = default_history_length, size: (Int, Int) = default_size, only_sessions: Set[String] = default_only_sessions, elapsed_threshold: Time = default_elapsed_threshold, ml_timing: Option[Boolean] = default_ml_timing): List[String] = { val job_infos = CI_API.build_job_builds(job).sortBy(_.timestamp).reverse.take(history_length) if (job_infos.isEmpty) error("No build infos for job " + quote(job)) val all_infos = Par_List.map((job_info: CI_API.Job_Info) => - (job_info.timestamp / 1000, job_info.read_output.parse_info), job_infos) + (job_info.timestamp / 1000, job_info.read_output.parse_build_info), job_infos) val all_sessions = (Set.empty[String] /: all_infos)( - { case (s, (_, info)) => s ++ info.sessions }) + { case (s, (_, info)) => s ++ info.sessions.keySet }) - def check_threshold(info: Build_Log.Info, session: String): Boolean = - info.finished.get(session) match { - case Some(t) => t.elapsed >= elapsed_threshold - case None => false - } + def check_threshold(info: Build_Log.Build_Info, session: String): Boolean = + (for (entry <- info.get_session(session); t <- entry.timing) + yield t.elapsed >= elapsed_threshold) getOrElse false val sessions = for { session <- (if (only_sessions.isEmpty) all_sessions else all_sessions & only_sessions) if all_infos.filter({ case (_, info) => check_threshold(info, session) }).length >= 3 } yield session Isabelle_System.mkdirs(dir) for (session <- sessions) { Isabelle_System.with_tmp_file(session, "png") { data_file => Isabelle_System.with_tmp_file(session, "gnuplot") { plot_file => val data = - for { (t, info) <- all_infos if info.finished.isDefinedAt(session) } + for { (t, info) <- all_infos if info.finished(session) } yield { - val finished = info.finished.getOrElse(session, Timing.zero) - val timing = info.timing.getOrElse(session, Timing.zero) - List(t.toString, finished.elapsed.minutes, finished.cpu.minutes, - timing.elapsed.minutes, timing.cpu.minutes, timing.gc.minutes).mkString(" ") + val timing1 = info.timing(session) + val timing2 = info.ml_timing(session) + List(t.toString, + timing1.elapsed.minutes, + timing1.cpu.minutes, + timing2.elapsed.minutes, + timing2.cpu.minutes, + timing2.gc.minutes).mkString(" ") } File.write(data_file, cat_lines(data)) val plots1 = List( """ using 1:3 smooth sbezier title "cpu time (smooth)" """, """ using 1:3 smooth csplines title "cpu time" """, """ using 1:2 smooth sbezier title "elapsed time (smooth)" """, """ using 1:2 smooth csplines title "elapsed time" """) val plots2 = List( """ using 1:5 smooth sbezier title "ML cpu time (smooth)" """, """ using 1:5 smooth csplines title "ML cpu time" """, """ using 1:4 smooth sbezier title "ML elapsed time (smooth)" """, """ using 1:4 smooth csplines title "ML elapsed time" """, """ using 1:6 smooth sbezier title "ML gc time (smooth)" """, """ using 1:6 smooth csplines title "ML gc time" """) val plots = ml_timing match { case None => plots1 case Some(false) => plots1 ::: plots2 case Some(true) => plots2 } val data_file_name = File.standard_path(data_file.getAbsolutePath) File.write(plot_file, """ set terminal png size """ + size._1 + "," + size._2 + """ set output """ + quote(File.standard_path(dir + Path.basic(session + ".png"))) + """ set xdata time set timefmt "%s" set format x "%d-%b" set xlabel """ + quote(session) + """ noenhanced set key left top plot [] [0:] """ + plots.map(s => quote(data_file_name) + " " + s).mkString(", ") + "\n") val result = Isabelle_System.bash("\"$ISABELLE_GNUPLOT\" " + File.bash_path(plot_file)) if (result.rc != 0) { Output.error_message("Session " + session + ": gnuplot error") result.print } } } } sessions.toList.sorted } /* Isabelle tool wrapper */ private val html_header = """ Performance statistics from session build output """ private val html_footer = """ """ val isabelle_tool = Isabelle_Tool("build_stats", "present statistics from session build output", args => { var target_dir = Path.explode("stats") var ml_timing = default_ml_timing var only_sessions = default_only_sessions var elapsed_threshold = default_elapsed_threshold var history_length = default_history_length var size = default_size val getopts = Getopts(""" Usage: isabelle build_stats [OPTIONS] [JOBS ...] Options are: -D DIR target directory (default "stats") -M only ML timing -S SESSIONS only given SESSIONS (comma separated) -T THRESHOLD only sessions with elapsed time >= THRESHOLD (minutes) -l LENGTH length of history (default 100) -m include ML timing -s WxH size of PNG image (default 800x600) Present statistics from session build output of the given JOBS, from Jenkins continuous build service specified as URL via ISABELLE_JENKINS_ROOT. """, "D:" -> (arg => target_dir = Path.explode(arg)), "M" -> (_ => ml_timing = Some(true)), "S:" -> (arg => only_sessions = space_explode(',', arg).toSet), "T:" -> (arg => elapsed_threshold = Time.minutes(Value.Double.parse(arg))), "l:" -> (arg => history_length = Value.Int.parse(arg)), "m" -> (_ => ml_timing = Some(false)), "s:" -> (arg => space_explode('x', arg).map(Value.Int.parse(_)) match { case List(w, h) if w > 0 && h > 0 => size = (w, h) case _ => error("Error bad PNG image size: " + quote(arg)) })) val jobs = getopts(args) val all_jobs = CI_API.build_jobs() val bad_jobs = jobs.filterNot(all_jobs.contains(_)).sorted if (jobs.isEmpty) error("No build jobs given. Available jobs: " + all_jobs.sorted.mkString(" ")) if (bad_jobs.nonEmpty) error("Unknown build jobs: " + bad_jobs.mkString(" ") + "\nAvailable jobs: " + all_jobs.sorted.mkString(" ")) for (job <- jobs) { val dir = target_dir + Path.basic(job) Output.writeln(dir.implode) val sessions = present_job(job, dir, history_length, size, only_sessions, elapsed_threshold, ml_timing) File.write(dir + Path.basic("index.html"), html_header + "\n

" + HTML.output(job) + "

\n" + cat_lines( sessions.map(session => """

""")) + "\n" + html_footer) } File.write(target_dir + Path.basic("index.html"), html_header + "\n\n" + html_footer) }) }