diff --git a/src/Pure/General/bytes.ML b/src/Pure/General/bytes.ML --- a/src/Pure/General/bytes.ML +++ b/src/Pure/General/bytes.ML @@ -1,84 +1,81 @@ (* Title: Pure/General/bytes.ML Author: Makarius Scalable byte strings, with incremental construction (add content to the end). Note: type string is implicitly limited by String.maxSize (approx. 64 MB on 64_32 architecture). *) signature BYTES = sig val chunk_size: int type T val length: T -> int val contents: T -> string list val content: T -> string val is_empty: T -> bool val empty: T val add_substring: substring -> T -> T val add: string -> T -> T val build: (T -> T) -> T val string: string -> T val buffer: Buffer.T -> T val read: Path.T -> T end; structure Bytes: BYTES = struct val chunk_size = 1024 * 1024; abstype T = Bytes of {buffer: string list, chunks: string list, m: int (*buffer size*), n: int (*chunks size*)} with fun length (Bytes {m, n, ...}) = m + n; val compact = implode o rev; fun contents (Bytes {buffer, chunks, ...}) = rev (chunks |> not (null buffer) ? cons (compact buffer)); val content = implode o contents; fun is_empty bytes = length bytes = 0; val empty = Bytes {buffer = [], chunks = [], m = 0, n = 0}; fun add_substring s (bytes as Bytes {buffer, chunks, m, n}) = if Substring.isEmpty s then bytes else let val l = Substring.size s in if l + m < chunk_size then Bytes {buffer = Substring.string s :: buffer, chunks = chunks, m = l + m, n = n} else let val k = chunk_size - m; val chunk = compact (Substring.string (Substring.slice (s, 0, SOME k)) :: buffer); val s' = Substring.slice (s, k, SOME (l - k)); val bytes' = Bytes {buffer = [], chunks = chunk :: chunks, m = 0, n = chunk_size + n}; in add_substring s' bytes' end end; val add = add_substring o Substring.full; end; fun build (f: T -> T) = f empty; val string = build o add; val buffer = build o fold add o Buffer.contents; -fun read_chunk stream = - Byte.bytesToString (BinIO.inputN (stream, chunk_size)); - val read = File.open_input (fn stream => let fun read_bytes bytes = - (case read_chunk stream of + (case File.input_size chunk_size stream of "" => bytes | s => read_bytes (add s bytes)) in read_bytes empty end); end; diff --git a/src/Pure/General/file.ML b/src/Pure/General/file.ML --- a/src/Pure/General/file.ML +++ b/src/Pure/General/file.ML @@ -1,183 +1,190 @@ (* Title: Pure/General/file.ML Author: Makarius File-system operations. *) signature FILE = sig val standard_path: Path.T -> string val platform_path: Path.T -> string val bash_path: Path.T -> string val bash_paths: Path.T list -> string val bash_platform_path: Path.T -> string val absolute_path: Path.T -> Path.T val full_path: Path.T -> Path.T -> Path.T val tmp_path: Path.T -> Path.T val exists: Path.T -> bool val rm: Path.T -> unit val is_dir: Path.T -> bool val is_file: Path.T -> bool val check_dir: Path.T -> Path.T val check_file: Path.T -> Path.T val open_dir: (OS.FileSys.dirstream -> 'a) -> Path.T -> 'a val open_input: (BinIO.instream -> 'a) -> Path.T -> 'a val open_output: (BinIO.outstream -> 'a) -> Path.T -> 'a val open_append: (BinIO.outstream -> 'a) -> Path.T -> 'a val fold_dir: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a val read_dir: Path.T -> string list + val input: BinIO.instream -> string + val input_size: int -> BinIO.instream -> string + val input_all: BinIO.instream -> string val fold_lines: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a val fold_pages: (string -> 'a -> 'a) -> Path.T -> 'a -> 'a val read_lines: Path.T -> string list val read_pages: Path.T -> string list val read: Path.T -> string val write: Path.T -> string -> unit val append: Path.T -> string -> unit val generate: Path.T -> string -> unit val output: BinIO.outstream -> string -> unit val write_list: Path.T -> string list -> unit val append_list: Path.T -> string list -> unit val write_buffer: Path.T -> Buffer.T -> unit val eq: Path.T * Path.T -> bool end; structure File: FILE = struct (* system path representations *) val standard_path = Path.implode o Path.expand; val platform_path = ML_System.platform_path o standard_path; val bash_path = Bash.string o standard_path; val bash_paths = Bash.strings o map standard_path; val bash_platform_path = Bash.string o platform_path; (* full_path *) val absolute_path = Path.expand #> (fn path => if Path.is_absolute path then path else Path.explode (ML_System.standard_path (OS.FileSys.getDir ())) + path); fun full_path dir path = let val path' = Path.expand path; val _ = Path.is_current path' andalso error "Bad file specification"; in absolute_path (dir + path') end; (* tmp_path *) fun tmp_path path = Path.variable "ISABELLE_TMP" + Path.base path; (* directory entries *) val exists = can OS.FileSys.modTime o platform_path; val rm = OS.FileSys.remove o platform_path; fun test_dir path = the_default false (try OS.FileSys.isDir (platform_path path)); fun is_dir path = exists path andalso test_dir path; fun is_file path = exists path andalso not (test_dir path); fun check_dir path = if is_dir path then path else error ("No such directory: " ^ Path.print (Path.expand path)); fun check_file path = if is_file path then path else error ("No such file: " ^ Path.print (Path.expand path)); (* open streams *) local fun with_file open_file close_file f = Thread_Attributes.uninterruptible (fn restore_attributes => fn path => let val file = open_file path; val result = Exn.capture (restore_attributes f) file; in close_file file; Exn.release result end); in fun open_dir f = with_file OS.FileSys.openDir OS.FileSys.closeDir f o platform_path; fun open_input f = with_file BinIO.openIn BinIO.closeIn f o platform_path; fun open_output f = with_file BinIO.openOut BinIO.closeOut f o platform_path; fun open_append f = with_file BinIO.openAppend BinIO.closeOut f o platform_path; end; (* directory content *) fun fold_dir f path a = check_dir path |> open_dir (fn stream => let fun read x = (case OS.FileSys.readDir stream of NONE => x | SOME entry => read (f entry x)); in read a end); fun read_dir path = sort_strings (fold_dir cons path []); (* input *) +val input = Byte.bytesToString o BinIO.input; +fun input_size n stream = Byte.bytesToString (BinIO.inputN (stream, n)); +val input_all = Byte.bytesToString o BinIO.inputAll; + (* scalable iterator: . avoid size limit of TextIO.inputAll and overhead of many TextIO.inputLine . optional terminator at end-of-input *) fun fold_chunks terminator f path a = open_input (fn file => let fun read buf x = - (case Byte.bytesToString (BinIO.input file) of + (case input file of "" => (case Buffer.content buf of "" => x | line => f line x) | input => (case String.fields (fn c => c = terminator) input of [rest] => read (Buffer.add rest buf) x | line :: more => read_lines more (f (Buffer.content (Buffer.add line buf)) x))) and read_lines [rest] x = read (Buffer.add rest Buffer.empty) x | read_lines (line :: more) x = read_lines more (f line x); in read Buffer.empty a end) path; fun fold_lines f = fold_chunks #"\n" f; fun fold_pages f = fold_chunks #"\f" f; fun read_lines path = rev (fold_lines cons path []); fun read_pages path = rev (fold_pages cons path []); -val read = open_input (Byte.bytesToString o BinIO.inputAll); +val read = open_input input_all; (* output *) fun output file txt = BinIO.output (file, Byte.stringToBytes txt); fun output_list txts file = List.app (output file) txts; fun write_list path txts = open_output (output_list txts) path; fun append_list path txts = open_append (output_list txts) path; fun write path txt = write_list path [txt]; fun append path txt = append_list path [txt]; fun generate path txt = if try read path = SOME txt then () else write path txt; fun write_buffer path buf = open_output (fn file => List.app (output file) (Buffer.contents buf)) path; (* eq *) fun eq paths = (case try (apply2 (OS.FileSys.fileId o platform_path)) paths of SOME ids => is_equal (OS.FileSys.compare ids) | NONE => false); end; diff --git a/src/Pure/PIDE/byte_message.ML b/src/Pure/PIDE/byte_message.ML --- a/src/Pure/PIDE/byte_message.ML +++ b/src/Pure/PIDE/byte_message.ML @@ -1,116 +1,116 @@ (* Title: Pure/General/byte_message.ML Author: Makarius Byte-oriented messages. *) signature BYTE_MESSAGE = sig val write: BinIO.outstream -> string list -> unit val write_yxml: BinIO.outstream -> XML.tree -> unit val flush: BinIO.outstream -> unit val write_line: BinIO.outstream -> string -> unit val read: BinIO.instream -> int -> string val read_block: BinIO.instream -> int -> string option * int val read_line: BinIO.instream -> string option val write_message: BinIO.outstream -> string list -> unit val write_message_yxml: BinIO.outstream -> XML.body list -> unit val read_message: BinIO.instream -> string list option val write_line_message: BinIO.outstream -> string -> unit val read_line_message: BinIO.instream -> string option end; structure Byte_Message: BYTE_MESSAGE = struct (* output operations *) fun write stream = List.app (File.output stream); fun write_yxml stream tree = YXML.traverse (fn s => fn () => File.output stream s) tree (); fun flush stream = ignore (try BinIO.flushOut stream); fun write_line stream s = (write stream [s, "\n"]; flush stream); (* input operations *) -fun read stream n = Byte.bytesToString (BinIO.inputN (stream, n)); +fun read stream n = File.input_size n stream; fun read_block stream n = let val msg = read stream n; val len = size msg; in (if len = n then SOME msg else NONE, len) end; fun read_line stream = let val result = trim_line o String.implode o rev; fun read_body cs = (case BinIO.input1 stream of NONE => if null cs then NONE else SOME (result cs) | SOME b => (case Byte.byteToChar b of #"\n" => SOME (result cs) | c => read_body (c :: cs))); in read_body [] end; (* messages with multiple chunks (arbitrary content) *) fun make_header ns = [space_implode "," (map Value.print_int ns), "\n"]; fun write_message stream chunks = (write stream (make_header (map size chunks) @ chunks); flush stream); fun write_message_yxml stream chunks = (write stream (make_header (map YXML.body_size chunks)); (List.app o List.app) (write_yxml stream) chunks; flush stream); fun parse_header line = map Value.parse_nat (space_explode "," line) handle Fail _ => error ("Malformed message header: " ^ quote line); fun read_chunk stream n = (case read_block stream n of (SOME chunk, _) => chunk | (NONE, len) => error ("Malformed message chunk: unexpected EOF after " ^ string_of_int len ^ " of " ^ string_of_int n ^ " bytes")); fun read_message stream = read_line stream |> Option.map (parse_header #> map (read_chunk stream)); (* hybrid messages: line or length+block (with content restriction) *) fun is_length msg = msg <> "" andalso forall_string Symbol.is_ascii_digit msg; fun is_terminated msg = let val len = size msg in len > 0 andalso Symbol.is_ascii_line_terminator (str (String.sub (msg, len - 1))) end; fun write_line_message stream msg = if is_length msg orelse is_terminated msg then error ("Bad content for line message:\n" ^ implode (take 100 (Symbol.explode msg))) else let val n = size msg in write stream ((if n > 100 orelse exists_string (fn s => s = "\n") msg then make_header [n + 1] else []) @ [msg, "\n"]); flush stream end; fun read_line_message stream = (case read_line stream of NONE => NONE | SOME line => (case try Value.parse_nat line of NONE => SOME line | SOME n => Option.map trim_line (#1 (read_block stream n)))); end;