diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy @@ -1,1411 +1,1411 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Annotation Language: Parsing Combinator\ theory C_Lexer_Annotation imports C_Lexer_Language begin ML \ \\<^file>\~~/src/Pure/Isar/keyword.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Isar/keyword.ML Author: Makarius Isar keyword classification. *) \ structure C_Keyword = struct (** keyword classification **) (* kinds *) val command_kinds = [Keyword.diag, Keyword.document_heading, Keyword.document_body, Keyword.document_raw, Keyword.thy_begin, Keyword.thy_end, Keyword.thy_load, Keyword.thy_decl, Keyword.thy_decl_block, Keyword.thy_defn, Keyword.thy_stmt, Keyword.thy_goal, Keyword.thy_goal_defn, Keyword.thy_goal_stmt, Keyword.qed, Keyword.qed_script, Keyword.qed_block, Keyword.qed_global, Keyword.prf_goal, Keyword.prf_block, Keyword.next_block, Keyword.prf_open, Keyword.prf_close, Keyword.prf_chain, Keyword.prf_decl, Keyword.prf_asm, Keyword.prf_asm_goal, Keyword.prf_script, Keyword.prf_script_goal, Keyword.prf_script_asm_goal]; (* specifications *) type entry = {pos: Position.T, id: serial, kind: string, files: string list, (*extensions of embedded files*) tags: string list}; fun check_spec pos ((kind, files), tags) : entry = if not (member (op =) command_kinds kind) then error ("Unknown annotation syntax keyword kind " ^ quote kind) else if not (null files) andalso kind <> Keyword.thy_load then error ("Illegal specification of files for " ^ quote kind) else {pos = pos, id = serial (), kind = kind, files = files, tags = tags}; (** keyword tables **) (* type keywords *) datatype keywords = Keywords of {minor: Scan.lexicon, major: Scan.lexicon, commands: entry Symtab.table}; fun minor_keywords (Keywords {minor, ...}) = minor; fun major_keywords (Keywords {major, ...}) = major; fun make_keywords (minor, major, commands) = Keywords {minor = minor, major = major, commands = commands}; fun map_keywords f (Keywords {minor, major, commands}) = make_keywords (f (minor, major, commands)); (* build keywords *) val empty_keywords = make_keywords (Scan.empty_lexicon, Scan.empty_lexicon, Symtab.empty); fun empty_keywords' minor = make_keywords (minor, Scan.empty_lexicon, Symtab.empty); fun merge_keywords (Keywords {minor = minor1, major = major1, commands = commands1}, Keywords {minor = minor2, major = major2, commands = commands2}) = make_keywords (Scan.merge_lexicons (minor1, minor2), Scan.merge_lexicons (major1, major2), Symtab.merge (K true) (commands1, commands2)); val add_keywords0 = fold (fn ((name, pos), force_minor, spec as ((kind, _), _)) => map_keywords (fn (minor, major, commands) => let val extend = Scan.extend_lexicon (Symbol.explode name) fun update spec = Symtab.update (name, spec) in if force_minor then (extend minor, major, update (check_spec pos spec) commands) else if kind = "" orelse kind = Keyword.before_command orelse kind = Keyword.quasi_command then (extend minor, major, commands) else (minor, extend major, update (check_spec pos spec) commands) end)); val add_keywords = add_keywords0 o map (fn (cmd, spec) => (cmd, false, spec)) val add_keywords_minor = add_keywords0 o map (fn (cmd, spec) => (cmd, true, spec)) (* keyword status *) fun is_command (Keywords {commands, ...}) = Symtab.defined commands; fun dest_commands (Keywords {commands, ...}) = Symtab.keys commands; (* command keywords *) fun lookup_command (Keywords {commands, ...}) = Symtab.lookup commands; fun command_markup keywords name = lookup_command keywords name |> Option.map (fn {pos, id, ...} => Markup.properties (Position.entity_properties_of false id pos) (Markup.entity Markup.command_keywordN name)); fun command_files keywords name path = (case lookup_command keywords name of NONE => [] | SOME {kind, files, ...} => if kind <> Keyword.thy_load then [] else if null files then [path] else map (fn ext => Path.ext ext path) files); (* command categories *) fun command_category ks = let val tab = Symtab.make_set ks; fun pred keywords name = (case lookup_command keywords name of NONE => false | SOME {kind, ...} => Symtab.defined tab kind); in pred end; val is_theory_end = command_category [Keyword.thy_end]; val is_proof_asm = command_category [Keyword.prf_asm, Keyword.prf_asm_goal]; val is_improper = command_category [ Keyword.qed_script , Keyword.prf_script , Keyword.prf_script_goal , Keyword.prf_script_asm_goal]; end; \ text \ Notes: \<^item> The next structure contains a duplicated copy of the type \<^ML_type>\Token.T\, since it is not possible to set an arbitrary \<^emph>\slot\ value in \<^ML_structure>\Token\. \<^item> Parsing priorities in C and HOL slightly differ, see for instance \<^ML>\Token.explode\. \ ML \ \\<^file>\~~/src/Pure/Isar/token.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Isar/token.ML Author: Markus Wenzel, TU Muenchen Outer token syntax for Isabelle/Isar. *) \ structure C_Token = struct (** tokens **) (* token kind *) val immediate_kinds' = fn Token.Command => 0 | Token.Keyword => 1 | Token.Ident => 2 | Token.Long_Ident => 3 | Token.Sym_Ident => 4 | Token.Var => 5 | Token.Type_Ident => 6 | Token.Type_Var => 7 | Token.Nat => 8 | Token.Float => 9 | Token.Space => 10 | _ => ~1 val delimited_kind = (fn Token.String => true | Token.Alt_String => true | Token.Verbatim => true | Token.Cartouche => true | Token.Comment _ => true | _ => false); (* datatype token *) (*The value slot assigns an (optional) internal value to a token, usually as a side-effect of special scanner setup (see also args.ML). Note that an assignable ref designates an intermediate state of internalization -- it is NOT meant to persist.*) datatype T = Token of (Symbol_Pos.text * Position.range) * (Token.kind * string) * slot and slot = Slot | Value of value option | Assignable of value option Unsynchronized.ref and value = Source of T list | Literal of bool * Markup.T | Name of Token.name_value * morphism | Typ of typ | Term of term | Fact of string option * thm list | (*optional name for dynamic fact, i.e. fact "variable"*) Attribute of morphism -> attribute | Declaration of declaration | Files of Token.file Exn.result list; type src = T list; (* position *) fun pos_of (Token ((_, (pos, _)), _, _)) = pos; fun end_pos_of (Token ((_, (_, pos)), _, _)) = pos; fun adjust_offsets adjust (Token ((x, range), y, z)) = Token ((x, apply2 (Position.adjust_offsets adjust) range), y, z); (* stopper *) fun mk_eof pos = Token (("", (pos, Position.none)), (Token.EOF, ""), Slot); val eof = mk_eof Position.none; fun is_eof (Token (_, (Token.EOF, _), _)) = true | is_eof _ = false; val not_eof = not o is_eof; val stopper = Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; (* kind of token *) fun kind_of (Token (_, (k, _), _)) = k; fun is_kind k (Token (_, (k', _), _)) = k = k'; val is_command = is_kind Token.Command; fun keyword_with pred (Token (_, (Token.Keyword, x), _)) = pred x | keyword_with _ _ = false; val is_command_modifier = keyword_with (fn x => x = "private" orelse x = "qualified"); fun ident_with pred (Token (_, (Token.Ident, x), _)) = pred x | ident_with _ _ = false; fun is_ignored (Token (_, (Token.Space, _), _)) = true | is_ignored (Token (_, (Token.Comment NONE, _), _)) = true | is_ignored _ = false; fun is_proper (Token (_, (Token.Space, _), _)) = false | is_proper (Token (_, (Token.Comment _, _), _)) = false | is_proper _ = true; fun is_comment (Token (_, (Token.Comment _, _), _)) = true | is_comment _ = false; fun is_informal_comment (Token (_, (Token.Comment NONE, _), _)) = true | is_informal_comment _ = false; fun is_formal_comment (Token (_, (Token.Comment (SOME _), _), _)) = true | is_formal_comment _ = false; fun is_document_marker (Token (_, (Token.Comment (SOME Comment.Marker), _), _)) = true | is_document_marker _ = false; fun is_begin_ignore (Token (_, (Token.Comment NONE, "<"), _)) = true | is_begin_ignore _ = false; fun is_end_ignore (Token (_, (Token.Comment NONE, ">"), _)) = true | is_end_ignore _ = false; fun is_error (Token (_, (Token.Error _, _), _)) = true | is_error _ = false; fun is_error' (Token (_, (Token.Error msg, _), _)) = SOME msg | is_error' _ = NONE; fun content_of (Token (_, (_, x), _)) = x; fun content_of' (Token (_, (_, _), Value (SOME (Source l)))) = map (fn Token ((_, (pos, _)), (_, x), _) => (x, pos)) l | content_of' _ = []; val is_stack1 = fn Token (_, (Token.Sym_Ident, _), Value (SOME (Source l))) => forall (fn tok => content_of tok = "+") l | _ => false; val is_stack2 = fn Token (_, (Token.Sym_Ident, _), Value (SOME (Source l))) => forall (fn tok => content_of tok = "@") l | _ => false; val is_stack3 = fn Token (_, (Token.Sym_Ident, _), Value (SOME (Source l))) => forall (fn tok => content_of tok = "&") l | _ => false; (* blanks and newlines -- space tokens obey lines *) fun is_space (Token (_, (Space, _), _)) = true | is_space _ = false; fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) | is_blank _ = false; fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x | is_newline _ = false; (* range of tokens *) fun range_of (toks as tok :: _) = let val pos' = end_pos_of (List.last toks) in Position.range (pos_of tok, pos') end | range_of [] = Position.no_range; val core_range_of = drop_prefix is_ignored #> drop_suffix is_ignored #> range_of; (* token content *) fun content_of (Token (_, (_, x), _)) = x; fun source_of (Token ((source, _), _, _)) = source; fun input_of (Token ((source, range), (kind, _), _)) = Input.source (delimited_kind kind) source range; fun inner_syntax_of tok = let val x = content_of tok in if YXML.detect x then x else Syntax.implode_input (input_of tok) end; (* markup reports *) local val token_kind_markup = fn Token.Var => (Markup.var, "") | Token.Type_Ident => (Markup.tfree, "") | Token.Type_Var => (Markup.tvar, "") | Token.String => (Markup.string, "") | Token.Alt_String => (Markup.alt_string, "") | Token.Verbatim => (Markup.verbatim, "") | Token.Cartouche => (Markup.cartouche, "") | Token.Comment _ => (Markup.ML_comment, "") | Token.Error msg => (Markup.bad (), msg) | _ => (Markup.empty, ""); fun keyword_reports tok = map (fn markup => ((pos_of tok, markup), "")); fun command_markups keywords x = if C_Keyword.is_theory_end keywords x then [Markup.keyword2 |> Markup.keyword_properties] else (if C_Keyword.is_proof_asm keywords x then [Markup.keyword3] else if C_Keyword.is_improper keywords x then [Markup.keyword1, Markup.improper] else [Markup.keyword1]) |> map Markup.command_properties; fun keyword_markup (important, keyword) x = if important orelse Symbol.is_ascii_identifier x then keyword else Markup.delimiter; fun command_minor_markups keywords x = if C_Keyword.is_theory_end keywords x then [Markup.keyword2 |> Markup.keyword_properties] else (if C_Keyword.is_proof_asm keywords x then [Markup.keyword3] else if C_Keyword.is_improper keywords x then [Markup.keyword1, Markup.improper] else if C_Keyword.is_command keywords x then [Markup.keyword1] else [keyword_markup (false, Markup.keyword2 |> Markup.keyword_properties) x]); in fun completion_report tok = if is_kind Token.Keyword tok then map (fn m => ((pos_of tok, m), "")) (Completion.suppress_abbrevs (content_of tok)) else []; fun reports keywords tok = if is_command tok then keyword_reports tok (command_markups keywords (content_of tok)) else if is_stack1 tok orelse is_stack2 tok orelse is_stack3 tok then keyword_reports tok [Markup.keyword2 |> Markup.keyword_properties] else if is_kind Token.Keyword tok then keyword_reports tok (command_minor_markups keywords (content_of tok)) else let val pos = pos_of tok; val (m, text) = token_kind_markup (kind_of tok); val delete = #2 (Symbol_Pos.explode_delete (source_of tok, pos)); in ((pos, m), text) :: map (fn p => ((p, Markup.delete), "")) delete end; fun markups keywords = map (#2 o #1) o reports keywords; end; (* unparse *) fun unparse' (Token ((source0, _), (kind, x), _)) = let val source = \ \ We are computing a reverse function of \<^ML>\Symbol_Pos.implode_range\ taking into account consecutive \<^ML>\Symbol.DEL\ symbols potentially appearing at the beginning, or at the end of the string. As remark, \<^ML>\Symbol_Pos.explode_delete\ will remove any potentially consecutive \<^ML>\Symbol.DEL\ symbols. This is why it is not used here.\ case Symbol.explode source0 of x :: xs => if x = Symbol.DEL then case rev xs of x' :: xs => if x' = Symbol.DEL then implode (rev xs) else source0 | _ => source0 else source0 | _ => source0 in case kind of Token.String => Symbol_Pos.quote_string_qq source | Token.Alt_String => Symbol_Pos.quote_string_bq source | Token.Verbatim => enclose "{*" "*}" source | Token.Cartouche => cartouche source | Token.Comment NONE => enclose "(*" "*)" source | Token.EOF => "" | _ => x end; fun text_of tok = let val k = Token.str_of_kind (kind_of tok); val ms = markups C_Keyword.empty_keywords tok; val s = unparse' tok; in if s = "" then (k, "") else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ Markup.markups ms s, "") else (k, Markup.markups ms s) end; (** associated values **) (* inlined file content *) fun file_source (file: Token.file) = let val text = cat_lines (#lines file); - val end_pos = fold Position.advance (Symbol.explode text) (#pos file); + val end_pos = Position.symbol_explode text (#pos file); in Input.source true text (Position.range (#pos file, end_pos)) end; fun get_files (Token (_, _, Value (SOME (Files files)))) = files | get_files _ = []; fun put_files [] tok = tok | put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files))) | put_files _ tok = raise Fail ("Cannot put inlined files here" ^ Position.here (pos_of tok)); (* access values *) (* reports of value *) (* name value *) (* maxidx *) (* fact values *) (* transform *) (* static binding *) (*1st stage: initialize assignable slots*) fun init_assignable tok = (case tok of Token (x, y, Slot) => Token (x, y, Assignable (Unsynchronized.ref NONE)) | Token (_, _, Value _) => tok | Token (_, _, Assignable r) => (r := NONE; tok)); (*2nd stage: assign values as side-effect of scanning*) fun assign v tok = (case tok of Token (x, y, Slot) => Token (x, y, Value v) | Token (_, _, Value _) => tok | Token (_, _, Assignable r) => (r := v; tok)); fun evaluate mk eval arg = let val x = eval arg in (assign (SOME (mk x)) arg; x) end; (*3rd stage: static closure of final values*) fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) | closure tok = tok; (* pretty *) (* src *) (** scanners **) open Basic_Symbol_Pos; val err_prefix = "Annotation lexical error: "; fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); (* scan stack *) fun scan_stack is_stack = Scan.optional (Scan.one is_stack >> content_of') [] (* scan symbolic idents *) val scan_symid = Scan.many1 (Symbol.is_symbolic_char o Symbol_Pos.symbol) || Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single; fun is_symid str = (case try Symbol.explode str of SOME [s] => Symbol.is_symbolic s orelse Symbol.is_symbolic_char s | SOME ss => forall Symbol.is_symbolic_char ss | _ => false); fun ident_or_symbolic "begin" = false | ident_or_symbolic ":" = true | ident_or_symbolic "::" = true | ident_or_symbolic s = Symbol_Pos.is_identifier s orelse is_symid s; (* scan verbatim text *) val scan_verb = $$$ "*" --| Scan.ahead (~$$ "}") || Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single; val scan_verbatim = Scan.ahead ($$ "{" -- $$ "*") |-- !!! "unclosed verbatim text" ((Symbol_Pos.scan_pos --| $$ "{" --| $$ "*") -- (Scan.repeats scan_verb -- ($$ "*" |-- $$ "}" |-- Symbol_Pos.scan_pos))); val recover_verbatim = $$$ "{" @@@ $$$ "*" @@@ Scan.repeats scan_verb; (* scan cartouche *) val scan_cartouche = Symbol_Pos.scan_pos -- ((Symbol_Pos.scan_cartouche err_prefix >> Symbol_Pos.cartouche_content) -- Symbol_Pos.scan_pos); (* scan space *) fun space_symbol (s, _) = Symbol.is_blank s andalso s <> "\n"; val scan_space = Scan.many1 space_symbol @@@ Scan.optional ($$$ "\n") [] || Scan.many space_symbol @@@ $$$ "\n"; (* scan comment *) val scan_comment = Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body err_prefix -- Symbol_Pos.scan_pos); (** token sources **) local fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; fun token k ss = Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.content ss), Slot); fun token' (mk_value, k) ss = if mk_value then Token ( (Symbol_Pos.implode ss, Symbol_Pos.range ss) , (k, Symbol_Pos.content ss) , Value (SOME (Source (map (fn (s, pos) => Token (("", (pos, Position.none)), (k, s), Slot)) ss)))) else token k ss; fun token_t k = token' (true, k) fun token_range k (pos1, (ss, pos2)) = Token (Symbol_Pos.implode_range (pos1, pos2) ss, (k, Symbol_Pos.content ss), Slot); fun scan_token keywords = !!! "bad input" (Symbol_Pos.scan_string_qq err_prefix >> token_range Token.String || Symbol_Pos.scan_string_bq err_prefix >> token_range Token.Alt_String || scan_verbatim >> token_range Token.Verbatim || scan_cartouche >> token_range Token.Cartouche || scan_comment >> token_range (Token.Comment NONE) || Comment.scan_outer >> (fn (k, ss) => token (Token.Comment (SOME k)) ss) || scan_space >> token Token.Space || Scan.repeats1 ($$$ "+") >> token_t Token.Sym_Ident || Scan.repeats1 ($$$ "@") >> token_t Token.Sym_Ident || Scan.repeats1 ($$$ "&") >> token_t Token.Sym_Ident || (Scan.max token_leq (Scan.max token_leq (Scan.literal (C_Keyword.major_keywords keywords) >> pair Token.Command) (Scan.literal (C_Keyword.minor_keywords keywords) >> pair Token.Keyword)) (Lexicon.scan_longid >> pair Token.Long_Ident || Scan.max token_leq (C_Lex.scan_ident >> pair Token.Ident) (Lexicon.scan_id >> pair Token.Ident) || Lexicon.scan_var >> pair Token.Var || Lexicon.scan_tid >> pair Token.Type_Ident || Lexicon.scan_tvar >> pair Token.Type_Var || Symbol_Pos.scan_float >> pair Token.Float || Symbol_Pos.scan_nat >> pair Token.Nat || scan_symid >> pair Token.Sym_Ident)) >> uncurry (token' o pair false)); fun recover msg = (Symbol_Pos.recover_string_qq || Symbol_Pos.recover_string_bq || recover_verbatim || Symbol_Pos.recover_cartouche || Symbol_Pos.recover_comment || Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single) >> (single o token (Token.Error msg)); in fun make_source keywords {strict} = let val scan_strict = Scan.bulk (scan_token keywords); val scan = if strict then scan_strict else Scan.recover scan_strict recover; in Source.source Symbol_Pos.stopper scan end; end; (* explode *) fun tokenize keywords strict syms = Source.of_list syms |> make_source keywords strict |> Source.exhaust; fun explode keywords pos text = Symbol_Pos.explode (text, pos) |> tokenize keywords {strict = false}; fun explode0 keywords = explode keywords Position.none; (* print name in parsable form *) (* make *) (** parsers **) type 'a parser = T list -> 'a * T list; type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list); (* read body -- e.g. antiquotation source *) fun read_with_commands'0 keywords syms = Source.of_list syms |> make_source keywords {strict = false} |> Source.filter (not o is_proper) |> Source.exhaust fun read_with_commands' keywords scan syms = Source.of_list syms |> make_source keywords {strict = false} |> Source.filter is_proper |> Source.source stopper (Scan.recover (Scan.bulk scan) (fn msg => Scan.one (not o is_eof) >> (fn tok => [C_Scan.Right let val msg = case is_error' tok of SOME msg0 => msg0 ^ " (" ^ msg ^ ")" | NONE => msg in ( msg , [((pos_of tok, Markup.bad ()), msg)] , tok) end]))) |> Source.exhaust; fun read_antiq' keywords scan = read_with_commands' keywords (scan >> C_Scan.Left); (* wrapped syntax *) local fun make src pos = Token.make src pos |> #1 fun make_default text pos = make ((~1, 0), text) pos fun explode keywords pos text = case Token.explode keywords pos text of [tok] => tok | _ => make_default text pos in fun syntax' f = I #> map (fn tok0 as Token ((source, (pos1, pos2)), (kind, x), _) => if is_stack1 tok0 orelse is_stack2 tok0 orelse is_stack3 tok0 then make_default source pos1 else if is_eof tok0 then Token.eof else if delimited_kind kind then explode Keyword.empty_keywords pos1 (unparse' tok0) else let val tok1 = explode ((case kind of Token.Keyword => Keyword.add_keywords [((x, Position.none), Keyword.no_spec)] | Token.Command => Keyword.add_keywords [( (x, Position.none), (Keyword.thy_decl, []))] | _ => I) Keyword.empty_keywords) pos1 source in if Token.kind_of tok1 = kind then tok1 else make ( ( immediate_kinds' kind , case Position.distance_of (pos1, pos2) of NONE => 0 | SOME i => i) , source) pos1 end) #> f #> apsnd (map (fn tok => Token ( (Token.source_of tok, Token.range_of [tok]) , (Token.kind_of tok, Token.content_of tok) , Slot))) end end; type 'a c_parser = 'a C_Token.parser; type 'a c_context_parser = 'a C_Token.context_parser; \ ML \ \\<^file>\~~/src/Pure/Isar/parse.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Isar/parse.ML Author: Markus Wenzel, TU Muenchen Generic parsers for Isabelle/Isar outer syntax. *) \ signature C_PARSE = sig type T type src = T list type 'a parser = T list -> 'a * T list type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list) (**) val C_source: Input.source parser val star: string parser (**) val group: (unit -> string) -> (T list -> 'a) -> T list -> 'a val !!! : (T list -> 'a) -> T list -> 'a val !!!! : (T list -> 'a) -> T list -> 'a val not_eof: T parser val token: 'a parser -> T parser val range: 'a parser -> ('a * Position.range) parser val position: 'a parser -> ('a * Position.T) parser val input: 'a parser -> Input.source parser val inner_syntax: 'a parser -> string parser val command: string parser val keyword: string parser val short_ident: string parser val long_ident: string parser val sym_ident: string parser val dots: string parser val minus: string parser val term_var: string parser val type_ident: string parser val type_var: string parser val number: string parser val float_number: string parser val string: string parser val string_position: (string * Position.T) parser val alt_string: string parser val verbatim: string parser val cartouche: string parser val eof: string parser val command_name: string -> string parser val keyword_with: (string -> bool) -> string parser val keyword_markup: bool * Markup.T -> string -> string parser val keyword_improper: string -> string parser val $$$ : string -> string parser val reserved: string -> string parser val underscore: string parser val maybe: 'a parser -> 'a option parser val maybe_position: ('a * Position.T) parser -> ('a option * Position.T) parser val opt_keyword: string -> bool parser val opt_bang: bool parser val begin: string parser val opt_begin: bool parser val nat: int parser val int: int parser val real: real parser val enum_positions: string -> 'a parser -> ('a list * Position.T list) parser val enum1_positions: string -> 'a parser -> ('a list * Position.T list) parser val enum: string -> 'a parser -> 'a list parser val enum1: string -> 'a parser -> 'a list parser val and_list: 'a parser -> 'a list parser val and_list1: 'a parser -> 'a list parser val enum': string -> 'a context_parser -> 'a list context_parser val enum1': string -> 'a context_parser -> 'a list context_parser val and_list': 'a context_parser -> 'a list context_parser val and_list1': 'a context_parser -> 'a list context_parser val list: 'a parser -> 'a list parser val list1: 'a parser -> 'a list parser val name: string parser val name_range: (string * Position.range) parser val name_position: (string * Position.T) parser val binding: binding parser val embedded: string parser val embedded_input: Input.source parser val embedded_position: (string * Position.T) parser val text: string parser val path: string parser val path_binding: (string * Position.T) parser val session_name: (string * Position.T) parser val theory_name: (string * Position.T) parser val liberal_name: string parser val parname: string parser val parbinding: binding parser val class: string parser val sort: string parser val type_const: string parser val arity: (string * string list * string) parser val multi_arity: (string list * string list * string) parser val type_args: string list parser val type_args_constrained: (string * string option) list parser val typ: string parser val mixfix: mixfix parser val mixfix': mixfix parser val opt_mixfix: mixfix parser val opt_mixfix': mixfix parser val syntax_mode: Syntax.mode parser val where_: string parser val const_decl: (string * string * mixfix) parser val const_binding: (binding * string * mixfix) parser val params: (binding * string option * mixfix) list parser val vars: (binding * string option * mixfix) list parser val for_fixes: (binding * string option * mixfix) list parser val ML_source: Input.source parser val document_source: Input.source parser val document_marker: Input.source parser val const: string parser val term: string parser val prop: string parser val literal_fact: string parser val propp: (string * string list) parser val termp: (string * string list) parser val private: Position.T parser val qualified: Position.T parser val target: (string * Position.T) parser val opt_target: (string * Position.T) option parser val args: T list parser val args1: (string -> bool) -> T list parser val attribs: src list parser val opt_attribs: src list parser val thm_sel: Facts.interval list parser val thm: (Facts.ref * src list) parser val thms1: (Facts.ref * src list) list parser val options: ((string * Position.T) * (string * Position.T)) list parser end; structure C_Parse: C_PARSE = struct type T = C_Token.T type src = T list type 'a parser = T list -> 'a * T list type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list) structure Token = struct open Token open C_Token end (** error handling **) (* group atomic parsers (no cuts!) *) fun group s scan = scan || Scan.fail_with (fn [] => (fn () => s () ^ " expected,\nbut end-of-input was found") | tok :: _ => (fn () => (case Token.text_of tok of (txt, "") => s () ^ " expected,\nbut " ^ txt ^ Position.here (Token.pos_of tok) ^ " was found" | (txt1, txt2) => s () ^ " expected,\nbut " ^ txt1 ^ Position.here (Token.pos_of tok) ^ " was found:\n" ^ txt2))); (* cut *) fun cut kind scan = let fun get_pos [] = " (end-of-input)" | get_pos (tok :: _) = Position.here (Token.pos_of tok); fun err (toks, NONE) = (fn () => kind ^ get_pos toks) | err (toks, SOME msg) = (fn () => let val s = msg () in if String.isPrefix kind s then s else kind ^ get_pos toks ^ ": " ^ s end); in Scan.!! err scan end; fun !!! scan = cut "Annotation syntax error" scan; fun !!!! scan = cut "Corrupted annotation syntax in presentation" scan; (** basic parsers **) (* tokens *) fun RESET_VALUE atom = (*required for all primitive parsers*) Scan.ahead (Scan.one (K true)) -- atom >> (fn (arg, x) => (Token.assign NONE arg; x)); val not_eof = RESET_VALUE (Scan.one Token.not_eof); fun token atom = Scan.ahead not_eof --| atom; fun range scan = (Scan.ahead not_eof >> (Token.range_of o single)) -- scan >> Library.swap; fun position scan = (Scan.ahead not_eof >> Token.pos_of) -- scan >> Library.swap; fun input atom = Scan.ahead atom |-- not_eof >> Token.input_of; fun inner_syntax atom = Scan.ahead atom |-- not_eof >> Token.inner_syntax_of; fun kind k = group (fn () => Token.str_of_kind k) (RESET_VALUE (Scan.one (Token.is_kind k) >> Token.content_of)); val command = kind Token.Command; val keyword = kind Token.Keyword; val short_ident = kind Token.Ident; val long_ident = kind Token.Long_Ident; val sym_ident = kind Token.Sym_Ident; val term_var = kind Token.Var; val type_ident = kind Token.Type_Ident; val type_var = kind Token.Type_Var; val number = kind Token.Nat; val float_number = kind Token.Float; val string = kind Token.String; val alt_string = kind Token.Alt_String; val verbatim = kind Token.Verbatim; val cartouche = kind Token.Cartouche; val eof = kind Token.EOF; fun command_name x = group (fn () => Token.str_of_kind Token.Command ^ " " ^ quote x) (RESET_VALUE (Scan.one (fn tok => Token.is_command tok andalso Token.content_of tok = x))) >> Token.content_of; fun keyword_with pred = RESET_VALUE (Scan.one (Token.keyword_with pred) >> Token.content_of); fun keyword_markup markup x = group (fn () => Token.str_of_kind Token.Keyword ^ " " ^ quote x) (Scan.ahead not_eof -- keyword_with (fn y => x = y)) >> (fn (tok, x) => (Token.assign (SOME (Token.Literal markup)) tok; x)); val keyword_improper = keyword_markup (true, Markup.improper); val $$$ = keyword_markup (false, Markup.quasi_keyword); fun reserved x = group (fn () => "reserved identifier " ^ quote x) (RESET_VALUE (Scan.one (Token.ident_with (fn y => x = y)) >> Token.content_of)); val dots = sym_ident :-- (fn "\" => Scan.succeed () | _ => Scan.fail) >> #1; val minus = sym_ident :-- (fn "-" => Scan.succeed () | _ => Scan.fail) >> #1; val underscore = sym_ident :-- (fn "_" => Scan.succeed () | _ => Scan.fail) >> #1; fun maybe scan = underscore >> K NONE || scan >> SOME; fun maybe_position scan = position (underscore >> K NONE) || scan >> apfst SOME; val nat = number >> (#1 o Library.read_int o Symbol.explode); val int = Scan.optional (minus >> K ~1) 1 -- nat >> op *; val real = float_number >> Value.parse_real || int >> Real.fromInt; fun opt_keyword s = Scan.optional ($$$ "(" |-- !!! (($$$ s >> K true) --| $$$ ")")) false; val opt_bang = Scan.optional ($$$ "!" >> K true) false; val begin = $$$ "begin"; val opt_begin = Scan.optional (begin >> K true) false; (* enumerations *) fun enum1_positions sep scan = scan -- Scan.repeat (position ($$$ sep) -- !!! scan) >> (fn (x, ys) => (x :: map #2 ys, map (#2 o #1) ys)); fun enum_positions sep scan = enum1_positions sep scan || Scan.succeed ([], []); fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan); fun enum sep scan = enum1 sep scan || Scan.succeed []; fun enum1' sep scan = scan ::: Scan.repeat (Scan.lift ($$$ sep) |-- scan); fun enum' sep scan = enum1' sep scan || Scan.succeed []; fun and_list1 scan = enum1 "and" scan; fun and_list scan = enum "and" scan; fun and_list1' scan = enum1' "and" scan; fun and_list' scan = enum' "and" scan; fun list1 scan = enum1 "," scan; fun list scan = enum "," scan; (* names and embedded content *) val name = group (fn () => "name") (short_ident || long_ident || sym_ident || number || string); val name_range = input name >> Input.source_content_range; val name_position = input name >> Input.source_content; val string_position = input string >> Input.source_content; val binding = name_position >> Binding.make; val embedded = group (fn () => "embedded content") (cartouche || string || short_ident || long_ident || sym_ident || term_var || type_ident || type_var || number); val embedded_input = input embedded; val embedded_position = embedded_input >> Input.source_content; val text = group (fn () => "text") (embedded || verbatim); val path = group (fn () => "file name/path specification") embedded; val path_binding = group (fn () => "path binding (strict file name)") (position embedded); val session_name = group (fn () => "session name") name_position; val theory_name = group (fn () => "theory name") name_position; val liberal_name = keyword_with Token.ident_or_symbolic || name; val parname = Scan.optional ($$$ "(" |-- name --| $$$ ")") ""; val parbinding = Scan.optional ($$$ "(" |-- binding --| $$$ ")") Binding.empty; (* type classes *) val class = group (fn () => "type class") (inner_syntax embedded); val sort = group (fn () => "sort") (inner_syntax embedded); val type_const = group (fn () => "type constructor") (inner_syntax embedded); val arity = type_const -- ($$$ "::" |-- !!! (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> Scan.triple2; val multi_arity = and_list1 type_const -- ($$$ "::" |-- !!! (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> Scan.triple2; (* types *) val typ = group (fn () => "type") (inner_syntax embedded); fun type_arguments arg = arg >> single || $$$ "(" |-- !!! (list1 arg --| $$$ ")") || Scan.succeed []; val type_args = type_arguments type_ident; val type_args_constrained = type_arguments (type_ident -- Scan.option ($$$ "::" |-- !!! sort)); (* mixfix annotations *) local val mfix = input (string || cartouche); val mixfix_ = mfix -- !!! (Scan.optional ($$$ "[" |-- !!! (list nat --| $$$ "]")) [] -- Scan.optional nat 1000) >> (fn (sy, (ps, p)) => fn range => Mixfix (sy, ps, p, range)); val structure_ = $$$ "structure" >> K Structure; val binder_ = $$$ "binder" |-- !!! (mfix -- ($$$ "[" |-- nat --| $$$ "]" -- nat || nat >> (fn n => (n, n)))) >> (fn (sy, (p, q)) => fn range => Binder (sy, p, q, range)); val infixl_ = $$$ "infixl" |-- !!! (mfix -- nat >> (fn (sy, p) => fn range => Infixl (sy, p, range))); val infixr_ = $$$ "infixr" |-- !!! (mfix -- nat >> (fn (sy, p) => fn range => Infixr (sy, p, range))); val infix_ = $$$ "infix" |-- !!! (mfix -- nat >> (fn (sy, p) => fn range => Infix (sy, p, range))); val mixfix_body = mixfix_ || structure_ || binder_ || infixl_ || infixr_ || infix_; fun annotation guard body = Scan.trace ($$$ "(" |-- guard (body --| $$$ ")")) >> (fn (mx, toks) => mx (Token.range_of toks)); fun opt_annotation guard body = Scan.optional (annotation guard body) NoSyn; in val mixfix = annotation !!! mixfix_body; val mixfix' = annotation I mixfix_body; val opt_mixfix = opt_annotation !!! mixfix_body; val opt_mixfix' = opt_annotation I mixfix_body; end; (* syntax mode *) val syntax_mode_spec = ($$$ "output" >> K ("", false)) || name -- Scan.optional ($$$ "output" >> K false) true; val syntax_mode = Scan.optional ($$$ "(" |-- !!! (syntax_mode_spec --| $$$ ")")) Syntax.mode_default; (* fixes *) val where_ = $$$ "where"; val const_decl = name -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> Scan.triple1; val const_binding = binding -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> Scan.triple1; val param_mixfix = binding -- Scan.option ($$$ "::" |-- typ) -- mixfix' >> (single o Scan.triple1); val params = (binding -- Scan.repeat binding) -- Scan.option ($$$ "::" |-- !!! (Scan.ahead typ -- embedded)) >> (fn ((x, ys), T) => (x, Option.map #1 T, NoSyn) :: map (fn y => (y, Option.map #2 T, NoSyn)) ys); val vars = and_list1 (param_mixfix || params) >> flat; val for_fixes = Scan.optional ($$$ "for" |-- !!! vars) []; (* embedded source text *) val ML_source = input (group (fn () => "ML source") text); val document_source = input (group (fn () => "document source") text); val document_marker = group (fn () => "document marker") (RESET_VALUE (Scan.one Token.is_document_marker >> Token.input_of)); (* terms *) val const = group (fn () => "constant") (inner_syntax embedded); val term = group (fn () => "term") (inner_syntax embedded); val prop = group (fn () => "proposition") (inner_syntax embedded); val literal_fact = inner_syntax (group (fn () => "literal fact") (alt_string || cartouche)); (* patterns *) val is_terms = Scan.repeat1 ($$$ "is" |-- term); val is_props = Scan.repeat1 ($$$ "is" |-- prop); val propp = prop -- Scan.optional ($$$ "(" |-- !!! (is_props --| $$$ ")")) []; val termp = term -- Scan.optional ($$$ "(" |-- !!! (is_terms --| $$$ ")")) []; (* target information *) val private = position ($$$ "private") >> #2; val qualified = position ($$$ "qualified") >> #2; val target = ($$$ "(" -- $$$ "in") |-- !!! (name_position --| $$$ ")"); val opt_target = Scan.option target; (* arguments within outer syntax *) local val argument_kinds = [Token.Ident, Token.Long_Ident, Token.Sym_Ident, Token.Var, Token.Type_Ident, Token.Type_Var, Token.Nat, Token.Float, Token.String, Token.Alt_String, Token.Cartouche, Token.Verbatim]; fun arguments is_symid = let fun argument blk = group (fn () => "argument") (Scan.one (fn tok => let val kind = Token.kind_of tok in member (op =) argument_kinds kind orelse Token.keyword_with is_symid tok orelse (blk andalso Token.keyword_with (fn s => s = ",") tok) end)); fun args blk x = Scan.optional (args1 blk) [] x and args1 blk x = (Scan.repeats1 (Scan.repeat1 (argument blk) || argsp "(" ")" || argsp "[" "]")) x and argsp l r x = (token ($$$ l) ::: !!! (args true @@@ (token ($$$ r) >> single))) x; in (args, args1) end; in val args = #1 (arguments Token.ident_or_symbolic) false; fun args1 is_symid = #2 (arguments is_symid) false; end; (* attributes *) val attrib = token liberal_name ::: !!! args; val attribs = $$$ "[" |-- list attrib --| $$$ "]"; val opt_attribs = Scan.optional attribs []; (* theorem references *) val thm_sel = $$$ "(" |-- list1 (nat --| minus -- nat >> Facts.FromTo || nat --| minus >> Facts.From || nat >> Facts.Single) --| $$$ ")"; val thm = $$$ "[" |-- attribs --| $$$ "]" >> pair (Facts.named "") || (literal_fact >> Facts.Fact || name_position -- Scan.option thm_sel >> Facts.Named) -- opt_attribs; val thms1 = Scan.repeat1 thm; (* options *) val option_name = group (fn () => "option name") name_position; val option_value = group (fn () => "option value") ((token real || token name) >> Token.content_of); val option = option_name :-- (fn (_, pos) => Scan.optional ($$$ "=" |-- !!! (position option_value)) ("true", pos)); val options = $$$ "[" |-- list1 option --| $$$ "]"; (** C basic parsers **) (* embedded source text *) val C_source = input (group (fn () => "C source") text); (* AutoCorres (MODIFIES) *) val star = sym_ident :-- (fn "*" => Scan.succeed () | _ => Scan.fail) >> #1; end; structure C_Parse_Native: C_PARSE = struct open Token open Parse (** C basic parsers **) (* embedded source text *) val C_source = input (group (fn () => "C source") text); (* AutoCorres (MODIFIES) *) val star = sym_ident :-- (fn "*" => Scan.succeed () | _ => Scan.fail) >> #1; end; \ ML \ \\<^file>\~~/src/Pure/Thy/thy_header.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Thy/thy_header.ML Author: Makarius Static theory header information. *) \ structure C_Thy_Header = struct val bootstrap_keywords = C_Keyword.empty_keywords' (Keyword.minor_keywords (Thy_Header.get_keywords @{theory})) (* theory data *) structure Data = Theory_Data ( type T = C_Keyword.keywords; val empty = bootstrap_keywords; val extend = I; val merge = C_Keyword.merge_keywords; ); val add_keywords = Data.map o C_Keyword.add_keywords; val add_keywords_minor = Data.map o C_Keyword.add_keywords_minor; val get_keywords = Data.get; val get_keywords' = get_keywords o Proof_Context.theory_of; end \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy @@ -1,1512 +1,1512 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Core Language: Lexing Support, with Filtered Annotations (without Annotation Lexing)\ theory C_Lexer_Language imports Main begin text \ The part implementing the C lexer might resemble to the implementation of the ML one, but the C syntax is much complex: for example, the preprocessing of directives is implemented with several parsing layers. Also, we will see that the way antiquotations are handled in C is slightly different than in ML (especially in the execution part). Overall, the next ML structures presented here in this theory are all aligned with \<^file>\~~/src/Pure/ROOT.ML\, and are thus accordingly sorted in the same order (except for \<^file>\~~/src/Pure/ML/ml_options.ML\ which is early included in the boot process). This theory will stop at \<^file>\~~/src/Pure/ML/ml_lex.ML\. It is basically situated in the phase 1 of the bootstrap process (of \<^file>\~~/src/Pure/ROOT.ML\), i.e., the part dealing with how to get some C tokens from a raw string: \<^ML_text>\Position.T -> string -> token list\. \ ML \ \\<^file>\~~/src/Pure/General/scan.ML\\ \ structure C_Scan = struct datatype ('a, 'b) either = Left of 'a | Right of 'b fun opt x = Scan.optional x []; end \ ML \ \\<^file>\~~/src/Pure/General/symbol.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/General/symbol.ML Author: Makarius Generalized characters with infinitely many named symbols. *) \ structure C_Symbol = struct fun is_ascii_quasi "_" = true | is_ascii_quasi "$" = true | is_ascii_quasi _ = false; fun is_identletter s = Symbol.is_ascii_letter s orelse is_ascii_quasi s fun is_ascii_oct s = Symbol.is_char s andalso Char.ord #"0" <= ord s andalso ord s <= Char.ord #"7"; fun is_ascii_digit1 s = Symbol.is_char s andalso Char.ord #"1" <= ord s andalso ord s <= Char.ord #"9"; fun is_ascii_letdig s = Symbol.is_ascii_letter s orelse Symbol.is_ascii_digit s orelse is_ascii_quasi s; fun is_ascii_identifier s = size s > 0 andalso forall_string is_ascii_letdig s; val is_ascii_blank_no_line = fn " " => true | "\t" => true | "\^K" => true | "\f" => true | _ => false; end \ ML \ \\<^file>\~~/src/Pure/General/position.ML\\ \ structure C_Position = struct type reports_text = Position.report_text list end \ ML \ \\<^file>\~~/src/Pure/General/symbol_pos.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/General/symbol_pos.ML Author: Makarius Symbols with explicit position information. *) \ structure C_Basic_Symbol_Pos = (*not open by default*) struct open Basic_Symbol_Pos; fun one f = Scan.one (f o Symbol_Pos.symbol) fun many f = Scan.many (f o Symbol_Pos.symbol) fun many1 f = Scan.many1 (f o Symbol_Pos.symbol) val one' = Scan.single o one fun scan_full !!! mem msg scan = scan --| (Scan.ahead (one' (not o mem)) || !!! msg Scan.fail) fun this_string s = (fold (fn s0 => uncurry (fn acc => one (fn s1 => s0 = s1) >> (fn x => x :: acc))) (Symbol.explode s) o pair []) >> rev val one_not_eof = Scan.one (Symbol.not_eof o #1) fun unless_eof scan = Scan.unless scan one_not_eof >> single val repeats_one_not_eof = Scan.repeats o unless_eof val newline = $$$ "\n" || $$$ "\^M" @@@ $$$ "\n" || $$$ "\^M" val repeats_until_nl = repeats_one_not_eof newline end structure C_Symbol_Pos = struct (* basic scanners *) val !!! = Symbol_Pos.!!! fun !!!! text scan = let fun get_pos [] = " (end-of-input)" | get_pos ((_, pos) :: _) = Position.here pos; fun err ((_, syms), msg) = fn () => text () ^ get_pos syms ^ Markup.markup Markup.no_report (" at " ^ Symbol.beginning 10 (map Symbol_Pos.symbol syms)) ^ (case msg of NONE => "" | SOME m => "\n" ^ m ()); in Scan.!! err scan end; val $$ = Symbol_Pos.$$ val $$$ = Symbol_Pos.$$$ val ~$$$ = Symbol_Pos.~$$$ (* scan string literals *) local val char_code = Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) -- Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) -- Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) :|-- (fn (((a, pos), (b, _)), (c, _)) => let val (n, _) = Library.read_int [a, b, c] in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end); fun scan_str q err_prefix stop = $$$ "\\" |-- !!! (fn () => err_prefix ^ "bad escape character in string") ($$$ q || $$$ "\\" || char_code) || Scan.unless stop (Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.not_eof s)) >> single; fun scan_strs q err_prefix err_suffix stop = Scan.ahead ($$ q) |-- !!! (fn () => err_prefix ^ "unclosed string literal within " ^ err_suffix) ((Symbol_Pos.scan_pos --| $$$ q) -- (Scan.repeats (scan_str q err_prefix stop) -- ($$$ q |-- Symbol_Pos.scan_pos))); in fun scan_string_qq_multi err_prefix stop = scan_strs "\"" err_prefix "the comment delimiter" stop; fun scan_string_bq_multi err_prefix stop = scan_strs "`" err_prefix "the comment delimiter" stop; fun scan_string_qq_inline err_prefix = scan_strs "\"" err_prefix "the same line" C_Basic_Symbol_Pos.newline; fun scan_string_bq_inline err_prefix = scan_strs "`" err_prefix "the same line" C_Basic_Symbol_Pos.newline; end; (* nested text cartouches *) fun scan_cartouche_depth stop = Scan.repeat1 (Scan.depend (fn (depth: int option) => (case depth of SOME d => $$ Symbol.open_ >> pair (SOME (d + 1)) || (if d > 0 then Scan.unless stop (Scan.one (fn (s, _) => s <> Symbol.close andalso Symbol.not_eof s)) >> pair depth || $$ Symbol.close >> pair (if d = 1 then NONE else SOME (d - 1)) else Scan.fail) | NONE => Scan.fail))); fun scan_cartouche err_prefix err_suffix stop = Scan.ahead ($$ Symbol.open_) |-- !!! (fn () => err_prefix ^ "unclosed text cartouche within " ^ err_suffix) (Scan.provide is_none (SOME 0) (scan_cartouche_depth stop)); fun scan_cartouche_multi err_prefix stop = scan_cartouche err_prefix "the comment delimiter" stop; fun scan_cartouche_inline err_prefix = scan_cartouche err_prefix "the same line" C_Basic_Symbol_Pos.newline; (* C-style comments *) local val par_l = "/" val par_r = "/" val scan_body1 = $$$ "*" --| Scan.ahead (~$$$ par_r) val scan_body2 = Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single val scan_cmt = Scan.depend (fn (d: int) => $$$ par_l @@@ $$$ "*" >> pair (d + 1)) || Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ par_r >> pair (d - 1)) || Scan.lift scan_body1 || Scan.lift scan_body2; val scan_cmts = Scan.pass 0 (Scan.repeats scan_cmt); in fun scan_comment err_prefix = Scan.ahead ($$ par_l -- $$ "*") |-- !!! (fn () => err_prefix ^ "unclosed comment") ($$$ par_l @@@ $$$ "*" @@@ scan_cmts @@@ $$$ "*" @@@ $$$ par_r) || $$$ "/" @@@ $$$ "/" @@@ C_Basic_Symbol_Pos.repeats_until_nl; fun scan_comment_no_nest err_prefix = Scan.ahead ($$ par_l -- $$ "*") |-- !!! (fn () => err_prefix ^ "unclosed comment") ($$$ par_l @@@ $$$ "*" @@@ Scan.repeats (scan_body1 || scan_body2) @@@ $$$ "*" @@@ $$$ par_r) || $$$ "/" @@@ $$$ "/" @@@ C_Basic_Symbol_Pos.repeats_until_nl; val recover_comment = $$$ par_l @@@ $$$ "*" @@@ Scan.repeats (scan_body1 || scan_body2); end end \ ML \ \\<^file>\~~/src/Pure/General/antiquote.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/General/antiquote.ML Author: Makarius Antiquotations within plain text. *) \ structure C_Antiquote = struct (* datatype antiquote *) type antiq = { explicit: bool , start: Position.T , stop: Position.T option , range: Position.range , body: Symbol_Pos.T list , body_begin: Symbol_Pos.T list , body_end: Symbol_Pos.T list } (* scan *) open C_Basic_Symbol_Pos; local val err_prefix = "Antiquotation lexical error: "; val par_l = "/" val par_r = "/" val scan_body1 = $$$ "*" --| Scan.ahead (~$$$ par_r) val scan_body1' = $$$ "*" @@@ $$$ par_r val scan_body2 = Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single val scan_antiq_body_multi = Scan.trace (C_Symbol_Pos.scan_string_qq_multi err_prefix scan_body1' || C_Symbol_Pos.scan_string_bq_multi err_prefix scan_body1') >> #2 || C_Symbol_Pos.scan_cartouche_multi err_prefix scan_body1' || scan_body1 || scan_body2; val scan_antiq_body_multi_recover = scan_body1 || scan_body2; val scan_antiq_body_inline = Scan.trace (C_Symbol_Pos.scan_string_qq_inline err_prefix || C_Symbol_Pos.scan_string_bq_inline err_prefix) >> #2 || C_Symbol_Pos.scan_cartouche_inline err_prefix || unless_eof newline; val scan_antiq_body_inline_recover = unless_eof newline; fun control_name sym = (case Symbol.decode sym of Symbol.Control name => name); fun scan_antiq_multi scan = Symbol_Pos.scan_pos -- (Scan.trace ($$ par_l |-- $$ "*" |-- scan) -- Symbol_Pos.scan_pos -- Symbol_Pos.!!! (fn () => err_prefix ^ "missing closing antiquotation") (Scan.repeats scan_antiq_body_multi -- Symbol_Pos.scan_pos -- ($$$ "*" @@@ $$$ par_r) -- Symbol_Pos.scan_pos)) fun scan_antiq_multi_recover scan = Symbol_Pos.scan_pos -- ($$ par_l |-- $$ "*" |-- scan -- Symbol_Pos.scan_pos -- (Scan.repeats scan_antiq_body_multi_recover -- Symbol_Pos.scan_pos -- ($$ "*" |-- $$ par_r |-- Symbol_Pos.scan_pos))) fun scan_antiq_inline scan = (Symbol_Pos.scan_pos -- Scan.trace ($$ "/" |-- $$ "/" |-- scan) -- Symbol_Pos.scan_pos -- Scan.repeats scan_antiq_body_inline -- Symbol_Pos.scan_pos) fun scan_antiq_inline_recover scan = (Symbol_Pos.scan_pos --| $$ "/" --| $$ "/" -- scan -- Symbol_Pos.scan_pos -- Scan.repeats scan_antiq_body_inline_recover -- Symbol_Pos.scan_pos) in val scan_control = Scan.option (Scan.one (Symbol.is_control o Symbol_Pos.symbol)) -- Symbol_Pos.scan_cartouche err_prefix >> (fn (opt_control, body) => let val (name, range) = (case opt_control of SOME (sym, pos) => ((control_name sym, pos), Symbol_Pos.range ((sym, pos) :: body)) | NONE => (("cartouche", #2 (hd body)), Symbol_Pos.range body)); in {name = name, range = range, body = body} end) || Scan.one (Symbol.is_control o Symbol_Pos.symbol) >> (fn (sym, pos) => {name = (control_name sym, pos), range = Symbol_Pos.range [(sym, pos)], body = []}); val scan_antiq = scan_antiq_multi ($$$ "@" >> K true || scan_body1 >> K false) >> (fn (pos1, (((explicit, body_begin), pos2), (((body, pos3), body_end), pos4))) => {explicit = explicit, start = Position.range_position (pos1, pos2), stop = SOME (Position.range_position (pos3, pos4)), range = Position.range (pos1, pos4), body = body, body_begin = body_begin, body_end = body_end}) || scan_antiq_inline ($$$ "@" >> K true || $$$ "*" >> K false) >> (fn ((((pos1, (explicit, body_begin)), pos2), body), pos3) => {explicit = explicit, start = Position.range_position (pos1, pos2), stop = NONE, range = Position.range (pos1, pos3), body = body, body_begin = body_begin, body_end = []}) val scan_antiq_recover = scan_antiq_multi_recover ($$$ "@" >> K true || scan_body1 >> K false) >> (fn (_, ((explicit, _), _)) => explicit) || scan_antiq_inline_recover ($$$ "@" >> K true || $$$ "*" >> K false) >> (fn ((((_, explicit), _), _), _) => explicit) end; end; \ ML \ \\<^file>\~~/src/Pure/ML/ml_options.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/ML/ml_options.ML Author: Makarius ML configuration options. *) \ structure C_Options = struct (* source trace *) val lexer_trace = Attrib.setup_config_bool @{binding C_lexer_trace} (K false); val parser_trace = Attrib.setup_config_bool @{binding C_parser_trace} (K false); val ML_verbose = Attrib.setup_config_bool @{binding C_ML_verbose} (K true); val starting_env = Attrib.setup_config_string @{binding C_starting_env} (K "empty"); val starting_rule = Attrib.setup_config_string @{binding C_starting_rule} (K "translation_unit"); end \ ML \ \\<^file>\~~/src/Pure/ML/ml_lex.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/ML/ml_lex.ML Author: Makarius Lexical syntax for Isabelle/ML and Standard ML. *) \ structure C_Lex = struct open C_Scan; open C_Basic_Symbol_Pos; (** keywords **) val keywords = ["(", ")", "[", "]", "->", ".", "!", "~", "++", "--", "+", "-", "*", "/", "%", "&", "<<", ">>", "<", "<=", ">", ">=", "==", "!=", "^", "|", "&&", "||", "?", ":", "=", "+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=", "<<=", ">>=", ",", ";", "{", "}", "...", (**) "_Alignas", "_Alignof", "__alignof", "alignof", "__alignof__", "__asm", "asm", "__asm__", "_Atomic", "__attribute", "__attribute__", "auto", "_Bool", "break", "__builtin_offsetof", "__builtin_types_compatible_p", "__builtin_va_arg", "case", "char", "_Complex", "__complex__", "__const", "const", "__const__", "continue", "default", "do", "double", "else", "enum", "__extension__", "extern", "float", "for", "_Generic", "goto", "if", "__imag", "__imag__", "__inline", "inline", "__inline__", "int", "__int128", "__label__", "long", "_Nonnull", "__nonnull", "_Noreturn", "_Nullable", "__nullable", "__real", "__real__", "register", "__restrict", "restrict", "__restrict__", "return", "short", "__signed", "signed", "__signed__", "sizeof", "static", "_Static_assert", "struct", "switch", "__thread", "_Thread_local", "typedef", "__typeof", "typeof", "__typeof__", "union", "unsigned", "void", "__volatile", "volatile", "__volatile__", "while"]; val keywords2 = ["__asm", "asm", "__asm__", "extern"]; val keywords3 = ["_Bool", "char", "double", "float", "int", "__int128", "long", "short", "__signed", "signed", "__signed__", "unsigned", "void"]; val lexicon = Scan.make_lexicon (map raw_explode keywords); (** tokens **) (* datatype token *) datatype token_kind_comment = Comment_formal of C_Antiquote.antiq | Comment_suspicious of (bool * string * ((Position.T * Markup.T) * string) list) option datatype token_kind_encoding = Encoding_L | Encoding_default | Encoding_file of string (* error message *) option type token_kind_string = token_kind_encoding * (Symbol.symbol, Position.range * int \ \exceeding \<^ML>\Char.maxOrd\\) either list datatype token_kind_int_repr = Repr_decimal | Repr_hexadecimal | Repr_octal datatype token_kind_int_flag = Flag_unsigned | Flag_long | Flag_long_long | Flag_imag datatype token_kind = Keyword | Ident | Type_ident | GnuC | ClangC | (**) Char of token_kind_string | Integer of int * token_kind_int_repr * token_kind_int_flag list | Float of Symbol_Pos.T list | String of token_kind_string | File of token_kind_string | (**) Space | Comment of token_kind_comment | Sharp of int | (**) Unknown | Error of string * token_group | Directive of token_kind_directive | EOF and token_kind_directive = Inline of token_group (* a not yet analyzed directive *) | Include of token_group | Define of token_group (* define *) * token_group (* name *) * token_group option (* functional arguments *) * token_group (* rewrite body *) | Undef of token_group (* name *) | Cpp of token_group | Conditional of token_group (* if *) * token_group list (* elif *) * token_group option (* else *) * token_group (* endif *) and token_group = Group1 of token list (* spaces and comments filtered from the directive *) * token list (* directive: raw data *) | Group2 of token list (* spaces and comments filtered from the directive *) * token list (* directive: function *) * token list (* directive: arguments (same line) *) | Group3 of ( Position.range (* full directive (with blanks) *) * token list (* spaces and comments filtered from the directive *) * token list (* directive: function *) * token list (* directive: arguments (same line) *)) * (Position.range * token list) (* C code or directive: arguments (following lines) *) and token = Token of Position.range * (token_kind * string); (* position *) fun set_range range (Token (_, x)) = Token (range, x); fun range_of (Token (range, _)) = range; val pos_of = #1 o range_of; val end_pos_of = #2 o range_of; (* stopper *) fun mk_eof pos = Token ((pos, Position.none), (EOF, "")); val eof = mk_eof Position.none; fun is_eof (Token (_, (EOF, _))) = true | is_eof _ = false; val stopper = Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; val one_not_eof = Scan.one (not o is_eof) (* token content *) fun kind_of (Token (_, (k, _))) = k; val group_list_of = fn Inline g => [g] | Include g => [g] | Define (g1, g2, o_g3, g4) => flat [[g1], [g2], the_list o_g3, [g4]] | Undef g => [g] | Cpp g => [g] | Conditional (g1, gs2, o_g3, g4) => flat [[g1], gs2, the_list o_g3, [g4]] fun content_of (Token (_, (_, x))) = x; fun token_leq (tok, tok') = content_of tok <= content_of tok'; val directive_cmds = fn Inline (Group1 (_, _ :: tok2 :: _)) => [tok2] | Include (Group2 (_, [_, tok2], _)) => [tok2] | Define (Group1 (_, [_, tok2]), _, _, _) => [tok2] | Undef (Group2 (_, [_, tok2], _)) => [tok2] | Conditional (c1, cs2, c3, c4) => maps (fn Group3 ((_, _, [_, tok2], _), _) => [tok2] | _ => error "Only expecting Group3") (flat [[c1], cs2, the_list c3, [c4]]) | _ => [] fun is_keyword (Token (_, (Keyword, _))) = true | is_keyword _ = false; fun is_ident (Token (_, (Ident, _))) = true | is_ident _ = false; fun is_integer (Token (_, (Integer _, _))) = true | is_integer _ = false; fun is_delimiter (Token (_, (Keyword, x))) = not (C_Symbol.is_ascii_identifier x) | is_delimiter _ = false; (* range *) val range_list_of0 = fn [] => Position.no_range | toks as tok1 :: _ => Position.range (pos_of tok1, end_pos_of (List.last toks)) (* WARNING the use of: \\\<^ML>\fn content_of => fn pos_of => fn tok2 => - List.last (Symbol_Pos.explode (content_of tok2, pos_of tok2)) |-> Position.advance\\ + List.last (Symbol_Pos.explode (content_of tok2, pos_of tok2)) |-> Position.symbol\\ would not return an accurate position if for example several "backslash newlines" are present in the symbol *) fun range_list_of toks = (range_list_of0 toks, toks) fun range_list_of' toks1 toks2 = (range_list_of0 toks1, toks2) local fun cmp_pos x2 x1 = case Position.distance_of (pos_of x2, pos_of x1) of SOME dist => dist < 0 | NONE => error "cmp_pos" fun merge_pos xs = case xs of (xs1, []) => xs1 | ([], xs2) => xs2 | (x1 :: xs1, x2 :: xs2) => let val (x, xs) = if cmp_pos x2 x1 then (x1, (xs1, x2 :: xs2)) else (x2, (x1 :: xs1, xs2)) in x :: merge_pos xs end in fun merge_blank toks_bl xs1 xs2 = let val cmp_tok2 = cmp_pos (List.last xs1) in ( range_list_of (merge_pos (xs1, filter cmp_tok2 toks_bl)) , range_list_of (merge_pos (xs2, filter_out cmp_tok2 toks_bl))) end end val token_list_of = let fun merge_blank' toks_bl xs1 xs2 = let val ((_, l1), (_, l2)) = merge_blank toks_bl xs1 xs2 in [l1, l2] end in group_list_of #> maps (fn Group1 (toks_bl, []) => [toks_bl] | Group1 (toks_bl, xs1) => merge_blank' toks_bl xs1 [] | Group2 (toks_bl, xs1, xs2) => merge_blank' toks_bl xs1 xs2 | Group3 ((_, toks_bl, xs1, xs2), (_, xs3)) => flat [merge_blank' toks_bl xs1 xs2, [xs3]]) #> flat end local fun warn0 pos l s = () |> tap (fn _ => if exists (fn Left s => not (Symbol.is_printable s) | _ => false) l then app (fn (s, pos) => if Symbol.is_printable s then () else Output.information ("Not printable character " ^ @{make_string} (ord s, s) ^ Position.here pos)) (Symbol_Pos.explode (s, pos)) else ()) |> tap (fn _ => app (fn Left _ => () | Right ((pos1, _), n) => Output.information ("Out of the supported range (character number " ^ Int.toString n ^ ")" ^ Position.here pos1)) l) fun unknown pos = Output.information ("Unknown symbol" ^ Position.here pos) val app_directive = app (fn Token (_, (Error (msg, _), _)) => warning msg | Token ((pos, _), (Unknown, _)) => unknown pos | _ => ()) in val warn = fn Token ((pos, _), (Char (_, l), s)) => warn0 pos l s | Token ((pos, _), (String (_, l), s)) => warn0 pos l s | Token ((pos, _), (File (_, l), s)) => warn0 pos l s | Token ((pos, _), (Unknown, _)) => unknown pos | Token (_, (Comment (Comment_suspicious (SOME (explicit, msg, _))), _)) => (if explicit then warning else tracing) msg | Token (_, (Directive (kind as Conditional _), _)) => app_directive (token_list_of kind) | Token (_, (Directive (Define (_, _, _, Group1 (_, toks4))), _)) => app_directive toks4 | Token (_, (Directive (Include (Group2 (_, _, toks))), _)) => (case toks of [Token (_, (String _, _))] => () | [Token (_, (File _, _))] => () | _ => Output.information ("Expecting at least and at most one file" ^ Position.here (Position.range_position (pos_of (hd toks), end_pos_of (List.last toks))))) | _ => (); end fun check_error tok = case kind_of tok of Error (msg, _) => [msg] | _ => []; (* markup *) local val token_kind_markup0 = fn Char _ => (Markup.ML_char, "") | Integer _ => (Markup.ML_numeral, "") | Float _ => (Markup.ML_numeral, "") | ClangC => (Markup.ML_numeral, "") | String _ => (Markup.ML_string, "") | File _ => (Markup.ML_string, "") | Sharp _ => (Markup.antiquote, "") | Unknown => (Markup.intensify, "") | Error (msg, _) => (Markup.bad (), msg) | _ => (Markup.empty, ""); fun token_report' escape_directive (tok as Token ((pos, _), (kind, x))) = if escape_directive andalso (is_keyword tok orelse is_ident tok) then [((pos, Markup.antiquote), "")] else if is_keyword tok then let val (markup, txt) = if is_delimiter tok then (Markup.ML_delimiter, "") else if member (op =) keywords2 x then (Markup.ML_keyword2 |> Markup.keyword_properties, "") else if member (op =) keywords3 x then (Markup.ML_keyword3 |> Markup.keyword_properties, "") else (Markup.ML_keyword1 |> Markup.keyword_properties, ""); in [((pos, markup), txt)] end else case kind of Directive (tokens as Inline _) => ((pos, Markup.antiquoted), "") :: maps token_report0 (token_list_of tokens) | Directive (Include (Group2 (toks_bl, tok1 :: _, toks2))) => ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks2 , maps token_report0 toks_bl ] | Directive (Define (Group1 (toks_bl1, tok1 :: _), Group1 (toks_bl2, _), toks3, Group1 (toks_bl4, toks4))) => let val (toks_bl3, toks3) = case toks3 of SOME (Group1 x) => x | _ => ([], []) in ((pos, Markup.antiquoted), "") :: ((range_list_of0 toks4 |> #1, Markup.intensify), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks3 , maps token_report0 toks4 , maps token_report0 toks_bl1 , maps token_report0 toks_bl2 , map (fn tok => ((pos_of tok, Markup.antiquote), "")) toks_bl3 , maps token_report0 toks_bl4 ] end | Directive (Undef (Group2 (toks_bl, tok1 :: _, _))) => ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks_bl ] | Directive (Cpp (Group2 (toks_bl, toks1, toks2))) => ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 toks1 , maps token_report0 toks2 , maps token_report0 toks_bl ] | Directive (Conditional (c1, cs2, c3, c4)) => maps (fn Group3 (((pos, _), toks_bl, tok1 :: _, toks2), ((pos3, _), toks3)) => ((pos, Markup.antiquoted), "") :: ((pos3, Markup.intensify), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks2 , maps token_report0 toks3 , maps token_report0 toks_bl ] | _ => error "Only expecting Group3") (flat [[c1], cs2, the_list c3, [c4]]) | Error (msg, Group2 (toks_bl, toks1, toks2)) => ((range_list_of0 toks1 |> #1, Markup.bad ()), msg) :: ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 toks1 , maps token_report0 toks2 , maps token_report0 toks_bl ] | Error (msg, Group3 ((_, toks_bl, toks1, toks2), _)) => ((range_list_of0 toks1 |> #1, Markup.bad ()), msg) :: ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 toks1 , maps token_report0 toks2 , maps token_report0 toks_bl ] | Comment (Comment_suspicious c) => ((pos, Markup.ML_comment), "") :: (case c of NONE => [] | SOME (_, _, l) => l) | x => [let val (markup, txt) = token_kind_markup0 x in ((pos, markup), txt) end] and token_report0 tok = token_report' false tok and token_report1 tok = token_report' true tok in val token_report = token_report0 end; (** scanners **) val err_prefix = "C lexical error: "; fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); fun !!!! msg = C_Symbol_Pos.!!!! (fn () => err_prefix ^ msg); val many1_blanks_no_line = many1 C_Symbol.is_ascii_blank_no_line (* identifiers *) val scan_ident_sym = let val hex = one' Symbol.is_ascii_hex in one' C_Symbol.is_identletter || $$$ "\\" @@@ $$$ "u" @@@ hex @@@ hex @@@ hex @@@ hex || $$$ "\\" @@@ $$$ "U" @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex || one' Symbol.is_symbolic || one' Symbol.is_control || one' Symbol.is_utf8 end val scan_ident = scan_ident_sym @@@ Scan.repeats (scan_ident_sym || one' Symbol.is_ascii_digit); val keywords_ident = map_filter (fn s => Source.of_list (Symbol_Pos.explode (s, Position.none)) |> Source.source Symbol_Pos.stopper (Scan.bulk (scan_ident >> SOME || Scan.one (not o Symbol_Pos.is_eof) >> K NONE)) |> Source.exhaust |> (fn [SOME _] => SOME s | _ => NONE)) keywords (* numerals *) fun read_bin s = #1 (read_radix_int 2 s) fun read_oct s = #1 (read_radix_int 8 s) fun read_dec s = #1 (read_int s) val read_hex = let fun conv_ascii c1 c0 = String.str (Char.chr (Char.ord #"9" + Char.ord c1 - Char.ord c0 + 1)) in map (fn s => let val c = String.sub (s, 0) in if c >= #"A" andalso c <= #"F" then conv_ascii c #"A" else if c >= #"a" andalso c <= #"f" then conv_ascii c #"a" else s end) #> read_radix_int 16 #> #1 end local val many_digit = many Symbol.is_ascii_digit val many1_digit = many1 Symbol.is_ascii_digit val many_hex = many Symbol.is_ascii_hex val many1_hex = many1 Symbol.is_ascii_hex val scan_suffix_ll = ($$$ "l" @@@ $$$ "l" || $$$ "L" @@@ $$$ "L") >> K [Flag_long_long] fun scan_suffix_gnu flag = ($$$ "i" || $$$ "j") >> K [flag] val scan_suffix_int = let val u = ($$$ "u" || $$$ "U") >> K [Flag_unsigned] val l = ($$$ "l" || $$$ "L") >> K [Flag_long] in u @@@ scan_suffix_ll || scan_suffix_ll @@@ opt u || u @@@ opt l || l @@@ opt u end val scan_suffix_gnu_int0 = scan_suffix_gnu Flag_imag val scan_suffix_gnu_int = scan_full !!! (member (op =) (raw_explode "uUlLij")) "Invalid integer constant suffix" ( scan_suffix_int @@@ opt scan_suffix_gnu_int0 || scan_suffix_gnu_int0 @@@ opt scan_suffix_int) fun scan_intgnu x = x -- opt scan_suffix_gnu_int >> (fn ((s1', read, repr), l) => (read (map (Symbol_Pos.content o single) s1'), repr, l)) val scan_intoct = scan_intgnu ($$ "0" |-- scan_full !!! Symbol.is_ascii_digit "Invalid digit in octal constant" (Scan.max (fn ((xs2, _, _), (xs1, _, _)) => length xs2 < length xs1) (many C_Symbol.is_ascii_oct >> (fn xs => (xs, read_oct, Repr_octal))) (many (fn x => x = "0") >> (fn xs => (xs, read_dec, Repr_decimal))))) val scan_intdec = scan_intgnu (one C_Symbol.is_ascii_digit1 -- many Symbol.is_ascii_digit >> (fn (x, xs) => (x :: xs, read_dec, Repr_decimal))) val scan_inthex = scan_intgnu (($$ "0" -- ($$ "x" || $$ "X")) |-- many1_hex >> (fn xs2 => (xs2, read_hex, Repr_hexadecimal))) (**) fun scan_signpart a A = ($$$ a || $$$ A) @@@ opt ($$$ "+" || $$$ "-") @@@ many1_digit val scan_exppart = scan_signpart "e" "E" val scan_suffix_float = $$$ "f" || $$$ "F" || $$$ "l" || $$$ "L" val scan_suffix_gnu_float0 = Scan.trace (scan_suffix_gnu ()) >> #2 val scan_suffix_gnu_float = scan_full !!! (member (op =) (raw_explode "fFlLij")) "Invalid float constant suffix" ( scan_suffix_float @@@ opt scan_suffix_gnu_float0 || scan_suffix_gnu_float0 @@@ opt scan_suffix_float) val scan_hex_pref = $$$ "0" @@@ $$$ "x" val scan_hexmant = many_hex @@@ $$$ "." @@@ many1_hex || many1_hex @@@ $$$ "." val scan_floatdec = ( ( many_digit @@@ $$$ "." @@@ many1_digit || many1_digit @@@ $$$ ".") @@@ opt scan_exppart || many1_digit @@@ scan_exppart) @@@ opt scan_suffix_gnu_float val scan_floathex = scan_hex_pref @@@ (scan_hexmant || many1_hex) @@@ scan_signpart "p" "P" @@@ opt scan_suffix_gnu_float val scan_floatfail = scan_hex_pref @@@ scan_hexmant in val scan_int = scan_inthex || scan_intoct || scan_intdec val recover_int = many1 (fn s => Symbol.is_ascii_hex s orelse member (op =) (raw_explode "xXuUlLij") s) val scan_float = scan_floatdec || scan_floathex || scan_floatfail @@@ !!! "Hexadecimal floating constant requires an exponent" Scan.fail val scan_clangversion = many1_digit @@@ $$$ "." @@@ many1_digit @@@ $$$ "." @@@ many1_digit end; (* chars and strings *) val scan_blanks1 = many1 Symbol.is_ascii_blank local val escape_char = [ ("n", #"\n") , ("t", #"\t") , ("v", #"\v") , ("b", #"\b") , ("r", #"\r") , ("f", #"\f") , ("a", #"\a") , ("e", #"\^[") , ("E", #"\^[") , ("\\", #"\\") , ("?", #"?") , ("'", #"'") , ("\"", #"\"") ] val _ = \ \printing a ML function translating code point from \<^ML_type>\int -> string\\ fn _ => app (fn (x0, x) => writeln (" | " ^ string_of_int (Char.ord x) ^ " => \"\\\\" ^ (if exists (fn x1 => x0 = x1) ["\"", "\\"] then "\\" ^ x0 else x0) ^ "\"")) escape_char fun scan_escape s0 = let val oct = one' C_Symbol.is_ascii_oct val hex = one' Symbol.is_ascii_hex fun chr' f l = let val x = f (map Symbol_Pos.content l) in [if x <= Char.maxOrd then Left (chr x) else Right (Symbol_Pos.range (flat l), x)] end val read_oct' = chr' read_oct val read_hex' = chr' read_hex in one' (member (op =) (raw_explode (s0 ^ String.concat (map #1 escape_char)))) >> (fn i => [Left (case AList.lookup (op =) escape_char (Symbol_Pos.content i) of NONE => s0 | SOME c => String.str c)]) || oct -- oct -- oct >> (fn ((o1, o2), o3) => read_oct' [o1, o2, o3]) || oct -- oct >> (fn (o1, o2) => read_oct' [o1, o2]) || oct >> (read_oct' o single) || $$ "x" |-- many1 Symbol.is_ascii_hex >> (read_hex' o map single) || $$ "u" |-- hex -- hex -- hex -- hex >> (fn (((x1, x2), x3), x4) => read_hex' [x1, x2, x3, x4]) || $$ "U" |-- hex -- hex -- hex -- hex -- hex -- hex -- hex -- hex >> (fn (((((((x1, x2), x3), x4), x5), x6), x7), x8) => read_hex' [x1, x2, x3, x4, x5, x6, x7, x8]) end fun scan_str s0 = Scan.unless newline (Scan.one (fn (s, _) => Symbol.not_eof s andalso s <> s0 andalso s <> "\\")) >> (fn s => [Left (#1 s)]) || Scan.ahead newline |-- !!! "bad newline" Scan.fail || $$ "\\" |-- !!! "bad escape character" (scan_escape s0); fun scan_string0 s0 msg repeats = Scan.optional ($$ "L" >> K Encoding_L) Encoding_default -- (Scan.ahead ($$ s0) |-- !!! ("unclosed " ^ msg ^ " literal") ($$ s0 |-- repeats (scan_str s0) --| $$ s0)) fun recover_string0 s0 repeats = opt ($$$ "L") @@@ $$$ s0 @@@ repeats (Scan.permissive (Scan.trace (scan_str s0) >> #2)); in val scan_char = scan_string0 "'" "char" Scan.repeats1 val scan_string = scan_string0 "\"" "string" Scan.repeats fun scan_string' src = case Source.source Symbol_Pos.stopper (Scan.recover (Scan.bulk (!!! "bad input" scan_string >> K NONE)) (fn msg => C_Basic_Symbol_Pos.one_not_eof >> K [SOME msg])) (Source.of_list src) |> Source.exhaust of [NONE] => NONE | [] => SOME "Empty input" | l => case map_filter I l of msg :: _ => SOME msg | _ => SOME "More than one string" val scan_file = let fun scan !!! s_l s_r = Scan.ahead ($$ s_l) |-- !!! ($$ s_l |-- Scan.repeats (Scan.unless newline (Scan.one (fn (s, _) => Symbol.not_eof s andalso s <> s_r) >> (fn s => [Left (#1 s)]))) --| $$ s_r) in Scan.trace (scan (!!! ("unclosed file literal")) "\"" "\"") >> (fn (s, src) => String (Encoding_file (scan_string' src), s)) || scan I \ \Due to conflicting symbols, raising \<^ML>\Symbol_Pos.!!!\ here will not let a potential legal \<^ML>\"<"\ symbol be tried and parsed as a \<^emph>\keyword\.\ "<" ">" >> (fn s => File (Encoding_default, s)) end val recover_char = recover_string0 "'" Scan.repeats1 val recover_string = recover_string0 "\"" Scan.repeats end; (* scan tokens *) val check = fold (tap warn #> fold cons o check_error) local fun token k ss = Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss)); fun scan_token f1 f2 = Scan.trace f1 >> (fn (v, s) => token (f2 v) s) val comments = Scan.recover (scan_token C_Antiquote.scan_antiq (Comment o Comment_formal)) (fn msg => Scan.ahead C_Antiquote.scan_antiq_recover -- C_Symbol_Pos.scan_comment_no_nest err_prefix >> (fn (explicit, res) => token (Comment (Comment_suspicious (SOME (explicit, msg, [])))) res) || Scan.fail_with (fn _ => fn _ => msg)) || C_Symbol_Pos.scan_comment_no_nest err_prefix >> token (Comment (Comment_suspicious NONE)) fun scan_fragment blanks comments sharps non_blanks = non_blanks (scan_token scan_char Char) || non_blanks (scan_token scan_string String) || blanks || comments || non_blanks sharps || non_blanks (Scan.max token_leq (Scan.literal lexicon >> token Keyword) ( scan_clangversion >> token ClangC || scan_token scan_float Float || scan_token scan_int Integer || scan_ident >> token Ident)) || non_blanks (Scan.one (Symbol.is_printable o #1) >> single >> token Unknown) (* scan tokens, directive part *) val scan_sharp1 = $$$ "#" val scan_sharp2 = $$$ "#" @@@ $$$ "#" val scan_directive = let val f_filter = fn Token (_, (Space, _)) => true | Token (_, (Comment _, _)) => true | Token (_, (Error _, _)) => true | _ => false val sharp1 = scan_sharp1 >> token (Sharp 1) in (sharp1 >> single) @@@ Scan.repeat ( scan_token scan_file I || scan_fragment (many1_blanks_no_line >> token Space) comments (scan_sharp2 >> token (Sharp 2) || sharp1) I) >> (fn tokens => Inline (Group1 (filter f_filter tokens, filter_out f_filter tokens))) end local fun !!! text scan = let fun get_pos [] = " (end-of-input)" | get_pos (t :: _) = Position.here (pos_of t); fun err (syms, msg) = fn () => err_prefix ^ text ^ get_pos syms ^ (case msg of NONE => "" | SOME m => "\n" ^ m ()); in Scan.!! err scan end val pos_here_of = Position.here o pos_of fun one_directive f = Scan.one (fn Token (_, (Directive ( Inline (Group1 (_, Token (_, (Sharp 1, _)) :: Token (_, s) :: _))) , _)) => f s | _ => false) val get_cond = fn Token (pos, (Directive (Inline (Group1 (toks_bl, tok1 :: tok2 :: toks))), _)) => (fn t3 => Group3 ((pos, toks_bl, [tok1, tok2], toks), range_list_of t3)) | _ => error "Inline directive expected" val one_start_cond = one_directive (fn (Keyword, "if") => true | (Ident, "ifdef") => true | (Ident, "ifndef") => true | _ => false) val one_elif = one_directive (fn (Ident, "elif") => true | _ => false) val one_else = one_directive (fn (Keyword, "else") => true | _ => false) val one_endif = one_directive (fn (Ident, "endif") => true | _ => false) val not_cond = Scan.unless (one_start_cond || one_elif || one_else || one_endif) (one_not_eof >> (fn Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Ident, "include"))) :: toks))) , s)) => Token (pos, ( case toks of [] => Error ( "Expecting at least one file" ^ Position.here (end_pos_of tok2) , Group2 (toks_bl, [tok1, tok2], toks)) | _ => Directive (Include (Group2 (toks_bl, [tok1, tok2], toks))) , s)) | Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Ident, "define"))) :: toks))) , s)) => let fun define tok3 toks = case case toks of (tok3' as Token (pos, (Keyword, "("(*)*)))) :: toks => if Position.offset_of (end_pos_of tok3) = Position.offset_of (pos_of tok3') then let fun right msg pos = Right (msg ^ Position.here pos) fun right1 msg = right msg o #1 fun right2 msg = right msg o #2 fun take_prefix' toks_bl toks_acc pos = fn (tok1 as Token (_, (Ident, _))) :: (tok2 as Token (pos2, (Keyword, key))) :: toks => if key = "," then take_prefix' (tok2 :: toks_bl) (tok1 :: toks_acc) pos2 toks else if key = (*( *)")" then Left (rev (tok2 :: toks_bl), rev (tok1 :: toks_acc), toks) else right1 "Expecting a colon delimiter or a closing parenthesis" pos2 | Token (pos1, (Ident, _)) :: _ => right2 "Expecting a colon delimiter or a closing parenthesis" pos1 | (tok1 as Token (_, (Keyword, key1))) :: (tok2 as Token (pos2, (Keyword, key2))) :: toks => if key1 = "..." then if key2 = (*( *)")" then Left (rev (tok2 :: toks_bl), rev (tok1 :: toks_acc), toks) else right1 "Expecting a closing parenthesis" pos2 else right2 "Expecting an identifier or the keyword '...'" pos | _ => right2 "Expecting an identifier or the keyword '...'" pos in case case toks of (tok2 as Token (_, (Keyword, (*( *)")"))) :: toks => Left ([tok2], [], toks) | _ => take_prefix' [] [] pos toks of Left (toks_bl, toks_acc, toks) => Left (SOME (Group1 (tok3' :: toks_bl, toks_acc)), Group1 ([], toks)) | Right x => Right x end else Left (NONE, Group1 ([], tok3' :: toks)) | _ => Left (NONE, Group1 ([], toks)) of Left (gr1, gr2) => Directive (Define (Group1 (toks_bl, [tok1, tok2]), Group1 ([], [tok3]), gr1, gr2)) | Right msg => Error (msg, Group2 (toks_bl, [tok1, tok2], tok3 :: toks)) fun err () = Error ( "Expecting at least one identifier" ^ Position.here (end_pos_of tok2) , Group2 (toks_bl, [tok1, tok2], toks)) in Token (pos, ( case toks of (tok3 as Token (_, (Ident, _))) :: toks => define tok3 toks | (tok3 as Token (_, (Keyword, cts))) :: toks => if exists (fn cts0 => cts = cts0) keywords_ident then define tok3 toks else err () | _ => err () , s)) end | Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Ident, "undef"))) :: toks))) , s)) => Token (pos, ( let fun err () = Error ( "Expecting at least and at most one identifier" ^ Position.here (end_pos_of tok2) , Group2 (toks_bl, [tok1, tok2], toks)) in case toks of [Token (_, (Ident, _))] => Directive (Undef (Group2 (toks_bl, [tok1, tok2], toks))) | [Token (_, (Keyword, cts))] => if exists (fn cts0 => cts = cts0) keywords_ident then Directive (Undef (Group2 (toks_bl, [tok1, tok2], toks))) else err () | _ => err () end , s)) | Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Integer _, _))) :: (tok3 as Token (_, (String _, _))) :: toks))) , s)) => Token (pos, ( if forall is_integer toks then Directive (Cpp (Group2 (toks_bl, [tok1], tok2 :: tok3 :: toks))) else Error ( "Expecting an integer" ^ Position.here (drop_prefix is_integer toks |> hd |> pos_of) , Group2 (toks_bl, [tok1], tok2 :: tok3 :: toks)) , s)) | x => x)) fun scan_cond xs = xs |> (one_start_cond -- scan_cond_list -- Scan.repeat (one_elif -- scan_cond_list) -- Scan.option (one_else -- scan_cond_list) -- Scan.recover one_endif (fn msg => Scan.fail_with (fn toks => fn () => case toks of tok :: _ => "can be closed here" ^ Position.here (pos_of tok) | _ => msg)) >> (fn (((t_if, t_elif), t_else), t_endif) => Token ( Position.no_range , ( Directive (Conditional let fun t_body x = x |-> get_cond in ( t_body t_if , map t_body t_elif , Option.map t_body t_else , t_body (t_endif, [])) end) , "")))) and scan_cond_list xs = xs |> Scan.repeat (not_cond || scan_cond) val scan_directive_cond0 = not_cond || Scan.ahead ( one_start_cond |-- scan_cond_list |-- Scan.repeat (one_elif -- scan_cond_list) |-- one_else --| scan_cond_list -- (one_elif || one_else)) :-- (fn (tok1, tok2) => !!! ( "directive" ^ pos_here_of tok2 ^ " not expected after" ^ pos_here_of tok1 ^ ", detected at") Scan.fail) >> #2 || (Scan.ahead one_start_cond |-- !!! "unclosed directive" scan_cond) || (Scan.ahead one_not_eof |-- !!! "missing or ambiguous beginning of conditional" Scan.fail) fun scan_directive_recover msg = not_cond || one_not_eof >> (fn tok as Token (pos, (_, s)) => Token (pos, (Error (msg, get_cond tok []), s))) in val scan_directive_cond = Scan.recover (Scan.bulk scan_directive_cond0) (fn msg => scan_directive_recover msg >> single) end (* scan tokens, main *) val scan_ml = Scan.depend let fun non_blanks st scan = scan >> pair st fun scan_frag st = scan_fragment ( C_Basic_Symbol_Pos.newline >> token Space >> pair true || many1_blanks_no_line >> token Space >> pair st) (non_blanks st comments) ((scan_sharp2 || scan_sharp1) >> token Keyword) (non_blanks false) in fn true => scan_token scan_directive Directive >> pair false || scan_frag true | false => scan_frag false end; fun recover msg = (recover_char || recover_string || Symbol_Pos.recover_cartouche || C_Symbol_Pos.recover_comment || recover_int || one' Symbol.not_eof) >> token (Error (msg, Group1 ([], []))); fun reader scan syms = let val termination = if null syms then [] else let - val pos1 = List.last syms |-> Position.advance; - val pos2 = Position.advance Symbol.space pos1; + val pos1 = List.last syms |-> Position.symbol; + val pos2 = Position.symbol Symbol.space pos1; in [Token (Position.range (pos1, pos2), (Space, Symbol.space))] end; val backslash1 = $$$ "\\" @@@ many C_Symbol.is_ascii_blank_no_line @@@ C_Basic_Symbol_Pos.newline val backslash2 = Scan.one (not o Symbol_Pos.is_eof) val input0 = Source.of_list syms |> Source.source Symbol_Pos.stopper (Scan.bulk (backslash1 >> SOME || backslash2 >> K NONE)) |> Source.map_filter I |> Source.exhaust |> map (Symbol_Pos.range #> Position.range_position) val input1 = Source.of_list syms |> Source.source Symbol_Pos.stopper (Scan.bulk (backslash1 >> K NONE || backslash2 >> SOME)) |> Source.map_filter I |> Source.source' true Symbol_Pos.stopper (Scan.recover (Scan.bulk (!!!! "bad input" scan)) (fn msg => Scan.lift (recover msg) >> single)) |> Source.source stopper scan_directive_cond |> Source.exhaust |> (fn input => input @ termination); val _ = app (fn pos => Output.information ("Backslash newline" ^ Position.here pos)) input0 val _ = Position.reports_text (map (fn pos => ((pos, Markup.intensify), "")) input0); in (input1, check input1) end; in fun op @@ ((input1, f_error_lines1), (input2, f_error_lines2)) = (input1 @ input2, f_error_lines1 #> f_error_lines2) val read_init = ([], I) fun read text = (reader scan_ml o Symbol_Pos.explode) (text, Position.none); fun read_source' {language, symbols} scan source = let val pos = Input.pos_of source; val _ = if Position.is_reported_range pos then Position.report pos (language (Input.is_delimited source)) else (); in Input.source_explode source |> not symbols ? maps (fn (s, p) => raw_explode s |> map (rpair p)) |> reader scan end; val read_source = read_source' { language = Markup.language' {name = "C", symbols = false, antiquotes = true}, symbols = true} scan_ml; end; end; \ end