diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Environment.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Environment.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Environment.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Environment.thy @@ -1,645 +1,645 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Parsing Environment\ theory C_Environment imports C_Lexer_Language C_Ast begin text \ The environment comes in two parts: a basic core structure, and a (thin) layer of utilities. 
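Throughout this patch, the project-local abbreviation \<^ML_text>\C_Position.reports_text\ is replaced by the plain Isabelle/ML type \<^ML_text>\Position.report_text list\, and the wrapper structure \<^ML_text>\C_Position\ is removed from \<^theory>\Isabelle_C.C_Lexer_Language\ further below. The two spellings denote the same type; as a reminder (a sketch only, following the definitions in \<^file>\~~/src/Pure/General/position.ML\):

  type reports_text = Position.report_text list
                   (* = ((Position.T * Markup.T) * string) list *)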
\ ML\ signature C_ENV = sig val namespace_enum: string val namespace_tag: string val namespace_typedef: string type error_lines = string list datatype stream_lang_state = Stream_atomic | Stream_ident of Position.range * string | Stream_regular | Stream_string of (Position.range * string) list type ('a, 'b, 'c) stack_elem0 = 'a * ('b * 'c * 'c) type 'a stream = ('a, C_Lex.token) C_Scan.either list datatype 'a parse_status = Parsed of 'a | Previous_in_stack eqtype markup_global type markup_ident = {global : markup_global, params : C_Ast.CDerivedDeclr list, ret : C_Ast.CDeclSpec list parse_status} type 'a markup_store = Position.T list * serial * 'a type var_table = {idents : markup_ident markup_store Symtab.table, tyidents : markup_global markup_store Symtab.table Symtab.table} type env_directives = ( (string * Position.range -> Context.generic -> C_Lex.token list * Context.generic, C_Lex.token list) C_Scan.either * (string * Position.range -> Context.generic -> Context.generic)) markup_store Symtab.table type env_lang = {env_directives : env_directives, namesupply : int, scopes : (C_Ast.ident option * var_table) list, stream_ignored : C_Antiquote.antiq stream, var_table: var_table} type env_tree = {context : Context.generic, error_lines : error_lines, - reports_text : C_Position.reports_text} + reports_text : Position.report_text list} type env_propagation_reduce = int option type env_propagation_ctxt = env_propagation_reduce -> Context.generic -> Context.generic type env_propagation_directive = env_propagation_reduce -> env_directives -> env_lang * env_tree -> env_lang * env_tree datatype env_propagation_bottom_up = Exec_annotation of env_propagation_ctxt | Exec_directive of env_propagation_directive datatype env_propagation = Bottom_up of env_propagation_bottom_up | Top_down of env_propagation_ctxt type rule_static = (env_tree -> env_lang * env_tree) option type 'a rule_output0' = {output_env: rule_static, output_pos: 'a option, output_vacuous: bool} type ('a, 'b, 'c) stack0 = ('a, 'b, 'c) stack_elem0 list type rule_output = C_Ast.class_Pos rule_output0' type eval_node = Position.range * env_propagation * env_directives * bool type ('a, 'b, 'c) rule_reduce = int * ('a, 'b, 'c) stack0 * eval_node list list type ('a, 'b, 'c) rule_reduce0 = (('a, 'b, 'c) stack0 * env_lang * eval_node) list type ('a, 'b, 'c) rule_reduce' = int * bool * ('a, 'b, 'c) rule_reduce0 datatype ('a, 'b, 'c) rule_type = Reduce of rule_static * ('a, 'b, 'c) rule_reduce' | Shift | Void type ('a, 'b, 'c) rule_ml = {rule_pos: 'c * 'c, rule_type: ('a, 'b, 'c) rule_type} type ('a, 'b, 'c) rule_output0 = eval_node list list * ('a, 'b, 'c) rule_reduce0 * ('c * 'c) rule_output0' datatype 'a tree = Tree of 'a * 'a tree list type ('a, 'b, 'c) stack' = ('a, 'b, 'c) stack0 * eval_node list list * ('c * 'c) list * ('a, 'b, 'c) rule_ml tree list datatype comment_style = Comment_directive | Comment_language datatype eval_time = Never | Lexing of Position.range * (comment_style -> Context.generic -> Context.generic) | Parsing of (Symbol_Pos.T list * Symbol_Pos.T list) * eval_node datatype antiq_language = Antiq_none of C_Lex.token - | Antiq_stack of C_Position.reports_text * eval_time + | Antiq_stack of Position.report_text list * eval_time type 'a stream_hook = ('a list * Symbol_Pos.T list * eval_node) list list type 'a T = {env_lang : env_lang, env_tree : env_tree, rule_input : C_Ast.class_Pos list * int, rule_output : rule_output, stream_hook: Symbol_Pos.T stream_hook, stream_hook_excess : int stream_hook, stream_lang : 
stream_lang_state * 'a stream} val decode_positions: string -> Position.T list val empty_env_lang: env_lang val empty_env_tree: Context.generic -> env_tree val empty_rule_output: rule_output val encode_positions: Position.T list -> string val get_scopes: env_lang -> (C_Ast.ident option * var_table) list val make: env_lang -> 'a stream -> env_tree -> 'a T val map_context: (Context.generic -> Context.generic) -> {context: Context.generic, error_lines: 'c, reports_text: 'd} -> {context: Context.generic, error_lines: 'c, reports_text: 'd} (* why not just "env_tree" *) val map_context': (Context.generic -> 'b * Context.generic) -> {context: Context.generic, error_lines: 'd, reports_text: 'e} -> 'b * {context: Context.generic, error_lines: 'd, reports_text: 'e} (* why not just "env_tree" *) - val map_reports_text:(C_Position.reports_text -> C_Position.reports_text) -> env_tree -> env_tree + val map_reports_text: (Position.report_text list -> Position.report_text list) -> env_tree -> env_tree val map_error_lines: (error_lines -> error_lines) -> {context: 'c, error_lines: error_lines, reports_text: 'd} -> {context: 'c, error_lines: error_lines, reports_text: 'd} (* why not just : "env_tree" *) val map_namesupply: (int -> int) -> env_lang -> env_lang val map_env_directives: (env_directives -> env_directives) -> env_lang -> env_lang val map_scopes : ((C_Ast.ident option * var_table) list -> (C_Ast.ident option * var_table) list) -> env_lang -> env_lang val map_stream_ignored: (C_Antiquote.antiq stream->C_Antiquote.antiq stream) -> env_lang -> env_lang val map_var_table: (var_table -> var_table) -> env_lang -> env_lang val map_env_lang : (env_lang -> env_lang) -> 'a T -> 'a T val map_env_lang_tree : (env_lang -> env_tree -> env_lang * env_tree) -> 'a T -> 'a T val map_env_lang_tree': (env_lang -> env_tree -> 'c * (env_lang * env_tree)) -> 'a T -> 'c * 'a T val map_env_tree : (env_tree -> env_tree) -> 'a T -> 'a T val map_env_tree' : (env_tree -> 'b * env_tree) -> 'a T -> 'b * 'a T val map_rule_output: (rule_output -> rule_output) -> 'a T -> 'a T val map_stream_hook: (Symbol_Pos.T stream_hook -> Symbol_Pos.T stream_hook) -> 'a T -> 'a T val map_stream_hook_excess: (int stream_hook -> int stream_hook) -> 'a T -> 'a T val map_rule_input : (C_Ast.class_Pos list * int -> C_Ast.class_Pos list * int) -> 'a T -> 'a T val map_stream_lang: (stream_lang_state*'a stream -> stream_lang_state*'a stream)-> 'a T -> 'a T val map_output_env : (rule_static -> rule_static) -> 'a rule_output0' -> 'a rule_output0' val map_output_pos : ('a option -> 'a option) -> 'a rule_output0' -> 'a rule_output0' val map_output_vacuous : (bool -> bool) -> 'a rule_output0' -> 'a rule_output0' val map_idents: (markup_ident markup_store Symtab.table -> markup_ident markup_store Symtab.table) -> var_table -> var_table val map_tyidents: (markup_global markup_store Symtab.table Symtab.table -> markup_global markup_store Symtab.table Symtab.table ) -> var_table -> var_table val string_of: env_lang -> string end \ ML \ \\<^file>\~~/src/Pure/context.ML\\ \ structure C_Env : C_ENV = struct type 'a markup_store = Position.T list * serial * 'a type env_directives = ( ( string * Position.range -> Context.generic -> C_Lex.token list * Context.generic , C_Lex.token list) C_Scan.either * (string * Position.range -> Context.generic -> Context.generic)) markup_store Symtab.table (**) datatype 'a parse_status = Parsed of 'a | Previous_in_stack type markup_global = bool (*true: global*) type markup_ident = { global : markup_global , params : 
C_Ast.CDerivedDeclr list , ret : C_Ast.CDeclSpec list parse_status } type var_table = { tyidents : markup_global markup_store Symtab.table (*ident name*) Symtab.table (*internal namespace*) , idents : markup_ident markup_store Symtab.table (*ident name*) } type 'antiq_language_list stream = ('antiq_language_list, C_Lex.token) C_Scan.either list \ \Key entry point environment to the C language\ type env_lang = { var_table : var_table \ \current active table in the scope\ , scopes : (C_Ast.ident option * var_table) list \ \parent scope tables\ , namesupply : int , stream_ignored : C_Antiquote.antiq stream , env_directives : env_directives } (* NOTE: The distinction between type variable or identifier can not be solely made during the lexing process. Another pass on the parsed tree is required. *) type error_lines = string list type env_tree = { context : Context.generic - , reports_text : C_Position.reports_text + , reports_text : Position.report_text list , error_lines : error_lines } type rule_static = (env_tree -> env_lang * env_tree) option (**) datatype comment_style = Comment_directive | Comment_language type env_propagation_reduce = int (*reduce rule number*) option (* NONE: shift action *) type env_propagation_ctxt = env_propagation_reduce -> Context.generic -> Context.generic type env_propagation_directive = env_propagation_reduce -> env_directives -> env_lang * env_tree -> env_lang * env_tree datatype env_propagation_bottom_up = Exec_annotation of env_propagation_ctxt | Exec_directive of env_propagation_directive datatype env_propagation = Bottom_up (*during parsing*) of env_propagation_bottom_up | Top_down (*after parsing*) of env_propagation_ctxt type eval_node = Position.range * env_propagation * env_directives * bool (* true: skip vacuous reduce rules *) datatype eval_time = Lexing of Position.range * (comment_style -> Context.generic -> Context.generic) | Parsing of (Symbol_Pos.T list (* length = number of tokens to advance *) * Symbol_Pos.T list (* length = number of steps back in stack *)) * eval_node | Never (* to be manually treated by the semantic back-end, and analyzed there *) -datatype antiq_language = Antiq_stack of C_Position.reports_text * eval_time +datatype antiq_language = Antiq_stack of Position.report_text list * eval_time | Antiq_none of C_Lex.token \ \ One of the key element of the structure is \<^ML_text>\eval_time\, relevant for the generic annotation module. 
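For orientation, the three scheduling disciplines can be instantiated as follows (a sketch only: the no-op propagation functions, the empty directive table and the empty symbol lists are made-up illustration values):

  val at_lexing  = Lexing (Position.no_range, fn _ => fn context => context)
  val at_parsing = Parsing (([], []),
                            (Position.no_range,
                             Bottom_up (Exec_annotation (fn _ => fn context => context)),
                             Symtab.empty, true))
  val never      = Never

\<^ML_text>\Lexing\ actions fire as soon as the enclosing comment is lexed, \<^ML_text>\Parsing\ actions are attached to a point in the token stream (a number of tokens to advance and a number of steps back in the parsing stack), and \<^ML_text>\Never\ leaves the evaluation entirely to the semantic back-end.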
\ (**) type ('LrTable_state, 'a, 'Position_T) stack_elem0 = 'LrTable_state * ('a * 'Position_T * 'Position_T) type ('LrTable_state, 'a, 'Position_T) stack0 = ('LrTable_state, 'a, 'Position_T) stack_elem0 list type ('LrTable_state, 'svalue0, 'pos) rule_reduce0 = (('LrTable_state, 'svalue0, 'pos) stack0 * env_lang * eval_node) list type ('LrTable_state, 'svalue0, 'pos) rule_reduce = int * ('LrTable_state, 'svalue0, 'pos) stack0 * eval_node list list type ('LrTable_state, 'svalue0, 'pos) rule_reduce' = int * bool (*vacuous*) * ('LrTable_state, 'svalue0, 'pos) rule_reduce0 datatype ('LrTable_state, 'svalue0, 'pos) rule_type = Void | Shift | Reduce of rule_static * ('LrTable_state, 'svalue0, 'pos) rule_reduce' type ('LrTable_state, 'svalue0, 'pos) rule_ml = { rule_pos : 'pos * 'pos , rule_type : ('LrTable_state, 'svalue0, 'pos) rule_type } (**) type 'class_Pos rule_output0' = { output_pos : 'class_Pos option , output_vacuous : bool , output_env : rule_static } type ('LrTable_state, 'svalue0, 'pos) rule_output0 = eval_node list list (* delayed *) * ('LrTable_state, 'svalue0, 'pos) rule_reduce0 (* actual *) * ('pos * 'pos) rule_output0' type rule_output = C_Ast.class_Pos rule_output0' (**) datatype stream_lang_state = Stream_ident of Position.range * string | Stream_string of (Position.range * string) list | Stream_atomic | Stream_regular type 'a stream_hook = ('a list * Symbol_Pos.T list * eval_node) list list type 'a T = {env_lang: env_lang, env_tree: env_tree, rule_input: C_Ast.class_Pos list * int, rule_output: rule_output, stream_hook: Symbol_Pos.T stream_hook, stream_hook_excess : int stream_hook, stream_lang: stream_lang_state * 'a stream} type T' = (C_Antiquote.antiq * antiq_language list) T (**) datatype 'a tree = Tree of 'a * 'a tree list type ('LrTable_state, 'a, 'Position_T) stack' = ('LrTable_state, 'a, 'Position_T) stack0 * eval_node list list * ('Position_T * 'Position_T) list * ('LrTable_state, 'a, 'Position_T) rule_ml tree list (**) fun map_env_lang f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = {env_lang = f env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang} fun map_env_tree f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = {env_lang = env_lang, env_tree = f env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang} fun map_rule_output f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = {env_lang = env_lang, env_tree = env_tree, rule_output = f rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang} fun map_rule_input f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = f rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang} fun map_stream_hook f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = f stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang} fun map_stream_hook_excess f {env_lang, 
env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = f stream_hook_excess, stream_lang = stream_lang} fun map_stream_lang f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = f stream_lang} (**) fun map_output_pos f {output_pos, output_vacuous, output_env} = {output_pos = f output_pos, output_vacuous = output_vacuous, output_env = output_env} fun map_output_vacuous f {output_pos, output_vacuous, output_env} = {output_pos = output_pos, output_vacuous = f output_vacuous, output_env = output_env} fun map_output_env f {output_pos, output_vacuous, output_env} = {output_pos = output_pos, output_vacuous = output_vacuous, output_env = f output_env} (**) fun map_tyidents f {tyidents, idents} = {tyidents = f tyidents, idents = idents} fun map_idents f {tyidents, idents} = {tyidents = tyidents, idents = f idents} (**) fun map_var_table f {var_table, scopes, namesupply, stream_ignored, env_directives} = {var_table = f var_table, scopes = scopes, namesupply = namesupply, stream_ignored = stream_ignored, env_directives = env_directives} fun map_scopes f {var_table, scopes, namesupply, stream_ignored, env_directives} = {var_table = var_table, scopes = f scopes, namesupply = namesupply, stream_ignored = stream_ignored, env_directives = env_directives} fun map_namesupply f {var_table, scopes, namesupply, stream_ignored, env_directives} = {var_table = var_table, scopes = scopes, namesupply = f namesupply, stream_ignored = stream_ignored, env_directives = env_directives} fun map_stream_ignored f {var_table, scopes, namesupply, stream_ignored, env_directives} = {var_table = var_table, scopes = scopes, namesupply = namesupply, stream_ignored = f stream_ignored, env_directives = env_directives} fun map_env_directives f {var_table, scopes, namesupply, stream_ignored, env_directives} = {var_table = var_table, scopes = scopes, namesupply = namesupply, stream_ignored = stream_ignored, env_directives = f env_directives} (**) fun map_context f {context, reports_text, error_lines} = {context = f context, reports_text = reports_text, error_lines = error_lines} fun map_context' f {context, reports_text, error_lines} = let val (res, context) = f context in (res, {context = context, reports_text = reports_text, error_lines = error_lines}) end fun map_reports_text f {context, reports_text, error_lines} = {context = context, reports_text = f reports_text, error_lines = error_lines} fun map_error_lines f {context, reports_text, error_lines} = {context = context, reports_text = reports_text, error_lines = f error_lines} (**) fun map_env_tree' f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = let val (res, env_tree) = f env_tree in (res, {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang}) end fun map_env_lang_tree f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = let val (env_lang, env_tree) = f env_lang env_tree in {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, 
stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang} end fun map_env_lang_tree' f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = let val (res, (env_lang, env_tree)) = f env_lang env_tree in (res, {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang}) end (**) fun get_scopes (t : env_lang) = #scopes t (**) val empty_env_lang : env_lang = {var_table = {tyidents = Symtab.make [], idents = Symtab.make []}, scopes = [], namesupply = 0, stream_ignored = [], env_directives = Symtab.empty} fun empty_env_tree context = {context = context, reports_text = [], error_lines = []} val empty_rule_output : rule_output = {output_pos = NONE, output_vacuous = true, output_env = NONE} fun make env_lang stream_lang env_tree = { env_lang = env_lang , env_tree = env_tree , rule_output = empty_rule_output , rule_input = ([], 0) , stream_hook = [] , stream_hook_excess = [] , stream_lang = ( Stream_regular , map_filter (fn C_Scan.Right (C_Lex.Token (_, (C_Lex.Space _, _))) => NONE | C_Scan.Right (C_Lex.Token (_, (C_Lex.Comment _, _))) => NONE | C_Scan.Right tok => SOME (C_Scan.Right tok) | C_Scan.Left antiq => SOME (C_Scan.Left antiq)) stream_lang) } fun string_of (env_lang : env_lang) = let fun dest0 x f = x |> Symtab.dest |> map f fun dest {tyidents, idents} = (dest0 tyidents #1, dest0 idents (fn (i, (_,_,v)) => (i, if #global v then "global" else "local"))) in \<^make_string> ( ("var_table", dest (#var_table env_lang)) , ("scopes", map (fn (id, i) => ( Option.map (fn C_Ast.Ident0 (i, _, _) => C_Ast.meta_of_logic i) id , dest i)) (#scopes env_lang)) , ("namesupply", #namesupply env_lang) , ("stream_ignored", #stream_ignored env_lang)) end val namespace_typedef = "typedef" val namespace_tag = "tag" val namespace_enum = namespace_tag (**) val encode_positions = map (Position.dest #> (fn pos => ((#line pos, #offset pos, #end_offset pos), Properties.make_string Markup.fileN (#file (#props pos)) @ Properties.make_string Markup.idN (#id (#props pos))))) #> let open XML.Encode in list (pair (triple int int int) properties) end #> YXML.string_of_body val decode_positions = YXML.parse_body #> let open XML.Decode in list (pair (triple int int int) properties) end #> map ((fn ((line, offset, end_offset), props) => {line = line, offset = offset, end_offset = end_offset, props = props}) #> Position.of_props) end \ ML \ \\<^file>\~~/src/Pure/context.ML\\ \ structure C_Env_Ext = struct local fun map_tyidents' f = C_Env.map_var_table (C_Env.map_tyidents f) fun map_tyidents f = C_Env.map_env_lang (map_tyidents' f) in fun map_tyidents_typedef f = map_tyidents (Symtab.map_default (C_Env.namespace_typedef, Symtab.empty) f) fun map_tyidents_enum f = map_tyidents (Symtab.map_default (C_Env.namespace_enum, Symtab.empty) f) fun map_tyidents'_typedef f = map_tyidents' (Symtab.map_default (C_Env.namespace_typedef, Symtab.empty) f) fun map_tyidents'_enum f = map_tyidents' (Symtab.map_default (C_Env.namespace_enum, Symtab.empty) f) end fun map_idents' f = C_Env.map_var_table (C_Env.map_idents f) fun map_idents f = C_Env.map_env_lang (map_idents' f) (**) fun map_var_table f = C_Env.map_env_lang (C_Env.map_var_table f) fun map_scopes f = C_Env.map_env_lang (C_Env.map_scopes f) fun map_namesupply f = C_Env.map_env_lang (C_Env.map_namesupply f) fun map_stream_ignored f = C_Env.map_env_lang (C_Env.map_stream_ignored f) (**) 
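(* Usage sketch (not part of the original sources; the identifier "x", the empty
   position list and the freshly drawn serial are illustrative values only): the
   lifted updaters above are the intended entry points for modifying the variable
   tables of an 'a C_Env.T.  Recording a local, parameterless identifier whose
   return type is marked as Previous_in_stack would read

     map_idents (Symtab.update
       ("x", ([], serial (), {global = false, params = [], ret = C_Env.Previous_in_stack})))

   which lifts the Symtab update through env_lang and var_table into 'a C_Env.T. *)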
local fun get_tyidents' namespace (env_lang : C_Env.env_lang) = case Symtab.lookup (env_lang |> #var_table |> #tyidents) namespace of NONE => Symtab.empty | SOME t => t fun get_tyidents namespace (t : 'a C_Env.T) = get_tyidents' namespace (#env_lang t) in fun get_tyidents_typedef env= get_tyidents C_Env.namespace_typedef env fun get_tyidents_enum env = get_tyidents C_Env.namespace_enum env fun get_tyidents'_typedef env = get_tyidents' C_Env.namespace_typedef env fun get_tyidents'_enum env = get_tyidents' C_Env.namespace_enum env end fun get_idents (t: 'a C_Env.T) = #env_lang t |> #var_table |> #idents fun get_idents' (env:C_Env.env_lang) = env |> #var_table |> #idents (**) fun get_var_table (t: 'a C_Env.T) = #env_lang t |> #var_table fun get_scopes (t:'a C_Env.T) = #env_lang t |> #scopes fun get_namesupply (t: 'a C_Env.T) = #env_lang t |> #namesupply (**) fun map_output_pos f = C_Env.map_rule_output (C_Env.map_output_pos f) fun map_output_vacuous f = C_Env.map_rule_output (C_Env.map_output_vacuous f) fun map_output_env f = C_Env.map_rule_output (C_Env.map_output_env f) (**) fun get_output_pos (t : 'a C_Env.T) = #rule_output t |> #output_pos (**) fun map_context f = C_Env.map_env_tree (C_Env.map_context f) fun map_reports_text f = C_Env.map_env_tree (C_Env.map_reports_text f) (**) fun get_context (t : 'a C_Env.T) = #env_tree t |> #context fun get_reports_text (t : 'a C_Env.T) = #env_tree t |> #reports_text (**) fun map_env_directives' f {var_table, scopes, namesupply, stream_ignored, env_directives} = let val (res, env_directives) = f env_directives in (res, {var_table = var_table, scopes = scopes, namesupply = namesupply, stream_ignored = stream_ignored, env_directives = env_directives}) end (**) fun map_stream_lang' f {env_lang, env_tree, rule_output, rule_input, stream_hook, stream_hook_excess, stream_lang} = let val (res, stream_lang) = f stream_lang in (res, {env_lang = env_lang, env_tree = env_tree, rule_output = rule_output, rule_input = rule_input, stream_hook = stream_hook, stream_hook_excess = stream_hook_excess, stream_lang = stream_lang}) end (**) fun context_map (f : C_Env.env_tree -> C_Env.env_tree) = C_Env.empty_env_tree #> f #> #context fun context_map' (f : C_Env.env_tree -> 'a * C_Env.env_tree) = C_Env.empty_env_tree #> f #> apsnd #context (**) fun list_lookup tab name = flat (map (fn (x, _, _) => x) (the_list (Symtab.lookup tab name))) end \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Eval.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Eval.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Eval.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Eval.thy @@ -1,808 +1,808 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Evaluation Scheduling\ theory C_Eval imports C_Parser_Language C_Parser_Annotation begin subsection \Evaluation Engine for the Core Language\ \ \\<^file>\~~/src/Pure/Thy/thy_info.ML\: \<^theory>\Isabelle_C.C_Parser_Language\\ ML \ \\<^theory>\Isabelle_C.C_Environment\\ \ structure C_Stack = struct type 'a stack_elem = (LALR_Table.state, 'a, Position.T) C_Env.stack_elem0 type stack_data = (LALR_Table.state, C_Grammar.Tokens.svalue0, Position.T) C_Env.stack0 type stack_data_elem = (LALR_Table.state, C_Grammar.Tokens.svalue0, Position.T) C_Env.stack_elem0 fun map_svalue0 f (st, (v, pos1, pos2)) = (st, (f v, pos1, pos2)) structure Data_Lang = struct val empty' = ([], C_Env.empty_env_lang) structure Data_Lang = Generic_Data ( type T = (stack_data * C_Env.env_lang) option val empty = NONE val merge = K empty ) open Data_Lang fun get' context = case get context of NONE => empty' | SOME data => data fun setmp data f context = put (get context) (f (put data context)) end structure Data_Tree_Args : GENERIC_DATA_ARGS = struct - type T = C_Position.reports_text * C_Env.error_lines + type T = Position.report_text list * C_Env.error_lines val empty = ([], []) fun merge ((l11, l12), (l21, l22)) = (l11 @ l21, l12 @ l22) end structure Data_Tree = Generic_Data (Data_Tree_Args) fun setmp_tree f context = let val x = Data_Tree.get context val context = f (Data_Tree.put Data_Tree_Args.empty context) in (Data_Tree.get context, Data_Tree.put x context) end fun stack_exec0 f {context, reports_text, error_lines} = let val ((reports_text', error_lines'), context) = setmp_tree f context in { context = context , reports_text = append reports_text' reports_text , error_lines = append error_lines' error_lines } end fun stack_exec env_dir data_put = stack_exec0 o Data_Lang.setmp (SOME (apsnd (C_Env.map_env_directives (K env_dir)) data_put)) end \ ML \ \\<^file>\~~/src/Pure/ML/ml_context.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/ML/ml_context.ML Author: Makarius ML context and antiquotations. 
*)*) \ structure C_Context0 = struct (* theory data *) type env_direct = bool (* internal result for conditional directives: branch skipping *) * (C_Env.env_directives * C_Env.env_tree) structure Directives = Generic_Data ( type T = (Position.T list * serial * ( (* evaluated during lexing phase *) (C_Lex.token_kind_directive -> env_direct -> C_Env.antiq_language list (* nested annotations from the input *) * env_direct (*NOTE: remove the possibility of returning a too modified env?*)) * (* evaluated during parsing phase *) (C_Lex.token_kind_directive -> C_Env.env_propagation_directive))) Symtab.table val empty = Symtab.empty val merge = Symtab.join (K #2) ); end \ ML \ \\<^theory>\Isabelle_C.C_Environment\\ \ structure C_Hook = struct fun add_stream0 (syms_shift, syms, ml_exec) stream_hook = case fold (fn _ => fn (eval1, eval2) => (case eval2 of e2 :: eval2 => (e2, eval2) | [] => ([], [])) |>> (fn e1 => e1 :: eval1)) syms_shift ([], stream_hook) of (eval1, eval2) => fold cons eval1 (case eval2 of e :: es => ((syms_shift, syms, ml_exec) :: e) :: es | [] => [[(syms_shift, syms, ml_exec)]]) fun advance00 stack ml_exec = case ml_exec of (_, C_Env.Bottom_up (C_Env.Exec_annotation exec), env_dir, _) => (fn arg => C_Env.map_env_tree (C_Stack.stack_exec env_dir (stack, #env_lang arg) (exec NONE)) arg) | (_, C_Env.Bottom_up (C_Env.Exec_directive exec), env_dir, _) => C_Env.map_env_lang_tree (curry (exec NONE env_dir)) | ((pos, _), C_Env.Top_down exec, env_dir, _) => tap (fn _ => warning ("Missing navigation, evaluating as bottom-up style instead of top-down" ^ Position.here pos)) #> (fn arg => C_Env.map_env_tree (C_Stack.stack_exec env_dir (stack, #env_lang arg) (exec NONE)) arg) fun advance0 stack (_, syms_reduce, ml_exec) = let val len = length syms_reduce in if len = 0 then I #>> advance00 stack ml_exec else let val len = len - 1 in fn (arg, stack_ml) => if length stack_ml - 2 <= len then ( C_Env.map_stream_hook_excess (add_stream0 (map_range I (len - length stack_ml + 2), syms_reduce, ml_exec)) arg , stack_ml) |> tap (fn _ => warning ("Navigation out of bounds, " ^ (if length stack_ml <= len then "maximum depth" else "internal value") ^ " reached (" ^ Int.toString (len - length stack_ml + 3) ^ " in excess)" ^ Position.here (Symbol_Pos.range syms_reduce |> Position.range_position))) else (arg, nth_map len (cons ml_exec) stack_ml) end end fun advance stack = (fn f => fn (arg, stack_ml) => f (#stream_hook arg) (arg, stack_ml)) (fn [] => I | l :: ls => fold_rev (advance0 stack) l #>> C_Env.map_stream_hook (K ls)) fun add_stream exec = C_Env.map_stream_hook (add_stream0 exec) end \ ML \ \\<^theory>\Isabelle_C.C_Lexer_Language\\ \ structure C_Grammar_Lexer : ARG_LEXER1 = struct structure LALR_Lex_Instance = struct type ('a,'b) token = ('a, 'b) C_Grammar.Tokens.token type pos = Position.T type arg = C_Grammar.Tokens.arg type svalue0 = C_Grammar.Tokens.svalue0 type svalue = arg -> svalue0 * arg type state = C_Grammar.ParserData.LALR_Table.state end type stack = (LALR_Lex_Instance.state, LALR_Lex_Instance.svalue0, LALR_Lex_Instance.pos) C_Env.stack' fun makeLexer ((stack, stack_ml, stack_pos, stack_tree), arg) = let val (token, arg) = C_Env_Ext.map_stream_lang' (fn (st, []) => (NONE, (st, [])) | (st, x :: xs) => (SOME x, (st, xs))) arg fun return0' f = (arg, stack_ml) |> C_Hook.advance stack |> f |> (fn (arg, stack_ml) => rpair ((stack, stack_ml, stack_pos, stack_tree), arg)) fun return0 x = \ \Warning: \C_Hook.advance\ must not be early evaluated here, as it might generate undesirable markup reporting 
(in annotation commands).\ \ \Todo: Arrange \C_Hook.advance\ as a pure function, so that the overall could be eta-simplified.\ return0' I x val encoding = fn C_Lex.Encoding_L => true | _ => false open C_Ast fun token_err pos1 pos2 src = C_Grammar_Tokens.token_of_string (C_Grammar.Tokens.error (pos1, pos2)) (ClangCVersion0 (From_string src)) (CChar (From_char_hd "0") false) (CFloat (From_string src)) (CInteger 0 DecRepr (Flags 0)) (CString0 (From_string src, false)) (Ident (From_string src, 0, OnlyPos NoPosition (NoPosition, 0))) src pos1 pos2 src open C_Scan in case token of NONE => return0 (C_Grammar.Tokens.x25_eof (Position.none, Position.none)) | SOME (Left (antiq_raw, l_antiq)) => makeLexer ( (stack, stack_ml, stack_pos, stack_tree) , (arg, false) |> fold (fn C_Env.Antiq_stack (_, C_Env.Parsing ((syms_shift, syms), ml_exec)) => I #>> C_Hook.add_stream (syms_shift, syms, ml_exec) | C_Env.Antiq_stack (_, C_Env.Never) => I ##> K true | _ => I) l_antiq |> (fn (arg, false) => arg | (arg, true) => C_Env_Ext.map_stream_ignored (cons (Left antiq_raw)) arg)) | SOME (Right (tok as C_Lex.Token (_, (C_Lex.Directive dir, _)))) => makeLexer ( (stack, stack_ml, stack_pos, stack_tree) , arg |> let val context = C_Env_Ext.get_context arg in fold (fn dir_tok => C_Hook.add_stream ( [] , [] , ( Position.no_range , C_Env.Bottom_up (C_Env.Exec_directive (dir |> (case Symtab.lookup (C_Context0.Directives.get context) (C_Lex.content_of dir_tok) of NONE => K (K (K I)) | SOME (_, _, (_, exec)) => exec))) , Symtab.empty , true))) (C_Lex.directive_cmds dir) end |> C_Env_Ext.map_stream_ignored (cons (Right tok))) | SOME (Right (C_Lex.Token ((pos1, pos2), (tok, src)))) => case tok of C_Lex.String (C_Lex.Encoding_file (SOME err), _) => return0' (apfst (C_Env.map_env_tree (C_Env.map_error_lines (cons (err ^ Position.here pos1))))) (token_err pos1 pos2 src) | _ => return0 (case tok of C_Lex.Char (b, [c]) => C_Grammar.Tokens.cchar (CChar (From_char_hd (case c of Left (c, _) => c | _ => chr 0)) (encoding b), pos1, pos2) | C_Lex.String (b, s) => C_Grammar.Tokens.cstr (CString0 ( From_string ( implode (map (fn Left (s, _) => s | Right _ => chr 0) s)) , encoding b) , pos1 , pos2) | C_Lex.Integer (i, repr, flag) => C_Grammar.Tokens.cint ( CInteger i (case repr of C_Lex.Repr_decimal => DecRepr0 | C_Lex.Repr_hexadecimal => HexRepr0 | C_Lex.Repr_octal => OctalRepr0) (C_Lex.read_bin (fold (fn flag => map (fn (bit, flag0) => ( if flag0 = (case flag of C_Lex.Flag_unsigned => FlagUnsigned0 | C_Lex.Flag_long => FlagLong0 | C_Lex.Flag_long_long => FlagLongLong0 | C_Lex.Flag_imag => FlagImag0) then "1" else bit , flag0))) flag ([FlagUnsigned, FlagLong, FlagLongLong, FlagImag] |> rev |> map (pair "0")) |> map #1) |> Flags) , pos1 , pos2) | C_Lex.Float s => C_Grammar.Tokens.cfloat (CFloat (From_string (implode (map #1 s))), pos1, pos2) | C_Lex.Ident _ => let val (name, arg) = C_Grammar_Rule_Lib.getNewName arg val ident0 = C_Grammar_Rule_Lib.mkIdent (C_Grammar_Rule_Lib.posOf' false (pos1, pos2)) src name in if C_Grammar_Rule_Lib.isTypeIdent src arg then C_Grammar.Tokens.tyident (ident0, pos1, pos2) else C_Grammar.Tokens.ident (ident0, pos1, pos2) end | _ => token_err pos1 pos2 src) end end \ text \ This is where the instancing of the parser functor (from \<^theory>\Isabelle_C.C_Parser_Language\) with the lexer (from \<^theory>\Isabelle_C.C_Lexer_Language\) actually happens ... 
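In outline (a sketch of the plumbing only; the argument names below are placeholders, the concrete values are spelled out in \<^ML_text>\C_Language.eval\ further down), the instantiated structure is driven as

  C_Env.make env_lang stream_lang
  #> C_Grammar_Parser.makeLexer
  #> C_Grammar_Parser.parse (lookahead, on_error, start_pos, start, on_accept, ...)

so that the environment of \<^theory>\Isabelle_C.C_Environment\ is threaded through both the lexer and the parser.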
\ ML \ \\<^theory>\Isabelle_C.C_Parser_Language\\ \ structure C_Grammar_Parser = LALR_Parser_Join (structure LrParser = LALR_Parser_Eval structure ParserData = C_Grammar.ParserData structure Lex = C_Grammar_Lexer) \ ML \ \\<^file>\~~/src/Pure/ML/ml_compiler.ML\\ \ structure C_Language = struct open C_Env fun exec_tree write msg (Tree ({rule_pos, rule_type}, l_tree)) = case rule_type of Void => write msg rule_pos "VOID" NONE | Shift => write msg rule_pos "SHIFT" NONE | Reduce (rule_static, (rule0, vacuous, rule_antiq)) => write msg rule_pos ("REDUCE " ^ Int.toString rule0 ^ " " ^ (if vacuous then "X" else "O")) (SOME (C_Grammar_Rule.string_reduce rule0 ^ " " ^ C_Grammar_Rule.type_reduce rule0)) #> (case rule_static of SOME rule_static => rule_static #>> SOME | NONE => pair NONE) #-> (fn env_lang => fold (fn (stack0, env_lang0, (_, C_Env.Top_down exec, env_dir, _)) => C_Stack.stack_exec env_dir (stack0, Option.getOpt (env_lang, env_lang0)) (exec (SOME rule0)) | _ => I) rule_antiq) #> fold (exec_tree write (msg ^ " ")) l_tree fun exec_tree' l env_tree = env_tree |> fold (exec_tree let val ctxt = Context.proof_of (#context env_tree) val write = if Config.get ctxt C_Options.parser_trace andalso Context_Position.is_visible ctxt then fn f => tap (tracing o f) else K I in fn msg => fn (p1, p2) => fn s1 => fn s2 => write (fn _ => msg ^ s1 ^ " " ^ Position.here p1 ^ " " ^ Position.here p2 ^ (case s2 of SOME s2 => " " ^ s2 | NONE => "")) end "") l fun uncurry_context f pos = uncurry (fn (stack, stack_ml, stack_pos, stack_tree) => (* executing stack_tree *) (fn arg => map_env_tree' (f pos stack stack_tree (#env_lang arg)) arg) #> apfst (pair (stack, stack_ml, stack_pos, stack_tree))) fun eval env_lang start err accept stream_lang = make env_lang stream_lang #> C_Grammar_Parser.makeLexer #> C_Grammar_Parser.parse ( 0 , uncurry_context (fn (next_pos1, next_pos2) => fn stack => fn stack_tree => fn env_lang => C_Env.map_reports_text (cons ( ( Position.range_position (case hd stack of (_, (_, pos1, pos2)) => (pos1, pos2)) , Markup.bad ()) , "") #> (case rev (tl stack) of _ :: _ :: stack => append (map_filter (fn (pos1, pos2) => if Position.offset_of pos1 = Position.offset_of pos2 then NONE else SOME ((Position.range_position (pos1, pos2), Markup.intensify), "")) ((next_pos1, next_pos2) :: map (fn (_, (_, pos1, pos2)) => (pos1, pos2)) stack)) | _ => I)) #> exec_tree' (rev stack_tree) #> err env_lang stack (Position.range_position (case hd stack_tree of Tree ({rule_pos = (rule_pos1, _), ...}, _) => (rule_pos1, next_pos2)))) , Position.none , start , uncurry_context (fn _ => fn stack => fn stack_tree => fn env_lang => exec_tree' stack_tree #> accept env_lang (stack |> hd |> C_Stack.map_svalue0 C_Grammar_Rule.reduce0)) , fn (stack, arg) => arg |> map_rule_input (K stack) |> map_rule_output (K empty_rule_output) , fn (rule0, stack0, pre_ml) => fn arg => let val rule_output = #rule_output arg val env_lang = #env_lang arg val (delayed, actual) = if #output_vacuous rule_output then let fun f (_, _, _, to_delay) = to_delay in (map (filter f) pre_ml, map (filter_out f) pre_ml) end else ([], pre_ml) val actual = flat (map rev actual) in ( (delayed, map (fn x => (stack0, env_lang, x)) actual, rule_output) , fold (fn (_, C_Env.Bottom_up (C_Env.Exec_annotation exec), env_dir, _) => C_Env.map_env_tree (C_Stack.stack_exec env_dir (stack0, env_lang) (exec (SOME rule0))) | (_, C_Env.Bottom_up (C_Env.Exec_directive exec), env_dir, _) => C_Env.map_env_lang_tree (curry (exec (SOME rule0) env_dir)) | _ => I) actual arg) end) #> 
(fn (stream, (((stack, stack_ml, stack_pos, stack_tree), user), arg)) => let fun shift_max acc stream = let val (tok, stream) = C_Grammar_Parser.Stream.get stream in if LALR_Parser_Eval.Token.sameToken (tok, C_Grammar.Tokens.x25_eof (Position.none, Position.none)) then (acc, stream) else shift_max (tok :: acc) stream end (* executing stack_ml *) val arg = fold (fold_rev (C_Hook.advance00 stack)) stack_ml arg (* executing stream_lang *) val (_, (_, ((stack, stack_ml, _, _), arg))) = shift_max [] (stream, ((stack, [[], []], stack_pos, stack_tree), arg)) in arg (* executing stream_hook *) |> (fn arg => fold (uncurry (fn pos => fold_rev (fn (syms_shift, syms_reduce, ml_exec) => tap (fn _ => warning ("Navigation out of bounds,\ \ maximum breadth reached (" ^ Int.toString (pos + 1) ^ " in excess)" ^ Position.here (Symbol_Pos.range syms_shift |> Position.range_position))) #> C_Hook.advance0 stack (syms_shift, syms_reduce, ml_exec)))) (map_index I (#stream_hook arg)) (arg, stack_ml) |> fst) (* executing stream_hook_excess *) |> (fn arg => fold (fold_rev (fn (_, _, ml_exec) => C_Hook.advance00 stack ml_exec)) (#stream_hook_excess arg) arg) (**) |> pair user o #env_tree end) end \ subsection \Full Evaluation Engine (Core Language with Annotations)\ \ \\<^file>\~~/src/Pure/Thy/thy_info.ML\: \<^theory>\Isabelle_C.C_Parser_Language\, \<^theory>\Isabelle_C.C_Parser_Annotation\\ ML \ \\<^file>\~~/src/Pure/ML/ml_context.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/ML/ml_context.ML Author: Makarius ML context and antiquotations. *)*) \ structure C_Context = struct fun fun_decl a v s ctxt = let val (b, ctxt') = ML_Context.variant a ctxt; val env = "fun " ^ b ^ " " ^ v ^ " = " ^ s ^ " " ^ v ^ ";\n"; val body = ML_Context.struct_name ctxt ^ "." ^ b; fun decl (_: Proof.context) = (env, body); in (decl, ctxt') end; (* parsing *) local fun scan_antiq context syms = let val keywords = C_Thy_Header.get_keywords' (Context.proof_of context) in ( C_Parse_Read.read_antiq' keywords (C_Parse.!!! 
(Scan.trace (C_Annotation.parse_command (Context.theory_of context)) >> (I #>> C_Env.Antiq_stack))) syms , C_Parse_Read.read_with_commands'0 keywords syms) end fun print0 s = maps (fn C_Lex.Token (_, (t as C_Lex.Directive d, _)) => (s ^ @{make_string} t) :: print0 (s ^ " ") (C_Lex.token_list_of d) | C_Lex.Token (_, t) => [case t of (C_Lex.Char _, _) => "Text Char" | (C_Lex.String _, _) => "Text String" | _ => let val t' = @{make_string} (#2 t) in if String.size t' <= 2 then @{make_string} (#1 t) else s ^ @{make_string} (#1 t) ^ " " ^ (String.substring (t', 1, String.size t' - 2) |> Markup.markup Markup.intensify) end]) val print = tracing o cat_lines o print0 "" open C_Scan fun markup_directive ty = C_Grammar_Rule_Lib.markup_make (K NONE) (K ()) (K ty) in fun markup_directive_command data = markup_directive "directive command" (fn cons' => fn def => fn C_Ast.Left _ => cons' (Markup.keyword_properties (if def then Markup.free else Markup.keyword1)) | C_Ast.Right (_, msg, f) => tap (fn _ => Output.information msg) #> f #> cons' (Markup.keyword_properties Markup.free)) data fun directive_update (name, pos) f tab = let val pos = [pos] val data = (pos, serial (), f) val _ = Position.reports_text (markup_directive_command (C_Ast.Left (data, C_Env_Ext.list_lookup tab name)) pos name []) in Symtab.update (name, data) tab end fun markup_directive_define in_direct = C_Env.map_reports_text ooo markup_directive "directive define" (fn cons' => fn def => fn err => (if def orelse in_direct then I else cons' Markup.language_antiquotation) #> (case err of C_Ast.Left _ => I | C_Ast.Right (_, msg, f) => tap (fn _ => Output.information msg) #> f) #> (if def then cons' Markup.free else if in_direct then I else cons' Markup.antiquote)) (* evaluation *) fun eval env start err accept (ants, ants_err) {context, reports_text, error_lines} = let val error_lines = ants_err error_lines fun scan_comment tag pos (antiq as {explicit, body, ...}) cts = let val (res, l_comm) = scan_antiq context body in Left ( tag , antiq , l_comm , if forall (fn Right _ => true | _ => false) res then let val (l_msg, res) = split_list (map_filter (fn Right (msg, l_report, l_tok) => SOME (msg, (l_report, l_tok)) | _ => NONE) res) val (l_report, l_tok) = split_list res in [( C_Env.Antiq_none (C_Lex.Token (pos, ( (C_Lex.Comment o C_Lex.Comment_suspicious o SOME) ( explicit , cat_lines l_msg , if explicit then flat l_report else []) , cts))) , l_tok)] end else map (fn Left x => x | Right (msg, l_report, tok) => (C_Env.Antiq_none (C_Lex.Token ( C_Token.range_of [tok] , ( (C_Lex.Comment o C_Lex.Comment_suspicious o SOME) (explicit, msg, l_report) , C_Token.content_of tok))) , [tok])) res) end val ants = map (fn C_Lex.Token (pos, (C_Lex.Comment (C_Lex.Comment_formal antiq), cts)) => scan_comment C_Env.Comment_language pos antiq cts | tok => Right tok) ants fun map_ants f1 f2 = maps (fn Left x => f1 x | Right tok => f2 tok) val ants_none = map_ants (fn (_, _, _, l) => maps (fn (C_Env.Antiq_none x, _) => [x] | _ => []) l) (K []) ants val _ = Position.reports (maps (fn Left (_, _, _, [(C_Env.Antiq_none _, _)]) => [] | Left (_, {start, stop, range = (pos, _), ...}, _, _) => (case stop of SOME stop => cons (stop, Markup.antiquote) | NONE => I) [(start, Markup.antiquote), (pos, Markup.language_antiquotation)] | _ => []) ants); val _ = Position.reports_text (maps C_Lex.token_report ants_none @ maps (fn Left (_, _, _, [(C_Env.Antiq_none _, _)]) => [] | Left (_, _, l, ls) => maps (fn (C_Env.Antiq_stack (pos, _), _) => pos | _ => []) ls @ maps (maps 
(C_Token.reports (C_Thy_Header.get_keywords (Context.theory_of context)))) (l :: map #2 ls) | _ => []) ants); val error_lines = C_Lex.check ants_none error_lines; val ((ants, {context, reports_text, error_lines}), env) = C_Env_Ext.map_env_directives' (fn env_dir => let val (ants, (env_dir, env_tree)) = fold_map let fun subst_directive tok (range1 as (pos1, _)) name (env_dir, env_tree) = case Symtab.lookup env_dir name of NONE => (Right (Left tok), (env_dir, env_tree)) | SOME (data as (_, _, (exec_toks, exec_antiq))) => env_tree |> markup_directive_define false (C_Ast.Right ([pos1], SOME data)) [pos1] name |> (case exec_toks of Left exec_toks => C_Env.map_context' (exec_toks (name, range1)) #> apfst (fn toks => (toks, Symtab.update (name, ( #1 data , #2 data , (Right toks, exec_antiq))) env_dir)) | Right toks => pair (toks, env_dir)) ||> C_Env.map_context (exec_antiq (name, range1)) |-> (fn (toks, env_dir) => pair (Right (Right (pos1, map (C_Lex.set_range range1) toks))) o pair env_dir) in fn Left (tag, antiq, toks, l_antiq) => fold_map (fn antiq as (C_Env.Antiq_stack (_, C_Env.Lexing (_, exec)), _) => apsnd (C_Stack.stack_exec0 (exec C_Env.Comment_language)) #> pair antiq | (C_Env.Antiq_stack (rep, C_Env.Parsing (syms, (range, exec, _, skip))), toks) => (fn env as (env_dir, _) => ( ( C_Env.Antiq_stack (rep, C_Env.Parsing (syms, (range, exec, env_dir, skip))) , toks) , env)) | antiq => pair antiq) l_antiq #> apfst (fn l_antiq => Left (tag, antiq, toks, l_antiq)) | Right tok => case tok of C_Lex.Token (_, (C_Lex.Directive dir, _)) => pair false #> fold (fn dir_tok => let val name = C_Lex.content_of dir_tok val pos1 = [C_Lex.pos_of dir_tok] in fn env_tree as (_, (_, {context = context, ...})) => let val data = Symtab.lookup (C_Context0.Directives.get context) name in env_tree |> apsnd (apsnd (C_Env.map_reports_text (markup_directive_command (C_Ast.Right (pos1, data)) pos1 name))) |> (case data of NONE => I | SOME (_, _, (exec, _)) => exec dir #> #2) end end) (C_Lex.directive_cmds dir) #> snd #> tap (fn _ => app (fn C_Lex.Token ( (pos, _) , (C_Lex.Comment (C_Lex.Comment_formal _), _)) => (Position.reports_text [((pos, Markup.ML_comment), "")]; (* not yet implemented *) warning ("Ignored annotation in directive" ^ Position.here pos)) | _ => ()) (C_Lex.token_list_of dir)) #> pair (Right (Left tok)) | C_Lex.Token (pos, (C_Lex.Keyword, cts)) => subst_directive tok pos cts | C_Lex.Token (pos, (C_Lex.Ident _, cts)) => subst_directive tok pos cts | _ => pair (Right (Left tok)) end ants ( env_dir , {context = context, reports_text = reports_text, error_lines = error_lines}) in ((ants, env_tree), env_dir) end) env val ants_stack = map_ants (single o Left o (fn (_, a, _, l) => (a, maps (single o #1) l))) (map Right o (fn Left tok => [tok] | Right (_, toks) => toks)) ants val _ = Position.reports_text (maps (fn Right (Left tok) => C_Lex.token_report tok | Right (Right (pos, [])) => [((pos, Markup.intensify), "")] | _ => []) ants); val ctxt = Context.proof_of context val () = if Config.get ctxt C_Options.lexer_trace andalso Context_Position.is_visible ctxt then print (map_filter (fn Right x => SOME x | _ => NONE) ants_stack) else () in C_Language.eval env start err accept ants_stack {context = context, reports_text = reports_text, error_lines = error_lines} end (* derived versions *) fun eval' env start err accept ants = Context.>>> (fn context => C_Env_Ext.context_map' (eval (env context) (start context) err accept ants #> apsnd (tap (Position.reports_text o #reports_text) #> tap (#error_lines #> (fn [] 
=> () | l => error (cat_lines (rev l)))) #> (C_Env.empty_env_tree o #context))) context) end; fun eval_source env start err accept source = eval' env (start source) err accept (C_Lex.read_source source); fun eval_source' env start err accept source = eval env (start source) err accept (C_Lex.read_source source); fun eval_in o_context env start err accept toks = Context.setmp_generic_context o_context (fn () => eval' env start err accept toks) (); fun expression struct_open range name constraint body ants context = context |> ML_Context.exec let val verbose = Config.get (Context.proof_of context) C_Options.ML_verbose in fn () => ML_Context.eval (ML_Compiler.verbose verbose ML_Compiler.flags) (#1 range) (ML_Lex.read ("Context.put_generic_context (SOME (let open " ^ struct_open ^ " val ") @ ML_Lex.read_range range name @ ML_Lex.read (": " ^ constraint ^ " =") @ ants @ ML_Lex.read ("in " ^ body ^ " end (Context.the_generic_context ())));")) end; end \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Language.thy @@ -1,1546 +1,1539 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Core Language: Lexing Support, with Filtered Annotations (without Annotation Lexing)\ theory C_Lexer_Language imports Main begin text \ The part implementing the C lexer might resemble to the implementation of the ML one, but the C syntax is much complex: for example, the preprocessing of directives is implemented with several parsing layers. Also, we will see that the way antiquotations are handled in C is slightly different than in ML (especially in the execution part). 
Overall, the next ML structures presented here in this theory are all aligned with \<^file>\~~/src/Pure/ROOT.ML\, and are thus accordingly sorted in the same order (except for \<^file>\~~/src/Pure/ML/ml_options.ML\ which is early included in the boot process). This theory will stop at \<^file>\~~/src/Pure/ML/ml_lex.ML\. It is basically situated in the phase 1 of the bootstrap process (of \<^file>\~~/src/Pure/ROOT.ML\), i.e., the part dealing with how to get some C tokens from a raw string: \<^ML_text>\Position.T -> string -> token list\. \ ML \ \\<^file>\~~/src/Pure/General/scan.ML\\ \ structure C_Scan = struct datatype ('a, 'b) either = Left of 'a | Right of 'b fun opt x = Scan.optional x []; end \ ML \ \\<^file>\~~/src/Pure/General/symbol.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/General/symbol.ML Author: Makarius Generalized characters with infinitely many named symbols. *)*) \ structure C_Symbol = struct fun is_ascii_quasi "_" = true | is_ascii_quasi "$" = true | is_ascii_quasi _ = false; fun is_identletter s = Symbol.is_ascii_letter s orelse is_ascii_quasi s fun is_ascii_oct s = Symbol.is_char s andalso Char.ord #"0" <= ord s andalso ord s <= Char.ord #"7"; fun is_ascii_digit1 s = Symbol.is_char s andalso Char.ord #"1" <= ord s andalso ord s <= Char.ord #"9"; fun is_ascii_letdig s = Symbol.is_ascii_letter s orelse Symbol.is_ascii_digit s orelse is_ascii_quasi s; fun is_ascii_identifier s = size s > 0 andalso forall_string is_ascii_letdig s; val ascii_blank_no_line = [ ([" "], NONE) , (["\t", "\^K", "\f"], SOME "Space symbol") , (["\194\160"], SOME "Non-standard space symbol") ] fun is_ascii_blank_no_line s = exists (#1 #> exists (curry op = s)) ascii_blank_no_line end \ -ML \ \\<^file>\~~/src/Pure/General/position.ML\\ \ -structure C_Position = -struct -type reports_text = Position.report_text list -end -\ - ML \ \\<^file>\~~/src/Pure/General/symbol_pos.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/General/symbol_pos.ML Author: Makarius Symbols with explicit position information. *)*) \ structure C_Basic_Symbol_Pos = (*not open by default*) struct open Basic_Symbol_Pos; fun one f = Scan.one (f o Symbol_Pos.symbol) fun many f = Scan.many (f o Symbol_Pos.symbol) fun many1 f = Scan.many1 (f o Symbol_Pos.symbol) val one' = Scan.single o one fun scan_full !!! mem msg scan = scan --| (Scan.ahead (one' (not o mem)) || !!! msg Scan.fail) fun this_string s = (fold (fn s0 => uncurry (fn acc => one (fn s1 => s0 = s1) >> (fn x => x :: acc))) (Symbol.explode s) o pair []) >> rev val one_not_eof = Scan.one (Symbol.not_eof o #1) fun unless_eof scan = Scan.unless scan one_not_eof >> single val repeats_one_not_eof = Scan.repeats o unless_eof val newline = $$$ "\n" || $$$ "\^M" @@@ $$$ "\n" || $$$ "\^M" val repeats_until_nl = repeats_one_not_eof newline end structure C_Symbol_Pos = struct (* basic scanners *) val !!! = Symbol_Pos.!!! fun !!!! text scan = let fun get_pos [] = " (end-of-input)" | get_pos ((_, pos) :: _) = Position.here pos; fun err ((_, syms), msg) = fn () => text () ^ get_pos syms ^ Markup.markup Markup.no_report (" at " ^ Symbol.beginning 10 (map Symbol_Pos.symbol syms)) ^ (case msg of NONE => "" | SOME m => "\n" ^ m ()); in Scan.!! 
err scan end; val $$ = Symbol_Pos.$$ val $$$ = Symbol_Pos.$$$ val ~$$$ = Symbol_Pos.~$$$ (* scan string literals *) local val char_code = Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) -- Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) -- Scan.one (Symbol.is_ascii_digit o Symbol_Pos.symbol) :|-- (fn (((a, pos), (b, _)), (c, _)) => let val (n, _) = Library.read_int [a, b, c] in if n <= 255 then Scan.succeed [(chr n, pos)] else Scan.fail end); fun scan_str q err_prefix stop = $$$ "\\" |-- !!! (fn () => err_prefix ^ "bad escape character in string") ($$$ q || $$$ "\\" || char_code) || Scan.unless stop (Scan.one (fn (s, _) => s <> q andalso s <> "\\" andalso Symbol.not_eof s)) >> single; fun scan_strs q err_prefix err_suffix stop = Scan.ahead ($$ q) |-- !!! (fn () => err_prefix ^ "unclosed string literal within " ^ err_suffix) ((Symbol_Pos.scan_pos --| $$$ q) -- (Scan.repeats (scan_str q err_prefix stop) -- ($$$ q |-- Symbol_Pos.scan_pos))); in fun scan_string_qq_multi err_prefix stop = scan_strs "\"" err_prefix "the comment delimiter" stop; fun scan_string_bq_multi err_prefix stop = scan_strs "`" err_prefix "the comment delimiter" stop; fun scan_string_qq_inline err_prefix = scan_strs "\"" err_prefix "the same line" C_Basic_Symbol_Pos.newline; fun scan_string_bq_inline err_prefix = scan_strs "`" err_prefix "the same line" C_Basic_Symbol_Pos.newline; end; (* nested text cartouches *) fun scan_cartouche_depth stop = Scan.repeat1 (Scan.depend (fn (depth: int option) => (case depth of SOME d => $$ Symbol.open_ >> pair (SOME (d + 1)) || (if d > 0 then Scan.unless stop (Scan.one (fn (s, _) => s <> Symbol.close andalso Symbol.not_eof s)) >> pair depth || $$ Symbol.close >> pair (if d = 1 then NONE else SOME (d - 1)) else Scan.fail) | NONE => Scan.fail))); fun scan_cartouche err_prefix err_suffix stop = Scan.ahead ($$ Symbol.open_) |-- !!! (fn () => err_prefix ^ "unclosed text cartouche within " ^ err_suffix) (Scan.provide is_none (SOME 0) (scan_cartouche_depth stop)); fun scan_cartouche_multi err_prefix stop = scan_cartouche err_prefix "the comment delimiter" stop; fun scan_cartouche_inline err_prefix = scan_cartouche err_prefix "the same line" C_Basic_Symbol_Pos.newline; (* C-style comments *) local val par_l = "/" val par_r = "/" val scan_body1 = $$$ "*" --| Scan.ahead (~$$$ par_r) val scan_body2 = Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single val scan_cmt = Scan.depend (fn (d: int) => $$$ par_l @@@ $$$ "*" >> pair (d + 1)) || Scan.depend (fn 0 => Scan.fail | d => $$$ "*" @@@ $$$ par_r >> pair (d - 1)) || Scan.lift scan_body1 || Scan.lift scan_body2; val scan_cmts = Scan.pass 0 (Scan.repeats scan_cmt); in fun scan_comment err_prefix = Scan.ahead ($$ par_l -- $$ "*") |-- !!! (fn () => err_prefix ^ "unclosed comment") ($$$ par_l @@@ $$$ "*" @@@ scan_cmts @@@ $$$ "*" @@@ $$$ par_r) || $$$ "/" @@@ $$$ "/" @@@ C_Basic_Symbol_Pos.repeats_until_nl; fun scan_comment_no_nest err_prefix = Scan.ahead ($$ par_l -- $$ "*") |-- !!! (fn () => err_prefix ^ "unclosed comment") ($$$ par_l @@@ $$$ "*" @@@ Scan.repeats (scan_body1 || scan_body2) @@@ $$$ "*" @@@ $$$ par_r) || $$$ "/" @@@ $$$ "/" @@@ C_Basic_Symbol_Pos.repeats_until_nl; val recover_comment = $$$ par_l @@@ $$$ "*" @@@ Scan.repeats (scan_body1 || scan_body2); end end \ ML \ \\<^file>\~~/src/Pure/General/antiquote.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/General/antiquote.ML Author: Makarius Antiquotations within plain text. 
*)*) \ structure C_Antiquote = struct (* datatype antiquote *) type antiq = { explicit: bool , start: Position.T , stop: Position.T option , range: Position.range , body: Symbol_Pos.T list , body_begin: Symbol_Pos.T list , body_end: Symbol_Pos.T list } (* scan *) open C_Basic_Symbol_Pos; local val err_prefix = "Antiquotation lexical error: "; val par_l = "/" val par_r = "/" val scan_body1 = $$$ "*" --| Scan.ahead (~$$$ par_r) val scan_body1' = $$$ "*" @@@ $$$ par_r val scan_body2 = Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single val scan_antiq_body_multi = Scan.trace (C_Symbol_Pos.scan_string_qq_multi err_prefix scan_body1' || C_Symbol_Pos.scan_string_bq_multi err_prefix scan_body1') >> #2 || C_Symbol_Pos.scan_cartouche_multi err_prefix scan_body1' || scan_body1 || scan_body2; val scan_antiq_body_multi_recover = scan_body1 || scan_body2; val scan_antiq_body_inline = Scan.trace (C_Symbol_Pos.scan_string_qq_inline err_prefix || C_Symbol_Pos.scan_string_bq_inline err_prefix) >> #2 || C_Symbol_Pos.scan_cartouche_inline err_prefix || unless_eof newline; val scan_antiq_body_inline_recover = unless_eof newline; fun control_name sym = (case Symbol.decode sym of Symbol.Control name => name); fun scan_antiq_multi scan = Symbol_Pos.scan_pos -- (Scan.trace ($$ par_l |-- $$ "*" |-- scan) -- Symbol_Pos.scan_pos -- Symbol_Pos.!!! (fn () => err_prefix ^ "missing closing antiquotation") (Scan.repeats scan_antiq_body_multi -- Symbol_Pos.scan_pos -- ($$$ "*" @@@ $$$ par_r) -- Symbol_Pos.scan_pos)) fun scan_antiq_multi_recover scan = Symbol_Pos.scan_pos -- ($$ par_l |-- $$ "*" |-- scan -- Symbol_Pos.scan_pos -- (Scan.repeats scan_antiq_body_multi_recover -- Symbol_Pos.scan_pos -- ($$ "*" |-- $$ par_r |-- Symbol_Pos.scan_pos))) fun scan_antiq_inline scan = (Symbol_Pos.scan_pos -- Scan.trace ($$ "/" |-- $$ "/" |-- scan) -- Symbol_Pos.scan_pos -- Scan.repeats scan_antiq_body_inline -- Symbol_Pos.scan_pos) fun scan_antiq_inline_recover scan = (Symbol_Pos.scan_pos --| $$ "/" --| $$ "/" -- scan -- Symbol_Pos.scan_pos -- Scan.repeats scan_antiq_body_inline_recover -- Symbol_Pos.scan_pos) in val scan_control = Scan.option (Scan.one (Symbol.is_control o Symbol_Pos.symbol)) -- Symbol_Pos.scan_cartouche err_prefix >> (fn (opt_control, body) => let val (name, range) = (case opt_control of SOME (sym, pos) => ((control_name sym, pos), Symbol_Pos.range ((sym, pos) :: body)) | NONE => (("cartouche", #2 (hd body)), Symbol_Pos.range body)); in {name = name, range = range, body = body} end) || Scan.one (Symbol.is_control o Symbol_Pos.symbol) >> (fn (sym, pos) => {name = (control_name sym, pos), range = Symbol_Pos.range [(sym, pos)], body = []}); val scan_antiq = scan_antiq_multi ($$$ "@" >> K true || scan_body1 >> K false) >> (fn (pos1, (((explicit, body_begin), pos2), (((body, pos3), body_end), pos4))) => {explicit = explicit, start = Position.range_position (pos1, pos2), stop = SOME (Position.range_position (pos3, pos4)), range = Position.range (pos1, pos4), body = body, body_begin = body_begin, body_end = body_end}) || scan_antiq_inline ($$$ "@" >> K true || $$$ "*" >> K false) >> (fn ((((pos1, (explicit, body_begin)), pos2), body), pos3) => {explicit = explicit, start = Position.range_position (pos1, pos2), stop = NONE, range = Position.range (pos1, pos3), body = body, body_begin = body_begin, body_end = []}) val scan_antiq_recover = scan_antiq_multi_recover ($$$ "@" >> K true || scan_body1 >> K false) >> (fn (_, ((explicit, _), _)) => explicit) || scan_antiq_inline_recover ($$$ "@" >> K true || $$$ "*" 
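(* Illustrative sketch of the comment shapes recognized by scan_antiq above, read off
   the combinators rather than taken from the original documentation:
     block form   /*@ ... */   explicit annotation      /** ... */   implicit
     inline form  //@ ...      explicit annotation      //* ...      implicit
   so a hypothetical source line such as   /*@ invariant_line x */   is packed into an
   antiq record whose body holds the symbols between the delimiters, with
   explicit = true. *)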
>> K false) >> (fn ((((_, explicit), _), _), _) => explicit) end; end; \ ML \ \\<^file>\~~/src/Pure/ML/ml_options.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/ML/ml_options.ML Author: Makarius ML configuration options. *)*) \ structure C_Options = struct (* source trace *) val lexer_trace = Attrib.setup_config_bool @{binding C_lexer_trace} (K false); val parser_trace = Attrib.setup_config_bool @{binding C_parser_trace} (K false); val ML_verbose = Attrib.setup_config_bool @{binding C_ML_verbose} (K true); val starting_env = Attrib.setup_config_string @{binding C\<^sub>e\<^sub>n\<^sub>v\<^sub>0} (K "empty"); val starting_rule = Attrib.setup_config_string @{binding C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0} (K "translation_unit"); end \ ML \ \\<^file>\~~/src/Pure/ML/ml_lex.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/ML/ml_lex.ML Author: Makarius Lexical syntax for Isabelle/ML and Standard ML. *)*) \ structure C_Lex = struct open C_Scan; open C_Basic_Symbol_Pos; (** keywords **) val keywords = ["(", ")", "[", "]", "->", ".", "!", "~", "++", "--", "+", "-", "*", "/", "%", "&", "<<", ">>", "<", "<=", ">", ">=", "==", "!=", "^", "|", "&&", "||", "?", ":", "=", "+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=", "<<=", ">>=", ",", ";", "{", "}", "...", (**) "_Alignas", "_Alignof", "__alignof", "alignof", "__alignof__", "__asm", "asm", "__asm__", "_Atomic", "__attribute", "__attribute__", "auto", "_Bool", "break", "__builtin_offsetof", "__builtin_types_compatible_p", "__builtin_va_arg", "case", "char", "_Complex", "__complex__", "__const", "const", "__const__", "continue", "default", "do", "double", "else", "enum", "__extension__", "extern", "float", "for", "_Generic", "goto", "if", "__imag", "__imag__", "__inline", "inline", "__inline__", "int", "__int128", "__label__", "long", "_Nonnull", "__nonnull", "_Noreturn", "_Nullable", "__nullable", "__real", "__real__", "register", "__restrict", "restrict", "__restrict__", "return", "short", "__signed", "signed", "__signed__", "sizeof", "static", "_Static_assert", "struct", "switch", "__thread", "_Thread_local", "typedef", "__typeof", "typeof", "__typeof__", "union", "unsigned", "void", "__volatile", "volatile", "__volatile__", "while"]; val keywords2 = ["__asm", "asm", "__asm__", "extern"]; val keywords3 = ["_Bool", "char", "double", "float", "int", "__int128", "long", "short", "__signed", "signed", "__signed__", "unsigned", "void"]; val lexicon = Scan.make_lexicon (map raw_explode keywords); (** tokens **) (* datatype token *) datatype token_kind_comment = Comment_formal of C_Antiquote.antiq | Comment_suspicious of (bool * string * ((Position.T * Markup.T) * string) list) option datatype token_kind_encoding = Encoding_L | Encoding_default | Encoding_file of string (* error message *) option type token_kind_string = token_kind_encoding * (Symbol_Pos.T, Position.range * int \ \exceeding \<^ML>\Char.maxOrd\\) either list datatype token_kind_int_repr = Repr_decimal | Repr_hexadecimal | Repr_octal datatype token_kind_int_flag = Flag_unsigned | Flag_long | Flag_long_long | Flag_imag datatype token_kind = Keyword | Ident of (Symbol_Pos.T list, Symbol_Pos.T) either list | Type_ident | GnuC | ClangC | (**) Char of token_kind_string | Integer of int * token_kind_int_repr * token_kind_int_flag list | Float of Symbol_Pos.T list | String of token_kind_string | File of token_kind_string | (**) Space of (string * Symbol_Pos.T) option list | Comment of token_kind_comment | Sharp of int | (**) 
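(* Illustrative sketch, on a hypothetical C fragment: lexing   y += 0x2A;   yields an
   Ident token for y, Keyword tokens for "+=" and ";", and an
   Integer (42, Repr_hexadecimal, []) token for 0x2A, interleaved with Space tokens
   wherever blanks occur; see scan_ml and scan_fragment further below. *)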
Unknown | Error of string * token_group | Directive of token_kind_directive | EOF and token_kind_directive = Inline of token_group (* a not yet analyzed directive *) | Include of token_group | Define of token_group (* define *) * token_group (* name *) * token_group option (* functional arguments *) * token_group (* rewrite body *) | Undef of token_group (* name *) | Cpp of token_group | Conditional of token_group (* if *) * token_group list (* elif *) * token_group option (* else *) * token_group (* endif *) and token_group = Group1 of token list (* spaces and comments filtered from the directive *) * token list (* directive: raw data *) | Group2 of token list (* spaces and comments filtered from the directive *) * token list (* directive: function *) * token list (* directive: arguments (same line) *) | Group3 of ( Position.range (* full directive (with blanks) *) * token list (* spaces and comments filtered from the directive *) * token list (* directive: function *) * token list (* directive: arguments (same line) *)) * (Position.range * token list) (* C code or directive: arguments (following lines) *) and token = Token of Position.range * (token_kind * string); (* position *) fun set_range range (Token (_, x)) = Token (range, x); fun range_of (Token (range, _)) = range; val pos_of = #1 o range_of; val end_pos_of = #2 o range_of; (* stopper *) fun mk_eof pos = Token ((pos, Position.none), (EOF, "")); val eof = mk_eof Position.none; fun is_eof (Token (_, (EOF, _))) = true | is_eof _ = false; val stopper = Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; val one_not_eof = Scan.one (not o is_eof) (* token content *) fun kind_of (Token (_, (k, _))) = k; val group_list_of = fn Inline g => [g] | Include g => [g] | Define (g1, g2, o_g3, g4) => flat [[g1], [g2], the_list o_g3, [g4]] | Undef g => [g] | Cpp g => [g] | Conditional (g1, gs2, o_g3, g4) => flat [[g1], gs2, the_list o_g3, [g4]] fun content_of (Token (_, (_, x))) = x; fun token_leq (tok, tok') = content_of tok <= content_of tok'; val directive_cmds = fn Inline (Group1 (_, _ :: tok2 :: _)) => [tok2] | Include (Group2 (_, [_, tok2], _)) => [tok2] | Define (Group1 (_, [_, tok2]), _, _, _) => [tok2] | Undef (Group2 (_, [_, tok2], _)) => [tok2] | Conditional (c1, cs2, c3, c4) => maps (fn Group3 ((_, _, [_, tok2], _), _) => [tok2] | _ => error "Only expecting Group3") (flat [[c1], cs2, the_list c3, [c4]]) | _ => [] fun is_keyword (Token (_, (Keyword, _))) = true | is_keyword _ = false; fun is_ident (Token (_, (Ident _, _))) = true | is_ident _ = false; fun is_integer (Token (_, (Integer _, _))) = true | is_integer _ = false; fun is_delimiter (Token (_, (Keyword, x))) = not (C_Symbol.is_ascii_identifier x) | is_delimiter _ = false; (* range *) val range_list_of0 = fn [] => Position.no_range | toks as tok1 :: _ => Position.range (pos_of tok1, end_pos_of (List.last toks)) (* WARNING the use of: \\\<^ML>\fn content_of => fn pos_of => fn tok2 => List.last (Symbol_Pos.explode (content_of tok2, pos_of tok2)) |-> Position.symbol\\ would not return an accurate position if for example several "backslash newlines" are present in the symbol *) fun range_list_of toks = (range_list_of0 toks, toks) fun range_list_of' toks1 toks2 = (range_list_of0 toks1, toks2) local fun cmp_pos x2 x1 = case Position.distance_of (pos_of x2, pos_of x1) of SOME dist => dist < 0 | NONE => error "cmp_pos" fun merge_pos xs = case xs of (xs1, []) => xs1 | ([], xs2) => xs2 | (x1 :: xs1, x2 :: xs2) => let val (x, xs) = if cmp_pos x2 x1 then (x1, 
(xs1, x2 :: xs2)) else (x2, (x1 :: xs1, xs2)) in x :: merge_pos xs end in fun merge_blank toks_bl xs1 xs2 = let val cmp_tok2 = cmp_pos (List.last xs1) in ( range_list_of (merge_pos (xs1, filter cmp_tok2 toks_bl)) , range_list_of (merge_pos (xs2, filter_out cmp_tok2 toks_bl))) end end val token_list_of = let fun merge_blank' toks_bl xs1 xs2 = let val ((_, l1), (_, l2)) = merge_blank toks_bl xs1 xs2 in [l1, l2] end in group_list_of #> maps (fn Group1 (toks_bl, []) => [toks_bl] | Group1 (toks_bl, xs1) => merge_blank' toks_bl xs1 [] | Group2 (toks_bl, xs1, xs2) => merge_blank' toks_bl xs1 xs2 | Group3 ((_, toks_bl, xs1, xs2), (_, xs3)) => flat [merge_blank' toks_bl xs1 xs2, [xs3]]) #> flat end local fun warn_utf8 s pos = Output.information ("UTF-8 " ^ @{make_string} s ^ Position.here pos) val warn_ident = app (fn Right (s, pos) => warn_utf8 s pos | _ => ()) val warn_string = app (fn Left (s, pos) => if Symbol.is_utf8 s then warn_utf8 s pos else if Symbol.is_printable s then () else let val ord_s = ord s in Output.information ("Not printable symbol " ^ (if chr ord_s = s then @{make_string} (ord_s, s) else @{make_string} s) ^ Position.here pos) end | Right ((pos1, _), n) => Output.information ("Out of the supported range (character number " ^ Int.toString n ^ ")" ^ Position.here pos1)) val warn_space = app (fn SOME (msg, (s, pos)) => Output.information (msg ^ " " ^ @{make_string} s ^ Position.here pos) | _ => ()) fun warn_unknown pos = Output.information ("Unknown symbol" ^ Position.here pos) val warn_directive = app (fn Token (_, (Error (msg, _), _)) => warning msg | Token ((pos, _), (Unknown, _)) => warn_unknown pos | _ => ()) in val warn = fn Token (_, (Ident l, _)) => warn_ident l | Token (_, (Char (_, l), _)) => warn_string l | Token (_, (String (_, l), _)) => warn_string l | Token (_, (File (_, l), _)) => warn_string l | Token (_, (Space l, _)) => warn_space l | Token ((pos, _), (Unknown, _)) => warn_unknown pos | Token (_, (Comment (Comment_suspicious (SOME (explicit, msg, _))), _)) => (if explicit then warning else tracing) msg | Token (_, (Directive (kind as Conditional _), _)) => warn_directive (token_list_of kind) | Token (_, (Directive (Define (_, _, _, Group1 (_, toks4))), _)) => warn_directive toks4 | Token (_, (Directive (Include (Group2 (_, _, toks))), _)) => (case toks of [Token (_, (String _, _))] => () | [Token (_, (File _, _))] => () | _ => Output.information ("Expecting at least and at most one file" ^ Position.here (Position.range_position (pos_of (hd toks), end_pos_of (List.last toks))))) | _ => (); end fun check_error tok = case kind_of tok of Error (msg, _) => [msg] | _ => []; (* markup *) local val token_kind_markup0 = fn Char _ => (Markup.ML_char, "") | Integer _ => (Markup.ML_numeral, "") | Float _ => (Markup.ML_numeral, "") | ClangC => (Markup.ML_numeral, "") | String _ => (Markup.ML_string, "") | File _ => (Markup.ML_string, "") | Sharp _ => (Markup.antiquote, "") | Unknown => (Markup.intensify, "") | Error (msg, _) => (Markup.bad (), msg) | _ => (Markup.empty, ""); fun token_report' escape_directive (tok as Token ((pos, _), (kind, x))) = if escape_directive andalso (is_keyword tok orelse is_ident tok) then [((pos, Markup.antiquote), "")] else if is_keyword tok then let val (markup, txt) = if is_delimiter tok then (Markup.ML_delimiter, "") else if member (op =) keywords2 x then (Markup.ML_keyword2 |> Markup.keyword_properties, "") else if member (op =) keywords3 x then (Markup.ML_keyword3 |> Markup.keyword_properties, "") else (Markup.ML_keyword1 |> 
Markup.keyword_properties, ""); in [((pos, markup), txt)] end else case kind of Directive (tokens as Inline _) => ((pos, Markup.antiquoted), "") :: maps token_report0 (token_list_of tokens) | Directive (Include (Group2 (toks_bl, tok1 :: _, toks2))) => ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks2 , maps token_report0 toks_bl ] | Directive (Define (Group1 (toks_bl1, tok1 :: _), Group1 (toks_bl2, _), toks3, Group1 (toks_bl4, toks4))) => let val (toks_bl3, toks3) = case toks3 of SOME (Group1 x) => x | _ => ([], []) in ((pos, Markup.antiquoted), "") :: ((range_list_of0 toks4 |> #1, Markup.intensify), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks3 , maps token_report0 toks4 , maps token_report0 toks_bl1 , maps token_report0 toks_bl2 , map (fn tok => ((pos_of tok, Markup.antiquote), "")) toks_bl3 , maps token_report0 toks_bl4 ] end | Directive (Undef (Group2 (toks_bl, tok1 :: _, _))) => ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks_bl ] | Directive (Cpp (Group2 (toks_bl, toks1, toks2))) => ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 toks1 , maps token_report0 toks2 , maps token_report0 toks_bl ] | Directive (Conditional (c1, cs2, c3, c4)) => maps (fn Group3 (((pos, _), toks_bl, tok1 :: _, toks2), ((pos3, _), toks3)) => ((pos, Markup.antiquoted), "") :: ((pos3, Markup.intensify), "") :: flat [ maps token_report1 [tok1] , maps token_report0 toks2 , maps token_report0 toks3 , maps token_report0 toks_bl ] | _ => error "Only expecting Group3") (flat [[c1], cs2, the_list c3, [c4]]) | Error (msg, Group2 (toks_bl, toks1, toks2)) => ((range_list_of0 toks1 |> #1, Markup.bad ()), msg) :: ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 toks1 , maps token_report0 toks2 , maps token_report0 toks_bl ] | Error (msg, Group3 ((_, toks_bl, toks1, toks2), _)) => ((range_list_of0 toks1 |> #1, Markup.bad ()), msg) :: ((pos, Markup.antiquoted), "") :: flat [ maps token_report1 toks1 , maps token_report0 toks2 , maps token_report0 toks_bl ] | Comment (Comment_suspicious c) => ((pos, Markup.ML_comment), "") :: (case c of NONE => [] | SOME (_, _, l) => l) | x => [let val (markup, txt) = token_kind_markup0 x in ((pos, markup), txt) end] and token_report0 tok = token_report' false tok and token_report1 tok = token_report' true tok in val token_report = token_report0 end; (** scanners **) val err_prefix = "C lexical error: "; fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); fun !!!! msg = C_Symbol_Pos.!!!! 
(fn () => err_prefix ^ msg); val many1_blanks_no_line = Scan.repeat1 (one C_Symbol.is_ascii_blank_no_line >> (fn (s, pos) => List.find (#1 #> exists (curry op = s)) C_Symbol.ascii_blank_no_line |> the |> #2 |> Option.map (rpair (s, pos)))) (* identifiers *) local fun left x = [Left x] fun right x = [Right x] in val scan_ident_sym = let val hex = one' Symbol.is_ascii_hex in one' C_Symbol.is_identletter >> left || $$$ "\\" @@@ $$$ "u" @@@ hex @@@ hex @@@ hex @@@ hex >> left || $$$ "\\" @@@ $$$ "U" @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex @@@ hex >> left || one' Symbol.is_symbolic >> left || one' Symbol.is_control >> left || one Symbol.is_utf8 >> right end val scan_ident = scan_ident_sym @@@ Scan.repeats (scan_ident_sym || one' Symbol.is_ascii_digit >> left); val scan_ident' = scan_ident >> maps (fn Left s => s | Right c => [c]); end val keywords_ident = map_filter (fn s => Source.of_list (Symbol_Pos.explode (s, Position.none)) |> Source.source Symbol_Pos.stopper (Scan.bulk (scan_ident >> SOME || Scan.one (not o Symbol_Pos.is_eof) >> K NONE)) |> Source.exhaust |> (fn [SOME _] => SOME s | _ => NONE)) keywords (* numerals *) fun read_bin s = #1 (read_radix_int 2 s) fun read_oct s = #1 (read_radix_int 8 s) fun read_dec s = #1 (read_int s) val read_hex = let fun conv_ascii c1 c0 = String.str (Char.chr (Char.ord #"9" + Char.ord c1 - Char.ord c0 + 1)) in map (fn s => let val c = String.sub (s, 0) in if c >= #"A" andalso c <= #"F" then conv_ascii c #"A" else if c >= #"a" andalso c <= #"f" then conv_ascii c #"a" else s end) #> read_radix_int 16 #> #1 end local val many_digit = many Symbol.is_ascii_digit val many1_digit = many1 Symbol.is_ascii_digit val many_hex = many Symbol.is_ascii_hex val many1_hex = many1 Symbol.is_ascii_hex val scan_suffix_ll = ($$$ "l" @@@ $$$ "l" || $$$ "L" @@@ $$$ "L") >> K [Flag_long_long] fun scan_suffix_gnu flag = ($$$ "i" || $$$ "j") >> K [flag] val scan_suffix_int = let val u = ($$$ "u" || $$$ "U") >> K [Flag_unsigned] val l = ($$$ "l" || $$$ "L") >> K [Flag_long] in u @@@ scan_suffix_ll || scan_suffix_ll @@@ opt u || u @@@ opt l || l @@@ opt u end val scan_suffix_gnu_int0 = scan_suffix_gnu Flag_imag val scan_suffix_gnu_int = scan_full !!! (member (op =) (raw_explode "uUlLij")) "Invalid integer constant suffix" ( scan_suffix_int @@@ opt scan_suffix_gnu_int0 || scan_suffix_gnu_int0 @@@ opt scan_suffix_int) fun scan_intgnu x = x -- opt scan_suffix_gnu_int >> (fn ((s1', read, repr), l) => (read (map (Symbol_Pos.content o single) s1'), repr, l)) val scan_intoct = scan_intgnu ($$ "0" |-- scan_full !!! Symbol.is_ascii_digit "Invalid digit in octal constant" (Scan.max (fn ((xs2, _, _), (xs1, _, _)) => length xs2 < length xs1) (many C_Symbol.is_ascii_oct >> (fn xs => (xs, read_oct, Repr_octal))) (many (fn x => x = "0") >> (fn xs => (xs, read_dec, Repr_decimal))))) val scan_intdec = scan_intgnu (one C_Symbol.is_ascii_digit1 -- many Symbol.is_ascii_digit >> (fn (x, xs) => (x :: xs, read_dec, Repr_decimal))) val scan_inthex = scan_intgnu (($$ "0" -- ($$ "x" || $$ "X")) |-- many1_hex >> (fn xs2 => (xs2, read_hex, Repr_hexadecimal))) (**) fun scan_signpart a A = ($$$ a || $$$ A) @@@ opt ($$$ "+" || $$$ "-") @@@ many1_digit val scan_exppart = scan_signpart "e" "E" val scan_suffix_float = $$$ "f" || $$$ "F" || $$$ "l" || $$$ "L" val scan_suffix_gnu_float0 = Scan.trace (scan_suffix_gnu ()) >> #2 val scan_suffix_gnu_float = scan_full !!! 
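(* Illustrative sketch, on hypothetical literals accepted by the scanners above:
     42      via scan_intdec    gives  Integer (42, Repr_decimal, [])
     052     via scan_intoct    gives  Integer (42, Repr_octal, [])
     0x2aU   via scan_inthex    gives  Integer (42, Repr_hexadecimal, [Flag_unsigned])
     7LL     via scan_suffix_ll gives  Integer (7, Repr_decimal, [Flag_long_long])
   scan_int tries the hexadecimal, octal and decimal alternatives in that order. *)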
(member (op =) (raw_explode "fFlLij")) "Invalid float constant suffix" ( scan_suffix_float @@@ opt scan_suffix_gnu_float0 || scan_suffix_gnu_float0 @@@ opt scan_suffix_float) val scan_hex_pref = $$$ "0" @@@ $$$ "x" val scan_hexmant = many_hex @@@ $$$ "." @@@ many1_hex || many1_hex @@@ $$$ "." val scan_floatdec = ( ( many_digit @@@ $$$ "." @@@ many1_digit || many1_digit @@@ $$$ ".") @@@ opt scan_exppart || many1_digit @@@ scan_exppart) @@@ opt scan_suffix_gnu_float val scan_floathex = scan_hex_pref @@@ (scan_hexmant || many1_hex) @@@ scan_signpart "p" "P" @@@ opt scan_suffix_gnu_float val scan_floatfail = scan_hex_pref @@@ scan_hexmant in val scan_int = scan_inthex || scan_intoct || scan_intdec val recover_int = many1 (fn s => Symbol.is_ascii_hex s orelse member (op =) (raw_explode "xXuUlLij") s) val scan_float = scan_floatdec || scan_floathex || scan_floatfail @@@ !!! "Hexadecimal floating constant requires an exponent" Scan.fail val scan_clangversion = many1_digit @@@ $$$ "." @@@ many1_digit @@@ $$$ "." @@@ many1_digit end; (* chars and strings *) val scan_blanks1 = many1 Symbol.is_ascii_blank local val escape_char = [ ("n", #"\n") , ("t", #"\t") , ("v", #"\v") , ("b", #"\b") , ("r", #"\r") , ("f", #"\f") , ("a", #"\a") , ("e", #"\^[") , ("E", #"\^[") , ("\\", #"\\") , ("?", #"?") , ("'", #"'") , ("\"", #"\"") ] val _ = \ \printing a ML function translating code point from \<^ML_type>\int -> string\\ fn _ => app (fn (x0, x) => writeln (" | " ^ string_of_int (Char.ord x) ^ " => \"\\\\" ^ (if exists (fn x1 => x0 = x1) ["\"", "\\"] then "\\" ^ x0 else x0) ^ "\"")) escape_char fun scan_escape s0 = let val oct = one' C_Symbol.is_ascii_oct val hex = one' Symbol.is_ascii_hex val sym_pos = Symbol_Pos.range #> Position.range_position fun chr' f l = let val x = f (map Symbol_Pos.content l) val l = flat l in [if x <= Char.maxOrd then Left (chr x, sym_pos l) else Right (Symbol_Pos.range l, x)] end val read_oct' = chr' read_oct val read_hex' = chr' read_hex in one' (member (op =) (raw_explode (s0 ^ String.concat (map #1 escape_char)))) >> (fn i => [Left (( case AList.lookup (op =) escape_char (Symbol_Pos.content i) of NONE => s0 | SOME c => String.str c) , sym_pos i)]) || oct -- oct -- oct >> (fn ((o1, o2), o3) => read_oct' [o1, o2, o3]) || oct -- oct >> (fn (o1, o2) => read_oct' [o1, o2]) || oct >> (read_oct' o single) || $$ "x" |-- many1 Symbol.is_ascii_hex >> (read_hex' o map single) || $$ "u" |-- hex -- hex -- hex -- hex >> (fn (((x1, x2), x3), x4) => read_hex' [x1, x2, x3, x4]) || $$ "U" |-- hex -- hex -- hex -- hex -- hex -- hex -- hex -- hex >> (fn (((((((x1, x2), x3), x4), x5), x6), x7), x8) => read_hex' [x1, x2, x3, x4, x5, x6, x7, x8]) end fun scan_str s0 = Scan.unless newline (Scan.one (fn (s, _) => Symbol.not_eof s andalso s <> s0 andalso s <> "\\")) >> (single o Left) || Scan.ahead newline |-- !!! "bad newline" Scan.fail || $$ "\\" |-- !!! "bad escape character" (scan_escape s0); fun scan_string0 s0 msg repeats = Scan.optional ($$ "L" >> K Encoding_L) Encoding_default -- (Scan.ahead ($$ s0) |-- !!! ("unclosed " ^ msg ^ " literal") ($$ s0 |-- repeats (scan_str s0) --| $$ s0)) fun recover_string0 s0 repeats = opt ($$$ "L") @@@ $$$ s0 @@@ repeats (Scan.permissive (Scan.trace (scan_str s0) >> #2)); in val scan_char = scan_string0 "'" "char" Scan.repeats1 val scan_string = scan_string0 "\"" "string" Scan.repeats fun scan_string' src = case Source.source Symbol_Pos.stopper (Scan.recover (Scan.bulk (!!! 
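(* Illustrative sketch, on hypothetical string contents: scan_escape above accepts the
   usual C escapes, e.g.
     \n \t \\ \"            named escapes listed in escape_char
     \101                   octal, read_oct yields character code 65
     \x41                   hexadecimal, read_hex yields 65
     \u0041 and \U00000041  universal names with 4 resp. 8 hex digits
   and a code exceeding Char.maxOrd is kept as a Right value carrying its range and
   numeric value instead of a concrete character. *)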
"bad input" scan_string >> K NONE)) (fn msg => C_Basic_Symbol_Pos.one_not_eof >> K [SOME msg])) (Source.of_list src) |> Source.exhaust of [NONE] => NONE | [] => SOME "Empty input" | l => case map_filter I l of msg :: _ => SOME msg | _ => SOME "More than one string" val scan_file = let fun scan !!! s_l s_r = Scan.ahead ($$ s_l) |-- !!! ($$ s_l |-- Scan.repeats (Scan.unless newline (Scan.one (fn (s, _) => Symbol.not_eof s andalso s <> s_r) >> (single o Left))) --| $$ s_r) in Scan.trace (scan (!!! ("unclosed file literal")) "\"" "\"") >> (fn (s, src) => String (Encoding_file (scan_string' src), s)) || scan I \ \Due to conflicting symbols, raising \<^ML>\Symbol_Pos.!!!\ here will not let a potential legal \<^ML>\"<"\ symbol be tried and parsed as a \<^emph>\keyword\.\ "<" ">" >> (fn s => File (Encoding_default, s)) end val recover_char = recover_string0 "'" Scan.repeats1 val recover_string = recover_string0 "\"" Scan.repeats end; (* scan tokens *) val check = fold (tap warn #> fold cons o check_error) local fun token k ss = Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss)); fun scan_token f1 f2 = Scan.trace f1 >> (fn (v, s) => token (f2 v) s) val comments = Scan.recover (scan_token C_Antiquote.scan_antiq (Comment o Comment_formal)) (fn msg => Scan.ahead C_Antiquote.scan_antiq_recover -- C_Symbol_Pos.scan_comment_no_nest err_prefix >> (fn (explicit, res) => token (Comment (Comment_suspicious (SOME (explicit, msg, [])))) res) || Scan.fail_with (fn _ => fn _ => msg)) || C_Symbol_Pos.scan_comment_no_nest err_prefix >> token (Comment (Comment_suspicious NONE)) fun scan_fragment blanks comments sharps non_blanks = non_blanks (scan_token scan_char Char) || non_blanks (scan_token scan_string String) || blanks || comments || non_blanks sharps || non_blanks (Scan.max token_leq (Scan.literal lexicon >> token Keyword) ( scan_clangversion >> token ClangC || scan_token scan_float Float || scan_token scan_int Integer || scan_token scan_ident Ident)) || non_blanks (Scan.one (Symbol.is_printable o #1) >> single >> token Unknown) (* scan tokens, directive part *) val scan_sharp1 = $$$ "#" val scan_sharp2 = $$$ "#" @@@ $$$ "#" val scan_directive = let val f_filter = fn Token (_, (Space _, _)) => true | Token (_, (Comment _, _)) => true | Token (_, (Error _, _)) => true | _ => false val sharp1 = scan_sharp1 >> token (Sharp 1) in (sharp1 >> single) @@@ Scan.repeat ( scan_token scan_file I || scan_fragment (scan_token many1_blanks_no_line Space) comments (scan_sharp2 >> token (Sharp 2) || sharp1) I) >> (fn tokens => Inline (Group1 (filter f_filter tokens, filter_out f_filter tokens))) end local fun !!! text scan = let fun get_pos [] = " (end-of-input)" | get_pos (t :: _) = Position.here (pos_of t); fun err (syms, msg) = fn () => err_prefix ^ text ^ get_pos syms ^ (case msg of NONE => "" | SOME m => "\n" ^ m ()); in Scan.!! 
err scan end val pos_here_of = Position.here o pos_of fun one_directive f = Scan.one (fn Token (_, (Directive ( Inline (Group1 (_, Token (_, (Sharp 1, _)) :: Token (_, s) :: _))) , _)) => f s | _ => false) val get_cond = fn Token (pos, (Directive (Inline (Group1 (toks_bl, tok1 :: tok2 :: toks))), _)) => (fn t3 => Group3 ((pos, toks_bl, [tok1, tok2], toks), range_list_of t3)) | _ => error "Inline directive expected" val one_start_cond = one_directive (fn (Keyword, "if") => true | (Ident _, "ifdef") => true | (Ident _, "ifndef") => true | _ => false) val one_elif = one_directive (fn (Ident _, "elif") => true | _ => false) val one_else = one_directive (fn (Keyword, "else") => true | _ => false) val one_endif = one_directive (fn (Ident _, "endif") => true | _ => false) val not_cond = Scan.unless (one_start_cond || one_elif || one_else || one_endif) (one_not_eof >> (fn Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Ident _, "include"))) :: toks))) , s)) => Token (pos, ( case toks of [] => Error ( "Expecting at least one file" ^ Position.here (end_pos_of tok2) , Group2 (toks_bl, [tok1, tok2], toks)) | _ => Directive (Include (Group2 (toks_bl, [tok1, tok2], toks))) , s)) | Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Ident _, "define"))) :: toks))) , s)) => let fun define tok3 toks = case case toks of (tok3' as Token (pos, (Keyword, "("(*)*)))) :: toks => if Position.offset_of (end_pos_of tok3) = Position.offset_of (pos_of tok3') then let fun right msg pos = Right (msg ^ Position.here pos) fun right1 msg = right msg o #1 fun right2 msg = right msg o #2 fun take_prefix' toks_bl toks_acc pos = fn (tok1 as Token (_, (Ident _, _))) :: (tok2 as Token (pos2, (Keyword, key))) :: toks => if key = "," then take_prefix' (tok2 :: toks_bl) (tok1 :: toks_acc) pos2 toks else if key = (*( *)")" then Left (rev (tok2 :: toks_bl), rev (tok1 :: toks_acc), toks) else right1 "Expecting a colon delimiter or a closing parenthesis" pos2 | Token (pos1, (Ident _, _)) :: _ => right2 "Expecting a colon delimiter or a closing parenthesis" pos1 | (tok1 as Token (_, (Keyword, key1))) :: (tok2 as Token (pos2, (Keyword, key2))) :: toks => if key1 = "..." 
then if key2 = (*( *)")" then Left (rev (tok2 :: toks_bl), rev (tok1 :: toks_acc), toks) else right1 "Expecting a closing parenthesis" pos2 else right2 "Expecting an identifier or the keyword '...'" pos | _ => right2 "Expecting an identifier or the keyword '...'" pos in case case toks of (tok2 as Token (_, (Keyword, (*( *)")"))) :: toks => Left ([tok2], [], toks) | _ => take_prefix' [] [] pos toks of Left (toks_bl, toks_acc, toks) => Left (SOME (Group1 (tok3' :: toks_bl, toks_acc)), Group1 ([], toks)) | Right x => Right x end else Left (NONE, Group1 ([], tok3' :: toks)) | _ => Left (NONE, Group1 ([], toks)) of Left (gr1, gr2) => Directive (Define (Group1 (toks_bl, [tok1, tok2]), Group1 ([], [tok3]), gr1, gr2)) | Right msg => Error (msg, Group2 (toks_bl, [tok1, tok2], tok3 :: toks)) fun err () = Error ( "Expecting at least one identifier" ^ Position.here (end_pos_of tok2) , Group2 (toks_bl, [tok1, tok2], toks)) in Token (pos, ( case toks of (tok3 as Token (_, (Ident _, _))) :: toks => define tok3 toks | (tok3 as Token (_, (Keyword, cts))) :: toks => if exists (fn cts0 => cts = cts0) keywords_ident then define tok3 toks else err () | _ => err () , s)) end | Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Ident _, "undef"))) :: toks))) , s)) => Token (pos, ( let fun err () = Error ( "Expecting at least and at most one identifier" ^ Position.here (end_pos_of tok2) , Group2 (toks_bl, [tok1, tok2], toks)) in case toks of [Token (_, (Ident _, _))] => Directive (Undef (Group2 (toks_bl, [tok1, tok2], toks))) | [Token (_, (Keyword, cts))] => if exists (fn cts0 => cts = cts0) keywords_ident then Directive (Undef (Group2 (toks_bl, [tok1, tok2], toks))) else err () | _ => err () end , s)) | Token (pos, ( Directive (Inline (Group1 ( toks_bl , (tok1 as Token (_, (Sharp _, _))) :: (tok2 as Token (_, (Integer _, _))) :: (tok3 as Token (_, (String _, _))) :: toks))) , s)) => Token (pos, ( if forall is_integer toks then Directive (Cpp (Group2 (toks_bl, [tok1], tok2 :: tok3 :: toks))) else Error ( "Expecting an integer" ^ Position.here (drop_prefix is_integer toks |> hd |> pos_of) , Group2 (toks_bl, [tok1], tok2 :: tok3 :: toks)) , s)) | x => x)) fun scan_cond xs = xs |> (one_start_cond -- scan_cond_list -- Scan.repeat (one_elif -- scan_cond_list) -- Scan.option (one_else -- scan_cond_list) -- Scan.recover one_endif (fn msg => Scan.fail_with (fn toks => fn () => case toks of tok :: _ => "can be closed here" ^ Position.here (pos_of tok) | _ => msg)) >> (fn (((t_if, t_elif), t_else), t_endif) => Token ( Position.no_range , ( Directive (Conditional let fun t_body x = x |-> get_cond in ( t_body t_if , map t_body t_elif , Option.map t_body t_else , t_body (t_endif, [])) end) , "")))) and scan_cond_list xs = xs |> Scan.repeat (not_cond || scan_cond) val scan_directive_cond0 = not_cond || Scan.ahead ( one_start_cond |-- scan_cond_list |-- Scan.repeat (one_elif -- scan_cond_list) |-- one_else --| scan_cond_list -- (one_elif || one_else)) :-- (fn (tok1, tok2) => !!! ( "directive" ^ pos_here_of tok2 ^ " not expected after" ^ pos_here_of tok1 ^ ", detected at") Scan.fail) >> #2 || (Scan.ahead one_start_cond |-- !!! "unclosed directive" scan_cond) || (Scan.ahead one_not_eof |-- !!! 
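(* Illustrative sketch, on a hypothetical conditional block: scan_cond above folds
     #ifdef FOO
       int x;
     #else
       int y;
     #endif
   into one Conditional directive token, where each branch line becomes a Group3 value
   paired with the tokens enclosed up to the next branch, and a block left unterminated
   is reported as an unclosed directive. *)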
"missing or ambiguous beginning of conditional" Scan.fail) fun scan_directive_recover msg = not_cond || one_not_eof >> (fn tok as Token (pos, (_, s)) => Token (pos, (Error (msg, get_cond tok []), s))) in val scan_directive_cond = Scan.recover (Scan.bulk scan_directive_cond0) (fn msg => scan_directive_recover msg >> single) end (* scan tokens, main *) val scan_ml = Scan.depend let fun non_blanks st scan = scan >> pair st fun scan_frag st = scan_fragment ( scan_token (C_Basic_Symbol_Pos.newline >> K [NONE]) Space >> pair true || scan_token many1_blanks_no_line Space >> pair st) (non_blanks st comments) ((scan_sharp2 || scan_sharp1) >> token Keyword) (non_blanks false) in fn true => scan_token scan_directive Directive >> pair false || scan_frag true | false => scan_frag false end; fun recover msg = (recover_char || recover_string || Symbol_Pos.recover_cartouche || C_Symbol_Pos.recover_comment || recover_int || one' Symbol.not_eof) >> token (Error (msg, Group1 ([], []))); fun reader scan syms = let val termination = if null syms then [] else let val pos1 = List.last syms |-> Position.symbol; val pos2 = Position.symbol Symbol.space pos1; in [Token (Position.range (pos1, pos2), (Space [NONE], Symbol.space))] end; val backslash1 = $$$ "\\" @@@ many C_Symbol.is_ascii_blank_no_line @@@ C_Basic_Symbol_Pos.newline val backslash2 = Scan.one (not o Symbol_Pos.is_eof) val input0 = Source.of_list syms |> Source.source Symbol_Pos.stopper (Scan.bulk (backslash1 >> SOME || backslash2 >> K NONE)) |> Source.map_filter I |> Source.exhaust |> map (Symbol_Pos.range #> Position.range_position) val input1 = Source.of_list syms |> Source.source Symbol_Pos.stopper (Scan.bulk (backslash1 >> K NONE || backslash2 >> SOME)) |> Source.map_filter I |> Source.source' true Symbol_Pos.stopper (Scan.recover (Scan.bulk (!!!! "bad input" scan)) (fn msg => Scan.lift (recover msg) >> single)) |> Source.source stopper scan_directive_cond |> Source.exhaust |> (fn input => input @ termination); val _ = app (fn pos => Output.information ("Backslash newline" ^ Position.here pos)) input0 val _ = Position.reports_text (map (fn pos => ((pos, Markup.intensify), "")) input0); in (input1, check input1) end; in fun op @@ ((input1, f_error_lines1), (input2, f_error_lines2)) = (input1 @ input2, f_error_lines1 #> f_error_lines2) val read_init = ([], I) fun read text = (reader scan_ml o Symbol_Pos.explode) (text, Position.none); fun read_source' {language, symbols} scan source = let val pos = Input.pos_of source; val _ = if Position.is_reported_range pos then Position.report pos (language (Input.is_delimited source)) else (); in Input.source_explode source |> not symbols ? maps (fn (s, p) => raw_explode s |> map (rpair p)) |> reader scan end; val read_source = read_source' { language = Markup.language' {name = "C", symbols = false, antiquotes = true}, symbols = true} scan_ml; end; end; \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy @@ -1,1384 +1,1384 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Core Language: Parsing Support (C Language without Annotations)\ theory C_Parser_Language imports C_Environment begin text \ As mentioned in \<^theory>\Isabelle_C.C_Ast\, Isabelle/C depends on certain external parsing libraries, such as \<^dir>\../../src_ext/mlton\, and more specifically \<^dir>\../../src_ext/mlton/lib/mlyacc-lib\. Actually, the sole theory making use of the files in \<^dir>\../../src_ext/mlton/lib/mlyacc-lib\ is the present \<^file>\C_Parser_Language.thy\. (Any other remaining files in \<^dir>\../../src_ext/mlton\ are not used by Isabelle/C, they come from the original repository of MLton: \<^url>\https://github.com/MLton/mlton\.) \ subsection \Parsing Library (Including Semantic Functions)\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ (* * Modified by Frédéric Tuong, Université Paris-Saclay * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Language.C * https://hackage.haskell.org/package/language-c * * Copyright (c) 1999-2017 Manuel M T Chakravarty * Duncan Coutts * Benedikt Huber * Portions Copyright (c) 1989,1990 James A. 
Roskind * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Language.C.Comments * https://hackage.haskell.org/package/language-c-comments * * Copyright (c) 2010-2014 Geoff Hulette *) \ signature C_GRAMMAR_RULE_LIB = sig type arg = (C_Antiquote.antiq * C_Env.antiq_language list) C_Env.T type 'a monad = arg -> 'a * arg (* type aliases *) type class_Pos = C_Ast.class_Pos type reports_text0' = { markup : Markup.T, markup_body : string } type reports_text0 = reports_text0' list -> reports_text0' list type ('a, 'b) reports_base = ('a C_Env.markup_store * Position.T list, Position.T list * 'a C_Env.markup_store option) C_Ast.either -> Position.T list -> string -> 'b -> 'b (**) type NodeInfo = C_Ast.nodeInfo type CStorageSpec = NodeInfo C_Ast.cStorageSpecifier type CFunSpec = NodeInfo C_Ast.cFunctionSpecifier type CConst = NodeInfo C_Ast.cConstant type 'a CInitializerList = ('a C_Ast.cPartDesignator List.list * 'a C_Ast.cInitializer) List.list type CTranslUnit = NodeInfo C_Ast.cTranslationUnit type CExtDecl = NodeInfo C_Ast.cExternalDeclaration type CFunDef = NodeInfo C_Ast.cFunctionDef type CDecl = NodeInfo C_Ast.cDeclaration type CDeclr = NodeInfo C_Ast.cDeclarator type CDerivedDeclr = NodeInfo C_Ast.cDerivedDeclarator type CArrSize = NodeInfo C_Ast.cArraySize type CStat = NodeInfo C_Ast.cStatement type CAsmStmt = NodeInfo C_Ast.cAssemblyStatement type CAsmOperand = NodeInfo C_Ast.cAssemblyOperand type CBlockItem = NodeInfo C_Ast.cCompoundBlockItem type CDeclSpec = NodeInfo C_Ast.cDeclarationSpecifier type CTypeSpec = NodeInfo C_Ast.cTypeSpecifier type CTypeQual = NodeInfo C_Ast.cTypeQualifier type CAlignSpec = NodeInfo C_Ast.cAlignmentSpecifier type CStructUnion = NodeInfo C_Ast.cStructureUnion type CEnum = NodeInfo C_Ast.cEnumeration type CInit = NodeInfo C_Ast.cInitializer type CInitList = NodeInfo CInitializerList type CDesignator = NodeInfo C_Ast.cPartDesignator type CAttr = NodeInfo C_Ast.cAttribute type CExpr = NodeInfo C_Ast.cExpression type CBuiltin = NodeInfo C_Ast.cBuiltinThing type CStrLit = NodeInfo C_Ast.cStringLiteral (**) type ClangCVersion = C_Ast.clangCVersion type Ident = C_Ast.ident type Position = C_Ast.positiona type PosLength = Position * int type Name = C_Ast.namea type Bool = bool type CString = C_Ast.cString type CChar = C_Ast.cChar type CInteger = C_Ast.cInteger type CFloat = C_Ast.cFloat type CStructTag = C_Ast.cStructTag type CUnaryOp = C_Ast.cUnaryOp type 'a CStringLiteral = 'a C_Ast.cStringLiteral type 'a CConstant = 'a C_Ast.cConstant type ('a, 'b) Either = ('a, 'b) C_Ast.either type CIntRepr = C_Ast.cIntRepr type CIntFlag = C_Ast.cIntFlag type CAssignOp = C_Ast.cAssignOp type Comment = C_Ast.comment (**) type 'a Reversed = 'a C_Ast.Reversed type CDeclrR = C_Ast.CDeclrR type 'a Maybe = 'a C_Ast.optiona type 'a Located = 'a C_Ast.Located (**) structure List : sig val reverse : 'a list -> 'a list end (* monadic operations *) val return : 'a -> 'a monad val bind : 'a monad -> ('a -> 'b monad) -> 'b monad val bind' : 'b monad -> ('b -> unit monad) -> 'b monad val >> : unit monad * 'a monad -> 'a monad (* position reports *) val markup_make : ('a -> reports_text0' option) -> ('a -> 'b) -> ('b option -> string) -> ((Markup.T -> reports_text0) -> bool -> ('b, 'b option * string * reports_text0) C_Ast.either -> reports_text0) -> - ('a, C_Position.reports_text) reports_base - val markup_tvar : (C_Env.markup_global, C_Position.reports_text) reports_base - val markup_var_enum : (C_Env.markup_global, C_Position.reports_text) reports_base - val markup_var : 
(C_Env.markup_ident, C_Position.reports_text) reports_base - val markup_var_bound : (C_Env.markup_ident, C_Position.reports_text) reports_base - val markup_var_improper : (C_Env.markup_ident, C_Position.reports_text) reports_base + ('a, Position.report_text list) reports_base + val markup_tvar : (C_Env.markup_global, Position.report_text list) reports_base + val markup_var_enum : (C_Env.markup_global, Position.report_text list) reports_base + val markup_var : (C_Env.markup_ident, Position.report_text list) reports_base + val markup_var_bound : (C_Env.markup_ident, Position.report_text list) reports_base + val markup_var_improper : (C_Env.markup_ident, Position.report_text list) reports_base (* Language.C.Data.RList *) val empty : 'a list Reversed val singleton : 'a -> 'a list Reversed val snoc : 'a list Reversed -> 'a -> 'a list Reversed val rappend : 'a list Reversed -> 'a list -> 'a list Reversed val rappendr : 'a list Reversed -> 'a list Reversed -> 'a list Reversed val rmap : ('a -> 'b) -> 'a list Reversed -> 'b list Reversed (* Language.C.Data.Position *) val posOf : 'a -> Position val posOf' : bool -> class_Pos -> Position * int val make_comment : Symbol_Pos.T list -> Symbol_Pos.T list -> Symbol_Pos.T list -> Position.range -> Comment (* Language.C.Data.Node *) val mkNodeInfo' : Position -> PosLength -> Name -> NodeInfo val decode : NodeInfo -> (class_Pos, string) Either val decode_error' : NodeInfo -> Position.range (* Language.C.Data.Ident *) val quad : string list -> int val ident_encode : string -> int val ident_decode : int -> string val mkIdent : Position * int -> string -> Name -> Ident val internalIdent : string -> Ident (* Language.C.Syntax.AST *) val liftStrLit : 'a CStringLiteral -> 'a CConstant (* Language.C.Syntax.Constants *) val concatCStrings : CString list -> CString (* Language.C.Parser.ParserMonad *) val getNewName : Name monad val shadowTypedef0'''' : string -> Position.T list -> C_Env.markup_ident -> C_Env.env_lang -> C_Env.env_tree -> C_Env.env_lang * C_Env.env_tree val shadowTypedef0' : C_Ast.CDeclSpec list C_Env.parse_status -> bool -> C_Ast.ident * C_Ast.CDerivedDeclr list -> C_Env.env_lang -> C_Env.env_tree -> C_Env.env_lang * C_Env.env_tree val isTypeIdent : string -> arg -> bool val enterScope : unit monad val leaveScope : unit monad val getCurrentPosition : Position monad (* Language.C.Parser.Tokens *) val CTokCLit : CChar -> (CChar -> 'a) -> 'a val CTokILit : CInteger -> (CInteger -> 'a) -> 'a val CTokFLit : CFloat -> (CFloat -> 'a) -> 'a val CTokSLit : CString -> (CString -> 'a) -> 'a (* Language.C.Parser.Parser *) val reverseList : 'a list -> 'a list Reversed val L : 'a -> int -> 'a Located monad val unL : 'a Located -> 'a val withNodeInfo : int -> (NodeInfo -> 'a) -> 'a monad val withNodeInfo_CExtDecl : CExtDecl -> (NodeInfo -> 'a) -> 'a monad val withNodeInfo_CExpr : CExpr list Reversed -> (NodeInfo -> 'a) -> 'a monad val withLength : NodeInfo -> (NodeInfo -> 'a) -> 'a monad val reverseDeclr : CDeclrR -> CDeclr val withAttribute : int -> CAttr list -> (NodeInfo -> CDeclrR) -> CDeclrR monad val withAttributePF : int -> CAttr list -> (NodeInfo -> CDeclrR -> CDeclrR) -> (CDeclrR -> CDeclrR) monad val appendObjAttrs : CAttr list -> CDeclr -> CDeclr val withAsmNameAttrs : CStrLit Maybe * CAttr list -> CDeclrR -> CDeclrR monad val appendDeclrAttrs : CAttr list -> CDeclrR -> CDeclrR val ptrDeclr : CDeclrR -> CTypeQual list -> NodeInfo -> CDeclrR val funDeclr : CDeclrR -> (Ident list, (CDecl list * Bool)) Either -> CAttr list -> NodeInfo -> CDeclrR val 
arrDeclr : CDeclrR -> CTypeQual list -> Bool -> Bool -> CExpr Maybe -> NodeInfo -> CDeclrR val liftTypeQuals : CTypeQual list Reversed -> CDeclSpec list val liftCAttrs : CAttr list -> CDeclSpec list val addTrailingAttrs : CDeclSpec list Reversed -> CAttr list -> CDeclSpec list Reversed val emptyDeclr : CDeclrR val mkVarDeclr : Ident -> NodeInfo -> CDeclrR val doDeclIdent : CDeclSpec list -> CDeclrR -> unit monad val ident_of_decl : (Ident list, CDecl list * bool) C_Ast.either -> (Ident * CDerivedDeclr list * CDeclSpec list) list val doFuncParamDeclIdent : CDeclr -> unit monad end structure C_Grammar_Rule_Lib : C_GRAMMAR_RULE_LIB = struct open C_Ast type arg = (C_Antiquote.antiq * C_Env.antiq_language list) C_Env.T type 'a monad = arg -> 'a * arg (**) type reports_text0' = { markup : Markup.T, markup_body : string } type reports_text0 = reports_text0' list -> reports_text0' list type 'a reports_store = Position.T list * serial * 'a type ('a, 'b) reports_base = ('a C_Env.markup_store * Position.T list, Position.T list * 'a C_Env.markup_store option) C_Ast.either -> Position.T list -> string -> 'b -> 'b fun markup_init markup = { markup = markup, markup_body = "" } val look_idents = C_Env_Ext.list_lookup o C_Env_Ext.get_idents val look_idents' = C_Env_Ext.list_lookup o C_Env_Ext.get_idents' val look_tyidents_typedef = C_Env_Ext.list_lookup o C_Env_Ext.get_tyidents_typedef val look_tyidents'_typedef = C_Env_Ext.list_lookup o C_Env_Ext.get_tyidents'_typedef val To_string0 = meta_of_logic val ident_encode = Word8Vector.foldl (fn (w, acc) => Word8.toInt w + acc * 256) 0 o Byte.stringToBytes fun ident_decode nb = radixpand (256, nb) |> map chr |> implode fun reverse l = rev l fun report _ [] _ = I | report markup ps x = let val ms = markup x in fold (fn p => fold (fn {markup, markup_body} => cons ((p, markup), markup_body)) ms) ps end fun markup_make typing get_global desc report' data = report (fn name => let val (def, ps, id, global, typing) = case data of Left ((ps, id, param), ps' as _ :: _) => ( true , ps , id , Right ( SOME (get_global param) , "Redefinition of " ^ quote name ^ Position.here_list ps \ \Any positions provided here will be explicitly reported, which might not be the desired effect. 
So we explicitly refer the reader to a separate tooltip.\ ^ " (more details in the command modifier tooltip)" , cons { markup = Markup.class_parameter , markup_body = "redefining this" ^ Position.here_list ps' }) , typing param) | Left ((ps, id, param), _) => (true, ps, id, Left (get_global param), typing param) | Right (_, SOME (ps, id, param)) => (false, ps, id, Left (get_global param), typing param) | Right (ps, _) => ( true , ps , serial () , Right (NONE, "Undeclared " ^ quote name ^ Position.here_list ps, I) , NONE) fun markup_elem name = (name, (name, []): Markup.T) val (varN, var) = markup_elem (desc (case global of Left b => SOME b | Right (SOME b, _, _) => SOME b | _ => NONE)); val cons' = cons o markup_init in (cons' var #> report' cons' def global #> (case typing of NONE => I | SOME x => cons x)) (map (markup_init o Position.make_entity_markup {def = def} id varN o pair name) ps) end) fun markup_make' typing get_global desc report = markup_make typing get_global (fn global => "C " ^ (case global of SOME true => "global " | SOME false => "local " | NONE => "") ^ desc) (fn cons' => fn def => fn Left b => report cons' def b | Right (b, msg, f) => tap (fn _ => Output.information msg) #> f #> (case b of NONE => cons' Markup.free | SOME b => report cons' def b)) fun markup_tvar0 desc = markup_make' (K NONE) I desc (fn cons' => fn def => fn true => cons' (if def then Markup.free else Markup.ML_keyword3) | false => cons' Markup.skolem) val markup_tvar = markup_tvar0 "type variable" val markup_var_enum = markup_tvar0 "enum tag" fun string_of_list f = (fn [] => NONE | [s] => SOME s | s => SOME ("[" ^ String.concatWith ", " s ^ "]")) o map f val string_of_cDeclarationSpecifier = fn C_Ast.CStorageSpec0 _ => "storage" | C_Ast.CTypeSpec0 t => (case t of CVoidType0 _ => "void" | CCharType0 _ => "char" | CShortType0 _ => "short" | CIntType0 _ => "int" | CLongType0 _ => "long" | CFloatType0 _ => "float" | CDoubleType0 _ => "double" | CSignedType0 _ => "signed" | CUnsigType0 _ => "unsig" | CBoolType0 _ => "bool" | CComplexType0 _ => "complex" | CInt128Type0 _ => "int128" | CSUType0 _ => "SU" | CEnumType0 _ => "enum" | CTypeDef0 _ => "typedef" | CTypeOfExpr0 _ => "type_of_expr" | CTypeOfType0 _ => "type_of_type" | CAtomicType0 _ => "atomic") | C_Ast.CTypeQual0 _ => "type_qual" | C_Ast.CFunSpec0 _ => "fun" | C_Ast.CAlignSpec0 _ => "align" fun typing {params, ret, ...} = SOME { markup = Markup.typing , markup_body = case ( string_of_list (fn C_Ast.CPtrDeclr0 _ => "pointer" | C_Ast.CArrDeclr0 _ => "array" | C_Ast.CFunDeclr0 (C_Ast.Left _, _, _) => "function [...] ->" | C_Ast.CFunDeclr0 (C_Ast.Right (l_decl, _), _, _) => "function " ^ (String.concatWith " -> " (map (fn CDecl0 ([decl], _, _) => string_of_cDeclarationSpecifier decl | CDecl0 (l, _, _) => "(" ^ String.concatWith " " (map string_of_cDeclarationSpecifier l) ^ ")" | CStaticAssert0 _ => "static_assert") l_decl)) ^ " ->") params , case ret of C_Env.Previous_in_stack => SOME "..." 
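(* Illustrative sketch, roughly derived from string_of_cDeclarationSpecifier above and
   the case analysis just below: for a hypothetical declaration   int f(int a, char b);
   the typing tooltip attached to f reads approximately
     function int -> char -> int
   while an entry with neither parameters nor return specifiers falls through to the
   "markup_var: Not yet implemented" warning below. *)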
| C_Env.Parsed ret => string_of_list string_of_cDeclarationSpecifier ret) of (NONE, NONE) => let val _ = warning "markup_var: Not yet implemented" in "" end | (SOME params, NONE) => params | (NONE, SOME ret) => ret | (SOME params, SOME ret) => params ^ " " ^ ret } val markup_var = markup_make' typing #global "variable" (fn cons' => fn def => fn true => if def then cons' Markup.free else cons' Markup.delimiter (*explicit black color, otherwise the default color of constants might be automatically chosen (especially in term cartouches)*) | false => cons' Markup.bound) val markup_var_bound = markup_make' typing #global "variable" (fn cons' => K (K (cons' Markup.bound))) val markup_var_improper = markup_make' typing #global "variable" (fn cons' => K (K (cons' Markup.improper))) (**) val return = pair fun bind f g = f #-> g fun bind' f g = bind f (fn r => bind (g r) (fn () => return r)) fun a >> b = a #> b o #2 fun sequence_ f = fn [] => return () | x :: xs => f x >> sequence_ f xs (* Language.C.Data.RList *) val empty = [] fun singleton x = [x] fun snoc xs x = x :: xs fun rappend xs ys = rev ys @ xs fun rappendr xs ys = ys @ xs val rmap = map val viewr = fn [] => error "viewr: empty RList" | x :: xs => (xs, x) (* Language.C.Data.Position *) val nopos = NoPosition fun posOf _ = NoPosition fun posOf' mk_range = (if mk_range then Position.range else I) #> (fn (pos1, pos2) => let val {offset = offset, end_offset = end_offset, ...} = Position.dest pos1 in ( Position offset (From_string (C_Env.encode_positions [pos1, pos2])) 0 0 , end_offset - offset) end) fun posOf'' node env = let val (stack, len) = #rule_input env val (mk_range, (pos1a, pos1b)) = case node of Left i => (true, nth stack (len - i - 1)) | Right range => (false, range) val (pos2a, pos2b) = nth stack 0 in ( (posOf' mk_range (pos1a, pos1b) |> #1, posOf' true (pos2a, pos2b)) , env |> C_Env_Ext.map_output_pos (K (SOME (pos1a, pos2b))) |> C_Env_Ext.map_output_vacuous (K false)) end val posOf''' = posOf'' o Left val internalPos = InternalPosition fun make_comment body_begin body body_end range = Commenta ( posOf' false range |> #1 , From_string (Symbol_Pos.implode (body_begin @ body @ body_end)) , case body_end of [] => SingleLine | _ => MultiLine) (* Language.C.Data.Node *) val undefNode = OnlyPos nopos (nopos, ~1) fun mkNodeInfoOnlyPos pos = OnlyPos pos (nopos, ~1) fun mkNodeInfo pos name = NodeInfo pos (nopos, ~1) name val mkNodeInfo' = NodeInfo val decode = (fn OnlyPos0 range => range | NodeInfo0 (pos1, (pos2, len2), _) => (pos1, (pos2, len2))) #> (fn (Position0 (_, s1, _, _), (Position0 (_, s2, _, _), _)) => (case (C_Env.decode_positions (To_string0 s1), C_Env.decode_positions (To_string0 s2)) of ([pos1, _], [_, pos2]) => Left (Position.range (pos1, pos2)) | _ => Right "Expecting 2 elements") | _ => Right "Invalid position") fun decode_error' x = case decode x of Left x => x | Right msg => error msg fun decode_error x = Right (decode_error' x) val nameOfNode = fn OnlyPos0 _ => NONE | NodeInfo0 (_, _, name) => SOME name (* Language.C.Data.Ident *) local val bits7 = Integer.pow 7 2 val bits14 = Integer.pow 14 2 val bits21 = Integer.pow 21 2 val bits28 = Integer.pow 28 2 in fun quad s = case s of [] => 0 | c1 :: [] => ord c1 | c1 :: c2 :: [] => ord c2 * bits7 + ord c1 | c1 :: c2 :: c3 :: [] => ord c3 * bits14 + ord c2 * bits7 + ord c1 | c1 :: c2 :: c3 :: c4 :: s => ((ord c4 * bits21 + ord c3 * bits14 + ord c2 * bits7 + ord c1) mod bits28) + (quad s mod bits28) end local fun internalIdent0 pos s = Ident (From_string s, ident_encode s, 
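(* Illustrative note on the identifier encoding used here when building Ident nodes:
   ident_encode packs the bytes of a name into an integer, base 256 with the first
   character most significant, so for example
     ident_encode "ab" = 97 * 256 + 98 = 24930
   and ident_decode 24930 recovers "ab". *)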
pos) in fun mkIdent (pos, len) s name = internalIdent0 (mkNodeInfo' pos (pos, len) name) s val internalIdent = internalIdent0 (mkNodeInfoOnlyPos internalPos) end (* Language.C.Syntax.AST *) fun liftStrLit (CStrLit0 (str, at)) = CStrConst str at (* Language.C.Syntax.Constants *) fun concatCStrings cs = CString0 (flattena (map (fn CString0 (s,_) => s) cs), exists (fn CString0 (_, b) => b) cs) (* Language.C.Parser.ParserMonad *) fun getNewName env = (Namea (C_Env_Ext.get_namesupply env), C_Env_Ext.map_namesupply (fn x => x + 1) env) fun addTypedef (Ident0 (_, i, node)) env = let val name = ident_decode i val pos1 = [decode_error' node |> #1] val data = (pos1, serial (), null (C_Env_Ext.get_scopes env)) in ((), env |> C_Env_Ext.map_idents (Symtab.delete_safe name) |> C_Env_Ext.map_tyidents_typedef (Symtab.update (name, data)) |> C_Env_Ext.map_reports_text (markup_tvar (Left (data, flat [ look_idents env name, look_tyidents_typedef env name ])) pos1 name)) end fun shadowTypedef0''' name pos data0 env_lang env_tree = let val data = (pos, serial (), data0) val update_id = Symtab.update (name, data) in ( env_lang |> C_Env_Ext.map_tyidents'_typedef (Symtab.delete_safe name) |> C_Env_Ext.map_idents' update_id , update_id , env_tree |> C_Env.map_reports_text (markup_var (Left (data, flat [ look_idents' env_lang name , look_tyidents'_typedef env_lang name ])) pos name)) end fun shadowTypedef0'''' name pos data0 env_lang env_tree = let val (env_lang, _, env_tree) = shadowTypedef0''' name pos data0 env_lang env_tree in ( env_lang, env_tree) end fun shadowTypedef0'' ret global (Ident0 (_, i, node), params) = shadowTypedef0''' (ident_decode i) [decode_error' node |> #1] {global = global, params = params, ret = ret} fun shadowTypedef0' ret global ident env_lang env_tree = let val (env_lang, _, env_tree) = shadowTypedef0'' ret global ident env_lang env_tree in (env_lang, env_tree) end fun shadowTypedef0 ret global f ident env = let val (update_id, env) = C_Env.map_env_lang_tree' (fn env_lang => fn env_tree => let val (env_lang, update_id, env_tree) = shadowTypedef0'' ret global ident env_lang env_tree in (update_id, (env_lang, env_tree)) end) env in ((), f update_id env) end fun shadowTypedef_fun ident env = shadowTypedef0 C_Env.Previous_in_stack (case C_Env_Ext.get_scopes env of _ :: [] => true | _ => false) (fn update_id => C_Env_Ext.map_scopes (fn (NONE, x) :: xs => (SOME (fst ident), C_Env.map_idents update_id x) :: xs | (SOME _, _) :: _ => error "Not yet implemented" | [] => error "Not expecting an empty scope")) ident env fun shadowTypedef (i, params, ret) env = shadowTypedef0 (C_Env.Parsed ret) (List.null (C_Env_Ext.get_scopes env)) (K I) (i, params) env fun isTypeIdent s0 = Symtab.exists (fn (s1, _) => s0 = s1) o C_Env_Ext.get_tyidents_typedef fun enterScope env = ((), C_Env_Ext.map_scopes (cons (NONE, C_Env_Ext.get_var_table env)) env) fun leaveScope env = case C_Env_Ext.get_scopes env of [] => error "leaveScope: already in global scope" | (_, var_table) :: scopes => ((), env |> C_Env_Ext.map_scopes (K scopes) |> C_Env_Ext.map_var_table (K var_table)) val getCurrentPosition = return NoPosition (* Language.C.Parser.Tokens *) fun CTokCLit x f = x |> f fun CTokILit x f = x |> f fun CTokFLit x f = x |> f fun CTokSLit x f = x |> f (* Language.C.Parser.Parser *) fun reverseList x = rev x fun L a i = posOf''' i #>> curry Located a fun unL (Located (a, _)) = a fun withNodeInfo00 (pos1, (pos2, len2)) mkAttrNode name = return (mkAttrNode (NodeInfo pos1 (pos2, len2) name)) fun withNodeInfo0 x = x |> bind 
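(* Illustrative note, a sketch only: 'a monad is the state-monad type arg -> 'a * arg,
   with return = pair and bind f g = f #-> g, so a hypothetical computation
     bind getNewName (fn name => return name)
   merely threads the environment through and behaves like getNewName itself; the
   withNodeInfo variants below are built from exactly these combinators. *)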
getNewName oo withNodeInfo00 fun withNodeInfo0' node mkAttrNode env = let val (range, env) = posOf'' node env in withNodeInfo0 range mkAttrNode env end fun withNodeInfo x = x |> withNodeInfo0' o Left fun withNodeInfo' x = x |> withNodeInfo0' o decode_error fun withNodeInfo_CExtDecl x = x |> withNodeInfo' o (fn CDeclExt0 (CDecl0 (_, _, node)) => node | CDeclExt0 (CStaticAssert0 (_, _, node)) => node | CFDefExt0 (CFunDef0 (_, _, _, _, node)) => node | CAsmExt0 (_, node) => node) val get_node_CExpr = fn CComma0 (_, a) => a | CAssign0 (_, _, _, a) => a | CCond0 (_, _, _, a) => a | CBinary0 (_, _, _, a) => a | CCast0 (_, _, a) => a | CUnary0 (_, _, a) => a | CSizeofExpr0 (_, a) => a | CSizeofType0 (_, a) => a | CAlignofExpr0 (_, a) => a | CAlignofType0 (_, a) => a | CComplexReal0 (_, a) => a | CComplexImag0 (_, a) => a | CIndex0 (_, _, a) => a | CCall0 (_, _, a) => a | CMember0 (_, _, _, a) => a | CVar0 (_, a) => a | CConst0 c => (case c of CIntConst0 (_, a) => a | CCharConst0 (_, a) => a | CFloatConst0 (_, a) => a | CStrConst0 (_, a) => a) | CCompoundLit0 (_, _, a) => a | CGenericSelection0 (_, _, a) => a | CStatExpr0 (_, a) => a | CLabAddrExpr0 (_, a) => a | CBuiltinExpr0 cBuiltinThing => (case cBuiltinThing of CBuiltinVaArg0 (_, _, a) => a | CBuiltinOffsetOf0 (_, _, a) => a | CBuiltinTypesCompatible0 (_, _, a) => a) fun withNodeInfo_CExpr x = x |> withNodeInfo' o get_node_CExpr o hd fun withLength node mkAttrNode = bind (posOf'' (decode_error node)) (fn range => withNodeInfo00 range mkAttrNode (case nameOfNode node of NONE => error "nameOfNode" | SOME name => name)) fun reverseDeclr (CDeclrR0 (ide, reversedDDs, asmname, cattrs, at)) = CDeclr ide (rev reversedDDs) asmname cattrs at fun appendDeclrAttrs newAttrs (CDeclrR0 (ident, l, asmname, cattrs, at)) = case l of [] => CDeclrR ident empty asmname (cattrs @ newAttrs) at | x :: xs => let val appendAttrs = fn CPtrDeclr0 (typeQuals, at) => CPtrDeclr (typeQuals @ map CAttrQual newAttrs) at | CArrDeclr0 (typeQuals, arraySize, at) => CArrDeclr (typeQuals @ map CAttrQual newAttrs) arraySize at | CFunDeclr0 (parameters, cattrs, at) => CFunDeclr parameters (cattrs @ newAttrs) at in CDeclrR ident (appendAttrs x :: xs) asmname cattrs at end fun withAttribute node cattrs mkDeclrNode = bind (posOf''' node) (fn (pos, _) => bind getNewName (fn name => let val attrs = mkNodeInfo pos name val newDeclr = appendDeclrAttrs cattrs (mkDeclrNode attrs) in return newDeclr end)) fun withAttributePF node cattrs mkDeclrCtor = bind (posOf''' node) (fn (pos, _) => bind getNewName (fn name => let val attrs = mkNodeInfo pos name val newDeclr = appendDeclrAttrs cattrs o mkDeclrCtor attrs in return newDeclr end)) fun appendObjAttrs newAttrs (CDeclr0 (ident, indirections, asmname, cAttrs, at)) = CDeclr ident indirections asmname (cAttrs @ newAttrs) at fun appendObjAttrsR newAttrs (CDeclrR0 (ident, indirections, asmname, cAttrs, at)) = CDeclrR ident indirections asmname (cAttrs @ newAttrs) at fun setAsmName mAsmName (CDeclrR0 (ident, indirections, oldName, cattrs, at)) = case (case (mAsmName, oldName) of (None, None) => Right None | (None, oldname as Some _) => Right oldname | (newname as Some _, None) => Right newname | (Some n1, Some n2) => Left (n1, n2)) of Left (n1, n2) => let fun showName (CStrLit0 (CString0 (s, _), _)) = To_string0 s in error ("Duplicate assembler name: " ^ showName n1 ^ " " ^ showName n2) end | Right newName => return (CDeclrR ident indirections newName cattrs at) fun withAsmNameAttrs (mAsmName, newAttrs) declr = setAsmName mAsmName (appendObjAttrsR 
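(* Note on reverseDeclr and the RList combinators above (snoc, rappend, viewr):
   while a declarator is being parsed, its derived parts (pointers, arrays,
   parameter lists) are accumulated in reverse order, so adding a new part is
   a constant-time cons; reverseDeclr performs the single reversal when the
   intermediate CDeclrR is turned into a final CDeclr.  A standalone sketch of
   the idea, with hypothetical names:

     type 'a rlist_demo = 'a list                   (* stored right-to-left *)
     fun snoc_demo (xs : 'a rlist_demo) x = x :: xs
     fun finalize_demo (xs : 'a rlist_demo) = rev xs

   e.g. finalize_demo (snoc_demo (snoc_demo [] "ptr") "arr") = ["ptr", "arr"]. *)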
newAttrs declr) fun ptrDeclr (CDeclrR0 (ident, derivedDeclrs, asmname, cattrs, dat)) tyquals at = CDeclrR ident (snoc derivedDeclrs (CPtrDeclr tyquals at)) asmname cattrs dat fun funDeclr (CDeclrR0 (ident, derivedDeclrs, asmname, dcattrs, dat)) params cattrs at = CDeclrR ident (snoc derivedDeclrs (CFunDeclr params cattrs at)) asmname dcattrs dat fun arrDeclr (CDeclrR0 (ident, derivedDeclrs, asmname, cattrs, dat)) tyquals var_sized static_size size_expr_opt at = CDeclrR ident (snoc derivedDeclrs (CArrDeclr tyquals (case size_expr_opt of Some e => CArrSize static_size e | None => CNoArrSize var_sized) at)) asmname cattrs dat val liftTypeQuals = map CTypeQual o reverse val liftCAttrs = map (CTypeQual o CAttrQual) fun addTrailingAttrs declspecs new_attrs = case viewr declspecs of (specs_init, CTypeSpec0 (CSUType0 (CStruct0 (tag, name, Some def, def_attrs, su_node), node))) => snoc specs_init (CTypeSpec (CSUType (CStruct tag name (Just def) (def_attrs @ new_attrs) su_node) node)) | (specs_init, CTypeSpec0 (CEnumType0 (CEnum0 (name, Some def, def_attrs, e_node), node))) => snoc specs_init (CTypeSpec (CEnumType (CEnum name (Just def) (def_attrs @ new_attrs) e_node) node)) | _ => rappend declspecs (liftCAttrs new_attrs) val emptyDeclr = CDeclrR Nothing empty Nothing [] undefNode fun mkVarDeclr ident = CDeclrR (Some ident) empty Nothing [] fun doDeclIdent declspecs (decl as CDeclrR0 (mIdent, _, _, _, _)) = case mIdent of None => return () | Some ident => if exists (fn CStorageSpec0 (CTypedef0 _) => true | _ => false) declspecs then addTypedef ident else shadowTypedef ( ident , case reverseDeclr decl of CDeclr0 (_, params, _, _, _) => params , declspecs) val ident_of_decl = fn Left params => map (fn i => (i, [], [])) params | Right (params, _) => maps (fn CDecl0 (ret, l, _) => maps (fn ((Some (CDeclr0 (Some mIdent, params, _, _, _)),_),_) => [(mIdent, params, ret)] | _ => []) l | _ => []) params local fun sequence_' f = sequence_ f o ident_of_decl val is_fun = fn CFunDeclr0 _ => true | _ => false in fun doFuncParamDeclIdent (CDeclr0 (mIdent0, param0, _, _, node0)) = let val (param_not_fun, param0') = chop_prefix (not o is_fun) param0 val () = if null param_not_fun then () else Output.information ("Not a function" ^ Position.here (decode_error' (case mIdent0 of None => node0 | Some (Ident0 (_, _, node)) => node) |> #1)) val (param_fun, param0') = chop_prefix is_fun param0' in (case mIdent0 of None => return () | Some mIdent0 => shadowTypedef_fun (mIdent0, param0)) >> sequence_ shadowTypedef (maps (fn CFunDeclr0 (params, _, _) => ident_of_decl params | _ => []) param_fun) >> sequence_ (fn CFunDeclr0 (params, _, _) => C_Env.map_env_tree (pair Symtab.empty #> sequence_' (fn (Ident0 (_, i, node), params, ret) => fn (env_lang, env_tree) => pair () let val name = ident_decode i val pos = [decode_error' node |> #1] val data = ( pos , serial () , {global = false, params = params, ret = C_Env.Parsed ret}) in ( env_lang |> Symtab.update (name, data) , env_tree |> C_Env.map_reports_text (markup_var_improper (Left (data, C_Env_Ext.list_lookup env_lang name)) pos name)) end) params #> #2 o #2) #> pair () | _ => return ()) param0' end end (**) structure List = struct val reverse = rev end end \ subsection \Miscellaneous\ ML \ \\<^file>\~~/src/Pure/Thy/document_antiquotations.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay Analogous to: (* Title: Pure/Thy/document_antiquotations.ML Author: Makarius Miscellaneous document antiquotations. 
*)*) \ structure ML_Document_Antiquotations = struct (* ML text *) local fun ml_text name ml = Document_Output.antiquotation_raw_embedded name (Scan.lift Parse.embedded_input \ \TODO: enable reporting with \<^ML_type>\Token.file\ as in \<^ML>\Resources.parse_files\\) (fn ctxt => fn text => let val file_content = Token.file_source (Resources.read_file (Resources.master_directory (Proof_Context.theory_of ctxt)) (Path.explode (#1 (Input.source_content text)), Position.none)) val _ = (*TODO: avoid multiple file scanning*) ML_Context.eval_in (SOME ctxt) ML_Compiler.flags Position.none (* \ (optionally) disabling a potential double report*) (ml file_content) in file_content |> Input.source_explode |> Source.of_list |> Source.source Symbol_Pos.stopper (Scan.bulk (Symbol_Pos.scan_comment "" >> (C_Scan.Left o pair true) || Scan.many1 (Symbol.is_ascii_blank o Symbol_Pos.symbol) >> (C_Scan.Left o pair false) || Scan.one (not o Symbol_Pos.is_eof) >> C_Scan.Right)) |> Source.exhaust |> drop_prefix (fn C_Scan.Left _ => true | _ => false) |> drop_suffix (fn C_Scan.Left (false, _) => true | _ => false) |> maps (fn C_Scan.Left (_, x) => x | C_Scan.Right x => [x]) |> Symbol_Pos.implode |> enclose "\n" "\n" |> cartouche |> Document_Output.output_source ctxt |> Document_Output.isabelle ctxt end); fun ml_enclose bg en source = ML_Lex.read bg @ ML_Lex.read_source source @ ML_Lex.read en; in val _ = Theory.setup (ml_text \<^binding>\ML_file\ (ml_enclose "" "")); end; end; \ subsection \Loading the Grammar Simulator\ text \ The parser consists of a generic module \<^file>\../../src_ext/mlton/lib/mlyacc-lib/base.sig\, which interprets an automata-like format generated from ML-Yacc. \ ML_file "../../src_ext/mlton/lib/mlyacc-lib/base.sig" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/base.sig\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/join.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/join.sml\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/lrtable.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/lrtable.sml\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/stream.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/stream.sml\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/parser1.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/parser1.sml\\ subsection \Loading the Generated Grammar (SML signature)\ ML_file "../generated/c_grammar_fun.grm.sig" ML \ \\<^file>\../generated/c_grammar_fun.grm.sig\\ (*TODO: the whole part should be maximally generated and integrated in the signature file*) \ structure C_Grammar_Rule = struct open C_Grammar_Rule (* ast_generic is an untyped universe of (some) ast's with the specific lenses put ... get ... 
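   Concretely (see the declarations just below), the universe is a nested sum:
   put_CTranslUnit injects at Left, put_CExtDecl at Right o Left, put_CStat at
   Right o Right o Left, and put_CExpr one level deeper; the projections
   get_CTranslUnit .. get_CExpr (the generated start_happy1 .. start_happy4)
   are expected to be their partial inverses, i.e. presumably
   get_CExpr o put_CExpr succeeds and recovers the original expression, while
   applying get_CExpr to a value built by one of the other put_ functions
   fails.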
*) type ast_generic = start_happy val get_CExpr = start_happy4 val get_CStat = start_happy3 val get_CExtDecl = start_happy2 val get_CTranslUnit = start_happy1 fun put_CExpr (x : C_Grammar_Rule_Lib.CExpr) = Right (Right (Right (Left x))) : ast_generic fun put_CStat (x : C_Grammar_Rule_Lib.CStat) = Right (Right (Left x)) : ast_generic fun put_CExtDecl (x : C_Grammar_Rule_Lib.CExtDecl) = Right (Left x) : ast_generic fun put_CTranslUnit (x : C_Grammar_Rule_Lib.CTranslUnit) = Left x : ast_generic end \ subsection \Overloading Grammar Rules (Optional Part)\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar_Rule_Wrap_Overloading = struct open C_Grammar_Rule_Lib fun update_env_bottom_up f x arg = ((), C_Env.map_env_lang_tree (f x) arg) fun update_env_top_down f x = pair () ##> (fn arg => C_Env_Ext.map_output_env (K (SOME (f x (#env_lang arg)))) arg) (*type variable (report bound)*) val specifier3 : (CDeclSpec list) -> unit monad = update_env_bottom_up (fn l => fn env_lang => fn env_tree => ( env_lang , fold let open C_Ast in fn CTypeSpec0 (CTypeDef0 (Ident0 (_, i, node), _)) => let val name = ident_decode i val pos1 = [decode_error' node |> #1] in C_Env.map_reports_text (markup_tvar (Right (pos1, Symtab.lookup (C_Env_Ext.get_tyidents'_typedef env_lang) name)) pos1 name) end | _ => I end l env_tree)) val declaration_specifier3 : (CDeclSpec list) -> unit monad = specifier3 val type_specifier3 : (CDeclSpec list) -> unit monad = specifier3 (*basic variable (report bound)*) val primary_expression1 : (CExpr) -> unit monad = update_env_bottom_up (fn e => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in fn CVar0 (Ident0 (_, i, node), _) => let val name = ident_decode i val pos1 = [decode_error' node |> #1] in C_Env.map_reports_text (markup_var (Right (pos1, Symtab.lookup (C_Env_Ext.get_idents' env_lang) name)) pos1 name) end | _ => I end e env_tree)) (*basic variable, parameter functions (report bound)*) val declarator1 : (CDeclrR) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => ( env_lang , let open C_Ast fun markup markup_var params = pair Symtab.empty #> fold (fn (Ident0 (_, i, node), params, ret) => fn (env_lang, env_tree) => let val name = ident_decode i val pos = [decode_error' node |> #1] val data = ( pos , serial () , {global = false, params = params, ret = C_Env.Parsed ret}) in ( env_lang |> Symtab.update (name, data) , env_tree |> C_Env.map_reports_text (markup_var (Left (data, C_Env_Ext.list_lookup env_lang name)) pos name)) end) (ident_of_decl params) #> #2 in fn CDeclrR0 (_, param0, _, _, _) => (case rev param0 of CFunDeclr0 (params, _, _) :: param0 => pair param0 o markup markup_var_bound params | param0 => pair param0) #-> fold (fn CFunDeclr0 (params, _, _) => markup markup_var_improper params | _ => I) end d env_tree)) (*old style syntax for functions (legacy feature)*) val external_declaration1 : (CExtDecl) -> unit monad = update_env_bottom_up (fn f => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in fn CFDefExt0 (CFunDef0 (_, _, l, _, node)) => if null l then I else tap (fn _ => legacy_feature ("Scope analysing for old function syntax not implemented" ^ Position.here (decode_error' node |> #1))) | _ => I end f env_tree)) (*(type) enum, struct, union (report define & report bound)*) fun report_enum_bound i' node env_lang = let open C_Ast val name = ident_decode i' val pos1 = [decode_error' node |> #1] in C_Env.map_reports_text (markup_var_enum (Right (pos1, Symtab.lookup (C_Env_Ext.get_tyidents'_enum env_lang) 
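(* Note on the Left/Right convention used with markup_tvar, markup_var and
   markup_var_enum here and in C_Grammar_Rule_Lib: a Left (data, previous_entries)
   argument is passed at a defining occurrence, where a fresh entry has just
   been put into the respective table, whereas a Right (pos, lookup_result)
   argument is passed at a bound occurrence, where the identifier is merely
   looked up (lookup_result being NONE when it is unknown).  The hooks below
   follow the same pattern: reporting-only hooks pass Right, and the ones that
   also extend env_lang (e.g. the enum declarations) pass Left. *)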
name)) pos1 name) end local val look_tyidents'_enum = C_Env_Ext.list_lookup o C_Env_Ext.get_tyidents'_enum val declaration : (CDecl) -> unit monad = update_env_bottom_up (fn decl => fn env_lang => fn env_tree => let open C_Ast in fn CDecl0 (l, _, _) => fold (fn CTypeSpec0 (CEnumType0 (CEnum0 (Some (Ident0 (_, i, node)), body, _, _), _)) => (case body of None => (fn (env_lang, env_tree) => (env_lang, report_enum_bound i node env_lang env_tree)) | Some _ => fn (env_lang, env_tree) => let val name = ident_decode i val pos1 = [decode_error' node |> #1] val data = (pos1, serial (), null (C_Env.get_scopes env_lang)) in ( C_Env_Ext.map_tyidents'_enum (Symtab.update (name, data)) env_lang , C_Env.map_reports_text (markup_var_enum (Left (data, look_tyidents'_enum env_lang name)) pos1 name) env_tree) end) | _ => I) l | _ => I end decl (env_lang, env_tree)) in val declaration1 = declaration val declaration2 = declaration val declaration3 = declaration end (*(basic) enum, struct, union (report define)*) local val enumerator : ( ( Ident * CExpr Maybe ) ) -> unit monad = update_env_bottom_up (fn id => fn env_lang => let open C_Ast in fn (ident as Ident0 (_, _, node), _) => C_Grammar_Rule_Lib.shadowTypedef0' (C_Env.Parsed [CTypeSpec0 (CIntType0 node)]) (null (C_Env.get_scopes env_lang)) (ident, []) env_lang end id) in val enumerator1 = enumerator val enumerator2 = enumerator val enumerator3 = enumerator val enumerator4 = enumerator end (*(type) enum, struct, union (report bound)*) local fun declaration_specifier env_lang = let open C_Ast in fold (fn CTypeSpec0 (CEnumType0 (CEnum0 (Some (Ident0 (_, i, node)), _, _, _), _)) => report_enum_bound i node env_lang | _ => I) end in val declaration_specifier2 : (CDeclSpec list) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => let open C_Ast in ( env_lang , env_tree |> (if exists (fn CStorageSpec0 (CTypedef0 _) => true | _ => false) d then I else declaration_specifier env_lang d)) end) local val f_definition : (CFunDef) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in fn CFunDef0 (l, _, _, _, _) => declaration_specifier env_lang l end d env_tree)) in val function_definition4 = f_definition val nested_function_definition2 = f_definition end local val parameter_type_list : ( ( CDecl list * Bool ) ) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in #1 #> fold (fn CDecl0 (l, _, _) => declaration_specifier env_lang l | _ => I) end d env_tree)) in val parameter_type_list2 = parameter_type_list val parameter_type_list3 = parameter_type_list end end end \ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar_Rule_Wrap = struct open C_Grammar_Rule_Wrap open C_Grammar_Rule_Wrap_Overloading end \ subsection \Loading the Generated Grammar (SML structure)\ ML_file "../generated/c_grammar_fun.grm.sml" subsection \Grammar Initialization\ subsubsection \Functor Application\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar = C_Grammar_Fun (structure Token = LALR_Parser_Eval.Token) \ subsubsection \Mapping Strings to Structured Tokens\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar_Tokens = struct local open C_Grammar.Tokens in fun token_of_string error ty_ClangCVersion ty_cChar ty_cFloat ty_cInteger ty_cString ty_ident ty_string a1 a2 = fn "(" => x28 (ty_string, a1, a2) | ")" => x29 (ty_string, a1, a2) | "[" => x5b (ty_string, a1, a2) | "]" => x5d (ty_string, a1, a2) | 
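(* Note on the token names: the constructors of the generated grammar encode
   the concrete spelling of each terminal via hexadecimal ASCII codes, e.g.
   Char.ord #"(" = 0x28 gives x28, "->" (0x2D 0x3E) gives x2d_x3e, the leading
   underscore of "_Atomic" (0x5F) gives x5f_Atomic, and "__int128" becomes
   x5f_x5f_int_x31_x32_x38.  token_of_string is simply the inverse of that
   naming convention, mapping the string produced by the lexer back to the
   corresponding token constructor. *)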
"->" => x2d_x3e (ty_string, a1, a2) | "." => x2e (ty_string, a1, a2) | "!" => x21 (ty_string, a1, a2) | "~" => x7e (ty_string, a1, a2) | "++" => x2b_x2b (ty_string, a1, a2) | "--" => x2d_x2d (ty_string, a1, a2) | "+" => x2b (ty_string, a1, a2) | "-" => x2d (ty_string, a1, a2) | "*" => x2a (ty_string, a1, a2) | "/" => x2f (ty_string, a1, a2) | "%" => x25 (ty_string, a1, a2) | "&" => x26 (ty_string, a1, a2) | "<<" => x3c_x3c (ty_string, a1, a2) | ">>" => x3e_x3e (ty_string, a1, a2) | "<" => x3c (ty_string, a1, a2) | "<=" => x3c_x3d (ty_string, a1, a2) | ">" => x3e (ty_string, a1, a2) | ">=" => x3e_x3d (ty_string, a1, a2) | "==" => x3d_x3d (ty_string, a1, a2) | "!=" => x21_x3d (ty_string, a1, a2) | "^" => x5e (ty_string, a1, a2) | "|" => x7c (ty_string, a1, a2) | "&&" => x26_x26 (ty_string, a1, a2) | "||" => x7c_x7c (ty_string, a1, a2) | "?" => x3f (ty_string, a1, a2) | ":" => x3a (ty_string, a1, a2) | "=" => x3d (ty_string, a1, a2) | "+=" => x2b_x3d (ty_string, a1, a2) | "-=" => x2d_x3d (ty_string, a1, a2) | "*=" => x2a_x3d (ty_string, a1, a2) | "/=" => x2f_x3d (ty_string, a1, a2) | "%=" => x25_x3d (ty_string, a1, a2) | "&=" => x26_x3d (ty_string, a1, a2) | "^=" => x5e_x3d (ty_string, a1, a2) | "|=" => x7c_x3d (ty_string, a1, a2) | "<<=" => x3c_x3c_x3d (ty_string, a1, a2) | ">>=" => x3e_x3e_x3d (ty_string, a1, a2) | "," => x2c (ty_string, a1, a2) | ";" => x3b (ty_string, a1, a2) | "{" => x7b (ty_string, a1, a2) | "}" => x7d (ty_string, a1, a2) | "..." => x2e_x2e_x2e (ty_string, a1, a2) | x => let val alignof = alignof (ty_string, a1, a2) val alignas = alignas (ty_string, a1, a2) val atomic = x5f_Atomic (ty_string, a1, a2) val asm = asm (ty_string, a1, a2) val auto = auto (ty_string, a1, a2) val break = break (ty_string, a1, a2) val bool = x5f_Bool (ty_string, a1, a2) val case0 = case0 (ty_string, a1, a2) val char = char (ty_string, a1, a2) val const = const (ty_string, a1, a2) val continue = continue (ty_string, a1, a2) val complex = x5f_Complex (ty_string, a1, a2) val default = default (ty_string, a1, a2) val do0 = do0 (ty_string, a1, a2) val double = double (ty_string, a1, a2) val else0 = else0 (ty_string, a1, a2) val enum = enum (ty_string, a1, a2) val extern = extern (ty_string, a1, a2) val float = float (ty_string, a1, a2) val for0 = for0 (ty_string, a1, a2) val generic = x5f_Generic (ty_string, a1, a2) val goto = goto (ty_string, a1, a2) val if0 = if0 (ty_string, a1, a2) val inline = inline (ty_string, a1, a2) val int = int (ty_string, a1, a2) val int128 = x5f_x5f_int_x31_x32_x38 (ty_string, a1, a2) val long = long (ty_string, a1, a2) val label = x5f_x5f_label_x5f_x5f (ty_string, a1, a2) val noreturn = x5f_Noreturn (ty_string, a1, a2) val nullable = x5f_Nullable (ty_string, a1, a2) val nonnull = x5f_Nonnull (ty_string, a1, a2) val register = register (ty_string, a1, a2) val restrict = restrict (ty_string, a1, a2) val return0 = return0 (ty_string, a1, a2) val short = short (ty_string, a1, a2) val signed = signed (ty_string, a1, a2) val sizeof = sizeof (ty_string, a1, a2) val static = static (ty_string, a1, a2) val staticassert = x5f_Static_assert (ty_string, a1, a2) val struct0 = struct0 (ty_string, a1, a2) val switch = switch (ty_string, a1, a2) val typedef = typedef (ty_string, a1, a2) val typeof = typeof (ty_string, a1, a2) val thread = x5f_x5f_thread (ty_string, a1, a2) val union = union (ty_string, a1, a2) val unsigned = unsigned (ty_string, a1, a2) val void = void (ty_string, a1, a2) val volatile = volatile (ty_string, a1, a2) val while0 = while0 (ty_string, a1, a2) val cchar = 
cchar (ty_cChar, a1, a2) val cint = cint (ty_cInteger, a1, a2) val cfloat = cfloat (ty_cFloat, a1, a2) val cstr = cstr (ty_cString, a1, a2) val ident = ident (ty_ident, a1, a2) val tyident = tyident (ty_ident, a1, a2) val attribute = x5f_x5f_attribute_x5f_x5f (ty_string, a1, a2) val extension = x5f_x5f_extension_x5f_x5f (ty_string, a1, a2) val real = x5f_x5f_real_x5f_x5f (ty_string, a1, a2) val imag = x5f_x5f_imag_x5f_x5f (ty_string, a1, a2) val builtinvaarg = x5f_x5f_builtin_va_arg (ty_string, a1, a2) val builtinoffsetof = x5f_x5f_builtin_offsetof (ty_string, a1, a2) val builtintypescompatiblep = x5f_x5f_builtin_types_compatible_p (ty_string, a1, a2) val clangcversion = clangcversion (ty_ClangCVersion, a1, a2) in case x of "_Alignas" => alignas | "_Alignof" => alignof | "__alignof" => alignof | "alignof" => alignof | "__alignof__" => alignof | "__asm" => asm | "asm" => asm | "__asm__" => asm | "_Atomic" => atomic | "__attribute" => attribute | "__attribute__" => attribute | "auto" => auto | "_Bool" => bool | "break" => break | "__builtin_offsetof" => builtinoffsetof | "__builtin_types_compatible_p" => builtintypescompatiblep | "__builtin_va_arg" => builtinvaarg | "case" => case0 | "char" => char | "_Complex" => complex | "__complex__" => complex | "__const" => const | "const" => const | "__const__" => const | "continue" => continue | "default" => default | "do" => do0 | "double" => double | "else" => else0 | "enum" => enum | "__extension__" => extension | "extern" => extern | "float" => float | "for" => for0 | "_Generic" => generic | "goto" => goto | "if" => if0 | "__imag" => imag | "__imag__" => imag | "__inline" => inline | "inline" => inline | "__inline__" => inline | "int" => int | "__int128" => int128 | "__label__" => label | "long" => long | "_Nonnull" => nonnull | "__nonnull" => nonnull | "_Noreturn" => noreturn | "_Nullable" => nullable | "__nullable" => nullable | "__real" => real | "__real__" => real | "register" => register | "__restrict" => restrict | "restrict" => restrict | "__restrict__" => restrict | "return" => return0 | "short" => short | "__signed" => signed | "signed" => signed | "__signed__" => signed | "sizeof" => sizeof | "static" => static | "_Static_assert" => staticassert | "struct" => struct0 | "switch" => switch | "__thread" => thread | "_Thread_local" => thread | "typedef" => typedef | "__typeof" => typeof | "typeof" => typeof | "__typeof__" => typeof | "union" => union | "unsigned" => unsigned | "void" => void | "__volatile" => volatile | "volatile" => volatile | "__volatile__" => volatile | "while" => while0 | _ => error end end end \ end