diff --git a/thys/CakeML/Tools/cakeml_compiler.ML b/thys/CakeML/Tools/cakeml_compiler.ML --- a/thys/CakeML/Tools/cakeml_compiler.ML +++ b/thys/CakeML/Tools/cakeml_compiler.ML @@ -1,168 +1,168 @@ signature CAKEML_COMPILER = sig datatype mode = Literal (* concrete syntax *) | Prog (* sexp syntax *) val compiler: unit -> Path.T val compile_ml: mode -> string -> Path.T val compile_ml_file: mode -> Path.T -> Path.T val compile_c_file: Path.T -> Path.T val compile_ffi: unit -> Path.T val link: Path.T list -> Path.T val eval: mode -> string -> string val eval_source: mode -> Input.source -> string val string_of_prog: Proof.context -> term -> string val cakeml_cmd: Proof.context -> mode -> Input.source -> unit end structure CakeML_Compiler : CAKEML_COMPILER = struct datatype mode = Literal | Prog fun compiler () = let val platform = getenv_strict "ISABELLE_PLATFORM64" val paths = [getenv_strict "ISABELLE_CAKEML_HOME", "bin", platform, "cake"] val file = Path.appends (map Path.explode paths) in if File.exists file then file else error "CakeML: unsupported platform" end fun basis_ffi () = Path.append (Path.explode (getenv_strict "ISABELLE_CAKEML_HOME")) (Path.basic "basis_ffi.c") fun compile_ml_file mode source = let val id = serial_string () val output = File.tmp_path (Path.basic ("cakeml_out" ^ id ^ ".S")) val sexp = if mode = Literal then "false" else "true" val bash_cake = File.bash_path (compiler ()) val bash_source = File.bash_path source val res = Isabelle_System.bash_process (Bash.script (bash_cake ^ " --sexp=" ^ sexp ^ " < " ^ bash_source)) val err = Process_Result.err res val out = Process_Result.out res val _ = if err <> "" then warning err else () in if not (Process_Result.ok res) orelse err <> "" then error "CakeML: ML compilation failed" else (File.write output out; output) end fun compile_ml mode source = let val id = serial_string () val output = File.tmp_path (Path.basic ("cakeml_in" ^ id)) val _ = File.write output source in compile_ml_file mode output end fun compile_c_file source = let val id = serial_string () val output = File.tmp_path (Path.basic ("c_out" ^ id ^ ".o")) val bash_cc = File.bash_path (Path.explode (getenv_strict "ISABELLE_CC")) val bash_source = File.bash_path source val bash_output = File.bash_path output val res = Isabelle_System.bash_process (Bash.script (bash_cc ^ " -c -o " ^ bash_output ^ " " ^ bash_source)) val err = Process_Result.err res val out = Process_Result.out res val _ = if err <> "" then warning err else () val _ = writeln out in if not (Process_Result.ok res) orelse err <> "" then error "CakeML: C compilation failed" else output end val compile_ffi = compile_c_file o basis_ffi fun link sources = let val id = serial_string () val output = File.tmp_path (Path.basic ("bin" ^ id ^ ".out")) val bash_cc = File.bash_path (Path.explode (getenv_strict "ISABELLE_CC")) val bash_sources = Bash.strings (map File.standard_path sources) val bash_output = File.bash_path output val res = Isabelle_System.bash_process (Bash.script (bash_cc ^ " -o " ^ bash_output ^ " " ^ bash_sources)) val err = Process_Result.err res val out = Process_Result.out res val _ = if err <> "" then warning err else () val _ = writeln out in if not (Process_Result.ok res) orelse err <> "" then error "CakeML: linking failed" else output end fun eval mode source = let val cake = compile_ml mode source val ffi = compile_ffi () val bin = link [cake, ffi] val res = Isabelle_System.bash_process (Bash.script (File.bash_path bin)) val err = Process_Result.err res val out = Process_Result.out res val _ = if err <> "" then warning err else () in if not (Process_Result.ok res) orelse err <> "" then error "CakeML: evaluation failed" else out end fun eval_source mode = eval mode o #1 o Input.source_content fun string_of_prog ctxt t = let val (_, raw_prog) = Thm.cterm_of ctxt t |> Code_Simp.dynamic_conv ctxt |> Thm.prop_of |> Logic.dest_equals in CakeML_Sexp.print_prog raw_prog end val parse_mode = Args.parens (Parse.reserved "literal" >> K Literal || Parse.reserved "prog" >> K Prog) fun cakeml_cmd ctxt mode source = let val source = case mode of Literal => #1 (Input.source_content source) | Prog => Syntax.implode_input source |> Syntax.parse_term ctxt |> Type.constraint @{typ Ast.prog} |> Syntax.check_term ctxt |> string_of_prog ctxt val _ = tracing ("Evaluating: " ^ source) val output = eval mode source in writeln output end val _ = Outer_Syntax.command \<^command_keyword>\cakeml\ "evalute CakeML source" - (parse_mode -- Parse.input Parse.text >> (fn (mode, src) => + (parse_mode -- Parse.embedded_input >> (fn (mode, src) => Toplevel.keep (fn state => cakeml_cmd (Toplevel.context_of state) mode src))) end \ No newline at end of file diff --git a/thys/Game_Based_Crypto/CryptHOL_Tutorial.thy b/thys/Game_Based_Crypto/CryptHOL_Tutorial.thy --- a/thys/Game_Based_Crypto/CryptHOL_Tutorial.thy +++ b/thys/Game_Based_Crypto/CryptHOL_Tutorial.thy @@ -1,1333 +1,1333 @@ (* Author: Andreas Lochbihler, Digital Asset Author: S. Reza Sefidgar, ETH Zurich *) text_raw \% \clearpage \addcontentsline{toc}{section}{A Tutorial Introduction to \CryptHOL{}}% \begin{center} \normalfont {\huge A Tutorial Introduction to \CryptHOL{}} \\[1em] \large Andreas Lochbihler \\ Digital Asset (Switzerland) GmbH, Zurich, Switzerland, \texttt{mail@andreas-lochbihler.de} \\[1ex] S. Reza Sefidgar \\ Institute of Information Security, Department of Computer Science, ETH Zurich, Zurich, Switzerland, \\ \texttt{reza.sefidgar@inf.ethz.ch} \end{center} % \begin{abstract} \noindent This tutorial demonstrates how cryptographic security notions, constructions, and game-based security proofs can be formalized using the \CryptHOL{} framework. As a running example, we formalize a variant of the hash-based ElGamal encryption scheme and its IND-CPA security in the random oracle model. This tutorial assumes basic familiarity with Isabelle/HOL and standard cryptographic terminology. \end{abstract} \ section \Introduction\ text \ \CryptHOL{}~\cite{Basin2017, Lochbihler2016} is a framework for constructing rigorous game-based proofs using the proof assistant Isabelle/HOL~\cite{Nipkow2002}. Games are expressed as probabilistic functional programs that are shallowly embedded in higher-order logic (HOL) using \CryptHOL{}'s combinators. The security statements, both concrete and asymptotic, are expressed as Isabelle/HOL theorem statements, and their proofs are written declaratively in Isabelle's proof language Isar~\cite{Wenzel1999}. This way, Isabelle mechanically checks that all definitions and statements are type-correct and each proof step is a valid logical inference in HOL. This ensures that the resulting theorems are valid in higher-order logic. This tutorial explains the \CryptHOL{} essentials using a simple security proof. Our running example is a variant of the hashed ElGamal encryption scheme~\cite{Elgamal1985}. We formalize the scheme, the indistinguishability under chosen plaintext (IND-CPA) security property, the computational Diffie-Hellman (CDH) hardness assumption~\cite{Diffie1976}, and the security proof in the random oracle model. This illustrates how the following aspects of a cryptographic security proof are formalized using \CryptHOL{}: \begin{itemize} \item Game-based security definitions (CDH in \S\ref{section:lcdh} and IND-CPA in \S\ref{section:ind-cpa}) \item Oracles (a random oracle in \S\ref{section:random-oracle}) \item Cryptographic schemes, both generic (the concept of an encryption scheme) and a particular instance (the hashed Elgamal scheme in \S\ref{section:hashed-elgamal-scheme}) \item Security statements (concrete and asymptotic, \S\ref{section:security:concrete} and \S\ref{section:security:asymptotic}) \item Reductions (from IND-CPA to CDH for hashed Elgamal in \S\ref{section:reduction}) \item Different kinds of proof steps (\S\ref{section:ghop-first}--\ref{section:combining:hops}): \begin{itemize} \item Using intermediate games \item Defining failure events and applying indistinguishability-up-to lemmas \item Equivalence transformations on games \end{itemize} \end{itemize} This tutorial assumes that the reader knows the basics of Isabelle/HOL and game-based cryptography and wants to get hands-on experience with \CryptHOL{}. The semantics behind CryptHOL's embedding in higher-order logic and its soundness are not discussed; we refer the reader to the scientific articles for that~\cite{Basin2017, Lochbihler2016}. Shoup's tutorial~\cite{Shoup2004IACR} provides a good introduction to game-based proofs. The following Isabelle features are frequently used in \CryptHOL{} formalizations; the tutorials are available from the Documentation panel in Isabelle/jEdit. \begin{itemize} \item Function definitions (tutorials @{doc "prog-prove"} and @{doc "functions"},~\cite{Krauss2009}) for games and reductions \item Locales (tutorial @{doc locales},~\cite{Ballarin2014}) to modularize the formalization \item The Transfer package~\cite{Huffman2013} for automating parametricity and representation independence proofs \end{itemize} This document is generated from a corresponding Isabelle theory file available online~\cite{Lochbihler2017b}.% \footnote{% The tutorial has been added to the Archive of Formal Proofs after the release of Isabelle2018. Until the subsequent Isabelle release, the tutorial is only available in the development version at \url{https://devel.isa-afp.org/entries/Game_Based_Crypto.html}. The version for Isabelle2018 is available at \url{http://www.andreas-lochbihler.de/pub/crypthol_tutorial.zip}. } It contains this text and all examples, including the security definitions and proofs. We encourage all readers to download the latest version of the tutorial and follow the proofs and examples interactively in Isabelle/HOL. In particular, a Ctrl-click on a formal entity (function, constant, theorem name, ...) jumps to the definition of the entity. We split the tutorial into a series of recipes for common formalization tasks. In each section, we cover a familiar cryptography concept and show how it is formalized in \CryptHOL{}. Simultaneously, we explain the Isabelle/HOL and functional programming topics that are essential for formalizing game-based proofs. \ subsection \Getting started\ text \ \CryptHOL{} is available as part of the Archive of Formal Proofs~\cite{Lochbihler2017a}. Cryptography formalizations based on \CryptHOL{} are arranged in Isabelle theory files that import the relevant libraries. \ subsection \Getting started\ text \ \CryptHOL{} is available as part of the Archive of Formal Proofs~\cite{Lochbihler2017a}. Cryptography formalizations based on \CryptHOL{} are arranged in Isabelle theory files that import the relevant libraries. \ theory CryptHOL_Tutorial imports CryptHOL.CryptHOL (*<*)"HOL-Library.LaTeXsugar"(*>*) begin unbundle %invisible lifting_syntax no_adhoc_overloading %invisible Monad_Syntax.bind bind_pmf declare %invisible [[names_short]] text \ The file @{theory CryptHOL.CryptHOL} is the canonical entry point into \CryptHOL{}. For the hashed Elgamal example in this tutorial, the \CryptHOL{} library contains everything that is needed. Additional Isabelle libraries can be imported if necessary. \ section \Modelling cryptography using \CryptHOL{}\ text \ This section demonstrates how the following cryptographic concepts are modelled in \CryptHOL{}. \begin{itemize} \item A security property without oracles (\S\ref{section:lcdh}) \item An oracle (\S\ref{section:random-oracle}) \item A cryptographic concept (\S\ref{section:pk-enc}) \item A security property with an oracle (\S\ref{section:ind-cpa}) \item A concrete cryptographic scheme (\S\ref{section:hashed-elgamal-scheme}) \end{itemize} \ subsection \Security notions without oracles: the CDH assumption \label{section:lcdh}\ text \ In game-based cryptography, a security property is specified using a game between a benign challenger and an adversary. The probability of an adversary to win the game against the challenger is called its advantage. A cryptographic construction satisfies a security property if the advantage for any ``feasible'' adversary is ``negligible''. A typical security proof reduces the security of a construction to the assumed security of its building blocks. In a concrete security proof, where the security parameter is implicit, it is therefore not necessary to formally define ``feasibility'' and ''negligibility'', as the security statement establishes a concrete relation between the advantages of specific adversaries.% \footnote{% The cryptographic literature sometimes abstracts over the adversary and defines the advantage to be the advantage of the best "feasible" adversary against a game. Such abstraction would require a formalization of feasibility, for which \CryptHOL{} currently does not offer any support. We therefore always consider the advantage of a specific adversary. } We return to asymptotic security statements in \S\ref{section:asymptotic}. A formalization of a security property must therefore specify all of the following: \begin{itemize} \item The operations of the scheme (e.g., an algebraic group, an encryption scheme) \item The type of adversary \item The game with the challenger \item The advantage of the adversary as a function of the winning probability \end{itemize} For hashed Elgamal, the cyclic group must satisfy the computational Diffie-Hellman assumption. To keep the proof simple, we formalize the equivalent list version of CDH. \begin{definition}[The list computational Diffie-Hellman game] Let \\\ be a group of order \q\ with generator \\<^bold>g\. The List Computational Diffie-Hellman (LCDH) assumption holds for \\\ if any ``feasible'' adversary has ``negligible'' probability in winning the following \textbf{LCDH game} against a challenger: \begin{enumerate} \item The challenger picks \x\ and \y\ randomly (and independently) from $\{0, \dots, \q\ - 1\}$. \item It passes $\\<^bold>g\^x$ and $\\<^bold>g\^y$ to the adversary. The adversary generates a set \L\ of guesses about the value of $\\<^bold>g\^{\x\\y\}$. \item The adversary wins the game if $\\<^bold>g\^{\x\\y\} \in \L\$. \end{enumerate} \end{definition} The scheme for LCDH uses only a cyclic group. To make the LCDH formalisation reusable, we formalize the LCDH game for an arbitrary cyclic group @{term "\"} using Isabelle's module system based on locales. The locale \list_cdh\ fixes @{term "\"} to be a finite cyclic group that has elements of type @{typ "'grp"} and comes with a generator @{term "\<^bold>g\<^bsub>\\<^esub>"}. Basic facts about finite groups are formalized in the \CryptHOL{} theory @{theory CryptHOL.Cyclic_Group}.% \footnote{% The syntax directive @{theory_text "structure"} tells Isabelle that all group operations in the context of the locale refer to the group @{term "\"} unless stated otherwise. For example, @{term "\<^bold>g\<^bsub>\\<^esub>"} can be written as \\<^bold>g\ inside the locale. Isabelle automatically adds the locale parameters and the assumptions on them to all definitions and lemmas inside that locale. Of course, we could have made the group @{term "\"} an explicit argument of all functions ourselves, but then we would not benefit from Isabelle's module system, in particular locale instantiation. } \ locale list_cdh = cyclic_group \ for \ :: "'grp cyclic_group" (structure) begin text \ The LCDH game does not need oracles. The adversary is therefore just a probabilistic function from two group elements to a set of guesses, which are again group elements. In \CryptHOL{}, the probabilistic nature is expressed by the adversary returning a discrete subprobability distribution over sets of guesses, as expressed by the type constructor \spmf\. (Subprobability distributions are like probability distributions except that the whole probability mass may be less than 1, i.e., some probability may be ``lost''. A subprobability distribution is called lossless, written @{term lossless_spmf}, if its probability mass is 1.) We define the following abbreviation as a shorthand for the type of LCDH adversaries.% \footnote{% Actually, the type of group elements has already been fixed in the locale @{locale list_cdh} to the type variable @{typ "'grp"}. Unfortunately, such fixed type variables cannot be used in type declarations inside a locale in Isabelle2018. The @{command type_synonym} \adversary\ is therefore parametrized by a different type variable @{typ 'grp'}, but it will be used below only with @{typ 'grp}. } \ type_synonym 'grp' adversary = "'grp' \ 'grp' \ 'grp' set spmf" text \ The LCDH game itself is expressed as a function from the adversary @{term "\"} to the subprobability distribution of the adversary winning. \CryptHOL{} provides operators to express these distributions as probabilistic programs and reason about them using program logics: \begin{itemize} \item The \do\ notation desugars to monadic sequencing in the monad of subprobabilities~\cite{Wadler1989}. Intuitively, every line \x \ p;\ samples an element @{term x} from the distribution @{term p}. The sampling is independent, unless the distribution @{term p} depends on previously sampled variables. At the end of the block, the @{term "return_spmf DUMMY"} returns whether the adversary has won the game. \item @{term "sample_uniform n"} denotes the uniform distribution over the set \{0, ..., n - 1}\. \item @{term "order \"} denotes the order of @{term "\"} and @{term [source] "([^]) :: 'grp \ nat \ 'grp"} is the group exponentiation operator. \end{itemize} The LCDH game formalizes the challenger's behavior against an adversary @{term "\"}. In the following definition, the challenger randomly (and independently) picks two natural numbers @{term "x"} and @{term "y"} that are between 0 and @{term "\"}'s order and passes them to the adversary. The adversary then returns a set @{term zs} of guesses for @{term "g ^ (x * y)"}, where @{term "g"} is the generator of @{term "\"}. The game finally returns a @{typ bool}ean that indicates whether the adversary produced a right guess. Formally, @{term "game \"} is a @{typ bool}ean random variable. \ definition game :: "'grp adversary \ bool spmf" where "game \ = do { x \ sample_uniform (order \); y \ sample_uniform (order \); zs \ \ (\<^bold>g [^] x) (\<^bold>g [^] y); return_spmf (\<^bold>g [^] (x * y) \ zs) }" text \ The advantage of the adversary is equivalent to its probability of winning the LCDH game. The function @{term [source] "spmf :: 'a spmf \ 'a \ real"} returns the probability of an elementary event under a given subprobability distribution.\ definition advantage :: "'grp adversary \ real" where "advantage \ = spmf (game \) True" end text \ This completes the formalisation of the LCDH game and we close the locale @{locale list_cdh} with @{command end}. The above definitions are now accessible under the names @{term "list_cdh.game"} and @{term "list_cdh.advantage"}. Furthermore, when we later instantiate the locale @{locale list_cdh}, they will be specialized to the given pararameters. We will return to this topic in \S\ref{section:hashed-elgamal-scheme}. \ subsection \A Random Oracle \label{section:random-oracle}\ text \ A cryptographic oracle grants an adversary black-box access to a certain information or functionality. In this section, we formalize a random oracle, i.e., an oracle that models a random function with a finite codomain. In the Elgamal security proof, the random oracle represents the hash function: the adversary can query the oracle for a value and the oracle responds with the corresponding ``hash''. Like for the LCDH formalization, we wrap the random oracle in the locale \random_oracle\ for modularity. The random oracle will return a \bitstring\, i.e. a list of booleans, of length \len\. \ type_synonym bitstring = "bool list" locale random_oracle = fixes len :: "nat" begin text \ In \CryptHOL{}, oracles are modeled as probabilistic transition systems that given an initial state and an input, return a subprobability distribution over the output and the successor state. The type synonym @{typ [source] "('s, 'a, 'b) oracle'"} abbreviates @{typ "'s \ 'a \ ('b \ 's) spmf"}. A random oracle accepts queries of type @{typ "'a"} and generates a random bitstring of length @{term len}. The state of the random oracle remembers its previous responses in a mapping of type @{typ [source] "'a \ bitstring"}. Upon a query @{term "x"}, the oracle first checks whether this query was received before. If so, the oracle returns the same answer again. Otherwise, the oracle randomly samples a bitstring of length @{term "len"}, stores it in its state, and returns it alongside with the new state. \ type_synonym 'a state = "'a \ bitstring" definition "oracle" :: "'a state \ 'a \ (bitstring \ 'a state) spmf" where "oracle \ x = (case \ x of None \ do { bs \ spmf_of_set (nlists UNIV len); return_spmf (bs, \(x \ bs)) } | Some bs \ return_spmf (bs, \))" text \ Initially, the state of a random oracle is the empty map @{term Map.empty}, as no queries have been asked. For readability, we introduce an abbreviation:\ abbreviation (input) initial :: "'a state" where "initial \ Map.empty" text \ This actually completes the formalization of the random oracle. Before we close the locale, we prove two technical lemmas: \begin{enumerate} \item The lemma \lossless_oracle\ states that the distribution over answers and successor states is \emph{lossless}, i.e., a full probability distribution. Many reasoning steps in game-based proofs are only valid for lossless distributions, so it is generally recommended to prove losslessness of all definitions if possible. \item The lemma \fresh\ describes random oracle's behavior when the query is fresh. This lemma makes it possible to automatically unfold the random oracle only when it is known that the query is fresh. \end{enumerate} \ lemma lossless_oracle [simp]: "lossless_spmf (oracle \ x)" by(simp add: oracle_def split: option.split) lemma fresh: "oracle \ x = (do { bs \ spmf_of_set (nlists UNIV len); return_spmf (bs, \(x \ bs)) })" if "\ x = None" using that by(simp add: oracle_def) end text \ \paragraph{Remark: Independence is the default.} Note that @{typ "_ spmf"} represents a discrete probability distribution rather than a random variable. The difference is that every spmf is independent of all other spmfs. There is no implicit space of elementary events via which information may be passed from one random variable to the other. If such information passing is necessary, this must be made explicit in the program. That is why the random oracle explicitly takes a state of previous responses and returns the updated states. Later, whenever the random oracle is used, the user must pass the state around as needed. This also applies to adversaries that may want to store some information. \ subsection \Cryptographic concepts: public-key encryption \label{section:pk-enc}\ text \ A cryptographic concept consists of a set of operations and their functional behaviour. We have already seen two simple examples: the cyclic group in \S\ref{section:lcdh} and the random oracle in \S\ref{section:random-oracle}. We have formalized both of them as locales; we have not modelled their functional behavior as this is not needed for the proof. In this section, we now present a more realistic example: public-key encryption with oracle access. A public-key encryption scheme consists of three algorithms: key generation, encryption, and decryption. They are all probabilistic and, in the most general case, they may access an oracle jointly with the adversary, e.g., a random oracle modelling a hash function. As before, the operations are modelled as parameters of a locale, \ind_cpa_pk\. \begin{itemize} \item The key generation algorithm \key_gen\ outputs a public-private key pair. \item The encryption operation \encrypt\ takes a public key and a plaintext of type @{typ "'plain"} and outputs a ciphertext of type @{typ 'cipher}. \item The decryption operation \decrypt\ takes a private key and a ciphertext and outputs a plaintext. \item Additionally, the predicate \valid_plains\ tests whether the adversary has chosen a valid pair of plaintexts. This operation is needed only in the IND-CPA game definition in the next section, but we include it already here for convenience. \end{itemize} \ locale ind_cpa_pk = fixes key_gen :: "('pubkey \ 'privkey, 'query, 'response) gpv" and encrypt :: "'pubkey \ 'plain \ ('cipher, 'query, 'response) gpv" and decrypt :: "'privkey \ 'cipher \ ('plain, 'query, 'response) gpv" and valid_plains :: "'plain \ 'plain \ bool" begin text \ The three actual operations are generative probabilistic values (GPV) of type @{typ [source] "(_, 'query, 'response) gpv"}. A GPV is a probabilistic algorithm that has not yet been connected to its oracles; see the theoretical paper @{cite Basin2017} for details. The interface to the oracle is abstracted in the two type parameters @{typ 'query} for queries and @{typ 'response} for responses. As before, we omit the specification of the functional behavior, namely that decrypting an encryption with a key pair returns the plaintext. \ subsection \Security notions with oracles: IND-CPA security \label{section:ind-cpa}\ text \ In general, there are several security notions for the same cryptographic concept. For encryption schemes, an indistinguishability notion of security~\cite{Goldwasser1982} is often used. We now formalize the notion indistinguishability under chosen plaintext attacks (IND-CPA) for public-key encryption schemes. Goldwasser et al.~\cite{Goldwasser1984} showed that IND-CPA is equivalent to semantic security. \begin{definition}[IND-CPA @{cite Shoup2004IACR}] Let @{term key_gen}, @{term encrypt} and @{term decrypt} denote a public-key encryption scheme. The IND-CPA game is a two-stage game between the \emph{adversary} and a \emph{challenger}: \begin{description} \item[Stage 1 (find):] \strut \begin{enumerate} \item The challenger generates a public key @{term pk} using @{term key_gen} and gives the public key to the adversary. \item The adversary returns two messages @{term "m\<^sub>0"} and @{term "m\<^sub>1"}. \item The challenger checks that the two messages are a valid pair of plaintexts. (For example, both messages must have the same length.) \end{enumerate} \item[Stage 2 (guess):] \strut \begin{enumerate} \item The challenger flips a coin @{term b} (either 0 or 1) and gives @{term "encrypt pk m\<^sub>b"} to the adversary. \item The adversary returns a bit @{term b'}. \end{enumerate} \end{description} The adversary wins the game if his guess @{term b'} is the value of @{term b}. Let @{term "P\<^sub>w\<^sub>i\<^sub>n"} denote the winning probability. His advantage is @{term "\P\<^sub>w\<^sub>i\<^sub>n - 1/2\ :: real"} \end{definition} Like with the encryption scheme, we will define the game such that the challenger and the adversary have access to a shared oracle, but the oracle is still unspecified. Consequently, the corresponding \CryptHOL{} game is a GPV, like the operations of the abstract encryption scheme. When we specialize the definitions in the next section to the hashed Elgamal scheme, the GPV will be connected to the random oracle. The type of adversary is now more complicated: It is a pair of probabilistic functions with oracle access, one for each stage of the game. The first computes the pair of plaintext messages and the second guesses the challenge bit. The additional @{typ 'state} parameter allows the adversary to maintain state between the two stages. \ type_synonym ('pubkey', 'plain', 'cipher', 'query', 'response', 'state) adversary = "('pubkey' \ (('plain' \ 'plain') \ 'state, 'query', 'response') gpv) \ ('cipher' \ 'state \ (bool, 'query', 'response') gpv)" text \ The IND-CPA game formalization below follows the above informal definition. There are three points that need some explanation. First, this game differs from the simpler LCDH game in that it works with GPVs instead of SPMFs. Therefore, probability distributions like coin flips @{term coin_spmf} must be lifted from SPMFs to GPVs using the coercion @{term lift_spmf}. Second, the assertion @{term "assert_gpv (valid_plains m\<^sub>0 m\<^sub>1)"} ensures that the pair of messages is valid. Third, the construct @{term "TRY DUMMY ELSE DUMMY :: (_, _, _) gpv"} catches a violated assertion. In that case, the adversary's advantage drops to 0 because the result of the game is a coin flip, as we are in the \ELSE\ branch. \ fun game :: "('pubkey, 'plain, 'cipher, 'query, 'response, 'state) adversary \ (bool, 'query, 'response) gpv" where "game (\\<^sub>1, \\<^sub>2) = TRY do { (pk, sk) \ key_gen; ((m\<^sub>0, m\<^sub>1), \) \ \\<^sub>1 pk; assert_gpv (valid_plains m\<^sub>0 m\<^sub>1); b \ lift_spmf coin_spmf; cipher \ encrypt pk (if b then m\<^sub>0 else m\<^sub>1); b' \ \\<^sub>2 cipher \; Done (b' = b) } ELSE lift_spmf coin_spmf" text \ \input{fig-1} Figure~\ref{fig:1} visualizes this game as a grey box. The dashed boxes represent parameters of the game or the locale, i.e., parts that have not yet been instantiated. The actual probabilistic program is shown on the left half, which uses the dashed boxes as sub-programs. Arrows in the grey box from the left to the right pass the contents of the variables to the sub-program. Those in the other direction bind the result of the sub-program to new variables. The arrows leaving box indicate the query-response interaction with an oracle. The thick arrows emphasize that the adversary's state is passed around explicitly. The double arrow represents the return value of the game. We will use this to define the adversary's advantage. As the oracle is not specified in the game, the advantage, too, is parametrized by the oracle, given by the transition function @{term [source] "oracle :: ('s, 'query, 'response) oracle'"} and the initial state @{term [source] "\ :: 's"} its initial state. The operator @{term "run_gpv"} connects the game with the oracle, whereby the GPV becomes an SPMF. \ fun advantage :: "('\, 'query, 'response) oracle' \ '\ \ ('pubkey, 'plain, 'cipher, 'query, 'response, 'state) adversary \ real" where "advantage (oracle, \) \ = \spmf (run_gpv oracle (game \) \) True - 1/2\" end subsection \Concrete cryptographic constructions: the hashed ElGamal encryption scheme \label{section:hashed-elgamal-scheme}\ text \ With all the above modelling definitions in place, we are now ready to explain how concrete cryptographic constructions are expressed in \CryptHOL. In general, a cryptographic construction builds a cryptographic concept from possibly several simpler cryptographic concepts. In the running example, the hashed ElGamal cipher~\cite{Elgamal1985} constructs a public-key encryption scheme from a finite cyclic group and a hash function. Accordingly, the formalisation consists of three steps: \begin{enumerate} \item Import the cryptographic concepts on which the construction builds. \item Define the concrete construction. \item Instantiate the abstract concepts with the construction. \end{enumerate} First, we declare a new locale that imports the two building blocks: the cyclic group from the LCDH game with namespace \lcdh\ and the random oracle for the hash function with namespace \ro\. This ensures that the construction can be used for arbitrary cyclic groups. For the message space, it suffices to fix the length \len_plain\ of the plaintexts. \ locale hashed_elgamal = lcdh: list_cdh \ + ro: random_oracle len_plain for \ :: "'grp cyclic_group" (structure) and len_plain :: "nat" begin text \ Second, we formalize the hashed ElGamal encryption scheme. Here is the well-known informal definition. \begin{definition}[Hashed Elgamal encryption scheme] Let $G$ be a cyclic group of order $q$ that has a generator $g$. Furthermore, let $h$ be a hash function that maps the elements of $G$ to bitstrings, and $\oplus$ be the xor operator on bitstrings. The Hashed-ElGamal encryption scheme is given by the following algorithms: \begin{description} \item[Key generation] Pick an element $x$ randomly from the set $\{0, \dots, q-1\}$ and output the pair $(g^x, x)$, where $g^x$ is the public key and $x$ is the private key. \item[Encryption] Given the public key $pk$ and the message $m$, pick $y$ randomly from the set $\{0, \dots, q-1\}$ and output the pair $(g^y, h(pk^y) \oplus m)$. Here $\oplus$ denotes the bitwise exclusive-or of two bitstrings. \item[Decryption] Given the private key $sk$ and the ciphertext $(\alpha, \beta)$, output $h(\alpha^{sk}) \oplus \beta$. \end{description} \end{definition} As we can see, the public key is a group element, the private key a natural number, a plaintext a bitstring, and a ciphertext a pair of a group element and a bitstring.% \footnote{% More precisely, the private key ranges between 0 and $q - 1$ and the bitstrings are of length \len_plain\. However, Isabelle/HOL's type system cannot express such properties that depend on locale parameters. } For readability, we introduce meaningful abbreviations for these concepts. \ type_synonym 'grp' pub_key = "'grp'" type_synonym 'grp' priv_key = nat type_synonym plain = bitstring type_synonym 'grp' cipher = "'grp' \ bitstring" text \ We next translate the three algorithms into \CryptHOL{} definitions. The definitions are straightforward except for the hashing. Since we analyze the security in the random oracle model, an application of the hash function $H$ is modelled as a query to the random oracle using the GPV @{term hash}. Here, @{term "Pause x Done"} calls the oracle with query @{term x} and returns the oracle's response. Furthermore, we define the plaintext validity predicate to check the length of the adversary's messages produced by the adversary. \ abbreviation hash :: "'grp \ (bitstring, 'grp, bitstring) gpv" where "hash x \ Pause x Done" definition key_gen :: "('grp pub_key \ 'grp priv_key) spmf" where "key_gen = do { x \ sample_uniform (order \); return_spmf (\<^bold>g [^] x, x) }" definition encrypt :: "'grp pub_key \ plain \ ('grp cipher, 'grp, bitstring) gpv" where "encrypt \ msg = do { y \ lift_spmf (sample_uniform (order \)); h \ hash (\ [^] y); Done (\<^bold>g [^] y, h [\] msg) }" definition decrypt :: "'grp priv_key \ 'grp cipher \ (plain, 'grp, bitstring) gpv" where "decrypt x = (\(\, \). do { h \ hash (\ [^] x); Done (\ [\] h) })" definition valid_plains :: "plain \ plain \ bool" where "valid_plains msg1 msg2 \ length msg1 = len_plain \ length msg2 = len_plain" text\ The third and last step instantiates the interface of the encryption scheme with the hashed Elgamal scheme. This specializes all definition and theorems in the locale @{locale ind_cpa_pk} to our scheme. \ sublocale ind_cpa: ind_cpa_pk "(lift_spmf key_gen)" encrypt decrypt valid_plains . text \ Figure~\ref{fig:2} illustrates the instantiation. In comparison to Fig.~\ref{fig:1}, the boxes for the key generation and the encryption algorithm have been instantiated with the hashed Elgamal definitions from this section. We nevertheless draw the boxes to indicate that the definitions of these algorithms has not yet been inlined in the game definition. The thick grey border around the key generation algorithm denotes the @{term lift_spmf} operator, which embeds the probabilistic @{term key_gen} without oracle access into the type of GPVs with oracle access. The oracle has also been instantiated with the random oracle @{term ro.oracle} imported from @{locale "hashed_elgamal"}'s parent locale @{locale random_oracle} with prefix \ro\. \input{fig-2} \ section \Cryptographic proofs in \CryptHOL{}\ text \ This section explains how cryptographic proofs are expressed in \CryptHOL{}. We will continue our running example by stating and proving the IND-CPA security of the hashed Elgamal encryption scheme under the computational Diffie-Hellman assumption in the random oracle model, using the definitions from the previous section. More precisely, we will formalize a reduction argument (\S\ref{section:reduction}) and bound the IND-CPA advantage using the CDH advantage. We will \emph{not} formally state the result that CDH hardness in the cyclic group implies IND-CPA security, which quantifies over all feasible adversaries--% to that end, we would have to formally define feasibility, for which \CryptHOL{} currently does not offer any support. The actual proof of the bound consists of several game transformations. We will focus on those steps that illustrate common steps in cryptographic proofs (\S\ref{section:ghop-first}--\S\ref{section:combining:hops}) . \ subsection\The reduction \label{section:reduction}\ text \ The security proof involves a reduction argument: We will derive a bound on the advantage of an arbitrary adversary in the IND-CPA game @{term "ind_cpa.game"} for hashed Elgamal that depends on another adversary's advantage in the LCDH game @{term "lcdh.game"} of the underlying group. The reduction transforms every IND-CPA adversary @{term "\"} into a LCDH adversary @{term "elgamal_reduction \"}, using @{term "\"} as a black box. In more detail, it simulates an execution of the IND-CPA game including the random oracle. At the end of the game, the reduction outputs the set of queries that the adversary has sent to the random oracle. The reduction works as follows given a two part IND-CPA adversary @{term "\ = (\\<^sub>1, \\<^sub>2)"} (Figure~\ref{fig:3} visualizes the reduction as the dotted box): \begin{enumerate} \item It receives two group elements @{term "\"} and @{term "\"} from the LCDH challenger. \item The reduction passes @{term "\"} to the adversary as the public key and runs @{term "\\<^sub>1"} to get messages @{term "m\<^sub>1"} and @{term "m\<^sub>2"}. The adversary is given access to the random oracle with the initial state @{term ro.initial}. \item The assertion checks that the adversary returns two valid plaintexts, i.e., @{term m\<^sub>1} and @{term m\<^sub>2} are strings of length @{term len_plain}. \item Instead of actually performing an encryption, the reduction generates a random bitstring @{term h} of length @{term len_plain} (@{term "nlists UNIV len_plain"} denotes the set of all bitstrings of length @{term len_plain} and @{term spmf_of_set} converts the set into a uniform distribution over the set.) \item The reduction passes @{term "(\, h)"} as the challenge ciphertext to the adversary in the second phase of the IND-CPA game. \item The actual guess @{term b'} of the adversary is ignored; instead the reduction returns the set @{term "dom s'"} of all queries that the adversary made to the random oracle as its guess for the CDH game. \item If any of the steps after the first phase fails, the reduction's guess is the set @{term "dom s"} of oracle queries made during the first phase. \end{enumerate} \input{fig-3} \ fun elgamal_reduction :: "('grp pub_key, plain, 'grp cipher, 'grp, bitstring, 'state) ind_cpa.adversary \ 'grp lcdh.adversary" where "elgamal_reduction (\\<^sub>1, \\<^sub>2) \ \ = do { (((m\<^sub>1, m\<^sub>2), \), s) \ exec_gpv ro.oracle (\\<^sub>1 \) ro.initial; TRY do { _ :: unit \ assert_spmf (valid_plains m\<^sub>1 m\<^sub>2); h \ spmf_of_set (nlists UNIV len_plain); (b', s') \ exec_gpv ro.oracle (\\<^sub>2 (\, h) \) s; return_spmf (dom s') } ELSE return_spmf (dom s) }" subsection \Concrete security statement \label{section:security:concrete}\ text \ A concrete security statement in \CryptHOL{} has the form: Subject to some side conditions for the adversary @{term \}, the advantage in one game is bounded by a function of the transformed adversary's advantage in a different game.% \footnote{% A security proof often involves several reductions. The bound then depends on several advantages, one for each reduction. } \ theorem concrete_security: assumes "side conditions for \" shows "advantage\<^sub>1 \ \ f (advantage\<^sub>2 (reduction \))" oops %invisible text \ For the hashed Elgamal scheme, the theorem looks as follows, i.e., the function @{term f} is the identity function. \ end %invisible \ \These six lines allow us to show the concrete theorem statement before we define the side condition on \. They should not occur in a normal formalisation.\ context %invisible begin local_setup %invisible \Local_Theory.map_background_naming (Name_Space.mandatory_path "ind_cpa")\ definition %invisible lossless where "lossless = undefined" end %invisible context %invisible hashed_elgamal begin theorem concrete_security_elgamal: assumes lossless: "ind_cpa.lossless \" shows "ind_cpa.advantage (ro.oracle, ro.initial) \ \ lcdh.advantage (elgamal_reduction \)" oops%invisible \ \This aborts the proof as we have not yet defined @{term ind_cpa.lossless} properly. We restate the theorem below properly.\ text \ Such a statement captures the essence of a concrete security proof. For if there was a feasible adversary @{term \} with non-negligible advantage against the @{term ind_cpa.game}, then @{term "elgamal_reduction \"} would be an adversary against the @{term lcdh.game} with at least the same advantage. This implies the existence of an adversary with non-negligible advantage against the cryptographic primitive that was assumed to be secure. What we cannot state formally is that the transformed adversary @{term "elgamal_reduction \"} is feasible as we have not formalized the notion of feasibility. The readers of the formalization must convince themselves that the reduction preserves feasibility. In the case of @{term elgamal_reduction}, this should be obvious from the definition (given the theorem's side condition) as the reduction does nothing more than sampling and redirecting data. Our proof for the concrete security theorem needs the side condition that the adversary is lossless. Losslessness for adversaries is similar to losslessness for subprobability distributions. It ensures that the adversary always terminates and returns an answer to the challenger. For the IND-CPA game, we define losslessness as follows: \ definition (in ind_cpa_pk) lossless :: "('pubkey, 'plain, 'cipher, 'query, 'response, 'state) adversary \ bool" where "lossless = (\(\\<^sub>1, \\<^sub>2). (\pk. lossless_gpv \_full (\\<^sub>1 pk)) \ (\cipher \. lossless_gpv \_full (\\<^sub>2 cipher \)))" theorem %invisible concrete_security_elgamal: assumes lossless: "ind_cpa.lossless \" shows "ind_cpa.advantage (ro.oracle, ro.initial) \ \ lcdh.advantage (elgamal_reduction \)" text %visible \ So now let's start with the proof. \ proof %visible - text %invisible \ For this proof, we configure Isabelle's simplifier such that the proofs become reasonably short. Initially, when writing the proof, we had added those lemmas manually to to invocation of the simplifier and then collected the useful rules in a polishing step. \ note %invisible [cong del] = if_weak_cong and [split del] = if_split and [simp] = map_lift_spmf gpv.map_id lossless_weight_spmfD map_spmf_bind_spmf bind_spmf_const lcdh.order_gt_0 and [if_distribs] = if_distrib[where f="\x. try_spmf x _"] if_distrib[where f="weight_spmf"] if_distrib[where f="\r. scale_spmf r _"] text %visible \ As a preparatory step, we split the adversary @{term "\"} into its two phases @{term "\\<^sub>1"} and @{term "\\<^sub>2"}. We could have made the two phases explicit in the theorem statement, but our form is easier to read and use. We also immediately decompose the losslessness assumption on @{term "\"}.% \footnote{% Later in the proof, we will often prove losslessness of the definitions in the proof. We will not show them in this document, but they are in the Isabelle sources from which this document is generated. } \ obtain \\<^sub>1 \\<^sub>2 where \ [simp]: "\ = (\\<^sub>1, \\<^sub>2)" by (cases "\") from lossless have lossless1 [simp]: "\pk. lossless_gpv \_full (\\<^sub>1 pk)" and lossless2 [simp]: "\\ cipher. lossless_gpv \_full (\\<^sub>2 \ cipher)" by(auto simp add: ind_cpa.lossless_def) subsection \Recording adversary queries \label{section:ghop-first}\ text \ As can be seen in Fig.~\ref{fig:2}, both the adversary and the encryption of the challenge ciphertext use the random oracle. The reduction, however, returns only the queries that the adversary makes to the oracle (in Fig.~\ref{fig:3}, $h$ is generated independently of the random oracle). To bridge this gap, we introduce an @{term interceptor} between the adversary and the oracle that records all adversary's queries. \ define interceptor :: "'grp set \ 'grp \ (bitstring \ 'grp set, _, _) gpv" where "interceptor \ x = (do { h \ hash x; Done (h, insert x \) })" for \ x have %invisible [simp]: "lossless_gpv \_full (interceptor \ x)" for \ x by(simp add: interceptor_def) have %invisible [simp]: "lossless_gpv \_full (inline interceptor (\\<^sub>1 \) s)" for \ s by(rule lossless_inline[where \=\_full]) simp_all have %invisible [simp]: "lossless_gpv \_full (inline interceptor (\\<^sub>2 \ c) s)" for \ c s by(rule lossless_inline[where \=\_full]) simp_all text \ We integrate this interceptor into the @{term "ind_cpa.game"} using the @{term "inline"} function as illustrated in Fig.~\ref{fig:4} and name the result @{term "game\<^sub>0"}. \input{fig-4} \ define game\<^sub>0 where "game\<^sub>0 = TRY do { (pk, _) \ lift_spmf key_gen; (((m\<^sub>1, m\<^sub>2), \), s) \ inline interceptor (\\<^sub>1 pk) {}; assert_gpv (valid_plains m\<^sub>1 m\<^sub>2); b \ lift_spmf coin_spmf; c \ encrypt pk (if b then m\<^sub>1 else m\<^sub>2); (b', s') \ inline interceptor (\\<^sub>2 c \) s; Done (b' = b) } ELSE lift_spmf coin_spmf" text \ We claim that the above modifications do not affect the output of the IND-CPA game at all. This might seem obvious since we are only logging the adversary's queries without modifying them. However, in a formal proof, this needs to be precisely justified. More precisely, we have been very careful that the two games @{term "ind_cpa.game \"} and @{term game\<^sub>0} have identical structure. They differ only in that @{term game\<^sub>0} uses the adversary @{term "(\pk. inline interceptor (\\<^sub>1 pk) {}, \cipher \. inline interceptor (\\<^sub>2 cipher \))"} instead of @{term "\"}. The formal justification for this replacement happens in two steps: \begin{enumerate} \item We replace the oracle transformer @{term interceptor} with @{term id_oracle}, which merely passes queries and results to the oracle. \item Inlining the identity oracle transformer @{term id_oracle} does not change an adversary and can therefore be dropped. \end{enumerate} The first step is automated using Isabelle's Transfer package~\cite{Huffman2013}, which is based on Mitchell's representation independence~\cite{Mitchell1986}. The replacement is controlled by so-called transfer rules of the form @{term "R x y"} which indicates that @{term x} shall replace @{term y}; the correspondence relation @{term R} captures the kind of replacement. The @{method transfer} proof method then constructs a constraint system with one constraint for each atom in the proof goal where the correspondence relation and the replacement are unknown. It then tries to solve the constraint system using the rules that have been declared with the attribute \[transfer_rule]\. Atoms that do not have a suitable transfer rule are not changed and their correspondence relation is instantiated with the identity relation @{term "(=)"}. The second step is automated using Isabelle's simplifier. In the example, the crucial change happens in the state of the oracle transformer: @{term interceptor} records all queries in a set whereas @{term id_oracle} has no state, which is modelled with the singleton type @{typ unit}. To capture the change, we define the correspondence relation @{term cr} on the states of the oracle transformers. (As we are in the process of adding this state, this state is irrelevant and @{term cr} is therefore always true. We nevertheless have to make an explicit definition such that Isabelle does not automatically beta-reduce terms, which would confuse @{method transfer}.) We then prove that it relates the initial states and that @{term cr} is a bisimulation relation for the two oracle transformers; see @{cite Basin2017} for details. The bisimulation proof itself is automated, too: A bit of term rewriting (@{command "unfolding"}) makes the two oracle transformers structurally identical except for the state update function. Having proved that the state update function @{term "\_ \. \"} is a correct replacement for @{term "insert"} w.r.t. @{term cr}, the @{method transfer_prover} then lifts this replacement to the bisimulation rule. Here, @{method transfer_prover} is similar to @{method transfer} except that it works only for transfer rules and builds the constraint system only for the term to be replaced. The theory source of this tutorial contains a step-by-step proof to illustrate how transfer works. \ { define cr :: "unit \ 'grp set \ bool" where "cr \ \' = True" for \ \' have [transfer_rule]: "cr () {}" by(simp add: cr_def) \ \initial states\ have [transfer_rule]: "((=) ===> cr ===> cr) (\_ \. \) insert" \ \state update\ by(simp add: rel_fun_def cr_def) have [transfer_rule]: \ \@{term cr} is a bisimulation for the oracle transformers\ "(cr ===> (=) ===> rel_gpv (rel_prod (=) cr) (=)) id_oracle interceptor" unfolding interceptor_def[abs_def] id_oracle_def[abs_def] bind_gpv_Pause bind_rpv_Done by transfer_prover have "ind_cpa.game \ = game\<^sub>0" unfolding game\<^sub>0_def \ ind_cpa.game.simps by transfer (simp add: bind_map_gpv o_def ind_cpa.game.simps split_def) } note %invisible ind_cpa_game_eq_game\<^sub>0 = this { % invisible text \And now the same proof again, but step by step.\ define cr :: "unit \ 'grp set \ bool" where "cr \ \' = True" for \ \' have [transfer_rule]: "cr () {}" by(simp add: cr_def) \ \initial states\ have [transfer_rule]: "((=) ===> cr ===> cr) (\_ \. \) insert" \ \state update\ by(simp add: rel_fun_def cr_def) have [transfer_rule]: \ \@{term cr} is a bisimulation for the oracle transformers\ "(cr ===> (=) ===> rel_gpv (rel_prod (=) cr) (=)) id_oracle interceptor" text \1. Unfold the definitions of the oracle transformers and massage them to have the same structure.\ unfolding interceptor_def[abs_def] id_oracle_def[abs_def] bind_gpv_Pause bind_rpv_Done text \2. Build the constraint system for the second argument of the correspondence relation, namely @{term interceptor} with the rewrite rules applied.\ apply transfer_prover_start text \3. Solve the constraint system prolog-style by resolving with the rules from @{thm [source] transfer_raw}.\ apply transfer_step \ \This step uses the state update transfer rule proven in the previous @{command have}.\ apply transfer_step apply transfer_step apply transfer_step text \4. Check that the found solution is the expected one, namely the first argument of the correspondence relation, here @{term id_oracle} with the rewrite rules applied.\ apply transfer_prover_end done have "ind_cpa.game \ = game\<^sub>0" text \1. Unfold the definitions\ unfolding game\<^sub>0_def \ ind_cpa.game.simps text \2. Build the constraint system for the whole subgoal. The \fixing\ tells transfer that it must not replace the variables @{term "\"}, @{term "len_plain"}, @{term "\\<^sub>1"} and @{term "\\<^sub>2"}.\ apply transfer_start text \3. Solve all constraints with the transfer rules\ apply transfer_step+ apply transfer_end text \4. Get rid of the identity oracle transformer @{term id_oracle} by rewriting. Really curious readers may dare to look at the simplifer trace.\ supply [[simp_trace_new mode=full]] apply(simp add: bind_map_gpv o_def ind_cpa.game.simps split_def) done } subsection %visible \Equational program transformations\ text \ Before we move on, we need to simplify @{term game\<^sub>0} and inline a few of the definitions. All these simplifications are equational program transformations, so the Isabelle simplifier can justify them. We combine the @{term interceptor} with the random oracle @{term ro.oracle} into a new oracle @{term oracle'} with which the adversary interacts. \ define oracle' :: "'grp set \ ('grp \ bitstring) \ 'grp \ _" where "oracle' = (\(s, \) x. do { (h, \') \ case \ x of None \ do { bs \ spmf_of_set (nlists UNIV len_plain); return_spmf (bs, \(x \ bs)) } | Some bs \ return_spmf (bs, \); return_spmf (h, insert x s, \') })" have %invisible [simp]: "lossless_spmf (oracle' s plain)" for s plain by(simp add: oracle'_def Let_def split: prod.split option.split) have %invisible [simp]: "lossless_spmf (exec_gpv oracle' (\\<^sub>1 \) s)" for s \ by(rule lossless_exec_gpv[where \=\_full]) simp_all have %invisible [simp]: "lossless_spmf (exec_gpv oracle' (\\<^sub>2 \ cipher) s)" for \ cipher s by(rule lossless_exec_gpv[where \=\_full]) simp_all have *: "exec_gpv ro.oracle (inline interceptor \ s) \ = map_spmf (\(a, b, c). ((a, b), c)) (exec_gpv oracle' \ (s, \))" for \ \ s by(simp add: interceptor_def oracle'_def ro.oracle_def Let_def exec_gpv_inline exec_gpv_bind o_def split_def cong del: option.case_cong_weak) text \ We also want to inline the key generation and encryption algorithms, push the @{term "TRY DUMMY ELSE DUMMY :: (_, _, _) gpv"} towards the assertion (which is possible because the adversary is lossless by assumption), and rearrange the samplings a bit. The latter is automated using \monad_normalisation\~\cite{Schneider2017}.% \footnote{% The tool \monad_normalisation\ augments Isabelle's simplifier with a normalization procedure for commutative monads based on higher-order ordered rewriting. It can also commute across control structures like \if\ and \case\. Although it is not complete as a decision procedure (as the normal forms are not unique), it usually works in practice. } \ have game\<^sub>0: "run_gpv ro.oracle game\<^sub>0 ro.initial = do { x \ sample_uniform (order \); y \ sample_uniform (order \); b \ coin_spmf; (((msg1, msg2), \), (s, s_h)) \ exec_gpv oracle' (\\<^sub>1 (\<^bold>g [^] x)) ({}, ro.initial); TRY do { _ :: unit \ assert_spmf (valid_plains msg1 msg2); (h, s_h') \ ro.oracle s_h (\<^bold>g [^] (x * y)); let cipher = (\<^bold>g [^] y, h [\] (if b then msg1 else msg2)); (b', (s', s_h'')) \ exec_gpv oracle' (\\<^sub>2 cipher \) (s, s_h'); return_spmf (b' = b) } ELSE do { b \ coin_spmf; return_spmf b } }" including monad_normalisation by(simp add: game\<^sub>0_def key_gen_def encrypt_def * exec_gpv_bind bind_map_spmf assert_spmf_def try_bind_assert_gpv try_gpv_bind_lossless split_def o_def if_distribs lcdh.nat_pow_pow) text \ This call to Isabelle's simplifier may look complicated at first, but it can be constructed incrementally by adding a few theorems and looking at the resulting goal state and searching for suitable theorems using @{command find_theorems}. As always in Isabelle, some intuition and knowledge about the library of lemmas is crucial. \begin{itemize} \item We knew that the definitions @{thm [source] game\<^sub>0_def}, @{thm [source] key_gen_def}, and @{thm [source] encrypt_def} should be unfolded, so they are added first to the simplifier's set of rewrite rules. \item The equations @{thm [source] exec_gpv_bind}, @{thm [source] try_bind_assert_gpv}, and @{thm [source] try_gpv_bind_lossless} ensure that the operator @{term exec_gpv}, which connects the @{term game\<^sub>0} with the random oracle, is distributed over the sequencing. - Together with @{thm [source] * }, this gives the adversary access to @{term oracle'} instead of the interceptor and the random oracle, + Together with @{thm [source] *}, this gives the adversary access to @{term oracle'} instead of the interceptor and the random oracle, and makes the call to the random oracle in the encryption of the chosen message explicit. \item The theorem @{thm [source] lcdh.nat_pow_pow} rewrites the iterated exponentiation @{term "(\<^bold>g [^] x) [^] y"} to @{term "\<^bold>g [^] (x * y)"}. \item The other theorems @{thm [source] bind_map_spmf}, @{thm [source] assert_spmf_def}, @{thm [source] split_def}, @{thm [source] o_def}, and @{thm [source] if_distribs} take care of all the boilerplate code that makes all these transformations type-correct. These theorems often have to be used together. \end{itemize} Note that the state of the oracle @{term oracle'} is changed between @{term "\\<^sub>1"} and @{term "\\<^sub>2"}. Namely, the random oracle's part @{term s_h} may change when the chosen message is encrypted, but the state that records the adversary's queries @{term s} is passed on unchanged. \ text\\input{fig-5}\ subsection \Capturing a failure event\ text \ Suppose that two games behave the same except when a so-called failure event occurs @{cite Shoup2004IACR}. Then the chance of an adversary distinguishing the two games is bounded by the probability of the failure event. In other words, the simulation of the reduction is allowed to break if the failure event occurs. In the running example, such an argument is a key step to derive the bound on the adversary's advantage. But to reason about failure events, we must first introduce them into the games we consider. This is because in \CryptHOL{}, the probabilistic programs describe probability distributions over what they return (@{term return_spmf}). The variables that are used internally in the program are not accessible from the outside, i.e., there is no memory to which these are written. This has the advantage that we never have to worry about the names of the variables, e.g., to avoid clashes. The drawback is that we must explicitly introduce all the events that we are interested in. Introducing a failure event into a game is straightforward. So far, the games @{term ind_cpa.game} and @{term game\<^sub>0} simply denoted the probability distribution of whether the adversary has guessed right. For hashed Elgamal, the simulation breaks if the adversary queries the random oracle with the same query @{term "\<^bold>g [^] (x * y)"} that is used for encrypting the chosen message \m\<^sub>b\. So we simply change the return type of the game to return whether the adversary guessed right \emph{and} whether the failure event has occurred. The next definition @{term game\<^sub>1} does so. (Recall that @{term oracle'} stores in its first state component @{term s} the queries by the adversary.) In preparation of the next reasoning step, we also split off the first two samplings, namely of @{term x} and @{term y}, and make them parameters of @{term game\<^sub>1}. \ define game\<^sub>1 :: "nat \ nat \ (bool \ bool) spmf" where "game\<^sub>1 x y = do { b \ coin_spmf; (((m\<^sub>1, m\<^sub>2), \), (s, s_h)) \ exec_gpv oracle' (\\<^sub>1 (\<^bold>g [^] x)) ({}, ro.initial); TRY do { _ :: unit \ assert_spmf (valid_plains m\<^sub>1 m\<^sub>2); (h, s_h') \ ro.oracle s_h (\<^bold>g [^] (x * y)); let c = (\<^bold>g [^] y, h [\] (if b then m\<^sub>1 else m\<^sub>2)); (b', (s', s_h'')) \ exec_gpv oracle' (\\<^sub>2 c \) (s, s_h'); return_spmf (b' = b, \<^bold>g [^] (x * y) \ s') } ELSE do { b \ coin_spmf; return_spmf (b, \<^bold>g [^] (x * y) \ s) } }" for x y text \ It is easy to prove that @{term game\<^sub>0} combined with the random oracle is a projection of @{term game\<^sub>1} with the sampling added, as formalized in \game\<^sub>0_game\<^sub>1\. \ let ?sample = "\f :: nat \ nat \ _ spmf. do { x \ sample_uniform (order \); y \ sample_uniform (order \); f x y }" have game\<^sub>0_game\<^sub>1: "run_gpv ro.oracle game\<^sub>0 ro.initial = map_spmf fst (?sample game\<^sub>1)" by(simp add: game\<^sub>0 game\<^sub>1_def o_def split_def map_try_spmf map_scale_spmf) subsection \Game hop based on a failure event\ text \ A game hop based on a failure event changes one game into another such that they behave identically unless the failure event occurs. The @{thm [source] "fundamental_lemma"} bounds the absolute difference between the two games by the probability of the failure event. In the running example, we would like to avoid querying the random oracle when encrypting the chosen message. The next game @{term game\<^sub>2} is identical except that the call to the random oracle @{term ro.oracle} is replaced with sampling a random bitstring.% \footnote{% In Shoup's terminology @{cite Shoup2004IACR}, such a step makes (a gnome sitting inside) the random oracle forgetting the query. } \ define game\<^sub>2 :: "nat \ nat \ (bool \ bool) spmf" where "game\<^sub>2 x y = do { b \ coin_spmf; (((m\<^sub>1, m\<^sub>2), \), (s, s_h)) \ exec_gpv oracle' (\\<^sub>1 (\<^bold>g [^] x)) ({}, ro.initial); TRY do { _ :: unit \ assert_spmf (valid_plains m\<^sub>1 m\<^sub>2); h \ spmf_of_set (nlists UNIV len_plain); \ \We do not query the random oracle for @{term \\<^bold>g [^] (x * y)\}, but instead sample a random bitstring @{term h} directly. So the rest differs from @{term game\<^sub>1} only if the adversary queries @{term \\<^bold>g [^] (x * y)\}.\ let cipher = (\<^bold>g [^] y, h [\] (if b then m\<^sub>1 else m\<^sub>2)); (b', (s', s_h')) \ exec_gpv oracle' (\\<^sub>2 cipher \) (s, s_h); return_spmf (b' = b, \<^bold>g [^] (x * y) \ s') } ELSE do { b \ coin_spmf; return_spmf (b, \<^bold>g [^] (x * y) \ s) } }" for x y text \ To apply the @{thm [source] fundamental_lemma}, we first have to prove that the two games are indeed the same except when the failure event occurs. \ have "rel_spmf (\(win, bad) (win', bad'). bad = bad' \ (\ bad' \ win = win')) (game\<^sub>2 x y) (game\<^sub>1 x y)" for x y proof - text \ This proof requires two invariants on the state of @{term oracle'}. First, @{term "s = dom s_h"}. Second, @{term s} only becomes larger. The next two statements capture the two invariants: \ interpret inv_oracle': callee_invariant_on "oracle'" "(\(s, s_h). s = dom s_h)" \_full by unfold_locales(auto simp add: oracle'_def split: option.split_asm if_split) interpret bad: callee_invariant_on "oracle'" "(\(s, _). z \ s)" \_full for z by unfold_locales(auto simp add: oracle'_def) text \ First, we identify a bisimulation relation \?X\ between the different states of @{term oracle'} for the second phase of the game. Namely, the invariant @{term "s = dom s_h"} holds, the set of queries are the same, and the random oracle's state (a map from queries to responses) differs only at the point @{term "\<^bold>g [^] (x * y)"}. \ let ?X = "\(s, s_h) (s', s_h'). s = dom s_h \ s' = s \ s_h = s_h'(\<^bold>g [^] (x * y) := None)" text \ Then, we can prove that @{term [source] "?X"} really is a bisimulation for @{term oracle'} except when the failure event occurs. The next statement expresses this. \ let ?bad = "\(s, s_h). \<^bold>g [^] (x * y) \ s" let ?R = "(\(a, s1') (b, s2'). ?bad s1' = ?bad s2' \ (\ ?bad s2' \ a = b \ ?X s1' s2'))" have bisim: "rel_spmf ?R (oracle' s1 plain) (oracle' s2 plain)" if "?X s1 s2" for s1 s2 plain using that by(auto split: prod.splits intro!: rel_spmf_bind_reflI simp add: oracle'_def rel_spmf_return_spmf2 fun_upd_twist split: option.split dest!: fun_upd_eqD) have inv: "callee_invariant oracle' ?bad" \ \Once the failure event has happened, it will not be forgotten any more.\ by(unfold_locales)(auto simp add: oracle'_def split: option.split_asm) text \ Now we are ready to prove that the two games @{term game\<^sub>1} and @{term game\<^sub>2} are sufficiently similar. The Isar proof now switches into an @{command apply} script that manipulates the goal state directly. This is sometimes convenient when it would be too cumbersome to spell out every intermediate goal state. \ show ?thesis unfolding game\<^sub>1_def game\<^sub>2_def \ \Peel off the first phase of the game using the structural decomposition rules @{thm [source] rel_spmf_bind_reflI} and @{thm [source] rel_spmf_try_spmf}.\ apply(clarsimp intro!: rel_spmf_bind_reflI simp del: bind_spmf_const) apply(rule rel_spmf_try_spmf) subgoal TRY for b m\<^sub>1 m\<^sub>2 \ s s_h apply(rule rel_spmf_bind_reflI) \ \Exploit that in the first phase of the game, the set @{term s} of queried strings and the map of the random oracle @{term s_h} are updated in lock step, i.e., @{term "s = dom s_h"}.\ apply(drule inv_oracle'.exec_gpv_invariant; clarsimp) \ \Has the adversary queried the random oracle with @{term "\<^bold>g [^] (x * y)"} during the first phase?\ apply(cases "\<^bold>g [^] (x * y) \ s") subgoal True \ \Then the failure event has already happened and there is nothing more to do. We just have to prove that the two games on both sides terminate with the same probability.\ by(auto intro!: rel_spmf_bindI1 rel_spmf_bindI2 lossless_exec_gpv[where \=\_full] dest!: bad.exec_gpv_invariant) subgoal False \ \Then let's see whether the adversary queries @{term "\<^bold>g [^] (x * y)"} in the second phase. Thanks to @{thm [source] ro.fresh}, the call to the random oracle simplifies to sampling a random bitstring.\ apply(clarsimp iff del: domIff simp add: domIff ro.fresh intro!: rel_spmf_bind_reflI) apply(rule rel_spmf_bindI[where R="?R"]) \ \The lemma @{thm [source] exec_gpv_oracle_bisim_bad_full} lifts the bisimulation for @{term oracle'} to the adversary @{term \\<^sub>2} interacting with @{term oracle'}.\ apply(rule exec_gpv_oracle_bisim_bad_full[OF _ _ bisim inv inv]) apply(auto simp add: fun_upd_idem) done done subgoal ELSE by(rule rel_spmf_reflI) clarsimp done qed text \ Now we can add the sampling of @{term x} and @{term y} in front of @{term game\<^sub>1} and @{term game\<^sub>2}, apply the @{thm [source] fundamental_lemma}. \ hence "rel_spmf (\(win, bad) (win', bad'). (bad \ bad') \ (\ bad' \ win \ win')) (?sample game\<^sub>2) (?sample game\<^sub>1)" by(intro rel_spmf_bind_reflI) hence "\measure (measure_spmf (?sample game\<^sub>2)) {(win, _). win} - measure (measure_spmf (?sample game\<^sub>1)) {(win, _). win}\ \ measure (measure_spmf (?sample game\<^sub>2)) {(_, bad). bad}" unfolding split_def by(rule fundamental_lemma) moreover text \ The @{thm [source] fundamental_lemma} is written in full generality for arbitrary events, i.e., sets of elementary events. But in this formalization, the events of interest (correct guess and failure) are elementary events. We therefore transform the above statement to measure the probability of elementary events using @{term spmf}. \ have "measure (measure_spmf (?sample game\<^sub>2)) {(win, _). win} = spmf (map_spmf fst (?sample game\<^sub>2)) True" and "measure (measure_spmf (?sample game\<^sub>1)) {(win, _). win} = spmf (map_spmf fst (?sample game\<^sub>1)) True" and "measure (measure_spmf (?sample game\<^sub>2)) {(_, bad). bad} = spmf (map_spmf snd (?sample game\<^sub>2)) True" unfolding spmf_conv_measure_spmf measure_map_spmf by(auto simp add: vimage_def split_def) ultimately have hop12: "\spmf (map_spmf fst (?sample game\<^sub>2)) True - spmf (map_spmf fst (?sample game\<^sub>1)) True\ \ spmf (map_spmf snd (?sample game\<^sub>2)) True" by simp subsection \Optimistic sampling: the one-time-pad \label{section:ghop-last}\ text \ This step is based on the one-time-pad, which is an instance of optimistic sampling. If two runs of the two games in an optimistic sampling step would use the same random bits, then their results would be different. However, if the adversary's choices are independent of the random bits, we may relate runs that use different random bits, as in the end, only the probabilities have to match. The previous game hop from @{term game\<^sub>1} to @{term game\<^sub>2} made the oracle's responses in the second phase independent from the encrypted ciphertext. So we can now change the bits used for encrypting the chosen message and thereby make the ciphertext independent of the message. To that end, we parametrize @{term game\<^sub>2} by the part that does the optimistic sampling and call this parametrized version @{term game\<^sub>3}. \ define game\<^sub>3 :: "(bool \ bitstring \ bitstring \ bitstring spmf) \ nat \ nat \ (bool \ bool) spmf" where "game\<^sub>3 f x y = do { b \ coin_spmf; (((m\<^sub>1, m\<^sub>2), \), (s, s_h)) \ exec_gpv oracle' (\\<^sub>1 (\<^bold>g [^] x)) ({}, ro.initial); TRY do { _ :: unit \ assert_spmf (valid_plains m\<^sub>1 m\<^sub>2); h' \ f b m\<^sub>1 m\<^sub>2; let cipher = (\<^bold>g [^] y, h'); (b', (s', s_h')) \ exec_gpv oracle' (\\<^sub>2 cipher \) (s, s_h); return_spmf (b' = b, \<^bold>g [^] (x * y) \ s') } ELSE do { b \ coin_spmf; return_spmf (b, \<^bold>g [^] (x * y) \ s) } }" for f x y text \ Clearly, if we plug in the appropriate function \?f\, then we get @{term game\<^sub>2}: \ let ?f = "\b m\<^sub>1 m\<^sub>2. map_spmf (\h. (if b then m\<^sub>1 else m\<^sub>2) [\] h) (spmf_of_set (nlists UNIV len_plain))" have game\<^sub>2_game\<^sub>3: "game\<^sub>2 x y = game\<^sub>3 ?f x y" for x y by(simp add: game\<^sub>2_def game\<^sub>3_def Let_def bind_map_spmf xor_list_commute o_def) text \ CryptHOL's @{thm [source] one_time_pad} lemma now allows us to remove the exclusive or with the chosen message, because the resulting distributions are the same. The proof is slightly non-trivial because the one-time-pad lemma holds only if the xor'ed bitstrings have the right length, which the assertion @{term valid_plains} ensures. The congruence rules @{thm [source] try_spmf_cong bind_spmf_cong[OF refl] if_cong[OF refl]} extract this information from the program of the game. \ let ?f' = "\b m\<^sub>1 m\<^sub>2. spmf_of_set (nlists UNIV len_plain)" have game\<^sub>3: "game\<^sub>3 ?f x y = game\<^sub>3 ?f' x y" for x y by(auto intro!: try_spmf_cong bind_spmf_cong[OF refl] if_cong[OF refl] simp add: game\<^sub>3_def split_def one_time_pad valid_plains_def simp del: map_spmf_of_set_inj_on bind_spmf_const split: if_split) text \ The rest of the proof consists of simplifying @{term [source] "game\<^sub>3 ?f'"}. The steps are similar to what we have shown before, so we do not explain them in detail. The interested reader can look at them in the theory file from which this document was generated. At a high level, we see that there is no need to track the adversary's queries in @{term game\<^sub>2} or @{term game\<^sub>3} any more because this information is already stored in the random oracle's state. So we change the @{term oracle'} back into @{term ro.oracle} using the Transfer package. With a bit of rewriting, the result is then the @{term lcdh.game} for the adversary @{term "elgamal_reduction \"}. Moreover, the guess @{term b'} of the adversary is independent of @{term b} in @{term [source] "game\<^sub>3 ?f"}, so the first boolean returned by @{term [source] "game\<^sub>3 ?f'"} is just a coin flip. \ have game\<^sub>3_bad: "map_spmf snd (?sample (game\<^sub>3 ?f')) = lcdh.game (elgamal_reduction \)" proof %invisible - define bisim where "bisim = (\\ (s :: 'grp set, \' :: 'grp \ bitstring). s = dom \ \ \ = \')" have [transfer_rule]: "bisim Map_empty ({}, Map_empty)" by(simp add: bisim_def) have [transfer_rule]: "(bisim ===> (=) ===> rel_spmf (rel_prod (=) bisim)) ro.oracle oracle'" by(auto simp add: oracle'_def split_def spmf_rel_map ro.oracle_def rel_fun_def bisim_def split: option.split intro!: rel_spmf_bind_reflI) have * [transfer_rule]: "(bisim ===> (=)) dom fst" by(simp add: bisim_def rel_fun_def) have * [transfer_rule]: "(bisim ===> (=)) (\x. x) snd" by(simp add: rel_fun_def bisim_def) have "game\<^sub>3 ?f' x y = do { b \ coin_spmf; (((msg1, msg2), \), s) \ exec_gpv ro.oracle (\\<^sub>1 (\<^bold>g [^] x)) ro.initial; TRY do { _ :: unit \ assert_spmf (valid_plains msg1 msg2); h' \ spmf_of_set (nlists UNIV len_plain); let cipher = (\<^bold>g [^] y, h'); (b', s') \ exec_gpv ro.oracle (\\<^sub>2 cipher \) s; return_spmf (b' = b, \<^bold>g [^] (x * y) \ dom s') } ELSE do { b \ coin_spmf; return_spmf (b, \<^bold>g [^] (x * y) \ dom s) } }" for x y unfolding game\<^sub>3_def Map_empty_def[symmetric] split_def fst_conv snd_conv prod.collapse by(transfer fixing: \ len_plain x y) simp moreover have "map_spmf snd (\ x y) = do { zs \ elgamal_reduction \ (\<^bold>g [^] x) (\<^bold>g [^] y); return_spmf (\<^bold>g [^] (x * y) \ zs) }" for x y by(simp add: o_def split_def map_try_spmf map_scale_spmf) (simp add: o_def map_try_spmf map_scale_spmf map_spmf_conv_bind_spmf[symmetric] spmf.map_comp map_const_spmf_of_set) ultimately show ?thesis by(simp add: o_def lcdh.game_def Let_def lcdh.nat_pow_pow) qed have game\<^sub>3_guess: "map_spmf fst (game\<^sub>3 ?f' x y) = coin_spmf" for x y proof %invisible - have "map_spmf fst (game\<^sub>3 ?f' x y) = do { (((msg1, msg2), \), (s, s_h)) \ exec_gpv oracle' (\\<^sub>1 (\<^bold>g [^] x)) ({}, ro.initial); TRY do { _ :: unit \ assert_spmf (valid_plains msg1 msg2); h' \ spmf_of_set (nlists UNIV len_plain); (b', (s', s_h')) \ exec_gpv oracle' (\\<^sub>2 (\<^bold>g [^] y, h') \) (s, s_h); map_spmf ((=) b') coin_spmf } ELSE coin_spmf }" for x y including monad_normalisation by(simp add: game\<^sub>3_def o_def split_def map_spmf_conv_bind_spmf try_spmf_bind_out weight_spmf_le_1 scale_bind_spmf try_spmf_bind_out1 bind_scale_spmf) then show ?thesis by(simp add: o_def if_distribs spmf.map_comp map_eq_const_coin_spmf split_def) qed subsection \Combining several game hops \label{section:combining:hops}\ text \ Finally, we combine all the (in)equalities of the previous steps to obtain the desired bound using the lemmas for reasoning about reals from Isabelle's library. \ have "ind_cpa.advantage (ro.oracle, ro.initial) \ = \spmf (map_spmf fst (?sample game\<^sub>1)) True - 1 / 2\" using ind_cpa_game_eq_game\<^sub>0 by(simp add: game\<^sub>0_game\<^sub>1 o_def) also have "\ = \1 / 2 - spmf (map_spmf fst (?sample game\<^sub>1)) True\" by(simp add: abs_minus_commute) also have "1 / 2 = spmf (map_spmf fst (?sample game\<^sub>2)) True" by(simp add: game\<^sub>2_game\<^sub>3 game\<^sub>3 o_def game\<^sub>3_guess spmf_of_set) also have "\\ - spmf (map_spmf fst (?sample game\<^sub>1)) True\ \ spmf (map_spmf snd (?sample game\<^sub>2)) True" by(rule hop12) also have "\ = lcdh.advantage (elgamal_reduction \)" by(simp add: game\<^sub>2_game\<^sub>3 game\<^sub>3 game\<^sub>3_bad lcdh.advantage_def o_def del: map_bind_spmf) finally show ?thesis . text \ This completes the concrete proof and we can end the locale @{locale hashed_elgamal}. \ qed end section \Asymptotic security \label{section:asymptotic}\ text \ An asymptotic security statement can be easily derived from a concrete security theorem. This is done in two steps: First, we have to introduce a security parameter @{term \} into the definitions and assumptions. Only then can we state asymptotic security. The proof is easy given the concrete security theorem. \ subsection \Introducing a security parameter\ text \ Since all our definitions were done in locales, it is easy to introduce a security parameter after the fact. To that end, we define copies of all locales where their parameters now take the security parameter as an additional argument. We illustrate it for the locale @{locale ind_cpa_pk}. The @{command sublocale} command brings all the definitions and theorems of the original @{locale ind_cpa_pk} into the copy and adds the security parameter where necessary. The type @{typ security} is a synonym for @{typ nat}. \ locale ind_cpa_pk' = fixes key_gen :: "security \ ('pubkey \ 'privkey, 'query, 'response) gpv" and encrypt :: "security \ 'pubkey \ 'plain \ ('cipher, 'query, 'response) gpv" and decrypt :: "security \ 'privkey \ 'cipher \ ('plain, 'query, 'response) gpv" and valid_plains :: "security \ 'plain \ 'plain \ bool" begin sublocale ind_cpa_pk "key_gen \" "encrypt \" "decrypt \" "valid_plains \" for \ . end locale %invisible list_cdh' = fixes \ :: "security \ 'grp cyclic_group" assumes cyclic_group: "cyclic_group (\ \)" begin sublocale %invisible cyclic_group "\ \" for \ by(fact cyclic_group) end %invisible locale %invisible random_oracle' = fixes len :: "security \ nat" begin sublocale %invisible random_oracle "len \" for \ . end %invisible lemma %invisible (in ind_cpa_pk) advantage_nonneg: "advantage oracle \ \ 0" by(cases "oracle"; simp) lemma %invisible (in list_cdh) advantage_nonneg: "advantage \ \ 0" by(simp add: advantage_def) text \ We do so similarly for @{locale list_cdh}, @{locale random_oracle}, and @{locale hashed_elgamal}. \ locale hashed_elgamal' = lcdh: list_cdh' \ + ro: random_oracle' len_plain for \ :: "security \ 'grp cyclic_group" and len_plain :: "security \ nat" begin sublocale hashed_elgamal "\ \" "len_plain \" for \ .. subsection \Asymptotic security statements \label{section:security:asymptotic}\ text \ For asymptotic security statements, \CryptHOL{} defines the predicate @{term negligible}. It states that the given real-valued function approaches 0 faster than the inverse of any polynomial. A concrete security statement translates into an asymptotic one as follows: \begin{itemize} \item All advantages in the bound become negligibility assumptions. \item All side conditions of the concrete security theorems remain assumptions, but wrapped into an @{term eventually} statement. This expresses that the side condition holds eventually, i.e., there is a security parameter from which on it holds. \item The conclusion is that the bounded advantage is @{term negligible}. \end{itemize} \ theorem asymptotic_security_elgamal: assumes "negligible (\\. lcdh.advantage \ (elgamal_reduction \ (\ \)))" and "eventually (\\. ind_cpa.lossless (\ \)) at_top" shows "negligible (\\. ind_cpa.advantage \ (ro.oracle \, ro.initial) (\ \))" text \ The proof is canonical, too: Using the lemmas about @{term negligible} and Eberl's library for asymptotic reasoning~\cite{Eberl2015}, we transform the asymptotic statement into a concrete one and then simply use the concrete security statement. \ apply(rule negligible_mono[OF assms(1)]) apply(rule landau_o.big_mono) apply(rule eventually_rev_mp[OF assms(2)]) apply(intro eventuallyI impI) apply(simp del: ind_cpa.advantage.simps add: ind_cpa.advantage_nonneg lcdh.advantage_nonneg) by(rule concrete_security_elgamal) end end %invisible diff --git a/thys/Isabelle_C/C11-FrontEnd/examples/C2.thy b/thys/Isabelle_C/C11-FrontEnd/examples/C2.thy --- a/thys/Isabelle_C/C11-FrontEnd/examples/C2.thy +++ b/thys/Isabelle_C/C11-FrontEnd/examples/C2.thy @@ -1,896 +1,896 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) chapter \Appendix IV : Examples for Annotation Navigation and Context Serialization\ theory C2 imports "../C_Main" "HOL-ex.Cartouche_Examples" begin text \ Operationally, the \<^theory_text>\C\ command can be thought of as behaving as the \<^theory_text>\ML\ command, where it is for example possible to recursively nest C code in C. Generally, the present chapter assumes a familiarity with all advance concepts of ML as described in \<^file>\~~/src/HOL/Examples/ML.thy\, as well as the concept of ML antiquotations (\<^file>\~~/src/Doc/Implementation/ML.thy\). However, even if\<^theory_text>\C\ might resemble to \<^theory_text>\ML\, we will now see in detail that there are actually subtle differences between the two commands.\ section \Setup of ML Antiquotations Displaying the Environment (For Debugging) \ ML\ fun print_top make_string f _ (_, (value, _, _)) _ = tap (fn _ => writeln (make_string value)) o f fun print_top' _ f _ _ env = tap (fn _ => writeln ("ENV " ^ C_Env.string_of env)) o f fun print_stack s make_string stack _ _ thy = let val () = Output.information ("SHIFT " ^ (case s of NONE => "" | SOME s => "\"" ^ s ^ "\" ") ^ Int.toString (length stack - 1) ^ " +1 ") val () = stack |> split_list |> #2 |> map_index I |> app (fn (i, (value, pos1, pos2)) => writeln (" " ^ Int.toString (length stack - i) ^ " " ^ make_string value ^ " " ^ Position.here pos1 ^ " " ^ Position.here pos2)) in thy end fun print_stack' s _ stack _ env thy = let val () = Output.information ("SHIFT " ^ (case s of NONE => "" | SOME s => "\"" ^ s ^ "\" ") ^ Int.toString (length stack - 1) ^ " +1 ") val () = writeln ("ENV " ^ C_Env.string_of env) in thy end \ setup \ML_Antiquotation.inline @{binding print_top} (Args.context >> K ("print_top " ^ ML_Pretty.make_string_fn ^ " I"))\ setup \ML_Antiquotation.inline @{binding print_top'} (Args.context >> K ("print_top' " ^ ML_Pretty.make_string_fn ^ " I"))\ setup \ML_Antiquotation.inline @{binding print_stack} - (Scan.peek (fn _ => Scan.option Args.text) + (Scan.peek (fn _ => Scan.option Parse.embedded) >> (fn name => ("print_stack " ^ (case name of NONE => "NONE" | SOME s => "(SOME \"" ^ s ^ "\")") ^ " " ^ ML_Pretty.make_string_fn)))\ setup \ML_Antiquotation.inline @{binding print_stack'} - (Scan.peek (fn _ => Scan.option Args.text) + (Scan.peek (fn _ => Scan.option Parse.embedded) >> (fn name => ("print_stack' " ^ (case name of NONE => "NONE" | SOME s => "(SOME \"" ^ s ^ "\")") ^ " " ^ ML_Pretty.make_string_fn)))\ declare[[C_lexer_trace]] section \Introduction to C Annotations: Navigating in the Parsing Stack\ subsection \Basics\ text \ Since the present theory \<^file>\C1.thy\ is depending on \<^theory>\Isabelle_C.C_Lexer_Language\ and \<^theory>\Isabelle_C.C_Parser_Language\, the syntax one is writing in the \<^theory_text>\C\ command is C11. Additionally, \<^file>\C1.thy\ also depends on \<^theory>\Isabelle_C.C_Parser_Annotation\, making it possible to write commands in C comments, called annotation commands, such as \<^theory_text>\\setup\. \ C \ \Nesting ML code in C comments\ \ int a = (((0))); /*@ highlight */ /*@ \setup \@{print_stack}\ */ /*@ \setup \@{print_top}\ */ \ text \ In terms of execution order, nested annotation commands are not pre-filtered out of the C code, but executed when the C code is still being parsed. Since the parser implemented is a LALR parser \<^footnote>\\<^url>\https://en.wikipedia.org/wiki/LALR\\, C tokens are uniquely read and treated from left to right. Thus, each nested command is (supposed by default to be) executed when the parser has already read all C tokens before the comment associated to the nested command, so when the parser is in a particular intermediate parsing step (not necessarily final) \<^footnote>\\<^url>\https://en.wikipedia.org/wiki/Shift-reduce_parser\\. \ text \The command \<^theory_text>\\setup\ is similar to the command \<^theory_text>\setup\ except that the former takes a function with additional arguments. These arguments are precisely depending on the current parsing state. To better examine these arguments, it is convenient to use ML antiquotations (be it for printing, or for doing any regular ML actions like PIDE reporting). Note that, in contrast with \<^theory_text>\setup\, the return type of the \<^theory_text>\\setup\ function is not \<^ML_type>\theory -> theory\ but \<^ML_type>\Context.generic -> Context.generic\. \ C \ \Positional navigation: referring to any previous parsed sub-tree in the stack\ \ int a = (((0 + 5))) /*@@ \setup \print_top @{make_string} I\ @ highlight */ * 4; float b = 7 / 3; \ text \The special \@\ symbol makes the command be executed whenever the first element \E\ in the stack is about to be irremediably replaced by a more structured parent element (having \E\ as one of its direct children). It is the parent element which is provided to the ML code. Instead of always referring to the first element of the stack, \N\ consecutive occurrences of \@\ will make the ML code getting as argument the direct parent of the \N\-th element.\ C \ \Positional navigation: referring to any previous parsed sub-tree in the stack\ \ int a = (((0 + 5))) /*@@ highlight */ * 4; int a = (((0 + 5))) /*@& highlight */ * 4; int a = (((0 + 5))) /*@@@@@ highlight */ * 4; int a = (((0 + 5))) /*@&&&& highlight */ * 4; \ text \\&\ behaves as \@\, but instead of always giving the designated direct parent to the ML code, it finds the first parent ancestor making non-trivial changes in the respective grammar rule (a non-trivial change can be for example the registration of the position of the current AST node being built).\ C \ \Positional navigation: moving the comment after a number of C token\ \ int b = 7 / (3) * 50; /*@+++@@ highlight */ long long f (int a) { while (0) { return 0; } } int b = 7 / (3) * 50; \ text \\N\ consecutive occurrences of \+\ will delay the interpretation of the comment, which is ignored at the place it is written. The comment is only really considered after the C parser has treated \N\ more tokens.\ C \ \Closing C comments \*/\ must close anything, even when editing ML code\ \ int a = (((0 //@ (* inline *) \setup \fn _ => fn _ => fn _ => fn context => let in (* */ *) context end\ /*@ \setup \(K o K o K) I\ (* * / *) */ ))); \ C \ \Inline comments with antiquotations\ \ /*@ \setup\(K o K o K) (fn x => K x @{con\ text (**)})\ */ // break of line activated everywhere (also in antiquotations) int a = 0; //\ @ \setup\(K o K o K) (fn x => K x @{term \a \ + b\ (* (**) *\ \ )})\ \ subsection \Erroneous Annotations Treated as Regular C Comments\ C \ \Permissive Types of Antiquotations\ \ int a = 0; /*@ \setup (* Errors: Explicit warning + Explicit markup reporting *) */ /** \setup (* Errors: Turned into tracing report information *) */ /** \setup \fn _ => fn _ => fn _ => I\ (* An example of correct syntax accepted as usual *) */ \ C \ \Permissive Types of Antiquotations\ \ int a = 0; /*@ \setup \fn _ => fn _ => fn _ => I\ \setup (* Parsing error of a single command does not propagate to other commands *) \setup \fn _ => fn _ => fn _ => I\ context */ /** \setup \fn _ => fn _ => fn _ => I\ \setup (* Parsing error of a single command does not propagate to other commands *) \setup \fn _ => fn _ => fn _ => I\ context */ /*@ \setup (* Errors in all commands are all rendered *) \setup (* Errors in all commands are all rendered *) \setup (* Errors in all commands are all rendered *) */ /** \setup (* Errors in all commands makes the whole comment considered as an usual comment *) \setup (* Errors in all commands makes the whole comment considered as an usual comment *) \setup (* Errors in all commands makes the whole comment considered as an usual comment *) */ \ subsection \Bottom-Up vs. Top-Down Evaluation\ ML\ structure Example_Data = Generic_Data(type T = string list val empty = [] val merge = K empty) fun add_ex s1 s2 = Example_Data.map (cons s2) #> (fn context => let val () = Output.information (s1 ^ s2) val () = app (fn s => writeln (" Data content: " ^ s)) (Example_Data.get context) in context end) \ setup \Context.theory_map (Example_Data.put [])\ declare[[ML_source_trace]] declare[[C_parser_trace]] C \ \Arbitrary interleaving of effects: \\setup\ vs \\setup\\\ \ int b,c,d/*@@ \setup \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "3_print_top"\ */,e = 0; /*@@ \setup \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "4_print_top"\ */ int b,c,d/*@@ \setup\ \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "6_print_top"\ */,e = 0; /*@@ \setup\ \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "5_print_top"\ */ \ section \Reporting of Positions and Contextual Update of Environment\ text \ To show the content of the parsing environment, the ML antiquotations \print_top'\ and \print_stack'\ will respectively be used instead of \print_top\ and \print_stack\. This example suite allows to explore the bindings represented in the C environment and made accessible in PIDE for hovering. \ subsection \Reporting: \typedef\, \enum\\ (*\struct\*) declare [[ML_source_trace = false]] declare [[C_lexer_trace = false]] C \ \Reporting of Positions\ \ typedef int i, j; /*@@ \setup \@{print_top'}\ @highlight */ //@ +++++@ \setup \@{print_top'}\ +++++@highlight int j = 0; typedef int i, j; j jj1 = 0; j jj = jj1; j j = jj1 + jj; typedef i j; typedef i j; typedef i j; i jj = jj; j j = jj; \ C \ \Nesting type definitions\ \ typedef int j; j a = 0; typedef int k; int main (int c) { j b = 0; typedef int k; typedef k l; k a = c; l a = 0; } k a = a; \ C \ \Reporting \enum\\ \ enum a b; // bound case: undeclared enum a {aaa}; // define case enum a {aaa}; // define case: redefined enum a _; // bound case __thread (f ( enum a, enum a vv)); enum a /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.function_definition4\\*/ f (enum a a) { } __thread enum a /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.declaration_specifier2\\*/ f (enum a a) { enum c {ccc}; // define case __thread enum c f (enum c a) { return 0; } enum c /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.nested_function_definition2\\*/ f (enum c a) { return 0; } return 0; } enum z {zz}; // define case int main (enum z *x) /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.parameter_type_list2\\*/ { return zz; } int main (enum a *x, ...) /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.parameter_type_list3\\*/ { return zz; } \ subsection \Continuation Calculus with the C Environment: Presentation in ML\ declare [[C_parser_trace = false]] ML\ val C = C_Module.C val C' = C_Module.C' \ C \ \Nesting C code without propagating the C environment\ \ int a = 0; int b = 7 / (3) * 50 /*@@@@@ \setup \fn _ => fn _ => fn _ => C \int b = a + a + a + a + a + a + a ;\ \ */; \ C \ \Nesting C code and propagating the C environment\ \ int a = 0; int b = 7 / (3) * 50 /*@@@@@ \setup \fn _ => fn _ => fn env => C' env \int b = a + a + a + a + a + a + a ;\ \ */; \ subsection \Continuation Calculus with the C Environment: Presentation with Outer Commands\ ML\ val _ = Theory.setup (C_Inner_Syntax.command0 (fn src => fn context => C' (C_Stack.Data_Lang.get' context |> #2) src context) C_Parse.C_source ("C'", \<^here>, \<^here>, \<^here>)) \ C \ \Nesting C code without propagating the C environment\ \ int f (int a) { int b = 7 / (3) * 50 /*@ C \int b = a + a + a + a + a + a + a;\ */; int c = b + a + a + a + a + a + a; } \ C \ \Nesting C code and propagating the C environment\ \ int f (int a) { int b = 7 / (3) * 50 /*@ C' \int b = a + a + a + a + a + a + a;\ */; int c = b + b + b + b + a + a + a + a + a + a; } \ C \ \Miscellaneous\ \ int f (int a) { int b = 7 / (3) * 50 /*@ C \int b = a + a + a + a + a; //@ C' \int c = b + b + b + b + a;\ \ */; int b = 7 / (3) * 50 /*@ C' \int b = a + a + a + a + a; //@ C' \int c = b + b + b + b + a;\ \ */; int c = b + b + b + b + a + a + a + a + a + a; } \ subsection \Continuation Calculus with the C Environment: Deep-First Nesting vs Breadth-First Folding: Propagation of \<^ML_type>\C_Env.env_lang\\ C \ \Propagation of report environment while manually composing at ML level (with \#>\)\ \ \In \c1 #> c2\, \c1\ and \c2\ should not interfere each other.\ \ //@ ML \fun C_env src _ _ env = C' env src\ int a; int f (int b) { int c = 0; /*@ \setup \fn _ => fn _ => fn env => C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ \ */ int e = a + b + c + d; }\ C \ \Propagation of directive environment (evaluated before parsing) to any other annotations (evaluated at parsing time)\ \ #undef int #define int(a,b) int #define int int int a; int f (int b) { int c = 0; /*@ \setup \fn _ => fn _ => fn env => C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ \ */ #undef int int e = a + b + c + d; } \ subsection \Continuation Calculus with the C Environment: Deep-First Nesting vs Breadth-First Folding: Propagation of \<^ML_type>\C_Env.env_tree\\ ML\ structure Data_Out = Generic_Data (type T = int val empty = 0 val merge = K empty) fun show_env0 make_string f msg context = Output.information ("(" ^ msg ^ ") " ^ make_string (f (Data_Out.get context))) val show_env = tap o show_env0 @{make_string} I \ setup \Context.theory_map (C_Module.Data_Accept.put (fn _ => fn _ => Data_Out.map (fn x => x + 1)))\ C \ \Propagation of Updates\ \ typedef int i, j; int j = 0; typedef int i, j; j jj1 = 0; j jj = jj1; /*@@ \setup \fn _ => fn _ => fn _ => show_env "POSITION 0"\ @\setup \@{print_top'}\ */ typedef int k; /*@@ \setup \fn _ => fn _ => fn env => C' env \k jj = jj; //@@ \setup \@{print_top'}\ k jj = jj + jj1; typedef k l; //@@ \setup \@{print_top'}\\ #> show_env "POSITION 1"\ */ j j = jj1 + jj; //@@ \setup \@{print_top'}\ typedef i j; /*@@ \setup \fn _ => fn _ => fn _ => show_env "POSITION 2"\ */ typedef i j; typedef i j; i jj = jj; j j = jj; \ ML\show_env "POSITION 3" (Context.Theory @{theory})\ setup \Context.theory_map (C_Module.Data_Accept.put (fn _ => fn _ => I))\ subsection \Reporting: Scope of Recursive Functions\ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = last]] C \ \Propagation of Updates\ \ int a = 0; int b = a * a + 0; int jjj = b; int main (void main(int *x,int *y),int *jjj) { return a + jjj + main(); } int main2 () { int main3 () { main2() + main(); } int main () { main2() + main(); } return a + jjj + main3() + main(); } \ C \ int main3 () { main2 (); } \ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = empty]] subsection \Reporting: Extensions to Function Types, Array Types\ C \int f (int z);\ C \int * f (int z);\ C \int (* f) (int z /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.declarator1\\*/);\ C \typedef int (* f) (int z);\ C \int f (int z) {}\ C \int * f (int z) {return z;}\ C \int ((* f) (int z1, int z2)) {return z1 + z2;}\ C \int (* (* f) (int z1, int z2)) {return z1 + z2;}\ C \typedef int (* f) (int z); f uuu (int b) {return b;};\ C \typedef int (* (* f) (int z, int z)) (int a); f uuu (int b) {return b;};\ C \struct z { int (* f) (int z); int (* (* ff) (int z)) (int a); };\ C \double (* (* f (int a /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.declarator1\\*/)) (int a, double d)) (char a);\ C \double (* (((* f) []) (int a)) (int b, double c)) (char d) {int a = b + c + d;}\ C \double ((*((f) (int a))) (int a /* \\ \\<^ML>\C_Grammar_Rule_Lib.doFuncParamDeclIdent\\*/, double)) (char c) {int a = 0;}\ C \ \Nesting functions\ \ double (* (* f (int a)) (int a, double)) (char c) { double (* (* f (int a)) (double a, int a)) (char) { return a; } } \ C \ \Old function syntax\ \ f (x) int x; {return x;} \ section \General Isar Commands\ locale zz begin definition "z' = ()" end C \ \Mixing arbitrary commands\ \ int a = 0; int b = a * a + 0; int jjj = b; /*@ @@@ ML \@{lemma \A \ B \ B \ A\ by (ml_tactic \blast_tac ctxt 1\)}\ definition "a' = ()" declare [[ML_source_trace]] lemma (in zz) \A \ B \ B \ A\ by (ml_tactic \blast_tac ctxt 1\) definition (in zz) "z = ()" corollary "zz.z' = ()" apply (unfold zz.z'_def) by blast theorem "True &&& True" by (auto, presburger?) */ \ declare [[ML_source_trace = false]] C \ \Backslash newlines must be supported by \<^ML>\C_Token.syntax'\ (in particular in keywords)\ \ //@ lem\ ma (i\ n z\ z) \ \\ AA \ B\ \\ B \ A\ \ A\ b\ y (ml_t\ actic \\ bla\ st_tac c\ txt\ 0\ 001\) \ section \Starting Parsing Rule\ subsection \Basics\ C \ \Parameterizing starting rule\ \ /*@ declare [[C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0 = "statement"]] C \while (a) {}\ C \a = 2;\ declare [[C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0 = "expression"]] C \2 + 3\ C \a = 2\ C \a[1]\ C \&a\ C \a\ */ \ subsection \Embedding in Inner Terms\ term \\<^C> \ \default behavior of parsing depending on the activated option\ \0\\ term \\<^C>\<^sub>u\<^sub>n\<^sub>i\<^sub>t \ \force the explicit parsing\ \f () {while (a) {}; return 0;} int a = 0;\\ term \\<^C>\<^sub>d\<^sub>e\<^sub>c\<^sub>l \ \force the explicit parsing\ \int a = 0; \\ term \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r \ \force the explicit parsing\ \a\\ term \\<^C>\<^sub>s\<^sub>t\<^sub>m\<^sub>t \ \force the explicit parsing\ \while (a) {}\\ declare [[C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0 = "translation_unit"]] term \\<^C> \ \default behavior of parsing depending on the current option\ \int a = 0;\\ subsection \User Defined Setup of Syntax\ setup \C_Module.C_Term.map_expression (fn _ => fn _ => fn _ => @{term "10 :: nat"})\ setup \C_Module.C_Term.map_statement (fn _ => fn _ => fn _ => @{term "20 :: nat"})\ value \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\1\ + \<^C>\<^sub>s\<^sub>t\<^sub>m\<^sub>t\for (;;);\\ setup \ \redefinition\ \C_Module.C_Term.map_expression (fn _ => fn _ => fn _ => @{term "1000 :: nat"})\ value \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\1\ + \<^C>\<^sub>s\<^sub>t\<^sub>m\<^sub>t\for (;;);\\ setup \C_Module.C_Term.map_default (fn _ => fn _ => fn _ => @{term "True"})\ subsection \Validity of Context for Annotations\ ML \fun fac x = if x = 0 then 1 else x * fac (x - 1)\ ML \ \Execution of annotations in term possible in (the outermost) \<^theory_text>\ML\\ \ \<^term>\ \<^C> \int c = 0; /*@ ML \fac 100\ */\ \ \ definition \ \Execution of annotations in term possible in \<^ML_type>\local_theory\ commands (such as \<^theory_text>\definition\)\ \ term = \<^C> \int c = 0; /*@ ML \fac 100\ */\ \ section \Scopes of Inner and Outer Terms\ ML \ local fun bind scan ((stack1, (to_delay, stack2)), _) = C_Parse.range scan >> (fn (src, range) => C_Env.Parsing ( (stack1, stack2) , ( range , C_Inner_Syntax.bottom_up (fn _ => fn context => ML_Context.exec (tap (fn _ => Syntax.read_term (Context.proof_of context) (Token.inner_syntax_of src))) context) , Symtab.empty , to_delay))) in val _ = Theory.setup ( C_Annotation.command' ("term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r", \<^here>) "" (bind (C_Token.syntax' (Parse.token Parse.cartouche))) #> C_Inner_Syntax.command0 (C_Inner_Toplevel.keep'' o C_Inner_Isar_Cmd.print_term) (C_Token.syntax' (Scan.succeed [] -- Parse.term)) ("term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r", \<^here>, \<^here>, \<^here>)) end \ C \ int z = z; /*@ C \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ */\ term(*outer*) \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \ int z = z; /*@ C \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ */\ term(*outer*) \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = last]] C \ int z = z; /*@ C \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ */\ term(*outer*) \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = empty]] C \ \Propagation of report environment while manually composing at ML level\ \ int a; int f (int b) { int c = 0; /*@ \setup \fn _ => fn _ => fn env => C' env \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ #> C \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ #> C' env \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ #> C \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ \ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ */ int e = a + b + c + d; }\ section \Calculation in Directives\ subsection \Annotation Command Classification\ C \ \Lexing category vs. parsing category\ \ int a = 0; // \ \Category 2: only parsing\ //@ \setup \K (K (K I))\ (* evaluation at parsing *) //@@ \setup\ \K (K (K I))\ (* evaluation at parsing *) //@ highlight (* evaluation at parsing *) //@@ highlight\ (* evaluation at parsing *) // \ \Category 3: with lexing\ //@ #setup I (* evaluation at lexing (and directives resolving) *) //@ setup I (* evaluation at parsing *) //@@ setup\ I (* evaluation at parsing *) //@ #ML I (* evaluation at lexing (and directives resolving) *) //@ ML I (* evaluation at parsing *) //@@ ML\ I (* evaluation at parsing *) //@ #C \\ (* evaluation at lexing (and directives resolving) *) //@ C \\ (* evaluation at parsing *) //@@ C\ \\ (* evaluation at parsing *) \ C \ \Scheduling example\ \ //@+++++ ML \writeln "2"\ int a = 0; //@@ ML\ \writeln "3"\ //@ #ML \writeln "1"\ \ C \ \Scheduling example\ \ //* lemma True by simp //* #lemma True #by simp //* #lemma True by simp //* lemma True #by simp \ C \ \Scheduling example\ \ /*@ lemma \1 = one\ \2 = two\ \two + one = three\ by auto #definition [simp]: \three = 3\ #definition [simp]: \two = 2\ #definition [simp]: \one = 1\ */ \ subsection \Generalizing ML Antiquotations with C Directives\ ML \ structure Directive_setup_define = Generic_Data (type T = int val empty = 0 val merge = K empty) fun setup_define1 pos f = C_Directive.setup_define pos (fn toks => fn (name, (pos1, _)) => tap (fn _ => writeln ("Executing " ^ name ^ Position.here pos1 ^ " (only once)")) #> pair (f toks)) (K I) fun setup_define2 pos = C_Directive.setup_define pos (K o pair) \ C \ \General scheme of C antiquotations\ \ /*@ #setup \ \Overloading \#define\\ \ setup_define2 \<^here> (fn (name, (pos1, _)) => op ` Directive_setup_define.get #>> (case name of "f3" => curry op * 152263 | _ => curry op + 1) #> tap (fn (nb, _) => tracing ("Executing antiquotation " ^ name ^ Position.here pos1 ^ " (number = " ^ Int.toString nb ^ ")")) #> uncurry Directive_setup_define.put) \ */ #define f1 #define f2 int a = 0; #define f3 f1 f2 f1 f3 //@ #setup \ \Resetting \#define\\ \setup_define2 \<^here> (K I)\ f3 #define f3 f3 \ C \ \Dynamic token computing in \#define\\ \ //@ #setup \setup_define1 \<^here> (K [])\ #define f int a = 0; f f f f //@ #setup \setup_define1 \<^here> (fn toks => toks @ toks)\ #define f int b = a; f f //@ #setup \setup_define1 \<^here> I\ #define f int a = 0; f f \ section \Miscellaneous\ C \ \Antiquotations acting on a parsed-subtree\ \ # /**/ include // backslash rendered unescaped f(){0 + 0;} /**/ // val _ : theory => 'a => theory # /* context */ if if elif #include if then else ; # /* zzz */ elif /**/ #else\ #define FOO 00 0 "" (( FOO(FOO(a,b,c)) #endif\ C \ \Header-names in directives\ \ #define F #define G "stdio\h" // expecting an error whenever expanded #define H "stdio_h" // can be used anywhere without errors int f = /*F*/ ""; int g = /*G*/ ""; int h = H ""; #include F \ C \ \Parsing tokens as directives only when detecting space symbols before \#\\ \/* */ \ \ // # /* */ define /**/ \ a a a /*#include <>*/ // must not be considered as a directive \ C \ \Universal character names in identifiers and Isabelle symbols\ \ #include int main () { char * ó\<^url>ò = "ó\<^url>ò"; printf ("%s", ó\<^url>ò); } \ \\The core lexer ...\ ML\ C_Parse.ML_source \ declare[[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = last]] ML\@{C\<^sub>e\<^sub>n\<^sub>v}\ ML\C_Stack.Data_Lang.get' : Context.generic -> (LALR_Table.state, C_Grammar_Rule.svalue0, Position.T) C_Env.stack_elem0 list * C_Env.env_lang; C_Parse.C_source: Input.source C_Parse.parser ; C_Inner_Syntax.command0 ; C'; C; \ declare [[C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0 = "expression"]] ML\ val src = \a + b\; val ctxt = (Context.Proof @{context}); val ctxt' = C' @{C\<^sub>e\<^sub>n\<^sub>v} src ctxt; C_Module.Data_In_Env.get ctxt' \ ML\val _ = @{term \3::nat\}\ ML\ ML_Antiquotation.inline_embedded; \ (* and from where do I get the result ? *) end diff --git a/thys/Isabelle_C/C11-FrontEnd/examples/C3.thy b/thys/Isabelle_C/C11-FrontEnd/examples/C3.thy --- a/thys/Isabelle_C/C11-FrontEnd/examples/C3.thy +++ b/thys/Isabelle_C/C11-FrontEnd/examples/C3.thy @@ -1,862 +1,862 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) chapter \Annnex IV : Examples for Annotation Navigation and Context Serialization\ theory C3 imports "../C_Main" "HOL-ex.Cartouche_Examples" (* This dependency should be erliminated.*) begin text \ Operationally, the \<^theory_text>\C\ command can be thought of as behaving as \<^theory_text>\ML\, where it is for example possible to recursively nest C code in C. Generally, the present chapter assumes a familiarity with all advance concepts of ML as described in \<^file>\~~/src/HOL/Examples/ML.thy\, as well as the concept of ML antiquotations (\<^file>\~~/src/Doc/Implementation/ML.thy\). However, even if \<^theory_text>\C\ might resemble to \<^theory_text>\ML\, we will now see in detail that there are actually subtle differences between the two commands.\ section \Setup of ML Antiquotations Displaying the Environment (For Debugging) \ ML\ fun print_top make_string f _ (_, (value, _, _)) _ = tap (fn _ => writeln (make_string value)) o f fun print_top' _ f _ _ env = tap (fn _ => writeln ("ENV " ^ C_Env.string_of env)) o f fun print_stack s make_string stack _ _ thy = let val () = Output.information ("SHIFT " ^ (case s of NONE => "" | SOME s => "\"" ^ s ^ "\" ") ^ Int.toString (length stack - 1) ^ " +1 ") val () = stack |> split_list |> #2 |> map_index I |> app (fn (i, (value, pos1, pos2)) => writeln (" " ^ Int.toString (length stack - i) ^ " " ^ make_string value ^ " " ^ Position.here pos1 ^ " " ^ Position.here pos2)) in thy end fun print_stack' s _ stack _ env thy = let val () = Output.information ("SHIFT " ^ (case s of NONE => "" | SOME s => "\"" ^ s ^ "\" ") ^ Int.toString (length stack - 1) ^ " +1 ") val () = writeln ("ENV " ^ C_Env.string_of env) in thy end \ setup \ML_Antiquotation.inline @{binding print_top} (Args.context >> K ("print_top " ^ ML_Pretty.make_string_fn ^ " I"))\ setup \ML_Antiquotation.inline @{binding print_top'} (Args.context >> K ("print_top' " ^ ML_Pretty.make_string_fn ^ " I"))\ setup \ML_Antiquotation.inline @{binding print_stack} - (Scan.peek (fn _ => Scan.option Args.text) + (Scan.peek (fn _ => Scan.option Parse.embedded) >> (fn name => ("print_stack " ^ (case name of NONE => "NONE" | SOME s => "(SOME \"" ^ s ^ "\")") ^ " " ^ ML_Pretty.make_string_fn)))\ setup \ML_Antiquotation.inline @{binding print_stack'} - (Scan.peek (fn _ => Scan.option Args.text) + (Scan.peek (fn _ => Scan.option Parse.embedded) >> (fn name => ("print_stack' " ^ (case name of NONE => "NONE" | SOME s => "(SOME \"" ^ s ^ "\")") ^ " " ^ ML_Pretty.make_string_fn)))\ declare[[C_lexer_trace]] section \Introduction to C Annotations: Navigating in the Parsing Stack\ subsection \Basics\ text \ Since the present theory \<^file>\C1.thy\ is depending on \<^theory>\Isabelle_C.C_Lexer_Language\ and \<^theory>\Isabelle_C.C_Parser_Language\, the syntax one is writing in the \<^theory_text>\C\ command is C11. Additionally, \<^file>\C1.thy\ also depends on \<^theory>\Isabelle_C.C_Parser_Annotation\, making it possible to write commands in C comments, called annotation commands, such as \<^theory_text>\\setup\. \ C \ \Nesting ML code in C comments\ \ int a = (((0))); /*@ highlight */ /*@ \setup \@{print_stack}\ */ /*@ \setup \@{print_top}\ */ \ text \ In terms of execution order, nested annotation commands are not pre-filtered out of the C code, but executed when the C code is still being parsed. Since the parser implemented is a LALR parser \<^footnote>\\<^url>\https://en.wikipedia.org/wiki/LALR\\, C tokens are uniquely read and treated from left to right. Thus, each nested command is (supposed by default to be) executed when the parser has already read all C tokens before the comment associated to the nested command, so when the parser is in a particular intermediate parsing step (not necessarily final) \<^footnote>\\<^url>\https://en.wikipedia.org/wiki/Shift-reduce_parser\\. \ text \The command \<^theory_text>\\setup\ is similar to the command \<^theory_text>\setup\ except that the former takes a function with additional arguments. These arguments are precisely depending on the current parsing state. To better examine these arguments, it is convenient to use ML antiquotations (be it for printing, or for doing any regular ML actions like PIDE reporting). Note that, in contrast with \<^theory_text>\setup\, the return type of the \<^theory_text>\\setup\ function is not \<^ML_type>\theory -> theory\ but \<^ML_type>\Context.generic -> Context.generic\. \ C \ \Positional navigation: referring to any previous parsed sub-tree in the stack\ \ int a = (((0 + 5))) /*@@ \setup \print_top @{make_string} I\ @ highlight */ * 4; float b = 7 / 3; \ text \The special \@\ symbol makes the command be executed whenever the first element \E\ in the stack is about to be irremediably replaced by a more structured parent element (having \E\ as one of its direct children). It is the parent element which is provided to the ML code. Instead of always referring to the first element of the stack, \N\ consecutive occurrences of \@\ will make the ML code getting as argument the direct parent of the \N\-th element.\ C \ \Positional navigation: referring to any previous parsed sub-tree in the stack\ \ int a = (((0 + 5))) /*@@ highlight */ * 4; int a = (((0 + 5))) /*@& highlight */ * 4; int a = (((0 + 5))) /*@@@@@ highlight */ * 4; int a = (((0 + 5))) /*@&&&& highlight */ * 4; \ text \\&\ behaves as \@\, but instead of always giving the designated direct parent to the ML code, it finds the first parent ancestor making non-trivial changes in the respective grammar rule (a non-trivial change can be for example the registration of the position of the current AST node being built).\ C \ \Positional navigation: moving the comment after a number of C token\ \ int b = 7 / (3) * 50; /*@+++@@ highlight */ long long f (int a) { while (0) { return 0; } } int b = 7 / (3) * 50; \ text \\N\ consecutive occurrences of \+\ will delay the interpretation of the comment, which is ignored at the place it is written. The comment is only really considered after the C parser has treated \N\ more tokens.\ C \ \Closing C comments \*/\ must close anything, even when editing ML code\ \ int a = (((0 //@ (* inline *) \setup \fn _ => fn _ => fn _ => fn context => let in (* */ *) context end\ /*@ \setup \(K o K o K) I\ (* * / *) */ ))); \ C \ \Inline comments with antiquotations\ \ /*@ \setup\(K o K o K) (fn x => K x @{con\ text (**)})\ */ // break of line activated everywhere (also in antiquotations) int a = 0; //\ @ \setup\(K o K o K) (fn x => K x @{term \a \ + b\ (* (**) *\ \ )})\ \ subsection \Erroneous Annotations Treated as Regular C Comments\ C \ \Permissive Types of Antiquotations\ \ int a = 0; /*@ \setup (* Errors: Explicit warning + Explicit markup reporting *) */ /** \setup (* Errors: Turned into tracing report information *) */ /** \setup \fn _ => fn _ => fn _ => I\ (* An example of correct syntax accepted as usual *) */ \ C \ \Permissive Types of Antiquotations\ \ int a = 0; /*@ \setup \fn _ => fn _ => fn _ => I\ \setup (* Parsing error of a single command does not propagate to other commands *) \setup \fn _ => fn _ => fn _ => I\ context */ /** \setup \fn _ => fn _ => fn _ => I\ \setup (* Parsing error of a single command does not propagate to other commands *) \setup \fn _ => fn _ => fn _ => I\ context */ /*@ \setup (* Errors in all commands are all rendered *) \setup (* Errors in all commands are all rendered *) \setup (* Errors in all commands are all rendered *) */ /** \setup (* Errors in all commands makes the whole comment considered as an usual comment *) \setup (* Errors in all commands makes the whole comment considered as an usual comment *) \setup (* Errors in all commands makes the whole comment considered as an usual comment *) */ \ subsection \Bottom-Up vs. Top-Down Evaluation\ ML\ structure Example_Data = Generic_Data (type T = string list val empty = [] val merge = K empty) fun add_ex s1 s2 = Example_Data.map (cons s2) #> (fn context => let val () = Output.information (s1 ^ s2) val () = app (fn s => writeln (" Data content: " ^ s)) (Example_Data.get context) in context end) \ setup \Context.theory_map (Example_Data.put [])\ declare[[ML_source_trace]] declare[[C_parser_trace]] C \ \Arbitrary interleaving of effects: \\setup\ vs \\setup\\\ \ int b,c,d/*@@ \setup \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "3_print_top"\ */,e = 0; /*@@ \setup \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "4_print_top"\ */ int b,c,d/*@@ \setup\ \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "6_print_top"\ */,e = 0; /*@@ \setup\ \fn s => fn x => fn env => @{print_top} s x env #> add_ex "evaluation of " "5_print_top"\ */ \ section \Reporting of Positions and Contextual Update of Environment\ text \ To show the content of the parsing environment, the ML antiquotations \print_top'\ and \print_stack'\ will respectively be used instead of \print_top\ and \print_stack\. This example suite allows to explore the bindings represented in the C environment and made accessible in PIDE for hovering. \ subsection \Reporting: \typedef\, \enum\\ (*\struct\*) declare [[ML_source_trace = false]] declare [[C_lexer_trace = false]] C \ \Reporting of Positions\ \ typedef int i, j; /*@@ \setup \@{print_top'}\ @highlight */ //@ +++++@ \setup \@{print_top'}\ +++++@highlight int j = 0; typedef int i, j; j jj1 = 0; j jj = jj1; j j = jj1 + jj; typedef i j; typedef i j; typedef i j; i jj = jj; j j = jj; \ C \ \Nesting type definitions\ \ typedef int j; j a = 0; typedef int k; int main (int c) { j b = 0; typedef int k; typedef k l; k a = c; l a = 0; } k a = a; \ C \ \Reporting \enum\\ \ enum a b; // bound case: undeclared enum a {aaa}; // define case enum a {aaa}; // define case: redefined enum a _; // bound case __thread (f ( enum a, enum a vv)); enum a /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.function_definition4\\*/ f (enum a a) { } __thread enum a /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.declaration_specifier2\\*/ f (enum a a) { enum c {ccc}; // define case __thread enum c f (enum c a) { return 0; } enum c /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.nested_function_definition2\\*/ f (enum c a) { return 0; } return 0; } enum z {zz}; // define case int main (enum z *x) /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.parameter_type_list2\\*/ { return zz; } int main (enum a *x, ...) /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.parameter_type_list3\\*/ { return zz; } \ subsection \Continuation Calculus with the C Environment: Presentation in ML\ declare [[C_parser_trace = false]] ML\ val C = tap o C_Module.C val C' = C_Module.C' \ C \ \Nesting C code without propagating the C environment\ \ int a = 0; int b = 7 / (3) * 50 /*@@@@@ \setup \fn _ => fn _ => fn _ => C \int b = a + a + a + a + a + a + a ;\ \ */; \ C \ \Nesting C code and propagating the C environment\ \ int a = 0; int b = 7 / (3) * 50 /*@@@@@ \setup \fn _ => fn _ => fn env => C' env \int b = a + a + a + a + a + a + a ;\ \ */; \ subsection \Continuation Calculus with the C Environment: Presentation with Outer Commands\ ML\ val _ = Theory.setup (C_Inner_Syntax.command0 (fn src => fn context => C' (C_Stack.Data_Lang.get' context |> #2) src context) C_Parse.C_source ("C'", \<^here>, \<^here>, \<^here>)) \ C \ \Nesting C code without propagating the C environment\ \ int f (int a) { int b = 7 / (3) * 50 /*@ C \int b = a + a + a + a + a + a + a;\ */; int c = b + a + a + a + a + a + a; } \ C \ \Nesting C code and propagating the C environment\ \ int f (int a) { int b = 7 / (3) * 50 /*@ C' \int b = a + a + a + a + a + a + a;\ */; int c = b + b + b + b + a + a + a + a + a + a; } \ C \ \Miscellaneous\ \ int f (int a) { int b = 7 / (3) * 50 /*@ C \int b = a + a + a + a + a; //@ C' \int c = b + b + b + b + a;\ \ */; int b = 7 / (3) * 50 /*@ C' \int b = a + a + a + a + a; //@ C' \int c = b + b + b + b + a;\ \ */; int c = b + b + b + b + a + a + a + a + a + a; } \ subsection \Continuation Calculus with the C Environment: Deep-First Nesting vs Breadth-First Folding: Propagation of \<^ML_type>\C_Env.env_lang\\ C \ \Propagation of report environment while manually composing at ML level (with \#>\)\ \ \In \c1 #> c2\, \c1\ and \c2\ should not interfere each other.\ \ //@ ML \fun C_env src _ _ env = C' env src\ int a; int f (int b) { int c = 0; /*@ \setup \fn _ => fn _ => fn env => C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ \ */ int e = a + b + c + d; }\ C \ \Propagation of directive environment (evaluated before parsing) to any other annotations (evaluated at parsing time)\ \ #undef int #define int(a,b) int #define int int int a; int f (int b) { int c = 0; /*@ \setup \fn _ => fn _ => fn env => C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C' env \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ #> C \int d = a + b + c + d; //@ \setup \C_env \int e = a + b + c + d;\\\ \ */ #undef int int e = a + b + c + d; } \ subsection \Continuation Calculus with the C Environment: Deep-First Nesting vs Breadth-First Folding: Propagation of \<^ML_type>\C_Env.env_tree\\ ML\ structure Data_Out = Generic_Data (type T = int val empty = 0 val merge = K empty) fun show_env0 make_string f msg context = Output.information ("(" ^ msg ^ ") " ^ make_string (f (Data_Out.get context))) val show_env = tap o show_env0 @{make_string} I \ setup \Context.theory_map (C_Module.Data_Accept.put (fn _ => fn _ => Data_Out.map (fn x => x + 1)))\ C \ \Propagation of Updates\ \ typedef int i, j; int j = 0; typedef int i, j; j jj1 = 0; j jj = jj1; /*@@ \setup \fn _ => fn _ => fn _ => show_env "POSITION 0"\ @\setup \@{print_top'}\ */ typedef int k; /*@@ \setup \fn _ => fn _ => fn env => C' env \k jj = jj; //@@ \setup \@{print_top'}\ k jj = jj + jj1; typedef k l; //@@ \setup \@{print_top'}\\ #> show_env "POSITION 1"\ */ j j = jj1 + jj; //@@ \setup \@{print_top'}\ typedef i j; /*@@ \setup \fn _ => fn _ => fn _ => show_env "POSITION 2"\ */ typedef i j; typedef i j; i jj = jj; j j = jj; \ ML\show_env "POSITION 3" (Context.Theory @{theory})\ setup \Context.theory_map (C_Module.Data_Accept.put (fn _ => fn _ => I))\ subsection \Reporting: Scope of Recursive Functions\ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = last]] C \ \Propagation of Updates\ \ int a = 0; int b = a * a + 0; int jjj = b; int main (void main(int *x,int *y),int *jjj) { return a + jjj + main(); } int main2 () { int main3 () { main2() + main(); } int main () { main2() + main(); } return a + jjj + main3() + main(); } \ C \ int main3 () { main2 (); } \ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = empty]] subsection \Reporting: Extensions to Function Types, Array Types\ C \int f (int z);\ C \int * f (int z);\ C \int (* f) (int z /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.declarator1\\*/);\ C \typedef int (* f) (int z);\ C \int f (int z) {}\ C \int * f (int z) {return z;}\ C \int ((* f) (int z1, int z2)) {return z1 + z2;}\ C \int (* (* f) (int z1, int z2)) {return z1 + z2;}\ C \typedef int (* f) (int z); f uuu (int b) {return b;};\ C \typedef int (* (* f) (int z, int z)) (int a); f uuu (int b) {return b;};\ C \struct z { int (* f) (int z); int (* (* ff) (int z)) (int a); };\ C \double (* (* f (int a /* \\ \\<^ML>\C_Grammar_Rule_Wrap_Overloading.declarator1\\*/)) (int a, double d)) (char a);\ C \double (* (((* f) []) (int a)) (int b, double c)) (char d) {int a = b + c + d;}\ C \double ((*((f) (int a))) (int a /* \\ \\<^ML>\C_Grammar_Rule_Lib.doFuncParamDeclIdent\\*/, double)) (char c) {int a = 0;}\ C \ \Nesting functions\ \ double (* (* f (int a)) (int a, double)) (char c) { double (* (* f (int a)) (double a, int a)) (char) { return a; } } \ C \ \Old function syntax\ \ f (x) int x; {return x;} \ section \General Isar Commands\ locale zz begin definition "z' = ()" end C \ \Mixing arbitrary commands\ \ int a = 0; int b = a * a + 0; int jjj = b; /*@ @@@ ML \@{lemma \A \ B \ B \ A\ by (ml_tactic \blast_tac ctxt 1\)}\ definition "a' = ()" declare [[ML_source_trace]] lemma (in zz) \A \ B \ B \ A\ by (ml_tactic \blast_tac ctxt 1\) definition (in zz) "z = ()" corollary "zz.z' = ()" apply (unfold zz.z'_def) by blast theorem "True &&& True" by (auto, presburger?) */ \ declare [[ML_source_trace = false]] C \ \Backslash newlines must be supported by \<^ML>\C_Token.syntax'\ (in particular in keywords)\ \ //@ lem\ ma (i\ n z\ z) \ \\ AA \ B\ \\ B \ A\ \ A\ b\ y (ml_t\ actic \\ bla\ st_tac c\ txt\ 0\ 001\) \ section \Starting Parsing Rule\ subsection \Basics\ C \ \Parameterizing starting rule\ \ /*@ declare [[C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0 = "statement"]] C \while (a) {}\ C \a = 2;\ declare [[C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0 = "expression"]] C \2 + 3\ C \a = 2\ C \a[1]\ C \&a\ C \a\ */ \ subsection \Embedding in Inner Terms\ term \\<^C> \ \default behavior of parsing depending on the activated option\ \0\\ term \\<^C>\<^sub>u\<^sub>n\<^sub>i\<^sub>t \ \force the explicit parsing\ \f () {while (a) {}; return 0;} int a = 0;\\ term \\<^C>\<^sub>d\<^sub>e\<^sub>c\<^sub>l \ \force the explicit parsing\ \int a = 0; \\ term \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r \ \force the explicit parsing\ \a\\ term \\<^C>\<^sub>s\<^sub>t\<^sub>m\<^sub>t \ \force the explicit parsing\ \while (a) {}\\ declare [[C\<^sub>r\<^sub>u\<^sub>l\<^sub>e\<^sub>0 = "translation_unit"]] term \\<^C> \ \default behavior of parsing depending on the current option\ \int a = 0;\\ subsection \User Defined Setup of Syntax\ setup \C_Module.C_Term.map_expression (fn _ => fn _ => fn _ => @{term "10 :: nat"})\ setup \C_Module.C_Term.map_statement (fn _ => fn _ => fn _ => @{term "20 :: nat"})\ value \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\1\ + \<^C>\<^sub>s\<^sub>t\<^sub>m\<^sub>t\for (;;);\\ setup \ \redefinition\ \C_Module.C_Term.map_expression (fn _ => fn _ => fn _ => @{term "1000 :: nat"})\ value \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\1\ + \<^C>\<^sub>s\<^sub>t\<^sub>m\<^sub>t\for (;;);\\ setup \C_Module.C_Term.map_default (fn _ => fn _ => fn _ => @{term "True"})\ subsection \Validity of Context for Annotations\ ML \fun fac x = if x = 0 then 1 else x * fac (x - 1)\ ML \ \Execution of annotations in term possible in (the outermost) \<^theory_text>\ML\\ \ \<^term>\ \<^C> \int c = 0; /*@ ML \fac 100\ */\ \ \ definition \ \Execution of annotations in term possible in \<^ML_type>\local_theory\ commands (such as \<^theory_text>\definition\)\ \ term = \<^C> \int c = 0; /*@ ML \fac 100\ */\ \ section \Scopes of Inner and Outer Terms\ ML \ local fun bind scan ((stack1, (to_delay, stack2)), _) = C_Parse.range scan >> (fn (src, range) => C_Env.Parsing ( (stack1, stack2) , ( range , C_Inner_Syntax.bottom_up (fn _ => fn context => ML_Context.exec (tap (fn _ => Syntax.read_term (Context.proof_of context) (Token.inner_syntax_of src))) context) , Symtab.empty , to_delay))) in val _ = Theory.setup ( C_Annotation.command' ("term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r", \<^here>) "" (bind (C_Token.syntax' (Parse.token Parse.cartouche))) #> C_Inner_Syntax.command0 (C_Inner_Toplevel.keep'' o C_Inner_Isar_Cmd.print_term) (C_Token.syntax' (Scan.succeed [] -- Parse.term)) ("term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r", \<^here>, \<^here>, \<^here>)) end \ C \ int z = z; /*@ C \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ */\ term(*outer*) \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \ int z = z; /*@ C \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ */\ term(*outer*) \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = last]] C \ int z = z; /*@ C \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ C \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ C' \//@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\\ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ */\ term(*outer*) \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\z\\ declare [[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = empty]] C \ \Propagation of report environment while manually composing at ML level\ \ int a; int f (int b) { int c = 0; /*@ \setup \fn _ => fn _ => fn env => C' env \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ #> C \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ #> C' env \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ #> C \int d = a + b + c + d; //@ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\\ \ term\<^sub>i\<^sub>n\<^sub>n\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ term\<^sub>o\<^sub>u\<^sub>t\<^sub>e\<^sub>r \\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\c\ + \<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r\d\\ */ int e = a + b + c + d; }\ section \Calculation in Directives\ subsection \Annotation Command Classification\ C \ \Lexing category vs. parsing category\ \ int a = 0; // \ \Category 2: only parsing\ //@ \setup \K (K (K I))\ (* evaluation at parsing *) //@@ \setup\ \K (K (K I))\ (* evaluation at parsing *) //@ highlight (* evaluation at parsing *) //@@ highlight\ (* evaluation at parsing *) // \ \Category 3: with lexing\ //@ #setup I (* evaluation at lexing (and directives resolving) *) //@ setup I (* evaluation at parsing *) //@@ setup\ I (* evaluation at parsing *) //@ #ML I (* evaluation at lexing (and directives resolving) *) //@ ML I (* evaluation at parsing *) //@@ ML\ I (* evaluation at parsing *) //@ #C \\ (* evaluation at lexing (and directives resolving) *) //@ C \\ (* evaluation at parsing *) //@@ C\ \\ (* evaluation at parsing *) \ C \ \Scheduling example\ \ //@+++++ ML \writeln "2"\ int a = 0; //@@ ML\ \writeln "3"\ //@ #ML \writeln "1"\ \ C \ \Scheduling example\ \ //* lemma True by simp //* #lemma True #by simp //* #lemma True by simp //* lemma True #by simp \ C \ \Scheduling example\ \ /*@ lemma \1 = one\ \2 = two\ \two + one = three\ by auto #definition [simp]: \three = 3\ #definition [simp]: \two = 2\ #definition [simp]: \one = 1\ */ \ subsection \Generalizing ML Antiquotations with C Directives\ ML \ structure Directive_setup_define = Generic_Data (type T = int val empty = 0 val merge = K empty) fun setup_define1 pos f = C_Directive.setup_define pos (fn toks => fn (name, (pos1, _)) => tap (fn _ => writeln ("Executing " ^ name ^ Position.here pos1 ^ " (only once)")) #> pair (f toks)) (K I) fun setup_define2 pos = C_Directive.setup_define pos (K o pair) \ C \ \General scheme of C antiquotations\ \ /*@ #setup \ \Overloading \#define\\ \ setup_define2 \<^here> (fn (name, (pos1, _)) => op ` Directive_setup_define.get #>> (case name of "f3" => curry op * 152263 | _ => curry op + 1) #> tap (fn (nb, _) => tracing ("Executing antiquotation " ^ name ^ Position.here pos1 ^ " (number = " ^ Int.toString nb ^ ")")) #> uncurry Directive_setup_define.put) \ */ #define f1 #define f2 int a = 0; #define f3 f1 f2 f1 f3 //@ #setup \ \Resetting \#define\\ \setup_define2 \<^here> (K I)\ f3 #define f3 f3 \ C \ \Dynamic token computing in \#define\\ \ //@ #setup \setup_define1 \<^here> (K [])\ #define f int a = 0; f f f f //@ #setup \setup_define1 \<^here> (fn toks => toks @ toks)\ #define f int b = a; f f //@ #setup \setup_define1 \<^here> I\ #define f int a = 0; f f \ section \Miscellaneous\ C \ \Antiquotations acting on a parsed-subtree\ \ # /**/ include // backslash rendered unescaped f(){0 + 0;} /**/ // val _ : theory => 'a => theory # /* context */ if if elif #include if then else ; # /* zzz */ elif /**/ #else\ #define FOO 00 0 "" (( FOO(FOO(a,b,c)) #endif\ C \ \Header-names in directives\ \ #define F #define G "stdio\h" // expecting an error whenever expanded #define H "stdio_h" // can be used anywhere without errors int f = /*F*/ ""; int g = /*G*/ ""; int h = H ""; #include F \ C \ \Parsing tokens as directives only when detecting space symbols before \#\\ \/* */ \ \ // # /* */ define /**/ \ a a a /*#include <>*/ // must not be considered as a directive \ C \ \Universal character names in identifiers and Isabelle symbols\ \ #include int main () { char * ó\<^url>ò = "ó\<^url>ò"; printf ("%s", ó\<^url>ò); } \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Command.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Command.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Command.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Command.thy @@ -1,1179 +1,1179 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * Author: Frédéric Tuong, Burkhart Wolff, Université Paris-Saclay * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Interface: Inner and Outer Commands\ theory C_Command imports C_Eval keywords "C" :: thy_decl % "ML" and "C_file" :: thy_load % "ML" and "C_export_boot" :: thy_decl % "ML" and "C_export_file" :: thy_decl and "C_prf" :: prf_decl % "proof" (* FIXME % "ML" ?? *) and "C_val" :: diag % "ML" begin subsection \Parsing Entry-Point: Error and Acceptance Cases\ ML \ \analogous to \<^file>\~~/src/Pure/Tools/ghc.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) \ structure C_Serialize = struct (** string literals **) fun print_codepoint c = (case c of 10 => "\\n" | 9 => "\\t" | 11 => "\\v" | 8 => "\\b" | 13 => "\\r" | 12 => "\\f" | 7 => "\\a" | 27 => "\\e" | 92 => "\\\\" | 63 => "\\?" | 39 => "\\'" | 34 => "\\\"" | c => if c >= 32 andalso c < 127 then chr c else error "Not yet implemented"); fun print_symbol sym = let val ord = SML90.ord; (* copied from ML_init in Isabelle2020. *) in (case Symbol.decode sym of Symbol.Char s => print_codepoint (ord s) | Symbol.UTF8 s => UTF8.decode_permissive s |> map print_codepoint |> implode | Symbol.Sym s => "\\092<" ^ s ^ ">" | Symbol.Control s => "\\092<^" ^ s ^ ">" | _ => translate_string (print_codepoint o ord) sym) end; val print_string = quote o implode o map print_symbol o Symbol.explode; end \ ML \ \analogous to \<^file>\~~/src/Pure/Tools/generated_files.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) \ structure C_Generated_Files = struct val c_dir = "C"; val c_ext = "c"; val c_make_comment = enclose "/*" "*/"; (** context data **) (* file types *) fun get_file_type ext = if ext = "" then error "Bad file extension" else if c_ext = ext then () else error ("Unknown file type for extension " ^ quote ext); (** Isar commands **) (* generate_file *) fun generate_file (binding, src_content) lthy = let val (path, pos) = Path.dest_binding binding; val () = get_file_type (#2 (Path.split_ext path)) handle ERROR msg => error (msg ^ Position.here pos); val header = c_make_comment " generated by Isabelle "; val content = header ^ "\n" ^ src_content; in lthy |> (Local_Theory.background_theory o Generated_Files.add_files) (binding, content) end; (** concrete file types **) val _ = Theory.setup (Generated_Files.file_type \<^binding>\C\ {ext = c_ext, make_comment = c_make_comment, make_string = C_Serialize.print_string}); end \ ML \ \\<^theory>\Isabelle_C.C_Eval\\ \ signature C_MODULE = sig structure Data_Accept : GENERIC_DATA structure Data_In_Env : GENERIC_DATA structure Data_In_Source : GENERIC_DATA structure Data_Term : GENERIC_DATA structure C_Term: sig val key0_default: string val key0_expression: string val key0_external_declaration: string val key0_statement: string val key0_translation_unit: string val key_default: Input.source val key_expression: Input.source val key_external_declaration: Input.source val key_statement: Input.source val key_translation_unit: Input.source val map_default: (C_Grammar_Rule.ast_generic -> C_Env.env_lang -> local_theory -> term) -> theory -> theory val map_expression: (C_Grammar_Rule_Lib.CExpr -> C_Env.env_lang -> local_theory -> term) -> theory -> theory val map_external_declaration: (C_Grammar_Rule_Lib.CExtDecl -> C_Env.env_lang -> local_theory -> term) -> theory -> theory val map_statement: (C_Grammar_Rule_Lib.CStat -> C_Env.env_lang -> local_theory -> term) -> theory -> theory val map_translation_unit: (C_Grammar_Rule_Lib.CTranslUnit -> C_Env.env_lang -> local_theory -> term) -> theory -> theory val tok0_expression: string * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tok0_external_declaration: string * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tok0_statement: string * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tok0_translation_unit: string * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tok_expression: Input.source * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tok_external_declaration: Input.source * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tok_statement: Input.source * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tok_translation_unit: Input.source * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token) val tokens: (string * ('a * 'a -> (C_Grammar.Tokens.svalue, 'a) LALR_Parser_Eval.Token.token)) list end structure C_Term': sig val accept: local_theory -> (Input.source * (Position.range -> (C_Grammar.Tokens.svalue, Position.T) LALR_Parser_Eval.Token.token)) option -> (Input.source -> Context.generic -> (C_Grammar.Tokens.svalue, Position.T) LALR_Parser_Eval.Token.token) * (Data_In_Env.T -> 'a * (C_Grammar_Rule.ast_generic * 'b * 'c) -> {context: Context.generic, error_lines: 'd, reports_text: 'e} -> term * {context: Context.generic, error_lines: 'd, reports_text: 'e}) val err: C_Env.env_lang -> (LALR_Table.state * (C_Grammar_Parser.svalue0 * Position.T * Position.T)) list -> Position.T -> {context: Context.generic, error_lines: string list, reports_text: Position.report_text list} -> term * {context: Context.generic, error_lines: string list, reports_text: Position.report_text list} val eval_in: Input.source -> Context.generic -> (Context.generic -> C_Env.env_lang) -> (Input.source * (Position.range -> (C_Grammar.Tokens.svalue, Position.T) LALR_Parser_Eval.Token.token)) option -> C_Lex.token list * (C_Env.error_lines -> string list) -> term val parse_translation: ('a * (Input.source * (Position.range -> (C_Grammar.Tokens.svalue, Position.T) LALR_Parser_Eval.Token.token)) option) list -> ('a * (Proof.context -> term list -> term)) list end val accept: Data_In_Env.T -> 'a * (C_Grammar_Rule.ast_generic * 'b * 'c) -> {context: Context.generic, error_lines: 'd, reports_text: 'e} -> unit * {context: Context.generic, error_lines: 'd, reports_text: 'e} val accept0: (Context.generic -> C_Grammar_Rule.ast_generic -> Data_In_Env.T -> Context.generic -> 'a) -> Data_In_Env.T -> C_Grammar_Rule.ast_generic -> Context.generic -> 'a val c_enclose: string -> string -> Input.source -> C_Lex.token list * (string list -> string list) val env: Context.generic -> Data_In_Env.T val env0: Proof.context -> Data_In_Env.T val err: C_Env.env_lang -> (LALR_Table.state * (C_Grammar_Parser.svalue0 * Position.T * Position.T)) list -> Position.T -> {context: Context.generic, error_lines: string list, reports_text: Position.report_text list} -> unit * {context: Context.generic, error_lines: string list, reports_text: Position.report_text list} val err0: 'a -> 'b -> Position.T -> {context: 'c, error_lines: string list, reports_text: 'd} -> {context: 'c, error_lines: string list, reports_text: 'd} val eval_in: Input.source -> Context.generic option -> C_Lex.token list * (C_Env.error_lines -> string list) -> unit val eval_source: Input.source -> unit val exec_eval: Input.source -> Context.generic -> Context.generic val start: Input.source -> Context.generic -> (C_Grammar.Tokens.svalue, Position.T) LALR_Parser_Eval.Token.token (* toplevel command semantics of Isabelle_C *) val C: Input.source -> Context.generic -> Context.generic val C': C_Env.env_lang -> Input.source -> Context.generic -> Context.generic val C_export_boot: Input.source -> Context.generic -> generic_theory val C_export_file: Position.T * 'a -> Proof.context -> Proof.context val C_prf: Input.source -> Proof.state -> Proof.state end structure C_Module : C_MODULE = struct structure Data_In_Source = Generic_Data (type T = Input.source list val empty = [] val merge = K empty) structure Data_In_Env = Generic_Data (type T = C_Env.env_lang val empty = C_Env.empty_env_lang val merge = K empty) structure Data_Accept = Generic_Data (type T = C_Grammar_Rule.ast_generic -> C_Env.env_lang -> Context.generic -> Context.generic fun empty _ _ = I val merge = #2) structure Data_Term = Generic_Data (type T = (C_Grammar_Rule.ast_generic -> C_Env.env_lang -> local_theory -> term) Symtab.table val empty = Symtab.empty val merge = #2) (* keys for major syntactic categories *) structure C_Term = struct val key_translation_unit = \translation_unit\ val key_external_declaration = \external_declaration\ val key_statement = \statement\ val key_expression = \expression\ val key_default = \default\ local val source_content = Input.source_content #> #1 in val key0_translation_unit = source_content key_translation_unit val key0_external_declaration = source_content key_external_declaration val key0_statement = source_content key_statement val key0_expression = source_content key_expression val key0_default = source_content key_default end val tok0_translation_unit = (key0_translation_unit, C_Grammar.Tokens.start_translation_unit) val tok0_external_declaration = ( key0_external_declaration , C_Grammar.Tokens.start_external_declaration) val tok0_statement = (key0_statement, C_Grammar.Tokens.start_statement) val tok0_expression = (key0_expression, C_Grammar.Tokens.start_expression) val tok_translation_unit = (key_translation_unit, C_Grammar.Tokens.start_translation_unit) val tok_external_declaration = ( key_external_declaration , C_Grammar.Tokens.start_external_declaration) val tok_statement = (key_statement, C_Grammar.Tokens.start_statement) val tok_expression = (key_expression, C_Grammar.Tokens.start_expression) val tokens = [ tok0_translation_unit , tok0_external_declaration , tok0_statement , tok0_expression ] local fun map_upd0 key v = Context.theory_map (Data_Term.map (Symtab.update (key, v))) fun map_upd key start f = map_upd0 key (f o the o start) in val map_translation_unit = map_upd key0_translation_unit C_Grammar_Rule.get_CTranslUnit val map_external_declaration = map_upd key0_external_declaration C_Grammar_Rule.get_CExtDecl val map_statement = map_upd key0_statement C_Grammar_Rule.get_CStat val map_expression = map_upd key0_expression C_Grammar_Rule.get_CExpr val map_default = map_upd0 key0_default end end fun env0 ctxt = case Config.get ctxt C_Options.starting_env of "last" => Data_In_Env.get (Context.Proof ctxt) | "empty" => C_Env.empty_env_lang | s => error ("Unknown option: " ^ s ^ Position.here (Config.pos_of C_Options.starting_env)) val env = env0 o Context.proof_of fun start source context = Input.range_of source |> let val s = Config.get (Context.proof_of context) C_Options.starting_rule in case AList.lookup (op =) C_Term.tokens s of SOME tok => tok | NONE => error ("Unknown option: " ^ s ^ Position.here (Config.pos_of C_Options.starting_rule)) end fun err0 _ _ pos = C_Env.map_error_lines (cons ("Parser: No matching grammar rule" ^ Position.here pos)) val err = pair () oooo err0 fun accept0 f (env_lang:C_Env.env_lang) ast = Data_In_Env.put env_lang #> (fn context => f context ast env_lang (Data_Accept.get context ast env_lang context)) fun accept (env_lang:C_Env.env_lang) (_, (ast, _, _)) = pair () o C_Env.map_context (accept0 (K (K (K I))) env_lang ast) val eval_source = C_Context.eval_source env start err accept fun c_enclose bg en source = C_Lex.@@ ( C_Lex.@@ (C_Lex.read bg, C_Lex.read_source source) , C_Lex.read en); structure C_Term' = struct val err = pair Term.dummy oooo err0 fun accept ctxt start_rule = let val (key, start) = case start_rule of NONE => (C_Term.key_default, start) | SOME (key, start_rule) => (key, fn source => fn _ => start_rule (Input.range_of source)) val (key, pos) = Input.source_content key in ( start , fn env_lang => fn (_, (ast, _, _)) => C_Env.map_context' (accept0 (fn context => pair oo (case Symtab.lookup (Data_Term.get context) key of NONE => tap (fn _ => warning ("Representation function associated to\ \ \"" ^ key ^ "\"" ^ Position.here pos ^ " not found (returning a dummy term)")) (fn _ => fn _ => @{term "()"}) | SOME f => fn ast => fn env_lang => f ast env_lang ctxt)) env_lang ast)) end fun eval_in text context env start_rule = let val (start, accept) = accept (Context.proof_of context) start_rule in C_Context.eval_in (SOME context) env (start text) err accept end fun parse_translation l = l |> map (apsnd (fn start_rule => fn ctxt => fn args => let val msg = (case start_rule of NONE => C_Term.key_default | SOME (key, _) => key) |> Input.source_content |> #1 fun err () = raise TERM (msg, args) in case args of [(c as Const (\<^syntax_const>\_constrain\, _)) $ Free (s, _) $ p] => (case Term_Position.decode_position p of SOME (pos, _) => c $ let val src = uncurry (Input.source false) let val s0 = Symbol_Pos.explode (s, pos) val s = Symbol_Pos.cartouche_content s0 in ( Symbol_Pos.implode s , case s of [] => Position.no_range | (_, pos0) :: _ => Position.range (pos0, s0 |> List.last |> snd)) end in eval_in src (case Context.get_generic_context () of NONE => Context.Proof ctxt | SOME context => Context.mapping I (K ctxt) context) (C_Stack.Data_Lang.get #> (fn NONE => env0 ctxt | SOME (_, env_lang) => env_lang)) start_rule (c_enclose "" "" src) end $ p | NONE => err ()) | _ => err () end)) end fun eval_in text ctxt = C_Context.eval_in ctxt env (start text) err accept fun exec_eval source = Data_In_Source.map (cons source) #> ML_Context.exec (fn () => eval_source source) fun C_prf source = Proof.map_context (Context.proof_map (exec_eval source)) #> Proof.propagate_ml_env fun C_export_boot source context = context |> Config.put_generic ML_Env.ML_environment ML_Env.Isabelle |> Config.put_generic ML_Env.ML_write_global true |> exec_eval source |> Config.restore_generic ML_Env.ML_write_global context |> Config.restore_generic ML_Env.ML_environment context |> Local_Theory.propagate_ml_env val C: Input.source -> Context.generic -> Context.generic = fn source => exec_eval source #> Local_Theory.propagate_ml_env val C': C_Env.env_lang -> Input.source -> Context.generic -> Context.generic = fn env_lang:C_Env.env_lang => fn src:Input.source => fn context:Context.generic => context |> C_Env.empty_env_tree |> C_Context.eval_source' env_lang (fn src => start src context) err accept src |> (fn (_, {context, reports_text, error_lines}) => tap (fn _ => case error_lines of [] => () | l => warning (cat_lines (rev l))) (C_Stack.Data_Tree.map (curry C_Stack.Data_Tree_Args.merge (reports_text, [])) context)) fun C_export_file (pos, _) lthy = let val c_sources = Data_In_Source.get (Context.Proof lthy) val binding = Path.binding ( Path.appends [ Path.basic C_Generated_Files.c_dir , Path.basic (string_of_int (length c_sources)) , lthy |> Proof_Context.theory_of |> Context.theory_name |> Path.explode |> Path.ext C_Generated_Files.c_ext ] , pos) in lthy |> C_Generated_Files.generate_file (binding, rev c_sources |> map (Input.source_content #> #1) |> cat_lines) |> tap (Proof_Context.theory_of #> (fn thy => let val file = Generated_Files.get_file thy binding in Generated_Files.export_file thy file; writeln (Export.message thy Path.current); writeln (prefix " " (Generated_Files.print_file file)) end)) end end \ subsection \Definitions of C11 Directives as C-commands\ subsubsection \Initialization\ ML \ \analogous to \<^theory>\Pure\\ \ structure C_Directive : sig val setup_define: Position.T -> (C_Lex.token list -> string * Position.range -> Context.generic -> C_Lex.token list * Context.generic) -> (string * Position.range -> Context.generic -> Context.generic) -> theory -> theory end = struct local fun directive_update keyword data = C_Context.directive_update keyword (data, K (K (K I))) fun return f (env_cond, env) = ([], (env_cond, f env)) fun directive_update_define pos f_toks f_antiq = directive_update ("define", pos) (return o (fn C_Lex.Define (_, C_Lex.Group1 ([], [tok3]), NONE, C_Lex.Group1 ([], toks)) => let val map_ctxt = case (tok3, toks) of (C_Lex.Token ((pos, _), (C_Lex.Ident, ident)), [C_Lex.Token (_, (C_Lex.Integer (_, C_Lex.Repr_decimal, []), integer))]) => C_Env.map_context (Context.map_theory (Named_Target.theory_map (Specification.definition_cmd (SOME (Binding.make (ident, pos), NONE, NoSyn)) [] [] (Binding.empty_atts, ident ^ " \ " ^ integer) true #> tap (fn ((_, (_, t)), ctxt) => Output.information ("Generating " ^ Pretty.string_of (Syntax.pretty_term ctxt (Thm.prop_of t)) ^ Position.here (Position.range_position ( C_Lex.pos_of tok3 , C_Lex.end_pos_of (List.last toks))))) #> #2))) | _ => I in fn (env_dir, env_tree) => let val name = C_Lex.content_of tok3 val pos = [C_Lex.pos_of tok3] val data = (pos, serial (), (C_Scan.Left (f_toks toks), f_antiq)) in ( Symtab.update (name, data) env_dir , env_tree |> C_Context.markup_directive_define false (C_Ast.Left (data, C_Env_Ext.list_lookup env_dir name)) pos name |> map_ctxt) end end | C_Lex.Define (_, C_Lex.Group1 ([], [tok3]), SOME (C_Lex.Group1 (_ :: toks_bl, _)), _) => tap (fn _ => (* not yet implemented *) warning ("Ignored functional macro directive" ^ Position.here (Position.range_position (C_Lex.pos_of tok3, C_Lex.end_pos_of (List.last toks_bl))))) | _ => I)) in val setup_define = Context.theory_map o C_Context0.Directives.map ooo directive_update_define val _ = Theory.setup (Context.theory_map (C_Context0.Directives.map (directive_update_define \<^here> (K o pair) (K I) #> directive_update ("undef", \<^here>) (return o (fn C_Lex.Undef (C_Lex.Group2 (_, _, [tok])) => (fn (env_dir, env_tree) => let val name = C_Lex.content_of tok val pos1 = [C_Lex.pos_of tok] val data = Symtab.lookup env_dir name in ( (case data of NONE => env_dir | SOME _ => Symtab.delete name env_dir) , C_Context.markup_directive_define true (C_Ast.Right (pos1, data)) pos1 name env_tree) end) | _ => I))))) end end \ subsection \Definitions of C Annotation Commands\ subsubsection \Library\ ML \ \analogous to \<^file>\~~/src/Pure/Isar/toplevel.ML\\ \ structure C_Inner_Toplevel = struct val theory = Context.map_theory fun local_theory' target f gthy = let val (finish, lthy) = Target_Context.switch_named_cmd target gthy; val lthy' = lthy |> Local_Theory.new_group |> f false |> Local_Theory.reset_group; in finish lthy' end val generic_theory = I fun keep'' f = tap (f o Context.proof_of) end \ ML \ \analogous to \<^file>\~~/src/Pure/Isar/isar_cmd.ML\\ \ structure C_Inner_Isar_Cmd = struct (** theory declarations **) (* generic setup *) fun setup0 f_typ f_val src = fn NONE => let val setup = "setup" in C_Context.expression "C_Ast" (Input.range_of src) setup (f_typ "C_Stack.stack_data" "C_Stack.stack_data_elem -> C_Env.env_lang -> Context.generic -> Context.generic") ("fn context => \ \let val (stack, env_lang) = C_Stack.Data_Lang.get' context \ \in " ^ f_val setup "stack" ^ " (stack |> hd) env_lang end context") (ML_Lex.read_source src) end | SOME rule => let val hook = "hook" in C_Context.expression "C_Ast" (Input.range_of src) hook (f_typ "C_Stack.stack_data" (C_Grammar_Rule.type_reduce rule ^ " C_Stack.stack_elem -> C_Env.env_lang -> Context.generic -> Context.generic")) ("fn context => \ \let val (stack, env_lang) = C_Stack.Data_Lang.get' context \ \in " ^ f_val hook "stack" ^ " " ^ "(stack \ \|> hd \ \|> C_Stack.map_svalue0 C_Grammar_Rule.reduce" ^ Int.toString rule ^ ")\ \env_lang \ \end \ \ context") (ML_Lex.read_source src) end val setup = setup0 (fn a => fn b => a ^ " -> " ^ b) (fn a => fn b => a ^ " " ^ b) val setup' = setup0 (K I) K (* print theorems, terms, types etc. *) local fun string_of_term ctxt s = let val t = Syntax.read_term ctxt s; val T = Term.type_of t; val ctxt' = Proof_Context.augment t ctxt; in Pretty.string_of (Pretty.block [Pretty.quote (Syntax.pretty_term ctxt' t), Pretty.fbrk, Pretty.str "::", Pretty.brk 1, Pretty.quote (Syntax.pretty_typ ctxt' T)]) end; fun print_item string_of (modes, arg) ctxt = Print_Mode.with_modes modes (fn () => writeln (string_of ctxt arg)) (); in val print_term = print_item string_of_term; end; end \ ML \ \analogous to \<^file>\~~/src/Pure/Isar/outer_syntax.ML\\ \ structure C_Inner_Syntax = struct val drop1 = fn C_Scan.Left f => C_Scan.Left (K o f) | C_Scan.Right (f, dir) => C_Scan.Right (K o f, dir) val drop2 = fn C_Scan.Left f => C_Scan.Left (K o K o f) | C_Scan.Right (f, dir) => C_Scan.Right (K o K o f, dir) val bottom_up = C_Env.Bottom_up o C_Env.Exec_annotation (**) fun pref_lex name = "#" ^ name val pref_bot = I fun pref_top name = name ^ "\" (**) fun command2' cmd f (pos_bot, pos_top) = let fun cmd' dir = cmd (C_Scan.Right (f, dir)) Keyword.thy_decl in cmd' bottom_up (pref_bot, pos_bot) #> cmd' C_Env.Top_down (pref_top, pos_top) end fun command3' cmd f (pos_lex, pos_bot, pos_top) = cmd (C_Scan.Left f) (pref_lex, pos_lex) #> command2' (K o cmd) f (pos_bot, pos_top) fun command2 cmd f (name, pos_bot, pos_top) = command2' (fn f => fn kind => fn (name_pref, pos) => cmd f kind (name_pref name, pos)) f (pos_bot, pos_top) fun command3 cmd f (name, pos_lex, pos_bot, pos_top) = command3' (fn f => fn (name_pref, pos) => cmd f (name_pref name, pos)) f (pos_lex, pos_bot, pos_top) (**) fun command00 f kind scan name = C_Annotation.command'' kind name "" (case f of C_Scan.Left f => (fn _ => C_Parse.range scan >> (fn (src, range) => C_Env.Lexing (range, f src range))) | C_Scan.Right (f, dir) => fn ((stack1, (to_delay, stack2)), _) => C_Parse.range scan >> (fn (src, range) => C_Env.Parsing ((stack1, stack2), (range, dir (f src range), Symtab.empty, to_delay)))) fun command00_no_range f kind name = C_Annotation.command'' kind name "" (case f of C_Scan.Left f => (fn (_, range) => Scan.succeed () >> K (C_Env.Lexing (range, f range))) | C_Scan.Right (f, dir) => fn ((stack1, (to_delay, stack2)), range) => Scan.succeed () >> K (C_Env.Parsing ((stack1, stack2), (range, dir (f range), Symtab.empty, to_delay)))) (**) fun command' f = command00 (drop1 f) Keyword.thy_decl fun command f scan = command2 (fn f => fn kind => command00 f kind scan) (K o f) fun command_range f = command00_no_range f Keyword.thy_decl val command_range' = command3 (command_range o drop1) fun command_no_range' f = command00_no_range (drop1 f) Keyword.thy_decl fun command_no_range f = command2 command00_no_range (K f) fun command0 f scan = command3 (fn f => command' (drop1 f) scan) f fun local_command' (name, pos_lex, pos_bot, pos_top) scan f = command3' (fn f => fn (name_pref, pos) => command' (drop1 f) (C_Token.syntax' (Parse.opt_target -- scan name_pref)) (name_pref name, pos)) (fn (target, arg) => C_Inner_Toplevel.local_theory' target (f arg)) (pos_lex, pos_bot, pos_top) fun local_command'' spec = local_command' spec o K val command0_no_range = command_no_range' o drop1 fun command0' f kind scan = command3 (fn f => fn (name, pos) => command00 (drop2 f) kind (scan name) (name, pos)) f end \ ML \ \analogous to \<^file>\~~/src/Pure/ML/ml_file.ML\\ \ structure C_Inner_File = struct fun command_c ({lines, pos, ...}: Token.file) = C_Module.C (Input.source true (cat_lines lines) (pos, pos)); fun C files gthy = command_c (hd (files (Context.theory_of gthy))) gthy; fun command_ml environment debug files gthy = let val file: Token.file = hd (files (Context.theory_of gthy)); val source = Token.file_source file; val _ = Document_Output.check_comments (Context.proof_of gthy) (Input.source_explode source); val flags: ML_Compiler.flags = {environment = environment, redirect = true, verbose = true, debug = debug, writeln = writeln, warning = warning}; in gthy |> ML_Context.exec (fn () => ML_Context.eval_source flags source) |> Local_Theory.propagate_ml_env end; val ML = command_ml ""; val SML = command_ml ML_Env.SML; end; \ subsubsection \Initialization\ setup \ \analogous to \<^theory>\Pure\\ \ C_Thy_Header.add_keywords_minor (maps (fn ((name, pos_lex, pos_bot, pos_top), ty) => [ ((C_Inner_Syntax.pref_lex name, pos_lex), ty) , ((C_Inner_Syntax.pref_bot name, pos_bot), ty) , ((C_Inner_Syntax.pref_top name, pos_top), ty) ]) [ (("apply", \<^here>, \<^here>, \<^here>), ((Keyword.prf_script, []), ["proof"])) , (("by", \<^here>, \<^here>, \<^here>), ((Keyword.qed, []), ["proof"])) , (("done", \<^here>, \<^here>, \<^here>), ((Keyword.qed_script, []), ["proof"])) ]) \ ML \ \analogous to \<^theory>\Pure\\ \ local val semi = Scan.option (C_Parse.$$$ ";"); structure C_Isar_Cmd = struct fun ML source = ML_Context.exec (fn () => ML_Context.eval_source (ML_Compiler.verbose true ML_Compiler.flags) source) #> Local_Theory.propagate_ml_env fun theorem schematic ((long, binding, includes, elems, concl), (l_meth, o_meth)) int lthy = (if schematic then Specification.schematic_theorem_cmd else Specification.theorem_cmd) long Thm.theoremK NONE (K I) binding includes elems concl int lthy |> fold (fn m => tap (fn _ => Method.report m) #> Proof.apply m #> Seq.the_result "") l_meth |> (case o_meth of NONE => Proof.global_done_proof | SOME (m1, m2) => tap (fn _ => (Method.report m1; Option.map Method.report m2)) #> Proof.global_terminal_proof (m1, m2)) fun definition (((decl, spec), prems), params) = #2 oo Specification.definition_cmd decl params prems spec fun declare (facts, fixes) = #2 oo Specification.theorems_cmd "" [(Binding.empty_atts, flat facts)] fixes end local val long_keyword = Parse_Spec.includes >> K "" || Parse_Spec.long_statement_keyword; val long_statement = Scan.optional (Parse_Spec.opt_thm_name ":" --| Scan.ahead long_keyword) Binding.empty_atts -- Scan.optional Parse_Spec.includes [] -- Parse_Spec.long_statement >> (fn ((binding, includes), (elems, concl)) => (true, binding, includes, elems, concl)); val short_statement = Parse_Spec.statement -- Parse_Spec.if_statement -- Parse.for_fixes >> (fn ((shows, assumes), fixes) => (false, Binding.empty_atts, [], [Element.Fixes fixes, Element.Assumes assumes], Element.Shows shows)); in fun theorem spec schematic = C_Inner_Syntax.local_command' spec (fn name_pref => (long_statement || short_statement) -- let val apply = Parse.$$$ (name_pref "apply") |-- Method.parse in Scan.repeat1 apply -- (Parse.$$$ (name_pref "done") >> K NONE) || Scan.repeat apply -- (Parse.$$$ (name_pref "by") |-- Method.parse -- Scan.option Method.parse >> SOME) end) (C_Isar_Cmd.theorem schematic) end val opt_modes = Scan.optional (\<^keyword>\(\ |-- Parse.!!! (Scan.repeat1 Parse.name --| \<^keyword>\)\)) []; val _ = Theory.setup ( C_Inner_Syntax.command (C_Inner_Toplevel.generic_theory oo C_Inner_Isar_Cmd.setup) C_Parse.ML_source ("\setup", \<^here>, \<^here>) #> C_Inner_Syntax.command0 (C_Inner_Toplevel.theory o Isar_Cmd.setup) C_Parse.ML_source ("setup", \<^here>, \<^here>, \<^here>) #> C_Inner_Syntax.command0 (C_Inner_Toplevel.generic_theory o C_Isar_Cmd.ML) C_Parse.ML_source ("ML", \<^here>, \<^here>, \<^here>) #> C_Inner_Syntax.command0 (C_Inner_Toplevel.generic_theory o C_Module.C) C_Parse.C_source ("C", \<^here>, \<^here>, \<^here>) #> C_Inner_Syntax.command0' (C_Inner_Toplevel.generic_theory o C_Inner_File.ML NONE) Keyword.thy_load (fn name => C_Resources.parse_files name --| semi) ("ML_file", \<^here>, \<^here>, \<^here>) #> C_Inner_Syntax.command0' (C_Inner_Toplevel.generic_theory o C_Inner_File.C) Keyword.thy_load (fn name => C_Resources.parse_files name --| semi) ("C_file", \<^here>, \<^here>, \<^here>) #> C_Inner_Syntax.command0 (C_Inner_Toplevel.generic_theory o C_Module.C_export_boot) C_Parse.C_source ("C_export_boot", \<^here>, \<^here>, \<^here>) #> C_Inner_Syntax.command_range' (Context.map_theory o Named_Target.theory_map o C_Module.C_export_file) ("C_export_file", \<^here>, \<^here>, \<^here>) #> C_Inner_Syntax.command_no_range (C_Inner_Toplevel.generic_theory oo C_Inner_Isar_Cmd.setup \fn ((_, (_, pos1, pos2)) :: _) => (fn _ => fn _ => tap (fn _ => Position.reports_text [((Position.range (pos1, pos2) |> Position.range_position, Markup.intensify), "")])) | _ => fn _ => fn _ => I\) ("highlight", \<^here>, \<^here>) #> theorem ("theorem", \<^here>, \<^here>, \<^here>) false #> theorem ("lemma", \<^here>, \<^here>, \<^here>) false #> theorem ("corollary", \<^here>, \<^here>, \<^here>) false #> theorem ("proposition", \<^here>, \<^here>, \<^here>) false #> theorem ("schematic_goal", \<^here>, \<^here>, \<^here>) true #> C_Inner_Syntax.local_command'' ("definition", \<^here>, \<^here>, \<^here>) (Scan.option Parse_Spec.constdecl -- (Parse_Spec.opt_thm_name ":" -- Parse.prop) -- Parse_Spec.if_assumes -- Parse.for_fixes) C_Isar_Cmd.definition #> C_Inner_Syntax.local_command'' ("declare", \<^here>, \<^here>, \<^here>) (Parse.and_list1 Parse.thms1 -- Parse.for_fixes) C_Isar_Cmd.declare #> C_Inner_Syntax.command0 (C_Inner_Toplevel.keep'' o C_Inner_Isar_Cmd.print_term) (C_Token.syntax' (opt_modes -- Parse.term)) ("term", \<^here>, \<^here>, \<^here>)) in end \ subsection \Definitions of Outer Classical Commands\ subsubsection \Library\ (* Author: Frédéric Tuong, Université Paris-Saclay *) ML \ \analogously to \<^file>\~~/src/Pure/Isar/parse.ML\\ \ structure C_Outer_Parse = struct - val C_source = Parse.input (Parse.group (fn () => "C source") Parse.text) + val C_source = Parse.input (Parse.group (fn () => "C source") Parse.embedded) end \ ML \ \analogously to \<^file>\~~/src/Pure/Isar/outer_syntax.ML\\ \ structure C_Outer_Syntax = struct val _ = Outer_Syntax.command \<^command_keyword>\C\ "" (C_Outer_Parse.C_source >> (Toplevel.generic_theory o C_Module.C)); end \ ML \ \analogously to \<^file>\~~/src/Pure/Isar/isar_cmd.ML\\ \ structure C_Outer_Isar_Cmd = struct (* diagnostic ML evaluation *) structure Diag_State = Proof_Data ( type T = Toplevel.state option; fun init _ = NONE; ); fun C_diag source state = let val opt_ctxt = try Toplevel.generic_theory_of state |> Option.map (Context.proof_of #> Diag_State.put (SOME state)); in Context.setmp_generic_context (Option.map Context.Proof opt_ctxt) (fn () => C_Module.eval_source source) () end; fun diag_state ctxt = (case Diag_State.get ctxt of SOME st => st | NONE => Toplevel.init_toplevel ()); val diag_goal = Proof.goal o Toplevel.proof_of o diag_state; val _ = Theory.setup (ML_Antiquotation.value (Binding.qualify true "Isar" \<^binding>\C_state\) (Scan.succeed "C_Outer_Isar_Cmd.diag_state ML_context") #> ML_Antiquotation.value (Binding.qualify true "Isar" \<^binding>\C_goal\) (Scan.succeed "C_Outer_Isar_Cmd.diag_goal ML_context")); end \ ML \ \analogously to \<^file>\~~/src/Pure/ML/ml_file.ML\\ \ structure C_Outer_File = struct fun command_c ({src_path, lines, digest, pos}: Token.file) = let val provide = Resources.provide (src_path, digest); in I #> C_Module.C (Input.source true (cat_lines lines) (pos, pos)) #> Context.mapping provide (Local_Theory.background_theory provide) end; fun C files gthy = command_c (hd (files (Context.theory_of gthy))) gthy; end; \ subsubsection \Setup for \<^verbatim>\C\ and \<^verbatim>\C_file\ Command Syntax\ ML \ local val semi = Scan.option \<^keyword>\;\; val _ = Outer_Syntax.command \<^command_keyword>\C_file\ "read and evaluate Isabelle/C file" (Resources.parse_files single --| semi >> (Toplevel.generic_theory o C_Outer_File.C)); val _ = Outer_Syntax.command \<^command_keyword>\C_export_boot\ "C text within theory or local theory, and export to bootstrap environment" (C_Outer_Parse.C_source >> (Toplevel.generic_theory o C_Module.C_export_boot)); val _ = Outer_Syntax.command \<^command_keyword>\C_prf\ "C text within proof" (C_Outer_Parse.C_source >> (Toplevel.proof o C_Module.C_prf)); val _ = Outer_Syntax.command \<^command_keyword>\C_val\ "diagnostic C text" (C_Outer_Parse.C_source >> (Toplevel.keep o C_Outer_Isar_Cmd.C_diag)); val _ = Outer_Syntax.local_theory \<^command_keyword>\C_export_file\ "diagnostic C text" (Scan.succeed () >> K (C_Module.C_export_file Position.no_range)); in end\ subsection \Term-Cartouches for C Syntax\ syntax "_C_translation_unit" :: \cartouche_position \ string\ ("\<^C>\<^sub>u\<^sub>n\<^sub>i\<^sub>t _") syntax "_C_external_declaration" :: \cartouche_position \ string\ ("\<^C>\<^sub>d\<^sub>e\<^sub>c\<^sub>l _") syntax "_C_expression" :: \cartouche_position \ string\ ("\<^C>\<^sub>e\<^sub>x\<^sub>p\<^sub>r _") syntax "_C_statement" :: \cartouche_position \ string\ ("\<^C>\<^sub>s\<^sub>t\<^sub>m\<^sub>t _") syntax "_C" :: \cartouche_position \ string\ ("\<^C> _") parse_translation \ C_Module.C_Term'.parse_translation [ (\<^syntax_const>\_C_translation_unit\, SOME C_Module.C_Term.tok_translation_unit) , (\<^syntax_const>\_C_external_declaration\, SOME C_Module.C_Term.tok_external_declaration) , (\<^syntax_const>\_C_expression\, SOME C_Module.C_Term.tok_expression) , (\<^syntax_const>\_C_statement\, SOME C_Module.C_Term.tok_statement) , (\<^syntax_const>\_C\, NONE) ] \ (*test*) ML\C_Module.env (Context.the_generic_context())\ ML\open Args\ subsection\C-env related ML-Antiquotations as Programming Support\ ML\ (* was in Isabelle2020: (Args.context -- Scan.lift Args.embedded_position >> (fn (ctxt, (name, pos)) => with: val embedded_token = ident || string || cartouche; val embedded_inner_syntax = embedded_token >> Token.inner_syntax_of; val embedded_input = embedded_token >> Token.input_of; val embedded = embedded_token >> Token.content_of; val embedded_position = embedded_input >> Input.source_content; defined in args. Setting it to : (Args.context -- Scan.lift Args.name_position >> (fn (ctxt, (name, pos)) => makes this syntactically more restrictive. *) val _ = Theory.setup( ML_Antiquotation.value_embedded \<^binding>\C\<^sub>e\<^sub>n\<^sub>v\ (Args.context -- Scan.lift Args.name_position >> (fn (ctxt, (name, pos)) => (warning"arg variant not implemented";"C_Module.env (Context.the_generic_context())")) || Scan.succeed "C_Module.env (Context.the_generic_context())")) \ text\Note that this anti-quotation is controlled by the \<^verbatim>\C_starting_env\ - flag. \ declare[[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = last]] ML\@{C\<^sub>e\<^sub>n\<^sub>v}\ declare[[C\<^sub>e\<^sub>n\<^sub>v\<^sub>0 = empty]] ML\@{C\<^sub>e\<^sub>n\<^sub>v}\ subsection\The Standard Store C11-AST's generated from C-commands\ text\Each call of the C command will register the parsed root AST in this theory-name indexed table.\ ML\ structure Root_Ast_Store = Generic_Data (type T = C_Grammar_Rule.ast_generic list Symtab.table val empty = Symtab.empty val merge = K empty); Root_Ast_Store.map: ( C_Grammar_Rule.ast_generic list Symtab.table -> C_Grammar_Rule.ast_generic list Symtab.table) -> Context.generic -> Context.generic; fun update_Root_Ast filter ast _ ctxt = let val theory_id = Context.theory_long_name(Context.theory_of ctxt) val insert_K_ast = Symtab.map_default (theory_id,[]) (cons ast) in case filter ast of NONE => (warning "No appropriate c11 ast found - store unchanged."; ctxt) |SOME _ => (Root_Ast_Store.map insert_K_ast) ctxt end; fun get_Root_Ast filter thy = let val ctxt = Context.Theory thy val thid = Context.theory_long_name(Context.theory_of ctxt) val ast = case Symtab.lookup (Root_Ast_Store.get ctxt) (thid) of SOME (a::_) => (case filter a of NONE => error "Last C command is not of appropriate AST-class." | SOME x => x) | _ => error"No C command in the current theory." in ast end val get_CExpr = get_Root_Ast C_Grammar_Rule.get_CExpr; val get_CStat = get_Root_Ast C_Grammar_Rule.get_CStat; val get_CExtDecl = get_Root_Ast C_Grammar_Rule.get_CExtDecl; val get_CTranslUnit = get_Root_Ast C_Grammar_Rule.get_CTranslUnit; \ setup \Context.theory_map (C_Module.Data_Accept.put (update_Root_Ast SOME))\ ML\ (* Was : Args.embedded_position changed to : Args.name_position. See comment above. *) val _ = Theory.setup( ML_Antiquotation.value_embedded \<^binding>\C11_CTranslUnit\ (Args.context -- Scan.lift Args.name_position >> (fn (ctxt, (name, pos)) => (warning"arg variant not implemented";"get_CTranslUnit (Context.the_global_context())")) || Scan.succeed "get_CTranslUnit (Context.the_global_context())") #> ML_Antiquotation.value_embedded \<^binding>\C11_CExtDecl\ (Args.context -- Scan.lift Args.name_position >> (fn (ctxt, (name, pos)) => (warning"arg variant not implemented";"get_CExtDecl (Context.the_global_context())")) || Scan.succeed "get_CExtDecl (Context.the_global_context())") #> ML_Antiquotation.value_embedded \<^binding>\C11_CStat\ (Args.context -- Scan.lift Args.name_position >> (fn (ctxt, (name, pos)) => (warning"arg variant not implemented";"get_CStat (Context.the_global_context())")) || Scan.succeed "get_CStat (Context.the_global_context())") #> ML_Antiquotation.value_embedded \<^binding>\C11_CExpr\ (Args.context -- Scan.lift Args.name_position >> (fn (ctxt, (name, pos)) => (warning"arg variant not implemented";"get_CExpr (Context.the_global_context())")) || Scan.succeed "get_CExpr (Context.the_global_context())") ) \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Document.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Document.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Document.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Document.thy @@ -1,232 +1,231 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Support for Document Preparation: Text-Antioquotations.\ theory C_Document imports C_Command begin ML \ \analogous to \<^file>\~~/src/Pure/Thy/document_output.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Text Antiquotations and Theory document output. *) \ structure C_Document_Output = struct (* output document source *) fun output_comment ctxt (kind, syms) = (case kind of Comment.Comment => Input.cartouche_content syms |> output_document (ctxt |> Config.put Document_Antiquotation.thy_output_display false) {markdown = false} |> XML.enclose "%\n\\isamarkupcmt{" "%\n}" | Comment.Cancel => Symbol_Pos.cartouche_content syms |> Latex.symbols_output |> XML.enclose "%\n\\isamarkupcancel{" "}" | Comment.Latex => Latex.symbols (Symbol_Pos.cartouche_content syms) | Comment.Marker => []) and output_comment_document ctxt (comment, syms) = (case comment of SOME kind => output_comment ctxt (kind, syms) | NONE => Latex.symbols syms) and output_document_text ctxt syms = Comment.read_body syms |> maps (output_comment_document ctxt) and output_document ctxt {markdown} source = let val pos = Input.pos_of source; val syms = Input.source_explode source; val output_antiquotes = maps (Document_Antiquotation.evaluate (output_document_text ctxt) ctxt); fun output_line line = (if Markdown.line_is_item line then Latex.string "\\item " else []) @ output_antiquotes (Markdown.line_content line); fun output_block (Markdown.Par lines) = separate (XML.Text "\n") (map (Latex.block o output_line) lines) | output_block (Markdown.List {kind, body, ...}) = Latex.environment (Markdown.print_kind kind) (output_blocks body) and output_blocks blocks = separate (XML.Text "\n\n") (map (Latex.block o output_block) blocks); in if Toplevel.is_skipped_proof (Toplevel.presentation_state ctxt) then [] else if markdown andalso exists (Markdown.is_control o Symbol_Pos.symbol) syms then let val ants = Antiquote.parse_comments pos syms; val reports = Antiquote.antiq_reports ants; val blocks = Markdown.read_antiquotes ants; val _ = Context_Position.reports ctxt (reports @ Markdown.reports blocks); in output_blocks blocks end else let val ants = Antiquote.parse_comments pos (trim (Symbol.is_blank o Symbol_Pos.symbol) syms); val reports = Antiquote.antiq_reports ants; val _ = Context_Position.reports ctxt (reports @ Markdown.text_reports ants); in output_antiquotes ants end end; (* output tokens with formal comments *) local val output_symbols_antiq = (fn Antiquote.Text syms => Latex.symbols_output syms | Antiquote.Control {name = (name, _), body, ...} => Latex.string (Latex.output_symbols [Symbol.encode (Symbol.Control name)]) @ Latex.symbols_output body | Antiquote.Antiq {body, ...} => XML.enclose "%\n\\isaantiq\n" "{}%\n\\endisaantiq\n" (Latex.symbols_output body)); fun output_comment_symbols ctxt {antiq} (comment, syms) = (case (comment, antiq) of (NONE, false) => Latex.symbols_output syms | (NONE, true) => Antiquote.parse_comments (#1 (Symbol_Pos.range syms)) syms |> maps output_symbols_antiq | (SOME comment, _) => output_comment ctxt (comment, syms)); fun output_body ctxt antiq bg en syms = Comment.read_body syms |> maps (output_comment_symbols ctxt {antiq = antiq}) |> XML.enclose bg en; in fun output_token ctxt tok = let fun output antiq bg en = output_body ctxt antiq bg en (Input.source_explode (C_Token.input_of tok)); in (case C_Token.kind_of tok of Token.Comment NONE => [] | Token.Comment (SOME Comment.Marker) => [] | Token.Command => output false "\\isacommand{" "}" | Token.Keyword => if Symbol.is_ascii_identifier (C_Token.content_of tok) then output false "\\isakeyword{" "}" else output false "" "" | Token.String => output false "{\\isachardoublequoteopen}" "{\\isachardoublequoteclose}" | Token.Alt_String => output false "{\\isacharbackquoteopen}" "{\\isacharbackquoteclose}" - | Token.Verbatim => output true "{\\isacharverbatimopen}" "{\\isacharverbatimclose}" | Token.Cartouche => output false "{\\isacartoucheopen}" "{\\isacartoucheclose}" | _ => output false "" "") end handle ERROR msg => error (msg ^ Position.here (C_Token.pos_of tok)); end; end; \ ML \ \\<^file>\~~/src/Pure/Thy/document_antiquotations.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Thy/document_antiquotations.ML Author: Makarius Miscellaneous document antiquotations. *) \ structure C_Document_Antiquotations = struct (* quasi-formal text (unchecked) *) local fun report_text ctxt text = let val pos = Input.pos_of text in Context_Position.reports ctxt [(pos, Markup.language_text (Input.is_delimited text)), (pos, Markup.raw_text)] end; fun prepare_text ctxt = Input.source_content #> #1 #> Document_Antiquotation.prepare_lines ctxt; val theory_text_antiquotation = - Document_Output.antiquotation_raw_embedded \<^binding>\C_theory_text\ (Scan.lift Args.text_input) + Document_Output.antiquotation_raw_embedded \<^binding>\C_theory_text\ (Scan.lift Parse.embedded_input) (fn ctxt => fn text => let val keywords = C_Thy_Header.get_keywords' ctxt; val _ = report_text ctxt text; val _ = Input.source_explode text |> C_Token.tokenize keywords {strict = true} |> maps (C_Token.reports keywords) |> Context_Position.reports_text ctxt; in prepare_text ctxt text |> C_Token.explode0 keywords |> maps (C_Document_Output.output_token ctxt) |> Document_Output.isabelle ctxt end); in val _ = Theory.setup theory_text_antiquotation; end; (* C text *) local fun c_text name c = - Document_Output.antiquotation_verbatim_embedded name (Scan.lift Args.text_input) + Document_Output.antiquotation_verbatim_embedded name (Scan.lift Parse.embedded_input) (fn ctxt => fn text => let val _ = C_Module.eval_in text (SOME (Context.Proof ctxt)) (c text) in #1 (Input.source_content text) end); in val _ = Theory.setup (c_text \<^binding>\C\ (C_Module.c_enclose "" "") #> c_text \<^binding>\C_text\ (K C_Lex.read_init)); end; end; \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Lexer_Annotation.thy @@ -1,1422 +1,1396 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Annotation Language: Parsing Combinator\ theory C_Lexer_Annotation imports C_Lexer_Language begin ML \ \\<^file>\~~/src/Pure/Isar/keyword.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Isar/keyword.ML Author: Makarius Isar keyword classification. *) \ structure C_Keyword = struct (** keyword classification **) (* kinds *) val command_kinds = [Keyword.diag, Keyword.document_heading, Keyword.document_body, Keyword.document_raw, Keyword.thy_begin, Keyword.thy_end, Keyword.thy_load, Keyword.thy_decl, Keyword.thy_decl_block, Keyword.thy_defn, Keyword.thy_stmt, Keyword.thy_goal, Keyword.thy_goal_defn, Keyword.thy_goal_stmt, Keyword.qed, Keyword.qed_script, Keyword.qed_block, Keyword.qed_global, Keyword.prf_goal, Keyword.prf_block, Keyword.next_block, Keyword.prf_open, Keyword.prf_close, Keyword.prf_chain, Keyword.prf_decl, Keyword.prf_asm, Keyword.prf_asm_goal, Keyword.prf_script, Keyword.prf_script_goal, Keyword.prf_script_asm_goal]; (* specifications *) type entry = {pos: Position.T, id: serial, kind: string, files: string list, (*extensions of embedded files*) tags: string list}; fun check_spec pos ((kind, files), tags) : entry = if not (member (op =) command_kinds kind) then error ("Unknown annotation syntax keyword kind " ^ quote kind) else if not (null files) andalso kind <> Keyword.thy_load then error ("Illegal specification of files for " ^ quote kind) else {pos = pos, id = serial (), kind = kind, files = files, tags = tags}; (** keyword tables **) (* type keywords *) datatype keywords = Keywords of {minor: Scan.lexicon, major: Scan.lexicon, commands: entry Symtab.table}; fun minor_keywords (Keywords {minor, ...}) = minor; fun major_keywords (Keywords {major, ...}) = major; fun make_keywords (minor, major, commands) = Keywords {minor = minor, major = major, commands = commands}; fun map_keywords f (Keywords {minor, major, commands}) = make_keywords (f (minor, major, commands)); (* build keywords *) val empty_keywords = make_keywords (Scan.empty_lexicon, Scan.empty_lexicon, Symtab.empty); fun empty_keywords' minor = make_keywords (minor, Scan.empty_lexicon, Symtab.empty); fun merge_keywords (Keywords {minor = minor1, major = major1, commands = commands1}, Keywords {minor = minor2, major = major2, commands = commands2}) = make_keywords (Scan.merge_lexicons (minor1, minor2), Scan.merge_lexicons (major1, major2), Symtab.merge (K true) (commands1, commands2)); val add_keywords0 = fold (fn ((name, pos), force_minor, spec as ((kind, _), _)) => map_keywords (fn (minor, major, commands) => let val extend = Scan.extend_lexicon (Symbol.explode name) fun update spec = Symtab.update (name, spec) in if force_minor then (extend minor, major, update (check_spec pos spec) commands) else if kind = "" orelse kind = Keyword.before_command orelse kind = Keyword.quasi_command then (extend minor, major, commands) else (minor, extend major, update (check_spec pos spec) commands) end)); val add_keywords = add_keywords0 o map (fn (cmd, spec) => (cmd, false, spec)) val add_keywords_minor = add_keywords0 o map (fn (cmd, spec) => (cmd, true, spec)) (* keyword status *) fun is_command (Keywords {commands, ...}) = Symtab.defined commands; fun dest_commands (Keywords {commands, ...}) = Symtab.keys commands; (* command keywords *) fun lookup_command (Keywords {commands, ...}) = Symtab.lookup commands; fun command_markup keywords name = let (* PATCH: copied as such from Isabelle2020 *) fun entity_properties_of def serial pos = if def then (Markup.defN, Value.print_int serial) :: Position.properties_of pos else (Markup.refN, Value.print_int serial) :: Position.def_properties_of pos; in lookup_command keywords name |> Option.map (fn {pos, id, ...} => Markup.properties (entity_properties_of false id pos) (Markup.entity Markup.command_keywordN name)) end; fun command_files keywords name path = (case lookup_command keywords name of NONE => [] | SOME {kind, files, ...} => if kind <> Keyword.thy_load then [] else if null files then [path] else map (fn ext => Path.ext ext path) files); (* command categories *) fun command_category ks = let val tab = Symtab.make_set ks; fun pred keywords name = (case lookup_command keywords name of NONE => false | SOME {kind, ...} => Symtab.defined tab kind); in pred end; val is_theory_end = command_category [Keyword.thy_end]; val is_proof_asm = command_category [Keyword.prf_asm, Keyword.prf_asm_goal]; val is_improper = command_category [ Keyword.qed_script , Keyword.prf_script , Keyword.prf_script_goal , Keyword.prf_script_asm_goal]; end; \ text \ Notes: \<^item> The next structure contains a duplicated copy of the type \<^ML_type>\Token.T\, since it is not possible to set an arbitrary \<^emph>\slot\ value in \<^ML_structure>\Token\. \<^item> Parsing priorities in C and HOL slightly differ, see for instance \<^ML>\Token.explode\. \ ML \ \\<^file>\~~/src/Pure/Isar/token.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Isar/token.ML Author: Markus Wenzel, TU Muenchen Outer token syntax for Isabelle/Isar. *) \ structure C_Token = struct (** tokens **) (* token kind *) val immediate_kinds' = fn Token.Command => 0 | Token.Keyword => 1 | Token.Ident => 2 | Token.Long_Ident => 3 | Token.Sym_Ident => 4 | Token.Var => 5 | Token.Type_Ident => 6 | Token.Type_Var => 7 | Token.Nat => 8 | Token.Float => 9 | Token.Space => 10 | _ => ~1 val delimited_kind = (fn Token.String => true | Token.Alt_String => true - | Token.Verbatim => true | Token.Cartouche => true | Token.Comment _ => true | _ => false); (* datatype token *) (*The value slot assigns an (optional) internal value to a token, usually as a side-effect of special scanner setup (see also args.ML). Note that an assignable ref designates an intermediate state of internalization -- it is NOT meant to persist.*) datatype T = Token of (Symbol_Pos.text * Position.range) * (Token.kind * string) * slot and slot = Slot | Value of value option | Assignable of value option Unsynchronized.ref and value = Source of T list | Literal of bool * Markup.T | Name of Token.name_value * morphism | Typ of typ | Term of term | Fact of string option * thm list | (*optional name for dynamic fact, i.e. fact "variable"*) Attribute of morphism -> attribute | Declaration of declaration | Files of Token.file Exn.result list; type src = T list; (* position *) fun pos_of (Token ((_, (pos, _)), _, _)) = pos; fun end_pos_of (Token ((_, (_, pos)), _, _)) = pos; fun adjust_offsets adjust (Token ((x, range), y, z)) = Token ((x, apply2 (Position.adjust_offsets adjust) range), y, z); (* stopper *) fun mk_eof pos = Token (("", (pos, Position.none)), (Token.EOF, ""), Slot); val eof = mk_eof Position.none; fun is_eof (Token (_, (Token.EOF, _), _)) = true | is_eof _ = false; val not_eof = not o is_eof; val stopper = Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; (* kind of token *) fun kind_of (Token (_, (k, _), _)) = k; fun is_kind k (Token (_, (k', _), _)) = k = k'; val is_command = is_kind Token.Command; fun keyword_with pred (Token (_, (Token.Keyword, x), _)) = pred x | keyword_with _ _ = false; val is_command_modifier = keyword_with (fn x => x = "private" orelse x = "qualified"); fun ident_with pred (Token (_, (Token.Ident, x), _)) = pred x | ident_with _ _ = false; fun is_ignored (Token (_, (Token.Space, _), _)) = true | is_ignored (Token (_, (Token.Comment NONE, _), _)) = true | is_ignored _ = false; fun is_proper (Token (_, (Token.Space, _), _)) = false | is_proper (Token (_, (Token.Comment _, _), _)) = false | is_proper _ = true; fun is_comment (Token (_, (Token.Comment _, _), _)) = true | is_comment _ = false; fun is_informal_comment (Token (_, (Token.Comment NONE, _), _)) = true | is_informal_comment _ = false; fun is_formal_comment (Token (_, (Token.Comment (SOME _), _), _)) = true | is_formal_comment _ = false; fun is_document_marker (Token (_, (Token.Comment (SOME Comment.Marker), _), _)) = true | is_document_marker _ = false; fun is_begin_ignore (Token (_, (Token.Comment NONE, "<"), _)) = true | is_begin_ignore _ = false; fun is_end_ignore (Token (_, (Token.Comment NONE, ">"), _)) = true | is_end_ignore _ = false; fun is_error (Token (_, (Token.Error _, _), _)) = true | is_error _ = false; fun is_error' (Token (_, (Token.Error msg, _), _)) = SOME msg | is_error' _ = NONE; fun content_of (Token (_, (_, x), _)) = x; fun content_of' (Token (_, (_, _), Value (SOME (Source l)))) = map (fn Token ((_, (pos, _)), (_, x), _) => (x, pos)) l | content_of' _ = []; val is_stack1 = fn Token (_, (Token.Sym_Ident, _), Value (SOME (Source l))) => forall (fn tok => content_of tok = "+") l | _ => false; val is_stack2 = fn Token (_, (Token.Sym_Ident, _), Value (SOME (Source l))) => forall (fn tok => content_of tok = "@") l | _ => false; val is_stack3 = fn Token (_, (Token.Sym_Ident, _), Value (SOME (Source l))) => forall (fn tok => content_of tok = "&") l | _ => false; (* blanks and newlines -- space tokens obey lines *) fun is_space (Token (_, (Space, _), _)) = true | is_space _ = false; fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) | is_blank _ = false; fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x | is_newline _ = false; (* range of tokens *) fun range_of (toks as tok :: _) = let val pos' = end_pos_of (List.last toks) in Position.range (pos_of tok, pos') end | range_of [] = Position.no_range; val core_range_of = drop_prefix is_ignored #> drop_suffix is_ignored #> range_of; (* token content *) fun content_of (Token (_, (_, x), _)) = x; fun source_of (Token ((source, _), _, _)) = source; fun input_of (Token ((source, range), (kind, _), _)) = Input.source (delimited_kind kind) source range; fun inner_syntax_of tok = let val x = content_of tok in if YXML.detect x then x else Syntax.implode_input (input_of tok) end; (* markup reports *) local val token_kind_markup = fn Token.Var => (Markup.var, "") | Token.Type_Ident => (Markup.tfree, "") | Token.Type_Var => (Markup.tvar, "") | Token.String => (Markup.string, "") | Token.Alt_String => (Markup.alt_string, "") - | Token.Verbatim => (Markup.verbatim, "") | Token.Cartouche => (Markup.cartouche, "") | Token.Comment _ => (Markup.ML_comment, "") | Token.Error msg => (Markup.bad (), msg) | _ => (Markup.empty, ""); fun keyword_reports tok = map (fn markup => ((pos_of tok, markup), "")); fun command_markups keywords x = if C_Keyword.is_theory_end keywords x then [Markup.keyword2 |> Markup.keyword_properties] else (if C_Keyword.is_proof_asm keywords x then [Markup.keyword3] else if C_Keyword.is_improper keywords x then [Markup.keyword1, Markup.improper] else [Markup.keyword1]) |> map Markup.command_properties; fun keyword_markup (important, keyword) x = if important orelse Symbol.is_ascii_identifier x then keyword else Markup.delimiter; fun command_minor_markups keywords x = if C_Keyword.is_theory_end keywords x then [Markup.keyword2 |> Markup.keyword_properties] else (if C_Keyword.is_proof_asm keywords x then [Markup.keyword3] else if C_Keyword.is_improper keywords x then [Markup.keyword1, Markup.improper] else if C_Keyword.is_command keywords x then [Markup.keyword1] else [keyword_markup (false, Markup.keyword2 |> Markup.keyword_properties) x]); in fun completion_report tok = if is_kind Token.Keyword tok then map (fn m => ((pos_of tok, m), "")) (Completion.suppress_abbrevs (content_of tok)) else []; fun reports keywords tok = if is_command tok then keyword_reports tok (command_markups keywords (content_of tok)) else if is_stack1 tok orelse is_stack2 tok orelse is_stack3 tok then keyword_reports tok [Markup.keyword2 |> Markup.keyword_properties] else if is_kind Token.Keyword tok then keyword_reports tok (command_minor_markups keywords (content_of tok)) else let val pos = pos_of tok; val (m, text) = token_kind_markup (kind_of tok); val delete = (Symbol_Pos.explode_deleted (source_of tok, pos)); in ((pos, m), text) :: map (fn p => ((p, Markup.delete), "")) delete end; fun markups keywords = map (#2 o #1) o reports keywords; end; (* unparse *) fun unparse' (Token ((source0, _), (kind, x), _)) = let val source = \ \ We are computing a reverse function of \<^ML>\Symbol_Pos.implode_range\ taking into account consecutive \<^ML>\Symbol.DEL\ symbols potentially appearing at the beginning, or at the end of the string. As remark, \<^ML>\Symbol_Pos.explode_deleted\ will remove any potentially consecutive \<^ML>\Symbol.DEL\ symbols. This is why it is not used here.\ case Symbol.explode source0 of x :: xs => if x = Symbol.DEL then case rev xs of x' :: xs => if x' = Symbol.DEL then implode (rev xs) else source0 | _ => source0 else source0 | _ => source0 in case kind of Token.String => Symbol_Pos.quote_string_qq source | Token.Alt_String => Symbol_Pos.quote_string_bq source - | Token.Verbatim => enclose "{*" "*}" source | Token.Cartouche => cartouche source | Token.Comment NONE => enclose "(*" "*)" source | Token.EOF => "" | _ => x end; fun text_of tok = let val k = Token.str_of_kind (kind_of tok); val ms = markups C_Keyword.empty_keywords tok; val s = unparse' tok; in if s = "" then (k, "") else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ Markup.markups ms s, "") else (k, Markup.markups ms s) end; (** associated values **) (* inlined file content *) fun file_source (file: Token.file) = let val text = cat_lines (#lines file); val end_pos = fold Position.symbol (Symbol.explode text) (#pos file); in Input.source true text (Position.range (#pos file, end_pos)) end; fun get_files (Token (_, _, Value (SOME (Files files)))) = files | get_files _ = []; fun put_files [] tok = tok | put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files))) | put_files _ tok = raise Fail ("Cannot put inlined files here" ^ Position.here (pos_of tok)); (* access values *) (* reports of value *) (* name value *) (* maxidx *) (* fact values *) (* transform *) (* static binding *) (*1st stage: initialize assignable slots*) fun init_assignable tok = (case tok of Token (x, y, Slot) => Token (x, y, Assignable (Unsynchronized.ref NONE)) | Token (_, _, Value _) => tok | Token (_, _, Assignable r) => (r := NONE; tok)); (*2nd stage: assign values as side-effect of scanning*) fun assign v tok = (case tok of Token (x, y, Slot) => Token (x, y, Value v) | Token (_, _, Value _) => tok | Token (_, _, Assignable r) => (r := v; tok)); fun evaluate mk eval arg = let val x = eval arg in (assign (SOME (mk x)) arg; x) end; (*3rd stage: static closure of final values*) fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) | closure tok = tok; (* pretty *) (* src *) (** scanners **) open Basic_Symbol_Pos; val err_prefix = "Annotation lexical error: "; fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); (* scan stack *) fun scan_stack is_stack = Scan.optional (Scan.one is_stack >> content_of') [] (* scan symbolic idents *) val scan_symid = Scan.many1 (Symbol.is_symbolic_char o Symbol_Pos.symbol) || Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single; fun is_symid str = (case try Symbol.explode str of SOME [s] => Symbol.is_symbolic s orelse Symbol.is_symbolic_char s | SOME ss => forall Symbol.is_symbolic_char ss | _ => false); fun ident_or_symbolic "begin" = false | ident_or_symbolic ":" = true | ident_or_symbolic "::" = true | ident_or_symbolic s = Symbol_Pos.is_identifier s orelse is_symid s; -(* scan verbatim text *) - -val scan_verb = - $$$ "*" --| Scan.ahead (~$$ "}") || - Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single; - -val scan_verbatim = - Scan.ahead ($$ "{" -- $$ "*") |-- - !!! "unclosed verbatim text" - ((Symbol_Pos.scan_pos --| $$ "{" --| $$ "*") -- - (Scan.repeats scan_verb -- ($$ "*" |-- $$ "}" |-- Symbol_Pos.scan_pos))); - -val recover_verbatim = - $$$ "{" @@@ $$$ "*" @@@ Scan.repeats scan_verb; - - (* scan cartouche *) val scan_cartouche = Symbol_Pos.scan_pos -- ((Symbol_Pos.scan_cartouche err_prefix >> Symbol_Pos.cartouche_content) -- Symbol_Pos.scan_pos); (* scan space *) fun space_symbol (s, _) = Symbol.is_blank s andalso s <> "\n"; val scan_space = Scan.many1 space_symbol @@@ Scan.optional ($$$ "\n") [] || Scan.many space_symbol @@@ $$$ "\n"; (* scan comment *) val scan_comment = Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body err_prefix -- Symbol_Pos.scan_pos); (** token sources **) local fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; fun token k ss = Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.content ss), Slot); fun token' (mk_value, k) ss = if mk_value then Token ( (Symbol_Pos.implode ss, Symbol_Pos.range ss) , (k, Symbol_Pos.content ss) , Value (SOME (Source (map (fn (s, pos) => Token (("", (pos, Position.none)), (k, s), Slot)) ss)))) else token k ss; fun token_t k = token' (true, k) fun token_range k (pos1, (ss, pos2)) = Token (Symbol_Pos.implode_range (pos1, pos2) ss, (k, Symbol_Pos.content ss), Slot); fun scan_token keywords = !!! "bad input" (Symbol_Pos.scan_string_qq err_prefix >> token_range Token.String || Symbol_Pos.scan_string_bq err_prefix >> token_range Token.Alt_String || - scan_verbatim >> token_range Token.Verbatim || scan_cartouche >> token_range Token.Cartouche || scan_comment >> token_range (Token.Comment NONE) || Comment.scan_outer >> (fn (k, ss) => token (Token.Comment (SOME k)) ss) || scan_space >> token Token.Space || Scan.repeats1 ($$$ "+") >> token_t Token.Sym_Ident || Scan.repeats1 ($$$ "@") >> token_t Token.Sym_Ident || Scan.repeats1 ($$$ "&") >> token_t Token.Sym_Ident || (Scan.max token_leq (Scan.max token_leq (Scan.literal (C_Keyword.major_keywords keywords) >> pair Token.Command) (Scan.literal (C_Keyword.minor_keywords keywords) >> pair Token.Keyword)) (Lexicon.scan_longid >> pair Token.Long_Ident || Scan.max token_leq (C_Lex.scan_ident >> pair Token.Ident) (Lexicon.scan_id >> pair Token.Ident) || Lexicon.scan_var >> pair Token.Var || Lexicon.scan_tid >> pair Token.Type_Ident || Lexicon.scan_tvar >> pair Token.Type_Var || Symbol_Pos.scan_float >> pair Token.Float || Symbol_Pos.scan_nat >> pair Token.Nat || scan_symid >> pair Token.Sym_Ident)) >> uncurry (token' o pair false)); fun recover msg = (Symbol_Pos.recover_string_qq || Symbol_Pos.recover_string_bq || - recover_verbatim || Symbol_Pos.recover_cartouche || Symbol_Pos.recover_comment || Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single) >> (single o token (Token.Error msg)); in fun make_source keywords {strict} = let val scan_strict = Scan.bulk (scan_token keywords); val scan = if strict then scan_strict else Scan.recover scan_strict recover; in Source.source Symbol_Pos.stopper scan end; end; (* explode *) fun tokenize keywords strict syms = Source.of_list syms |> make_source keywords strict |> Source.exhaust; fun explode keywords pos text = Symbol_Pos.explode (text, pos) |> tokenize keywords {strict = false}; fun explode0 keywords = explode keywords Position.none; (* print name in parsable form *) (* make *) (** parsers **) type 'a parser = T list -> 'a * T list; type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list); (* read body -- e.g. antiquotation source *) fun read_with_commands'0 keywords syms = Source.of_list syms |> make_source keywords {strict = false} |> Source.filter (not o is_proper) |> Source.exhaust fun read_with_commands' keywords scan syms = Source.of_list syms |> make_source keywords {strict = false} |> Source.filter is_proper |> Source.source stopper (Scan.recover (Scan.bulk scan) (fn msg => Scan.one (not o is_eof) >> (fn tok => [C_Scan.Right let val msg = case is_error' tok of SOME msg0 => msg0 ^ " (" ^ msg ^ ")" | NONE => msg in ( msg , [((pos_of tok, Markup.bad ()), msg)] , tok) end]))) |> Source.exhaust; fun read_antiq' keywords scan = read_with_commands' keywords (scan >> C_Scan.Left); (* wrapped syntax *) local fun make src pos = Token.make src pos |> #1 fun make_default text pos = make ((~1, 0), text) pos fun explode keywords pos text = case Token.explode keywords pos text of [tok] => tok | _ => make_default text pos in fun syntax' f = I #> map (fn tok0 as Token ((source, (pos1, pos2)), (kind, x), _) => if is_stack1 tok0 orelse is_stack2 tok0 orelse is_stack3 tok0 then make_default source pos1 else if is_eof tok0 then Token.eof else if delimited_kind kind then explode Keyword.empty_keywords pos1 (unparse' tok0) else let val tok1 = explode ((case kind of Token.Keyword => Keyword.add_keywords [((x, Position.none), Keyword.no_spec)] | Token.Command => Keyword.add_keywords [( (x, Position.none), Keyword.command_spec(Keyword.thy_decl, []))] | _ => I) Keyword.empty_keywords) pos1 source in if Token.kind_of tok1 = kind then tok1 else make ( ( immediate_kinds' kind , case Position.distance_of (pos1, pos2) of NONE => 0 | SOME i => i) , source) pos1 end) #> f #> apsnd (map (fn tok => Token ( (Token.source_of tok, Token.range_of [tok]) , (Token.kind_of tok, Token.content_of tok) , Slot))) end end; type 'a c_parser = 'a C_Token.parser; type 'a c_context_parser = 'a C_Token.context_parser; \ ML \ \\<^file>\~~/src/Pure/Isar/parse.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay parsers for C syntax. A partial copy is unfortunately necessary due to signature restrictions. *) (* based on: Title: Pure/Isar/parse.ML Author: Markus Wenzel, TU Muenchen Generic parsers for Isabelle/Isar outer syntax. *) \ signature C_PARSE = sig type T type src = T list type 'a parser = T list -> 'a * T list type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list) (**) val C_source: Input.source parser val star: string parser (**) val group: (unit -> string) -> (T list -> 'a) -> T list -> 'a val !!! : (T list -> 'a) -> T list -> 'a val !!!! : (T list -> 'a) -> T list -> 'a val not_eof: T parser val token: 'a parser -> T parser val range: 'a parser -> ('a * Position.range) parser val position: 'a parser -> ('a * Position.T) parser val input: 'a parser -> Input.source parser val inner_syntax: 'a parser -> string parser val command: string parser val keyword: string parser val short_ident: string parser val long_ident: string parser val sym_ident: string parser val dots: string parser val minus: string parser val term_var: string parser val type_ident: string parser val type_var: string parser val number: string parser val float_number: string parser val string: string parser val string_position: (string * Position.T) parser val alt_string: string parser - val verbatim: string parser val cartouche: string parser val eof: string parser val command_name: string -> string parser val keyword_with: (string -> bool) -> string parser val keyword_markup: bool * Markup.T -> string -> string parser val keyword_improper: string -> string parser val $$$ : string -> string parser val reserved: string -> string parser val underscore: string parser val maybe: 'a parser -> 'a option parser val maybe_position: ('a * Position.T) parser -> ('a option * Position.T) parser val opt_keyword: string -> bool parser val opt_bang: bool parser val begin: string parser val opt_begin: bool parser val nat: int parser val int: int parser val real: real parser val enum_positions: string -> 'a parser -> ('a list * Position.T list) parser val enum1_positions: string -> 'a parser -> ('a list * Position.T list) parser val enum: string -> 'a parser -> 'a list parser val enum1: string -> 'a parser -> 'a list parser val and_list: 'a parser -> 'a list parser val and_list1: 'a parser -> 'a list parser val enum': string -> 'a context_parser -> 'a list context_parser val enum1': string -> 'a context_parser -> 'a list context_parser val and_list': 'a context_parser -> 'a list context_parser val and_list1': 'a context_parser -> 'a list context_parser val list: 'a parser -> 'a list parser val list1: 'a parser -> 'a list parser val name: string parser val name_range: (string * Position.range) parser val name_position: (string * Position.T) parser val binding: binding parser val embedded: string parser val embedded_input: Input.source parser val embedded_position: (string * Position.T) parser - val text: string parser val path: string parser val path_binding: (string * Position.T) parser val session_name: (string * Position.T) parser val theory_name: (string * Position.T) parser val liberal_name: string parser val parname: string parser val parbinding: binding parser val class: string parser val sort: string parser val type_const: string parser val arity: (string * string list * string) parser val multi_arity: (string list * string list * string) parser val type_args: string list parser val type_args_constrained: (string * string option) list parser val typ: string parser val mixfix: mixfix parser val mixfix': mixfix parser val opt_mixfix: mixfix parser val opt_mixfix': mixfix parser val syntax_mode: Syntax.mode parser val where_: string parser val const_decl: (string * string * mixfix) parser val const_binding: (binding * string * mixfix) parser val params: (binding * string option * mixfix) list parser val vars: (binding * string option * mixfix) list parser val for_fixes: (binding * string option * mixfix) list parser val ML_source: Input.source parser val document_source: Input.source parser val document_marker: Input.source parser val const: string parser val term: string parser val prop: string parser val literal_fact: string parser val propp: (string * string list) parser val termp: (string * string list) parser val private: Position.T parser val qualified: Position.T parser val target: (string * Position.T) parser val opt_target: (string * Position.T) option parser val args: T list parser val args1: (string -> bool) -> T list parser val attribs: src list parser val opt_attribs: src list parser val thm_sel: Facts.interval list parser val thm: (Facts.ref * src list) parser val thms1: (Facts.ref * src list) list parser val options: ((string * Position.T) * (string * Position.T)) list parser end; structure C_Parse: C_PARSE = struct type T = C_Token.T type src = T list type 'a parser = T list -> 'a * T list type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list) structure Token = struct open Token open C_Token end (** error handling **) (* group atomic parsers (no cuts!) *) fun group s scan = scan || Scan.fail_with (fn [] => (fn () => s () ^ " expected,\nbut end-of-input was found") | tok :: _ => (fn () => (case Token.text_of tok of (txt, "") => s () ^ " expected,\nbut " ^ txt ^ Position.here (Token.pos_of tok) ^ " was found" | (txt1, txt2) => s () ^ " expected,\nbut " ^ txt1 ^ Position.here (Token.pos_of tok) ^ " was found:\n" ^ txt2))); (* cut *) fun cut kind scan = let fun get_pos [] = " (end-of-input)" | get_pos (tok :: _) = Position.here (Token.pos_of tok); fun err (toks, NONE) = (fn () => kind ^ get_pos toks) | err (toks, SOME msg) = (fn () => let val s = msg () in if String.isPrefix kind s then s else kind ^ get_pos toks ^ ": " ^ s end); in Scan.!! err scan end; fun !!! scan = cut "Annotation syntax error" scan; fun !!!! scan = cut "Corrupted annotation syntax in presentation" scan; (** basic parsers **) (* tokens *) fun RESET_VALUE atom = (*required for all primitive parsers*) Scan.ahead (Scan.one (K true)) -- atom >> (fn (arg, x) => (Token.assign NONE arg; x)); val not_eof = RESET_VALUE (Scan.one Token.not_eof); fun token atom = Scan.ahead not_eof --| atom; fun range scan = (Scan.ahead not_eof >> (Token.range_of o single)) -- scan >> Library.swap; fun position scan = (Scan.ahead not_eof >> Token.pos_of) -- scan >> Library.swap; fun input atom = Scan.ahead atom |-- not_eof >> Token.input_of; fun inner_syntax atom = Scan.ahead atom |-- not_eof >> Token.inner_syntax_of; fun kind k = group (fn () => Token.str_of_kind k) (RESET_VALUE (Scan.one (Token.is_kind k) >> Token.content_of)); val command = kind Token.Command; val keyword = kind Token.Keyword; val short_ident = kind Token.Ident; val long_ident = kind Token.Long_Ident; val sym_ident = kind Token.Sym_Ident; val term_var = kind Token.Var; val type_ident = kind Token.Type_Ident; val type_var = kind Token.Type_Var; val number = kind Token.Nat; val float_number = kind Token.Float; val string = kind Token.String; val alt_string = kind Token.Alt_String; -val verbatim = kind Token.Verbatim; val cartouche = kind Token.Cartouche; val eof = kind Token.EOF; fun command_name x = group (fn () => Token.str_of_kind Token.Command ^ " " ^ quote x) (RESET_VALUE (Scan.one (fn tok => Token.is_command tok andalso Token.content_of tok = x))) >> Token.content_of; fun keyword_with pred = RESET_VALUE (Scan.one (Token.keyword_with pred) >> Token.content_of); fun keyword_markup markup x = group (fn () => Token.str_of_kind Token.Keyword ^ " " ^ quote x) (Scan.ahead not_eof -- keyword_with (fn y => x = y)) >> (fn (tok, x) => (Token.assign (SOME (Token.Literal markup)) tok; x)); val keyword_improper = keyword_markup (true, Markup.improper); val $$$ = keyword_markup (false, Markup.quasi_keyword); fun reserved x = group (fn () => "reserved identifier " ^ quote x) (RESET_VALUE (Scan.one (Token.ident_with (fn y => x = y)) >> Token.content_of)); val dots = sym_ident :-- (fn "\" => Scan.succeed () | _ => Scan.fail) >> #1; val minus = sym_ident :-- (fn "-" => Scan.succeed () | _ => Scan.fail) >> #1; val underscore = sym_ident :-- (fn "_" => Scan.succeed () | _ => Scan.fail) >> #1; fun maybe scan = underscore >> K NONE || scan >> SOME; fun maybe_position scan = position (underscore >> K NONE) || scan >> apfst SOME; val nat = number >> (#1 o Library.read_int o Symbol.explode); val int = Scan.optional (minus >> K ~1) 1 -- nat >> op *; val real = float_number >> Value.parse_real || int >> Real.fromInt; fun opt_keyword s = Scan.optional ($$$ "(" |-- !!! (($$$ s >> K true) --| $$$ ")")) false; val opt_bang = Scan.optional ($$$ "!" >> K true) false; val begin = $$$ "begin"; val opt_begin = Scan.optional (begin >> K true) false; (* enumerations *) fun enum1_positions sep scan = scan -- Scan.repeat (position ($$$ sep) -- !!! scan) >> (fn (x, ys) => (x :: map #2 ys, map (#2 o #1) ys)); fun enum_positions sep scan = enum1_positions sep scan || Scan.succeed ([], []); fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan); fun enum sep scan = enum1 sep scan || Scan.succeed []; fun enum1' sep scan = scan ::: Scan.repeat (Scan.lift ($$$ sep) |-- scan); fun enum' sep scan = enum1' sep scan || Scan.succeed []; fun and_list1 scan = enum1 "and" scan; fun and_list scan = enum "and" scan; fun and_list1' scan = enum1' "and" scan; fun and_list' scan = enum' "and" scan; fun list1 scan = enum1 "," scan; fun list scan = enum "," scan; (* names and embedded content *) val name = group (fn () => "name") (short_ident || long_ident || sym_ident || number || string); val name_range = input name >> Input.source_content_range; val name_position = input name >> Input.source_content; val string_position = input string >> Input.source_content; val binding = name_position >> Binding.make; val embedded = group (fn () => "embedded content") (cartouche || string || short_ident || long_ident || sym_ident || term_var || type_ident || type_var || number); val embedded_input = input embedded; val embedded_position = embedded_input >> Input.source_content; -val text = group (fn () => "text") (embedded || verbatim); - val path = group (fn () => "file name/path specification") embedded; val path_binding = group (fn () => "path binding (strict file name)") (position embedded); val session_name = group (fn () => "session name") name_position; val theory_name = group (fn () => "theory name") name_position; val liberal_name = keyword_with Token.ident_or_symbolic || name; val parname = Scan.optional ($$$ "(" |-- name --| $$$ ")") ""; val parbinding = Scan.optional ($$$ "(" |-- binding --| $$$ ")") Binding.empty; (* type classes *) val class = group (fn () => "type class") (inner_syntax embedded); val sort = group (fn () => "sort") (inner_syntax embedded); val type_const = group (fn () => "type constructor") (inner_syntax embedded); val arity = type_const -- ($$$ "::" |-- !!! (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> Scan.triple2; val multi_arity = and_list1 type_const -- ($$$ "::" |-- !!! (Scan.optional ($$$ "(" |-- !!! (list1 sort --| $$$ ")")) [] -- sort)) >> Scan.triple2; (* types *) val typ = group (fn () => "type") (inner_syntax embedded); fun type_arguments arg = arg >> single || $$$ "(" |-- !!! (list1 arg --| $$$ ")") || Scan.succeed []; val type_args = type_arguments type_ident; val type_args_constrained = type_arguments (type_ident -- Scan.option ($$$ "::" |-- !!! sort)); (* mixfix annotations *) local val mfix = input (string || cartouche); val mixfix_ = mfix -- !!! (Scan.optional ($$$ "[" |-- !!! (list nat --| $$$ "]")) [] -- Scan.optional nat 1000) >> (fn (sy, (ps, p)) => fn range => Mixfix (sy, ps, p, range)); val structure_ = $$$ "structure" >> K Structure; val binder_ = $$$ "binder" |-- !!! (mfix -- ($$$ "[" |-- nat --| $$$ "]" -- nat || nat >> (fn n => (n, n)))) >> (fn (sy, (p, q)) => fn range => Binder (sy, p, q, range)); val infixl_ = $$$ "infixl" |-- !!! (mfix -- nat >> (fn (sy, p) => fn range => Infixl (sy, p, range))); val infixr_ = $$$ "infixr" |-- !!! (mfix -- nat >> (fn (sy, p) => fn range => Infixr (sy, p, range))); val infix_ = $$$ "infix" |-- !!! (mfix -- nat >> (fn (sy, p) => fn range => Infix (sy, p, range))); val mixfix_body = mixfix_ || structure_ || binder_ || infixl_ || infixr_ || infix_; fun annotation guard body = Scan.trace ($$$ "(" |-- guard (body --| $$$ ")")) >> (fn (mx, toks) => mx (Token.range_of toks)); fun opt_annotation guard body = Scan.optional (annotation guard body) NoSyn; in val mixfix = annotation !!! mixfix_body; val mixfix' = annotation I mixfix_body; val opt_mixfix = opt_annotation !!! mixfix_body; val opt_mixfix' = opt_annotation I mixfix_body; end; (* syntax mode *) val syntax_mode_spec = ($$$ "output" >> K ("", false)) || name -- Scan.optional ($$$ "output" >> K false) true; val syntax_mode = Scan.optional ($$$ "(" |-- !!! (syntax_mode_spec --| $$$ ")")) Syntax.mode_default; (* fixes *) val where_ = $$$ "where"; val const_decl = name -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> Scan.triple1; val const_binding = binding -- ($$$ "::" |-- !!! typ) -- opt_mixfix >> Scan.triple1; val param_mixfix = binding -- Scan.option ($$$ "::" |-- typ) -- mixfix' >> (single o Scan.triple1); val params = (binding -- Scan.repeat binding) -- Scan.option ($$$ "::" |-- !!! (Scan.ahead typ -- embedded)) >> (fn ((x, ys), T) => (x, Option.map #1 T, NoSyn) :: map (fn y => (y, Option.map #2 T, NoSyn)) ys); val vars = and_list1 (param_mixfix || params) >> flat; val for_fixes = Scan.optional ($$$ "for" |-- !!! vars) []; (* embedded source text *) -val ML_source = input (group (fn () => "ML source") text); -val document_source = input (group (fn () => "document source") text); +val ML_source = input (group (fn () => "ML source") embedded); +val document_source = input (group (fn () => "document source") embedded); val document_marker = group (fn () => "document marker") (RESET_VALUE (Scan.one Token.is_document_marker >> Token.input_of)); (* terms *) val const = group (fn () => "constant") (inner_syntax embedded); val term = group (fn () => "term") (inner_syntax embedded); val prop = group (fn () => "proposition") (inner_syntax embedded); val literal_fact = inner_syntax (group (fn () => "literal fact") (alt_string || cartouche)); (* patterns *) val is_terms = Scan.repeat1 ($$$ "is" |-- term); val is_props = Scan.repeat1 ($$$ "is" |-- prop); val propp = prop -- Scan.optional ($$$ "(" |-- !!! (is_props --| $$$ ")")) []; val termp = term -- Scan.optional ($$$ "(" |-- !!! (is_terms --| $$$ ")")) []; (* target information *) val private = position ($$$ "private") >> #2; val qualified = position ($$$ "qualified") >> #2; val target = ($$$ "(" -- $$$ "in") |-- !!! (name_position --| $$$ ")"); val opt_target = Scan.option target; (* arguments within outer syntax *) local val argument_kinds = [Token.Ident, Token.Long_Ident, Token.Sym_Ident, Token.Var, Token.Type_Ident, Token.Type_Var, - Token.Nat, Token.Float, Token.String, Token.Alt_String, Token.Cartouche, Token.Verbatim]; + Token.Nat, Token.Float, Token.String, Token.Alt_String, Token.Cartouche]; fun arguments is_symid = let fun argument blk = group (fn () => "argument") (Scan.one (fn tok => let val kind = Token.kind_of tok in member (op =) argument_kinds kind orelse Token.keyword_with is_symid tok orelse (blk andalso Token.keyword_with (fn s => s = ",") tok) end)); fun args blk x = Scan.optional (args1 blk) [] x and args1 blk x = (Scan.repeats1 (Scan.repeat1 (argument blk) || argsp "(" ")" || argsp "[" "]")) x and argsp l r x = (token ($$$ l) ::: !!! (args true @@@ (token ($$$ r) >> single))) x; in (args, args1) end; in val args = #1 (arguments Token.ident_or_symbolic) false; fun args1 is_symid = #2 (arguments is_symid) false; end; (* attributes *) val attrib = token liberal_name ::: !!! args; val attribs = $$$ "[" |-- list attrib --| $$$ "]"; val opt_attribs = Scan.optional attribs []; (* theorem references *) val thm_sel = $$$ "(" |-- list1 (nat --| minus -- nat >> Facts.FromTo || nat --| minus >> Facts.From || nat >> Facts.Single) --| $$$ ")"; val thm = $$$ "[" |-- attribs --| $$$ "]" >> pair (Facts.named "") || (literal_fact >> Facts.Fact || name_position -- Scan.option thm_sel >> Facts.Named) -- opt_attribs; val thms1 = Scan.repeat1 thm; (* options *) val option_name = group (fn () => "option name") name_position; val option_value = group (fn () => "option value") ((token real || token name) >> Token.content_of); val option = option_name :-- (fn (_, pos) => Scan.optional ($$$ "=" |-- !!! (position option_value)) ("true", pos)); val options = $$$ "[" |-- list1 option --| $$$ "]"; (** C basic parsers **) (* embedded source text *) -val C_source = input (group (fn () => "C source") text); +val C_source = input (group (fn () => "C source") embedded); (* AutoCorres (MODIFIES) *) val star = sym_ident :-- (fn "*" => Scan.succeed () | _ => Scan.fail) >> #1; end; structure C_Parse_Native: C_PARSE = struct open Token open Parse (** C basic parsers **) (* embedded source text *) -val C_source = input (group (fn () => "C source") text); +val C_source = input (group (fn () => "C source") embedded); (* AutoCorres (MODIFIES) *) val star = sym_ident :-- (fn "*" => Scan.succeed () | _ => Scan.fail) >> #1; end; \ ML \ \\<^file>\~~/src/Pure/Thy/thy_header.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Thy/thy_header.ML Author: Makarius Static theory header information. *) \ structure C_Thy_Header = struct val bootstrap_keywords = C_Keyword.empty_keywords' (Keyword.minor_keywords (Thy_Header.get_keywords @{theory})) (* theory data *) structure Data = Theory_Data ( type T = C_Keyword.keywords; val empty = bootstrap_keywords; val merge = C_Keyword.merge_keywords; ); val add_keywords = Data.map o C_Keyword.add_keywords; val add_keywords_minor = Data.map o C_Keyword.add_keywords_minor; val get_keywords = Data.get; val get_keywords' = get_keywords o Proof_Context.theory_of; end \ end diff --git a/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy b/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy --- a/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy +++ b/thys/Isabelle_C/C11-FrontEnd/src/C_Parser_Language.thy @@ -1,1380 +1,1380 @@ (****************************************************************************** * Isabelle/C * * Copyright (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) section \Core Language: Parsing Support (C Language without Annotations)\ theory C_Parser_Language imports C_Environment begin text \ As mentioned in \<^theory>\Isabelle_C.C_Ast\, Isabelle/C depends on certain external parsing libraries, such as \<^dir>\../../src_ext/mlton\, and more specifically \<^dir>\../../src_ext/mlton/lib/mlyacc-lib\. Actually, the sole theory making use of the files in \<^dir>\../../src_ext/mlton/lib/mlyacc-lib\ is the present \<^file>\C_Parser_Language.thy\. (Any other remaining files in \<^dir>\../../src_ext/mlton\ are not used by Isabelle/C, they come from the original repository of MLton: \<^url>\https://github.com/MLton/mlton\.) \ subsection \Parsing Library (Including Semantic Functions)\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ (* * Modified by Frédéric Tuong, Université Paris-Saclay * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Language.C * https://hackage.haskell.org/package/language-c * * Copyright (c) 1999-2017 Manuel M T Chakravarty * Duncan Coutts * Benedikt Huber * Portions Copyright (c) 1989,1990 James A. Roskind * * * * * * * * * * * * * * * * * * * * * * * * * * * * * Language.C.Comments * https://hackage.haskell.org/package/language-c-comments * * Copyright (c) 2010-2014 Geoff Hulette *) \ signature C_GRAMMAR_RULE_LIB = sig type arg = (C_Antiquote.antiq * C_Env.antiq_language list) C_Env.T type 'a monad = arg -> 'a * arg (* type aliases *) type class_Pos = C_Ast.class_Pos type reports_text0' = { markup : Markup.T, markup_body : string } type reports_text0 = reports_text0' list -> reports_text0' list type ('a, 'b) reports_base = ('a C_Env.markup_store * Position.T list, Position.T list * 'a C_Env.markup_store option) C_Ast.either -> Position.T list -> string -> 'b -> 'b (**) type NodeInfo = C_Ast.nodeInfo type CStorageSpec = NodeInfo C_Ast.cStorageSpecifier type CFunSpec = NodeInfo C_Ast.cFunctionSpecifier type CConst = NodeInfo C_Ast.cConstant type 'a CInitializerList = ('a C_Ast.cPartDesignator List.list * 'a C_Ast.cInitializer) List.list type CTranslUnit = NodeInfo C_Ast.cTranslationUnit type CExtDecl = NodeInfo C_Ast.cExternalDeclaration type CFunDef = NodeInfo C_Ast.cFunctionDef type CDecl = NodeInfo C_Ast.cDeclaration type CDeclr = NodeInfo C_Ast.cDeclarator type CDerivedDeclr = NodeInfo C_Ast.cDerivedDeclarator type CArrSize = NodeInfo C_Ast.cArraySize type CStat = NodeInfo C_Ast.cStatement type CAsmStmt = NodeInfo C_Ast.cAssemblyStatement type CAsmOperand = NodeInfo C_Ast.cAssemblyOperand type CBlockItem = NodeInfo C_Ast.cCompoundBlockItem type CDeclSpec = NodeInfo C_Ast.cDeclarationSpecifier type CTypeSpec = NodeInfo C_Ast.cTypeSpecifier type CTypeQual = NodeInfo C_Ast.cTypeQualifier type CAlignSpec = NodeInfo C_Ast.cAlignmentSpecifier type CStructUnion = NodeInfo C_Ast.cStructureUnion type CEnum = NodeInfo C_Ast.cEnumeration type CInit = NodeInfo C_Ast.cInitializer type CInitList = NodeInfo CInitializerList type CDesignator = NodeInfo C_Ast.cPartDesignator type CAttr = NodeInfo C_Ast.cAttribute type CExpr = NodeInfo C_Ast.cExpression type CBuiltin = NodeInfo C_Ast.cBuiltinThing type CStrLit = NodeInfo C_Ast.cStringLiteral (**) type ClangCVersion = C_Ast.clangCVersion type Ident = C_Ast.ident type Position = C_Ast.positiona type PosLength = Position * int type Name = C_Ast.namea type Bool = bool type CString = C_Ast.cString type CChar = C_Ast.cChar type CInteger = C_Ast.cInteger type CFloat = C_Ast.cFloat type CStructTag = C_Ast.cStructTag type CUnaryOp = C_Ast.cUnaryOp type 'a CStringLiteral = 'a C_Ast.cStringLiteral type 'a CConstant = 'a C_Ast.cConstant type ('a, 'b) Either = ('a, 'b) C_Ast.either type CIntRepr = C_Ast.cIntRepr type CIntFlag = C_Ast.cIntFlag type CAssignOp = C_Ast.cAssignOp type Comment = C_Ast.comment (**) type 'a Reversed = 'a C_Ast.Reversed type CDeclrR = C_Ast.CDeclrR type 'a Maybe = 'a C_Ast.optiona type 'a Located = 'a C_Ast.Located (**) structure List : sig val reverse : 'a list -> 'a list end (* monadic operations *) val return : 'a -> 'a monad val bind : 'a monad -> ('a -> 'b monad) -> 'b monad val bind' : 'b monad -> ('b -> unit monad) -> 'b monad val >> : unit monad * 'a monad -> 'a monad (* position reports *) val markup_make : ('a -> reports_text0' option) -> ('a -> 'b) -> ('b option -> string) -> ((Markup.T -> reports_text0) -> bool -> ('b, 'b option * string * reports_text0) C_Ast.either -> reports_text0) -> ('a, C_Position.reports_text) reports_base val markup_tvar : (C_Env.markup_global, C_Position.reports_text) reports_base val markup_var_enum : (C_Env.markup_global, C_Position.reports_text) reports_base val markup_var : (C_Env.markup_ident, C_Position.reports_text) reports_base val markup_var_bound : (C_Env.markup_ident, C_Position.reports_text) reports_base val markup_var_improper : (C_Env.markup_ident, C_Position.reports_text) reports_base (* Language.C.Data.RList *) val empty : 'a list Reversed val singleton : 'a -> 'a list Reversed val snoc : 'a list Reversed -> 'a -> 'a list Reversed val rappend : 'a list Reversed -> 'a list -> 'a list Reversed val rappendr : 'a list Reversed -> 'a list Reversed -> 'a list Reversed val rmap : ('a -> 'b) -> 'a list Reversed -> 'b list Reversed (* Language.C.Data.Position *) val posOf : 'a -> Position val posOf' : bool -> class_Pos -> Position * int val make_comment : Symbol_Pos.T list -> Symbol_Pos.T list -> Symbol_Pos.T list -> Position.range -> Comment (* Language.C.Data.Node *) val mkNodeInfo' : Position -> PosLength -> Name -> NodeInfo val decode : NodeInfo -> (class_Pos, string) Either val decode_error' : NodeInfo -> Position.range (* Language.C.Data.Ident *) val quad : string list -> int val ident_encode : string -> int val ident_decode : int -> string val mkIdent : Position * int -> string -> Name -> Ident val internalIdent : string -> Ident (* Language.C.Syntax.AST *) val liftStrLit : 'a CStringLiteral -> 'a CConstant (* Language.C.Syntax.Constants *) val concatCStrings : CString list -> CString (* Language.C.Parser.ParserMonad *) val getNewName : Name monad val shadowTypedef0'''' : string -> Position.T list -> C_Env.markup_ident -> C_Env.env_lang -> C_Env.env_tree -> C_Env.env_lang * C_Env.env_tree val shadowTypedef0' : C_Ast.CDeclSpec list C_Env.parse_status -> bool -> C_Ast.ident * C_Ast.CDerivedDeclr list -> C_Env.env_lang -> C_Env.env_tree -> C_Env.env_lang * C_Env.env_tree val isTypeIdent : string -> arg -> bool val enterScope : unit monad val leaveScope : unit monad val getCurrentPosition : Position monad (* Language.C.Parser.Tokens *) val CTokCLit : CChar -> (CChar -> 'a) -> 'a val CTokILit : CInteger -> (CInteger -> 'a) -> 'a val CTokFLit : CFloat -> (CFloat -> 'a) -> 'a val CTokSLit : CString -> (CString -> 'a) -> 'a (* Language.C.Parser.Parser *) val reverseList : 'a list -> 'a list Reversed val L : 'a -> int -> 'a Located monad val unL : 'a Located -> 'a val withNodeInfo : int -> (NodeInfo -> 'a) -> 'a monad val withNodeInfo_CExtDecl : CExtDecl -> (NodeInfo -> 'a) -> 'a monad val withNodeInfo_CExpr : CExpr list Reversed -> (NodeInfo -> 'a) -> 'a monad val withLength : NodeInfo -> (NodeInfo -> 'a) -> 'a monad val reverseDeclr : CDeclrR -> CDeclr val withAttribute : int -> CAttr list -> (NodeInfo -> CDeclrR) -> CDeclrR monad val withAttributePF : int -> CAttr list -> (NodeInfo -> CDeclrR -> CDeclrR) -> (CDeclrR -> CDeclrR) monad val appendObjAttrs : CAttr list -> CDeclr -> CDeclr val withAsmNameAttrs : CStrLit Maybe * CAttr list -> CDeclrR -> CDeclrR monad val appendDeclrAttrs : CAttr list -> CDeclrR -> CDeclrR val ptrDeclr : CDeclrR -> CTypeQual list -> NodeInfo -> CDeclrR val funDeclr : CDeclrR -> (Ident list, (CDecl list * Bool)) Either -> CAttr list -> NodeInfo -> CDeclrR val arrDeclr : CDeclrR -> CTypeQual list -> Bool -> Bool -> CExpr Maybe -> NodeInfo -> CDeclrR val liftTypeQuals : CTypeQual list Reversed -> CDeclSpec list val liftCAttrs : CAttr list -> CDeclSpec list val addTrailingAttrs : CDeclSpec list Reversed -> CAttr list -> CDeclSpec list Reversed val emptyDeclr : CDeclrR val mkVarDeclr : Ident -> NodeInfo -> CDeclrR val doDeclIdent : CDeclSpec list -> CDeclrR -> unit monad val ident_of_decl : (Ident list, CDecl list * bool) C_Ast.either -> (Ident * CDerivedDeclr list * CDeclSpec list) list val doFuncParamDeclIdent : CDeclr -> unit monad end structure C_Grammar_Rule_Lib : C_GRAMMAR_RULE_LIB = struct open C_Ast type arg = (C_Antiquote.antiq * C_Env.antiq_language list) C_Env.T type 'a monad = arg -> 'a * arg (**) type reports_text0' = { markup : Markup.T, markup_body : string } type reports_text0 = reports_text0' list -> reports_text0' list type 'a reports_store = Position.T list * serial * 'a type ('a, 'b) reports_base = ('a C_Env.markup_store * Position.T list, Position.T list * 'a C_Env.markup_store option) C_Ast.either -> Position.T list -> string -> 'b -> 'b fun markup_init markup = { markup = markup, markup_body = "" } val look_idents = C_Env_Ext.list_lookup o C_Env_Ext.get_idents val look_idents' = C_Env_Ext.list_lookup o C_Env_Ext.get_idents' val look_tyidents_typedef = C_Env_Ext.list_lookup o C_Env_Ext.get_tyidents_typedef val look_tyidents'_typedef = C_Env_Ext.list_lookup o C_Env_Ext.get_tyidents'_typedef val To_string0 = meta_of_logic val ident_encode = Word8Vector.foldl (fn (w, acc) => Word8.toInt w + acc * 256) 0 o Byte.stringToBytes fun ident_decode nb = radixpand (256, nb) |> map chr |> implode fun reverse l = rev l fun report _ [] _ = I | report markup ps x = let val ms = markup x in fold (fn p => fold (fn {markup, markup_body} => cons ((p, markup), markup_body)) ms) ps end fun markup_make typing get_global desc report' data = report (fn name => let val (def, ps, id, global, typing) = case data of Left ((ps, id, param), ps' as _ :: _) => ( true , ps , id , Right ( SOME (get_global param) , "Redefinition of " ^ quote name ^ Position.here_list ps \ \Any positions provided here will be explicitly reported, which might not be the desired effect. So we explicitly refer the reader to a separate tooltip.\ ^ " (more details in the command modifier tooltip)" , cons { markup = Markup.class_parameter , markup_body = "redefining this" ^ Position.here_list ps' }) , typing param) | Left ((ps, id, param), _) => (true, ps, id, Left (get_global param), typing param) | Right (_, SOME (ps, id, param)) => (false, ps, id, Left (get_global param), typing param) | Right (ps, _) => ( true , ps , serial () , Right (NONE, "Undeclared " ^ quote name ^ Position.here_list ps, I) , NONE) fun markup_elem name = (name, (name, []): Markup.T) val (varN, var) = markup_elem (desc (case global of Left b => SOME b | Right (SOME b, _, _) => SOME b | _ => NONE)); val entity = Markup.entity varN name val cons' = cons o markup_init val _ = make_entity_markup (* PATCH: copied as such from Isabelle2020 *) fun entity_properties_of def serial pos = if def then (Markup.defN, Value.print_int serial) :: properties_of pos else (Markup.refN, Value.print_int serial) :: def_properties_of pos; in (cons' var #> report' cons' def global #> (case typing of NONE => I | SOME x => cons x)) (map (fn pos => (* WAS: markup_init (Markup.properties (Position.entity_properties_of def id pos) entity)) *) (* NEW in Isabelle 2021-1RC: fun make_entity_markup {def} serial kind (name, pos) = let val props = if def then (Markup.defN, Value.print_int serial) :: properties_of pos else (Markup.refN, Value.print_int serial) :: def_properties_of pos; in Markup.entity kind name |> Markup.properties props end; *) markup_init (Markup.properties (entity_properties_of def id pos) entity)) ps) end) fun markup_make' typing get_global desc report = markup_make typing get_global (fn global => "C " ^ (case global of SOME true => "global " | SOME false => "local " | NONE => "") ^ desc) (fn cons' => fn def => fn Left b => report cons' def b | Right (b, msg, f) => tap (fn _ => Output.information msg) #> f #> (case b of NONE => cons' Markup.free | SOME b => report cons' def b)) fun markup_tvar0 desc = markup_make' (K NONE) I desc (fn cons' => fn def => fn true => cons' (if def then Markup.free else Markup.ML_keyword3) | false => cons' Markup.skolem) val markup_tvar = markup_tvar0 "type variable" val markup_var_enum = markup_tvar0 "enum tag" fun string_of_list f = (fn [] => NONE | [s] => SOME s | s => SOME ("[" ^ String.concatWith ", " s ^ "]")) o map f val string_of_cDeclarationSpecifier = fn C_Ast.CStorageSpec0 _ => "storage" | C_Ast.CTypeSpec0 t => (case t of CVoidType0 _ => "void" | CCharType0 _ => "char" | CShortType0 _ => "short" | CIntType0 _ => "int" | CLongType0 _ => "long" | CFloatType0 _ => "float" | CDoubleType0 _ => "double" | CSignedType0 _ => "signed" | CUnsigType0 _ => "unsig" | CBoolType0 _ => "bool" | CComplexType0 _ => "complex" | CInt128Type0 _ => "int128" | CSUType0 _ => "SU" | CEnumType0 _ => "enum" | CTypeDef0 _ => "typedef" | CTypeOfExpr0 _ => "type_of_expr" | CTypeOfType0 _ => "type_of_type" | CAtomicType0 _ => "atomic") | C_Ast.CTypeQual0 _ => "type_qual" | C_Ast.CFunSpec0 _ => "fun" | C_Ast.CAlignSpec0 _ => "align" fun typing {params, ret, ...} = SOME { markup = Markup.typing , markup_body = case ( string_of_list (fn C_Ast.CPtrDeclr0 _ => "pointer" | C_Ast.CArrDeclr0 _ => "array" | C_Ast.CFunDeclr0 (C_Ast.Left _, _, _) => "function [...] ->" | C_Ast.CFunDeclr0 (C_Ast.Right (l_decl, _), _, _) => "function " ^ (String.concatWith " -> " (map (fn CDecl0 ([decl], _, _) => string_of_cDeclarationSpecifier decl | CDecl0 (l, _, _) => "(" ^ String.concatWith " " (map string_of_cDeclarationSpecifier l) ^ ")" | CStaticAssert0 _ => "static_assert") l_decl)) ^ " ->") params , case ret of C_Env.Previous_in_stack => SOME "..." | C_Env.Parsed ret => string_of_list string_of_cDeclarationSpecifier ret) of (NONE, NONE) => let val _ = warning "markup_var: Not yet implemented" in "" end | (SOME params, NONE) => params | (NONE, SOME ret) => ret | (SOME params, SOME ret) => params ^ " " ^ ret } val markup_var = markup_make' typing #global "variable" (fn cons' => fn def => fn true => if def then cons' Markup.free else cons' Markup.delimiter (*explicit black color, otherwise the default color of constants might be automatically chosen (especially in term cartouches)*) | false => cons' Markup.bound) val markup_var_bound = markup_make' typing #global "variable" (fn cons' => K (K (cons' Markup.bound))) val markup_var_improper = markup_make' typing #global "variable" (fn cons' => K (K (cons' Markup.improper))) (**) val return = pair fun bind f g = f #-> g fun bind' f g = bind f (fn r => bind (g r) (fn () => return r)) fun a >> b = a #> b o #2 fun sequence_ f = fn [] => return () | x :: xs => f x >> sequence_ f xs (* Language.C.Data.RList *) val empty = [] fun singleton x = [x] fun snoc xs x = x :: xs fun rappend xs ys = rev ys @ xs fun rappendr xs ys = ys @ xs val rmap = map val viewr = fn [] => error "viewr: empty RList" | x :: xs => (xs, x) (* Language.C.Data.Position *) val nopos = NoPosition fun posOf _ = NoPosition fun posOf' mk_range = (if mk_range then Position.range else I) #> (fn (pos1, pos2) => let val {offset = offset, end_offset = end_offset, ...} = Position.dest pos1 in ( Position offset (From_string (C_Env.encode_positions [pos1, pos2])) 0 0 , end_offset - offset) end) fun posOf'' node env = let val (stack, len) = #rule_input env val (mk_range, (pos1a, pos1b)) = case node of Left i => (true, nth stack (len - i - 1)) | Right range => (false, range) val (pos2a, pos2b) = nth stack 0 in ( (posOf' mk_range (pos1a, pos1b) |> #1, posOf' true (pos2a, pos2b)) , env |> C_Env_Ext.map_output_pos (K (SOME (pos1a, pos2b))) |> C_Env_Ext.map_output_vacuous (K false)) end val posOf''' = posOf'' o Left val internalPos = InternalPosition fun make_comment body_begin body body_end range = Commenta ( posOf' false range |> #1 , From_string (Symbol_Pos.implode (body_begin @ body @ body_end)) , case body_end of [] => SingleLine | _ => MultiLine) (* Language.C.Data.Node *) val undefNode = OnlyPos nopos (nopos, ~1) fun mkNodeInfoOnlyPos pos = OnlyPos pos (nopos, ~1) fun mkNodeInfo pos name = NodeInfo pos (nopos, ~1) name val mkNodeInfo' = NodeInfo val decode = (fn OnlyPos0 range => range | NodeInfo0 (pos1, (pos2, len2), _) => (pos1, (pos2, len2))) #> (fn (Position0 (_, s1, _, _), (Position0 (_, s2, _, _), _)) => (case (C_Env.decode_positions (To_string0 s1), C_Env.decode_positions (To_string0 s2)) of ([pos1, _], [_, pos2]) => Left (Position.range (pos1, pos2)) | _ => Right "Expecting 2 elements") | _ => Right "Invalid position") fun decode_error' x = case decode x of Left x => x | Right msg => error msg fun decode_error x = Right (decode_error' x) val nameOfNode = fn OnlyPos0 _ => NONE | NodeInfo0 (_, _, name) => SOME name (* Language.C.Data.Ident *) local val bits7 = Integer.pow 7 2 val bits14 = Integer.pow 14 2 val bits21 = Integer.pow 21 2 val bits28 = Integer.pow 28 2 val ord = SML90.ord; (* copied from ML_init in Isabelle2020. *) in fun quad s = case s of [] => 0 | c1 :: [] => ord c1 | c1 :: c2 :: [] => ord c2 * bits7 + ord c1 | c1 :: c2 :: c3 :: [] => ord c3 * bits14 + ord c2 * bits7 + ord c1 | c1 :: c2 :: c3 :: c4 :: s => ((ord c4 * bits21 + ord c3 * bits14 + ord c2 * bits7 + ord c1) mod bits28) + (quad s mod bits28) end local fun internalIdent0 pos s = Ident (From_string s, ident_encode s, pos) in fun mkIdent (pos, len) s name = internalIdent0 (mkNodeInfo' pos (pos, len) name) s val internalIdent = internalIdent0 (mkNodeInfoOnlyPos internalPos) end (* Language.C.Syntax.AST *) fun liftStrLit (CStrLit0 (str, at)) = CStrConst str at (* Language.C.Syntax.Constants *) fun concatCStrings cs = CString0 (flattena (map (fn CString0 (s,_) => s) cs), exists (fn CString0 (_, b) => b) cs) (* Language.C.Parser.ParserMonad *) fun getNewName env = (Namea (C_Env_Ext.get_namesupply env), C_Env_Ext.map_namesupply (fn x => x + 1) env) fun addTypedef (Ident0 (_, i, node)) env = let val name = ident_decode i val pos1 = [decode_error' node |> #1] val data = (pos1, serial (), null (C_Env_Ext.get_scopes env)) in ((), env |> C_Env_Ext.map_idents (Symtab.delete_safe name) |> C_Env_Ext.map_tyidents_typedef (Symtab.update (name, data)) |> C_Env_Ext.map_reports_text (markup_tvar (Left (data, flat [ look_idents env name, look_tyidents_typedef env name ])) pos1 name)) end fun shadowTypedef0''' name pos data0 env_lang env_tree = let val data = (pos, serial (), data0) val update_id = Symtab.update (name, data) in ( env_lang |> C_Env_Ext.map_tyidents'_typedef (Symtab.delete_safe name) |> C_Env_Ext.map_idents' update_id , update_id , env_tree |> C_Env.map_reports_text (markup_var (Left (data, flat [ look_idents' env_lang name , look_tyidents'_typedef env_lang name ])) pos name)) end fun shadowTypedef0'''' name pos data0 env_lang env_tree = let val (env_lang, _, env_tree) = shadowTypedef0''' name pos data0 env_lang env_tree in ( env_lang, env_tree) end fun shadowTypedef0'' ret global (Ident0 (_, i, node), params) = shadowTypedef0''' (ident_decode i) [decode_error' node |> #1] {global = global, params = params, ret = ret} fun shadowTypedef0' ret global ident env_lang env_tree = let val (env_lang, _, env_tree) = shadowTypedef0'' ret global ident env_lang env_tree in (env_lang, env_tree) end fun shadowTypedef0 ret global f ident env = let val (update_id, env) = C_Env.map_env_lang_tree' (fn env_lang => fn env_tree => let val (env_lang, update_id, env_tree) = shadowTypedef0'' ret global ident env_lang env_tree in (update_id, (env_lang, env_tree)) end) env in ((), f update_id env) end fun shadowTypedef_fun ident env = shadowTypedef0 C_Env.Previous_in_stack (case C_Env_Ext.get_scopes env of _ :: [] => true | _ => false) (fn update_id => C_Env_Ext.map_scopes (fn (NONE, x) :: xs => (SOME (fst ident), C_Env.map_idents update_id x) :: xs | (SOME _, _) :: _ => error "Not yet implemented" | [] => error "Not expecting an empty scope")) ident env fun shadowTypedef (i, params, ret) env = shadowTypedef0 (C_Env.Parsed ret) (List.null (C_Env_Ext.get_scopes env)) (K I) (i, params) env fun isTypeIdent s0 arg = (Symtab.exists (fn (s1, _) => s0 = s1) o C_Env_Ext.get_tyidents_typedef) arg fun enterScope env = ((), C_Env_Ext.map_scopes (cons (NONE, C_Env_Ext.get_var_table env)) env) fun leaveScope env = case C_Env_Ext.get_scopes env of [] => error "leaveScope: already in global scope" | (_, var_table) :: scopes => ((), env |> C_Env_Ext.map_scopes (K scopes) |> C_Env_Ext.map_var_table (K var_table)) val getCurrentPosition = return NoPosition (* Language.C.Parser.Tokens *) fun CTokCLit x f = x |> f fun CTokILit x f = x |> f fun CTokFLit x f = x |> f fun CTokSLit x f = x |> f (* Language.C.Parser.Parser *) fun reverseList x = rev x fun L a i = posOf''' i #>> curry Located a fun unL (Located (a, _)) = a fun withNodeInfo00 (pos1, (pos2, len2)) mkAttrNode name = return (mkAttrNode (NodeInfo pos1 (pos2, len2) name)) fun withNodeInfo0 x = x |> bind getNewName oo withNodeInfo00 fun withNodeInfo0' node mkAttrNode env = let val (range, env) = posOf'' node env in withNodeInfo0 range mkAttrNode env end fun withNodeInfo x = x |> withNodeInfo0' o Left fun withNodeInfo' x = x |> withNodeInfo0' o decode_error fun withNodeInfo_CExtDecl x = x |> withNodeInfo' o (fn CDeclExt0 (CDecl0 (_, _, node)) => node | CDeclExt0 (CStaticAssert0 (_, _, node)) => node | CFDefExt0 (CFunDef0 (_, _, _, _, node)) => node | CAsmExt0 (_, node) => node) val get_node_CExpr = fn CComma0 (_, a) => a | CAssign0 (_, _, _, a) => a | CCond0 (_, _, _, a) => a | CBinary0 (_, _, _, a) => a | CCast0 (_, _, a) => a | CUnary0 (_, _, a) => a | CSizeofExpr0 (_, a) => a | CSizeofType0 (_, a) => a | CAlignofExpr0 (_, a) => a | CAlignofType0 (_, a) => a | CComplexReal0 (_, a) => a | CComplexImag0 (_, a) => a | CIndex0 (_, _, a) => a | CCall0 (_, _, a) => a | CMember0 (_, _, _, a) => a | CVar0 (_, a) => a | CConst0 c => (case c of CIntConst0 (_, a) => a | CCharConst0 (_, a) => a | CFloatConst0 (_, a) => a | CStrConst0 (_, a) => a) | CCompoundLit0 (_, _, a) => a | CGenericSelection0 (_, _, a) => a | CStatExpr0 (_, a) => a | CLabAddrExpr0 (_, a) => a | CBuiltinExpr0 cBuiltinThing => (case cBuiltinThing of CBuiltinVaArg0 (_, _, a) => a | CBuiltinOffsetOf0 (_, _, a) => a | CBuiltinTypesCompatible0 (_, _, a) => a) fun withNodeInfo_CExpr x = x |> withNodeInfo' o get_node_CExpr o hd fun withLength node mkAttrNode = bind (posOf'' (decode_error node)) (fn range => withNodeInfo00 range mkAttrNode (case nameOfNode node of NONE => error "nameOfNode" | SOME name => name)) fun reverseDeclr (CDeclrR0 (ide, reversedDDs, asmname, cattrs, at)) = CDeclr ide (rev reversedDDs) asmname cattrs at fun appendDeclrAttrs newAttrs (CDeclrR0 (ident, l, asmname, cattrs, at)) = case l of [] => CDeclrR ident empty asmname (cattrs @ newAttrs) at | x :: xs => let val appendAttrs = fn CPtrDeclr0 (typeQuals, at) => CPtrDeclr (typeQuals @ map CAttrQual newAttrs) at | CArrDeclr0 (typeQuals, arraySize, at) => CArrDeclr (typeQuals @ map CAttrQual newAttrs) arraySize at | CFunDeclr0 (parameters, cattrs, at) => CFunDeclr parameters (cattrs @ newAttrs) at in CDeclrR ident (appendAttrs x :: xs) asmname cattrs at end fun withAttribute node cattrs mkDeclrNode = bind (posOf''' node) (fn (pos, _) => bind getNewName (fn name => let val attrs = mkNodeInfo pos name val newDeclr = appendDeclrAttrs cattrs (mkDeclrNode attrs) in return newDeclr end)) fun withAttributePF node cattrs mkDeclrCtor = bind (posOf''' node) (fn (pos, _) => bind getNewName (fn name => let val attrs = mkNodeInfo pos name val newDeclr = appendDeclrAttrs cattrs o mkDeclrCtor attrs in return newDeclr end)) fun appendObjAttrs newAttrs (CDeclr0 (ident, indirections, asmname, cAttrs, at)) = CDeclr ident indirections asmname (cAttrs @ newAttrs) at fun appendObjAttrsR newAttrs (CDeclrR0 (ident, indirections, asmname, cAttrs, at)) = CDeclrR ident indirections asmname (cAttrs @ newAttrs) at fun setAsmName mAsmName (CDeclrR0 (ident, indirections, oldName, cattrs, at)) = case (case (mAsmName, oldName) of (None, None) => Right None | (None, oldname as Some _) => Right oldname | (newname as Some _, None) => Right newname | (Some n1, Some n2) => Left (n1, n2)) of Left (n1, n2) => let fun showName (CStrLit0 (CString0 (s, _), _)) = To_string0 s in error ("Duplicate assembler name: " ^ showName n1 ^ " " ^ showName n2) end | Right newName => return (CDeclrR ident indirections newName cattrs at) fun withAsmNameAttrs (mAsmName, newAttrs) declr = setAsmName mAsmName (appendObjAttrsR newAttrs declr) fun ptrDeclr (CDeclrR0 (ident, derivedDeclrs, asmname, cattrs, dat)) tyquals at = CDeclrR ident (snoc derivedDeclrs (CPtrDeclr tyquals at)) asmname cattrs dat fun funDeclr (CDeclrR0 (ident, derivedDeclrs, asmname, dcattrs, dat)) params cattrs at = CDeclrR ident (snoc derivedDeclrs (CFunDeclr params cattrs at)) asmname dcattrs dat fun arrDeclr (CDeclrR0 (ident, derivedDeclrs, asmname, cattrs, dat)) tyquals var_sized static_size size_expr_opt at = CDeclrR ident (snoc derivedDeclrs (CArrDeclr tyquals (case size_expr_opt of Some e => CArrSize static_size e | None => CNoArrSize var_sized) at)) asmname cattrs dat val liftTypeQuals = map CTypeQual o reverse val liftCAttrs = map (CTypeQual o CAttrQual) fun addTrailingAttrs declspecs new_attrs = case viewr declspecs of (specs_init, CTypeSpec0 (CSUType0 (CStruct0 (tag, name, Some def, def_attrs, su_node), node))) => snoc specs_init (CTypeSpec (CSUType (CStruct tag name (Just def) (def_attrs @ new_attrs) su_node) node)) | (specs_init, CTypeSpec0 (CEnumType0 (CEnum0 (name, Some def, def_attrs, e_node), node))) => snoc specs_init (CTypeSpec (CEnumType (CEnum name (Just def) (def_attrs @ new_attrs) e_node) node)) | _ => rappend declspecs (liftCAttrs new_attrs) val emptyDeclr = CDeclrR Nothing empty Nothing [] undefNode fun mkVarDeclr ident = CDeclrR (Some ident) empty Nothing [] fun doDeclIdent declspecs (decl as CDeclrR0 (mIdent, _, _, _, _)) = case mIdent of None => return () | Some ident => if exists (fn CStorageSpec0 (CTypedef0 _) => true | _ => false) declspecs then addTypedef ident else shadowTypedef ( ident , case reverseDeclr decl of CDeclr0 (_, params, _, _, _) => params , declspecs) val ident_of_decl = fn Left params => map (fn i => (i, [], [])) params | Right (params, _) => maps (fn CDecl0 (ret, l, _) => maps (fn ((Some (CDeclr0 (Some mIdent, params, _, _, _)),_),_) => [(mIdent, params, ret)] | _ => []) l | _ => []) params local fun sequence_' f = sequence_ f o ident_of_decl val is_fun = fn CFunDeclr0 _ => true | _ => false in fun doFuncParamDeclIdent (CDeclr0 (mIdent0, param0, _, _, node0)) = let val (param_not_fun, param0') = chop_prefix (not o is_fun) param0 val () = if null param_not_fun then () else Output.information ("Not a function" ^ Position.here (decode_error' (case mIdent0 of None => node0 | Some (Ident0 (_, _, node)) => node) |> #1)) val (param_fun, param0') = chop_prefix is_fun param0' in (case mIdent0 of None => return () | Some mIdent0 => shadowTypedef_fun (mIdent0, param0)) >> sequence_ shadowTypedef (maps (fn CFunDeclr0 (params, _, _) => ident_of_decl params | _ => []) param_fun) >> sequence_ (fn CFunDeclr0 (params, _, _) => C_Env.map_env_tree (pair Symtab.empty #> sequence_' (fn (Ident0 (_, i, node), params, ret) => fn (env_lang, env_tree) => pair () let val name = ident_decode i val pos = [decode_error' node |> #1] val data = ( pos , serial () , {global = false, params = params, ret = C_Env.Parsed ret}) in ( env_lang |> Symtab.update (name, data) , env_tree |> C_Env.map_reports_text (markup_var_improper (Left (data, C_Env_Ext.list_lookup env_lang name)) pos name)) end) params #> #2 o #2) #> pair () | _ => return ()) param0' end end (**) structure List = struct val reverse = rev end end \ subsection \Miscellaneous\ ML \ \\<^file>\~~/src/Pure/Thy/document_antiquotations.ML\\ (* Author: Frédéric Tuong, Université Paris-Saclay *) (* Title: Pure/Thy/document_antiquotations.ML Author: Makarius Miscellaneous document antiquotations. *) \ structure ML_Document_Antiquotations = struct (* ML text *) local fun ml_text name ml = - Document_Output.antiquotation_raw_embedded name (Scan.lift Args.text_input \ \TODO: enable reporting with \<^ML_type>\Token.file\ as in \<^ML>\Resources.parse_files\\) + Document_Output.antiquotation_raw_embedded name (Scan.lift Parse.embedded_input \ \TODO: enable reporting with \<^ML_type>\Token.file\ as in \<^ML>\Resources.parse_files\\) (fn ctxt => fn text => let val file_content = Token.file_source (Command.read_file (Resources.master_directory (Proof_Context.theory_of ctxt)) Position.none false (Path.explode (#1 (Input.source_content text)))) val _ = (*TODO: avoid multiple file scanning*) ML_Context.eval_in (SOME ctxt) ML_Compiler.flags Position.none (* \ (optionally) disabling a potential double report*) (ml file_content) in file_content |> Input.source_explode |> Source.of_list |> Source.source Symbol_Pos.stopper (Scan.bulk (Symbol_Pos.scan_comment "" >> (C_Scan.Left o pair true) || Scan.many1 (Symbol.is_ascii_blank o Symbol_Pos.symbol) >> (C_Scan.Left o pair false) || Scan.one (not o Symbol_Pos.is_eof) >> C_Scan.Right)) |> Source.exhaust |> drop_prefix (fn C_Scan.Left _ => true | _ => false) |> drop_suffix (fn C_Scan.Left (false, _) => true | _ => false) |> maps (fn C_Scan.Left (_, x) => x | C_Scan.Right x => [x]) |> Symbol_Pos.implode |> enclose "\n" "\n" |> cartouche |> Document_Output.output_source ctxt |> Document_Output.isabelle ctxt end); fun ml_enclose bg en source = ML_Lex.read bg @ ML_Lex.read_source source @ ML_Lex.read en; in val _ = Theory.setup (ml_text \<^binding>\ML_file\ (ml_enclose "" "")); end; end; \ subsection \Loading the Grammar Simulator\ text \ The parser consists of a generic module \<^file>\../../src_ext/mlton/lib/mlyacc-lib/base.sig\, which interprets an automata-like format generated from ML-Yacc. \ ML_file "../../src_ext/mlton/lib/mlyacc-lib/base.sig" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/base.sig\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/join.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/join.sml\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/lrtable.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/lrtable.sml\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/stream.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/stream.sml\\ ML_file "../../src_ext/mlton/lib/mlyacc-lib/parser1.sml" \ \\<^ML_file>\../../src_ext/mlton/lib/mlyacc-lib/parser1.sml\\ subsection \Loading the Generated Grammar (SML signature)\ ML_file "../generated/c_grammar_fun.grm.sig" subsection \Overloading Grammar Rules (Optional Part)\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar_Rule_Wrap_Overloading = struct open C_Grammar_Rule_Lib fun update_env_bottom_up f x arg = ((), C_Env.map_env_lang_tree (f x) arg) fun update_env_top_down f x = pair () ##> (fn arg => C_Env_Ext.map_output_env (K (SOME (f x (#env_lang arg)))) arg) (*type variable (report bound)*) val specifier3 : (CDeclSpec list) -> unit monad = update_env_bottom_up (fn l => fn env_lang => fn env_tree => ( env_lang , fold let open C_Ast in fn CTypeSpec0 (CTypeDef0 (Ident0 (_, i, node), _)) => let val name = ident_decode i val pos1 = [decode_error' node |> #1] in C_Env.map_reports_text (markup_tvar (Right (pos1, Symtab.lookup (C_Env_Ext.get_tyidents'_typedef env_lang) name)) pos1 name) end | _ => I end l env_tree)) val declaration_specifier3 : (CDeclSpec list) -> unit monad = specifier3 val type_specifier3 : (CDeclSpec list) -> unit monad = specifier3 (*basic variable (report bound)*) val primary_expression1 : (CExpr) -> unit monad = update_env_bottom_up (fn e => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in fn CVar0 (Ident0 (_, i, node), _) => let val name = ident_decode i val pos1 = [decode_error' node |> #1] in C_Env.map_reports_text (markup_var (Right (pos1, Symtab.lookup (C_Env_Ext.get_idents' env_lang) name)) pos1 name) end | _ => I end e env_tree)) (*basic variable, parameter functions (report bound)*) val declarator1 : (CDeclrR) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => ( env_lang , let open C_Ast fun markup markup_var params = pair Symtab.empty #> fold (fn (Ident0 (_, i, node), params, ret) => fn (env_lang, env_tree) => let val name = ident_decode i val pos = [decode_error' node |> #1] val data = ( pos , serial () , {global = false, params = params, ret = C_Env.Parsed ret}) in ( env_lang |> Symtab.update (name, data) , env_tree |> C_Env.map_reports_text (markup_var (Left (data, C_Env_Ext.list_lookup env_lang name)) pos name)) end) (ident_of_decl params) #> #2 in fn CDeclrR0 (_, param0, _, _, _) => (case rev param0 of CFunDeclr0 (params, _, _) :: param0 => pair param0 o markup markup_var_bound params | param0 => pair param0) #-> fold (fn CFunDeclr0 (params, _, _) => markup markup_var_improper params | _ => I) end d env_tree)) (*old style syntax for functions (legacy feature)*) val external_declaration1 : (CExtDecl) -> unit monad = update_env_bottom_up (fn f => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in fn CFDefExt0 (CFunDef0 (_, _, l, _, node)) => if null l then I else tap (fn _ => legacy_feature ("Scope analysing for old function syntax not implemented" ^ Position.here (decode_error' node |> #1))) | _ => I end f env_tree)) (*(type) enum, struct, union (report define & report bound)*) fun report_enum_bound i' node env_lang = let open C_Ast val name = ident_decode i' val pos1 = [decode_error' node |> #1] in C_Env.map_reports_text (markup_var_enum (Right (pos1, Symtab.lookup (C_Env_Ext.get_tyidents'_enum env_lang) name)) pos1 name) end local val look_tyidents'_enum = C_Env_Ext.list_lookup o C_Env_Ext.get_tyidents'_enum val declaration : (CDecl) -> unit monad = update_env_bottom_up (fn decl => fn env_lang => fn env_tree => let open C_Ast in fn CDecl0 (l, _, _) => fold (fn CTypeSpec0 (CEnumType0 (CEnum0 (Some (Ident0 (_, i, node)), body, _, _), _)) => (case body of None => (fn (env_lang, env_tree) => (env_lang, report_enum_bound i node env_lang env_tree)) | Some _ => fn (env_lang, env_tree) => let val name = ident_decode i val pos1 = [decode_error' node |> #1] val data = (pos1, serial (), null (C_Env.get_scopes env_lang)) in ( C_Env_Ext.map_tyidents'_enum (Symtab.update (name, data)) env_lang , C_Env.map_reports_text (markup_var_enum (Left (data, look_tyidents'_enum env_lang name)) pos1 name) env_tree) end) | _ => I) l | _ => I end decl (env_lang, env_tree)) in val declaration1 = declaration val declaration2 = declaration val declaration3 = declaration end (*(basic) enum, struct, union (report define)*) local val enumerator : ( ( Ident * CExpr Maybe ) ) -> unit monad = update_env_bottom_up (fn id => fn env_lang => let open C_Ast in fn (ident as Ident0 (_, _, node), _) => C_Grammar_Rule_Lib.shadowTypedef0' (C_Env.Parsed [CTypeSpec0 (CIntType0 node)]) (null (C_Env.get_scopes env_lang)) (ident, []) env_lang end id) in val enumerator1 = enumerator val enumerator2 = enumerator val enumerator3 = enumerator val enumerator4 = enumerator end (*(type) enum, struct, union (report bound)*) local fun declaration_specifier env_lang = let open C_Ast in fold (fn CTypeSpec0 (CEnumType0 (CEnum0 (Some (Ident0 (_, i, node)), _, _, _), _)) => report_enum_bound i node env_lang | _ => I) end in val declaration_specifier2 : (CDeclSpec list) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => let open C_Ast in ( env_lang , env_tree |> (if exists (fn CStorageSpec0 (CTypedef0 _) => true | _ => false) d then I else declaration_specifier env_lang d)) end) local val f_definition : (CFunDef) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in fn CFunDef0 (l, _, _, _, _) => declaration_specifier env_lang l end d env_tree)) in val function_definition4 = f_definition val nested_function_definition2 = f_definition end local val parameter_type_list : ( ( CDecl list * Bool ) ) -> unit monad = update_env_bottom_up (fn d => fn env_lang => fn env_tree => ( env_lang , let open C_Ast in #1 #> fold (fn CDecl0 (l, _, _) => declaration_specifier env_lang l | _ => I) end d env_tree)) in val parameter_type_list2 = parameter_type_list val parameter_type_list3 = parameter_type_list end end end \ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar_Rule_Wrap = struct open C_Grammar_Rule_Wrap open C_Grammar_Rule_Wrap_Overloading end \ subsection \Loading the Generated Grammar (SML structure)\ ML_file "../generated/c_grammar_fun.grm.sml" subsection \Grammar Initialization\ subsubsection \Functor Application\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar = C_Grammar_Fun (structure Token = LALR_Parser_Eval.Token) \ subsubsection \Mapping Strings to Structured Tokens\ ML \ \\<^file>\../generated/c_grammar_fun.grm.sml\\ \ structure C_Grammar_Tokens = struct local open C_Grammar.Tokens in fun token_of_string error ty_ClangCVersion ty_cChar ty_cFloat ty_cInteger ty_cString ty_ident ty_string a1 a2 = fn "(" => x28 (ty_string, a1, a2) | ")" => x29 (ty_string, a1, a2) | "[" => x5b (ty_string, a1, a2) | "]" => x5d (ty_string, a1, a2) | "->" => x2d_x3e (ty_string, a1, a2) | "." => x2e (ty_string, a1, a2) | "!" => x21 (ty_string, a1, a2) | "~" => x7e (ty_string, a1, a2) | "++" => x2b_x2b (ty_string, a1, a2) | "--" => x2d_x2d (ty_string, a1, a2) | "+" => x2b (ty_string, a1, a2) | "-" => x2d (ty_string, a1, a2) | "*" => x2a (ty_string, a1, a2) | "/" => x2f (ty_string, a1, a2) | "%" => x25 (ty_string, a1, a2) | "&" => x26 (ty_string, a1, a2) | "<<" => x3c_x3c (ty_string, a1, a2) | ">>" => x3e_x3e (ty_string, a1, a2) | "<" => x3c (ty_string, a1, a2) | "<=" => x3c_x3d (ty_string, a1, a2) | ">" => x3e (ty_string, a1, a2) | ">=" => x3e_x3d (ty_string, a1, a2) | "==" => x3d_x3d (ty_string, a1, a2) | "!=" => x21_x3d (ty_string, a1, a2) | "^" => x5e (ty_string, a1, a2) | "|" => x7c (ty_string, a1, a2) | "&&" => x26_x26 (ty_string, a1, a2) | "||" => x7c_x7c (ty_string, a1, a2) | "?" => x3f (ty_string, a1, a2) | ":" => x3a (ty_string, a1, a2) | "=" => x3d (ty_string, a1, a2) | "+=" => x2b_x3d (ty_string, a1, a2) | "-=" => x2d_x3d (ty_string, a1, a2) | "*=" => x2a_x3d (ty_string, a1, a2) | "/=" => x2f_x3d (ty_string, a1, a2) | "%=" => x25_x3d (ty_string, a1, a2) | "&=" => x26_x3d (ty_string, a1, a2) | "^=" => x5e_x3d (ty_string, a1, a2) | "|=" => x7c_x3d (ty_string, a1, a2) | "<<=" => x3c_x3c_x3d (ty_string, a1, a2) | ">>=" => x3e_x3e_x3d (ty_string, a1, a2) | "," => x2c (ty_string, a1, a2) | ";" => x3b (ty_string, a1, a2) | "{" => x7b (ty_string, a1, a2) | "}" => x7d (ty_string, a1, a2) | "..." => x2e_x2e_x2e (ty_string, a1, a2) | x => let val alignof = alignof (ty_string, a1, a2) val alignas = alignas (ty_string, a1, a2) val atomic = x5f_Atomic (ty_string, a1, a2) val asm = asm (ty_string, a1, a2) val auto = auto (ty_string, a1, a2) val break = break (ty_string, a1, a2) val bool = x5f_Bool (ty_string, a1, a2) val case0 = case0 (ty_string, a1, a2) val char = char (ty_string, a1, a2) val const = const (ty_string, a1, a2) val continue = continue (ty_string, a1, a2) val complex = x5f_Complex (ty_string, a1, a2) val default = default (ty_string, a1, a2) val do0 = do0 (ty_string, a1, a2) val double = double (ty_string, a1, a2) val else0 = else0 (ty_string, a1, a2) val enum = enum (ty_string, a1, a2) val extern = extern (ty_string, a1, a2) val float = float (ty_string, a1, a2) val for0 = for0 (ty_string, a1, a2) val generic = x5f_Generic (ty_string, a1, a2) val goto = goto (ty_string, a1, a2) val if0 = if0 (ty_string, a1, a2) val inline = inline (ty_string, a1, a2) val int = int (ty_string, a1, a2) val int128 = x5f_x5f_int_x31_x32_x38 (ty_string, a1, a2) val long = long (ty_string, a1, a2) val label = x5f_x5f_label_x5f_x5f (ty_string, a1, a2) val noreturn = x5f_Noreturn (ty_string, a1, a2) val nullable = x5f_Nullable (ty_string, a1, a2) val nonnull = x5f_Nonnull (ty_string, a1, a2) val register = register (ty_string, a1, a2) val restrict = restrict (ty_string, a1, a2) val return0 = return0 (ty_string, a1, a2) val short = short (ty_string, a1, a2) val signed = signed (ty_string, a1, a2) val sizeof = sizeof (ty_string, a1, a2) val static = static (ty_string, a1, a2) val staticassert = x5f_Static_assert (ty_string, a1, a2) val struct0 = struct0 (ty_string, a1, a2) val switch = switch (ty_string, a1, a2) val typedef = typedef (ty_string, a1, a2) val typeof = typeof (ty_string, a1, a2) val thread = x5f_x5f_thread (ty_string, a1, a2) val union = union (ty_string, a1, a2) val unsigned = unsigned (ty_string, a1, a2) val void = void (ty_string, a1, a2) val volatile = volatile (ty_string, a1, a2) val while0 = while0 (ty_string, a1, a2) val cchar = cchar (ty_cChar, a1, a2) val cint = cint (ty_cInteger, a1, a2) val cfloat = cfloat (ty_cFloat, a1, a2) val cstr = cstr (ty_cString, a1, a2) val ident = ident (ty_ident, a1, a2) val tyident = tyident (ty_ident, a1, a2) val attribute = x5f_x5f_attribute_x5f_x5f (ty_string, a1, a2) val extension = x5f_x5f_extension_x5f_x5f (ty_string, a1, a2) val real = x5f_x5f_real_x5f_x5f (ty_string, a1, a2) val imag = x5f_x5f_imag_x5f_x5f (ty_string, a1, a2) val builtinvaarg = x5f_x5f_builtin_va_arg (ty_string, a1, a2) val builtinoffsetof = x5f_x5f_builtin_offsetof (ty_string, a1, a2) val builtintypescompatiblep = x5f_x5f_builtin_types_compatible_p (ty_string, a1, a2) val clangcversion = clangcversion (ty_ClangCVersion, a1, a2) in case x of "_Alignas" => alignas | "_Alignof" => alignof | "__alignof" => alignof | "alignof" => alignof | "__alignof__" => alignof | "__asm" => asm | "asm" => asm | "__asm__" => asm | "_Atomic" => atomic | "__attribute" => attribute | "__attribute__" => attribute | "auto" => auto | "_Bool" => bool | "break" => break | "__builtin_offsetof" => builtinoffsetof | "__builtin_types_compatible_p" => builtintypescompatiblep | "__builtin_va_arg" => builtinvaarg | "case" => case0 | "char" => char | "_Complex" => complex | "__complex__" => complex | "__const" => const | "const" => const | "__const__" => const | "continue" => continue | "default" => default | "do" => do0 | "double" => double | "else" => else0 | "enum" => enum | "__extension__" => extension | "extern" => extern | "float" => float | "for" => for0 | "_Generic" => generic | "goto" => goto | "if" => if0 | "__imag" => imag | "__imag__" => imag | "__inline" => inline | "inline" => inline | "__inline__" => inline | "int" => int | "__int128" => int128 | "__label__" => label | "long" => long | "_Nonnull" => nonnull | "__nonnull" => nonnull | "_Noreturn" => noreturn | "_Nullable" => nullable | "__nullable" => nullable | "__real" => real | "__real__" => real | "register" => register | "__restrict" => restrict | "restrict" => restrict | "__restrict__" => restrict | "return" => return0 | "short" => short | "__signed" => signed | "signed" => signed | "__signed__" => signed | "sizeof" => sizeof | "static" => static | "_Static_assert" => staticassert | "struct" => struct0 | "switch" => switch | "__thread" => thread | "_Thread_local" => thread | "typedef" => typedef | "__typeof" => typeof | "typeof" => typeof | "__typeof__" => typeof | "union" => union | "unsigned" => unsigned | "void" => void | "__volatile" => volatile | "volatile" => volatile | "__volatile__" => volatile | "while" => while0 | _ => error end end end \ end diff --git a/thys/Isabelle_Meta_Model/isabelle_home/src/Tools/Code/Isabelle_code_target.thy b/thys/Isabelle_Meta_Model/isabelle_home/src/Tools/Code/Isabelle_code_target.thy --- a/thys/Isabelle_Meta_Model/isabelle_home/src/Tools/Code/Isabelle_code_target.thy +++ b/thys/Isabelle_Meta_Model/isabelle_home/src/Tools/Code/Isabelle_code_target.thy @@ -1,214 +1,214 @@ (****************************************************************************** * ISABELLE COPYRIGHT NOTICE, LICENCE AND DISCLAIMER. * * Copyright (c) 1986-2018 Contributors (in the changeset history) * * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are * met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials provided * with the distribution. * * * Neither the name of the copyright holders nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ******************************************************************************) chapter\Part ...\ theory Isabelle_code_target imports Main keywords "attach" and "lazy_code_printing" "apply_code_printing" "apply_code_printing_reflect" :: thy_decl begin subsection\beginning (lazy code printing)\ ML\ structure Isabelle_Code_Target = struct (* Title: Tools/Code/code_target.ML Author: Florian Haftmann, TU Muenchen Generic infrastructure for target language data. *) open Basic_Code_Symbol; open Basic_Code_Thingol; (** checking and parsing of symbols **) val parse_classrel_ident = Parse.class --| @{keyword "<"} -- Parse.class; val parse_inst_ident = Parse.name --| @{keyword "::"} -- Parse.class; (** serializations and serializer **) (* serialization: abstract nonsense to cover different destinies for generated code *) (* serializers: functions producing serializations *) (** theory data **) (** serializer usage **) (* montage *) (* code generation *) fun prep_destination s = ({physical = true}, (Path.explode s, Position.none)); fun export_code_cmd all_public raw_cs seris thy = let val ctxt = Proof_Context.init_global thy; val cs = Code_Thingol.read_const_exprs ctxt raw_cs; in thy |> Named_Target.theory_map (Code_Target.export_code all_public cs ((map o apfst o apsnd o Option.map) prep_destination seris)) end; (** serializer configuration **) (* reserved symbol names *) (* checking of syntax *) (* custom symbol names *) (* custom printings *) (* concrete syntax *) (** Isar setup **) fun parse_single_symbol_pragma parse_keyword parse_isa parse_target = parse_keyword |-- Parse.!!! (parse_isa --| (@{keyword "\"} || @{keyword "=>"}) -- Parse.and_list1 (@{keyword "("} |-- (Parse.name --| @{keyword ")"} -- Scan.option parse_target))); fun parse_symbol_pragma parse_const parse_tyco parse_class parse_classrel parse_inst parse_module = parse_single_symbol_pragma @{keyword "constant"} Parse.term parse_const >> Constant || parse_single_symbol_pragma @{keyword "type_constructor"} Parse.type_const parse_tyco >> Type_Constructor || parse_single_symbol_pragma @{keyword "type_class"} Parse.class parse_class >> Type_Class || parse_single_symbol_pragma @{keyword "class_relation"} parse_classrel_ident parse_classrel >> Class_Relation || parse_single_symbol_pragma @{keyword "class_instance"} parse_inst_ident parse_inst >> Class_Instance || parse_single_symbol_pragma @{keyword "code_module"} Parse.name parse_module >> Code_Symbol.Module; fun parse_symbol_pragmas parse_const parse_tyco parse_class parse_classrel parse_inst parse_module = Parse.enum1 "|" (Parse.group (fn () => "code symbol pragma") (parse_symbol_pragma parse_const parse_tyco parse_class parse_classrel parse_inst parse_module)); end \ ML\ structure Code_printing = struct datatype code_printing = Code_printing of (string * (bstring * Code_Printer.raw_const_syntax option) list, string * (bstring * Code_Printer.tyco_syntax option) list, string * (bstring * string option) list, (string * string) * (bstring * unit option) list, (xstring * string) * (bstring * unit option) list, bstring * (bstring * (string * Code_Symbol.T list) option) list) Code_Symbol.attr list structure Data_code = Theory_Data ( type T = code_printing list Symtab.table val empty = Symtab.empty val merge = Symtab.merge (K true) ) val code_empty = "" val () = Outer_Syntax.command @{command_keyword lazy_code_printing} "declare dedicated printing for code symbols" (Isabelle_Code_Target.parse_symbol_pragmas (Code_Printer.parse_const_syntax) (Code_Printer.parse_tyco_syntax) Parse.string (Parse.minus >> K ()) (Parse.minus >> K ()) - (Parse.text -- Scan.optional (@{keyword "attach"} |-- Scan.repeat1 Parse.term >> map Code_Symbol.Constant) []) + (Parse.embedded -- Scan.optional (@{keyword "attach"} |-- Scan.repeat1 Parse.term >> map Code_Symbol.Constant) []) >> (fn code => Toplevel.theory (Data_code.map (Symtab.map_default (code_empty, []) (fn l => Code_printing code :: l))))) fun apply_code_printing thy = (case Symtab.lookup (Data_code.get thy) code_empty of SOME l => rev l | _ => []) |> (fn l => fold (fn Code_printing l => fold Code_Target.set_printings l) l thy) val () = Outer_Syntax.command @{command_keyword apply_code_printing} "apply dedicated printing for code symbols" (Parse.$$$ "(" -- Parse.$$$ ")" >> K (Toplevel.theory apply_code_printing)) fun reflect_ml source thy = case ML_Context.exec (fn () => ML_Context.eval_source (ML_Compiler.verbose false ML_Compiler.flags) source) (Context.Theory thy) of Context.Theory thy => thy fun apply_code_printing_reflect thy = (case Symtab.lookup (Data_code.get thy) code_empty of SOME l => rev l | _ => []) |> (fn l => fold (fn Code_printing l => fold (fn Code_Symbol.Module (_, l) => fold (fn ("SML", SOME (txt, _)) => reflect_ml (Input.source false txt (Position.none, Position.none)) | _ => I) l | _ => I) l) l thy) val () = Outer_Syntax.command @{command_keyword apply_code_printing_reflect} "apply dedicated printing for code symbols" (Parse.ML_source >> (fn src => Toplevel.theory (apply_code_printing_reflect o reflect_ml src))) end \ end diff --git a/thys/Refine_Imperative_HOL/Lib/Named_Theorems_Rev.thy b/thys/Refine_Imperative_HOL/Lib/Named_Theorems_Rev.thy --- a/thys/Refine_Imperative_HOL/Lib/Named_Theorems_Rev.thy +++ b/thys/Refine_Imperative_HOL/Lib/Named_Theorems_Rev.thy @@ -1,108 +1,108 @@ theory Named_Theorems_Rev imports Main keywords "named_theorems_rev" :: thy_decl begin ML \ signature NAMED_THEOREMS_REV = sig val member: Proof.context -> string -> thm -> bool val get: Proof.context -> string -> thm list val add_thm: string -> thm -> Context.generic -> Context.generic val del_thm: string -> thm -> Context.generic -> Context.generic val add: string -> attribute val del: string -> attribute val check: Proof.context -> string * Position.T -> string val declare: binding -> string -> local_theory -> string * local_theory end; structure Named_Theorems_Rev: NAMED_THEOREMS_REV = struct (* context data *) structure Data = Generic_Data ( type T = thm Item_Net.T Symtab.table; val empty: T = Symtab.empty; val merge : T * T -> T = Symtab.join (K Item_Net.merge); ); fun new_entry name = Data.map (fn data => if Symtab.defined data name then error ("Duplicate declaration of named theorems: " ^ quote name) else Symtab.update (name, Thm.item_net) data); fun undeclared name = "Undeclared named theorems " ^ quote name; fun the_entry context name = (case Symtab.lookup (Data.get context) name of NONE => error (undeclared name) | SOME entry => entry); fun map_entry name f context = (the_entry context name; Data.map (Symtab.map_entry name f) context); (* maintain content *) fun member ctxt = Item_Net.member o the_entry (Context.Proof ctxt); fun content context = Item_Net.content o the_entry context; val get = content o Context.Proof; fun add_thm name = map_entry name o Item_Net.update; fun del_thm name = map_entry name o Item_Net.remove; val add = Thm.declaration_attribute o add_thm; val del = Thm.declaration_attribute o del_thm; (* check *) fun check ctxt (xname, pos) = let val context = Context.Proof ctxt; val fact_ref = Facts.Named ((xname, Position.none), NONE); fun err () = error (undeclared xname ^ Position.here pos); in (case try (Proof_Context.get_fact_generic context) fact_ref of SOME (SOME name, _) => if can (the_entry context) name then name else err () | _ => err ()) end; (* declaration *) fun declare binding descr lthy = let val name = Local_Theory.full_name lthy binding; val description = "declaration of " ^ (if descr = "" then Binding.name_of binding ^ " rules" else descr); val lthy' = lthy |> Local_Theory.background_theory (Context.theory_map (new_entry name)) |> Local_Theory.map_contexts (K (Context.proof_map (new_entry name))) |> Local_Theory.add_thms_dynamic (binding, fn context => content context name) |> Attrib.local_setup binding (Attrib.add_del (add name) (del name)) description in (name, lthy') end; val _ = Outer_Syntax.local_theory @{command_keyword named_theorems_rev} "declare named collection of theorems" - (Parse.and_list1 (Parse.binding -- Scan.optional Parse.text "") >> + (Parse.and_list1 (Parse.binding -- Scan.optional Parse.embedded "") >> fold (fn (b, descr) => snd o declare b descr)); (* ML antiquotation *) val _ = Theory.setup (ML_Antiquotation.inline @{binding named_theorems_rev} (Args.context -- Scan.lift Args.name_position >> (fn (ctxt, name) => ML_Syntax.print_string (check ctxt name)))); end; \ end diff --git a/thys/X86_Semantics/X86_Parse.ML b/thys/X86_Semantics/X86_Parse.ML --- a/thys/X86_Semantics/X86_Parse.ML +++ b/thys/X86_Semantics/X86_Parse.ML @@ -1,356 +1,356 @@ (* TODO: segment registers *) datatype Operand = (* size (in bytes) segment offset base index scale *) Mem of ( int * string option * int * string * string * int) | Reg of string | Imm of LargeInt.int datatype Instr = Instr of (LargeInt.int * LargeInt.int * string * Operand option * Operand option * Operand option) (* PRETTY PRINTING *) fun pp_option NONE = "" | pp_option (SOME s) = s fun pp_mem_size 1 = "BYTE PTR" | pp_mem_size 2 = "WORD PTR" | pp_mem_size 4 = "DWORD PTR" | pp_mem_size 8 = "QWORD PTR" | pp_mem_size 16 = "XMMWORD PTR" | pp_mem_size n = "SIZEDIR " ^ Int.toString (n*8) ^ " PTR" fun pp_operand (Mem (si,segment,offset,base, index, scale)) = pp_mem_size si ^ " " ^ pp_option segment ^ ":[" ^ Int.toString offset ^ " + " ^ base ^ " + " ^ index ^ " * " ^ Int.toString scale ^ "]" | pp_operand (Reg r) = r | pp_operand (Imm i) = Int.toString i fun pp_operands [] = "" | pp_operands (NONE::_) = "" | pp_operands [SOME op1] = pp_operand op1 | pp_operands [SOME op1,NONE] = pp_operand op1 | pp_operands (SOME op1::op2::ops) = pp_operand op1 ^ ", " ^ pp_operands (op2::ops) fun pp_instr (Instr (a,si,m,op1,op2,op3)) = LargeInt.toString a ^ ": " ^ m ^ " " ^ pp_operands [op1,op2,op3] ^ " (" ^ LargeInt.toString si ^ ")" val intFromHexString = StringCvt.scanString (LargeInt.scan StringCvt.HEX) o Substring.string fun intFromHexString_forced s = case intFromHexString s of SOME i => i | NONE => raise Fail ("Could not convert string '" ^ Substring.string s ^ "' to int.") fun is_whitespace c = (c = #" " orelse c = #"\t" orelse c = #"\n") fun trim str = let val (_,x) = Substring.splitl is_whitespace str val (y,_) = Substring.splitr is_whitespace x in y end; (* PARSING *) val registers = [ "rip", "rax", "eax", "ax", "ah", "al", "rbx", "ebx", "bx", "bh", "bl", "rcx", "ecx", "cx", "ch", "cl", "rdx", "edx", "dx", "dh", "dl", "rbp", "ebp", "bp", "bpl", "rsp", "esp", "sp", "spl", "rdi", "edi", "di", "dil", "rsi", "esi", "si", "sil", "r15", "r15d", "r15w", "r15b", "r14", "r14d", "r14w", "r14b", "r13", "r13d", "r13w", "r13b", "r12", "r12d", "r12w", "r12b", "r11", "r11d", "r11w", "r11b", "r10", "r10d", "r10w", "r10b", "r9", "r9d", "r9w", "r9b", "r8", "r8d", "r8w", "r8b", "xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8", "xmm9","xmm10","xmm11","xmm12","xmm13","xmm14","xmm15" ] fun is_register str = List.find (fn (str') => String.compare (Substring.string str,str') = EQUAL) registers <> NONE fun overwrite_str "" s = s | overwrite_str s "" = s | overwrite_str _ s = s fun overwrite_str_option NONE s = s | overwrite_str_option s NONE = s | overwrite_str_option _ s = s fun max x y = if x >= y then x else y fun overwrite_Mem (Mem (si,seg,off,base,ind,sc)) (Mem (si',seg',off',base',ind',sc')) = Mem (max si si',overwrite_str_option seg seg',max off off',overwrite_str base base',overwrite_str ind ind',max sc sc') fun parse_operand_address_between_brackets_inner str = if is_register str then Mem (0,NONE,0,Substring.string str,"",0) (* base *) else let val tokens = map trim (Substring.tokens (fn c => c = #"*") str) in if length tokens = 1 then case intFromHexString str of SOME i => Mem (0,NONE,i,"","",0) (* offset *) | NONE => raise Fail ("Don't know how to parse operand part:" ^ Substring.string str) else if length tokens = 2 then if is_register (nth tokens 0) then Mem (0,NONE,0,"",Substring.string (nth tokens 0),intFromHexString_forced (nth tokens 1)) (* index * scale *) else if is_register (nth tokens 1) then Mem (0,NONE,0,"",Substring.string (nth tokens 1),intFromHexString_forced (nth tokens 0)) (* scale * index *) else raise Fail ("Don't know how to parse operand part:" ^ Substring.string str) else raise Fail ("Don't know how to parse operand part:" ^ Substring.string str) end fun parse_operand_address_between_brackets_sum si segment_reg str = let val tokens = map trim (Substring.tokens (fn c => c = #"+") str) in fold (overwrite_Mem o parse_operand_address_between_brackets_inner) tokens (Mem (si,segment_reg ,0,"","",0)) end; fun parse_operand_address_between_brackets_sub si segment_reg str = let val (lhs,num) = Substring.splitl (fn c => c <> #"-") str; val (Mem (x0,x1,_,x3,x4,x5)) = parse_operand_address_between_brackets_sum si segment_reg lhs in Mem (x0,x1,intFromHexString_forced num,x3,x4,x5) end fun parse_operand_address_between_brackets si segment_reg str = let val (_,num) = Substring.splitl (fn c => c <> #"-") str in if Substring.isEmpty num then parse_operand_address_between_brackets_sum si segment_reg str else parse_operand_address_between_brackets_sub si segment_reg str end fun skip_brackets str = let val (x,y) = Substring.splitAt (trim str,1) val (z,_) = Substring.splitl (fn c => c <> #"]") y in if Substring.compare (x,Substring.full "[") = EQUAL then z else raise Fail ("Expecting non-empty bracketed string preceded with colon or an immediate in hex-format, but got: " ^ Substring.string str) end; fun parse_operand_address_bracketed si segment_reg str = case intFromHexString str of SOME imm => Mem (si,segment_reg,imm,"", "",0) | NONE => parse_operand_address_between_brackets si segment_reg (skip_brackets str) fun tail str = case Substring.getc str of NONE => raise Fail ("Expecting non-empty string, but got: " ^ Substring.string str) | SOME (_,s) => s; fun parse_operand_address si str = case Substring.splitl (fn c => c <> #":") str of (before_colon, after_colon) => if Substring.isEmpty after_colon then parse_operand_address_bracketed si NONE before_colon else parse_operand_address_bracketed si (SOME (Substring.string (trim before_colon))) (tail after_colon); fun parse_operand str' = let val str = trim str' in if Substring.isPrefix "BYTE PTR" str then parse_operand_address 1 (snd (Substring.splitAt (str,8))) else if Substring.isPrefix "WORD PTR" str then parse_operand_address 2 (snd (Substring.splitAt (str,8))) else if Substring.isPrefix "DWORD PTR" str then parse_operand_address 4 (snd (Substring.splitAt (str,9))) else if Substring.isPrefix "QWORD PTR" str then parse_operand_address 8 (snd (Substring.splitAt (str,9))) else if Substring.isPrefix "XMMWORD PTR" str then parse_operand_address 16 (snd (Substring.splitAt (str,11))) else if Substring.isPrefix "[" str then (* happens in case of a LEA instruction *) parse_operand_address 0 str else if List.find (fn (str') => String.compare (Substring.string str,str') = EQUAL) registers <> NONE then Reg (Substring.string str) else case intFromHexString str of NONE => raise Fail ("Cannot read hex number in string: " ^ (Substring.string str)) | SOME imm => Imm imm end; fun parse_operands str = let val tokens = map trim (Substring.tokens (fn c => c = #",") (trim str)) val ops = map parse_operand tokens in case ops of [] => (NONE,NONE,NONE) | [op1] => (SOME op1,NONE,NONE) | [op1,op2] => (SOME op1,SOME op2,NONE) | [op1,op2,op3] => (SOME op1,SOME op2,SOME op3) | _ => raise Fail ("Unexpected number of operands in : " ^ Substring.string str) end; fun remove_comment str = let val (str0,str1) = Substring.splitl (fn c => c <> #"#" andalso c <> #"<") str in if Substring.isEmpty str1 then str0 else Substring.trimr 1 str0 end fun parse_external_func a si str = let val (m,func) = Substring.splitl (fn c => c <> #" ") str val func_name = Substring.string (trim func) in Instr (a, si, Substring.string m, SOME (Reg func_name), NONE, NONE) end fun parse_normal_instr a si str = let val (_,rem1) = Substring.splitl (fn c => c = #":" orelse c = #" ") str val (m,rem2) = Substring.splitl (fn c => c <> #" ") rem1 val (op1,op2,op3) = parse_operands rem2 in Instr (a, si, Substring.string m, op1,op2,op3) end; fun parse_instr si str = let val str' = remove_comment (Substring.full str) val (addr,rem0) = Substring.splitl (fn c => c <> #":") str' val a = intFromHexString_forced (Substring.full ("0x" ^ Substring.string (trim addr))) in if Substring.isPrefix "EXTERNAL_FUNCTION" (trim (tail (rem0))) then parse_external_func a si (trim (tail (rem0))) else parse_normal_instr a si rem0 end; fun read_instr_addr str = let val instr = parse_instr 0 str val (Instr (a,_,_,_,_,_)) = instr in a end (* EMBEDDING INTO HOL *) val mk_nat = HOLogic.mk_number @{typ nat} val mk_string = HOLogic.mk_string fun mk_word_typ_from_num s = Syntax.read_typ @{context} ("num \ " ^ Int.toString s ^ " word") fun mk_word_typ s = Syntax.read_typ @{context} (Int.toString s ^ " word") fun mk_word i b = if i=0 then Const ("Groups.zero_class.zero", mk_word_typ b) else if i=1 then Const ("Groups.one_class.one", mk_word_typ b) else if i < 0 then Syntax.read_term @{context} ("uminus :: " ^ Int.toString b ^ " word \ " ^ Int.toString b ^ " word") $ (Const ("Num.numeral_class.numeral", mk_word_typ_from_num b) $ HOLogic.mk_numeral (0 - i)) else Const ("Num.numeral_class.numeral", mk_word_typ_from_num b) $ HOLogic.mk_numeral i fun mk_operand (Mem (8,segment,offset,base,index,scale)) = @{term "qword_ptr"} $ HOLogic.mk_prod (mk_word offset 64, HOLogic.mk_prod (mk_string base, HOLogic.mk_prod (mk_string index, mk_nat scale))) | mk_operand (Mem (4,segment,offset,base,index,scale)) = @{term "dword_ptr"} $ HOLogic.mk_prod (mk_word offset 64, HOLogic.mk_prod (mk_string base, HOLogic.mk_prod (mk_string index, mk_nat scale))) | mk_operand (Mem (2,segment,offset,base,index,scale)) = @{term "word_ptr"} $ HOLogic.mk_prod (mk_word offset 64, HOLogic.mk_prod (mk_string base, HOLogic.mk_prod (mk_string index, mk_nat scale))) | mk_operand (Mem (1,segment,offset,base,index,scale)) = @{term "byte_ptr"} $ HOLogic.mk_prod (mk_word offset 64, HOLogic.mk_prod (mk_string base, HOLogic.mk_prod (mk_string index, mk_nat scale))) | mk_operand (Mem (si,segment,offset,base,index,scale)) = @{term Mem} $ mk_nat si $ mk_word offset 64 $ mk_string base $ mk_string index $ mk_nat scale | mk_operand (Reg reg) = @{term Reg} $ mk_string reg | mk_operand (Imm imm) = @{term Imm} $ mk_word imm 256 fun mk_operand_option NONE = @{term "None :: Operand option"} | mk_operand_option (SOME op1) = @{term "Some :: Operand \ Operand option"} $ mk_operand op1 fun mk_instr (Instr (_,_,"EXTERNAL_FUNCTION",SOME (Reg f),NONE,NONE)) lthy = let val def = Syntax.read_term (Local_Theory.target_of lthy) ("EXTERNAL_FUNCTION_" ^ f) in if fastype_of def = (@{typ state} --> @{typ state}) then @{term ExternalFunc} $ def else raise Fail ("Unknown external function: " ^ f ^ "; expecting a function named EXTERNAL_FUNCTION_" ^ f ^ " in locale unknowns of type state \ state") end | mk_instr (Instr (a,si,m,op1,op2,op3)) _ = @{term Instr} $ mk_string m $ mk_operand_option op1 $ mk_operand_option op2 $ mk_operand_option op3 $ mk_word (a+si) 64 (* Make a definition in HOL with name "name" and as body "value". Value can be any HOL term, e.g.,: HOLogic.mk_number @{typ nat} 42 Note that HOL terms can be produced using antiquotations, e.g., @{term "42::nat"} does the same as the above code. *) fun mk_definition name value lthy = let val binding = Binding.name name val (_, lthy) = Local_Theory.define ((binding, NoSyn), ((Thm.def_binding binding, []), value)) lthy val _ = tracing ("Added definition: " ^ (Local_Theory.full_name lthy binding)) in lthy end fun main localename assembly lthy = let (* Build a locale name *) val _ = not (Long_Name.is_qualified localename) orelse raise Fail ("Given localename looks like qualified Isabelle ID: " ^ localename) val _ = localename <> "" orelse raise Fail ("Given localename is illegal") (* The locale fixes a variable called "fetch" of type "64 word \ I" *) val fixes_fetch = Element.Fixes [( Binding.name "fetch" , SOME (@{typ "64 word => I"}), NoSyn)] (* the locale contains a list of assumptions, one for each instruction. They are given the [simp] attribute. *) val simp_attrib = Attrib.internal (fn (_) => Simplifier.simp_add) fun mk_assume thm_name term = ((Binding.name thm_name, [simp_attrib]), [term]); val mk_fetch = Free ("fetch", @{typ "64 word => I"}) fun mk_fetch_equality_assume si str = let val instr = parse_instr si str val (Instr (a,_,_,_,_,_)) = instr val asm_name = "fetch_" ^ LargeInt.toString a val eq_term = HOLogic.mk_eq (mk_fetch $ mk_word a 64, mk_instr instr lthy) in mk_assume asm_name (HOLogic.Trueprop $ eq_term, []) end fun mk_fetch_equality_assumes [] = [] | mk_fetch_equality_assumes [str] = [mk_fetch_equality_assume 1 str] | mk_fetch_equality_assumes (str0::str1::strs) = (mk_fetch_equality_assume (read_instr_addr str1 - read_instr_addr str0) str0) :: mk_fetch_equality_assumes (str1::strs) val assembly = String.tokens (fn c => c = #"\n") assembly |> List.filter (Substring.full #> remove_comment #> Substring.explode #> List.all Char.isSpace #> not) val loc_bindings = Binding.name localename val loc_elems = [fixes_fetch,Element.Assumes (mk_fetch_equality_assumes assembly)] val thy = Local_Theory.exit_global lthy val loc_expr : (string, term) Expression.expr = [(Locale.intern thy "unknowns",(("",false),(Expression.Named [], [])))] val (_,lthy) = Expression.add_locale loc_bindings loc_bindings [] (loc_expr,[]) loc_elems thy val _ = tracing ("Added locale: " ^ localename ^ " with a fetch function for " ^ Int.toString (length assembly) ^ " instructions. To get started, execute:\n\ncontext " ^ localename ^ "\nbegin\n find_theorems fetch\nend\n") in lthy end (* Add the command "x86_64_parser" to the Isabelle syntax. - Its argument is parsed by Parse.text, which simply returns - the text. The parsed text is given to the "main" function. + Its argument is parsed by Parse.embedded, which simply returns + the embedded source. The parsed text is given to the "main" function. *) val _ = Outer_Syntax.local_theory \<^command_keyword>\x86_64_parser\ "Generate a locale from a list of assembly instructions." - (Parse.text -- Parse.text >> (fn (localename, assembly) => main localename assembly)) + (Parse.embedded -- Parse.embedded >> (fn (localename, assembly) => main localename assembly))