forked from Isabelle_DOF/Isabelle_DOF
199 lines
9.9 KiB
Plaintext
199 lines
9.9 KiB
Plaintext
chapter\<open>The High-Level Interface to the Automata-Library\<close>
|
|
|
|
theory RegExpInterface
|
|
imports "Functional-Automata.Execute"
|
|
begin
|
|
|
|
|
|
text\<open> The implementation of the monitoring concept follows the following design decisions:
|
|
\<^enum> We re-use generated code from the AFP submissions @{theory "Regular-Sets.Regular_Set"} and
|
|
@{theory "Functional-Automata.Automata"}, converted by the code-generator into executable SML code
|
|
(ports to future Isabelle versions should just reuse future versions of these)
|
|
\<^enum> Monitor-Expressions are regular expressions (in some adapted syntax)
|
|
over Document Class identifiers; they denote the language of all possible document object
|
|
instances belonging to these classes
|
|
\<^enum> Instead of expanding the sub-class relation (and building the product automaton of all
|
|
monitor expressions), we convert the monitor expressions into automata over class-id's
|
|
executed in parallel, in order to avoid blowup.
|
|
\<^enum> For efficiency reasons, the class-ids were internally abstracted to integers; the
|
|
encoding table is called environment \<^verbatim>\<open>env\<close>.
|
|
\<^enum> For reusability reasons, we did NOT abstract the internal state representation in the
|
|
deterministic automata construction (lists of lists of bits - sic !) by replacing them
|
|
by unique keys via a suitable coding-table; rather, we opted for keeping the automatas small
|
|
(no products, no subclass-expansion).
|
|
\<close>
|
|
|
|
section\<open>Monitor Syntax over RegExp - constructs\<close>
|
|
|
|
notation Star ("\<lbrace>(_)\<rbrace>\<^sup>*" [0]100)
|
|
notation Plus (infixr "||" 55)
|
|
notation Times (infixr "~~" 60)
|
|
notation Atom ("\<lfloor>_\<rfloor>" 65)
|
|
|
|
definition rep1 :: "'a rexp \<Rightarrow> 'a rexp" ("\<lbrace>(_)\<rbrace>\<^sup>+")
|
|
where "\<lbrace>A\<rbrace>\<^sup>+ \<equiv> A ~~ \<lbrace>A\<rbrace>\<^sup>*"
|
|
|
|
definition opt :: "'a rexp \<Rightarrow> 'a rexp" ("\<lbrakk>(_)\<rbrakk>")
|
|
where "\<lbrakk>A\<rbrakk> \<equiv> A || One"
|
|
|
|
value "Star (Conc(Alt (Atom(CHR ''a'')) (Atom(CHR ''b''))) (Atom(CHR ''c'')))"
|
|
text{* or better equivalently: *}
|
|
value "\<lbrace>(\<lfloor>CHR ''a''\<rfloor> || \<lfloor>CHR ''b''\<rfloor>) ~~ \<lfloor>CHR ''c''\<rfloor>\<rbrace>\<^sup>*"
|
|
|
|
section{* Some Standard and Derived Semantics *}
|
|
text\<open> This is just a reminder - already defined in @{theory "Regular-Sets.Regular_Exp"}
|
|
as @{term lang}.\<close>
|
|
|
|
text{* In the following, we give a semantics for our regular expressions, which so far have
|
|
just been a term language (i.e. abstract syntax). The semantics is a ``denotational semantics'',
|
|
i.e. we give a direct meaning for regular expressions in some universe of ``denotations''.
|
|
|
|
This universe of denotations is in our concrete case: *}
|
|
|
|
text{* Now the denotational semantics for regular expression can be defined on a post-card: *}
|
|
|
|
fun L :: "'a rexp => 'a lang"
|
|
where L_Emp : "L Zero = {}"
|
|
|L_One: "L One = {[]}"
|
|
|L_Atom: "L (\<lfloor>a\<rfloor>) = {[a]}"
|
|
|L_Un: "L (el || er) = (L el) \<union> (L er)"
|
|
|L_Conc: "L (el ~~ er) = {xs@ys | xs ys. xs \<in> L el \<and> ys \<in> L er}"
|
|
|L_Star: "L (Star e) = Regular_Set.star(L e)"
|
|
|
|
|
|
text\<open>A more useful definition is the sub-language - definition\<close>
|
|
fun L\<^sub>s\<^sub>u\<^sub>b :: "'a::order rexp => 'a lang"
|
|
where L\<^sub>s\<^sub>u\<^sub>b_Emp: "L\<^sub>s\<^sub>u\<^sub>b Zero = {}"
|
|
|L\<^sub>s\<^sub>u\<^sub>b_One: "L\<^sub>s\<^sub>u\<^sub>b One = {[]}"
|
|
|L\<^sub>s\<^sub>u\<^sub>b_Atom: "L\<^sub>s\<^sub>u\<^sub>b (\<lfloor>a\<rfloor>) = {z . \<forall>x. x \<le> a \<and> z=[x]}"
|
|
|L\<^sub>s\<^sub>u\<^sub>b_Un: "L\<^sub>s\<^sub>u\<^sub>b (el || er) = (L\<^sub>s\<^sub>u\<^sub>b el) \<union> (L\<^sub>s\<^sub>u\<^sub>b er)"
|
|
|L\<^sub>s\<^sub>u\<^sub>b_Conc: "L\<^sub>s\<^sub>u\<^sub>b (el ~~ er) = {xs@ys | xs ys. xs \<in> L\<^sub>s\<^sub>u\<^sub>b el \<and> ys \<in> L\<^sub>s\<^sub>u\<^sub>b er}"
|
|
|L\<^sub>s\<^sub>u\<^sub>b_Star: "L\<^sub>s\<^sub>u\<^sub>b (Star e) = Regular_Set.star(L\<^sub>s\<^sub>u\<^sub>b e)"
|
|
|
|
|
|
definition XX where "XX = (rexp2na example_expression)"
|
|
definition YY where "YY = na2da(rexp2na example_expression)"
|
|
(* reminder from execute *)
|
|
value "NA.accepts (rexp2na example_expression) [0,1,1,0,0,1]"
|
|
value "DA.accepts (na2da (rexp2na example_expression)) [0,1,1,0,0,1]"
|
|
|
|
|
|
section\<open>HOL - Adaptions and Export to SML\<close>
|
|
|
|
definition enabled :: "('a,'\<sigma> set)da \<Rightarrow> '\<sigma> set \<Rightarrow> 'a list \<Rightarrow> 'a list"
|
|
where "enabled A \<sigma> = filter (\<lambda>x. next A x \<sigma> \<noteq> {}) "
|
|
|
|
|
|
definition zero where "zero = (0::nat)"
|
|
definition one where "one = (1::nat)"
|
|
|
|
|
|
export_code zero one Suc Int.nat nat_of_integer int_of_integer (* for debugging *)
|
|
example_expression (* for debugging *)
|
|
|
|
Zero One Atom Plus Times Star (* regexp abstract syntax *)
|
|
|
|
rexp2na na2da enabled (* low-level automata interface *)
|
|
NA.accepts DA.accepts
|
|
|
|
in SML module_name RegExpChecker
|
|
file "RegExpChecker.sml" (* writing it to a file *)
|
|
|
|
(* potentially susceptible to race conditions ... *)
|
|
SML_file "RegExpChecker.sml" (* reads and eval generated file
|
|
into SML toplevel *)
|
|
SML_export \<open>structure RegExpChecker = RegExpChecker\<close> (* copies from SML toplevel into
|
|
Isabelle/ML toplevel *)
|
|
|
|
section\<open>The Abstract Interface For Monitor Expressions\<close>
|
|
text\<open>Here comes the hic : The reflection of the HOL-Automata module into an SML module
|
|
with an abstract interface hiding some generation artefacts like the internal states
|
|
of the deterministic automata ...\<close>
|
|
|
|
ML\<open>
|
|
|
|
structure RegExpInterface : sig
|
|
type automaton
|
|
type env
|
|
type cid
|
|
val alphabet : term list -> env
|
|
val ext_alphabet: env -> term list -> env
|
|
val conv : term -> env -> int RegExpChecker.rexp (* for debugging *)
|
|
val rexp_term2da: env -> term -> automaton
|
|
val enabled : automaton -> env -> cid list
|
|
val next : automaton -> env -> cid -> automaton
|
|
val final : automaton -> bool
|
|
val accepts : automaton -> env -> cid list -> bool
|
|
end
|
|
=
|
|
struct
|
|
local open RegExpChecker in
|
|
|
|
type state = bool list RegExpChecker.set
|
|
type env = string list
|
|
type cid = string
|
|
|
|
type automaton = state * ((Int.int -> state -> state) * (state -> bool))
|
|
|
|
val add_atom = fold_aterms (fn Const(c as(_,Type(@{type_name "rexp"},_)))=> insert (op=) c |_=>I);
|
|
fun alphabet termS = rev(map fst (fold add_atom termS []));
|
|
fun ext_alphabet env termS =
|
|
let val res = rev(map fst (fold add_atom termS [])) @ env;
|
|
val _ = if has_duplicates (op=) res
|
|
then error("reject and accept alphabets must be disjoint!")
|
|
else ()
|
|
in res end;
|
|
|
|
fun conv (Const(@{const_name "Regular_Exp.rexp.Zero"},_)) _ = Zero
|
|
|conv (Const(@{const_name "Regular_Exp.rexp.One"},_)) _ = Onea
|
|
|conv (Const(@{const_name "Regular_Exp.rexp.Times"},_) $ X $ Y) env = Times(conv X env, conv Y env)
|
|
|conv (Const(@{const_name "Regular_Exp.rexp.Plus"},_) $ X $ Y) env = Plus(conv X env, conv Y env)
|
|
|conv (Const(@{const_name "Regular_Exp.rexp.Star"},_) $ X) env = Star(conv X env)
|
|
|conv (Const(@{const_name "RegExpInterface.opt"},_) $ X) env = Plus(conv X env, Onea)
|
|
|conv (Const(@{const_name "RegExpInterface.rep1"},_) $ X) env = Times(conv X env, Star(conv X env))
|
|
|conv (Const (s, Type(@{type_name "rexp"},_))) env =
|
|
let val n = find_index (fn x => x = s) env
|
|
val _ = if n<0 then error"conversion error of regexp." else ()
|
|
in Atom(n) end
|
|
|conv S _ = error("conversion error of regexp:" ^ (Syntax.string_of_term (@{context})S))
|
|
|
|
val eq_int = {equal = curry(op =) : Int.int -> Int.int -> bool};
|
|
val eq_bool_list = {equal = curry(op =) : bool list -> bool list -> bool};
|
|
|
|
fun rexp_term2da env term = let val rexp = conv term env;
|
|
val nda = RegExpChecker.rexp2na eq_int rexp;
|
|
val da = RegExpChecker.na2da eq_bool_list nda;
|
|
in da end;
|
|
|
|
|
|
(* here comes the main interface of the module:
|
|
- "enabled" gives the part of the alphabet "env" for which the automatan does not
|
|
go into a final state
|
|
- next provides an automata transformation that produces an automaton that
|
|
recognizes the rest of a word after a *)
|
|
fun enabled (da as (state,(_,_))) env =
|
|
let val inds = RegExpChecker.enabled da state (0 upto (length env - 1))
|
|
in map (fn i => nth env i) inds end
|
|
|
|
fun next (current_state, (step,fin)) env a =
|
|
let val index = find_index (fn x => x = a) env
|
|
in if index < 0 then error"undefined id for monitor"
|
|
else (step index current_state,(step,fin))
|
|
end
|
|
|
|
fun final (current_state, (_,fin)) = fin current_state
|
|
|
|
fun accepts da env word = let fun index a = find_index (fn x => x = a) env
|
|
val indexL = map index word
|
|
val _ = if forall (fn x => x >= 0) indexL then ()
|
|
else error"undefined id for monitor"
|
|
in RegExpChecker.accepts da indexL end
|
|
|
|
end; (* local *)
|
|
end (* struct *)
|
|
\<close>
|
|
|
|
no_notation Atom ("\<lfloor>_\<rfloor>")
|
|
|
|
end
|