Browse Source

Added parser and overall cleanup.

tags/Isabelle2018
Achim D. Brucker 2 years ago
parent
commit
41634c70ac
2 changed files with 485 additions and 5 deletions
  1. +474
    -5
      Nano_JSON.thy
  2. +11
    -0
      example.json

+ 474
- 5
Nano_JSON.thy View File

@@ -33,8 +33,12 @@ chapter\<open>An Import/Export of JSON-like Formats for Isabelle/HOL\<close>
theory
"Nano_JSON"
imports
Complex_Main
"Assert" (* Can be removed, after removing all assertions. *)
Complex_Main (* required for type real *)
keywords
"import_JSON" :: thy_decl
and "definition_JSON" :: thy_decl
(* and "serialize_JSON" :: thy_decl *)

begin
text\<open>
This theory implements an import/export format for Isabelle/HOL that is inspired by
@@ -49,10 +53,21 @@ text\<open>

Still, our JSON-like import/expert should work with most real-world JSON files, i.e., simplifying
the data exchange between Isabelle/HOL and tools that can read/write JSON.

Overall, this theory should enable you to work with JSON encoded data in Isabelle/HOL without
the need of implementing parsers or serialization in Isabelle/ML. You should be able to implement
mapping from the Nano JSON HOL data types to your own data types on the level of Isabelle/HOL (i.e.,
as executable HOL functions). Nevertheless, the provided ML routine that converts between the
ML representation and the HOL representation of Nano JSON can also serve as a starting point
for converting the ML representation to your own, domain-specific, HOL encoding.
\<close>

section\<open>Defining a JSON-like Data Structure\<close>

text\<open>
In this section
\<close>

datatype number = INTEGER int | REAL real
datatype json = OBJECT "(string * json) list"
| ARRAY "json list"
@@ -62,10 +77,27 @@ datatype json = OBJECT "(string * json) list"
| NULL

text\<open>
Using the data type @{typ "json"}, we can now represent JSON encoded data easily in HOL:
\<close>

subsection\<open>Example\<close>
definition example01::json where
"example01 =
OBJECT [(''menu'', OBJECT [(''id'', STRING ''file''), (''value'', STRING ''File''),
(''popup'', OBJECT [(''menuitem'', ARRAY
[OBJECT [(''value'', STRING ''New''), (''onclick'', STRING ''CreateNewDoc()'')],
OBJECT [(''value'', STRING ''Open''), (''onclick'', STRING ''OpenDoc()'')],
OBJECT [(''value'', STRING ''Close''), (''onclick'', STRING ''CloseDoc()'')]
])]
)])]"

text\<open>
The translation of the data type @{typ "json"} to ML is straight forward. In addition, we also
provide methods for converting JSON instances between the representation as Isabelle terms and
the representation as ML data structure.
\<close>

subsection\<open>ML Implementation\<close>
ML\<open>
signature NANO_JSON_TYPE = sig
datatype number = INTEGER of int | REAL of real
@@ -91,7 +123,7 @@ structure Nano_Json_Type : NANO_JSON_TYPE = struct
| NULL

fun real_to_rat_approx r = let
val _ = warning ("Conversion of real numbers is not IEEE compliant!")
val _ = warning ("Conversion of (real) numbers is not JSON compliant.")
val rat = Real.toDecimal r
fun pow (_, 0) = 1
| pow (x, n) = if n mod 2 = 0 then pow (x*x, n div 2)
@@ -152,13 +184,429 @@ structure Nano_Json_Type : NANO_JSON_TYPE = struct
| json_of _ = error "Term not supported in json_of_term."
in
if type_of t = @{typ "json"} then json_of t
else error "Term not of type json!"
else error "Term not of type json."
end
end
\<close>

section\<open>Parsing Nano JSON\<close>

text\<open>
In this section, we define the infrastructure for parsing JSON-like data structures as
well as for importing them into Isabelle/HOL. This implementation was inspired by the
``Simple Standard ML JSON parser'' from Chris Cannam.
\<close>

subsection\<open>ML Implementation\<close>
subsubsection\<open>Lexer\<close>
ML\<open>
signature NANO_JSON_LEXER = sig
structure T : sig
datatype token = NUMBER of char list
| STRING of string
| BOOL of bool
| NULL
| CURLY_L
| CURLY_R
| SQUARE_L
| SQUARE_R
| COLON
| COMMA

val string_of_T : token -> string
end
val tokenize_string: string -> T.token list
end

structure Nano_Json_Lexer : NANO_JSON_LEXER = struct
structure T = struct
datatype token = NUMBER of char list
| STRING of string
| BOOL of bool
| NULL
| CURLY_L
| CURLY_R
| SQUARE_L
| SQUARE_R
| COLON
| COMMA

fun string_of_T t =
case t of NUMBER digits => String.implode digits
| STRING s => s
| BOOL b => Bool.toString b
| NULL => "null"
| CURLY_L => "{"
| CURLY_R => "}"
| SQUARE_L => "["
| SQUARE_R => "]"
| COLON => ":"
| COMMA => ","
end

fun lexer_error pos text = error (text ^ " at character position " ^
Int.toString (pos - 1))
fun token_error pos = lexer_error pos ("Unexpected token")

fun bmp_to_utf8 cp = map (Char.chr o Word.toInt)
(if cp < 0wx80
then [cp]
else if cp < 0wx800
then [Word.orb(0wxc0, Word.>>(cp,0w6)),
Word.orb(0wx8, Word.andb (cp, 0wx3f))]
else if cp < 0wx10000
then [Word.orb(0wxe0,Word.>>(cp, 0w12)),
Word.orb(0wx80, Word.andb(Word.>>(cp,0w6), 0wx3f)),
Word.orb(0wx80,Word.andb(cp, 0wx3f))]
else error ("Invalid BMP point in bmp_to_utf8 " ^ (Word.toString cp)))

fun lexNull pos acc (#"u" :: #"l" :: #"l" :: xs) =
lex (pos + 3) (T.NULL :: acc) xs
| lexNull pos _ _ = token_error pos

and lexTrue pos acc (#"r" :: #"u" :: #"e" :: xs) =
lex (pos + 3) (T.BOOL true :: acc) xs
| lexTrue pos _ _ = token_error pos

and lexFalse pos acc (#"a" :: #"l" :: #"s" :: #"e" :: xs) =
lex (pos + 4) (T.BOOL false :: acc) xs
| lexFalse pos _ _ = token_error pos

and lexChar tok pos acc xs =
lex pos (tok :: acc) xs
and lexString pos acc cc =
let datatype escaped = ESCAPED | NORMAL
fun lexString' pos _ ESCAPED [] =
lexer_error pos "End of input during escape sequence"
| lexString' pos _ NORMAL [] =
lexer_error pos "End of input during string"
| lexString' pos text ESCAPED (x :: xs) =
let fun esc c = lexString' (pos + 1) (c :: text) NORMAL xs
in case x of
#"\"" => esc x
| #"\\" => esc x
| #"/" => esc x
| #"b" => esc #"\b"
| #"f" => esc #"\f"
| #"n" => esc #"\n"
| #"r" => esc #"\r"
| #"t" => esc #"\t"
| _ => lexer_error pos ("Invalid escape \\" ^
Char.toString x)
end
| lexString' pos text NORMAL (#"\\" :: #"u" ::a::b::c::d:: xs) =
if List.all Char.isHexDigit [a,b,c,d]
then case Word.fromString ("0wx" ^ (String.implode [a,b,c,d])) of
SOME w => (let val utf = rev (bmp_to_utf8 w) in
lexString' (pos + 6) (utf @ text)
NORMAL xs
end
handle Fail err => lexer_error pos err)
| NONE => lexer_error pos "Invalid Unicode BMP escape sequence"
else lexer_error pos "Invalid Unicode BMP escape sequence"
| lexString' pos text NORMAL (x :: xs) =
if Char.ord x < 0x20
then lexer_error pos "Invalid unescaped control character"
else
case x of
#"\"" => (rev text, xs, pos + 1)
| #"\\" => lexString' (pos + 1) text ESCAPED xs
| _ => lexString' (pos + 1) (x :: text) NORMAL xs
val (text, rest, newpos) = lexString' pos [] NORMAL cc
in
lex newpos (T.STRING (String.implode text) :: acc) rest
end

and lexNumber firstChar pos acc cc =
let val valid = String.explode ".+-e"
fun lexNumber' pos digits [] = (rev digits, [], pos)
| lexNumber' pos digits (x :: xs) =
if x = #"E" then lexNumber' (pos + 1) (#"e" :: digits) xs
else if Char.isDigit x orelse List.exists (fn c => x = c) valid
then lexNumber' (pos + 1) (x :: digits) xs
else (rev digits, x :: xs, pos)
val (digits, rest, newpos) =
lexNumber' (pos - 1) [] (firstChar :: cc)
in
case digits of
[] => token_error pos
| _ => lex newpos (T.NUMBER digits :: acc) rest
end
and lex _ acc [] = rev acc
| lex pos acc (x::xs) =
(case x of
#" " => lex
| #"\t" => lex
| #"\n" => lex
| #"\r" => lex
| #"{" => lexChar T.CURLY_L
| #"}" => lexChar T.CURLY_R
| #"[" => lexChar T.SQUARE_L
| #"]" => lexChar T.SQUARE_R
| #":" => lexChar T.COLON
| #"," => lexChar T.COMMA
| #"\"" => lexString
| #"t" => lexTrue
| #"f" => lexFalse
| #"n" => lexNull
| x => lexNumber x) (pos + 1) acc xs

fun tokenize_string str = lex 1 [] (String.explode str)
end
\<close>

subsubsection\<open>Parser\<close>
ML\<open>
signature NANO_JSON_PARSER = sig
val json_of_string : string -> Nano_Json_Type.json
val term_of_json_string : string -> term
end

structure Nano_Json_Parser : NANO_JSON_PARSER = struct
open Nano_Json_Type
open Nano_Json_Lexer

fun show [] = "end of input"
| show (tok :: _) = T.string_of_T tok

val parse_error = error

fun parseNumber digits =
let open Char

fun okExpDigits [] = false
| okExpDigits (c :: []) = isDigit c
| okExpDigits (c :: cs) = isDigit c andalso okExpDigits cs

fun okExponent [] = false
| okExponent (#"+" :: cs) = okExpDigits cs
| okExponent (#"-" :: cs) = okExpDigits cs
| okExponent cc = okExpDigits cc

fun okFracTrailing [] = true
| okFracTrailing (c :: cs) =
(isDigit c andalso okFracTrailing cs) orelse
(c = #"e" andalso okExponent cs)

fun okFraction [] = false
| okFraction (c :: cs) =
isDigit c andalso okFracTrailing cs

fun okPosTrailing [] = true
| okPosTrailing (#"." :: cs) = okFraction cs
| okPosTrailing (#"e" :: cs) = okExponent cs
| okPosTrailing (c :: cs) =
isDigit c andalso okPosTrailing cs
fun okPositive [] = false
| okPositive (#"0" :: []) = true
| okPositive (#"0" :: #"." :: cs) = okFraction cs
| okPositive (#"0" :: #"e" :: cs) = okExponent cs
| okPositive (#"0" :: _) = false
| okPositive (c :: cs) = isDigit c andalso okPosTrailing cs
fun okNumber (#"-" :: cs) = okPositive cs
| okNumber cc = okPositive cc
in
if okNumber digits then let
val number = String.implode digits
in
if List.all (Char.isDigit) (String.explode number)
then (case Int.fromString (String.implode digits) of
NONE => parse_error "Number out of range"
| SOME r => INTEGER r)
else (case Real.fromString (String.implode digits) of
NONE => parse_error "Number out of range"
| SOME r => REAL r)
end
else parse_error ("Invalid number \"" ^ (String.implode digits) ^ "\"")
end
fun parseObject (T.CURLY_R :: xs) = (OBJECT [], xs)
| parseObject tokens =
let fun parsePair (T.STRING key :: T.COLON :: xs) = let
val (j, xs) = parseTokens xs
in
((key, j), xs)
end
| parsePair other =
parse_error("Object key/value pair expected around \"" ^
show other ^ "\"")
fun parseObject' _ [] = parse_error "End of input during object"
| parseObject' acc tokens =
case parsePair tokens of
(pair, T.COMMA :: xs) =>
parseObject' (pair :: acc) xs
| (pair, T.CURLY_R :: xs) =>
(OBJECT (rev (pair :: acc)), xs)
| (_, _) =>parse_error "Expected , or } after object element"
in
parseObject' [] tokens
end

and parseArray (T.SQUARE_R :: xs) = (ARRAY [], xs)
| parseArray tokens =
let fun parseArray' _ [] = error "End of input during array"
| parseArray' acc tokens =
case parseTokens tokens of
(j, T.COMMA :: xs) => parseArray' (j :: acc) xs
| (j, T.SQUARE_R :: xs) => (ARRAY (rev (j :: acc)), xs)
| (_, _) => error "Expected , or ] after array element"
in
parseArray' [] tokens
end

and parseTokens [] = parse_error "Value expected"
| parseTokens (tok :: xs) =
(case tok of
T.NUMBER d => (NUMBER ((parseNumber d)), xs)
| T.STRING s => (STRING s, xs)
| T.BOOL b => (BOOL b, xs)
| T.NULL => (NULL, xs)
| T.CURLY_L => parseObject xs
| T.SQUARE_L => parseArray xs
| _ => parse_error ("Unexpected token " ^ T.string_of_T tok ^
" before " ^ show xs))
fun json_of_string str = case parseTokens (Nano_Json_Lexer.tokenize_string str) of
(value, []) => value
| (_, _) => parse_error "Extra data after input"
val term_of_json_string = term_of_json o json_of_string
end
\<close>


subsection\<open>Isar Setup\<close>

subsubsection\<open>The JSON Cartouche\<close>

syntax "_cartouche_nano_json" :: "cartouche_position \<Rightarrow> 'a" ("JSON _")
parse_translation\<open>
let
fun translation args =
let
fun err () = raise TERM ("Common._cartouche_nano_json", args)
fun input s pos = Symbol_Pos.implode (Symbol_Pos.cartouche_content (Symbol_Pos.explode (s, pos)))
in
case args of
[(c as Const (@{syntax_const "_constrain"}, _)) $ Free (s, _) $ p] =>
(case Term_Position.decode_position p of
SOME (pos, _) => c $ Nano_Json_Parser.term_of_json_string (input s pos) $ p
| NONE => err ())
| _ => err ()
end
in
[(@{syntax_const "_cartouche_nano_json"}, K translation)]
end
\<close>

subsubsection\<open>Isar Top-Level Commands\<close>
ML\<open>
structure Nano_Json_Isar = struct
fun make_const_def (constname, trm) lthy = let
val arg = ((Binding.name constname, NoSyn), ((Binding.name (constname^"_def"),[]), trm))
val ((_, (_ , thm)), lthy') = Local_Theory.define arg lthy
in
(thm, lthy')
end
fun def_json name json = snd o (make_const_def (name, Nano_Json_Parser.term_of_json_string json ))
fun def_json_file name filename lthy = let
val filename = Path.explode filename
val thy = Proof_Context.theory_of lthy
val master_dir = Resources.master_directory thy
val abs_filename = if (Path.is_absolute filename)
then filename
else Path.append master_dir filename
val json = File.read abs_filename
in
def_json name json lthy
end
val jsonFileP = Parse.name -- Parse.name
val jsonP = Parse.name -- Parse.cartouche
end
\<close>

ML\<open>
val _ = Outer_Syntax.local_theory @{command_keyword "definition_JSON"} "Define JSON."
(Nano_Json_Isar.jsonP >> (fn (name, json) => Nano_Json_Isar.def_json name json));
val _ = Outer_Syntax.local_theory @{command_keyword "import_JSON"} "Define JSON from file."
(Nano_Json_Isar.jsonFileP >> (fn (name, filename) => Nano_Json_Isar.def_json_file name filename));
\<close>

subsection\<open>Examples\<close>

text\<open>
Now we can use the JSON Cartouche for defining JSON-like data ``on-the-fly'', e.g.:
\<close>
lemma \<open>y == JSON\<open>{"name": [true,false,"test"]}\<close>\<close>
oops
text\<open>
Note that you need to escape quotes within the JSON Cartouche, if you are using
quotes as lemma delimiters, e.g.,:
\<close>
lemma "y == JSON\<open>{\"name\": [true,false,\"test\"]}\<close>"
oops
text\<open>
Thus, we recommend to use the Cartouche delimiters when using the JSON Cartouche with non
trivial data structures:
\<close>

lemma \<open> example01 == JSON \<open>{"menu": {
"id": "file",
"value": "File",
"popup": {
"menuitem": [
{"value": "New", "onclick": "CreateNewDoc()"},
{"value": "Open", "onclick": "OpenDoc()"},
{"value": "Close", "onclick": "CloseDoc()"}
]
}
}}\<close>\<close>
by(simp add: example01_def)

text\<open>
Using the top level Isar commands defined in the last section, we can now easily define
JSON-like data:
\<close>

definition_JSON example02 \<open>
{"menu": {
"id": "file",
"value": "File",
"popup": {
"menuitem": [
{"value": "New", "onclick": "CreateNewDoc()"},
{"value": "Open", "onclick": "OpenDoc()"},
{"value": "Close", "onclick": "CloseDoc()"}
]
}
}}
\<close>
thm example02_def

lemma "example01 = example02"
by(simp add: example01_def example02_def)

text\<open>
Moreover, we can import JSON from external files:
\<close>

import_JSON example03 "example.json"
thm example03_def

lemma "example01 = example03"
by(simp add: example01_def example03_def)

section\<open>Serializing Nano JSON\<close>

text\<open>
In this section, we define the necessary infrastructure to serialize (export) data from HOL using
a JSON-like data structure that other JSON tools should be able to import.
\<close>

subsection\<open>ML Implementation\<close>
ML\<open>
signature NANO_JSON_SERIALIZER = sig
val serialize_json: Nano_Json_Type.json -> string
@@ -169,6 +617,7 @@ end

structure Nano_Json_Serializer : NANO_JSON_SERIALIZER = struct
open Nano_Json_Type

fun escapeJsonString s =
let fun bs c = "\\"^(Char.toString c)
fun escape #"\"" = bs #"\""
@@ -231,6 +680,26 @@ structure Nano_Json_Serializer : NANO_JSON_SERIALIZER = struct
end
\<close>

section\<open>Parsing Nano JSON\<close>
subsection\<open>Isar Setup\<close>

(* TODO *)

subsection\<open>Examples\<close>

(* TODO *)


section\<open>Putting Everything Together\<close>
text\<open>
For convenience, we provide an ML structure that provides access to both the parser and the
serializer:,
\<close>
ML\<open>
structure Nano_Json = struct
open Nano_Json_Type
open Nano_Json_Parser
open Nano_Json_Serializer
end
\<close>

end

+ 11
- 0
example.json View File

@@ -0,0 +1,11 @@
{"menu": {
"id": "file",
"value": "File",
"popup": {
"menuitem": [
{"value": "New", "onclick": "CreateNewDoc()"},
{"value": "Open", "onclick": "OpenDoc()"},
{"value": "Close", "onclick": "CloseDoc()"}
]
}
}}

Loading…
Cancel
Save