diff --git a/Nano_JSON.thy b/Nano_JSON.thy index 75a87ef..7843faa 100644 --- a/Nano_JSON.thy +++ b/Nano_JSON.thy @@ -33,8 +33,12 @@ chapter\An Import/Export of JSON-like Formats for Isabelle/HOL\ theory "Nano_JSON" imports - Complex_Main - "Assert" (* Can be removed, after removing all assertions. *) + Complex_Main (* required for type real *) +keywords + "import_JSON" :: thy_decl + and "definition_JSON" :: thy_decl +(* and "serialize_JSON" :: thy_decl *) + begin text\ This theory implements an import/export format for Isabelle/HOL that is inspired by @@ -49,10 +53,21 @@ text\ Still, our JSON-like import/export should work with most real-world JSON files, i.e., simplifying the data exchange between Isabelle/HOL and tools that can read/write JSON. + + Overall, this theory should enable you to work with JSON encoded data in Isabelle/HOL without + the need to implement parsers or serialization in Isabelle/ML. You should be able to implement + a mapping from the Nano JSON HOL data types to your own data types on the level of Isabelle/HOL (i.e., + as executable HOL functions). Nevertheless, the provided ML routine that converts between the + ML representation and the HOL representation of Nano JSON can also serve as a starting point + for converting the ML representation to your own, domain-specific, HOL encoding. 
\ section\Defining a JSON-like Data Structure\ +text\ + In this section, we define a data type for representing JSON-like data structures within HOL. +\ + datatype number = INTEGER int | REAL real datatype json = OBJECT "(string * json) list" | ARRAY "json list" @@ -61,11 +76,28 @@ datatype json = OBJECT "(string * json) list" | BOOL "bool" | NULL +text\ + Using the data type @{typ "json"}, we can now represent JSON encoded data easily in HOL: +\ + +subsection\Example\ +definition example01::json where +"example01 = + OBJECT [(''menu'', OBJECT [(''id'', STRING ''file''), (''value'', STRING ''File''), + (''popup'', OBJECT [(''menuitem'', ARRAY + [OBJECT [(''value'', STRING ''New''), (''onclick'', STRING ''CreateNewDoc()'')], + OBJECT [(''value'', STRING ''Open''), (''onclick'', STRING ''OpenDoc()'')], + OBJECT [(''value'', STRING ''Close''), (''onclick'', STRING ''CloseDoc()'')] + ])] + )])]" + text\ The translation of the data type @{typ "json"} to ML is straightforward. In addition, we also provide methods for converting JSON instances between the representation as Isabelle terms and the representation as ML data structure. \ + +subsection\ML Implementation\ ML\ signature NANO_JSON_TYPE = sig datatype number = INTEGER of int | REAL of real @@ -91,7 +123,7 @@ structure Nano_Json_Type : NANO_JSON_TYPE = struct | NULL fun real_to_rat_approx r = let - val _ = warning ("Conversion of real numbers is not IEEE compliant!") + val _ = warning ("Conversion of (real) numbers is not JSON compliant.") val rat = Real.toDecimal r fun pow (_, 0) = 1 | pow (x, n) = if n mod 2 = 0 then pow (x*x, n div 2) @@ -152,13 +184,429 @@ structure Nano_Json_Type : NANO_JSON_TYPE = struct | json_of _ = error "Term not supported in json_of_term." in if type_of t = @{typ "json"} then json_of t - else error "Term not of type json!" + else error "Term not of type json." end end \ +section\Parsing Nano JSON\ + +text\ + In this section, we define the infrastructure for parsing JSON-like data structures as + well as for importing them into Isabelle/HOL. 
This implementation was inspired by the + ``Simple Standard ML JSON parser'' from Chris Cannam. +\ + +subsection\ML Implementation\ +subsubsection\Lexer\ +ML\ +signature NANO_JSON_LEXER = sig + structure T : sig + datatype token = NUMBER of char list + | STRING of string + | BOOL of bool + | NULL + | CURLY_L + | CURLY_R + | SQUARE_L + | SQUARE_R + | COLON + | COMMA + + val string_of_T : token -> string + end + val tokenize_string: string -> T.token list +end + +structure Nano_Json_Lexer : NANO_JSON_LEXER = struct + structure T = struct + datatype token = NUMBER of char list + | STRING of string + | BOOL of bool + | NULL + | CURLY_L + | CURLY_R + | SQUARE_L + | SQUARE_R + | COLON + | COMMA + + fun string_of_T t = + case t of NUMBER digits => String.implode digits + | STRING s => s + | BOOL b => Bool.toString b + | NULL => "null" + | CURLY_L => "{" + | CURLY_R => "}" + | SQUARE_L => "[" + | SQUARE_R => "]" + | COLON => ":" + | COMMA => "," + end + + fun lexer_error pos text = error (text ^ " at character position " ^ + Int.toString (pos - 1)) + fun token_error pos = lexer_error pos ("Unexpected token") + + fun bmp_to_utf8 cp = map (Char.chr o Word.toInt) + (if cp < 0wx80 + then [cp] + else if cp < 0wx800 + then [Word.orb(0wxc0, Word.>>(cp,0w6)), + Word.orb(0wx80, Word.andb (cp, 0wx3f))] + else if cp < 0wx10000 + then [Word.orb(0wxe0,Word.>>(cp, 0w12)), + Word.orb(0wx80, Word.andb(Word.>>(cp,0w6), 0wx3f)), + Word.orb(0wx80,Word.andb(cp, 0wx3f))] + else error ("Invalid BMP point in bmp_to_utf8 " ^ (Word.toString cp))) + + fun lexNull pos acc (#"u" :: #"l" :: #"l" :: xs) = + lex (pos + 3) (T.NULL :: acc) xs + | lexNull pos _ _ = token_error pos + + and lexTrue pos acc (#"r" :: #"u" :: #"e" :: xs) = + lex (pos + 3) (T.BOOL true :: acc) xs + | lexTrue pos _ _ = token_error pos + + and lexFalse pos acc (#"a" :: #"l" :: #"s" :: #"e" :: xs) = + lex (pos + 4) (T.BOOL false :: acc) xs + | lexFalse pos _ _ = token_error pos + + and lexChar tok pos acc xs = + lex pos (tok :: acc)
xs + + and lexString pos acc cc = + let datatype escaped = ESCAPED | NORMAL + fun lexString' pos _ ESCAPED [] = + lexer_error pos "End of input during escape sequence" + | lexString' pos _ NORMAL [] = + lexer_error pos "End of input during string" + | lexString' pos text ESCAPED (x :: xs) = + let fun esc c = lexString' (pos + 1) (c :: text) NORMAL xs + in case x of + #"\"" => esc x + | #"\\" => esc x + | #"/" => esc x + | #"b" => esc #"\b" + | #"f" => esc #"\f" + | #"n" => esc #"\n" + | #"r" => esc #"\r" + | #"t" => esc #"\t" + | _ => lexer_error pos ("Invalid escape \\" ^ + Char.toString x) + end + | lexString' pos text NORMAL (#"\\" :: #"u" ::a::b::c::d:: xs) = + if List.all Char.isHexDigit [a,b,c,d] + then case Word.fromString ("0wx" ^ (String.implode [a,b,c,d])) of + SOME w => (let val utf = rev (bmp_to_utf8 w) in + lexString' (pos + 6) (utf @ text) + NORMAL xs + end + handle Fail err => lexer_error pos err) + | NONE => lexer_error pos "Invalid Unicode BMP escape sequence" + else lexer_error pos "Invalid Unicode BMP escape sequence" + | lexString' pos text NORMAL (x :: xs) = + if Char.ord x < 0x20 + then lexer_error pos "Invalid unescaped control character" + else + case x of + #"\"" => (rev text, xs, pos + 1) + | #"\\" => lexString' (pos + 1) text ESCAPED xs + | _ => lexString' (pos + 1) (x :: text) NORMAL xs + val (text, rest, newpos) = lexString' pos [] NORMAL cc + in + lex newpos (T.STRING (String.implode text) :: acc) rest + end + + and lexNumber firstChar pos acc cc = + let val valid = String.explode ".+-e" + fun lexNumber' pos digits [] = (rev digits, [], pos) + | lexNumber' pos digits (x :: xs) = + if x = #"E" then lexNumber' (pos + 1) (#"e" :: digits) xs + else if Char.isDigit x orelse List.exists (fn c => x = c) valid + then lexNumber' (pos + 1) (x :: digits) xs + else (rev digits, x :: xs, pos) + val (digits, rest, newpos) = + lexNumber' (pos - 1) [] (firstChar :: cc) + in + case digits of + [] => token_error pos + | _ => lex newpos (T.NUMBER digits 
:: acc) rest + end + + and lex _ acc [] = rev acc + | lex pos acc (x::xs) = + (case x of + #" " => lex + | #"\t" => lex + | #"\n" => lex + | #"\r" => lex + | #"{" => lexChar T.CURLY_L + | #"}" => lexChar T.CURLY_R + | #"[" => lexChar T.SQUARE_L + | #"]" => lexChar T.SQUARE_R + | #":" => lexChar T.COLON + | #"," => lexChar T.COMMA + | #"\"" => lexString + | #"t" => lexTrue + | #"f" => lexFalse + | #"n" => lexNull + | x => lexNumber x) (pos + 1) acc xs + + fun tokenize_string str = lex 1 [] (String.explode str) +end +\ + +subsubsection\Parser\ +ML\ +signature NANO_JSON_PARSER = sig + val json_of_string : string -> Nano_Json_Type.json + val term_of_json_string : string -> term +end + +structure Nano_Json_Parser : NANO_JSON_PARSER = struct + open Nano_Json_Type + open Nano_Json_Lexer + + fun show [] = "end of input" + | show (tok :: _) = T.string_of_T tok + + val parse_error = error + + fun parseNumber digits = + let open Char + + fun okExpDigits [] = false + | okExpDigits (c :: []) = isDigit c + | okExpDigits (c :: cs) = isDigit c andalso okExpDigits cs + + fun okExponent [] = false + | okExponent (#"+" :: cs) = okExpDigits cs + | okExponent (#"-" :: cs) = okExpDigits cs + | okExponent cc = okExpDigits cc + + fun okFracTrailing [] = true + | okFracTrailing (c :: cs) = + (isDigit c andalso okFracTrailing cs) orelse + (c = #"e" andalso okExponent cs) + + fun okFraction [] = false + | okFraction (c :: cs) = + isDigit c andalso okFracTrailing cs + + fun okPosTrailing [] = true + | okPosTrailing (#"." :: cs) = okFraction cs + | okPosTrailing (#"e" :: cs) = okExponent cs + | okPosTrailing (c :: cs) = + isDigit c andalso okPosTrailing cs + + fun okPositive [] = false + | okPositive (#"0" :: []) = true + | okPositive (#"0" :: #"." 
:: cs) = okFraction cs + | okPositive (#"0" :: #"e" :: cs) = okExponent cs + | okPositive (#"0" :: _) = false + | okPositive (c :: cs) = isDigit c andalso okPosTrailing cs + + fun okNumber (#"-" :: cs) = okPositive cs + | okNumber cc = okPositive cc + in + if okNumber digits then let + val number = String.implode digits + in + if not (List.exists (fn c => c = #"." orelse c = #"e") (String.explode number)) + then (case Int.fromString (String.implode digits) of + NONE => parse_error "Number out of range" + | SOME r => INTEGER r) + else (case Real.fromString (String.implode digits) of + NONE => parse_error "Number out of range" + | SOME r => REAL r) + end + else parse_error ("Invalid number \"" ^ (String.implode digits) ^ "\"") + end + + fun parseObject (T.CURLY_R :: xs) = (OBJECT [], xs) + | parseObject tokens = + let fun parsePair (T.STRING key :: T.COLON :: xs) = let + val (j, xs) = parseTokens xs + in + ((key, j), xs) + end + | parsePair other = + parse_error("Object key/value pair expected around \"" ^ + show other ^ "\"") + fun parseObject' _ [] = parse_error "End of input during object" + | parseObject' acc tokens = + case parsePair tokens of + (pair, T.COMMA :: xs) => + parseObject' (pair :: acc) xs + | (pair, T.CURLY_R :: xs) => + (OBJECT (rev (pair :: acc)), xs) + | (_, _) =>parse_error "Expected , or } after object element" + in + parseObject' [] tokens + end + + and parseArray (T.SQUARE_R :: xs) = (ARRAY [], xs) + | parseArray tokens = + let fun parseArray' _ [] = error "End of input during array" + | parseArray' acc tokens = + case parseTokens tokens of + (j, T.COMMA :: xs) => parseArray' (j :: acc) xs + | (j, T.SQUARE_R :: xs) => (ARRAY (rev (j :: acc)), xs) + | (_, _) => error "Expected , or ] after array element" + in + parseArray' [] tokens + end + + and parseTokens [] = parse_error "Value expected" + | parseTokens (tok :: xs) = + (case tok of + T.NUMBER d => (NUMBER ((parseNumber d)), xs) + | T.STRING s => (STRING s, xs) + | T.BOOL b => (BOOL b, xs) + | T.NULL => (NULL, xs) + | 
T.CURLY_L => parseObject xs + | T.SQUARE_L => parseArray xs + | _ => parse_error ("Unexpected token " ^ T.string_of_T tok ^ + " before " ^ show xs)) + + fun json_of_string str = case parseTokens (Nano_Json_Lexer.tokenize_string str) of + (value, []) => value + | (_, _) => parse_error "Extra data after input" + val term_of_json_string = term_of_json o json_of_string +end +\ + + +subsection\Isar Setup\ + +subsubsection\The JSON Cartouche\ + +syntax "_cartouche_nano_json" :: "cartouche_position \ 'a" ("JSON _") +parse_translation\ +let + fun translation args = + let + fun err () = raise TERM ("Common._cartouche_nano_json", args) + fun input s pos = Symbol_Pos.implode (Symbol_Pos.cartouche_content (Symbol_Pos.explode (s, pos))) + in + case args of + [(c as Const (@{syntax_const "_constrain"}, _)) $ Free (s, _) $ p] => + (case Term_Position.decode_position p of + SOME (pos, _) => c $ Nano_Json_Parser.term_of_json_string (input s pos) $ p + | NONE => err ()) + | _ => err () + end +in + [(@{syntax_const "_cartouche_nano_json"}, K translation)] +end +\ + +subsubsection\Isar Top-Level Commands\ +ML\ +structure Nano_Json_Isar = struct + fun make_const_def (constname, trm) lthy = let + val arg = ((Binding.name constname, NoSyn), ((Binding.name (constname^"_def"),[]), trm)) + val ((_, (_ , thm)), lthy') = Local_Theory.define arg lthy + in + (thm, lthy') + end + fun def_json name json = snd o (make_const_def (name, Nano_Json_Parser.term_of_json_string json )) + fun def_json_file name filename lthy = let + val filename = Path.explode filename + val thy = Proof_Context.theory_of lthy + val master_dir = Resources.master_directory thy + val abs_filename = if (Path.is_absolute filename) + then filename + else Path.append master_dir filename + val json = File.read abs_filename + in + def_json name json lthy + end + val jsonFileP = Parse.name -- Parse.name + val jsonP = Parse.name -- Parse.cartouche +end +\ + +ML\ +val _ = Outer_Syntax.local_theory @{command_keyword "definition_JSON"} 
"Define JSON." + (Nano_Json_Isar.jsonP >> (fn (name, json) => Nano_Json_Isar.def_json name json)); +val _ = Outer_Syntax.local_theory @{command_keyword "import_JSON"} "Define JSON from file." + (Nano_Json_Isar.jsonFileP >> (fn (name, filename) => Nano_Json_Isar.def_json_file name filename)); +\ + +subsection\Examples\ + +text\ +Now we can use the JSON Cartouche for defining JSON-like data ``on-the-fly'', e.g.: +\ +lemma \y == JSON\{"name": [true,false,"test"]}\\ + oops +text\ + Note that you need to escape quotes within the JSON Cartouche, if you are using + quotes as lemma delimiters, e.g.,: +\ +lemma "y == JSON\{\"name\": [true,false,\"test\"]}\" + oops +text\ + Thus, we recommend to use the Cartouche delimiters when using the JSON Cartouche with non + trivial data structures: +\ + +lemma \ example01 == JSON \{"menu": { + "id": "file", + "value": "File", + "popup": { + "menuitem": [ + {"value": "New", "onclick": "CreateNewDoc()"}, + {"value": "Open", "onclick": "OpenDoc()"}, + {"value": "Close", "onclick": "CloseDoc()"} + ] + } + }}\\ + by(simp add: example01_def) + +text\ + Using the top level Isar commands defined in the last section, we can now easily define + JSON-like data: +\ + +definition_JSON example02 \ +{"menu": { + "id": "file", + "value": "File", + "popup": { + "menuitem": [ + {"value": "New", "onclick": "CreateNewDoc()"}, + {"value": "Open", "onclick": "OpenDoc()"}, + {"value": "Close", "onclick": "CloseDoc()"} + ] + } +}} +\ +thm example02_def + +lemma "example01 = example02" + by(simp add: example01_def example02_def) + +text\ + Moreover, we can import JSON from external files: +\ + +import_JSON example03 "example.json" +thm example03_def + +lemma "example01 = example03" + by(simp add: example01_def example03_def) + section\Serializing Nano JSON\ +text\ + In this section, we define the necessary infrastructure to serialize (export) data from HOL using + a JSON-like data structure that other JSON tools should be able to import. 
+\ + +subsection\ML Implementation\ ML\ signature NANO_JSON_SERIALIZER = sig val serialize_json: Nano_Json_Type.json -> string @@ -169,6 +617,7 @@ end structure Nano_Json_Serializer : NANO_JSON_SERIALIZER = struct open Nano_Json_Type + fun escapeJsonString s = let fun bs c = "\\"^(Char.toString c) fun escape #"\"" = bs #"\"" @@ -231,6 +680,26 @@ structure Nano_Json_Serializer : NANO_JSON_SERIALIZER = struct end \ -section\Parsing Nano JSON\ +subsection\Isar Setup\ + +(* TODO *) + +subsection\Examples\ + +(* TODO *) + + +section\Putting Everything Together\ +text\ + For convenience, we define an ML structure that provides access to both the parser and the + serializer: +\ +ML\ +structure Nano_Json = struct + open Nano_Json_Type + open Nano_Json_Parser + open Nano_Json_Serializer +end +\ end diff --git a/example.json b/example.json new file mode 100644 index 0000000..5600991 --- /dev/null +++ b/example.json @@ -0,0 +1,11 @@ +{"menu": { + "id": "file", + "value": "File", + "popup": { + "menuitem": [ + {"value": "New", "onclick": "CreateNewDoc()"}, + {"value": "Open", "onclick": "OpenDoc()"}, + {"value": "Close", "onclick": "CloseDoc()"} + ] + } +}}