429 lines
16 KiB
Standard ML
429 lines
16 KiB
Standard ML
(* Modified by Frédéric Tuong
|
|
* Generation of Language.C Grammar with ML Interface Binding
|
|
* (c) 2018-2019 Université Paris-Saclay, Univ. Paris-Sud, France
|
|
*)
|
|
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
|
|
|
|
(* base.sig: Base signature file for SML-Yacc. This file contains signatures
|
|
that must be loaded before any of the files produced by ML-Yacc are loaded
|
|
*)
|
|
|
|
(* STREAM: signature for a lazy stream.*)
|
|
|
|
signature STREAM1 =
|
|
sig type ('xa, 'xb) stream
|
|
val streamify : ((('stack * 'stack_ml * 'stack_pos * 'stack_tree) * 'arg) -> '_a * (('stack * 'stack_ml * 'stack_pos * 'stack_tree) * 'arg)) -> 'arg -> ('_a, (('stack * 'stack_ml * 'stack_pos * 'stack_tree) * 'arg)) stream * 'arg
|
|
val cons : '_a * (('_a, '_b) stream * '_b) -> ('_a, '_b) stream * '_b
|
|
val get : ('_a, '_b) stream * '_b -> '_a * (('_a, '_b) stream * '_b)
|
|
end
|
|
|
|
signature STREAM2 =
|
|
sig type 'xa stream
|
|
val streamify : (unit -> '_a) -> '_a stream
|
|
val cons : '_a * '_a stream -> '_a stream
|
|
val get : '_a stream -> '_a * '_a stream
|
|
end
|
|
|
|
(* LR_TABLE: signature for an LR Table.
|
|
|
|
The list of actions and gotos passed to mkLrTable must be ordered by state
|
|
number. The values for state 0 are the first in the list, the values for
|
|
state 1 are next, etc.
|
|
*)
|
|
|
|
signature LR_TABLE =
|
|
sig
|
|
datatype ('a,'b) pairlist = EMPTY | PAIR of 'a * 'b * ('a,'b) pairlist
|
|
datatype state = STATE of int
|
|
datatype term = T of int
|
|
datatype nonterm = NT of int
|
|
datatype action = SHIFT of state
|
|
| REDUCE of int
|
|
| ACCEPT
|
|
| ERROR
|
|
type table
|
|
|
|
val numStates : table -> int
|
|
val numRules : table -> int
|
|
val describeActions : table -> state ->
|
|
(term,action) pairlist * action
|
|
val describeGoto : table -> state -> (nonterm,state) pairlist
|
|
val action : table -> state * term -> action
|
|
val goto : table -> state * nonterm -> state
|
|
val initialState : table -> state
|
|
exception Goto of state * nonterm
|
|
|
|
val mkLrTable : {actions : ((term,action) pairlist * action) array,
|
|
gotos : (nonterm,state) pairlist array,
|
|
numStates : int, numRules : int,
|
|
initialState : state} -> table
|
|
end
|
|
|
|
(* TOKEN: signature revealing the internal structure of a token. This signature
|
|
TOKEN distinct from the signature {parser name}_TOKENS produced by ML-Yacc.
|
|
The {parser name}_TOKENS structures contain some types and functions to
|
|
construct tokens from values and positions.
|
|
|
|
The representation of token was very carefully chosen here to allow the
|
|
polymorphic parser to work without knowing the types of semantic values
|
|
or line numbers.
|
|
|
|
This has had an impact on the TOKENS structure produced by SML-Yacc, which
|
|
is a structure parameter to lexer functors. We would like to have some
|
|
type 'a token which functions to construct tokens would create. A
|
|
constructor function for a integer token might be
|
|
|
|
INT: int * 'a * 'a -> 'a token.
|
|
|
|
This is not possible because we need to have tokens with the representation
|
|
given below for the polymorphic parser.
|
|
|
|
Thus our constructur functions for tokens have the form:
|
|
|
|
INT: int * 'a * 'a -> (svalue,'a) token
|
|
|
|
This in turn has had an impact on the signature that lexers for SML-Yacc
|
|
must match and the types that a user must declare in the user declarations
|
|
section of lexers.
|
|
*)
|
|
|
|
signature TOKEN =
|
|
sig
|
|
structure LrTable : LR_TABLE
|
|
datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
|
|
val sameToken : ('a,'b) token * ('a,'b) token -> bool
|
|
end
|
|
|
|
(* LR_PARSER: signature for a polymorphic LR parser *)
|
|
|
|
signature LR_PARSER1 =
|
|
sig
|
|
structure Stream : STREAM1
|
|
structure LrTable : LR_TABLE
|
|
structure Token : TOKEN
|
|
|
|
sharing LrTable = Token.LrTable
|
|
|
|
type ('_b, '_c) stack = (LrTable.state, '_b, '_c) stack'
|
|
|
|
type ('_b, '_c, 'arg) lexer = (('arg -> '_b * 'arg,'_c) Token.token, ('_b, '_c) stack * 'arg) Stream.stream * 'arg
|
|
|
|
val parse : {table : LrTable.table,
|
|
saction : int *
|
|
'_c *
|
|
(LrTable.state * ('_b * '_c * '_c)) list *
|
|
'arg
|
|
-> LrTable.nonterm *
|
|
(('arg -> '_b * 'arg) * '_c * '_c) *
|
|
(LrTable.state * ('_b * '_c * '_c)) list,
|
|
void : 'arg -> '_b * 'arg,
|
|
void_position : '_c,
|
|
accept : '_c * '_c -> ('_b, '_c) stack * 'arg -> 'arg,
|
|
reduce_init : (('_c * '_c) list * int) * 'arg -> 'arg,
|
|
reduce_get : (LrTable.state, '_b, '_c) C_Env.rule_reduce -> 'arg -> (LrTable.state, '_b, '_c) C_Env.rule_output0 * 'arg,
|
|
ec : { is_keyword : LrTable.term -> bool,
|
|
noShift : LrTable.term -> bool,
|
|
preferred_change : (LrTable.term list * LrTable.term list) list,
|
|
errtermvalue : LrTable.term -> 'arg -> '_b * 'arg,
|
|
showTerminal : LrTable.term -> string,
|
|
terms: LrTable.term list,
|
|
error : '_c * '_c -> ('_b, '_c) stack * 'arg -> 'arg
|
|
},
|
|
lookahead : int (* max amount of lookahead used in *)
|
|
(* error correction *)
|
|
}
|
|
-> ('_b, '_c, 'arg) lexer
|
|
-> ('_b, '_c, 'arg) lexer
|
|
end
|
|
|
|
signature LR_PARSER2 =
|
|
sig
|
|
structure Stream : STREAM2
|
|
structure LrTable : LR_TABLE
|
|
structure Token : TOKEN
|
|
|
|
sharing LrTable = Token.LrTable
|
|
|
|
exception ParseError
|
|
|
|
val parse : {table : LrTable.table,
|
|
lexer : ('_b,'_c) Token.token Stream.stream,
|
|
arg: 'arg,
|
|
saction : int *
|
|
'_c *
|
|
(LrTable.state * ('_b * '_c * '_c)) list *
|
|
'arg
|
|
-> LrTable.nonterm *
|
|
('_b * '_c * '_c) *
|
|
(LrTable.state * ('_b * '_c * '_c)) list,
|
|
void : '_b,
|
|
ec : { is_keyword : LrTable.term -> bool,
|
|
noShift : LrTable.term -> bool,
|
|
preferred_change : (LrTable.term list * LrTable.term list) list,
|
|
errtermvalue : LrTable.term -> '_b,
|
|
showTerminal : LrTable.term -> string,
|
|
terms: LrTable.term list,
|
|
error : string * '_c * '_c -> unit
|
|
},
|
|
lookahead : int (* max amount of lookahead used in *)
|
|
(* error correction *)
|
|
}
|
|
-> '_b * ('_b,'_c) Token.token Stream.stream
|
|
end
|
|
|
|
(* LEXER: a signature that most lexers produced for use with SML-Yacc's
|
|
output will match. The user is responsible for declaring type token,
|
|
type pos, and type svalue in the UserDeclarations section of a lexer.
|
|
|
|
Note that type token is abstract in the lexer. This allows SML-Yacc to
|
|
create a TOKENS signature for use with lexers produced by ML-Lex that
|
|
treats the type token abstractly. Lexers that are functors parametrized by
|
|
a Tokens structure matching a TOKENS signature cannot examine the structure
|
|
of tokens.
|
|
*)
|
|
|
|
signature LEXER =
|
|
sig
|
|
structure UserDeclarations :
|
|
sig
|
|
type ('a,'b) token
|
|
type pos
|
|
type svalue
|
|
end
|
|
val makeLexer : (int -> string)
|
|
-> unit
|
|
-> (UserDeclarations.svalue, UserDeclarations.pos) UserDeclarations.token
|
|
end
|
|
|
|
(* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which
|
|
also take an argument before yielding a function from unit to a token
|
|
*)
|
|
|
|
signature ARG_LEXER1 =
|
|
sig
|
|
structure UserDeclarations :
|
|
sig
|
|
type ('a,'b) token
|
|
type pos
|
|
type arg
|
|
type svalue0
|
|
type svalue = arg -> svalue0 * arg
|
|
type state
|
|
end
|
|
type stack = (UserDeclarations.state, UserDeclarations.svalue0, UserDeclarations.pos) stack'
|
|
val makeLexer : (stack * UserDeclarations.arg)
|
|
-> (UserDeclarations.svalue, UserDeclarations.pos) UserDeclarations.token
|
|
* (stack * UserDeclarations.arg)
|
|
end
|
|
|
|
signature ARG_LEXER2 =
|
|
sig
|
|
structure UserDeclarations :
|
|
sig
|
|
type ('a,'b) token
|
|
type pos
|
|
type arg
|
|
type svalue
|
|
end
|
|
val makeLexer : (int -> string)
|
|
-> UserDeclarations.arg
|
|
-> unit
|
|
-> (UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
|
|
end
|
|
|
|
(* PARSER_DATA: the signature of ParserData structures in {parser name}LrValsFun
|
|
produced by SML-Yacc. All such structures match this signature.
|
|
|
|
The {parser name}LrValsFun produces a structure which contains all the values
|
|
except for the lexer needed to call the polymorphic parser mentioned
|
|
before.
|
|
|
|
*)
|
|
|
|
signature PARSER_DATA1 =
|
|
sig
|
|
(* the type of line numbers *)
|
|
type pos
|
|
|
|
(* the type of the user-supplied argument to the parser *)
|
|
type arg
|
|
|
|
(* the type of semantic values *)
|
|
type svalue0
|
|
type svalue = arg -> svalue0 * arg
|
|
|
|
(* the intended type of the result of the parser. This value is
|
|
produced by applying extract from the structure Actions to the
|
|
final semantic value resultiing from a parse.
|
|
*)
|
|
type result
|
|
|
|
structure LrTable : LR_TABLE
|
|
structure Token : TOKEN
|
|
sharing Token.LrTable = LrTable
|
|
|
|
(* structure Actions contains the functions which mantain the
|
|
semantic values stack in the parser. Void is used to provide
|
|
a default value for the semantic stack.
|
|
*)
|
|
structure Actions :
|
|
sig
|
|
val actions : int * pos * (LrTable.state * (svalue0 * pos * pos)) list * arg
|
|
-> LrTable.nonterm * (svalue * pos * pos) * (LrTable.state * (svalue0 * pos * pos)) list
|
|
val void : svalue
|
|
val extract : svalue0 -> result
|
|
end
|
|
|
|
(* structure EC contains information used to improve error
|
|
recovery in an error-correcting parser *)
|
|
structure EC :
|
|
sig
|
|
val is_keyword : LrTable.term -> bool
|
|
val noShift : LrTable.term -> bool
|
|
val preferred_change : (LrTable.term list * LrTable.term list) list
|
|
val errtermvalue : LrTable.term -> svalue
|
|
val showTerminal : LrTable.term -> string
|
|
val terms: LrTable.term list
|
|
end
|
|
|
|
(* table is the LR table for the parser *)
|
|
val table : LrTable.table
|
|
end
|
|
|
|
signature PARSER_DATA2 =
|
|
sig
|
|
(* the type of line numbers *)
|
|
type pos
|
|
|
|
(* the type of the user-supplied argument to the parser *)
|
|
type arg
|
|
|
|
(* the type of semantic values *)
|
|
type svalue
|
|
|
|
(* the intended type of the result of the parser. This value is
|
|
produced by applying extract from the structure Actions to the
|
|
final semantic value resultiing from a parse.
|
|
*)
|
|
type result
|
|
|
|
structure LrTable : LR_TABLE
|
|
structure Token : TOKEN
|
|
sharing Token.LrTable = LrTable
|
|
|
|
(* structure Actions contains the functions which mantain the
|
|
semantic values stack in the parser. Void is used to provide
|
|
a default value for the semantic stack.
|
|
*)
|
|
structure Actions :
|
|
sig
|
|
val actions : int * pos * (LrTable.state * (svalue * pos * pos)) list * arg
|
|
-> LrTable.nonterm * (svalue * pos * pos) * (LrTable.state * (svalue * pos * pos)) list
|
|
val void : svalue
|
|
val extract : svalue -> result
|
|
end
|
|
|
|
(* structure EC contains information used to improve error
|
|
recovery in an error-correcting parser *)
|
|
structure EC :
|
|
sig
|
|
val is_keyword : LrTable.term -> bool
|
|
val noShift : LrTable.term -> bool
|
|
val preferred_change : (LrTable.term list * LrTable.term list) list
|
|
val errtermvalue : LrTable.term -> svalue
|
|
val showTerminal : LrTable.term -> string
|
|
val terms: LrTable.term list
|
|
end
|
|
|
|
(* table is the LR table for the parser *)
|
|
val table : LrTable.table
|
|
end
|
|
|
|
(* signature PARSER is the signature that most user parsers created by
|
|
SML-Yacc will match.
|
|
*)
|
|
|
|
signature PARSER2 =
|
|
sig
|
|
structure Token : TOKEN
|
|
structure Stream : STREAM2
|
|
exception ParseError
|
|
|
|
(* type pos is the type of line numbers *)
|
|
type pos
|
|
|
|
(* type result is the type of the result from the parser *)
|
|
type result
|
|
|
|
(* the type of the user-supplied argument to the parser *)
|
|
type arg
|
|
|
|
(* type svalue is the type of semantic values for the semantic value
|
|
stack
|
|
*)
|
|
type svalue
|
|
|
|
(* val makeLexer is used to create a stream of tokens for the parser *)
|
|
val makeLexer : (int -> string)
|
|
-> (svalue, pos) Token.token Stream.stream
|
|
|
|
(* val parse takes a stream of tokens and a function to print
|
|
errors and returns a value of type result and a stream containing
|
|
the unused tokens
|
|
*)
|
|
val parse : int * ((svalue, pos) Token.token Stream.stream) * (string * pos * pos -> unit) * arg
|
|
-> result * (svalue, pos) Token.token Stream.stream
|
|
|
|
val sameToken : (svalue, pos) Token.token * (svalue,pos) Token.token
|
|
-> bool
|
|
end
|
|
|
|
(* signature ARG_PARSER is the signature that will be matched by parsers whose
|
|
lexer takes an additional argument.
|
|
*)
|
|
|
|
signature ARG_PARSER1 =
|
|
sig
|
|
structure Token : TOKEN
|
|
structure Stream : STREAM1
|
|
|
|
type arg
|
|
type pos
|
|
type svalue0
|
|
type svalue = arg -> svalue0 * arg
|
|
|
|
type stack = (Token.LrTable.state, svalue0, pos) stack'
|
|
|
|
type 'arg lexer = ((svalue, pos) Token.token, stack * 'arg) Stream.stream * 'arg
|
|
|
|
val makeLexer : arg -> arg lexer
|
|
val parse : int
|
|
* (pos * pos -> stack * arg -> arg)
|
|
* pos
|
|
* (pos * pos -> stack * arg -> arg)
|
|
* (((pos * pos) list * int) * arg -> arg)
|
|
* ((Token.LrTable.state, svalue0, pos) C_Env.rule_reduce -> arg -> (Token.LrTable.state, svalue0, pos) C_Env.rule_output0 * arg)
|
|
-> arg lexer
|
|
-> arg lexer
|
|
val sameToken : (svalue, pos) Token.token * (svalue, pos) Token.token -> bool
|
|
end
|
|
|
|
signature ARG_PARSER2 =
|
|
sig
|
|
structure Token : TOKEN
|
|
structure Stream : STREAM2
|
|
exception ParseError
|
|
|
|
type arg
|
|
type pos
|
|
type result
|
|
type svalue
|
|
|
|
val makeLexer : (int -> string) -> arg
|
|
-> (svalue, pos) Token.token Stream.stream
|
|
val parse : int * ((svalue, pos) Token.token Stream.stream) * (string * pos * pos -> unit) * arg
|
|
-> result * (svalue, pos) Token.token Stream.stream
|
|
val sameToken : (svalue, pos) Token.token * (svalue,pos) Token.token
|
|
-> bool
|
|
end
|