Merge commit 'df6a49f2e3730be38140b9f5a3303e1068403053' as 'mlyacc-polyml'

This commit is contained in:
Achim D. Brucker 2026-03-12 08:31:00 +00:00
commit 48afe1ca17
91 changed files with 41502 additions and 0 deletions

12
mlyacc-polyml/.gitignore vendored Normal file
View File

@ -0,0 +1,12 @@
# make
bin/
*.o
*.poly
# doc
doc/mlyacc-polyml.pdf
# mlyacc
*.grm.sml
*.grm.sig
*.grm.desc
# mllex
*.lex.sml

20
mlyacc-polyml/LICENSE Normal file
View File

@ -0,0 +1,20 @@
ML-YACC COPYRIGHT NOTICE, LICENSE AND DISCLAIMER.
Copyright 1989, 1990 by David R. Tarditi Jr. and Andrew W. Appel
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose and without fee is hereby granted,
provided that the above copyright notice appear in all copies and that
both the copyright notice and this permission notice and warranty
disclaimer appear in supporting documentation, and that the names of
David R. Tarditi Jr. and Andrew W. Appel not be used in advertising
or publicity pertaining to distribution of the software without
specific, written prior permission.
David R. Tarditi Jr. and Andrew W. Appel disclaim all warranties with regard to
this software, including all implied warranties of merchantability and fitness.
In no event shall David R. Tarditi Jr. and Andrew W. Appel be liable for any
special, indirect or consequential damages or any damages whatsoever resulting
from loss of use, data or profits, whether in an action of contract, negligence
or other tortious action, arising out of or in connection with the use or
performance of this software.

121
mlyacc-polyml/Makefile Normal file
View File

@ -0,0 +1,121 @@
## Copyright (C) 2020 Takayuki Goto
POLYML := poly
POLYMLC := polyc
POLYMLFLAGS := -q --error-exit --use $(shell readlink -f script/load.sml)
PDFLATEX := pdflatex
DIFF := diff
PREFIX := /usr/local/polyml
BINDIR := bin
LIBDIR := lib
DOCDIR := doc/mlyacc-polyml
MLLEX := mllex-polyml
MLYACC_POLYML := mlyacc-polyml
MLYACCLIB_VER := 1.0.0
MLYACCLIB := mlyacc-lib-$(MLYACCLIB_VER).poly
DOCS := doc/mlyacc-polyml.pdf
LIB_SRCS := $(wildcard mlyacc-lib/*)
SRCS := $(wildcard src/*) \
src/yacc.lex.sml \
src/yacc.grm.sig \
src/yacc.grm.sml
EXAMPLES := calc fol pascal
export POLYML
export POLYMLC
export POLYMLFLAGS
export MLLEX
export BINDIR
export LIBDIR
export MLYACCLIB_VER
all: mlyacc-polyml
.PHONY: mlyacc-polyml
mlyacc-polyml: mlyacc-polyml-nodocs docs
.PHONY: mlyacc-polyml-nodocs
mlyacc-polyml-nodocs: $(BINDIR)/$(MLYACC_POLYML)
$(BINDIR)/$(MLYACC_POLYML): $(MLYACC_POLYML).o
@echo " [POLYMLC] $@"
@$(POLYMLC) -o $@ $^
$(MLYACC_POLYML).o: $(LIBDIR)/$(MLYACCLIB) $(SRCS)
@echo " [POLYML] $@"
@echo "" | $(POLYML) $(POLYMLFLAGS) \
--eval 'PolyML.loadModule "./$<"' \
--eval 'load "src/load.sml"' \
--eval 'PolyML.export ("$@", Main.main)'
$(LIBDIR)/$(MLYACCLIB): $(LIB_SRCS)
@echo " [POLYML] $@"
@echo "" | $(POLYML) $(POLYMLFLAGS) \
--eval 'load "mlyacc-lib/load.sml"' \
--use mlyacc-lib.sml \
--eval 'PolyML.SaveState.saveModule ("$@", MLYaccLib)'
include Makefile.mlyacc
.PHONY: doc/mlyacc.pdf
doc/mlyacc.pdf:
$(MAKE) -C doc PDFLATEX:=$(PDFLATEX) mlyacc.pdf
$(DOCS): doc/mlyacc.pdf
cp doc/mlyacc.pdf $@
.PHONY: example
example: mlyacc-polyml-nodocs
$(MAKE) -C ./examples \
MLYACC=../$(BINDIR)/$(MLYACC_POLYML) \
MLYACCLIB=../$(LIBDIR)/$(MLYACCLIB)
.PHONY: docs
docs: $(DOCS)
.PHONY: test
test: mlyacc-polyml-nodocs
$(BINDIR)/$(MLYACC_POLYML) test/ml.grm
$(DIFF) test/ml.grm.sig test/ml.grm.sig.exp
$(DIFF) test/ml.grm.sml test/ml.grm.sml.exp
$(DIFF) test/ml.grm.desc test/ml.grm.desc.exp
$(RM) test/ml.grm.{sig, sml, desc}
.PHONY: install-nodocs
install-nodocs: mlyacc-polyml-nodocs
install -D -m 0755 -t $(PREFIX)/$(BINDIR) $(BINDIR)/$(MLYACC_POLYML)
install -D -m 0644 -t $(PREFIX)/$(LIBDIR)/mlyacc-lib $(LIBDIR)/$(MLYACCLIB)
.PHONY: install
install: install-nodocs docs
install -D -m 0444 -t $(PREFIX)/$(DOCDIR) $(DOCS)
.PHONY: clean
clean:
-$(RM) $(BINDIR)/$(MLYACC_POLYML) $(MLYACC_POLYML).o
-$(RM) $(LIBDIR)/$(MLYACCLIB)
$(MAKE) -C examples clean
$(MAKE) -C doc clean

View File

@ -0,0 +1,12 @@
%.lex.sml: %.lex
@echo " [MLLex] $@"
@$(MLLEX) $<
@chmod -w $<.*
%.grm.sig %.grm.sml: %.grm
@echo " [MLYacc] $@"
@$(MLYACC) $<
@chmod -w $<.*

95
mlyacc-polyml/Readme.md Normal file
View File

@ -0,0 +1,95 @@
# MLYacc for Poly/ML
MLYacc is an LALR parser generator for StandardML.
This repository publish MLYacc to be used from Poly/ML, and this is ported from MLYacc implementation bundled in MLton.
## Requires
- Poly/ML
- pdflatex (for docs)
- (optional) mllex-polyml
- (optional) mlyacc-polyml
## Build
To build `mlyacc-polyml`, run the `mlyacc-polyml` target or `mlyacc-polyml-nodocs` if you do not need documents.
```sh
$ make mlyacc-polyml
```
## Install
To install `mlyacc-polyml` and `mlyacc-lib-1.0.0.poly`, run the `install` target.
```sh
$ make install
```
By default, executables and binaries are installed to `/usr/local/polyml/{bin,lib}`.
You can specify the installation location by using the `PREFIX` variable.
```sh
$ make install PREFIX=~/.sml/polyml/5.8.1
```
If you do not want to install documents, run the `install-nodocs` target.
```sh
$ make install-nodocs
```
## Use
`mlyacc-polyml` take a `.grm` file and generates `.grm.sml`, `.grm.sig` and `.grm.desc` files.
For example, you pass `ml.grm` to this program, `ml.grm.sml`, `ml.grm.sig` and `ml.grm.desc` will be generated.
```sh
$ ./bin/mlyacc-polyml ml.grm
1 shift/reduce conflict
$ ls ml.grm.*
ml.grm.desc ml.grm.sig ml.grm.sml
```
These generated files depend on `mlyacc-lib`.
That library can be loaded as follows:
```sh
$ poly
> PolyML.loadModule "/usr/local/polyml/lib/mlyacc-lib-1.0.0.poly";
signature ARG_LEXER = ..
```
See `doc/mlyacc-polyml.pdf` for details which is generated by the [docs](#document) target.
## Test
To run unit tets, run the `test` target.
```sh
$ make test
```
## Document
To generate a document of mlyacc-polyml, run the `docs` target.
This target generates `mlyacc-polyml.pdf` to the `doc` directory.
```sh
$ make docs
```
## License
see LICENSE file for details.

2
mlyacc-polyml/bin/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

6
mlyacc-polyml/doc/.gitignore vendored Normal file
View File

@ -0,0 +1,6 @@
/mlyacc.aux
/mlyacc.dvi
/mlyacc.log
/mlyacc.pdf
/mlyacc.ps
/mlyacc.toc

View File

@ -0,0 +1,19 @@
## Copyright (C) 2013,2018 Matthew Fluet
# Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh
# Jagannathan, and Stephen Weeks.
# Copyright (C) 1997-2000 NEC Research Institute.
#
# MLton is released under a BSD-style license.
# See the file MLton-LICENSE for details.
##
PDFLATEX := pdflatex
mlyacc.pdf: mlyacc.tex
$(PDFLATEX) mlyacc.tex
$(PDFLATEX) mlyacc.tex
$(PDFLATEX) mlyacc.tex
.PHONY: clean
clean:
-$(RM) mlyacc.aux mlyacc.log mlyacc.toc mlyacc.pdf

View File

@ -0,0 +1 @@
\newcommand{\parbox}[2]{#2}

1622
mlyacc-polyml/doc/mlyacc.tex Normal file

File diff suppressed because it is too large Load Diff

252
mlyacc-polyml/doc/tech.doc Normal file
View File

@ -0,0 +1,252 @@
A Hacker's guide ML-Yacc itself
The program for computing the LALR(1) table can be divided into 3 separate
parts. The first part computes the LR(0) graph. The second part attaches
lookahead to the LR(0) graph to get the LALR(1) graph. The third part
computes the parse tables from the LALR(1) graph.
Look at the file sigs.sml to see how the modules are layed out.
The file graph.sml contains the Graph functor, which produces a structure
containing a function mkGraph. mkGraph takes a grammar and returns a
some useful values and functions, including the LR(0) graph. It renumbers
the rules to an internal form to make the LR(0) graph generation more
efficient. The LR(0) graph includes only core items in its set of items.
The file look.sml takes some of theses values and produces functions
which tell whether a nonterm is nullable and the first set of a symbol
list.
The functor mkLalr creates a structure with a function that takes an LR(0)
graph and some other values (notably the first and nullable) functions
produced by Look and creates a stripped down version of an LR(0) graph with
lookaheads attached. Nullable items (which usually aren't core items) are
added and all other items without dots at the end (i.e. non-reduction items)
are removed.
The functor MkTable produces a function with takes the LR(0) graph
produced by the function in mkGraph and the LR(0) graph with lookaheads
produced by Lalr and creates an LALR(1) table from these graphs.
-----------------------------------------------------------------------
An overview of the algorithms used in LR(0) graph generation and
LALR(1) lookahead creation.
LR(0) graph
-----------
The LR(0) graph consists of sets of items. Each set of items will be
called a core set. The basic algorithm is:
let fun add_gotos(graph,f,nil,r) = (graph,r)
| add_gotos(graph,f,(a,symbol)::b,r)
let newgraph = graph + edge from f to a labelled
with symbol
in if a exists in graph then
add_gotos(newgraph,f,b,r)
else add_gotos(newgraph,f,b,a::r)
end
fun f(graph,nil) = graph
| f(graph,a::b) = f(add_gotos(graph,a,gotos of closure a,b))
in f(empty-graph,[initial core set])
end
For each core, we compute the new cores which result from doing a shift
or goto, and then add these new cores with the symbol used in the shift
or goto to the graph. We continue doing this until there are no more cores
to adds to the graph.
We have to take the closure of a core to include those items which are
derived from nonterminals with a dot before them. If item A -> 'a .B 'c
is in a core, the all productions derived by B must also be in the core.
We want to be able to do the following operations efficently:
(1) check if a core is in the graph already
(2) compute the closure of a core
(3) compute the cores resulting from goto/shift operations.
(1) This can be done efficiently if a complete order exists for the cores. This
can be done by imposing an ordering on items, giving each item a unique
integer and using the place in an item. This can be used to order a
set of items.
(2) Much of the computation for the closure can be done ahead of time.
The set of nonterminals to add for a given a nonterminal can be pre-computed
using a transitive closure algorithm (the transitive closure is sparse
in practice). One can then compute the closure for a core in the following
manner. First, compute the set of nonterminals with . in front of them.
This can be done in (m ln m) time. Next, use the results from the
transitive closure to compute the complete set of nonterminals that
should be used. Finally, for each nonterminal, merge its set of
productions (sort all rules by the nonterminals from which they
are derived before numbering them, then all we have to do is just
prepend the rules while scanning the list in reverse order).
(3) To do this, just scan the core closure, sorting rules by their
symbols into lists. Then reverse all the lists, and we have the
new core sets.
Lookahead representation
------------------------
The previous part throws away the result of the closure operations.
It is used only to compute new cores for use in the goto operation.
These intermediate results should be saved because they will be useful
here.
Lookaheads are attached to an item when
(1) an item is the result of a shift/goto. The item
must have the same lookahead as the item from which it
is derived.
(2) an item is added as the result of a closure. Note that
in fact all productions derived from a given nonterminal
are added here. This can be used (perhaps) to our
advantage, as we can represent a closure using just the
nonterminal.
This can be divided into two cases:
(a) A -> 'a .B 'c , where 'c derives epsilon,
(b) A -> 'a .B 'c , where 'c does not derive epsilon
In (a), lookahead(items derived from B) includes first('c)
and lookahead(A -> 'a .B 'c)
In (b), lookahead(items derived from B) includes only first('c).
This is an example of back propagation.
Note that an item is either the result of a closure or the
result of a shift/goto. It is never the result of both (that
would be a contradiction).
The following representation will be used:
goto/shift items:
an ordered list of item * lookahead ref *
lookahead ref for the resulting
shift/goto item in another core.
closure items:
for each nonterminal:
(1) lookahead ref
(2) a list of item * lookahead ref for the
resulting shift/goto item in another
core.
Lookahead algorithms
--------------------
After computing the LR(0) graph, lookaheads must be attached to the items in
the graph. An item i may receive lookaheads in two ways. If item i
was the result of a shift or goto from some item j, then lookahead(i) includes
lookahead(j). If item i is a production of some nonterminal B, and there
exists some item j of the form A -> x .B y, then item i will be added through
closure(j). This implies that lookahead(i) includes first(y). If y =>
epsilon, then lookahead(i) includes lookahead(j).
Lookahead must be recorded for completion items, which are items of the
form A -> x., non-closure items of the form A -> y . B z, where z is
not nullable, and closure items of the form A -> epsilon. (comment:
items of the form A -> .x can appear in the start state as non-closure items.
A must be the start symbol, which should not appear in the right hand side
of any rule. This implies that lookaheads will never be propagated to
such items)
We chose to omit closure items that do not have the form A -> epsilon.
It is possible to add lookaheads to closure items, but we have not
done so because it would greatly slow down the addition of lookaheads.
Instead we precompute the nonterminals whose productions are
added through the closure operation, the lookaheads for these
nonterminals, and whether the lookahead for these nonterminals
should include first(y) and lookahead(j) for some item j of the
form A -> x .B y. This information depends only on the particular
nonterminal whose closure is being taken.
Some notation is necessary to describe what is happening here. Let
=c=> denote items added in one closure step that are derived from some
nonterminal B in a production A -> x .B y. Let =c+=> denote items
added in one or more =c=> steps.
Consider the following productions
B -> S ;
S -> E
E -> F * E
F -> num
in a kernal with the item
B -> .S
The following derivations are possible:
B -> .S =c=> S -> .E =c+=> S -> .E, E -> .F * E, F -> .num
The nonterminals that are added through the closure operation
are the nonterminals for some item j = A -> .B x such that j =c+=> .C y.
Lookahead(C) includes first(y). If y =*=> epsilon then
lookahead (C) includes first (x). If x=*=> epsilon and y =*=> epsilon
then lookahead(C) includes first(j).
The following algorithm computes the information for each nonterminal:
(1) nonterminals such that c =c+=> .C y and y =*=> epsilon
Let s = the set of nonterminals added through closure = B
repeat
for all B which are elements of s,
if B -> .C z and z =*=> epsilon then
add B to s.
until s does not change.
(2) nonterminals added through closure and their lookaheads
Let s = the set of nonterminals added through closure = B
where A -> x . B y
repeat
for all B which are elements of s,
if B -> .C z then add C to s, and
add first(z) to lookahead(C)
until nothing changes.
Now, for each nonterminal A in s, find the set of nonterminals
such that A =c+=> .B z, and z =*=> epsilon (i.e. use the results
from 1). Add the lookahead for nonterminal A to the lookahead
for each nonterminal in this set.
These algorithms can be restated as either breadth-first or depth-first search
algorithms. The loop invariant of the algorithms is that whenever a
nonterminal is added to the set being calculated, all the productions
for the nonterminal are checked.
This algorithm computes the lookahead for each item:
for each state,
for each item of the form A -> u .B v in the state, where u may be
nullable,
let first_v = first(v)
l-ref = ref for A -> u .B v
s = the set of nonterminals added through the closure of B.
for each element X of s,
let r = the rules produced by an element X of s
l = the lookahead ref cells for each rule, i.e.
all items of A -> x. or A -> x .B y, where
y =*=> epsilon, and x is not epsilon
add the lookahead we have computed for X to the
elements of l
if B =c+=> X z, where z is nullable, add first(y) to
the l. If y =*=> epsilon, save l with the ref for
A -> x .B y in a list.
Now take the list of (lookahead ref, list of lookahead refs) and propagate
each lookahead ref cell's contents to the elements of the list of lookahead
ref cells associated with it. Iterate until no lookahead set changes.

3
mlyacc-polyml/examples/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
calc.poly
fol.poly
pascal.poly

View File

@ -0,0 +1,55 @@
## Copyright (C) 2020 Takayuki Goto
POLYML ?= poly
POLYMLC ?= polyc
POLYMLFLAGS ?= -q --error-exit --use ../script/load.sml
BINDIR ?= ../bin
LIBDIR ?= ../lib
MLLEX ?= mllex-polyml
MLYACC ?= $(BINDIR)/mlyacc-polyml
MLYACCLIB_VER ?= 1.0.0
MLYACCLIB ?= $(LIBDIR)/mlyacc-lib-$(MLYACCLIB_VERSION).poly
EXAMPLES := calc.poly fol.poly pascal.poly
include ../Makefile.mlyacc
define build-module
@echo " [POLYML] $@"
@echo "" | $(POLYML) $(POLYMLFLAGS) \
--eval 'PolyML.loadModule "$(MLYACCLIB)"' \
--eval 'load "./$(1)/load.sml"' \
--use ./$(1)/export.sml \
--eval 'PolyML.SaveState.saveModule ("$@", $(2))'
endef
all: $(EXAMPLES)
calc.poly: $(MLYACCLIB) $(addprefix calc/,calc.sml calc.lex.sml calc.grm.sml calc.grm.sig load.sml export.sml)
$(call build-module,calc,Calc)
fol.poly: $(MLYACCLIB) $(addprefix fol/,absyn.sml fol.grm.sml fol.grm.sig fol.lex.sml interface.sml link.sml parse.sml load.sml export.sml)
$(call build-module,fol,Fol)
pascal.poly: $(MLYACCLIB) $(addprefix pascal/,pascal.grm.sig pascal.grm.sml pascal.lex.sml parser.sml load.sml export.sml)
$(call build-module,pascal,Pascal)
.PHONY: clean
clean:
-$(RM) $(addprefix calc/calc.grm,.sml .sig .desc)
-$(RM) calc/calc.lex.sml
-$(RM) $(addprefix fol/fol.grm,.sml .sig .desc)
-$(RM) fol/fol.lex.sml
-$(RM) $(addprefix pascal/pascal.grm,.sml .sig .desc)
-$(RM) pascal/pascal.lex.sml
-$(RM) $(EXAMPLES)

View File

@ -0,0 +1,49 @@
This is a sample interactive calculator built using ML-Yacc and ML-Lex.
The calculator is defined by the files
calc.lex (* defines lexer *)
calc.grm (* defines grammar *)
calc.sml (* defines driver function, Calc.parse *)
calc.mlb (* ML Basis file *)
To compile this example, type the following commands
mllex calc.lex
mlyacc calc.grm
mlton calc.mlb
in this directory. They will invoke ml-lex and ml-yacc to process the
lexer specification calc.lex and the grammar specification calc.grm
respectively. Then it will compile the resulting SML source files
calc.lex.sml
calc.grm.sig
calc.grm.sml
and the calc.sml file containing the driver code.
The end result of compiling these files is an executable file named
"calc", that is based on the structure Calc containing a top-level
driver function named parse.
Calc.parse : unit -> unit
The calculator can be invoked by applying Calc.parse to the unit value.
- Calc.parse();
1+3;
result = 4
The calculator reads a sequence of expressions from the standard input
and prints the value of each expression after reading the expression.
Expressions must be separated by semicolons. An expression is not
evaluated until the semicolon is encountered. The calculator
terminates when an end-of-file is encountered. There is no attempt to
fix input errors: a lexical error will cause exception LexError to be
raised, while a syntax error will cause ParseError to be raised.
NOTE: The ML Basis file calc.mlb mentions the ml-yacc library
(ml-yacc-lib.cm). MLton's search path should be configured so that this
library will be found. This should normally be the case if MLton is
properly installed.

View File

@ -0,0 +1,50 @@
(* Sample interactive calculator for ML-Yacc *)
fun lookup "bogus" = 10000
| lookup s = 0
%%
%eop EOF SEMI
(* %pos declares the type of positions for terminals.
Each symbol has an associated left and right position. *)
%pos int
%left SUB PLUS
%left TIMES DIV
%right CARAT
%term ID of string | NUM of int | PLUS | TIMES | PRINT |
SEMI | EOF | CARAT | DIV | SUB
%nonterm EXP of int | START of int option
%name Calc
%subst PRINT for ID
%prefer PLUS TIMES DIV SUB
%keyword PRINT SEMI
%noshift EOF
%value ID ("bogus")
%verbose
%%
(* the parser returns the value associated with the expression *)
START : PRINT EXP (print (Int.toString EXP);
print "\n";
SOME EXP)
| EXP (SOME EXP)
| (NONE)
EXP : NUM (NUM)
| ID (lookup ID)
| EXP PLUS EXP (EXP1+EXP2)
| EXP TIMES EXP (EXP1*EXP2)
| EXP DIV EXP (EXP1 div EXP2)
| EXP SUB EXP (EXP1-EXP2)
| EXP CARAT EXP (let fun e (m,0) = 1
| e (m,l) = m*e(m,l-1)
in e (EXP1,EXP2)
end)

View File

@ -0,0 +1,35 @@
structure Tokens = Tokens
type pos = int
type svalue = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult= (svalue,pos) token
val pos = ref 0
fun eof () = Tokens.EOF(!pos,!pos)
fun error (e,l : int,_) = TextIO.output (TextIO.stdOut, String.concat[
"line ", (Int.toString l), ": ", e, "\n"
])
%%
%header (functor CalcLexFun(structure Tokens: Calc_TOKENS));
alpha=[A-Za-z];
digit=[0-9];
ws = [\ \t];
%%
\n => (pos := (!pos) + 1; lex());
{ws}+ => (lex());
{digit}+ => (Tokens.NUM (valOf (Int.fromString yytext), !pos, !pos));
"+" => (Tokens.PLUS(!pos,!pos));
"*" => (Tokens.TIMES(!pos,!pos));
";" => (Tokens.SEMI(!pos,!pos));
{alpha}+ => (if yytext="print"
then Tokens.PRINT(!pos,!pos)
else Tokens.ID(yytext,!pos,!pos)
);
"-" => (Tokens.SUB(!pos,!pos));
"^" => (Tokens.CARAT(!pos,!pos));
"/" => (Tokens.DIV(!pos,!pos));
"." => (error ("ignoring bad character "^yytext,!pos,!pos);
lex());

View File

@ -0,0 +1,9 @@
local
$(SML_LIB)/basis/basis.mlb
$(SML_LIB)/mlyacc-lib/mlyacc-lib.mlb
calc.grm.sig
calc.grm.sml
calc.lex.sml
in
calc.sml
end

View File

@ -0,0 +1,68 @@
(* calc.sml *)
(* This file provides glue code for building the calculator using the
* parser and lexer specified in calc.lex and calc.grm.
*)
structure Calc : sig
val parse : unit -> unit
end =
struct
(*
* We apply the functors generated from calc.lex and calc.grm to produce
* the CalcParser structure.
*)
structure CalcLrVals =
CalcLrValsFun(structure Token = LrParser.Token)
structure CalcLex =
CalcLexFun(structure Tokens = CalcLrVals.Tokens)
structure CalcParser =
Join(structure LrParser = LrParser
structure ParserData = CalcLrVals.ParserData
structure Lex = CalcLex)
(*
* We need a function which given a lexer invokes the parser. The
* function invoke does this.
*)
fun invoke lexstream =
let fun print_error (s,i:int,_) =
TextIO.output(TextIO.stdOut,
"Error, line " ^ (Int.toString i) ^ ", " ^ s ^ "\n")
in CalcParser.parse(0,lexstream,print_error,())
end
(*
* Finally, we need a driver function that reads one or more expressions
* from the standard input. The function parse, shown below, does
* this. It runs the calculator on the standard input and terminates when
* an end-of-file is encountered.
*)
fun parse () =
let val lexer = CalcParser.makeLexer (fn _ =>
(case TextIO.inputLine TextIO.stdIn
of SOME s => s
| _ => ""))
val dummyEOF = CalcLrVals.Tokens.EOF(0,0)
val dummySEMI = CalcLrVals.Tokens.SEMI(0,0)
fun loop lexer =
let val (result,lexer) = invoke lexer
val (nextToken,lexer) = CalcParser.Stream.get lexer
val _ = case result
of SOME r =>
TextIO.output(TextIO.stdOut,
"result = " ^ (Int.toString r) ^ "\n")
| NONE => ()
in if CalcParser.sameToken(nextToken,dummyEOF) then ()
else loop lexer
end
in loop lexer
end
end (* structure Calc *)

View File

@ -0,0 +1,7 @@
val Calc =
{
sigs = [],
structs = ["Calc"],
functors = [],
onStartup = NONE
}

View File

@ -0,0 +1,4 @@
use "calc.grm.sig";
use "calc.grm.sml";
use "calc.lex.sml";
use "calc.sml";

View File

@ -0,0 +1,57 @@
fol/README
This is a sample parser for first-order logic. The grammar
was contributed by Frank Pfenning.
The parser is defined by the files
fol.lex (* defines lexer *)
fol.grm (* defines grammar *)
link.sml (* constructs basic parser structures *)
absyn.sml (* a trivial abstract syntax *)
interface.sml (* interface to lexer and parser properties *)
parse.sml (* driver functions *)
fol.mlb (* ML Basis file *)
To compile this example, type the following commands
mllex fol.lex
mlyacc fol.grm
mlton fol.mlb
in this directory. They will invoke ml-lex and ml-yacc to process the
lexer specification fol.lex and the grammar specification fol.grm
respectively. Then they will compile the resulting SML source files
fol.lex.sml
fol.grm.sig
fol.grm.sml
and the other sml source files.
The end result of loading these files is a structure Parse containing
the following top-level driver functions:
val prog_parse : string -> Absyn.absyn
(* parse a program from a string *)
val query_parse : string -> Absyn.absyn
(* parse a query from a string *)
val file_parse : string -> Absyn.absyn
(* parse a program in a file *)
val top_parse : unit -> Absyn.absyn
(* parse a query from the standard input *)
The file list.fol is a sample input file that can be parsed using
the file_parse function:
- Parse.file_parse "list.fol";
NOTE: The ML Basis file sources.cm mentions the ml-yacc library
(ml-yacc-lib.cm). MLton's search path should be configured so that this
library will be found. This should normally be the case if MLton is
properly installed.

View File

@ -0,0 +1,11 @@
signature ABSYN =
sig
type absyn
val null : absyn
end
structure Absyn :> ABSYN =
struct
datatype absyn = NULL
val null = NULL
end

View File

@ -0,0 +1,7 @@
val Fol =
{
sigs = ["PARSE"],
structs = ["Parse"],
functors = [],
onStartup = NONE
}

View File

@ -0,0 +1,81 @@
%%
%header (functor FolLrValsFun (structure Token : TOKEN
structure Absyn : ABSYN ) : Fol_LRVALS)
%term
EOF | DOT | COMMA | SEMICOLON
| LPAREN | RPAREN
| BACKARROW | DOUBLEARROW
| ARROW | BAR
| TRUE | FORALL | EXISTS
| PARSEPROG | PARSEQUERY
| LCID of string | UCID of string | INT of string
(* gform: goal formula
dform: definite clause *)
%nonterm
start of Absyn.absyn
| clause | query | gform | dform
| atom | termlist | term | varbd | id
%start start
%eop EOF DOT
%pos int
%verbose
%pure
%right FORALL EXISTS
%left BACKARROW
%right SEMICOLON
%right COMMA
%right DOUBLEARROW
%right ARROW
%left BAR
%name Fol
%prefer DOT
%%
start : PARSEPROG clause (Absyn.null)
| PARSEQUERY query (Absyn.null)
clause : dform ()
| ()
query : gform ()
| ()
gform : TRUE ()
| gform COMMA gform () (* and *)
| gform SEMICOLON gform () (* disjunction *)
| gform BACKARROW dform () (* implication: dform implies gform *)
| gform ARROW gform BAR gform () (* if-then-else *)
| FORALL varbd gform () (* universal quantification *)
| EXISTS varbd gform () (* existential quantification *)
| atom () (* atomic formula *)
| LPAREN gform RPAREN ()
dform : TRUE ()
| dform COMMA dform () (* and *)
| dform BACKARROW gform () (* gform implies dform *)
| FORALL varbd dform ()
| atom ()
| LPAREN dform RPAREN ()
atom : LCID ()
| LCID LPAREN termlist RPAREN ()
termlist : term ()
| term COMMA termlist ()
term : id ()
| INT ()
| LCID LPAREN termlist RPAREN ()
varbd : LCID DOT ()
| UCID DOT ()
id : LCID ()
| UCID ()

View File

@ -0,0 +1,42 @@
structure Tokens = Tokens
structure Interface = Interface
open Interface
type pos = Interface.pos
type svalue = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult= (svalue,pos) token
val eof = fn () => Tokens.EOF(!line,!line)
fun makeInt (s : string) = s
%%
%header (functor FolLexFun(structure Tokens: Fol_TOKENS
structure Interface: INTERFACE) : LEXER);
lcstart=[a-z!&$+/<=>?@~|#*`]|\-;
ucstart=[A-Z_];
idchars={lcstart}|{ucstart}|[0-9];
lcid={lcstart}{idchars}*;
ucid={ucstart}{idchars}*;
ws=[\t\ ]*;
num=[0-9]+;
%%
<INITIAL>{ws} => (lex());
<INITIAL>\n => (next_line(); lex());
<INITIAL>":-" => (Tokens.BACKARROW(!line,!line));
<INITIAL>"," => (Tokens.COMMA(!line,!line));
<INITIAL>";" => (Tokens.SEMICOLON(!line,!line));
<INITIAL>"." => (Tokens.DOT(!line,!line));
<INITIAL>"(" => (Tokens.LPAREN(!line,!line));
<INITIAL>")" => (Tokens.RPAREN(!line,!line));
<INITIAL>"->" => (Tokens.ARROW(!line,!line));
<INITIAL>"=>" => (Tokens.DOUBLEARROW(!line,!line));
<INITIAL>"|" => (Tokens.BAR(!line,!line));
<INITIAL>"true" => (Tokens.TRUE(!line,!line));
<INITIAL>"forall" => (Tokens.FORALL(!line,!line));
<INITIAL>"exists" => (Tokens.EXISTS(!line,!line));
<INITIAL>{lcid} => (Tokens.LCID (yytext,!line,!line));
<INITIAL>{ucid} => (Tokens.UCID (yytext,!line,!line));
<INITIAL>{num} => (Tokens.INT (makeInt yytext,!line,!line));
<INITIAL>. => (error ("ignoring illegal character" ^ yytext,
!line,!line); lex());

View File

@ -0,0 +1,12 @@
local
$(SML_LIB)/basis/basis.mlb
$(SML_LIB)/mlyacc-lib/mlyacc-lib.mlb
absyn.sml
interface.sml
fol.grm.sig
fol.grm.sml
fol.lex.sml
parse.sml
in
link.sml
end

View File

@ -0,0 +1,31 @@
(* Externally visible aspects of the lexer and parser *)
signature INTERFACE =
sig
type pos
val line : pos ref
val init_line : unit -> unit
val next_line : unit -> unit
val error : string * pos * pos -> unit
type arg
val nothing : arg
end (* signature INTERFACE *)
functor Interface () : INTERFACE =
struct
type pos = int
val line = ref 0
fun init_line () = (line := 1)
fun next_line () = (line := !line + 1)
fun error (errmsg,line:pos,_) =
TextIO.output(TextIO.stdOut,"Line " ^ (Int.toString line) ^ ": " ^ errmsg ^ "\n")
type arg = unit
val nothing = ()
end (* functor INTERFACE *)

View File

@ -0,0 +1,19 @@
structure FolLrVals : Fol_LRVALS =
FolLrValsFun(structure Token = LrParser.Token
structure Absyn = Absyn);
structure Interface : INTERFACE = Interface();
structure FolLex : LEXER =
FolLexFun(structure Tokens = FolLrVals.Tokens
structure Interface = Interface);
structure FolParser : PARSER =
Join(structure ParserData = FolLrVals.ParserData
structure Lex = FolLex
structure LrParser = LrParser);
structure Parse : PARSE =
Parse (structure Absyn = Absyn
structure Interface = Interface
structure Parser = FolParser
structure Tokens = FolLrVals.Tokens );

View File

@ -0,0 +1,2 @@
append(nil,K,K).
append(cons(X,L),K,cons(X,M)) :- append(L,K,M).

View File

@ -0,0 +1,7 @@
use "absyn.sml";
use "interface.sml";
use "fol.grm.sig";
use "fol.grm.sml";
use "fol.lex.sml";
use "parse.sml";
use "link.sml";

View File

@ -0,0 +1,82 @@
(* Uses the generated lexer and parser to export parsing functions
*)
signature PARSE =
sig
structure Absyn : ABSYN
(* parse a program from a string *)
val prog_parse : string -> Absyn.absyn
(* parse a query from a string *)
val query_parse : string -> Absyn.absyn
(* parse a program in a file *)
val file_parse : string -> Absyn.absyn
(* parse a query from the standard input *)
val top_parse : unit -> Absyn.absyn
end (* signature PARSE *)
functor Parse (structure Absyn : ABSYN
structure Interface : INTERFACE
structure Parser : PARSER
sharing type Parser.arg = Interface.arg
sharing type Parser.pos = Interface.pos
sharing type Parser.result = Absyn.absyn
structure Tokens : Fol_TOKENS
sharing type Tokens.token = Parser.Token.token
sharing type Tokens.svalue = Parser.svalue
) : PARSE =
struct
structure Absyn = Absyn
fun parse (dummyToken,lookahead,reader : int -> string) =
let val _ = Interface.init_line()
val empty = !Interface.line
val dummyEOF = Tokens.EOF(empty,empty)
val dummyTOKEN = dummyToken(empty,empty)
fun invoke lexer =
let val newLexer = Parser.Stream.cons(dummyTOKEN,lexer)
in Parser.parse(lookahead,newLexer,Interface.error,
Interface.nothing)
end
fun loop lexer =
let val (result,lexer) = invoke lexer
val (nextToken,lexer) = Parser.Stream.get lexer
in if Parser.sameToken(nextToken,dummyEOF) then result
else loop lexer
end
in loop (Parser.makeLexer reader)
end
fun string_reader s =
let val next = ref s
in fn _ => !next before next := ""
end
fun prog_parse s = parse (Tokens.PARSEPROG,15,string_reader s)
fun query_parse s = parse (Tokens.PARSEQUERY,15,string_reader s)
fun file_parse name =
let val dev = TextIO.openIn name
in (parse (Tokens.PARSEPROG,15,fn i => TextIO.inputN(dev,i)))
before TextIO.closeIn dev
end
fun top_parse () =
parse (Tokens.PARSEQUERY,0,(fn i =>
(case TextIO.inputLine TextIO.stdIn
of SOME s => s
| _ => "")))
end (* functor Parse *)

View File

@ -0,0 +1,34 @@
This is a grammar for Berkeley Pascal, hacked to be SLR, though that is
not necessary because ML-Yacc supports LALR(1).
To construct the parser, make this your current directory and run the
following commands
mllex pascal.lex
mlyacc pascal.grm
mlton pascal.mlb
They will apply ML-Yacc to the file "pascal.grm" to create
the files "pascal.grm.sig" and "pascal.grm.sml", then
ML_Lex will be applied to pascal.lex to produce pascal.lex.sml.
Then these generated files will be compiled together with necessary
components from the ML-Yacc library supplied by the ml-yacc-lib.cm
file.
The end result is a structure Parser with two functions. The
function
parse: string ->
PascalParser.result *
(Parser.PascalParser.svalue,PascalParser.pos) LrParser.Token.token
LrParser.stream
parses input from a file, while
keybd: unit ->
Parser.PascalParser.result *
(Parser.PascalParser.svalue,Parser.PascalParser.pos)
LrParser.Token.token LrParser.stream
parses from the standard input.

View File

@ -0,0 +1,7 @@
val Pascal =
{
sigs = [],
structs = ["Parser"],
functors = [],
onStartup = NONE
}

View File

@ -0,0 +1,4 @@
use "pascal.grm.sig";
use "pascal.grm.sml";
use "pascal.lex.sml";
use "parser.sml";

View File

@ -0,0 +1,38 @@
(* parser.sml *)
(* driver for Pascal parser *)
structure Parser =
struct
structure PascalLrVals = PascalLrValsFun(structure Token = LrParser.Token)
structure PascalLex = PascalLexFun(structure Tokens = PascalLrVals.Tokens)
structure PascalParser = Join(structure Lex= PascalLex
structure LrParser = LrParser
structure ParserData = PascalLrVals.ParserData)
fun parse s =
let val dev = TextIO.openIn s
val stream = PascalParser.makeLexer(fn i => TextIO.inputN(dev,i))
fun error (e,i:int,_) =
TextIO.output(TextIO.stdOut,
s ^ "," ^ " line " ^ (Int.toString i) ^ ", Error: " ^ e ^ "\n")
in PascalLex.UserDeclarations.lineNum := 1;
PascalParser.parse(30,stream,error,())
before TextIO.closeIn dev
end
fun keybd () =
let val stream =
PascalParser.makeLexer (fn i => (case TextIO.inputLine TextIO.stdIn
of SOME s => s
| _ => ""))
fun error (e,i:int,_) =
TextIO.output(TextIO.stdOut,
"std_in," ^ " line " ^ (Int.toString i) ^ ", Error: " ^ e ^ "\n")
in PascalLex.UserDeclarations.lineNum := 1;
PascalParser.parse(0,stream,error,())
end
end (* structure Parser *)

View File

@ -0,0 +1,244 @@
%%
%name Pascal
%term
YAND | YARRAY | YBEGIN | YCASE |
YCONST | YDIV | YDO | YDOTDOT |
YTO | YELSE | YEND | YFILE |
YFOR | YFORWARD | YPROCEDURE | YGOTO |
YID | YIF | YIN | YINT |
YLABEL | YMOD | YNOT | YNUMB |
YOF | YOR | YPACKED | YNIL |
YFUNCTION | YPROG | YRECORD | YREPEAT |
YSET | YSTRING | YTHEN | YDOWNTO |
YTYPE | YUNTIL | YVAR | YWHILE |
YWITH | YBINT | YOCT | YHEX |
YCASELAB | YILLCH | YEXTERN |
YDOT | YLPAR | YRPAR | YSEMI | YCOMMA | YCOLON | YCARET | YLBRA |
YRBRA | YTILDE |
YLESS | YEQUAL | YGREATER
| YPLUS | YMINUS | YBAR
| UNARYSIGN
| YSTAR | YSLASH | YAMP
| EOF
%eop EOF
%pos int
%pure
%noshift EOF
%nonassoc YLESS YEQUAL YGREATER YIN
%left YPLUS YMINUS YOR YBAR
%left UNARYSIGN
%left YSTAR YSLASH YDIV YMOD YAND YAMP
%left YNOT
%nonterm goal | prog_hedr | block | decls | decl | labels | label_decl |
const_decl | type_decl | var_decl | proc_decl | pheadres | phead |
porf | params | param | ftype | param_list | const | number | const_list |
type' | simple_type | struct_type | simple_type_list | field_list |
fixed_part | field | variant_part | variant_list | variant | stat_list |
stat_lsth | cstat_list | cstat | stat | assign | expr | element_list |
element | variable | qual_var | wexpr | octhex | expr_list | wexpr_list |
relop | addop | divop | negop | var_list | id_list | const_id | type_id |
var_id | array_id | ptr_id | record_id | field_id | func_id
| begin
%keyword
YAND YARRAY YBEGIN YCASE
YCONST YDIV YDO
YTO YELSE YEND YFILE
YFOR YFORWARD YPROCEDURE YGOTO
YIF YIN
YLABEL YMOD YNOT
YOF YOR YPACKED YNIL
YFUNCTION YPROG YRECORD YREPEAT
YSET YSTRING YTHEN YDOWNTO
YTYPE YUNTIL YVAR YWHILE
YWITH YOCT YHEX
YEXTERN YAMP
%prefer YID YSEMI YCOMMA YLBRA
%subst YCOMMA for YSEMI | YSEMI for YCOMMA
%%
begin: goal ()
goal: prog_hedr decls block YDOT ()
| decls ()
prog_hedr: YPROG YID YLPAR id_list YRPAR YSEMI ()
| YPROG YID YSEMI ()
block: YBEGIN stat_list YEND ()
decls: decls decl ()
| ()
decl: labels ()
| const_decl ()
| type_decl ()
| var_decl ()
| proc_decl ()
labels: YLABEL label_decl YSEMI ()
label_decl: YINT ()
| label_decl YCOMMA YINT ()
const_decl: YCONST YID YEQUAL const YSEMI ()
| const_decl YID YEQUAL const YSEMI ()
| YCONST YID YEQUAL YID YSEMI ()
| const_decl YID YEQUAL YID YSEMI ()
type_decl: YTYPE YID YEQUAL type' YSEMI ()
| type_decl YID YEQUAL type' YSEMI ()
var_decl: YVAR id_list YCOLON type' YSEMI ()
| var_decl id_list YCOLON type' YSEMI ()
proc_decl: phead YFORWARD YSEMI ()
| phead YEXTERN YSEMI ()
| pheadres decls block YSEMI ()
pheadres: phead ()
phead: porf YID params ftype YSEMI ()
porf: YPROCEDURE ()
| YFUNCTION ()
params: YLPAR param_list YRPAR ()
| ()
param: id_list YCOLON type' ()
| YVAR id_list YCOLON type' ()
| YFUNCTION id_list params ftype ()
| YPROCEDURE id_list params ftype ()
ftype: YCOLON type' ()
| ()
param_list: param ()
| param_list YSEMI param ()
const: YSTRING ()
| number ()
| YPLUS number ()
| YMINUS number ()
| YPLUS YID ()
| YMINUS YID ()
number: YINT ()
| YBINT ()
| YNUMB ()
const_list: const ()
| const_list YCOMMA const ()
| YID ()
| const_list YCOMMA YID ()
type': simple_type ()
| YCARET YID ()
| struct_type ()
| YPACKED struct_type ()
simple_type: type_id ()
| YLPAR id_list YRPAR ()
| const YDOTDOT const ()
| YID YDOTDOT const ()
| const YDOTDOT YID ()
| YID YDOTDOT YID ()
struct_type: YARRAY YLBRA simple_type_list YRBRA YOF type' ()
| YFILE YOF type' ()
| YSET YOF simple_type ()
| YRECORD field_list YEND ()
simple_type_list: simple_type ()
| simple_type_list YCOMMA simple_type ()
field_list: fixed_part variant_part ()
fixed_part: field ()
| fixed_part YSEMI field ()
field: ()
| id_list YCOLON type' ()
variant_part: ()
| YCASE type_id YOF variant_list ()
| YCASE YID YCOLON type_id YOF variant_list ()
variant_list: variant ()
| variant_list YSEMI variant ()
variant: ()
| const_list YCOLON YLPAR field_list YRPAR ()
stat_list: stat ()
| stat_lsth stat ()
stat_lsth: stat_list YSEMI ()
cstat_list: cstat ()
| cstat_list YSEMI cstat ()
cstat: const_list YCOLON stat ()
| YCASELAB stat ()
| ()
stat: ()
| YINT YCOLON stat ()
| YID ()
| YID YLPAR wexpr_list YRPAR ()
| assign ()
| YBEGIN stat_list YEND ()
| YCASE expr YOF cstat_list YEND ()
| YWITH var_list YDO stat ()
| YWHILE expr YDO stat ()
| YREPEAT stat_list YUNTIL expr ()
| YFOR assign YTO expr YDO stat ()
| YFOR assign YDOWNTO expr YDO stat ()
| YGOTO YINT ()
| YIF expr YTHEN stat ()
| YIF expr YTHEN stat YELSE stat ()
assign: variable YCOLON YEQUAL expr ()
| YID YCOLON YEQUAL expr ()
expr: expr relop expr %prec YLESS ()
| YPLUS expr %prec UNARYSIGN ()
| YMINUS expr %prec UNARYSIGN ()
| expr addop expr %prec YPLUS ()
| expr divop expr %prec YSTAR ()
| YNIL ()
| YSTRING ()
| YINT ()
| YBINT ()
| YNUMB ()
| variable ()
| YID ()
| YID YLPAR wexpr_list YRPAR ()
| YLPAR expr YRPAR ()
| negop expr %prec YNOT ()
| YLBRA element_list YRBRA ()
| YLBRA YRBRA ()
element_list: element ()
| element_list YCOMMA element ()
element: expr ()
| expr YDOTDOT expr ()
variable: qual_var ()
qual_var: YID YLBRA expr_list YRBRA ()
| qual_var YLBRA expr_list YRBRA ()
| YID YDOT field_id ()
| qual_var YDOT field_id ()
| YID YCARET ()
| qual_var YCARET ()
wexpr: expr ()
| expr YCOLON expr ()
| expr YCOLON expr YCOLON expr ()
| expr octhex ()
| expr YCOLON expr octhex ()
octhex: YOCT ()
| YHEX ()
expr_list: expr ()
| expr_list YCOMMA expr ()
wexpr_list: wexpr ()
| wexpr_list YCOMMA wexpr ()
relop: YEQUAL ()
| YLESS ()
| YGREATER ()
| YLESS YGREATER ()
| YLESS YEQUAL ()
| YGREATER YEQUAL ()
| YIN ()
addop: YPLUS ()
| YMINUS ()
| YOR ()
| YBAR ()
divop: YSTAR ()
| YSLASH ()
| YDIV ()
| YMOD ()
| YAND ()
| YAMP ()
negop: YNOT ()
| YTILDE ()
var_list: variable ()
| var_list YCOMMA variable ()
| YID ()
| var_list YCOMMA YID ()
id_list: YID ()
| id_list YCOMMA YID ()
const_id: YID ()
type_id: YID ()
var_id: YID ()
array_id: YID ()
ptr_id: YID ()
record_id: YID ()
field_id: YID ()
func_id: YID ()

View File

@ -0,0 +1,139 @@
structure Tokens = Tokens
type pos = int
type svalue = Tokens.svalue
type ('a,'b) token = ('a,'b) Tokens.token
type lexresult = (svalue,pos) token
open Tokens
val lineNum = ref 0
val eof = fn () => EOF(!lineNum,!lineNum)
structure KeyWord : sig
val find : string ->
(int * int -> (svalue,int) token) option
end =
struct
val TableSize = 211
val HashFactor = 5
val hash = fn s =>
foldl (fn (c,v)=>(v*HashFactor+(ord c)) mod TableSize) 0 (explode s)
val HashTable = Array.array(TableSize,nil) :
(string * (int * int -> (svalue,int) token)) list Array.array
val add = fn (s,v) =>
let val i = hash s
in Array.update(HashTable,i,(s,v) :: (Array.sub(HashTable, i)))
end
val find = fn s =>
let val i = hash s
fun f ((key,v)::r) = if s=key then SOME v else f r
| f nil = NONE
in f (Array.sub(HashTable, i))
end
val _ =
(List.app add
[("and",YAND),
("array",YARRAY),
("begin",YBEGIN),
("case",YCASE),
("const",YCONST),
("div",YDIV),
("do",YDO),
("downto",YDOWNTO),
("else",YELSE),
("end",YEND),
("extern",YEXTERN),
("file",YFILE),
("for",YFOR),
("forward",YFORWARD),
("function",YFUNCTION),
("goto",YGOTO),
("hex",YHEX),
("if",YIF),
("in",YIN),
("label",YLABEL),
("mod",YMOD),
("nil",YNIL),
("not",YNOT),
("oct",YOCT),
("of",YOF),
("or",YOR),
("packed",YPACKED),
("procedure",YPROCEDURE),
("program",YPROG),
("record",YRECORD),
("repeat",YREPEAT),
("set",YSET),
("then",YTHEN),
("to",YTO),
("type",YTYPE),
("until",YUNTIL),
("var",YVAR),
("while",YWHILE),
("with",YWITH)
])
end
open KeyWord
%%
%header (functor PascalLexFun(structure Tokens : Pascal_TOKENS));
%s C B;
alpha=[A-Za-z];
digit=[0-9];
optsign=("+"|"-")?;
integer={digit}+;
frac="."{digit}+;
exp=(e|E){optsign}{digit}+;
octdigit=[0-7];
ws = [\ \t];
%%
<INITIAL>{ws}+ => (lex());
<INITIAL>\n+ => (lineNum := (!lineNum) + (String.size yytext); lex());
<INITIAL>{alpha}+ => (case find yytext of SOME v => v(!lineNum,!lineNum)
| _ => YID(!lineNum,!lineNum));
<INITIAL>{alpha}({alpha}|{digit})* => (YID(!lineNum,!lineNum));
<INITIAL>{optsign}{integer}({frac}{exp}?|{frac}?{exp}) => (YNUMB(!lineNum,!lineNum));
<INITIAL>{optsign}{integer} => (YINT(!lineNum,!lineNum));
<INITIAL>{octdigit}+(b|B) => (YBINT(!lineNum,!lineNum));
<INITIAL>"'"([^']|"''")*"'" => (YSTRING(!lineNum,!lineNum));
<INITIAL>"(*" => (YYBEGIN C; lex());
<INITIAL>".." => (YDOTDOT(!lineNum,!lineNum));
<INITIAL>"." => (YDOT(!lineNum,!lineNum));
<INITIAL>"(" => (YLPAR(!lineNum,!lineNum));
<INITIAL>")" => (YRPAR(!lineNum,!lineNum));
<INITIAL>";" => (YSEMI(!lineNum,!lineNum));
<INITIAL>"," => (YCOMMA(!lineNum,!lineNum));
<INITIAL>":" => (YCOLON(!lineNum,!lineNum));
<INITIAL>"^" => (YCARET(!lineNum,!lineNum));
<INITIAL>"[" => (YLBRA(!lineNum,!lineNum));
<INITIAL>"]" => (YRBRA(!lineNum,!lineNum));
<INITIAL>"~" => (YTILDE(!lineNum,!lineNum));
<INITIAL>"<" => (YLESS(!lineNum,!lineNum));
<INITIAL>"=" => (YEQUAL(!lineNum,!lineNum));
<INITIAL>">" => (YGREATER(!lineNum,!lineNum));
<INITIAL>"+" => (YPLUS(!lineNum,!lineNum));
<INITIAL>"-" => (YMINUS(!lineNum,!lineNum));
<INITIAL>"|" => (YBAR(!lineNum,!lineNum));
<INITIAL>"*" => (YSTAR(!lineNum,!lineNum));
<INITIAL>"/" => (YSLASH(!lineNum,!lineNum));
<INITIAL>"{" => (YYBEGIN B; lex());
<INITIAL>. => (YILLCH(!lineNum,!lineNum));
<C>\n+ => (lineNum := (!lineNum) + (String.size yytext); lex());
<C>[^()*\n]+ => (lex());
<C>"(*" => (lex());
<C>"*)" => (YYBEGIN INITIAL; lex());
<C>[*()] => (lex());
<B>\n+ => (lineNum := (!lineNum) + (String.size yytext); lex());
<B>[^{}\n]+ => (lex());
<B>"{" => (lex());
<B>"}" => (YYBEGIN INITIAL; lex());

View File

@ -0,0 +1,9 @@
local
$(SML_LIB)/basis/basis.mlb
$(SML_LIB)/mlyacc-lib/mlyacc-lib.mlb
pascal.grm.sig
pascal.grm.sml
pascal.lex.sml
in
parser.sml
end

View File

@ -0,0 +1,2 @@
Test files for the error-correcting parser. Files beginning with c
are correct. Those beginning with t have simple syntax errors.

View File

@ -0,0 +1,271 @@
program simplex(input, output);
{ two-phase simplex algorithm: version Feb. 24, 1988 }
{ copyright K. Steiglitz }
{ Computer Science Dept. }
{ Princeton University 08544 }
{ ken@princeton.edu }
const
maxpivots = 1000; { maximum no. of pivots }
large = 1.0e+31; { large number used in search for minimum cost column }
lowlim = -1.0e+31; { large negative number to test for unboundedness }
mmax = 32; { max. no. of rows }
ncolmax = 50; { max. no. of columns allowed in tableau }
eps = 1.0e-8; { for testing for zero }
var
done, unbounded, optimal: boolean; { flags for simplex }
result: (toomanycols, unbound, infeas, toomanypivots, opt);
m: 1..mmax; { no. of rows - 1, the rows are numbered 0..m }
numpivots: integer; { pivot count }
pivotcol, pivotrow: integer; { pivot column and row }
pivotel: real; { pivot element }
cbar: real; { price when searching for entering column }
carry: array[-1..mmax, -1..mmax] of real; { inverse-basis matrix of the
revised simplex method }
phase: 1..2; { phase }
price: array[0..mmax] of real; { shadow prices = row -1 of carry =
-dual variables }
basis: array[0..mmax] of integer; { basis columns, negative integers
artificial }
ncol: 1..ncolmax; { number of columns }
tab: array[0..mmax, 1..ncolmax] of real; { tableau }
lhs: array[0..mmax] of real; { left-hand-side }
d: array[1..ncolmax] of real; { current cost vector }
c: array[1..ncolmax] of real; { cost vector in original problem }
curcol: array[-1..mmax] of real; { current column }
curcost: real; { current cost }
i, col, row: integer; { miscellaneous variables }
procedure columnsearch;
{ looks for favorable column to enter basis.
returns lowest cost and its column number, or turns on the flag optimal }
var
i , col : integer;
tempcost: real; { minimum cost, temporary cost of column }
begin { columnsearch }
for i:= 0 to m do price[i]:= -carry[-1, i]; { set up price vector }
optimal:= false;
cbar:= large;
pivotcol:= 0;
for col:= 1 to ncol do
begin
tempcost:= d[col];
for i:= 0 to m do tempcost:= tempcost - price[i]*tab[i, col];
if( cbar > tempcost ) then
begin
cbar:= tempcost;
pivotcol:= col
end
end; { for col }
if ( cbar > -eps ) then optimal:= true
end; { columnsearch }
procedure rowsearch;
{ looks for pivot row. returns pivot row number,
or turns on the flag unbounded }
var
i, j: integer;
ratio, minratio: real; { ratio and minimum ratio for ratio test }
begin { rowsearch }
for i:= 0 to m do { generate column }
begin
curcol[i]:= 0.0; { current column = B inverse * original col. }
for j:= 0 to m do curcol[i]:=
curcol[i] + carry[i, j]*tab[j, pivotcol]
end;
curcol[-1]:= cbar; { first element in current column }
pivotrow:= -1;
minratio:= large;
for i:= 0 to m do { ratio test }
begin
if( curcol[i] > eps ) then
begin
ratio:= carry[i, -1]/curcol[i];
if( minratio > ratio ) then { favorable row }
begin
minratio:= ratio;
pivotrow:= i;
pivotel:= curcol[i]
end
else { break tie with max pivot }
if ( (minratio = ratio) and (pivotel < curcol[i]) ) then
begin
pivotrow:= i;
pivotel:= curcol[i]
end
end { curcol > eps }
end; { for i }
if ( pivotrow = -1 ) then unbounded:= true { nothing found }
else unbounded:= false
end; { rowsearch }
procedure pivot;
{ pivots }
var
i, j: integer;
begin { pivot }
basis[pivotrow]:= pivotcol;
for j:= -1 to m do carry[pivotrow, j]:= carry[pivotrow, j]/pivotel;
for i:= -1 to m do
if( i<> pivotrow ) then
for j:= -1 to m do
carry[i, j]:= carry[i, j] - carry[pivotrow, j]*curcol[i];
curcost:= -carry[-1, -1]
end; { pivot }
procedure changephase;
{ changes phase from 1 to 2, by switching to original cost vector }
var
i, j, b: integer;
begin { changephase }
phase:= 2;
for i:= 0 to m do if( basis[i] <= 0 ) then
writeln( '...artificial basis element ', basis[i]:5,
' remains in basis after phase 1');
for j:= 1 to ncol do d[j]:= c[j]; { switch to original cost vector }
for j:= -1 to m do
begin
carry[-1, j]:= 0.0;
for i:= 0 to m do
begin
b:= basis[i]; { ignore artificial basis elements that are }
if( b >= 1 ) then { still in basis }
carry[-1, j]:= carry[-1, j] - c[b]*carry[i,j]
end { for i }
end; { for j }
curcost:= -carry[-1, -1]
end; { changephase }
procedure setup;
{ sets up test problem, lhs = tab*x, x >= 0, min c*x }
{ nrow = number of rows; ncol = number of cols }
{ tab = tableau; lhs = constants }
var
i, j, nrow: integer;
begin { setup }
readln(nrow); { read number of rows }
readln(ncol); { read number of columns }
m:= nrow - 1; { rows are numbered 0..m }
for j:= 1 to ncol do
read(c[j]); { cost vector }
for i:= 0 to m do
begin
read(lhs[i]); { left-hand-side }
for j:= 1 to ncol do
read(tab[i, j]) { tableau }
end;
done:= false; { initialize carry matrix, etc. }
phase:= 1;
for i:= -1 to m do for j:= -1 to mmax do carry[i, j]:= 0.0;
for i:= 0 to m do carry[i, i]:= 1.0; { artificial basis }
for i:= 0 to m do
begin
carry[i, -1]:= lhs[i]; { -1 col of carry = left-hand-side }
carry[-1, -1]:= carry[-1, -1] - lhs[i] { - initial cost }
end;
curcost:= -carry[-1, -1];
for i:= 0 to m do basis[i]:= -i; { initial, artificial basis }
if( ncol <= ncolmax ) then { check number of columns }
for col:= 1 to ncol do { initialize cost vector for phase 1 }
begin
d[col]:= 0.0;
for row:= 0 to m do d[col]:= d[col] - tab[row, col]
end
else
begin
writeln('...termination: too many columns for storage');
done:= true;
result:= toomanycols
end;
numpivots:= 0;
end; { setup }
begin { simplex }
setup;
while( (numpivots < maxpivots) and (not done) and
( (curcost > lowlim) or (phase = 1) ) ) do
begin
columnsearch;
if( not optimal ) then
begin { not optimal }
rowsearch;
if( unbounded ) then
begin
done:= true;
result:= unbound;
writeln('problem is unbounded')
end
else
begin
pivot;
numpivots:= numpivots + 1;
if ( (numpivots = 1 ) or ( numpivots mod 10 = 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12)
end
end { not optimal }
else { optimal }
if( phase = 1 ) then
begin
if( curcost > eps ) then
begin
done:= true;
result:= infeas;
writeln('problem is infeasible')
end
else
begin
if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12);
writeln('phase 1 successfully completed');
changephase
end
end { if phase = 1 }
else
begin
if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12);
writeln('phase 2 successfully completed');
done:= true;
result:= opt
end
end; { while }
if( (curcost <= lowlim) and (phase = 2) ) then
begin
if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12);
result:= unbound;
writeln('problem is unbounded')
end;
if ( numpivots >= maxpivots ) then
begin
writeln('...termination: maximum number of pivots exceeded');
result:= toomanypivots
end;
if result = opt then
begin
writeln('optimal solution reached');
writeln('cost =', -carry[-1,-1]:10:6);
for i:= 0 to m do
writeln('x(', basis[i]:4, ')= ', carry[i,-1]:10:6)
end
end.

View File

@ -0,0 +1,4 @@
program p(input,output);
begin
if x=0 then x := 1
end.

View File

@ -0,0 +1,270 @@
junk simplex(input, output);
{ two-phase simplex algorithm: version Feb. 24, 1988 }
{ copyright K. Steiglitz }
{ Computer Science Dept. }
{ Princeton University 08544 }
{ ken@princeton.edu }
var
maxpivots = 1000; { maximum no. of pivots }
large = 1.0e+31; { large number used in search for minimum cost column }
lowlim = -1.0e+31; { large negative number to test for unboundedness }
mmax = 32; { max. no. of rows }
ncolmax = 50; { max. no. of columns allowed in tableau }
eps = 1.0e-8; { for testing for zero }
const
done, unbounded, optimal: boolean; { flags for simplex }
result: (toomanycols, unbound, infeas, toomanypivots, opt);
m: 1..mmax; { no. of rows - 1, the rows are numbered 0..m }
numpivots: integer; { pivot count }
pivotcol, pivotrow: integer; { pivot column and row }
pivotel: real; { pivot element }
cbar: real; { price when searching for entering column }
carry: array[-1..mmax, -1..mmax] of real; { inverse-basis matrix of the
revised simplex method }
phase: 1..2; { phase }
price: array[0..mmax] of real; { shadow prices = row -1 of carry =
-dual variables }
basis: array[0..mmax] of integer; { basis columns, negative integers
artificial }
ncol: 1..ncolmax; { number of columns }
tab: array[0..mmax, 1..ncolmax] of real; { tableau }
lhs: array[0..mmax] of real; { left-hand-side }
d: array[1..ncolmax] of real; { current cost vector }
c: array[1..ncolmax] of real; { cost vector in original problem }
curcol: array[-1..mmax] of real; { current column }
curcost: real; { current cost }
i, col, row: integer; { miscellaneous variables }
procedure columnsearch;
{ looks for favorable column to enter basis.
returns lowest cost and its column number, or turns on the flag optimal }
var
i , col : integer;
tempcost: real; { minimum cost, temporary cost of column }
begin { columnsearch }
for i:= 0 to m do price[i]:= -carry[-1, i]; { set up price vector }
optimal:= false;
cbar:= large;
pivotcol:= 0;
for col:= 1 to ncol do
begin
tempcost:= d[col];
for i:= 0 to m do tempcost:= tempcost - price[i]*tab[i, col];
if( cbar > tempcost ) then
begin
cbar:= tempcost;
pivotcol:= col
end
end; { for col }
if ( cbar > -eps ) then optimal:= true
end; { columnsearch }
procedure rowsearch;
{ looks for pivot row. returns pivot row number,
or turns on the flag unbounded }
var
i, j: integer;
ratio, minratio: real; { ratio and minimum ratio for ratio test }
begin { rowsearch }
for i:= 0 to m do { generate column }
begin
curcol[i]:= 0.0; { current column = B inverse * original col. }
for j:= 0 to m do curcol[i]:=
curcol[i] + carry[i, j]*tab[j, pivotcol]
end;
curcol[-1]:= cbar; { first element in current column }
pivotrow:= -1;
minratio:= large;
for i:= 0 to m do { ratio test }
begin
if( curcol[i] > eps ) then
begin
ratio:= carry[i, -1]/curcol[i];
if( minratio > ratio ) then { favorable row }
begin
minratio:= ratio;
pivotrow:= i;
pivotel:= curcol[i]
end
else { break tie with max pivot }
if ( (minratio = ratio) and (pivotel < curcol[i]) ) then
begin
pivotrow:= i;
pivotel:= curcol[i]
end
end { curcol > eps }
end; { for i }
if ( pivotrow = -1 ) then unbounded:= true { nothing found }
else unbounded:= false
end; { rowsearch }
procedure pivot;
{ pivots }
var
i, j: integer;
begin { pivot }
basis[pivotrow]:= pivotcol;
for j:= -1 to m do carry[pivotrow, j]:= carry[pivotrow, j]/pivotel;
for i:= -1 to m do
if( i<> pivotrow ) then
for j:= -1 to m do
carry[i, j]:= carry[i, j] - carry[pivotrow, j]*curcol[i];
curcost:= -carry[-1, -1]
end; { pivot }
procedure changephase;
{ changes phase from 1 to 2, by switching to original cost vector }
var
i, j, b: integer;
begin { changephase }
phase:= 2;
for i:= 0 to m do if( basis[i] <= 0 ) then
writeln( '...artificial basis element ', basis[i]:5,
' remains in basis after phase 1');
for j:= 1 to ncol do d[j]:= c[j]; { switch to original cost vector }
for j:= -1 to m do
begin
carry[-1, j]:= 0.0;
for i:= 0 to m do
begin
b:= basis[i]; { ignore artificial basis elements that are }
if( b >= 1 ) then { still in basis }
carry[-1, j]:= carry[-1, j] - c[b]*carry[i,j];
end { for i }
end; { for j }
curcost:= -carry[-1, -1]
end; { changephase }
procedure setup;
{ sets up test problem, lhs = tab*x, x >= 0, min c*x }
{ nrow = number of rows; ncol = number of cols }
{ tab = tableau; lhs = constants }
var
i, j, nrow: integer;
begin { setup }
readln(nrow); { read number of rows }
readln(ncol); { read number of columns }
m:= nrow - 1; { rows are numbered 0..m }
for j:= 1 to ncol do
read(c[j]); { cost vector }
for i:= 0 to m do
begin
read(lhs[i]); { left-hand-side }
for j:= 1 to ncol do
read(tab[i, j]); { tableau }
end;
done:= false; { initialize carry matrix, etc. }
phase:= 1;
for i:= -1 to m do for j:= -1 to mmax do carry[i, j]:= 0.0;
for i:= 0 to m do carry[i, i]:= 1.0; { artificial basis }
for i:= 0 to m do
begin
carry[i, -1]:= lhs[i]; { -1 col of carry = left-hand-side }
carry[-1, -1]:= carry[-1, -1] - lhs[i] { - initial cost }
end;
curcost:= -carry[-1, -1];
for i:= 0 to m do basis[i]:= -i; { initial, artificial basis }
if( ncol <= ncolmax ) then { check number of columns }
for col:= 1 to ncol do { initialize cost vector for phase 1 }
begin
d[col]:= 0.0;
for row:= 0 to m do d[col]:= d[col] - tab[row, col]
end
else
begin
writeln('...termination: too many columns for storage');
done:= true;
result:= toomanycols
end;
numpivots:= 0;
end; { setup }
begin { simplex }
setup;
while( (numpivots < maxpivots) and (not done) and
( (curcost > lowlim) or (phase = 1) ) ) do
begin
columnsearch;
if( not optimal ) then
begin { not optimal }
rowsearch;
if( unbounded ) then
begin
done:= true;
result:= unbound;
writeln('problem is unbounded')
end
else
begin
pivot;
numpivots:= numpivots + 1;
if ( (numpivots = 1 ) or ( numpivots mod 10 = 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12)
end
end { not optimal }
else { optimal }
if( phase = 1 ) then
begin
if( curcost > eps ) then
begin
done:= true;
result:= infeas;
writeln('problem is infeasible')
end
else
begin
if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12);
writeln('phase 1 successfully completed');
changephase
end
end { if phase = 1 }
else
begin
if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12);
writeln('phase 2 successfully completed');
done:= true;
result:= opt
end
end; { while }
if(((curcost <= lowlim) and (phase = 2) ) then
begin
if ( (numpivots <> 1 ) and ( numpivots mod 10 <> 0 ) ) then
writeln('pivot ', numpivots:4, ' cost= ', curcost:12);
result:= unbound;
writeln('problem is unbounded')
end;
if ( numpivots >= maxpivots ) then
begin
writeln('...termination: maximum number of pivots exceeded');
result:= toomanypivots,
end;
if result = opt then
begin
writeln('optimal solution reached');
writeln('cost =', -carry[-1,-1]:10:6);
for i:= 0 to m do
writeln('x(', basis[i]:4, ')= ', carry[i,-1]:10:6)
end.

View File

@ -0,0 +1,4 @@
program p(input,output);
begin
if x := 0 then x := 1
end.

View File

@ -0,0 +1,6 @@
program p(input,output);
function topsort(var x: order, var y : sorted, x : integer);
begin end;
begin
x:= 1
end.

View File

@ -0,0 +1,6 @@
program p(input,output);
var l,n: real;
var x, nonprime,prime: ;
begin
var
end.

View File

@ -0,0 +1,4 @@
program p(input,output)
begin
writeln(' '; 9, 'x'; 10, 'm'; 9, '[x]'; 9,'approx x]'; 19,
end.

View File

@ -0,0 +1,11 @@
program this (output)
procedure addcor;
var bins,start,i,last : integer; level : real;
begin bins := trunc((r1+r2)*maxcor);
if bins < 1 then bins := 1;
start := round(d*maxcor) - bins div 2;
level := mm/bins;
last := start+bins; if last>maxcor then last := maxcor;
corfarray[start] := corfarray[start]-level;
corfarray[last] := corfarray[last]+level;
end;

View File

@ -0,0 +1,5 @@
program p(input,otput);
begin
for i := 1 to maxelements do
y[i] := 0;
end.

2
mlyacc-polyml/lib/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*
!.gitignore

View File

@ -0,0 +1,24 @@
val MLYaccLib =
{
sigs = [
"STREAM",
"LR_TABLE",
"TOKEN",
"LR_PARSER",
"LEXER",
"ARG_LEXER",
"PARSER_DATA",
"PARSER",
"ARG_PARSER"
],
structs = [
"LrTable",
"Stream",
"LrParser"
],
functors = [
"Join",
"JoinWithArg"
],
onStartup = NONE
}

View File

@ -0,0 +1,298 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
(* base.sig: Base signature file for SML-Yacc. This file contains signatures
that must be loaded before any of the files produced by ML-Yacc are loaded
*)
(* STREAM: signature for a lazy stream.*)
signature STREAM =
sig type 'xa stream
val streamify : (unit -> '_a) -> '_a stream
val cons : '_a * '_a stream -> '_a stream
val get : '_a stream -> '_a * '_a stream
end
(* LR_TABLE: signature for an LR Table.
The list of actions and gotos passed to mkLrTable must be ordered by state
number. The values for state 0 are the first in the list, the values for
state 1 are next, etc.
*)
signature LR_TABLE =
sig
datatype ('a,'b) pairlist = EMPTY | PAIR of 'a * 'b * ('a,'b) pairlist
datatype state = STATE of int
datatype term = T of int
datatype nonterm = NT of int
datatype action = SHIFT of state
| REDUCE of int
| ACCEPT
| ERROR
type table
val numStates : table -> int
val numRules : table -> int
val describeActions : table -> state ->
(term,action) pairlist * action
val describeGoto : table -> state -> (nonterm,state) pairlist
val action : table -> state * term -> action
val goto : table -> state * nonterm -> state
val initialState : table -> state
exception Goto of state * nonterm
val mkLrTable : {actions : ((term,action) pairlist * action) array,
gotos : (nonterm,state) pairlist array,
numStates : int, numRules : int,
initialState : state} -> table
end
(* TOKEN: signature revealing the internal structure of a token. This signature
TOKEN distinct from the signature {parser name}_TOKENS produced by ML-Yacc.
The {parser name}_TOKENS structures contain some types and functions to
construct tokens from values and positions.
The representation of token was very carefully chosen here to allow the
polymorphic parser to work without knowing the types of semantic values
or line numbers.
This has had an impact on the TOKENS structure produced by SML-Yacc, which
is a structure parameter to lexer functors. We would like to have some
type 'a token which functions to construct tokens would create. A
constructor function for a integer token might be
INT: int * 'a * 'a -> 'a token.
This is not possible because we need to have tokens with the representation
given below for the polymorphic parser.
Thus our constructur functions for tokens have the form:
INT: int * 'a * 'a -> (svalue,'a) token
This in turn has had an impact on the signature that lexers for SML-Yacc
must match and the types that a user must declare in the user declarations
section of lexers.
*)
signature TOKEN =
sig
structure LrTable : LR_TABLE
datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
val sameToken : ('a,'b) token * ('a,'b) token -> bool
end
(* LR_PARSER: signature for a polymorphic LR parser *)
signature LR_PARSER =
sig
structure Stream: STREAM
structure LrTable : LR_TABLE
structure Token : TOKEN
sharing LrTable = Token.LrTable
exception ParseError
val parse : {table : LrTable.table,
lexer : ('_b,'_c) Token.token Stream.stream,
arg: 'arg,
saction : int *
'_c *
(LrTable.state * ('_b * '_c * '_c)) list *
'arg ->
LrTable.nonterm *
('_b * '_c * '_c) *
((LrTable.state *('_b * '_c * '_c)) list),
void : '_b,
ec : { is_keyword : LrTable.term -> bool,
noShift : LrTable.term -> bool,
preferred_change : (LrTable.term list * LrTable.term list) list,
errtermvalue : LrTable.term -> '_b,
showTerminal : LrTable.term -> string,
terms: LrTable.term list,
error : string * '_c * '_c -> unit
},
lookahead : int (* max amount of lookahead used in *)
(* error correction *)
} -> '_b *
(('_b,'_c) Token.token Stream.stream)
end
(* LEXER: a signature that most lexers produced for use with SML-Yacc's
output will match. The user is responsible for declaring type token,
type pos, and type svalue in the UserDeclarations section of a lexer.
Note that type token is abstract in the lexer. This allows SML-Yacc to
create a TOKENS signature for use with lexers produced by ML-Lex that
treats the type token abstractly. Lexers that are functors parametrized by
a Tokens structure matching a TOKENS signature cannot examine the structure
of tokens.
*)
signature LEXER =
sig
structure UserDeclarations :
sig
type ('a,'b) token
type pos
type svalue
end
val makeLexer : (int -> string) -> unit ->
(UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
end
(* ARG_LEXER: the %arg option of ML-Lex allows users to produce lexers which
also take an argument before yielding a function from unit to a token
*)
signature ARG_LEXER =
sig
structure UserDeclarations :
sig
type ('a,'b) token
type pos
type svalue
type arg
end
val makeLexer : (int -> string) -> UserDeclarations.arg -> unit ->
(UserDeclarations.svalue,UserDeclarations.pos) UserDeclarations.token
end
(* PARSER_DATA: the signature of ParserData structures in {parser name}LrValsFun
produced by SML-Yacc. All such structures match this signature.
The {parser name}LrValsFun produces a structure which contains all the values
except for the lexer needed to call the polymorphic parser mentioned
before.
*)
signature PARSER_DATA =
sig
(* the type of line numbers *)
type pos
(* the type of semantic values *)
type svalue
(* the type of the user-supplied argument to the parser *)
type arg
(* the intended type of the result of the parser. This value is
produced by applying extract from the structure Actions to the
final semantic value resultiing from a parse.
*)
type result
structure LrTable : LR_TABLE
structure Token : TOKEN
sharing Token.LrTable = LrTable
(* structure Actions contains the functions which mantain the
semantic values stack in the parser. Void is used to provide
a default value for the semantic stack.
*)
structure Actions :
sig
val actions : int * pos *
(LrTable.state * (svalue * pos * pos)) list * arg->
LrTable.nonterm * (svalue * pos * pos) *
((LrTable.state *(svalue * pos * pos)) list)
val void : svalue
val extract : svalue -> result
end
(* structure EC contains information used to improve error
recovery in an error-correcting parser *)
structure EC :
sig
val is_keyword : LrTable.term -> bool
val noShift : LrTable.term -> bool
val preferred_change : (LrTable.term list * LrTable.term list) list
val errtermvalue : LrTable.term -> svalue
val showTerminal : LrTable.term -> string
val terms: LrTable.term list
end
(* table is the LR table for the parser *)
val table : LrTable.table
end
(* signature PARSER is the signature that most user parsers created by
SML-Yacc will match.
*)
signature PARSER =
sig
structure Token : TOKEN
structure Stream : STREAM
exception ParseError
(* type pos is the type of line numbers *)
type pos
(* type result is the type of the result from the parser *)
type result
(* the type of the user-supplied argument to the parser *)
type arg
(* type svalue is the type of semantic values for the semantic value
stack
*)
type svalue
(* val makeLexer is used to create a stream of tokens for the parser *)
val makeLexer : (int -> string) ->
(svalue,pos) Token.token Stream.stream
(* val parse takes a stream of tokens and a function to print
errors and returns a value of type result and a stream containing
the unused tokens
*)
val parse : int * ((svalue,pos) Token.token Stream.stream) *
(string * pos * pos -> unit) * arg ->
result * (svalue,pos) Token.token Stream.stream
val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->
bool
end
(* signature ARG_PARSER is the signature that will be matched by parsers whose
lexer takes an additional argument.
*)
signature ARG_PARSER =
sig
structure Token : TOKEN
structure Stream : STREAM
exception ParseError
type arg
type lexarg
type pos
type result
type svalue
val makeLexer : (int -> string) -> lexarg ->
(svalue,pos) Token.token Stream.stream
val parse : int * ((svalue,pos) Token.token Stream.stream) *
(string * pos * pos -> unit) * arg ->
result * (svalue,pos) Token.token Stream.stream
val sameToken : (svalue,pos) Token.token * (svalue,pos) Token.token ->
bool
end

View File

@ -0,0 +1,94 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
(* functor Join creates a user parser by putting together a Lexer structure,
an LrValues structure, and a polymorphic parser structure. Note that
the Lexer and LrValues structure must share the type pos (i.e. the type
of line numbers), the type svalues for semantic values, and the type
of tokens.
*)
functor Join(structure Lex : LEXER
structure ParserData: PARSER_DATA
structure LrParser : LR_PARSER
sharing ParserData.LrTable = LrParser.LrTable
sharing ParserData.Token = LrParser.Token
sharing type Lex.UserDeclarations.svalue = ParserData.svalue
sharing type Lex.UserDeclarations.pos = ParserData.pos
sharing type Lex.UserDeclarations.token = ParserData.Token.token)
: PARSER =
struct
structure Token = ParserData.Token
structure Stream = LrParser.Stream
exception ParseError = LrParser.ParseError
type arg = ParserData.arg
type pos = ParserData.pos
type result = ParserData.result
type svalue = ParserData.svalue
val makeLexer = LrParser.Stream.streamify o Lex.makeLexer
val parse = fn (lookahead,lexer,error,arg) =>
(fn (a,b) => (ParserData.Actions.extract a,b))
(LrParser.parse {table = ParserData.table,
lexer=lexer,
lookahead=lookahead,
saction = ParserData.Actions.actions,
arg=arg,
void= ParserData.Actions.void,
ec = {is_keyword = ParserData.EC.is_keyword,
noShift = ParserData.EC.noShift,
preferred_change = ParserData.EC.preferred_change,
errtermvalue = ParserData.EC.errtermvalue,
error=error,
showTerminal = ParserData.EC.showTerminal,
terms = ParserData.EC.terms}}
)
val sameToken = Token.sameToken
end
(* functor JoinWithArg creates a variant of the parser structure produced
above. In this case, the makeLexer take an additional argument before
yielding a value of type unit -> (svalue,pos) token
*)
functor JoinWithArg(structure Lex : ARG_LEXER
structure ParserData: PARSER_DATA
structure LrParser : LR_PARSER
sharing ParserData.LrTable = LrParser.LrTable
sharing ParserData.Token = LrParser.Token
sharing type Lex.UserDeclarations.svalue = ParserData.svalue
sharing type Lex.UserDeclarations.pos = ParserData.pos
sharing type Lex.UserDeclarations.token = ParserData.Token.token)
: ARG_PARSER =
struct
structure Token = ParserData.Token
structure Stream = LrParser.Stream
exception ParseError = LrParser.ParseError
type arg = ParserData.arg
type lexarg = Lex.UserDeclarations.arg
type pos = ParserData.pos
type result = ParserData.result
type svalue = ParserData.svalue
val makeLexer = fn s => fn arg =>
LrParser.Stream.streamify (Lex.makeLexer s arg)
val parse = fn (lookahead,lexer,error,arg) =>
(fn (a,b) => (ParserData.Actions.extract a,b))
(LrParser.parse {table = ParserData.table,
lexer=lexer,
lookahead=lookahead,
saction = ParserData.Actions.actions,
arg=arg,
void= ParserData.Actions.void,
ec = {is_keyword = ParserData.EC.is_keyword,
noShift = ParserData.EC.noShift,
preferred_change = ParserData.EC.preferred_change,
errtermvalue = ParserData.EC.errtermvalue,
error=error,
showTerminal = ParserData.EC.showTerminal,
terms = ParserData.EC.terms}}
)
val sameToken = Token.sameToken
end;

View File

@ -0,0 +1,5 @@
use "base.sig";
use "join.sml";
use "lrtable.sml";
use "stream.sml";
use "parser2.sml";

View File

@ -0,0 +1,59 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
structure LrTable : LR_TABLE =
struct
val sub = Array.sub
infix 9 sub
datatype ('a,'b) pairlist = EMPTY
| PAIR of 'a * 'b * ('a,'b) pairlist
datatype term = T of int
datatype nonterm = NT of int
datatype state = STATE of int
datatype action = SHIFT of state
| REDUCE of int (* rulenum from grammar *)
| ACCEPT
| ERROR
exception Goto of state * nonterm
type table = {states: int, rules : int,initialState: state,
action: ((term,action) pairlist * action) array,
goto : (nonterm,state) pairlist array}
val numStates = fn ({states,...} : table) => states
val numRules = fn ({rules,...} : table) => rules
val describeActions =
fn ({action,...} : table) =>
fn (STATE s) => action sub s
val describeGoto =
fn ({goto,...} : table) =>
fn (STATE s) => goto sub s
fun findTerm (T term,row,default) =
let fun find (PAIR (T key,data,r)) =
if key < term then find r
else if key=term then data
else default
| find EMPTY = default
in find row
end
fun findNonterm (NT nt,row) =
let fun find (PAIR (NT key,data,r)) =
if key < nt then find r
else if key=nt then SOME data
else NONE
| find EMPTY = NONE
in find row
end
val action = fn ({action,...} : table) =>
fn (STATE state,term) =>
let val (row,default) = action sub state
in findTerm(term,row,default)
end
val goto = fn ({goto,...} : table) =>
fn (a as (STATE state,nonterm)) =>
case findNonterm(nonterm,goto sub state)
of SOME state => state
| NONE => raise (Goto a)
val initialState = fn ({initialState,...} : table) => initialState
val mkLrTable = fn {actions,gotos,initialState,numStates,numRules} =>
({action=actions,goto=gotos,
states=numStates,
rules=numRules,
initialState=initialState} : table)
end;

View File

@ -0,0 +1,97 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
(* drt (12/15/89) -- the functor should be used during development work,
but it is wastes space in the release version.
functor ParserGen(structure LrTable : LR_TABLE
structure Stream : STREAM) : LR_PARSER =
*)
structure LrParser :> LR_PARSER =
struct
val print = fn s => output(std_out,s)
val println = fn s => (print s; print "\n")
structure LrTable = LrTable
structure Stream = Stream
structure Token : TOKEN =
struct
structure LrTable = LrTable
datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
val sameToken = fn (TOKEN (t,_),TOKEN(t',_)) => t=t'
end
open LrTable
open Token
val DEBUG = false
exception ParseError
type ('a,'b) elem = (state * ('a * 'b * 'b))
type ('a,'b) stack = ('a,'b) elem list
val showState = fn (STATE s) => ("STATE " ^ (makestring s))
fun printStack(stack: ('a,'b) elem list, n: int) =
case stack
of (state, _) :: rest =>
(print(" " ^ makestring n ^ ": ");
println(showState state);
printStack(rest, n+1)
)
| nil => ()
val parse = fn {arg : 'a,
table : LrTable.table,
lexer : ('_b,'_c) token Stream.stream,
saction : int * '_c * ('_b,'_c) stack * 'a ->
nonterm * ('_b * '_c * '_c) * ('_b,'_c) stack,
void : '_b,
ec = {is_keyword,preferred_change,
errtermvalue,showTerminal,
error,terms,noShift},
lookahead} =>
let fun prAction(stack as (state, _) :: _,
next as (TOKEN (term,_),_), action) =
(println "Parse: state stack:";
printStack(stack, 0);
print(" state="
^ showState state
^ " next="
^ showTerminal term
^ " action="
);
case action
of SHIFT s => println ("SHIFT " ^ showState s)
| REDUCE i => println ("REDUCE " ^ (makestring i))
| ERROR => println "ERROR"
| ACCEPT => println "ACCEPT";
action)
| prAction (_,_,action) = action
val action = LrTable.action table
val goto = LrTable.goto table
fun parseStep(next as (TOKEN (terminal, value as (_,leftPos,_)),lexer) :
('_b,'_c) token * ('_b,'_c) token Stream.stream,
stack as (state,_) :: _ : ('_b ,'_c) stack) =
case (if DEBUG then prAction(stack, next,action(state, terminal))
else action(state, terminal))
of SHIFT s => parseStep(Stream.get lexer, (s,value) :: stack)
| REDUCE i =>
let val (nonterm,value,stack as (state,_) :: _ ) =
saction(i,leftPos,stack,arg)
in parseStep(next,(goto(state,nonterm),value)::stack)
end
| ERROR => let val (_,leftPos,rightPos) = value
in error("syntax error\n",leftPos,rightPos);
raise ParseError
end
| ACCEPT => let val (_,(topvalue,_,_)) :: _ = stack
val (token,restLexer) = next
in (topvalue,Stream.cons(token,lexer))
end
val next as (TOKEN (terminal,(_,leftPos,_)),_) = Stream.get lexer
in parseStep(next,[(initialState table,(void,leftPos,leftPos))])
end
end;

View File

@ -0,0 +1,541 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
(* parser.sml: This is a parser driver for LR tables with an error-recovery
routine added to it. The routine used is described in detail in this
article:
'A Practical Method for LR and LL Syntactic Error Diagnosis and
Recovery', by M. Burke and G. Fisher, ACM Transactions on
Programming Langauges and Systems, Vol. 9, No. 2, April 1987,
pp. 164-197.
This program is an implementation is the partial, deferred method discussed
in the article. The algorithm and data structures used in the program
are described below.
This program assumes that all semantic actions are delayed. A semantic
action should produce a function from unit -> value instead of producing the
normal value. The parser returns the semantic value on the top of the
stack when accept is encountered. The user can deconstruct this value
and apply the unit -> value function in it to get the answer.
It also assumes that the lexer is a lazy stream.
Data Structures:
----------------
* The parser:
The state stack has the type
(state * (semantic value * line # * line #)) list
The parser keeps a queue of (state stack * lexer pair). A lexer pair
consists of a terminal * value pair and a lexer. This allows the
parser to reconstruct the states for terminals to the left of a
syntax error, and attempt to make error corrections there.
The queue consists of a pair of lists (x,y). New additions to
the queue are cons'ed onto y. The first element of x is the top
of the queue. If x is nil, then y is reversed and used
in place of x.
Algorithm:
----------
* The steady-state parser:
This parser keeps the length of the queue of state stacks at
a steady state by always removing an element from the front when
another element is placed on the end.
It has these arguments:
stack: current stack
queue: value of the queue
lexPair ((terminal,value),lex stream)
When SHIFT is encountered, the state to shift to and the value are
are pushed onto the state stack. The state stack and lexPair are
placed on the queue. The front element of the queue is removed.
When REDUCTION is encountered, the rule is applied to the current
stack to yield a triple (nonterm,value,new stack). A new
stack is formed by adding (goto(top state of stack,nonterm),value)
to the stack.
When ACCEPT is encountered, the top value from the stack and the
lexer are returned.
When an ERROR is encountered, fixError is called. FixError
takes the arguments to the parser, fixes the error if possible and
returns a new set of arguments.
* The distance-parser:
This parser includes an additional argument distance. It pushes
elements on the queue until it has parsed distance tokens, or an
ACCEPT or ERROR occurs. It returns a stack, lexer, the number of
tokens left unparsed, a queue, and an action option.
*)
signature FIFO =
sig type 'a queue
val empty : 'a queue
exception Empty
val get : 'a queue -> 'a * 'a queue
val put : 'a * 'a queue -> 'a queue
end
(* drt (12/15/89) -- the functor should be used in development work, but
it wastes space in the release version.
functor ParserGen(structure LrTable : LR_TABLE
structure Stream : STREAM) : LR_PARSER =
*)
structure LrParser :> LR_PARSER =
struct
structure LrTable = LrTable
structure Stream = Stream
fun eqT (LrTable.T i, LrTable.T i') = i = i'
structure Token : TOKEN =
struct
structure LrTable = LrTable
datatype ('a,'b) token = TOKEN of LrTable.term * ('a * 'b * 'b)
val sameToken = fn (TOKEN(t,_),TOKEN(t',_)) => eqT (t,t')
end
open LrTable
open Token
val DEBUG1 = false
val DEBUG2 = false
exception ParseError
exception ParseImpossible of int
structure Fifo :> FIFO =
struct
type 'a queue = ('a list * 'a list)
val empty = (nil,nil)
exception Empty
fun get(a::x, y) = (a, (x,y))
| get(nil, nil) = raise Empty
| get(nil, y) = get(rev y, nil)
fun put(a,(x,y)) = (x,a::y)
end
type ('a,'b) elem = (state * ('a * 'b * 'b))
type ('a,'b) stack = ('a,'b) elem list
type ('a,'b) lexv = ('a,'b) token
type ('a,'b) lexpair = ('a,'b) lexv * (('a,'b) lexv Stream.stream)
type ('a,'b) distanceParse =
('a,'b) lexpair *
('a,'b) stack *
(('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
int ->
('a,'b) lexpair *
('a,'b) stack *
(('a,'b) stack * ('a,'b) lexpair) Fifo.queue *
int *
action option
type ('a,'b) ecRecord =
{is_keyword : term -> bool,
preferred_change : (term list * term list) list,
error : string * 'b * 'b -> unit,
errtermvalue : term -> 'a,
terms : term list,
showTerminal : term -> string,
noShift : term -> bool}
local
val print = fn s => TextIO.output(TextIO.stdOut,s)
val println = fn s => (print s; print "\n")
val showState = fn (STATE s) => "STATE " ^ (Int.toString s)
in
fun printStack(stack: ('a,'b) stack, n: int) =
case stack
of (state,_) :: rest =>
(print("\t" ^ Int.toString n ^ ": ");
println(showState state);
printStack(rest, n+1))
| nil => ()
fun prAction showTerminal
(stack as (state,_) :: _, next as (TOKEN (term,_),_), action) =
(println "Parse: state stack:";
printStack(stack, 0);
print(" state="
^ showState state
^ " next="
^ showTerminal term
^ " action="
);
case action
of SHIFT state => println ("SHIFT " ^ (showState state))
| REDUCE i => println ("REDUCE " ^ (Int.toString i))
| ERROR => println "ERROR"
| ACCEPT => println "ACCEPT")
| prAction _ (_,_,action) = ()
end
(* ssParse: parser which maintains the queue of (state * lexvalues) in a
steady-state. It takes a table, showTerminal function, saction
function, and fixError function. It parses until an ACCEPT is
encountered, or an exception is raised. When an error is encountered,
fixError is called with the arguments of parseStep (lexv,stack,and
queue). It returns the lexv, and a new stack and queue adjusted so
that the lexv can be parsed *)
val ssParse =
fn (table,showTerminal,saction,fixError,arg) =>
let val prAction = prAction showTerminal
val action = LrTable.action table
val goto = LrTable.goto table
fun parseStep(args as
(lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
lexer
),
stack as (state,_) :: _,
queue)) =
let val nextAction = action (state,terminal)
val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
else ()
in case nextAction
of SHIFT s =>
let val newStack = (s,value) :: stack
val newLexPair = Stream.get lexer
val (_,newQueue) =Fifo.get(Fifo.put((newStack,newLexPair),
queue))
in parseStep(newLexPair,(s,value)::stack,newQueue)
end
| REDUCE i =>
(case saction(i,leftPos,stack,arg)
of (nonterm,value,stack as (state,_) :: _) =>
parseStep(lexPair,(goto(state,nonterm),value)::stack,
queue)
| _ => raise (ParseImpossible 197))
| ERROR => parseStep(fixError args)
| ACCEPT =>
(case stack
of (_,(topvalue,_,_)) :: _ =>
let val (token,restLexer) = lexPair
in (topvalue,Stream.cons(token,restLexer))
end
| _ => raise (ParseImpossible 202))
end
| parseStep _ = raise (ParseImpossible 204)
in parseStep
end
(* distanceParse: parse until n tokens are shifted, or accept or
error are encountered. Takes a table, showTerminal function, and
semantic action function. Returns a parser which takes a lexPair
(lex result * lexer), a state stack, a queue, and a distance
(must be > 0) to parse. The parser returns a new lex-value, a stack
with the nth token shifted on top, a queue, a distance, and action
option. *)
val distanceParse =
fn (table,showTerminal,saction,arg) =>
let val prAction = prAction showTerminal
val action = LrTable.action table
val goto = LrTable.goto table
fun parseStep(lexPair,stack,queue,0) = (lexPair,stack,queue,0,NONE)
| parseStep(lexPair as (TOKEN (terminal, value as (_,leftPos,_)),
lexer
),
stack as (state,_) :: _,
queue,distance) =
let val nextAction = action(state,terminal)
val _ = if DEBUG1 then prAction(stack,lexPair,nextAction)
else ()
in case nextAction
of SHIFT s =>
let val newStack = (s,value) :: stack
val newLexPair = Stream.get lexer
in parseStep(newLexPair,(s,value)::stack,
Fifo.put((newStack,newLexPair),queue),distance-1)
end
| REDUCE i =>
(case saction(i,leftPos,stack,arg)
of (nonterm,value,stack as (state,_) :: _) =>
parseStep(lexPair,(goto(state,nonterm),value)::stack,
queue,distance)
| _ => raise (ParseImpossible 240))
| ERROR => (lexPair,stack,queue,distance,SOME nextAction)
| ACCEPT => (lexPair,stack,queue,distance,SOME nextAction)
end
| parseStep _ = raise (ParseImpossible 242)
in parseStep : ('_a,'_b) distanceParse
end
(* mkFixError: function to create fixError function which adjusts parser state
so that parse may continue in the presence of an error *)
fun mkFixError({is_keyword,terms,errtermvalue,
preferred_change,noShift,
showTerminal,error,...} : ('_a,'_b) ecRecord,
distanceParse : ('_a,'_b) distanceParse,
minAdvance,maxAdvance)
(lexv as (TOKEN (term,value as (_,leftPos,rightPos)),_),stack,queue) =
let val _ = if DEBUG2 then
error("syntax error found at " ^ (showTerminal term),
leftPos,rightPos)
else ()
fun tokAt(t,p) = TOKEN(t,(errtermvalue t,p,p))
val minDelta = 3
(* pull all the state * lexv elements from the queue *)
val stateList =
let fun f q = let val (elem,newQueue) = Fifo.get q
in elem :: (f newQueue)
end handle Fifo.Empty => nil
in f queue
end
(* now number elements of stateList, giving distance from
error token *)
val (_, numStateList) =
List.foldr (fn (a,(num,r)) => (num+1,(a,num)::r)) (0, []) stateList
(* Represent the set of potential changes as a linked list.
Values of datatype Change hold information about a potential change.
oper = oper to be applied
pos = the # of the element in stateList that would be altered.
distance = the number of tokens beyond the error token which the
change allows us to parse.
new = new terminal * value pair at that point
orig = original terminal * value pair at the point being changed.
*)
datatype ('a,'b) change = CHANGE of
{pos : int, distance : int, leftPos: 'b, rightPos: 'b,
new : ('a,'b) lexv list, orig : ('a,'b) lexv list}
val showTerms = concat o map (fn TOKEN(t,_) => " " ^ showTerminal t)
val printChange = fn c =>
let val CHANGE {distance,new,orig,pos,...} = c
in (print ("{distance= " ^ (Int.toString distance));
print (",orig ="); print(showTerms orig);
print (",new ="); print(showTerms new);
print (",pos= " ^ (Int.toString pos));
print "}\n")
end
val printChangeList = app printChange
(* parse: given a lexPair, a stack, and the distance from the error
token, return the distance past the error token that we are able to parse.*)
fun parse (lexPair,stack,queuePos : int) =
case distanceParse(lexPair,stack,Fifo.empty,queuePos+maxAdvance+1)
of (_,_,_,distance,SOME ACCEPT) =>
if maxAdvance-distance-1 >= 0
then maxAdvance
else maxAdvance-distance-1
| (_,_,_,distance,_) => maxAdvance - distance - 1
(* catList: concatenate results of scanning list *)
fun catList l f = List.foldr (fn(a,r)=> f a @ r) [] l
fun keywordsDelta new = if List.exists (fn(TOKEN(t,_))=>is_keyword t) new
then minDelta else 0
fun tryChange{lex,stack,pos,leftPos,rightPos,orig,new} =
let val lex' = List.foldr (fn (t',p)=>(t',Stream.cons p)) lex new
val distance = parse(lex',stack,pos+length new-length orig)
in if distance >= minAdvance + keywordsDelta new
then [CHANGE{pos=pos,leftPos=leftPos,rightPos=rightPos,
distance=distance,orig=orig,new=new}]
else []
end
(* tryDelete: Try to delete n terminals.
Return single-element [success] or nil.
Do not delete unshiftable terminals. *)
fun tryDelete n ((stack,lexPair as (TOKEN(term,(_,l,r)),_)),qPos) =
let fun del(0,accum,left,right,lexPair) =
tryChange{lex=lexPair,stack=stack,
pos=qPos,leftPos=left,rightPos=right,
orig=rev accum, new=[]}
| del(n,accum,left,right,(tok as TOKEN(term,(_,_,r)),lexer)) =
if noShift term then []
else del(n-1,tok::accum,left,r,Stream.get lexer)
in del(n,[],l,r,lexPair)
end
(* tryInsert: try to insert tokens before the current terminal;
return a list of the successes *)
fun tryInsert((stack,lexPair as (TOKEN(_,(_,l,_)),_)),queuePos) =
catList terms (fn t =>
tryChange{lex=lexPair,stack=stack,
pos=queuePos,orig=[],new=[tokAt(t,l)],
leftPos=l,rightPos=l})
(* trySubst: try to substitute tokens for the current terminal;
return a list of the successes *)
fun trySubst ((stack,lexPair as (orig as TOKEN (term,(_,l,r)),lexer)),
queuePos) =
if noShift term then []
else
catList terms (fn t =>
tryChange{lex=Stream.get lexer,stack=stack,
pos=queuePos,
leftPos=l,rightPos=r,orig=[orig],
new=[tokAt(t,r)]})
(* do_delete(toks,lexPair) tries to delete tokens "toks" from "lexPair".
If it succeeds, returns SOME(toks',l,r,lp), where
toks' is the actual tokens (with positions and values) deleted,
(l,r) are the (leftmost,rightmost) position of toks',
lp is what remains of the stream after deletion
*)
fun do_delete(nil,lp as (TOKEN(_,(_,l,_)),_)) = SOME(nil,l,l,lp)
| do_delete([t],(tok as TOKEN(t',(_,l,r)),lp')) =
if eqT (t, t')
then SOME([tok],l,r,Stream.get lp')
else NONE
| do_delete(t::rest,(tok as TOKEN(t',(_,l,r)),lp')) =
if eqT (t,t')
then case do_delete(rest,Stream.get lp')
of SOME(deleted,l',r',lp'') =>
SOME(tok::deleted,l,r',lp'')
| NONE => NONE
else NONE
fun tryPreferred((stack,lexPair),queuePos) =
catList preferred_change (fn (delete,insert) =>
if List.exists noShift delete then [] (* should give warning at
parser-generation time *)
else case do_delete(delete,lexPair)
of SOME(deleted,l,r,lp) =>
tryChange{lex=lp,stack=stack,pos=queuePos,
leftPos=l,rightPos=r,orig=deleted,
new=map (fn t=>(tokAt(t,r))) insert}
| NONE => [])
val changes = catList numStateList tryPreferred @
catList numStateList tryInsert @
catList numStateList trySubst @
catList numStateList (tryDelete 1) @
catList numStateList (tryDelete 2) @
catList numStateList (tryDelete 3)
val findMaxDist = fn l =>
foldr (fn (CHANGE {distance,...},high) => Int.max(distance,high)) 0 l
(* maxDist: max distance past error taken that we could parse *)
val maxDist = findMaxDist changes
(* remove changes which did not parse maxDist tokens past the error token *)
val changes = catList changes
(fn(c as CHANGE{distance,...}) =>
if distance=maxDist then [c] else [])
in case changes
of (l as change :: _) =>
let fun print_msg (CHANGE {new,orig,leftPos,rightPos,...}) =
let val s =
case (orig,new)
of (_::_,[]) => "deleting " ^ (showTerms orig)
| ([],_::_) => "inserting " ^ (showTerms new)
| _ => "replacing " ^ (showTerms orig) ^
" with " ^ (showTerms new)
in error ("syntax error: " ^ s,leftPos,rightPos)
end
val _ =
(if length l > 1 andalso DEBUG2 then
(print "multiple fixes possible; could fix it by:\n";
app print_msg l;
print "chosen correction:\n")
else ();
print_msg change)
(* findNth: find nth queue entry from the error
entry. Returns the Nth queue entry and the portion of
the queue from the beginning to the nth-1 entry. The
error entry is at the end of the queue.
Examples:
queue = a b c d e
findNth 0 = (e,a b c d)
findNth 1 = (d,a b c)
*)
val findNth = fn n =>
let fun f (h::t,0) = (h,rev t)
| f (h::t,n) = f(t,n-1)
| f (nil,_) = let exception FindNth
in raise FindNth
end
in f (rev stateList,n)
end
val CHANGE {pos,orig,new,...} = change
val (last,queueFront) = findNth pos
val (stack,lexPair) = last
val lp1 = foldl(fn (_,(_,r)) => Stream.get r) lexPair orig
val lp2 = foldr(fn(t,r)=>(t,Stream.cons r)) lp1 new
val restQueue =
Fifo.put((stack,lp2),
foldl Fifo.put Fifo.empty queueFront)
val (lexPair,stack,queue,_,_) =
distanceParse(lp2,stack,restQueue,pos)
in (lexPair,stack,queue)
end
| nil => (error("syntax error found at " ^ (showTerminal term),
leftPos,rightPos); raise ParseError)
end
val parse = fn {arg,table,lexer,saction,void,lookahead,
ec=ec as {showTerminal,...} : ('_a,'_b) ecRecord} =>
let val distance = 15 (* defer distance tokens *)
val minAdvance = 1 (* must parse at least 1 token past error *)
val maxAdvance = Int.max(lookahead,0)(* max distance for parse check *)
val lexPair = Stream.get lexer
val (TOKEN (_,(_,leftPos,_)),_) = lexPair
val startStack = [(initialState table,(void,leftPos,leftPos))]
val startQueue = Fifo.put((startStack,lexPair),Fifo.empty)
val distanceParse = distanceParse(table,showTerminal,saction,arg)
val fixError = mkFixError(ec,distanceParse,minAdvance,maxAdvance)
val ssParse = ssParse(table,showTerminal,saction,fixError,arg)
fun loop (lexPair,stack,queue,_,SOME ACCEPT) =
ssParse(lexPair,stack,queue)
| loop (lexPair,stack,queue,0,_) = ssParse(lexPair,stack,queue)
| loop (lexPair,stack,queue,distance,SOME ERROR) =
let val (lexPair,stack,queue) = fixError(lexPair,stack,queue)
in loop (distanceParse(lexPair,stack,queue,distance))
end
| loop _ = let exception ParseInternal
in raise ParseInternal
end
in loop (distanceParse(lexPair,startStack,startQueue,distance))
end
end;

View File

@ -0,0 +1,19 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
(* Stream: a structure implementing a lazy stream. The signature STREAM
is found in base.sig *)
structure Stream :> STREAM =
struct
datatype 'a str = EVAL of 'a * 'a str ref | UNEVAL of (unit->'a)
type 'a stream = 'a str ref
fun get(ref(EVAL t)) = t
| get(s as ref(UNEVAL f)) =
let val t = (f(), ref(UNEVAL f)) in s := EVAL t; t end
fun streamify f = ref(UNEVAL f)
fun cons(a,s) = ref(EVAL(a,s))
end;

View File

@ -0,0 +1,19 @@
local
structure File = OS.FileSys
structure Path = OS.Path
val pwd = File.getDir
val cd = File.chDir
in
(* pushd path; use file.sml; popd *)
fun load path =
let
val pwd = pwd ()
in
cd (Path.getParent path);
use (Path.file path) handle exn => (cd pwd; raise exn);
cd pwd
end
end

3
mlyacc-polyml/src/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/yacc.grm.sig
/yacc.grm.sml
/yacc.lex.sml

56
mlyacc-polyml/src/FILES Normal file
View File

@ -0,0 +1,56 @@
Base files; used by all parsers generated by ML-Yacc, included
ML-Yacc's own parser.
../lib/base.sig
../lib/stream.sml
../lib/lrtable.sml
../lib/join.sml
../lib/parser2.sml
Signatures and parser for ML-Yacc.
utils.sig
sigs.sml
hdr.sml
yacc.grm.sig
yacc.grm.sml
yacc.lex.sml
parse.sml
LR table generator:
base definitions:
grammar.sml
LR(0) graph generation:
intgrammar.sml
core.sml
coreutils.sml
graph.sml
LALR(1) table generation:
look.sml
lalr.sml
mklrtable.sml
modules to print out table structure:
mkprstruct.sml
shrink.sml
and verbose file:
verbose.sml
Rest of ML-Yacc:
Signature and module to handle abstract syntax for actions and remove
unused variable bindings from the abstract syntax:
absyn.sig
absyn.sml
module to check specification for errors, create grammar from
specification, have appropriate files printed out, and print out
semantic actions for the parser:
yacc.sml
module to hook everything together:
link.sml

2
mlyacc-polyml/src/README Normal file
View File

@ -0,0 +1,2 @@
These are the sources for the parser-generator part of ML-Yacc.
The file FILES explains what each file contains.

View File

@ -0,0 +1,29 @@
(* Modified by Vesa Karvonen on 2007-12-18.
* Create line directives in output.
*)
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
signature ABSYN =
sig
datatype exp = EVAR of string
| EAPP of exp * exp
| ETUPLE of exp list
| EINT of int
| FN of pat * exp
| LET of decl list * exp
| UNIT
| SEQ of exp * exp
| CODE of {text : string, pos : Header.pos}
and pat = PVAR of string
| PAPP of string * pat
| PTUPLE of pat list
| PLIST of pat list * pat option
| PINT of int
| WILD
| AS of string * pat
and decl = VB of pat * exp
and rule = RULE of pat * exp
val printRule : ((string -> unit) * (string -> unit)
* (Header.pos option -> string))
-> rule -> unit
end

166
mlyacc-polyml/src/absyn.sml Normal file
View File

@ -0,0 +1,166 @@
(* Modified by Vesa Karvonen on 2007-12-18.
* Create line directives in output.
*)
(* ML-Yacc Parser Generator (c) 1991 Andrew W. Appel, David R. Tarditi *)
structure Absyn : ABSYN =
struct
datatype exp
= CODE of {text : string, pos : Header.pos}
| EAPP of exp * exp
| EINT of int
| ETUPLE of exp list
| EVAR of string
| FN of pat * exp
| LET of decl list * exp
| SEQ of exp * exp
| UNIT
and pat
= PVAR of string
| PAPP of string * pat
| PINT of int
| PLIST of pat list * pat option
| PTUPLE of pat list
| WILD
| AS of string * pat
and decl = VB of pat * exp
and rule = RULE of pat * exp
fun idchar #"'" = true
| idchar #"_" = true
| idchar c = Char.isAlpha c orelse Char.isDigit c
fun code_to_ids s = let
fun g(nil,r) = r
| g(a as (h::t),r) = if Char.isAlpha h then f(t,[h],r) else g(t,r)
and f(nil,accum,r)= implode(rev accum)::r
| f(a as (h::t),accum,r) =
if idchar h then f(t,h::accum,r) else g(a,implode (rev accum) :: r)
in g(explode s,nil)
end
val simplifyRule : rule -> rule = fn (RULE(p,e)) =>
let val used : (string -> bool) =
let fun f(CODE s) = code_to_ids (#text s)
| f(EAPP(a,b)) = f a @ f b
| f(ETUPLE l) = List.concat (map f l)
| f(EVAR s) = [s]
| f(FN(_,e)) = f e
| f(LET(dl,e)) =
(List.concat (map (fn VB(_,e) => f e) dl)) @ f e
| f(SEQ(a,b)) = f a @ f b
| f _ = nil
val identifiers = f e
in fn s => List.exists (fn a=>a=s) identifiers
end
val simplifyPat : pat -> pat =
let fun f a =
case a
of (PVAR s) => if used s then a else WILD
| (PAPP(s,pat)) =>
(case f pat
of WILD => WILD
| pat' => PAPP(s,pat'))
| (PLIST (l, topt)) =>
let val l' = map f l
val topt' = Option.map f topt
fun notWild WILD = false
| notWild _ = true
in case topt' of
SOME WILD => if List.exists notWild l' then
PLIST (l', topt')
else WILD
| _ => PLIST (l', topt')
end
| (PTUPLE l) =>
let val l' = map f l
in if List.exists(fn WILD=>false | _ => true) l'
then PTUPLE l'
else WILD
end
| (AS(a,b)) =>
if used a then
case f b of
WILD => PVAR a
| b' => AS(a,b')
else f b
| _ => a
in f
end
val simplifyExp : exp -> exp =
let fun f(EAPP(a,b)) = EAPP(f a,f b)
| f(ETUPLE l) = ETUPLE(map f l)
| f(FN(p,e)) = FN(simplifyPat p,f e)
| f(LET(dl,e)) =
LET(map (fn VB(p,e) =>
VB(simplifyPat p,f e)) dl,
f e)
| f(SEQ(a,b)) = SEQ(f a,f b)
| f a = a
in f
end
in RULE(simplifyPat p,simplifyExp e)
end
fun printRule (say : string -> unit, sayln:string -> unit, fmtPos) r = let
fun flat (a, []) = rev a
| flat (a, SEQ (e1, e2) :: el) = flat (a, e1 :: e2 :: el)
| flat (a, e :: el) = flat (e :: a, el)
fun pl (lb, rb, c, f, [], a) = " " :: lb :: rb :: a
| pl (lb, rb, c, f, h :: t, a) =
" " :: lb :: f (h, foldr (fn (x, a) => c :: f (x, a))
(rb :: a)
t)
fun pe (CODE {text, pos}, a) =
" (" :: fmtPos (SOME pos) :: text :: fmtPos NONE :: ")" :: a
| pe (EAPP (x, y as (EAPP _)), a) =
pe (x, " (" :: pe (y, ")" :: a))
| pe (EAPP (x, y), a) =
pe (x, pe (y, a))
| pe (EINT i, a) =
" " :: Int.toString i :: a
| pe (ETUPLE l, a) = pl ("(", ")", ",", pe, l, a)
| pe (EVAR v, a) =
" " :: v :: a
| pe (FN (p, b), a) =
" (fn" :: pp (p, " =>" :: pe (b, ")" :: a))
| pe (LET ([], b), a) =
pe (b, a)
| pe (LET (dl, b), a) =
let fun pr (VB (p, e), a) =
" val " :: pp (p, " =" :: pe (e, "\n" :: a))
in " let" :: foldr pr (" in" :: pe (b, "\nend" :: a)) dl
end
| pe (SEQ (e1, e2), a) =
pl ("(", ")", ";", pe, flat ([], [e1, e2]), a)
| pe (UNIT, a) =
" ()" :: a
and pp (PVAR v, a) =
" " :: v :: a
| pp (PAPP (x, y as PAPP _), a) =
" " :: x :: " (" :: pp (y, ")" :: a)
| pp (PAPP (x, y), a) =
" " :: x :: pp (y, a)
| pp (PINT i, a) =
" " :: Int.toString i :: a
| pp (PLIST (l, NONE), a) =
pl ("[", "]", ",", pp, l, a)
| pp (PLIST (l, SOME t), a) =
" (" :: foldr (fn (x, a) => pp (x, " ::" :: a))
(pp (t, ")" :: a))
l
| pp (PTUPLE l, a) =
pl ("(", ")", ",", pp, l, a)
| pp (WILD, a) =
" _" :: a
| pp (AS (v, PVAR v'), a) =
" (" :: v :: " as " :: v' :: ")" :: a
| pp (AS (v, p), a) =
" (" :: v :: " as (" :: pp (p, "))" :: a)
fun out "\n" = sayln ""
| out s = say s
in
case simplifyRule r of
RULE (p, e) => app out (pp (p, " =>" :: pe (e, ["\n"])))
end
end;

View File

@ -0,0 +1,73 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor mkCore(structure IntGrammar : INTGRAMMAR) : CORE =
struct
open IntGrammar
open Grammar
structure IntGrammar = IntGrammar
structure Grammar = Grammar
datatype item = ITEM of
{ rule : rule,
dot : int,
rhsAfter : symbol list
}
val eqItem = fn (ITEM{rule=RULE{num=n,...},dot=d,...},
ITEM{rule=RULE{num=m,...},dot=e,...}) =>
n=m andalso d=e
val gtItem = fn (ITEM{rule=RULE{num=n,...},dot=d,...},
ITEM{rule=RULE{num=m,...},dot=e,...}) =>
n>m orelse (n=m andalso d>e)
structure ItemList = ListOrdSet
(struct
type elem = item
val eq = eqItem
val gt = gtItem
end)
open ItemList
datatype core = CORE of item list * int
val gtCore = fn (CORE (a,_),CORE (b,_)) => ItemList.set_gt(a,b)
val eqCore = fn (CORE (a,_),CORE (b,_)) => ItemList.set_eq(a,b)
(* functions for printing and debugging *)
val prItem = fn (symbolToString,nontermToString,print) =>
let val printInt = print o (Int.toString : int -> string)
val prSymbol = print o symbolToString
val prNonterm = print o nontermToString
fun showRest nil = ()
| showRest (h::t) = (prSymbol h; print " "; showRest t)
fun showRhs (l,0) = (print ". "; showRest l)
| showRhs (nil,_) = ()
| showRhs (h::t,n) = (prSymbol h;
print " ";
showRhs(t,n-1))
in fn (ITEM {rule=RULE {lhs,rhs,rulenum,num,...},
dot,rhsAfter,...}) =>
(prNonterm lhs; print " : "; showRhs(rhs,dot);
case rhsAfter
of nil => (print " (reduce by rule ";
printInt rulenum;
print ")")
| _ => ();
if DEBUG then
(print " (num "; printInt num; print ")")
else ())
end
val prCore = fn a as (_,_,print) =>
let val prItem = prItem a
in fn (CORE (items,state)) =>
(print "state ";
print (Int.toString state);
print ":\n\n";
app (fn i => (print "\t";
prItem i; print "\n")) items;
print "\n")
end
end;

View File

@ -0,0 +1,239 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor mkCoreUtils(structure Core : CORE) : CORE_UTILS =
struct
val sub = Array.sub
infix 9 sub
val DEBUG = true
structure Core = Core
structure IntGrammar = Core.IntGrammar
structure Grammar = IntGrammar.Grammar
open Grammar IntGrammar Core
structure Assoc = SymbolAssoc
structure NtList = ListOrdSet
(struct
type elem = nonterm
val eq = eqNonterm
val gt = gtNonterm
end)
val mkFuncs = fn (GRAMMAR {rules,terms,nonterms,...}) =>
let val derives=Array.array(nonterms,nil : rule list)
(* sort rules by their lhs nonterminal by placing them in an array indexed
in their lhs nonterminal *)
val _ =
let val f = fn {lhs=lhs as (NT n), rhs, precedence,rulenum} =>
let val rule=RULE{lhs=lhs,rhs=rhs,precedence=precedence,
rulenum=rulenum,num=0}
in Array.update(derives,n,rule::(derives sub n))
end
in app f rules
end
(* renumber rules so that rule numbers increase monotonically with
the number of their lhs nonterminal, and so that rules are numbered
sequentially. **Functions below assume that this number is true**,
i.e. productions for nonterm i are numbered from j to k,
productions for nonterm i+1 are numbered from k+1 to m, and
productions for nonterm 0 start at 0 *)
val _ =
let val f =
fn (RULE{lhs,rhs,precedence,rulenum,num}, (l,i)) =>
(RULE{lhs=lhs,rhs=rhs, precedence=precedence,
rulenum=rulenum, num=i}::l,i+1)
fun g(i,num) =
if i<nonterms then
let val (l,n) =
List.foldr f ([], num) (derives sub i)
in Array.update(derives,i,rev l); g(i+1,n)
end
else ()
in g(0,0)
end
(* list of rules - sorted by rule number. *)
val rules =
let fun g i =
if i < nonterms then (derives sub i) @ (g (i+1))
else nil
in g 0
end
(* produces: set of productions with nonterminal n as the lhs. The set
of productions *must* be sorted by rule number, because functions
below assume that this list is sorted *)
val produces = fn (NT n) =>
if DEBUG andalso (n<0 orelse n>=nonterms) then
let exception Produces of int in raise (Produces n) end
else derives sub n
val memoize = fn f =>
let fun loop i = if i = nonterms then nil
else f (NT i) :: (loop (i+1))
val data = Array.fromList(loop 0)
in fn (NT i) => data sub i
end
(* compute nonterminals which must be added to a closure when a given
nonterminal is added, i.e all nonterminals C for each nonterminal A such
that A =*=> Cx *)
val nontermClosure =
let val collectNonterms = fn n =>
List.foldr (fn (r,l) =>
case r
of RULE {rhs=NONTERM n :: _,...} =>
NtList.insert(n,l)
| _ => l) NtList.empty (produces n)
val closureNonterm = fn n =>
NtList.closure(NtList.singleton n,
collectNonterms)
in memoize closureNonterm
end
(* ntShifts: Take the items produced by a nonterminal, and sort them
by their first symbol. For each first symbol, make sure the item
list associated with the symbol is sorted also. ** This function
assumes that the item list returned by produces is sorted **
Create a table of item lists keyed by symbols. Scan the list
of items produced by a nonterminal, and insert those with a first
symbol on to the beginning of the item list for that symbol, creating
a list if necessary. Since produces returns an item list that is
already in order, the list for each symbol will also end up in order.
*)
fun sortItems nt =
let fun add_item (a as RULE{rhs=symbol::rest,...},r) =
let val item = ITEM{rule=a,dot=1,rhsAfter=rest}
in Assoc.insert((symbol,case Assoc.find (symbol,r)
of SOME l => item::l
| NONE => [item]),r)
end
| add_item (_,r) = r
in List.foldr add_item Assoc.empty (produces nt)
end
val ntShifts = memoize sortItems
(* getNonterms: get the nonterminals with a . before them in a core.
Returns a list of nonterminals in ascending order *)
fun getNonterms l =
List.foldr (fn (ITEM {rhsAfter=NONTERM sym ::_, ...},r) =>
NtList.insert(sym,r)
| (_,r) => r) [] l
(* closureNonterms: compute the nonterminals that would have a . before them
in the closure of the core. Returns a list of nonterminals in ascending
order *)
fun closureNonterms a =
let val nonterms = getNonterms a
in List.foldr (fn (nt,r) =>
NtList.union(nontermClosure nt,r))
nonterms nonterms
end
(* shifts: compute the core sets that result from shift/gotoing on
the closure of a kernal set. The items in core sets are sorted, of
course.
(1) compute the core sets that result just from items added
through the closure operation.
(2) then add the shift/gotos on kernal items.
We can do (1) the following way. Keep a table which for each shift/goto
symbol gives the list of items that result from shifting or gotoing on the
symbol. Compute the nonterminals that would have dots before them in the
closure of the kernal set. For each of these nonterminals, we already have an
item list in sorted order for each possible shift symbol. Scan the nonterminal
list from back to front. For each nonterminal, prepend the shift/goto list
for each shift symbol to the list already in the table.
We end up with the list of items in correct order for each shift/goto
symbol. We have kept the item lists in order, scanned the nonterminals from
back to front (=> that the items end up in ascending order), and never had any
duplicate items (each item is derived from only one nonterminal). *)
fun shifts (CORE (itemList,_)) =
let
(* mergeShiftItems: add an item list for a shift/goto symbol to the table *)
fun mergeShiftItems (args as ((k,l),r)) =
case Assoc.find(k,r)
of NONE => Assoc.insert args
| SOME old => Assoc.insert ((k,l@old),r)
(* mergeItems: add all items derived from a nonterminal to the table. We've
kept these items sorted by their shift/goto symbol (the first symbol on
their rhs) *)
fun mergeItems (n,r) =
Assoc.fold mergeShiftItems (ntShifts n) r
(* nonterms: a list of nonterminals that are in a core after the
closure operation *)
val nonterms = closureNonterms itemList
(* now create a table which for each shift/goto symbol gives the sorted list
of closure items which would result from first taking all the closure items
and then sorting them by the shift/goto symbols *)
val newsets = List.foldr mergeItems Assoc.empty nonterms
(* finally prepare to insert the kernal items of a core *)
fun insertItem ((k,i),r) =
case (Assoc.find(k,r))
of NONE => Assoc.insert((k,[i]),r)
| SOME l => Assoc.insert((k,Core.insert(i,l)),r)
fun shiftCores(ITEM{rule,dot,rhsAfter=symbol::rest},r) =
insertItem((symbol,
ITEM{rule=rule,dot=dot+1,rhsAfter=rest}),r)
| shiftCores(_,r) = r
(* insert the kernal items of a core *)
val newsets = List.foldr shiftCores newsets itemList
in Assoc.make_list newsets
end
(* nontermEpsProds: returns a list of epsilon productions produced by a
nonterminal sorted by rule number. ** Depends on produces returning
an ordered list **. It does not alter the order in which the rules
were returned by produces; it only removes non-epsilon productions *)
val nontermEpsProds =
let val f = fn nt =>
List.foldr
(fn (rule as RULE {rhs=nil,...},results) => rule :: results
| (_,results) => results)
[] (produces nt)
in memoize f
end
(* epsProds: take a core and compute a list of epsilon productions for it
sorted by rule number. ** Depends on closureNonterms returning a list
of nonterminals sorted by nonterminal #, rule numbers increasing
monotonically with their lhs production #, and nontermEpsProds returning
an ordered item list for each production
*)
fun epsProds (CORE (itemList,state)) =
let val prods = map nontermEpsProds (closureNonterms itemList)
in List.concat prods
end
in {produces=produces,shifts=shifts,rules=rules,epsProds=epsProds}
end
end;

View File

@ -0,0 +1,101 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
structure Grammar : GRAMMAR =
struct
(* define types term and nonterm using those in LrTable
datatype term = T of int
datatype nonterm = NT of int *)
open LrTable
datatype symbol = TERM of term | NONTERM of nonterm
datatype grammar = GRAMMAR of
{rules: {lhs: nonterm,
rhs: symbol list,
precedence: int option,
rulenum: int} list,
noshift : term list,
eop : term list,
terms: int,
nonterms: int,
start : nonterm,
precedence : term -> int option,
termToString : term -> string,
nontermToString : nonterm -> string}
end;
structure IntGrammar : INTGRAMMAR =
struct
structure Grammar = Grammar
open Grammar
datatype rule = RULE of
{lhs: nonterm,
rhs: symbol list,
num: int,(* internal # assigned by coreutils *)
rulenum: int,
precedence: int option}
val eqTerm : term * term -> bool = (op =)
val gtTerm : term * term -> bool = fn (T i,T j) => i>j
val eqNonterm : nonterm * nonterm -> bool = (op =)
val gtNonterm : nonterm * nonterm -> bool =
fn (NT i,NT j) => i>j
val eqSymbol : symbol * symbol -> bool = (op =)
val gtSymbol = fn (TERM (T i),TERM (T j)) => i>j
| (NONTERM (NT i),NONTERM (NT j)) => i>j
| (TERM _,NONTERM _) => false
| (NONTERM _,TERM _) => true
structure SymbolAssoc = Table(type key = symbol
val gt = gtSymbol)
structure NontermAssoc = Table(type key = nonterm
val gt = gtNonterm)
val DEBUG = false
val prRule = fn (a as symbolToString,nontermToString,print) =>
let val printSymbol = print o symbolToString
fun printRhs (h::t) = (printSymbol h; print " ";
printRhs t)
| printRhs nil = ()
in fn (RULE {lhs,rhs,num,rulenum,precedence,...}) =>
((print o nontermToString) lhs; print " : ";
printRhs rhs;
if DEBUG then (print " num = ";
print (Int.toString num);
print " rulenum = ";
print (Int.toString rulenum);
print " precedence = ";
case precedence
of NONE => print " none"
| (SOME i) =>
print (Int.toString i);
())
else ())
end
val prGrammar =
fn (a as (symbolToString,nontermToString,print)) =>
fn (GRAMMAR {rules,terms,nonterms,start,...}) =>
let val printRule =
let val prRule = prRule a
in fn {lhs,rhs,precedence,rulenum} =>
(prRule (RULE {lhs=lhs,rhs=rhs,num=0,
rulenum=rulenum, precedence=precedence});
print "\n")
end
in print "grammar = \n";
List.app printRule rules;
print "\n";
print (" terms = " ^ (Int.toString terms) ^
" nonterms = " ^ (Int.toString nonterms) ^
" start = ");
(print o nontermToString) start;
()
end
end;

View File

@ -0,0 +1,99 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor mkGraph(structure IntGrammar : INTGRAMMAR
structure Core : CORE
structure CoreUtils : CORE_UTILS
sharing IntGrammar = Core.IntGrammar = CoreUtils.IntGrammar
sharing CoreUtils.Core = Core
) : LRGRAPH =
struct
val sub = Array.sub
infix 9 sub
structure Core = Core
structure Grammar = IntGrammar.Grammar
structure IntGrammar = IntGrammar
open Core Core.Grammar CoreUtils IntGrammar
structure NodeSet = RbOrdSet
(struct
type elem = core
val eq = eqCore
val gt = gtCore
end)
open NodeSet
exception Shift of int * symbol
type graph = {edges: {edge:symbol,to:core} list array,
nodes: core list,nodeArray : core array}
val edges = fn (CORE (_,i),{edges,...}:graph) => edges sub i
val nodes = fn ({nodes,...} : graph) => nodes
val shift = fn ({edges,nodes,...} : graph) => fn a as (i,sym) =>
let fun find nil = raise (Shift a)
| find ({edge,to=CORE (_,state)} :: r) =
if gtSymbol(sym,edge) then find r
else if eqSymbol(edge,sym) then state
else raise (Shift a)
in find (edges sub i)
end
val core = fn ({nodeArray,...} : graph) =>
fn i => nodeArray sub i
val mkGraph = fn (g as (GRAMMAR {start,...})) =>
let val {shifts,produces,rules,epsProds} =
CoreUtils.mkFuncs g
fun add_goto ((symbol,a),(nodes,edges,future,num)) =
case find(CORE (a,0),nodes)
of NONE =>
let val core =CORE (a,num)
val edge = {edge=symbol,to=core}
in (insert(core,nodes),edge::edges,
core::future,num+1)
end
| (SOME c) =>
let val edge={edge=symbol,to=c}
in (nodes,edge::edges,future,num)
end
fun f (nodes,node_list,edge_list,nil,nil,num) =
let val nodes=rev node_list
in {nodes=nodes,
edges=Array.fromList (rev edge_list),
nodeArray = Array.fromList nodes
}
end
| f (nodes,node_list,edge_list,nil,y,num) =
f (nodes,node_list,edge_list,rev y,nil,num)
| f (nodes,node_list,edge_list,h::t,y,num) =
let val (nodes,edges,future,num) =
List.foldr add_goto (nodes,[],y,num) (shifts h)
in f (nodes,h::node_list,
edges::edge_list,t,future,num)
end
in {graph=
let val makeItem = fn (r as (RULE {rhs,...})) =>
ITEM{rule=r,dot=0,rhsAfter=rhs}
val initialItemList = map makeItem (produces start)
val orderedItemList =
List.foldr Core.insert [] initialItemList
val initial = CORE (orderedItemList,0)
in f(empty,nil,nil,[initial],nil,1)
end,
produces=produces,
rules=rules,
epsProds=epsProds}
end
val prGraph = fn a as (nontermToString,termToString,print) => fn g =>
let val printCore = prCore a
val printSymbol = print o nontermToString
val nodes = nodes g
val printEdges = fn n =>
List.app (fn {edge,to=CORE (_,state)} =>
(print "\tshift on ";
printSymbol edge;
print " to ";
print (Int.toString state);
print "\n")) (edges (n,g))
in List.app (fn c => (printCore c; print "\n"; printEdges c)) nodes
end
end;

110
mlyacc-polyml/src/hdr.sml Normal file
View File

@ -0,0 +1,110 @@
(* Modified by Vesa Karvonen on 2007-12-18.
* Create line directives in output.
*)
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor HeaderFun () : HEADER =
struct
val DEBUG = true
type pos = {line : int, col : int}
val pos = {line = ref 1, start = ref 0}
val text = ref (nil: string list)
type inputSource = {name : string,
errStream : TextIO.outstream,
inStream : TextIO.instream,
errorOccurred : bool ref}
val newSource =
fn (s : string,i : TextIO.instream ,errs : TextIO.outstream) =>
{name=s,errStream=errs,inStream=i,
errorOccurred = ref false}
val errorOccurred = fn (s : inputSource) =>fn () => !(#errorOccurred s)
val pr = fn out : TextIO.outstream => fn s : string => TextIO.output(out,s)
val error = fn {name,errStream, errorOccurred,...} : inputSource =>
let val pr = pr errStream
in fn l : pos => fn msg : string =>
(pr name; pr ", line "; pr (Int.toString (#line l)); pr ": Error: ";
pr msg; pr "\n"; errorOccurred := true)
end
val warn = fn {name,errStream, errorOccurred,...} : inputSource =>
let val pr = pr errStream
in fn l : pos => fn msg : string =>
(pr name; pr ", line "; pr (Int.toString (#line l)); pr ": Warning: ";
pr msg; pr "\n")
end
datatype prec = LEFT | RIGHT | NONASSOC
datatype symbol = SYMBOL of string * pos
val symbolName = fn SYMBOL(s,_) => s
val symbolPos = fn SYMBOL(_,p) => p
val symbolMake = fn sp => SYMBOL sp
type ty = string
val tyName = fn i => i
val tyMake = fn i => i
datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol |
FUNCTOR of string | START_SYM of symbol |
NSHIFT of symbol list | POS of string | PURE |
PARSE_ARG of string * string |
TOKEN_SIG_INFO of string
datatype declData = DECL of
{eop : symbol list,
keyword : symbol list,
nonterm : (symbol*ty option) list option,
prec : (prec * (symbol list)) list,
change: (symbol list * symbol list) list,
term : (symbol* ty option) list option,
control : control list,
value : (symbol * string) list}
type rhsData = {rhs:symbol list,code:string, prec:symbol option} list
datatype rule = RULE of {lhs : symbol, rhs : symbol list,
code : {text : string, pos : pos},
prec : symbol option}
type parseResult = string * declData * rule list
val getResult = fn p => p
fun join_decls
(DECL {eop=e,control=c,keyword=k,nonterm=n,prec,
change=su,term=t,value=v}:declData,
DECL {eop=e',control=c',keyword=k',nonterm=n',prec=prec',
change=su',term=t',value=v'} : declData,
inputSource,pos) =
let val ignore = fn s =>
(warn inputSource pos ("ignoring duplicate " ^ s ^
" declaration"))
val join = fn (e,NONE,NONE) => NONE
| (e,NONE,a) => a
| (e,a,NONE) => a
| (e,a,b) => (ignore e; a)
fun mergeControl (nil,a) = [a]
| mergeControl (l as h::t,a) =
case (h,a)
of (PARSER_NAME _,PARSER_NAME n1) => (ignore "%name"; l)
| (FUNCTOR _,FUNCTOR _) => (ignore "%header"; l)
| (PARSE_ARG _,PARSE_ARG _) => (ignore "%arg"; l)
| (START_SYM _,START_SYM s) => (ignore "%start"; l)
| (POS _,POS _) => (ignore "%pos"; l)
| (TOKEN_SIG_INFO _, TOKEN_SIG_INFO _)
=> (ignore "%token_sig_info"; l)
| (NSHIFT a,NSHIFT b) => (NSHIFT (a@b)::t)
| _ => h :: mergeControl(t,a)
fun loop (nil,r) = r
| loop (h::t,r) = loop(t,mergeControl(r,h))
in DECL {eop=e@e',control=loop(c',c),keyword=k'@k,
nonterm=join("%nonterm",n,n'), prec=prec@prec',
change=su@su', term=join("%term",t,t'),value=v@v'} :
declData
end
end;
structure Header = HeaderFun();

464
mlyacc-polyml/src/lalr.sml Normal file
View File

@ -0,0 +1,464 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor mkLalr ( structure IntGrammar : INTGRAMMAR
structure Core : CORE
structure Graph : LRGRAPH
structure Look: LOOK
sharing Graph.Core = Core
sharing Graph.IntGrammar = Core.IntGrammar =
Look.IntGrammar = IntGrammar) : LALR_GRAPH =
struct
val sub = Array.sub
infix 9 sub
open IntGrammar.Grammar IntGrammar Core Graph Look
structure Graph = Graph
structure Core = Core
structure Grammar = IntGrammar.Grammar
structure IntGrammar = IntGrammar
datatype tmpcore = TMPCORE of (item * term list ref) list * int
datatype lcore = LCORE of (item * term list) list * int
val prLcore =
fn a as (SymbolToString,nontermToString,termToString,print) =>
let val printItem = prItem (SymbolToString,nontermToString,print)
val printLookahead = prLook(termToString,print)
in fn (LCORE (items,state)) =>
(print "\n";
print "state ";
print (Int.toString state);
print " :\n\n";
List.app (fn (item,lookahead) =>
(print "{";
printItem item;
print ",";
printLookahead lookahead;
print "}\n")) items)
end
exception Lalr of int
structure ItemList = ListOrdSet
(struct
type elem = item * term list ref
val eq = fn ((a,_),(b,_)) => eqItem(a,b)
val gt = fn ((a,_),(b,_)) => gtItem(a,b)
end)
structure NontermSet = ListOrdSet
(struct
type elem = nonterm
val gt = gtNonterm
val eq = eqNonterm
end)
(* NTL: nonterms with lookahead *)
structure NTL = RbOrdSet
(struct
type elem = nonterm * term list
val gt = fn ((i,_),(j,_)) => gtNonterm(i,j)
val eq = fn ((i,_),(j,_)) => eqNonterm(i,j)
end)
val DEBUG = false
val addLookahead = fn {graph,nullable,first,eop,
rules,produces,nonterms,epsProds,
print,termToString,nontermToString} =>
let
val eop = Look.make_set eop
val symbolToString = fn (TERM t) => termToString t
| (NONTERM t) => nontermToString t
val print = if DEBUG then print
else fn _ => ()
val prLook = if DEBUG then prLook (termToString,print)
else fn _ => ()
val prNonterm = print o nontermToString
val prRule = if DEBUG
then prRule(symbolToString,nontermToString,print)
else fn _ => ()
val printInt = print o (Int.toString : int -> string)
val printItem = prItem(symbolToString,nontermToString,print)
(* look_pos: position in the rhs of a rule at which we should start placing
lookahead ref cells, i.e. the minimum place at which A -> x .B y, where
B is a nonterminal and y =*=> epsilon, or A -> x. is true. Positions are
given by the number of symbols before the place. The place before the first
symbol is 0, etc. *)
val look_pos =
let val positions = Array.array(length rules,0)
(* rule_pos: calculate place in the rhs of a rule at which we should start
placing lookahead ref cells *)
fun rule_pos (RULE {rhs,...}) =
case (rev rhs) of
nil => 0
| (TERM t) :: r => length rhs
| (NONTERM n :: r) => let
(* f assumes that everything after n in the
* rule has proven to be nullable so far.
* Remember that the rhs has been reversed,
* implying that this is true initially *)
(* A -> .z t B y, where y is nullable *)
fun f (b, (r as (TERM _ :: _))) = length r
(* A -> .z B C y *)
| f (c, (NONTERM b :: r)) =
if nullable c then f (b, r)
else length r + 1
(* A -> .B y, where y is nullable *)
| f (_, []) = 0
in f (n, r)
end
val check_rule = fn (rule as RULE {num,...}) =>
let val pos = rule_pos rule
in (print "look_pos: ";
prRule rule;
print " = ";
printInt pos;
print "\n";
Array.update(positions,num,rule_pos rule))
end
in app check_rule rules;
fn RULE{num,...} => (positions sub num)
end
(* rest_is_null: true for items of the form A -> x .B y, where y is nullable *)
val rest_is_null =
fn (ITEM{rule,dot, rhsAfter=NONTERM _ :: _}) =>
dot >= (look_pos rule)
| _ => false
(* map core to a new core including only items of the form A -> x. or
A -> x. B y, where y =*=> epsilon. It also adds epsilon productions to the
core. Each item is given a ref cell to hold the lookahead nonterminals for
it.*)
val map_core =
let val f = fn (item as ITEM {rhsAfter=nil,...},r) =>
(item,ref nil) :: r
| (item,r) =>
if (rest_is_null item)
then (item,ref nil)::r
else r
in fn (c as CORE (items,state)) =>
let val epsItems =
map (fn rule=>(ITEM{rule=rule,dot=0,rhsAfter=nil},
ref (nil : term list))
) (epsProds c)
in TMPCORE(ItemList.union(List.foldr f [] items,epsItems),state)
end
end
val new_nodes = map map_core (nodes graph)
exception Find
(* findRef: state * item -> lookahead ref cell for item *)
val findRef =
let val states = Array.fromList new_nodes
val dummy = ref nil
in fn (state,item) =>
let val TMPCORE (l,_) = states sub state
in case ItemList.find((item,dummy),l)
of SOME (_,look_ref) => look_ref
| NONE => (print "find failed: state ";
printInt state;
print "\nitem =\n";
printItem item;
print "\nactual items =\n";
app (fn (i,_) => (printItem i;
print "\n")) l;
raise Find)
end
end
(* findRuleRefs: state -> rule -> lookahead refs for rule. *)
val findRuleRefs =
let val shift = shift graph
in fn state =>
(* handle epsilon productions *)
fn (rule as RULE {rhs=nil,...}) =>
[findRef(state,ITEM{rule=rule,dot=0,rhsAfter=nil})]
| (rule as RULE {rhs=sym::rest,...}) =>
let val pos = Int.max(look_pos rule,1)
fun scan'(state,nil,pos,result) =
findRef(state,ITEM{rule=rule,
dot=pos,
rhsAfter=nil}) :: result
| scan'(state,rhs as sym::rest,pos,result) =
scan'(shift(state,sym), rest, pos+1,
findRef(state,ITEM{rule=rule,
dot=pos,
rhsAfter=rhs})::result)
(* find first item of the form A -> x .B y, where y =*=> epsilon and
x is not epsilon, or A -> x. use scan' to pick up all refs after this
point *)
fun scan(state,nil,_) =
[findRef(state,ITEM{rule=rule,dot=pos,rhsAfter=nil})]
| scan(state,rhs,0) = scan'(state,rhs,pos,nil)
| scan(state,sym::rest,place) =
scan(shift(state,sym),rest,place-1)
in scan(shift(state,sym),rest,pos-1)
end
end
(* function to compute for some nonterminal n the set of nonterminals A added
through the closure of nonterminal n such that n =c*=> .A x, where x is
nullable *)
val nonterms_w_null = fn nt =>
let val collect_nonterms = fn n =>
List.foldr (fn (rule as RULE {rhs=rhs as NONTERM n :: _,...},r) =>
(case
(rest_is_null(ITEM {dot=0,rhsAfter=rhs,rule=rule}))
of true => n :: r
| false => r)
| (_,r) => r) [] (produces n)
fun dfs(a as (n,r)) =
if (NontermSet.exists a) then r
else List.foldr dfs (NontermSet.insert(n,r))
(collect_nonterms n)
in dfs(nt,NontermSet.empty)
end
val nonterms_w_null =
let val data = Array.array(nonterms,NontermSet.empty)
fun f n = if n=nonterms then ()
else (Array.update(data,n,nonterms_w_null (NT n));
f (n+1))
in (f 0; fn (NT nt) => data sub nt)
end
(* look_info: for some nonterminal n the set of nonterms A added
through the closure of the nonterminal such that n =c+=> .Ax and the
lookahead accumlated for each nonterm A *)
val look_info = fn nt =>
let val collect_nonterms = fn n =>
List.foldr (fn (RULE {rhs=NONTERM n :: t,...},r) =>
(case NTL.find ((n,nil),r)
of SOME (key,data) =>
NTL.insert((n,Look.union(data,first t)),r)
| NONE => NTL.insert ((n,first t),r))
| (_,r) => r)
NTL.empty (produces n)
fun dfs(a as ((key1,data1),r)) =
case (NTL.find a)
of SOME (_,data2) =>
NTL.insert((key1,Look.union(data1,data2)),r)
| NONE => NTL.fold dfs (collect_nonterms key1)
(NTL.insert a)
in dfs((nt,nil),NTL.empty)
end
val look_info =
if not DEBUG then look_info
else fn nt =>
(print "look_info of "; prNonterm nt; print "=\n";
let val info = look_info nt
in (NTL.app (fn (nt,lookahead) =>
(prNonterm nt; print ": "; prLook lookahead;
print "\n\n")) info;
info)
end)
(* prop_look: propagate lookaheads for nonterms added in the closure of a
nonterm. Lookaheads must be propagated from each nonterminal m to
all nonterminals { n | m =c+=> nx, where x=*=>epsilon} *)
val prop_look = fn ntl =>
let val upd_lookhd = fn new_look => fn (nt,r) =>
case NTL.find ((nt,new_look),r)
of SOME (_,old_look) =>
NTL.insert((nt, Look.union(new_look,old_look)),r)
| NONE => raise (Lalr 241)
val upd_nonterm = fn ((nt,look),r) =>
NontermSet.fold (upd_lookhd look)
(nonterms_w_null nt) r
in NTL.fold upd_nonterm ntl ntl
end
val prop_look =
if not DEBUG then prop_look
else fn ntl =>
(print "prop_look =\n";
let val info = prop_look ntl
in (NTL.app (fn (nt,lookahead) =>
(prNonterm nt;
print ": ";
prLook lookahead;
print "\n\n")) info; info)
end)
(* now put the information from these functions together. Create a function
which takes a nonterminal n and returns a list of triplets of
(a nonterm added through closure,
the lookahead for the nonterm,
whether the nonterm should include the lookahead for the nonterminal
whose closure is being taken (i.e. first(y) for an item j of the
form A -> x .n y and lookahead(j) if y =*=> epsilon)
*)
val closure_nonterms =
let val data =
Array.array(nonterms,nil: (nonterm * term list * bool) list)
val do_nonterm = fn i =>
let val nonterms_followed_by_null =
nonterms_w_null i
val nonterms_added_through_closure =
NTL.make_list (prop_look (look_info i))
val result =
map (fn (nt,l) =>
(nt,l,NontermSet.exists (nt,nonterms_followed_by_null))
) nonterms_added_through_closure
in if DEBUG then
(print "closure_nonterms = ";
prNonterm i;
print "\n";
app (fn (nt,look,nullable) =>
(prNonterm nt;
print ":";
prLook look;
case nullable
of false => print "(false)\n"
| true => print "(true)\n")) result;
print "\n")
else ();
result
end
fun f i =
if i=nonterms then ()
else (Array.update(data,i,do_nonterm (NT i)); f (i+1))
val _ = f 0
in fn (NT i) => data sub i
end
(* add_nonterm_lookahead: Add lookahead to all completion items for rules added
when the closure of a given nonterm in some state is taken. It returns
a list of lookahead refs to which the given nonterm's lookahead should
be propagated. For each rule, it must trace the shift/gotos in the LR(0)
graph to find all items of the form A-> x .B y where y =*=> epsilon or
A -> x.
*)
val add_nonterm_lookahead = fn (nt,state) =>
let val f = fn ((nt,lookahead,nullable),r) =>
let val refs = map (findRuleRefs state) (produces nt)
val refs = List.concat refs
val _ = app (fn r =>
r := (Look.union (!r,lookahead))) refs
in if nullable then refs @ r else r
end
in List.foldr f [] (closure_nonterms nt)
end
(* scan_core: Scan a core for all items of the form A -> x .B y. Applies
add_nonterm_lookahead to each such B, and then merges first(y) into
the list of refs returned by add_nonterm_lookahead. It returns
a list of ref * ref list for all the items where y =*=> epsilon *)
val scan_core = fn (CORE (l,state)) =>
let fun f ((item as ITEM{rhsAfter= NONTERM b :: y,
dot,rule})::t,r) =
(case (add_nonterm_lookahead(b,state))
of nil => r
| l =>
let val first_y = first y
val newr = if dot >= (look_pos rule)
then (findRef(state,item),l)::r
else r
in (app (fn r =>
r := Look.union(!r,first_y)) l;
f (t,newr))
end)
| f (_ :: t,r) = f (t,r)
| f (nil,r) = r
in f (l,nil)
end
(* add end-of-parse symbols to set of items consisting of all items
immediately derived from the start symbol *)
val add_eop = fn (c as CORE (l,state),eop) =>
let fun f (item as ITEM {rule,dot,...}) =
let val refs = findRuleRefs state rule
in
(* first take care of kernal items. Add the end-of-parse symbols to
the lookahead sets for these items. Epsilon productions of the
start symbol do not need to be handled specially because they will
be in the kernal also *)
app (fn r => r := Look.union(!r,eop)) refs;
(* now take care of closure items. These are all nonterminals C which
have a derivation S =+=> .C x, where x is nullable *)
if dot >= (look_pos rule) then
case item
of ITEM{rhsAfter=NONTERM b :: _,...} =>
(case add_nonterm_lookahead(b,state)
of nil => ()
| l => app (fn r => r := Look.union(!r,eop)) l)
| _ => ()
else ()
end
in app f l
end
val iterate = fn l =>
let fun f lookahead (nil,done) = done
| f lookahead (h::t,done) =
let val old = !h
in h := Look.union (old,lookahead);
if (length (!h)) <> (length old)
then f lookahead (t,false)
else f lookahead(t,done)
end
fun g ((from,to)::rest,done) =
let val new_done = f (!from) (to,done)
in g (rest,new_done)
end
| g (nil,done) = done
fun loop true = ()
| loop false = loop (g (l,true))
in loop false
end
val lookahead = List.concat (map scan_core (nodes graph))
(* used to scan the item list of a TMPCORE and remove the items not
being reduced *)
val create_lcore_list =
fn ((item as ITEM {rhsAfter=nil,...},ref l),r) =>
(item,l) :: r
| (_,r) => r
in add_eop(Graph.core graph 0,eop);
iterate lookahead;
map (fn (TMPCORE (l,state)) =>
LCORE (List.foldr create_lcore_list [] l, state)) new_nodes
end
end;

View File

@ -0,0 +1,42 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
local
(* create parser *)
structure LrVals = MlyaccLrValsFun(structure Token = LrParser.Token
structure Hdr = Header)
structure Lex = LexMLYACC(structure Tokens = LrVals.Tokens
structure Hdr = Header)
structure Parser = JoinWithArg(structure Lex=Lex
structure ParserData = LrVals.ParserData
structure LrParser= LrParser)
structure ParseGenParser =
ParseGenParserFun(structure Parser = Parser
structure Header = Header)
(* create structure for computing LALR table from a grammar *)
structure MakeLrTable = mkMakeLrTable(structure IntGrammar =IntGrammar
structure LrTable = LrTable)
(* create structures for printing LALR tables:
Verbose prints a verbose description of an lalr table
PrintStruct prints an ML structure representing that is an lalr table *)
structure Verbose = mkVerbose(structure Errs = MakeLrTable.Errs)
structure PrintStruct =
mkPrintStruct(structure LrTable = MakeLrTable.LrTable
structure ShrinkLrTable =
ShrinkLrTableFun(structure LrTable=LrTable))
in
(* returns function which takes a file name, invokes the parser on the file,
does semantic checks, creates table, and prints it *)
structure ParseGen = ParseGenFun(structure ParseGenParser = ParseGenParser
structure MakeTable = MakeLrTable
structure Verbose = Verbose
structure PrintStruct = PrintStruct
structure Absyn = Absyn)
end

View File

@ -0,0 +1,23 @@
use "utils.sig";
use "utils.sml";
use "sigs.sml";
use "hdr.sml";
use "yacc.grm.sig";
use "yacc.grm.sml";
use "yacc.lex.sml";
use "parse.sml";
use "grammar.sml";
use "core.sml";
use "coreutils.sml";
use "graph.sml";
use "look.sml";
use "lalr.sml";
use "mklrtable.sml";
use "mkprstruct.sml";
use "shrink.sml";
use "verbose.sml";
use "absyn.sig";
use "absyn.sml";
use "yacc.sml";
use "link.sml";
use "main.sml";

161
mlyacc-polyml/src/look.sml Normal file
View File

@ -0,0 +1,161 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor mkLook (structure IntGrammar : INTGRAMMAR) : LOOK =
struct
val sub = Array.sub
infix 9 sub
structure Grammar = IntGrammar.Grammar
structure IntGrammar = IntGrammar
open Grammar IntGrammar
structure TermSet = ListOrdSet
(struct
type elem = term
val eq = eqTerm
val gt = gtTerm
end)
val union = TermSet.union
val make_set = TermSet.make_set
val prLook = fn (termToString,print) =>
let val printTerm = print o termToString
fun f nil = print " "
| f (a :: b) = (printTerm a; print " "; f b)
in f
end
structure NontermSet = ListOrdSet
(struct
type elem = nonterm
val eq = eqNonterm
val gt = gtNonterm
end)
val mkFuncs = fn {rules : rule list, nonterms : int,
produces : nonterm -> rule list} =>
let
(* nullable: create a function which tells if a nonterminal is nullable
or not.
Method: Keep an array of booleans. The nth entry is true if
NT i is nullable. If is false if we don't know whether NT i
is nullable.
Keep a list of rules whose remaining rhs we must prove to be
null. First, scan the list of rules and remove those rules
whose rhs contains a terminal. These rules are not nullable.
Now iterate through the rules that were left:
(1) if there is no remaining rhs we have proved that
the rule is nullable, mark the nonterminal for the
rule as nullable
(2) if the first element of the remaining rhs is
nullable, place the rule back on the list with
the rest of the rhs
(3) if we don't know whether the nonterminal is nullable,
place it back on the list
(4) repeat until the list does not change.
We have found all the possible nullable rules.
*)
val nullable = let
fun add_rule (RULE { lhs, rhs, ... }, r) = let
fun addNT (TERM _, _) = NONE
| addNT (_, NONE) = NONE
| addNT (NONTERM (NT i), SOME ntlist) = SOME (i :: ntlist)
in
case foldr addNT (SOME []) rhs of
NONE => r
| SOME ntlist => (lhs, ntlist) :: r
end
val items = List.foldr add_rule [] rules
val nullable = Array.array(nonterms,false)
fun f ((NT i,nil),(l,_)) = (Array.update(nullable,i,true);
(l,true))
| f (a as (lhs,(h::t)),(l,change)) =
(case (nullable sub h) of
false => (a::l,change)
| true => ((lhs,t)::l,true))
fun prove(l,true) = prove(List.foldr f (nil,false) l)
| prove(_,false) = ()
in (prove(items,true); fn (NT i) => nullable sub i)
end
(* scanRhs : look at a list of symbols, scanning past nullable
nonterminals, applying addSymbol to the symbols scanned *)
fun scanRhs addSymbol =
let fun f (nil,result) = result
| f ((sym as NONTERM nt) :: rest,result) =
if nullable nt then f (rest,addSymbol(sym,result))
else addSymbol(sym,result)
| f ((sym as TERM _) :: _,result) = addSymbol(sym,result)
in f
end
(* accumulate: look at the start of the right-hand-sides of rules,
looking past nullable nonterminals, applying addObj to the visible
symbols. *)
fun accumulate(rules, empty, addObj) =
List.foldr (fn (RULE {rhs,...},r) =>(scanRhs addObj) (rhs,r)) empty rules
val nontermMemo = fn f =>
let val lookup = Array.array(nonterms,nil)
fun g i = if i=nonterms then ()
else (Array.update(lookup,i,f (NT i)); g (i+1))
in (g 0; fn (NT j) => lookup sub j)
end
(* first1: the FIRST set of a nonterminal in the grammar. Only looks
at other terminals, but it is clever enough to move past nullable
nonterminals at the start of a production. *)
fun first1 nt = accumulate(produces nt, TermSet.empty,
fn (TERM t, set) => TermSet.insert (t,set)
| (_, set) => set)
val first1 = nontermMemo(first1)
(* starters1: given a nonterminal "nt", return the set of nonterminals
which can start its productions. Looks past nullables, but doesn't
recurse *)
fun starters1 nt = accumulate(produces nt, nil,
fn (NONTERM nt, set) =>
NontermSet.insert(nt,set)
| (_, set) => set)
val starters1 = nontermMemo(starters1)
(* first: maps a nonterminal to its first-set. Get all the starters of
the nonterminal, get the first1 terminal set of each of these,
union the whole lot together *)
fun first nt =
List.foldr (fn (a,r) => TermSet.union(r,first1 a))
[] (NontermSet.closure (NontermSet.singleton nt, starters1))
val first = nontermMemo(first)
(* prefix: all possible terminals starting a symbol list *)
fun prefix symbols =
scanRhs (fn (TERM t,r) => TermSet.insert(t,r)
| (NONTERM nt,r) => TermSet.union(first nt,r))
(symbols,nil)
fun nullable_string ((TERM t) :: r) = false
| nullable_string ((NONTERM nt) :: r) =
(case (nullable nt)
of true => nullable_string r
| f => f)
| nullable_string nil = true
in {nullable = nullable, first = prefix}
end
end;

View File

@ -0,0 +1,29 @@
(* Copyright (C) 2020 Takayuki Goto.
*
* MLYacc-Poly/ML is imported from MLton.
* See the LICENSE file for details.
*)
structure Main =
struct
fun usage s =
raise Fail (concat[s, "\n", "Usage: ", CommandLine.name(), " ", "file.grm"])
fun main args =
case args
of [file] => ParseGen.parseGen file
| [] => usage "no file"
| _ => usage "too many files"
val main = fn () => (
main (CommandLine.arguments());
OS.Process.exit OS.Process.success
) handle Fail msg => (
print(concat["Fail: ", msg, "\n"]);
OS.Process.exit OS.Process.failure
) handle exn => (
print(concat[exnMessage exn, "\n"]);
OS.Process.exit OS.Process.failure
)
end

View File

@ -0,0 +1,388 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor mkMakeLrTable (structure IntGrammar : INTGRAMMAR
structure LrTable : LR_TABLE
sharing type LrTable.term = IntGrammar.Grammar.term
sharing type LrTable.nonterm = IntGrammar.Grammar.nonterm
) : MAKE_LR_TABLE =
struct
val sub = Array.sub
infix 9 sub
structure Core = mkCore(structure IntGrammar = IntGrammar)
structure CoreUtils = mkCoreUtils(structure IntGrammar = IntGrammar
structure Core = Core)
structure Graph = mkGraph(structure IntGrammar = IntGrammar
structure Core = Core
structure CoreUtils = CoreUtils)
structure Look = mkLook(structure IntGrammar = IntGrammar)
structure Lalr = mkLalr(structure IntGrammar = IntGrammar
structure Core = Core
structure Graph = Graph
structure Look = Look)
structure LrTable = LrTable
structure IntGrammar = IntGrammar
structure Grammar = IntGrammar.Grammar
structure GotoList = ListOrdSet
(struct
type elem = Grammar.nonterm * LrTable.state
val eq = fn ((Grammar.NT a,_),(Grammar.NT b,_)) => a=b
val gt = fn ((Grammar.NT a,_),(Grammar.NT b,_)) => a>b
end)
structure Errs : LR_ERRS =
struct
structure LrTable = LrTable
datatype err = RR of LrTable.term * LrTable.state * int * int
| SR of LrTable.term * LrTable.state * int
| NOT_REDUCED of int
| NS of LrTable.term * int
| START of int
val summary = fn l =>
let val numRR = ref 0
val numSR = ref 0
val numSTART = ref 0
val numNOT_REDUCED = ref 0
val numNS = ref 0
fun loop (h::t) =
(case h
of RR _ => numRR := !numRR+1
| SR _ => numSR := !numSR+1
| START _ => numSTART := !numSTART+1
| NOT_REDUCED _ => numNOT_REDUCED := !numNOT_REDUCED+1
| NS _ => numNS := !numNS+1; loop t)
| loop nil = {rr = !numRR, sr = !numSR,
start = !numSTART,
not_reduced = !numNOT_REDUCED,
nonshift = !numNS}
in loop l
end
val printSummary = fn say => fn l =>
let val {rr,sr,start,
not_reduced,nonshift} = summary l
val say_plural = fn (i,s) =>
(say (Int.toString i); say " ";
case i
of 1 => (say s)
| _ => (say s; say "s"))
val say_error = fn (args as (i,s)) =>
case i
of 0 => ()
| i => (say_plural args; say "\n")
in say_error(rr,"reduce/reduce conflict");
say_error(sr,"shift/reduce conflict");
if nonshift<>0 then
(say "non-shiftable terminal used on the rhs of ";
say_plural(start,"rule"); say "\n")
else ();
if start<>0 then (say "start symbol used on the rhs of ";
say_plural(start,"rule"); say "\n")
else ();
if not_reduced<>0 then (say_plural(not_reduced,"rule");
say " not reduced\n")
else ()
end
end
open IntGrammar Grammar Errs LrTable Core
(* rules for resolving conflicts:
shift/reduce:
If either the terminal or the rule has no
precedence, a shift/reduce conflict is reported.
A shift is chosen for the table.
If both have precedences, the action with the
higher precedence is chosen.
If the precedences are equal, neither the
shift nor the reduce is chosen.
reduce/reduce:
A reduce/reduce conflict is reported. The lowest
numbered rule is chosen for reduction.
*)
(* method for filling tables - first compute the reductions called for in a
state, then add the shifts for the state to this information.
How to compute the reductions:
A reduction initially is given as an item and a lookahead set calling
for reduction by that item. The first reduction is mapped to a list of
terminal * rule pairs. Each additional reduction is then merged into this
list and reduce/reduce conflicts are resolved according to the rule
given.
Missed Errors:
This method misses some reduce/reduce conflicts that exist because
some reductions are removed from the list before conflicting reductions
can be compared against them. All reduce/reduce conflicts, however,
can be generated given a list of the reduce/reduce conflicts generated
by this method.
This can be done by taking the transitive closure of the relation given
by the list. If reduce/reduce (a,b) and reduce/reduce (b,c) are true,
then reduce/reduce (a,c) is true. The relation is symmetric and transitive.
Adding shifts:
Finally scan the list merging in shifts and resolving conflicts
according to the rule given.
Missed Shift/Reduce Errors:
Some errors may be missed by this method because some reductions were
removed as the result of reduce/reduce conflicts. For a shift/reduce
conflict of term a, reduction by rule n, shift/reduce conficts exist
for all rules y such that reduce/reduce (x,y) or reduce/reduce (y,x)
is true.
*)
fun unREDUCE (REDUCE num) = num
| unREDUCE _ = raise Fail "bug: unexpected action (expected REDUCE)"
val mergeReduces =
let val merge = fn state =>
let fun f (j as (pair1 as (T t1,action1)) :: r1,
k as (pair2 as (T t2,action2)) :: r2,result,errs) =
if t1 < t2 then f(r1,k,pair1::result,errs)
else if t1 > t2 then f(j,r2,pair2::result,errs)
else let val num1 = unREDUCE action1
val num2 = unREDUCE action2
val errs = RR(T t1,state,num1,num2) :: errs
val action = if num1 < num2 then pair1 else pair2
in f(r1,r2,action::result,errs)
end
| f (nil,nil,result,errs) = (rev result,errs)
| f (pair1::r,nil,result,errs) = f(r,nil,pair1::result,errs)
| f (nil,pair2 :: r,result,errs) = f(nil,r,pair2::result,errs)
in f
end
in fn state => fn ((ITEM {rule=RULE {rulenum,...},...}, lookahead),
(reduces,errs)) =>
let val action = REDUCE rulenum
val actions = map (fn a=>(a,action)) lookahead
in case reduces
of nil => (actions,errs)
| _ => merge state (reduces,actions,nil,errs)
end
end
val computeActions = fn (rules,precedence,graph,defaultReductions) =>
let val rulePrec =
let val precData = Array.array(length rules,NONE : int option)
in app (fn RULE {rulenum=r,precedence=p,...} => Array.update(precData,r,p))
rules;
fn i => precData sub i
end
fun mergeShifts(state,shifts,nil) = (shifts,nil)
| mergeShifts(state,nil,reduces) = (reduces,nil)
| mergeShifts(state,shifts,reduces) =
let fun f(shifts as (pair1 as (T t1,_)) :: r1,
reduces as (pair2 as (T t2,action)) :: r2,
result,errs) =
if t1 < t2 then f(r1,reduces,pair1 :: result,errs)
else if t1 > t2 then f(shifts,r2,pair2 :: result,errs)
else let val rulenum = unREDUCE action
val (term1,_) = pair1
in case (precedence term1,rulePrec rulenum)
of (SOME i,SOME j) =>
if i>j then f(r1,r2,pair1 :: result,errs)
else if j>i then f(r1,r2,pair2 :: result,errs)
else f(r1,r2,(T t1, ERROR)::result,errs)
| (_,_) =>
f(r1,r2,pair1 :: result,
SR (term1,state,rulenum)::errs)
end
| f (nil,nil,result,errs) = (rev result,errs)
| f (nil,h::t,result,errs) =
f (nil,t,h::result,errs)
| f (h::t,nil,result,errs) =
f (t,nil,h::result,errs)
in f(shifts,reduces,nil,nil)
end
fun mapCore ({edge=symbol,to=CORE (_,state)}::r,shifts,gotos) =
(case symbol
of (TERM t) => mapCore (r,(t,SHIFT(STATE state))::shifts,gotos)
| (NONTERM nt) => mapCore(r,shifts,(nt,STATE state)::gotos)
)
| mapCore (nil,shifts,gotos) = (rev shifts,rev gotos)
fun pruneError ((_,ERROR)::rest) = pruneError rest
| pruneError (a::rest) = a :: pruneError rest
| pruneError nil = nil
in fn (Lalr.LCORE (reduceItems,state),c as CORE (shiftItems,state')) =>
if DEBUG andalso (state <> state') then
let exception MkTable in raise MkTable end
else
let val (shifts,gotos) = mapCore (Graph.edges(c,graph),nil,nil)
val tableState = STATE state
in case reduceItems
of nil => ((shifts,ERROR),gotos,nil)
| h :: nil =>
let val (ITEM {rule=RULE {rulenum,...},...}, l) = h
val (reduces,_) = mergeReduces tableState (h,(nil,nil))
val (actions,errs) = mergeShifts(tableState,
shifts,reduces)
val actions' = pruneError actions
val (actions,default) =
let fun hasReduce (nil,actions) =
(rev actions,REDUCE rulenum)
| hasReduce ((a as (_,SHIFT _)) :: r,actions) =
hasReduce(r,a::actions)
| hasReduce (_ :: r,actions) =
hasReduce(r,actions)
fun loop (nil,actions) = (rev actions,ERROR)
| loop ((a as (_,SHIFT _)) :: r,actions) =
loop(r,a::actions)
| loop ((a as (_,REDUCE _)) :: r,actions) =
hasReduce(r,actions)
| loop (_ :: r,actions) = loop(r,actions)
in if defaultReductions
andalso length actions = length actions'
then loop(actions,nil)
else (actions',ERROR)
end
in ((actions,default), gotos,errs)
end
| l =>
let val (reduces,errs1) =
List.foldr (mergeReduces tableState) (nil,nil) l
val (actions,errs2) =
mergeShifts(tableState,shifts,reduces)
in ((pruneError actions,ERROR),gotos,errs1@errs2)
end
end
end
val mkTable = fn (grammar as GRAMMAR{rules,terms,nonterms,start,
precedence,termToString,noshift,
nontermToString,eop},defaultReductions) =>
let val symbolToString = fn (TERM t) => termToString t
| (NONTERM nt) => nontermToString nt
val {rules,graph,produces,epsProds,...} = Graph.mkGraph grammar
val {nullable,first} =
Look.mkFuncs{rules=rules,produces=produces,nonterms=nonterms}
val lcores = Lalr.addLookahead
{graph=graph,
nullable=nullable,
produces=produces,
eop=eop,
nonterms=nonterms,
first=first,
rules=rules,
epsProds=epsProds,
print=(fn s=>TextIO.output(TextIO.stdOut,s)),
termToString = termToString,
nontermToString = nontermToString}
fun zip (h::t,h'::t') = (h,h') :: zip(t,t')
| zip (nil,nil) = nil
| zip _ = let exception MkTable in raise MkTable end
fun unzip l =
let fun f ((a,b,c)::r,j,k,l) = f(r,a::j,b::k,c::l)
| f (nil,j,k,l) = (rev j,rev k,rev l)
in f(l,nil,nil,nil)
end
val (actions,gotos,errs) =
let val doState =
computeActions(rules,precedence,graph,
defaultReductions)
in unzip (map doState (zip(lcores,Graph.nodes graph)))
end
(* add goto from state 0 to a new state. The new state
has accept actions for all of the end-of-parse symbols *)
val (actions,gotos,errs) =
case gotos
of nil => (actions,gotos,errs)
| h :: t =>
let val newStateActions =
(map (fn t => (t,ACCEPT)) (Look.make_set eop),ERROR)
val state0Goto =
GotoList.insert((start,STATE (length actions)),h)
in (actions @ [newStateActions],
state0Goto :: (t @ [nil]),
errs @ [nil])
end
val startErrs =
List.foldr (fn (RULE {rhs,rulenum,...},r) =>
if (List.exists (fn NONTERM a => a=start
| _ => false) rhs)
then START rulenum :: r
else r) [] rules
val nonshiftErrs =
List.foldr (fn (RULE {rhs,rulenum,...},r) =>
(List.foldr (fn (nonshift,r) =>
if (List.exists (fn TERM a => a=nonshift
| _ => false) rhs)
then NS(nonshift,rulenum) :: r
else r) r noshift)
) [] rules
val notReduced =
let val ruleReduced = Array.array(length rules,false)
val test = fn REDUCE i => Array.update(ruleReduced,i,true)
| _ => ()
val _ = app (fn (actions,default) =>
(app (fn (_,r) => test r) actions;
test default)
) actions;
fun scan (i,r) =
if i >= 0 then
scan(i-1, if ruleReduced sub i then r
else NOT_REDUCED i :: r)
else r
in scan(Array.length ruleReduced-1,nil)
end handle Subscript =>
(if DEBUG then
print "rules not numbered correctly!"
else (); nil)
val numstates = length actions
val allErrs = startErrs @ notReduced @ nonshiftErrs @
(List.concat errs)
fun convert_to_pairlist(nil : ('a * 'b) list): ('a,'b) pairlist =
EMPTY
| convert_to_pairlist ((a,b) :: r) =
PAIR(a,b,convert_to_pairlist r)
in (mkLrTable {actions=Array.fromList(map (fn (a,b) =>
(convert_to_pairlist a,b)) actions),
gotos=Array.fromList (map convert_to_pairlist gotos),
numRules=length rules,numStates=length actions,
initialState=STATE 0},
let val errArray = Array.fromList errs
in fn (STATE state) => errArray sub state
end,
fn print =>
let val printCore =
prCore(symbolToString,nontermToString,print)
val core = Graph.core graph
in fn STATE state =>
printCore (if state=(numstates-1) then
Core.CORE (nil,state)
else (core state))
end,
allErrs)
end
end;

View File

@ -0,0 +1,196 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor mkPrintStruct(structure LrTable : LR_TABLE
structure ShrinkLrTable : SHRINK_LR_TABLE
sharing LrTable = ShrinkLrTable.LrTable):PRINT_STRUCT =
struct
val sub = Array.sub
infix 9 sub
structure LrTable = LrTable
open ShrinkLrTable LrTable
(* lineLength = approximately the largest number of characters to allow
on a line when printing out an encode string *)
val lineLength = 72
(* maxLength = length of a table entry. All table entries are encoded
using two 16-bit integers, one for the terminal number and the other
for the entry. Each integer is printed as two characters (low byte,
high byte), using the ML ascii escape sequence. We need 4
characters for each escape sequence and 16 characters for each entry
*)
val maxLength = 16
(* number of entries we can fit on a row *)
val numEntries = lineLength div maxLength
(* convert integer between 0 and 255 to the three character ascii
decimal escape sequence for it *)
val chr =
let val lookup = Array.array(256,"\000")
val intToString = fn i =>
if i>=100 then "\\" ^ (Int.toString i)
else if i>=10 then "\\0" ^ (Int.toString i)
else "\\00" ^ (Int.toString i)
fun loop n = if n=256 then ()
else (Array.update(lookup,n,intToString n); loop (n+1))
in loop 0; fn i => lookup sub i
end
val makeStruct = fn {table,name,print,verbose} =>
let
val states = numStates table
val rules = numRules table
fun printPairList (prEntry : 'a * 'b -> unit) l =
let fun f (EMPTY,_) = ()
| f (PAIR(a,b,r),count) =
if count >= numEntries then
(print "\\\n\\"; prEntry(a,b); f(r,1))
else (prEntry(a,b); f(r,(count+1)))
in f(l,0)
end
val printList : ('a -> unit) -> 'a list -> unit =
fn prEntry => fn l =>
let fun f (nil,_) = ()
| f (a :: r,count) =
if count >= numEntries then
(print "\\\n\\"; prEntry a; f(r,1))
else (prEntry a; f(r,count+1))
in f(l,0)
end
val prEnd = fn _ => print "\\000\\000\\\n\\"
fun printPairRow prEntry =
let val printEntries = printPairList prEntry
in fn l => (printEntries l; prEnd())
end
fun printPairRowWithDefault (prEntry,prDefault) =
let val f = printPairRow prEntry
in fn (l,default) => (prDefault default; f l)
end
fun printTable (printRow,count) =
(print "\"\\\n\\";
let fun f i = if i=count then ()
else (printRow i; f (i+1))
in f 0
end;
print"\"\n")
val printChar = print o chr
(* print an integer between 0 and 2^16-1 as a 2-byte character,
with the low byte first *)
val printInt = fn i => (printChar (i mod 256);
printChar (i div 256))
(* encode actions as integers:
ACCEPT => 0
ERROR => 1
SHIFT i => 2 + i
REDUCE rulenum => numstates+2+rulenum
*)
val printAction =
fn (REDUCE rulenum) => printInt (rulenum+states+2)
| (SHIFT (STATE i)) => printInt (i+2)
| ACCEPT => printInt 0
| ERROR => printInt 1
val printTermAction = fn (T t,action) =>
(printInt (t+1); printAction action)
val printGoto = fn (NT n,STATE s) => (printInt (n+1); printInt s)
val ((rowCount,rowNumbers,actionRows),entries)=
shrinkActionList(table,verbose)
val getActionRow = let val a = Array.fromList actionRows
in fn i => a sub i
end
val printGotoRow : int -> unit =
let val f = printPairRow printGoto
val g = describeGoto table
in fn i => f (g (STATE i))
end
val printActionRow =
let val f = printPairRowWithDefault(printTermAction,printAction)
in fn i => f (getActionRow i)
end
in print "val ";
print name;
print "=";
print "let val actionRows =\n";
printTable(printActionRow,rowCount);
print "val actionRowNumbers =\n\"";
printList (fn i => printInt i) rowNumbers;
print "\"\n";
print "val gotoT =\n";
printTable(printGotoRow,states);
print "val numstates = ";
print (Int.toString states);
print "\nval numrules = ";
print (Int.toString rules);
print "\n\
\val s = ref \"\" and index = ref 0\n\
\val string_to_int = fn () => \n\
\let val i = !index\n\
\in index := i+2; Char.ord(String.sub(!s,i)) + Char.ord(String.sub(!s,i+1)) * 256\n\
\end\n\
\val string_to_list = fn s' =>\n\
\ let val len = String.size s'\n\
\ fun f () =\n\
\ if !index < len then string_to_int() :: f()\n\
\ else nil\n\
\ in index := 0; s := s'; f ()\n\
\ end\n\
\val string_to_pairlist = fn (conv_key,conv_entry) =>\n\
\ let fun f () =\n\
\ case string_to_int()\n\
\ of 0 => EMPTY\n\
\ | n => PAIR(conv_key (n-1),conv_entry (string_to_int()),f())\n\
\ in f\n\
\ end\n\
\val string_to_pairlist_default = fn (conv_key,conv_entry) =>\n\
\ let val conv_row = string_to_pairlist(conv_key,conv_entry)\n\
\ in fn () =>\n\
\ let val default = conv_entry(string_to_int())\n\
\ val row = conv_row()\n\
\ in (row,default)\n\
\ end\n\
\ end\n\
\val string_to_table = fn (convert_row,s') =>\n\
\ let val len = String.size s'\n\
\ fun f ()=\n\
\ if !index < len then convert_row() :: f()\n\
\ else nil\n\
\ in (s := s'; index := 0; f ())\n\
\ end\n\
\local\n\
\ val memo = Array.array(numstates+numrules,ERROR)\n\
\ val _ =let fun g i=(Array.update(memo,i,REDUCE(i-numstates)); g(i+1))\n\
\ fun f i =\n\
\ if i=numstates then g i\n\
\ else (Array.update(memo,i,SHIFT (STATE i)); f (i+1))\n\
\ in f 0 handle General.Subscript => ()\n\
\ end\n\
\in\n\
\val entry_to_action = fn 0 => ACCEPT | 1 => ERROR | j => Array.sub(memo,(j-2))\n\
\end\n\
\val gotoT=Array.fromList(string_to_table(string_to_pairlist(NT,STATE),gotoT))\n\
\val actionRows=string_to_table(string_to_pairlist_default(T,entry_to_action),actionRows)\n\
\val actionRowNumbers = string_to_list actionRowNumbers\n\
\val actionT = let val actionRowLookUp=\n\
\let val a=Array.fromList(actionRows) in fn i=>Array.sub(a,i) end\n\
\in Array.fromList(List.map actionRowLookUp actionRowNumbers)\n\
\end\n\
\in LrTable.mkLrTable {actions=actionT,gotos=gotoT,numRules=numrules,\n\
\numStates=numstates,initialState=STATE ";
print (Int.toString ((fn (STATE i) => i) (initialState table)));
print "}\nend\n";
entries
end
end;

View File

@ -0,0 +1,29 @@
(* Modified by Vesa Karvonen on 2007-12-18.
* Create line directives in output.
*)
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
functor ParseGenParserFun(structure Header : HEADER
structure Parser : ARG_PARSER
where type pos = Header.pos
sharing type Parser.result = Header.parseResult
sharing type Parser.arg = Header.inputSource =
Parser.lexarg
) : PARSE_GEN_PARSER =
struct
structure Header = Header
val parse = fn file =>
let
val in_str = TextIO.openIn file
val source = Header.newSource(file,in_str,TextIO.stdOut)
val error = fn (s : string,p:Header.pos,_) =>
Header.error source p s
val stream = Parser.makeLexer (fn i => (TextIO.inputN(in_str,i)))
source
val (result,_) = (#line Header.pos := 1; #start Header.pos := 0;
Header.text := nil;
Parser.parse(15,stream,error,source))
in (TextIO.closeIn in_str; (result,source))
end
end;

View File

@ -0,0 +1,222 @@
(* ML-Yacc Parser Generator (c) 1991 Andrew W. Appel, David R. Tarditi *)
signature SORT_ARG =
sig
type entry
val gt : entry * entry -> bool
end
signature SORT =
sig
type entry
val sort : entry list -> entry list
end
signature EQUIV_ARG =
sig
type entry
val gt : entry * entry -> bool
val eq : entry * entry -> bool
end
signature EQUIV =
sig
type entry
(* equivalences: take a list of entries and divides them into
equivalence classes numbered 0 to n-1.
It returns a triple consisting of:
* the number of equivalence classes
* a list which maps each original entry to an equivalence
class. The nth entry in this list gives the equivalence
class for the nth entry in the original entry list.
* a list which maps equivalence classes to some representative
element. The nth entry in this list is an element from the
nth equivalence class
*)
val equivalences : entry list -> (int * int list * entry list)
end
(* An O(n lg n) merge sort routine *)
functor MergeSortFun(A : SORT_ARG) : SORT =
struct
type entry = A.entry
(* sort: an O(n lg n) merge sort routine. We create a list of lists
and then merge these lists in passes until only one list is left.*)
fun sort nil = nil
| sort l =
let (* merge: merge two lists *)
fun merge (l as a::at,r as b::bt) =
if A.gt(a,b)
then b :: merge(l,bt)
else a :: merge(at,r)
| merge (l,nil) = l
| merge (nil,r) = r
(* scan: merge pairs of lists on a list of lists.
Reduces the number of lists by about 1/2 *)
fun scan (a :: b :: rest) = merge(a,b) :: scan rest
| scan l = l
(* loop: calls scan on a list of lists until only
one list is left. It terminates only if the list of
lists is nonempty. (The pattern match for sort
ensures this.) *)
fun loop (a :: nil) = a
| loop l = loop (scan l)
in loop (map (fn a => [a]) l)
end
end
(* an O(n lg n) routine for placing items in equivalence classes *)
functor EquivFun(A : EQUIV_ARG) : EQUIV =
struct
val sub = Array.sub
infix 9 sub
(* Our algorithm for finding equivalence class is simple. The basic
idea is to sort the entries and place duplicates entries in the same
equivalence class.
Let the original entry list be E. We map E to a list of a pairs
consisting of the entry and its position in E, where the positions
are numbered 0 to n-1. Call this list of pairs EP.
We then sort EP on the original entries. The second elements in the
pairs now specify a permutation that will return us to EP.
We then scan the sorted list to create a list R of representative
entries, a list P of integers which permutes the sorted list back to
the original list and a list SE of integers which gives the
equivalence class for the nth entry in the sorted list .
We then return the length of R, R, and the list that results from
permuting SE by P.
*)
type entry = A.entry
val gt = fn ((a,_),(b,_)) => A.gt(a,b)
structure Sort = MergeSortFun(type entry = A.entry * int
val gt = gt)
val assignIndex =
fn l =>
let fun loop (index,nil) = nil
| loop (index,h :: t) = (h,index) :: loop(index+1,t)
in loop (0,l)
end
local fun loop ((e,_) :: t, prev, class, R , SE) =
if A.eq(e,prev)
then loop(t,e,class,R, class :: SE)
else loop(t,e,class+1,e :: R, (class + 1) :: SE)
| loop (nil,_,_,R,SE) = (rev R, rev SE)
in val createEquivalences =
fn nil => (nil,nil)
| (e,_) :: t => loop(t, e, 0, [e],[0])
end
val inversePermute = fn permutation =>
fn nil => nil
| l as h :: _ =>
let val result = Array.array(length l,h)
fun loop (elem :: r, dest :: s) =
(Array.update(result,dest,elem); loop(r,s))
| loop _ = ()
fun listofarray i =
if i < Array.length result then
(result sub i) :: listofarray (i+1)
else nil
in loop (l,permutation); listofarray 0
end
fun makePermutation x = map (fn (_,b) => b) x
val equivalences = fn l =>
let val EP = assignIndex l
val sorted = Sort.sort EP
val P = makePermutation sorted
val (R, SE) = createEquivalences sorted
in (length R, inversePermute P SE, R)
end
end
functor ShrinkLrTableFun(structure LrTable : LR_TABLE) : SHRINK_LR_TABLE =
struct
structure LrTable = LrTable
open LrTable
val gtAction = fn (a,b) =>
case a
of SHIFT (STATE s) =>
(case b of SHIFT (STATE s') => s>s' | _ => true)
| REDUCE i => (case b of SHIFT _ => false | REDUCE i' => i>i'
| _ => true)
| ACCEPT => (case b of ERROR => true | _ => false)
| ERROR => false
structure ActionEntryList =
struct
type entry = (term, action) pairlist * action
local
fun eqlist (EMPTY, EMPTY) = true
| eqlist (PAIR (T t,d,r),PAIR(T t',d',r')) =
t=t' andalso d=d' andalso eqlist(r,r')
| eqlist _ = false
fun gtlist (PAIR _,EMPTY) = true
| gtlist (PAIR(T t,d,r),PAIR(T t',d',r')) =
t>t' orelse (t=t' andalso
(gtAction(d,d') orelse
(d=d' andalso gtlist(r,r'))))
| gtlist _ = false
in
fun eq ((l,a): entry, (l',a'): entry) =
a = a' andalso eqlist (l,l')
fun gt ((l,a): entry, (l',a'): entry) =
gtAction(a,a') orelse (a=a' andalso gtlist(l,l'))
end
end
(* structure GotoEntryList =
struct
type entry = (nonterm,state) pairlist
val rec eq =
fn (EMPTY,EMPTY) => true
| (PAIR (t,d,r),PAIR(t',d',r')) =>
t=t' andalso d=d' andalso eq(r,r')
| _ => false
val rec gt =
fn (PAIR _,EMPTY) => true
| (PAIR(NT t,STATE d,r),PAIR(NT t',STATE d',r')) =>
t>t' orelse (t=t' andalso
(d>d' orelse (d=d' andalso gt(r,r'))))
| _ => false
end *)
structure EquivActionList = EquivFun(ActionEntryList)
val states = fn max =>
let fun f i=if i<max then STATE i :: f(i+1) else nil
in f 0
end
val length : ('a,'b) pairlist -> int =
fn l =>
let fun g(EMPTY,len) = len
| g(PAIR(_,_,r),len) = g(r,len+1)
in g(l,0)
end
val size : (('a,'b) pairlist * 'c) list -> int =
fn l =>
let val c = ref 0
in (app (fn (row,_) => c := !c + length row) l; !c)
end
val shrinkActionList =
fn (table,verbose) =>
case EquivActionList.equivalences
(map (describeActions table) (states (numStates table)))
of result as (_,_,l) => (result,if verbose then size l else 0)
end;

379
mlyacc-polyml/src/sigs.sml Normal file
View File

@ -0,0 +1,379 @@
(* Modified by Vesa Karvonen on 2007-12-18.
* Create line directives in output.
*)
(* ML-Yacc Parser Generator (c) 1989, 1991 Andrew W. Appel, David R. Tarditi *)
signature HEADER =
sig
type pos = {line : int, col : int}
val pos : {line : int ref, start : int ref}
val text : string list ref
type inputSource
val newSource : string * TextIO.instream * TextIO.outstream -> inputSource
val error : inputSource -> pos -> string -> unit
val warn : inputSource -> pos -> string -> unit
val errorOccurred : inputSource -> unit -> bool
datatype symbol = SYMBOL of string * pos
val symbolName : symbol -> string
val symbolPos : symbol -> pos
val symbolMake : string * pos -> symbol
type ty
val tyName : ty -> string
val tyMake : string -> ty
(* associativities: each kind of associativity is assigned a unique
integer *)
datatype prec = LEFT | RIGHT | NONASSOC
datatype control = NODEFAULT | VERBOSE | PARSER_NAME of symbol |
FUNCTOR of string | START_SYM of symbol |
NSHIFT of symbol list | POS of string | PURE |
PARSE_ARG of string * string |
TOKEN_SIG_INFO of string
datatype rule = RULE of {lhs : symbol, rhs : symbol list,
code : {text : string, pos : pos},
prec : symbol option}
datatype declData = DECL of
{eop : symbol list,
keyword : symbol list,
nonterm : (symbol * ty option) list option,
prec : (prec * (symbol list)) list,
change: (symbol list * symbol list) list,
term : (symbol * ty option) list option,
control : control list,
value : (symbol * string) list}
val join_decls : declData * declData * inputSource * pos -> declData
type parseResult
val getResult : parseResult -> string * declData * rule list
end;
signature PARSE_GEN_PARSER =
sig
structure Header : HEADER
val parse : string -> Header.parseResult * Header.inputSource
end;
signature PARSE_GEN =
sig
val parseGen : string -> unit
end;
signature GRAMMAR =
sig
datatype term = T of int
datatype nonterm = NT of int
datatype symbol = TERM of term | NONTERM of nonterm
(* grammar:
terminals should be numbered from 0 to terms-1,
nonterminals should be numbered from 0 to nonterms-1,
rules should be numbered between 0 and (length rules) - 1,
higher precedence binds tighter,
start nonterminal should not occur on the rhs of any rule
*)
datatype grammar = GRAMMAR of
{rules: {lhs : nonterm, rhs : symbol list,
precedence : int option, rulenum : int } list,
terms: int,
nonterms: int,
start : nonterm,
eop : term list,
noshift : term list,
precedence : term -> int option,
termToString : term -> string,
nontermToString : nonterm -> string}
end
(* signature for internal version of grammar *)
signature INTGRAMMAR =
sig
structure Grammar : GRAMMAR
structure SymbolAssoc : TABLE
structure NontermAssoc : TABLE
sharing type SymbolAssoc.key = Grammar.symbol
sharing type NontermAssoc.key = Grammar.nonterm
datatype rule = RULE of
{lhs : Grammar.nonterm,
rhs : Grammar.symbol list,
(* internal number of rule - convenient for producing LR graph *)
num : int,
rulenum : int,
precedence : int option}
val gtTerm : Grammar.term * Grammar.term -> bool
val eqTerm : Grammar.term * Grammar.term -> bool
val gtNonterm : Grammar.nonterm * Grammar.nonterm -> bool
val eqNonterm : Grammar.nonterm * Grammar.nonterm -> bool
val gtSymbol : Grammar.symbol * Grammar.symbol -> bool
val eqSymbol : Grammar.symbol * Grammar.symbol -> bool
(* Debugging information will be generated only if DEBUG is true. *)
val DEBUG : bool
val prRule : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
(string -> 'b) -> rule -> unit
val prGrammar : (Grammar.symbol -> string)*(Grammar.nonterm -> string) *
(string -> unit) -> Grammar.grammar -> unit
end
signature CORE =
sig
structure Grammar : GRAMMAR
structure IntGrammar : INTGRAMMAR
sharing Grammar = IntGrammar.Grammar
datatype item = ITEM of
{ rule : IntGrammar.rule,
dot : int,
(* rhsAfter: The portion of the rhs of a rule that lies after the dot *)
rhsAfter: Grammar.symbol list }
(* eqItem and gtItem compare items *)
val eqItem : item * item -> bool
val gtItem : item * item -> bool
(* functions for maintaining ordered item lists *)
val insert : item * item list -> item list
val union : item list * item list -> item list
(* core: a set of items. It is represented by an ordered list of items.
The list is in ascending order The rule numbers and the positions of the
dots are used to order the items. *)
datatype core = CORE of item list * int (* state # *)
(* gtCore and eqCore compare the lists of items *)
val gtCore : core * core -> bool
val eqCore : core * core -> bool
(* functions for debugging *)
val prItem : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
(string -> unit) -> item -> unit
val prCore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
(string -> unit) -> core -> unit
end
signature CORE_UTILS =
sig
structure Grammar : GRAMMAR
structure IntGrammar : INTGRAMMAR
structure Core : CORE
sharing Grammar = IntGrammar.Grammar = Core.Grammar
sharing IntGrammar = Core.IntGrammar
(* mkFuncs: create functions for the set of productions derived from a
nonterminal, the cores that result from shift/gotos from a core,
and return a list of rules *)
val mkFuncs : Grammar.grammar ->
{ produces : Grammar.nonterm -> IntGrammar.rule list,
(* shifts: take a core and compute all the cores that result from shifts/gotos
on symbols *)
shifts : Core.core -> (Grammar.symbol*Core.item list) list,
rules: IntGrammar.rule list,
(* epsProds: take a core compute epsilon productions for it *)
epsProds : Core.core -> IntGrammar.rule list}
end
signature LRGRAPH =
sig
structure Grammar : GRAMMAR
structure IntGrammar : INTGRAMMAR
structure Core : CORE
sharing Grammar = IntGrammar.Grammar = Core.Grammar
sharing IntGrammar = Core.IntGrammar
type graph
val edges : Core.core * graph -> {edge:Grammar.symbol,to:Core.core} list
val nodes : graph -> Core.core list
val shift : graph -> int * Grammar.symbol -> int (* int = state # *)
val core : graph -> int -> Core.core (* get core for a state *)
(* mkGraph: compute the LR(0) sets of items *)
val mkGraph : Grammar.grammar ->
{graph : graph,
produces : Grammar.nonterm -> IntGrammar.rule list,
rules : IntGrammar.rule list,
epsProds: Core.core -> IntGrammar.rule list}
val prGraph: (Grammar.symbol -> string)*(Grammar.nonterm -> string) *
(string -> unit) -> graph -> unit
end
signature LOOK =
sig
structure Grammar : GRAMMAR
structure IntGrammar : INTGRAMMAR
sharing Grammar = IntGrammar.Grammar
val union : Grammar.term list * Grammar.term list -> Grammar.term list
val make_set : Grammar.term list -> Grammar.term list
val mkFuncs : {rules : IntGrammar.rule list, nonterms : int,
produces : Grammar.nonterm -> IntGrammar.rule list} ->
{nullable: Grammar.nonterm -> bool,
first : Grammar.symbol list -> Grammar.term list}
val prLook : (Grammar.term -> string) * (string -> unit) ->
Grammar.term list -> unit
end
signature LALR_GRAPH =
sig
structure Grammar : GRAMMAR
structure IntGrammar : INTGRAMMAR
structure Core : CORE
structure Graph : LRGRAPH
sharing Grammar = IntGrammar.Grammar = Core.Grammar = Graph.Grammar
sharing IntGrammar = Core.IntGrammar = Graph.IntGrammar
sharing Core = Graph.Core
datatype lcore = LCORE of (Core.item * Grammar.term list) list * int
val addLookahead : {graph : Graph.graph,
first : Grammar.symbol list -> Grammar.term list,
eop : Grammar.term list,
nonterms : int,
nullable: Grammar.nonterm -> bool,
produces : Grammar.nonterm -> IntGrammar.rule list,
rules : IntGrammar.rule list,
epsProds : Core.core -> IntGrammar.rule list,
print : string -> unit, (* for debugging *)
termToString : Grammar.term -> string,
nontermToString : Grammar.nonterm -> string} ->
lcore list
val prLcore : (Grammar.symbol -> string) * (Grammar.nonterm -> string) *
(Grammar.term -> string) * (string -> unit) ->
lcore -> unit
end
(* LR_ERRS: errors found while constructing an LR table *)
signature LR_ERRS =
sig
structure LrTable : LR_TABLE
(* RR = reduce/reduce,
SR = shift/reduce
NS: non-shiftable terminal found on the rhs of a rule
NOT_REDUCED n: rule number n was not reduced
START n : start symbol found on the rhs of rule n *)
datatype err = RR of LrTable.term * LrTable.state * int * int
| SR of LrTable.term * LrTable.state * int
| NS of LrTable.term * int
| NOT_REDUCED of int
| START of int
val summary : err list -> {rr : int, sr: int,
not_reduced : int, start : int,nonshift : int}
val printSummary : (string -> unit) -> err list -> unit
end
(* PRINT_STRUCT: prints a structure which includes a value 'table' and a
structure Table whose signature matches LR_TABLE. The table in the printed
structure will contain the same information as the one passed to
printStruct, although the representation may be different. It returns
the number of entries left in the table after compaction.*)
signature PRINT_STRUCT =
sig
structure LrTable : LR_TABLE
val makeStruct :
{table : LrTable.table,
name : string,
print: string -> unit,
verbose : bool
} -> int
end
(* VERBOSE: signature for a structure which takes a table and creates a
verbose description of it *)
signature VERBOSE =
sig
structure Errs : LR_ERRS
val printVerbose :
{table : Errs.LrTable.table,
entries : int,
termToString : Errs.LrTable.term -> string,
nontermToString : Errs.LrTable.nonterm -> string,
stateErrs : Errs.LrTable.state -> Errs.err list,
errs : Errs.err list,
print: string -> unit,
printCores : (string -> unit) -> Errs.LrTable.state -> unit,
printRule : (string -> unit) -> int -> unit} -> unit
end
(* MAKE_LR_TABLE: signature for a structure which includes a structure
matching the signature LR_TABLE and a function which maps grammars
to tables *)
signature MAKE_LR_TABLE =
sig
structure Grammar : GRAMMAR
structure Errs : LR_ERRS
structure LrTable : LR_TABLE
sharing Errs.LrTable = LrTable
sharing type LrTable.term = Grammar.term
sharing type LrTable.nonterm = Grammar.nonterm
(* boolean value determines whether default reductions will be used.
If it is true, reductions will be used. *)
val mkTable : Grammar.grammar * bool ->
LrTable.table *
(LrTable.state -> Errs.err list) * (* errors in a state *)
((string -> unit) -> LrTable.state -> unit) *
Errs.err list (* list of all errors *)
end;
(* SHRINK_LR_TABLE: finds unique action entry rows in the action table
for the LR parser *)
signature SHRINK_LR_TABLE =
sig
(* Takes an action table represented as a list of action rows.
It returns the number of unique rows left in the action table,
a list of integers which maps each original row to a unique
row, and a list of unique rows *)
structure LrTable : LR_TABLE
val shrinkActionList : LrTable.table * bool ->
(int * int list *
((LrTable.term,LrTable.action) LrTable.pairlist *
LrTable.action) list) * int
end

View File

@ -0,0 +1,56 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
signature ORDSET =
sig
type set
type elem
exception Select_arb
val app : (elem -> unit) -> set -> unit
and card: set -> int
and closure: set * (elem -> set) -> set
and difference: set * set -> set
and elem_eq: (elem * elem -> bool)
and elem_gt : (elem * elem -> bool)
and empty: set
and exists: (elem * set) -> bool
and find : (elem * set) -> elem option
and fold: ((elem * 'b) -> 'b) -> set -> 'b -> 'b
and insert: (elem * set) -> set
and is_empty: set -> bool
and make_list: set -> elem list
and make_set: (elem list -> set)
and partition: (elem -> bool) -> (set -> set * set)
and remove: (elem * set) -> set
and revfold: ((elem * 'b) -> 'b) -> set -> 'b -> 'b
and select_arb: set -> elem
and set_eq: (set * set) -> bool
and set_gt: (set * set) -> bool
and singleton: (elem -> set)
and union: set * set -> set
end
signature TABLE =
sig
type 'a table
type key
val size : 'a table -> int
val empty: 'a table
val exists: (key * 'a table) -> bool
val find : (key * 'a table) -> 'a option
val insert: ((key * 'a) * 'a table) -> 'a table
val make_table : (key * 'a ) list -> 'a table
val make_list : 'a table -> (key * 'a) list
val fold : ((key * 'a) * 'b -> 'b) -> 'a table -> 'b -> 'b
end
signature HASH =
sig
type table
type elem
val size : table -> int
val add : elem * table -> table
val find : elem * table -> int option
val exists : elem * table -> bool
val empty : table
end;

531
mlyacc-polyml/src/utils.sml Normal file
View File

@ -0,0 +1,531 @@
(* ML-Yacc Parser Generator (c) 1989 Andrew W. Appel, David R. Tarditi *)
(* Implementation of ordered sets using ordered lists and red-black trees. The
code for red-black trees was originally written by Norris Boyd, which was
modified for use here.
*)
(* ordered sets implemented using ordered lists.
Upper bound running times for functions implemented here:
app = O(n)
card = O(n)
closure = O(n^2)
difference = O(n+m), where n,m = the size of the two sets used here.
empty = O(1)
exists = O(n)
find = O(n)
fold = O(n)
insert = O(n)
is_empty = O(1)
make_list = O(1)
make_set = O(n^2)
partition = O(n)
remove = O(n)
revfold = O(n)
select_arb = O(1)
set_eq = O(n), where n = the cardinality of the smaller set
set_gt = O(n), ditto
singleton = O(1)
union = O(n+m)
*)
functor ListOrdSet(B : sig type elem
val gt : elem * elem -> bool
val eq : elem * elem -> bool
end ) : ORDSET =
struct
type elem = B.elem
val elem_gt = B.gt
val elem_eq = B.eq
type set = elem list
exception Select_arb
val empty = nil
val insert = fn (key,s) =>
let fun f (l as (h::t)) =
if elem_gt(key,h) then h::(f t)
else if elem_eq(key,h) then key::t
else key::l
| f nil = [key]
in f s
end
val select_arb = fn nil => raise Select_arb
| a::b => a
val exists = fn (key,s) =>
let fun f (h::t) = if elem_gt(key,h) then f t
else elem_eq(h,key)
| f nil = false
in f s
end
val find = fn (key,s) =>
let fun f (h::t) = if elem_gt(key,h) then f t
else if elem_eq(h,key) then SOME h
else NONE
| f nil = NONE
in f s
end
fun revfold f lst init = List.foldl f init lst
fun fold f lst init = List.foldr f init lst
val app = List.app
fun set_eq(h::t,h'::t') =
(case elem_eq(h,h')
of true => set_eq(t,t')
| a => a)
| set_eq(nil,nil) = true
| set_eq _ = false
fun set_gt(h::t,h'::t') =
(case elem_gt(h,h')
of false => (case (elem_eq(h,h'))
of true => set_gt(t,t')
| a => a)
| a => a)
| set_gt(_::_,nil) = true
| set_gt _ = false
fun union(a as (h::t),b as (h'::t')) =
if elem_gt(h',h) then h::union(t,b)
else if elem_eq(h,h') then h::union(t,t')
else h'::union(a,t')
| union(nil,s) = s
| union(s,nil) = s
val make_list = fn s => s
val is_empty = fn nil => true | _ => false
val make_set = fn l => List.foldr insert [] l
val partition = fn f => fn s =>
fold (fn (e,(yes,no)) =>
if (f e) then (e::yes,no) else (e::no,yes)) s (nil,nil)
val remove = fn (e,s) =>
let fun f (l as (h::t)) = if elem_gt(h,e) then l
else if elem_eq(h,e) then t
else h::(f t)
| f nil = nil
in f s
end
(* difference: X-Y *)
fun difference (nil,_) = nil
| difference (r,nil) = r
| difference (a as (h::t),b as (h'::t')) =
if elem_gt (h',h) then h::difference(t,b)
else if elem_eq(h',h) then difference(t,t')
else difference(a,t')
fun singleton X = [X]
fun card(S) = fold (fn (a,count) => count+1) S 0
local
fun closure'(from, f, result) =
if is_empty from then result
else
let val (more,result) =
fold (fn (a,(more',result')) =>
let val more = f a
val new = difference(more,result)
in (union(more',new),union(result',new))
end) from
(empty,result)
in closure'(more,f,result)
end
in
fun closure(start, f) = closure'(start, f, start)
end
end
(* ordered set implemented using red-black trees:
Upper bound running time of the functions below:
app: O(n)
card: O(n)
closure: O(n^2 ln n)
difference: O(n ln n)
empty: O(1)
exists: O(ln n)
find: O(ln n)
fold: O(n)
insert: O(ln n)
is_empty: O(1)
make_list: O(n)
make_set: O(n ln n)
partition: O(n ln n)
remove: O(n ln n)
revfold: O(n)
select_arb: O(1)
set_eq: O(n)
set_gt: O(n)
singleton: O(1)
union: O(n ln n)
*)
functor RbOrdSet (B : sig type elem
val eq : (elem*elem) -> bool
val gt : (elem*elem) -> bool
end
) : ORDSET =
struct
type elem = B.elem
val elem_gt = B.gt
val elem_eq = B.eq
datatype Color = RED | BLACK
abstype set = EMPTY | TREE of (B.elem * Color * set * set)
with exception Select_arb
val empty = EMPTY
fun insert(key,t) =
let fun f EMPTY = TREE(key,RED,EMPTY,EMPTY)
| f (TREE(k,BLACK,l,r)) =
if elem_gt (key,k)
then case f r
of r as TREE(rk,RED, rl as TREE(rlk,RED,rll,rlr),rr) =>
(case l
of TREE(lk,RED,ll,lr) =>
TREE(k,RED,TREE(lk,BLACK,ll,lr),
TREE(rk,BLACK,rl,rr))
| _ => TREE(rlk,BLACK,TREE(k,RED,l,rll),
TREE(rk,RED,rlr,rr)))
| r as TREE(rk,RED,rl, rr as TREE(rrk,RED,rrl,rrr)) =>
(case l
of TREE(lk,RED,ll,lr) =>
TREE(k,RED,TREE(lk,BLACK,ll,lr),
TREE(rk,BLACK,rl,rr))
| _ => TREE(rk,BLACK,TREE(k,RED,l,rl),rr))
| r => TREE(k,BLACK,l,r)
else if elem_gt(k,key)
then case f l
of l as TREE(lk,RED,ll, lr as TREE(lrk,RED,lrl,lrr)) =>
(case r
of TREE(rk,RED,rl,rr) =>
TREE(k,RED,TREE(lk,BLACK,ll,lr),
TREE(rk,BLACK,rl,rr))
| _ => TREE(lrk,BLACK,TREE(lk,RED,ll,lrl),
TREE(k,RED,lrr,r)))
| l as TREE(lk,RED, ll as TREE(llk,RED,lll,llr), lr) =>
(case r
of TREE(rk,RED,rl,rr) =>
TREE(k,RED,TREE(lk,BLACK,ll,lr),
TREE(rk,BLACK,rl,rr))
| _ => TREE(lk,BLACK,ll,TREE(k,RED,lr,r)))
| l => TREE(k,BLACK,l,r)
else TREE(key,BLACK,l,r)
| f (TREE(k,RED,l,r)) =
if elem_gt(key,k) then TREE(k,RED,l, f r)
else if elem_gt(k,key) then TREE(k,RED, f l, r)
else TREE(key,RED,l,r)
in case f t
of TREE(k,RED, l as TREE(_,RED,_,_), r) => TREE(k,BLACK,l,r)
| TREE(k,RED, l, r as TREE(_,RED,_,_)) => TREE(k,BLACK,l,r)
| t => t
end
fun select_arb (TREE(k,_,l,r)) = k
| select_arb EMPTY = raise Select_arb
fun exists(key,t) =
let fun look EMPTY = false
| look (TREE(k,_,l,r)) =
if elem_gt(k,key) then look l
else if elem_gt(key,k) then look r
else true
in look t
end
fun find(key,t) =
let fun look EMPTY = NONE
| look (TREE(k,_,l,r)) =
if elem_gt(k,key) then look l
else if elem_gt(key,k) then look r
else SOME k
in look t
end
fun revfold f t start =
let fun scan (EMPTY,value) = value
| scan (TREE(k,_,l,r),value) = scan(r,f(k,scan(l,value)))
in scan(t,start)
end
fun fold f t start =
let fun scan(EMPTY,value) = value
| scan(TREE(k,_,l,r),value) = scan(l,f(k,scan(r,value)))
in scan(t,start)
end
fun app f t =
let fun scan EMPTY = ()
| scan(TREE(k,_,l,r)) = (scan l; f k; scan r)
in scan t
end
(* equal_tree : test if two trees are equal. Two trees are equal if
the set of leaves are equal *)
fun set_eq (tree1 as (TREE _),tree2 as (TREE _)) =
let datatype pos = L | R | M
exception Done
fun getvalue(stack as ((a,position)::b)) =
(case a
of (TREE(k,_,l,r)) =>
(case position
of L => getvalue ((l,L)::(a,M)::b)
| M => (k,case r of EMPTY => b | _ => (a,R)::b)
| R => getvalue ((r,L)::b)
)
| EMPTY => getvalue b
)
| getvalue(nil) = raise Done
fun f (nil,nil) = true
| f (s1 as (_ :: _),s2 as (_ :: _ )) =
let val (v1,news1) = getvalue s1
and (v2,news2) = getvalue s2
in (elem_eq(v1,v2)) andalso f(news1,news2)
end
| f _ = false
in f ((tree1,L)::nil,(tree2,L)::nil) handle Done => false
end
| set_eq (EMPTY,EMPTY) = true
| set_eq _ = false
(* gt_tree : Test if tree1 is greater than tree 2 *)
fun set_gt (tree1,tree2) =
let datatype pos = L | R | M
exception Done
fun getvalue(stack as ((a,position)::b)) =
(case a
of (TREE(k,_,l,r)) =>
(case position
of L => getvalue ((l,L)::(a,M)::b)
| M => (k,case r of EMPTY => b | _ => (a,R)::b)
| R => getvalue ((r,L)::b)
)
| EMPTY => getvalue b
)
| getvalue(nil) = raise Done
fun f (nil,nil) = false
| f (s1 as (_ :: _),s2 as (_ :: _ )) =
let val (v1,news1) = getvalue s1
and (v2,news2) = getvalue s2
in (elem_gt(v1,v2)) orelse (elem_eq(v1,v2) andalso f(news1,news2))
end
| f (_,nil) = true
| f (nil,_) = false
in f ((tree1,L)::nil,(tree2,L)::nil) handle Done => false
end
fun is_empty S = (let val _ = select_arb S in false end
handle Select_arb => true)
fun make_list S = fold (op ::) S nil
fun make_set l = List.foldr insert empty l
fun partition F S = fold (fn (a,(Yes,No)) =>
if F(a) then (insert(a,Yes),No)
else (Yes,insert(a,No)))
S (empty,empty)
fun remove(X, XSet) =
let val (YSet, _) =
partition (fn a => not (elem_eq (X, a))) XSet
in YSet
end