bringing ml-lpt-lib 110.68 into the main branch
git-svn-id: https://projects.brucker.ch/su4sml/svn/su4sml/trunk@8251 3260e6d1-4efc-4170-b0a7-36055960796d
This commit is contained in:
parent
7b67e892cf
commit
3055396f06
|
@ -0,0 +1,19 @@
|
||||||
|
(* antlr-lexer-sig.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Signature for the lexer argument to parser functors generated
|
||||||
|
* by ml-antlr.
|
||||||
|
*)
|
||||||
|
|
||||||
|
signature ANTLR_LEXER = sig
|
||||||
|
|
||||||
|
type strm
|
||||||
|
type pos = AntlrStreamPos.pos
|
||||||
|
|
||||||
|
val getPos : strm -> pos
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,20 @@
|
||||||
|
(* antlr-tokens-sig.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Signature for generated tokens module, for ml-antlr
|
||||||
|
*)
|
||||||
|
|
||||||
|
signature ANTLR_TOKENS = sig
|
||||||
|
|
||||||
|
type token
|
||||||
|
|
||||||
|
val allToks : token list
|
||||||
|
val isKW : token -> bool
|
||||||
|
val isEOF : token -> bool
|
||||||
|
val toString : token -> string
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,45 @@
|
||||||
|
(* ebnf.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* EBNF combinators used for ml-antlr.
|
||||||
|
*)
|
||||||
|
|
||||||
|
functor AntlrEBNF (S : sig
|
||||||
|
type strm
|
||||||
|
val getSpan : strm -> AntlrStreamPos.span
|
||||||
|
end) =
|
||||||
|
struct
|
||||||
|
|
||||||
|
fun optional (pred, parse, strm) =
|
||||||
|
if pred strm
|
||||||
|
then let
|
||||||
|
val (y, span, strm') = parse strm
|
||||||
|
in
|
||||||
|
(SOME y, span, strm')
|
||||||
|
end
|
||||||
|
else (NONE, S.getSpan strm, strm)
|
||||||
|
|
||||||
|
fun closure (pred, parse, strm) = let
|
||||||
|
fun iter (strm, (left, right), ys) =
|
||||||
|
if pred strm
|
||||||
|
then let
|
||||||
|
val (y, (_, right'), strm') = parse strm
|
||||||
|
in iter (strm', (left, right'), y::ys)
|
||||||
|
end
|
||||||
|
else (List.rev ys, (left, right), strm)
|
||||||
|
in
|
||||||
|
iter (strm, S.getSpan strm, [])
|
||||||
|
end
|
||||||
|
|
||||||
|
fun posclos (pred, parse, strm) = let
|
||||||
|
val (y, (left, _), strm') = parse strm
|
||||||
|
val (ys, (_, right), strm'') = closure (pred, parse, strm')
|
||||||
|
in
|
||||||
|
(y::ys, (left, right), strm'')
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,400 @@
|
||||||
|
(* err-handler.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Error repair for ml-antlr
|
||||||
|
*)
|
||||||
|
|
||||||
|
functor AntlrErrHandler (
|
||||||
|
structure Tok : ANTLR_TOKENS
|
||||||
|
structure Lex : ANTLR_LEXER
|
||||||
|
) : sig
|
||||||
|
|
||||||
|
exception ParseError
|
||||||
|
|
||||||
|
type 'a err_handler
|
||||||
|
type wstream
|
||||||
|
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||||
|
type 'a wreader = wstream -> 'a * AntlrStreamPos.span * wstream
|
||||||
|
|
||||||
|
val mkErrHandler : { get : unit -> 'a, put : 'a -> unit }
|
||||||
|
-> 'a err_handler * Tok.token wreader
|
||||||
|
val launch : 'a err_handler * lexer * 'b wreader * bool ->
|
||||||
|
Lex.strm -> ('b option * Lex.strm * Tok.token AntlrRepair.repair list)
|
||||||
|
val failure : 'a err_handler -> 'b
|
||||||
|
|
||||||
|
val getPos : wstream -> AntlrStreamPos.pos
|
||||||
|
val getSpan : wstream -> AntlrStreamPos.span
|
||||||
|
|
||||||
|
val whileDisabled : 'b err_handler -> (unit -> 'a) -> 'a
|
||||||
|
|
||||||
|
(*
|
||||||
|
val wrap : err_handler -> (R.strm -> ('a * R.strm)) -> R.strm -> ('a * R.strm)
|
||||||
|
val tryProds : 'b err_handler -> (R.strm -> 'a) list -> R.strm -> 'a
|
||||||
|
*)
|
||||||
|
|
||||||
|
end = struct
|
||||||
|
|
||||||
|
exception ParseError
|
||||||
|
exception Unrepairable
|
||||||
|
|
||||||
|
structure AR = AntlrRepair
|
||||||
|
|
||||||
|
structure WS = AntlrWrappedStream(
|
||||||
|
structure Tok = Tok
|
||||||
|
structure Lex = Lex)
|
||||||
|
|
||||||
|
type wstream = WS.wstream
|
||||||
|
val getPos = WS.getPos
|
||||||
|
val getSpan = WS.getSpan
|
||||||
|
|
||||||
|
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||||
|
type 'a wreader = wstream -> 'a * AntlrStreamPos.span * wstream
|
||||||
|
type 'a checkpoint = 'a * unit SMLofNJ.Cont.cont * wstream
|
||||||
|
|
||||||
|
datatype 'a err_handler = EH of {
|
||||||
|
checkpoints : 'a checkpoint list ref,
|
||||||
|
maxPos : WS.tok_pos ref,
|
||||||
|
cont : unit SMLofNJ.Cont.cont option ref,
|
||||||
|
get : unit -> 'a,
|
||||||
|
put : 'a -> unit,
|
||||||
|
rs : WS.repair_state,
|
||||||
|
enabled : bool ref
|
||||||
|
}
|
||||||
|
|
||||||
|
fun getGet (EH {get, ...}) = get
|
||||||
|
fun getPut (EH {put, ...}) = put
|
||||||
|
fun getRS (EH {rs, ...}) = rs
|
||||||
|
|
||||||
|
fun getCont (EH {cont, ...}) = !cont
|
||||||
|
fun setCont (EH {cont, ...}, new) = cont := new
|
||||||
|
|
||||||
|
fun getCheckpoints (EH {checkpoints, ...}) = !checkpoints
|
||||||
|
fun setCheckpoints (EH {checkpoints, ...}, new) = checkpoints := new
|
||||||
|
|
||||||
|
fun getMaxPos (EH {maxPos, ...}) = !maxPos
|
||||||
|
fun setMaxPos (EH {maxPos, ...}, new) = maxPos := new
|
||||||
|
|
||||||
|
fun getEnabled (EH {enabled, ...}) = !enabled
|
||||||
|
fun setEnabled (EH {enabled, ...}, n) = enabled := n
|
||||||
|
(*
|
||||||
|
fun getRepairs (EH {repairs, ...}) = !repairs
|
||||||
|
fun addRepair (EH {repairs, ...}, n) = repairs := (!repairs) @ [n] *)
|
||||||
|
|
||||||
|
fun mkErrHandler {get, put} = let
|
||||||
|
val cont = ref NONE
|
||||||
|
val checkpoints = ref []
|
||||||
|
val maxPos = ref ~1
|
||||||
|
val eh = EH {
|
||||||
|
cont = cont, checkpoints = checkpoints,
|
||||||
|
maxPos = maxPos, get = get, put = put,
|
||||||
|
rs = WS.mkRepairState(), enabled = ref true
|
||||||
|
}
|
||||||
|
fun lex ws =
|
||||||
|
if isSome (!cont)
|
||||||
|
then (maxPos := Int.max (WS.getTokPos ws, !maxPos);
|
||||||
|
WS.get1 ws)
|
||||||
|
else
|
||||||
|
if WS.getTokPos ws > !maxPos
|
||||||
|
then let
|
||||||
|
val () = SMLofNJ.Cont.callcc
|
||||||
|
(fn k => (checkpoints := (get(), k, ws) :: !checkpoints;
|
||||||
|
maxPos := WS.getTokPos ws))
|
||||||
|
in
|
||||||
|
WS.get1 ws
|
||||||
|
end
|
||||||
|
else WS.get1 ws
|
||||||
|
in (eh, lex)
|
||||||
|
end
|
||||||
|
|
||||||
|
val isEOF = Tok.isEOF o #1 o WS.get1
|
||||||
|
|
||||||
|
val minAdvance = 1
|
||||||
|
|
||||||
|
fun restoreCheckpoint (eh, (x, cont, strm)) =
|
||||||
|
(getPut eh x; (* retore refcell data for checkpoint *)
|
||||||
|
setMaxPos (eh, WS.getTokPos strm);
|
||||||
|
SMLofNJ.Cont.throw cont ())
|
||||||
|
|
||||||
|
fun tryRepair (eh, c) = let
|
||||||
|
val oldMax = getMaxPos eh
|
||||||
|
val firstTime = ref true
|
||||||
|
val () = SMLofNJ.Cont.callcc (fn k => (setCont (eh, SOME k)))
|
||||||
|
in if !firstTime then
|
||||||
|
(* first time through, try the repair *)
|
||||||
|
(firstTime := false; restoreCheckpoint (eh, c))
|
||||||
|
else
|
||||||
|
(* second time through, return the distance achieved *)
|
||||||
|
(setCont (eh, NONE); getMaxPos eh - oldMax)
|
||||||
|
end
|
||||||
|
|
||||||
|
local
|
||||||
|
|
||||||
|
val allToks = List.filter (not o Tok.isEOF) Tok.allToks
|
||||||
|
fun mkDelete strm = (WS.getPos strm, AR.Delete [#1 (WS.get1 strm)])
|
||||||
|
fun mkInsert strm tok = (WS.getPos strm, AR.Insert [tok])
|
||||||
|
fun mkSubst strm tok = (WS.getPos strm, AR.Subst { old = [#1 (WS.get1 strm)], new = [tok] })
|
||||||
|
fun allRepairs strm =
|
||||||
|
(if isEOF strm then [] else [mkDelete strm]) @
|
||||||
|
map (mkInsert strm) allToks @
|
||||||
|
(if isEOF strm then [] else map (mkSubst strm) allToks)
|
||||||
|
|
||||||
|
fun involvesKW (_, r) = (case r
|
||||||
|
of AR.Insert toks => List.exists Tok.isKW toks
|
||||||
|
| AR.Delete toks => List.exists Tok.isKW toks
|
||||||
|
| AR.Subst {old,new} => List.exists Tok.isKW (old @ new)
|
||||||
|
| AR.FailureAt _ => false
|
||||||
|
(* end case *))
|
||||||
|
|
||||||
|
in
|
||||||
|
fun trySingleToken eh = let
|
||||||
|
val RS = getRS eh
|
||||||
|
val oldRepairs = WS.getRepairs RS
|
||||||
|
val oldMax = getMaxPos eh
|
||||||
|
val oldMaxRepair = WS.maxRepairPos RS
|
||||||
|
val oldCheckpoints = getCheckpoints eh
|
||||||
|
fun restoreToErr() = (WS.setRepairs (RS, oldRepairs); setMaxPos (eh, oldMax))
|
||||||
|
(* stream value for checkpoint *)
|
||||||
|
fun strmOf (_, _, strm) = strm
|
||||||
|
fun setupRepair (r, c::cs) =
|
||||||
|
WS.setRepairs (RS, WS.addRepair (oldRepairs, WS.getTokPos (strmOf c), r))
|
||||||
|
| setupRepair _ = raise Fail "bug"
|
||||||
|
fun try (_::c::cs, [], best, n) =
|
||||||
|
if n < 15 andalso WS.getTokPos (strmOf c) > oldMaxRepair
|
||||||
|
then try (c::cs, allRepairs (strmOf c), best, n+1)
|
||||||
|
else try ([], [], best, n)
|
||||||
|
| try (c::cs, r::rs, best, n) = (
|
||||||
|
restoreToErr(); setupRepair (r, c::cs);
|
||||||
|
let val score = tryRepair (eh, c)
|
||||||
|
- (if involvesKW r then 2 else 0)
|
||||||
|
+ (case #2 r
|
||||||
|
of AR.Insert _ => ~1
|
||||||
|
| AR.Delete _ => 1
|
||||||
|
| AR.Subst _ => 0
|
||||||
|
| _ => 0)
|
||||||
|
val oldScore = case best of NONE => 0
|
||||||
|
| SOME (_, _, s) => s
|
||||||
|
in if score > oldScore andalso score > minAdvance
|
||||||
|
then try (c::cs, rs, SOME (c::cs, r, score), n)
|
||||||
|
else try (c::cs, rs, best, n)
|
||||||
|
end)
|
||||||
|
| try (_, [], SOME (c::cs, r, score), _) =
|
||||||
|
(setupRepair (r, c::cs);
|
||||||
|
setCheckpoints (eh, c::cs);
|
||||||
|
setMaxPos (eh, List.length cs);
|
||||||
|
restoreCheckpoint (eh, c))
|
||||||
|
| try _ = restoreToErr()
|
||||||
|
val curStrm = strmOf (hd oldCheckpoints)
|
||||||
|
in if WS.getTokPos curStrm <= WS.maxRepairPos RS then ()
|
||||||
|
else try (oldCheckpoints, allRepairs curStrm, NONE, 1)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
val maxDel = 50
|
||||||
|
|
||||||
|
fun tryDeletion eh = let
|
||||||
|
fun getn (strm, 0, acc) = SOME (rev acc)
|
||||||
|
| getn (strm, n, acc) = let
|
||||||
|
val (tok, _, strm') = WS.get1 strm
|
||||||
|
in
|
||||||
|
if Tok.isEOF tok then NONE
|
||||||
|
else getn (strm', n-1, tok::acc)
|
||||||
|
end
|
||||||
|
val rs = getRS eh
|
||||||
|
val oldRepairs = WS.getRepairs rs
|
||||||
|
val oldMax = getMaxPos eh
|
||||||
|
val oldRepairMax = WS.maxRepairPos rs
|
||||||
|
fun restoreToErr() = (WS.setRepairs (rs, oldRepairs); setMaxPos (eh, oldMax))
|
||||||
|
(* stream value for checkpoint *)
|
||||||
|
fun strmOf (_, _, strm) = strm
|
||||||
|
val cs = getCheckpoints eh
|
||||||
|
fun tryCS ([], n, max) = ()
|
||||||
|
| tryCS (c::cs, n, max) =
|
||||||
|
if WS.getTokPos (strmOf c) <= oldRepairMax
|
||||||
|
orelse oldMax - WS.getTokPos (strmOf c) > maxDel then ()
|
||||||
|
else
|
||||||
|
(WS.setRepairs (rs,
|
||||||
|
WS.addRepair (oldRepairs, WS.getTokPos (strmOf c),
|
||||||
|
(WS.getPos (strmOf c), AR.Delete (valOf (getn (strmOf c, n, []))))));
|
||||||
|
setMaxPos (eh, WS.getTokPos (strmOf c));
|
||||||
|
if tryRepair (eh, c) > minAdvance + 2
|
||||||
|
then (setCheckpoints (eh, c::cs);
|
||||||
|
restoreCheckpoint (eh, c))
|
||||||
|
else (restoreToErr(); tryCS (cs, n+1, max)))
|
||||||
|
and tryN (n, c::cs, max) = (case getn (strmOf c, n, [])
|
||||||
|
of NONE => ()
|
||||||
|
| SOME toks => (tryCS (c::cs, n, max);
|
||||||
|
if n > max then () else tryN (n+1, c::cs, max))
|
||||||
|
(* end case *))
|
||||||
|
| tryN _ = raise Fail "bug"
|
||||||
|
in
|
||||||
|
tryN (1, [hd cs], 5);
|
||||||
|
tryN (1, cs, maxDel)
|
||||||
|
end
|
||||||
|
|
||||||
|
fun failure eh =
|
||||||
|
if getEnabled eh
|
||||||
|
then (case getCont eh
|
||||||
|
of NONE => (trySingleToken eh;
|
||||||
|
tryDeletion eh;
|
||||||
|
raise Unrepairable)
|
||||||
|
| SOME k => SMLofNJ.Cont.throw k ()
|
||||||
|
(* end case *))
|
||||||
|
else raise ParseError
|
||||||
|
|
||||||
|
fun launch (eh, lex, parse, reqEOF) strm = let
|
||||||
|
val wstrm = WS.wrap (getRS eh, strm, lex)
|
||||||
|
in let val (result, _, wstrm') = parse wstrm
|
||||||
|
val (strm', repairs) = WS.unwrap wstrm'
|
||||||
|
in
|
||||||
|
if reqEOF andalso not (isEOF wstrm') then failure eh
|
||||||
|
else ();
|
||||||
|
(SOME result, strm', repairs)
|
||||||
|
end
|
||||||
|
handle Unrepairable => let
|
||||||
|
val (_, repairs) = WS.unwrap wstrm
|
||||||
|
val (tok, (pos, _), _) = (WS.get1 o #3 o hd o getCheckpoints) eh
|
||||||
|
in (NONE, strm, repairs @ [(pos, AR.FailureAt tok)]) end
|
||||||
|
end
|
||||||
|
|
||||||
|
fun whileDisabled eh f = let
|
||||||
|
val oldEnabled = getEnabled eh
|
||||||
|
in
|
||||||
|
setEnabled (eh, false);
|
||||||
|
(f () handle e => (setEnabled (eh, oldEnabled);
|
||||||
|
raise e))
|
||||||
|
before setEnabled (eh, oldEnabled)
|
||||||
|
end
|
||||||
|
|
||||||
|
(*
|
||||||
|
fun throwIfEH (eh, t) =
|
||||||
|
if getEnabled eh then
|
||||||
|
Option.app (fn k => SMLofNJ.Cont.throw k (SOME t)) (getCont eh)
|
||||||
|
else ()
|
||||||
|
|
||||||
|
fun wrap eh f t = if not (getEnabled eh) then f t else let
|
||||||
|
val cont_ref : retry_cont option ref = ref NONE
|
||||||
|
val state = (getGet eh) ()
|
||||||
|
val t' = SMLofNJ.Cont.callcc (fn k => (cont_ref := SOME k; t))
|
||||||
|
val retry = (t', valOf (!cont_ref))
|
||||||
|
in
|
||||||
|
getPut eh state;
|
||||||
|
f t'
|
||||||
|
handle RepairableError => (
|
||||||
|
throwIfEH (eh, t');
|
||||||
|
raise JumpOut [retry])
|
||||||
|
| JumpOut stack => (
|
||||||
|
throwIfEH (eh, t');
|
||||||
|
raise JumpOut (retry::stack))
|
||||||
|
end
|
||||||
|
|
||||||
|
fun findWindow (stack) = let
|
||||||
|
val revStack = rev stack
|
||||||
|
val rightMost = hd revStack
|
||||||
|
fun TOf (t, _) = t
|
||||||
|
fun find [] = raise (Fail "BUG: findWindow given an empty stack")
|
||||||
|
| find [top] = (top, rightMost)
|
||||||
|
| find (top::stack) =
|
||||||
|
if R.farEnoughWindow {startAt = TOf top, endAt = TOf rightMost}
|
||||||
|
then (top, rightMost)
|
||||||
|
else find stack
|
||||||
|
in
|
||||||
|
find revStack
|
||||||
|
end
|
||||||
|
|
||||||
|
fun tryRepair (eh, cont) t =
|
||||||
|
(case SMLofNJ.Cont.callcc (fn k => (setCont (eh, SOME k); NONE))
|
||||||
|
of NONE =>
|
||||||
|
(* first time through, try the repair *)
|
||||||
|
SMLofNJ.Cont.throw cont t
|
||||||
|
| SOME t' =>
|
||||||
|
(* second time through, return the new right-most strm *)
|
||||||
|
(setCont (eh, NONE); t')
|
||||||
|
(* end case *))
|
||||||
|
|
||||||
|
fun primaryRepair (eh, stack) = let
|
||||||
|
val ((leftT, leftCont), (rightT, rightCont)) =
|
||||||
|
findWindow stack
|
||||||
|
val repair = R.chooseRepair {
|
||||||
|
startAt = leftT,
|
||||||
|
endAt = rightT,
|
||||||
|
try = tryRepair (eh, leftCont)
|
||||||
|
}
|
||||||
|
in case repair
|
||||||
|
of SOME {repair, repaired} =>
|
||||||
|
SOME (repair, leftCont, repaired)
|
||||||
|
| NONE => NONE
|
||||||
|
end
|
||||||
|
|
||||||
|
fun secondaryRepair (eh, revStack) = let
|
||||||
|
val stack = rev revStack
|
||||||
|
val (errStrm, errCont) = hd stack
|
||||||
|
fun try ((strm, cont), strm', next) = let
|
||||||
|
val strm'' = tryRepair (eh, cont) strm'
|
||||||
|
in case (R.tryDeletion {oldStartAt = strm,
|
||||||
|
startAt = strm',
|
||||||
|
endAt = strm''})
|
||||||
|
of SOME r => SOME (r, cont, strm')
|
||||||
|
| NONE => next()
|
||||||
|
end
|
||||||
|
fun rightRepair (strm, n) =
|
||||||
|
if n = 0 then NONE
|
||||||
|
else let
|
||||||
|
val strm' = R.skip (strm, 1)
|
||||||
|
in
|
||||||
|
try (hd stack, strm', fn () => rightRepair (strm', n-1))
|
||||||
|
end
|
||||||
|
fun leftRightRepair (strm, []) =
|
||||||
|
if R.isEmpty strm then
|
||||||
|
(addRepair (eh, (R.getPos errStrm,
|
||||||
|
Repair.FailureAt (#1 (R.get1 errStrm))));
|
||||||
|
raise UnrepairableError)
|
||||||
|
else leftRightRepair (R.skip (strm, 1), stack)
|
||||||
|
| leftRightRepair (strm, top::stack) =
|
||||||
|
try (top, strm, fn () => leftRightRepair (strm, stack))
|
||||||
|
in case rightRepair (errStrm, 5)
|
||||||
|
of SOME r => r
|
||||||
|
| _ => valOf (leftRightRepair (errStrm, []))
|
||||||
|
end
|
||||||
|
|
||||||
|
fun repair (eh, stack) = (case primaryRepair (eh, stack)
|
||||||
|
of SOME r => r
|
||||||
|
| NONE => secondaryRepair (eh, stack)
|
||||||
|
(* end case *))
|
||||||
|
|
||||||
|
fun launch eh f t = let
|
||||||
|
val (x, _, t') = wrap eh f t
|
||||||
|
handle JumpOut stack => let
|
||||||
|
val (r, cont, t') = repair (eh, stack)
|
||||||
|
in
|
||||||
|
addRepair (eh, r);
|
||||||
|
SMLofNJ.Cont.throw cont t'
|
||||||
|
end
|
||||||
|
in
|
||||||
|
throwIfEH (eh, t');
|
||||||
|
(SOME x, t', getRepairs eh)
|
||||||
|
end
|
||||||
|
handle UnrepairableError =>
|
||||||
|
(NONE, t, getRepairs eh)
|
||||||
|
*)
|
||||||
|
|
||||||
|
(*
|
||||||
|
fun tryProds eh prods strm = let
|
||||||
|
fun try [] = raise RepairableError
|
||||||
|
| try (prod :: prods) = let
|
||||||
|
val state = (getGet eh) ()
|
||||||
|
in
|
||||||
|
whileDisabled eh (fn () => prod strm)
|
||||||
|
handle _ =>
|
||||||
|
(getPut eh state;
|
||||||
|
try (prods))
|
||||||
|
end
|
||||||
|
in
|
||||||
|
try prods
|
||||||
|
end
|
||||||
|
*)
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,41 @@
|
||||||
|
(* ml-lpt-lib.cm
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Sources file for ml-lpt lib
|
||||||
|
*)
|
||||||
|
|
||||||
|
Library
|
||||||
|
|
||||||
|
signature ANTLR_LEXER
|
||||||
|
signature ANTLR_TOKENS
|
||||||
|
|
||||||
|
structure AntlrRepair
|
||||||
|
structure AntlrStreamPos
|
||||||
|
structure ULexBuffer
|
||||||
|
|
||||||
|
functor AntlrEBNF
|
||||||
|
functor AntlrErrHandler
|
||||||
|
|
||||||
|
is
|
||||||
|
|
||||||
|
$/basis.cm
|
||||||
|
$/smlnj-lib.cm
|
||||||
|
ebnf.sml
|
||||||
|
err-handler.sml
|
||||||
|
antlr-lexer-sig.sml
|
||||||
|
repair.sml
|
||||||
|
|
||||||
|
(*
|
||||||
|
repairable-strm-sig.sml
|
||||||
|
repairable-strm.sml
|
||||||
|
*)
|
||||||
|
wrapped-strm.sml
|
||||||
|
|
||||||
|
stream-pos.sml
|
||||||
|
antlr-tokens-sig.sml
|
||||||
|
|
||||||
|
ulex-buffer.sml
|
|
@ -0,0 +1,38 @@
|
||||||
|
(* ml-lpt-lib.mlb
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2008
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* MLB file for ml-lpt lib
|
||||||
|
*)
|
||||||
|
|
||||||
|
local
|
||||||
|
|
||||||
|
$(SML_LIB)/basis/basis.mlb
|
||||||
|
$(SML_LIB)/basis/sml-nj.mlb
|
||||||
|
$(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb
|
||||||
|
|
||||||
|
stream-pos.sml
|
||||||
|
ebnf.sml
|
||||||
|
antlr-lexer-sig.sml
|
||||||
|
antlr-tokens-sig.sml
|
||||||
|
repair.sml
|
||||||
|
wrapped-strm.sml
|
||||||
|
err-handler.sml
|
||||||
|
ulex-buffer.sml
|
||||||
|
|
||||||
|
in
|
||||||
|
|
||||||
|
signature ANTLR_LEXER
|
||||||
|
signature ANTLR_TOKENS
|
||||||
|
|
||||||
|
structure AntlrRepair
|
||||||
|
structure AntlrStreamPos
|
||||||
|
structure ULexBuffer
|
||||||
|
|
||||||
|
functor AntlrEBNF
|
||||||
|
functor AntlrErrHandler
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,38 @@
|
||||||
|
(* repair.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Representation and pretty-printing of ml-antlr repair actions
|
||||||
|
*)
|
||||||
|
|
||||||
|
structure AntlrRepair = struct
|
||||||
|
|
||||||
|
datatype 'a repair_action
|
||||||
|
= Insert of 'a list
|
||||||
|
| Delete of 'a list
|
||||||
|
| Subst of {
|
||||||
|
old : 'a list,
|
||||||
|
new : 'a list
|
||||||
|
}
|
||||||
|
| FailureAt of 'a
|
||||||
|
|
||||||
|
type 'a repair = AntlrStreamPos.pos * 'a repair_action
|
||||||
|
|
||||||
|
fun actionToString tokToString repair = let
|
||||||
|
val toksToString = (String.concatWith " ") o (map tokToString)
|
||||||
|
in case repair
|
||||||
|
of Insert toks => "try inserting " ^ toksToString toks
|
||||||
|
| Delete toks => "try deleting " ^ toksToString toks
|
||||||
|
| Subst {old, new} =>
|
||||||
|
"try substituting " ^ toksToString new ^ " for "
|
||||||
|
^ toksToString old
|
||||||
|
| FailureAt tok => "syntax error at " ^ toksToString [tok]
|
||||||
|
end
|
||||||
|
|
||||||
|
fun repairToString tokToString sm (pos, repair) =
|
||||||
|
(AntlrStreamPos.toString sm pos ^ ": " ^ actionToString tokToString repair)
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,112 @@
|
||||||
|
(* stream-pos.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Very simple position tracking and source maps for ml-ulex/ml-antlr
|
||||||
|
*)
|
||||||
|
|
||||||
|
structure AntlrStreamPos :> sig
|
||||||
|
|
||||||
|
type pos = Position.int
|
||||||
|
type span = pos * pos
|
||||||
|
type sourceloc = { fileName : string option, lineNo : int, colNo : int }
|
||||||
|
type sourcemap
|
||||||
|
|
||||||
|
exception PosMustIncrease
|
||||||
|
|
||||||
|
(* the result of moving forward an integer number of characters *)
|
||||||
|
val forward : pos * int -> pos
|
||||||
|
|
||||||
|
val mkSourcemap : unit -> sourcemap
|
||||||
|
val mkSourcemap' : string -> sourcemap
|
||||||
|
|
||||||
|
val same : sourcemap * sourcemap -> bool
|
||||||
|
|
||||||
|
(* log a new line occurence *)
|
||||||
|
val markNewLine : sourcemap -> pos -> unit
|
||||||
|
(* resychronize to a full source location *)
|
||||||
|
val resynch : sourcemap -> pos * sourceloc -> unit
|
||||||
|
|
||||||
|
val sourceLoc : sourcemap -> pos -> sourceloc
|
||||||
|
val fileName : sourcemap -> pos -> string option
|
||||||
|
val lineNo : sourcemap -> pos -> int
|
||||||
|
val colNo : sourcemap -> pos -> int
|
||||||
|
val toString : sourcemap -> pos -> string
|
||||||
|
val spanToString : sourcemap -> span -> string
|
||||||
|
|
||||||
|
end = struct
|
||||||
|
|
||||||
|
type pos = Position.int
|
||||||
|
type span = pos * pos
|
||||||
|
type sourceloc = { fileName : string option, lineNo : int, colNo : int }
|
||||||
|
type sourcemap = (sourceloc * pos) list ref
|
||||||
|
|
||||||
|
exception PosMustIncrease
|
||||||
|
|
||||||
|
fun forward (p, i) = p + (Position.fromInt i)
|
||||||
|
|
||||||
|
fun mkSrcMap fileOpt = ref [
|
||||||
|
({fileName = fileOpt, lineNo = 1, colNo = 0}, Position.fromInt ~1)
|
||||||
|
]
|
||||||
|
|
||||||
|
fun mkSourcemap () = mkSrcMap NONE
|
||||||
|
fun mkSourcemap' (fname) = mkSrcMap (SOME fname)
|
||||||
|
|
||||||
|
fun same (sm1 : sourcemap, sm2) = (sm1 = sm2)
|
||||||
|
|
||||||
|
fun markNewLine sm (newPos : pos) = let
|
||||||
|
val ({fileName, lineNo, colNo}, pos) = hd (!sm)
|
||||||
|
in
|
||||||
|
if pos < newPos then
|
||||||
|
sm := ({ fileName = fileName,
|
||||||
|
lineNo = lineNo + 1,
|
||||||
|
colNo = 0},
|
||||||
|
newPos)::(!sm)
|
||||||
|
else () (* raise PosMustIncrease *)
|
||||||
|
end
|
||||||
|
|
||||||
|
fun resynch sm (newPos : pos, sourceLoc) = let
|
||||||
|
val (_, pos) = hd (!sm)
|
||||||
|
in
|
||||||
|
(* if pos < newPos then *)
|
||||||
|
sm := (sourceLoc, newPos)::(!sm)
|
||||||
|
(* else raise PosMustIncrease *)
|
||||||
|
end
|
||||||
|
|
||||||
|
fun findLB ((loc, pos)::sm, pos' : pos) =
|
||||||
|
if pos <= pos' then (loc, pos)
|
||||||
|
else findLB(sm, pos')
|
||||||
|
| findLB _ = raise Fail "impossible"
|
||||||
|
|
||||||
|
fun sourceLoc sm pos = let
|
||||||
|
val ({fileName, lineNo, colNo}, anchor) = findLB(!sm, pos)
|
||||||
|
in
|
||||||
|
{fileName = fileName, lineNo = lineNo,
|
||||||
|
colNo = colNo + Position.toInt(pos - anchor)}
|
||||||
|
end
|
||||||
|
fun fileName sm pos = #fileName (sourceLoc sm pos)
|
||||||
|
fun lineNo sm pos = #lineNo (sourceLoc sm pos)
|
||||||
|
fun colNo sm pos = #colNo (sourceLoc sm pos)
|
||||||
|
fun toString sm pos = String.concat [
|
||||||
|
"[", case fileName sm pos
|
||||||
|
of NONE => ""
|
||||||
|
| SOME f => f ^ ":",
|
||||||
|
Int.toString (lineNo sm pos), ".",
|
||||||
|
Int.toString (colNo sm pos), "]"]
|
||||||
|
fun spanToString sm (pos1, pos2) =
|
||||||
|
if lineNo sm pos1 = lineNo sm pos2 andalso
|
||||||
|
colNo sm pos1 = colNo sm pos2
|
||||||
|
then toString sm pos1
|
||||||
|
else String.concat [
|
||||||
|
"[", case fileName sm pos1
|
||||||
|
of NONE => ""
|
||||||
|
| SOME f => f ^ ":",
|
||||||
|
Int.toString (lineNo sm pos1), ".",
|
||||||
|
Int.toString (colNo sm pos1), "-",
|
||||||
|
Int.toString (lineNo sm pos2), ".",
|
||||||
|
Int.toString (colNo sm pos2), "]"]
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,88 @@
|
||||||
|
(* ulex-buffer.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Forward-chained buffers for lexing
|
||||||
|
*)
|
||||||
|
|
||||||
|
structure ULexBuffer : sig
|
||||||
|
|
||||||
|
type stream
|
||||||
|
val mkStream : (AntlrStreamPos.pos * (unit -> string)) -> stream
|
||||||
|
val getc : stream -> (Char.char * stream) option
|
||||||
|
val getpos : stream -> AntlrStreamPos.pos
|
||||||
|
val subtract : stream * stream -> Substring.substring
|
||||||
|
val eof : stream -> bool
|
||||||
|
val lastWasNL : stream -> bool
|
||||||
|
|
||||||
|
end = struct
|
||||||
|
|
||||||
|
datatype stream = S of (buf * int * bool)
|
||||||
|
and buf = B of {
|
||||||
|
data : string,
|
||||||
|
basePos : AntlrStreamPos.pos,
|
||||||
|
more : more ref,
|
||||||
|
input : unit -> string
|
||||||
|
}
|
||||||
|
and more = UNKNOWN | YES of buf | NO
|
||||||
|
|
||||||
|
fun mkStream (pos, input) =
|
||||||
|
(S (B {data = "", basePos = pos,
|
||||||
|
more = ref UNKNOWN,
|
||||||
|
input = input},
|
||||||
|
0, true))
|
||||||
|
|
||||||
|
fun getc (S (buf as B {data, basePos, more, input}, pos, lastWasNL)) =
|
||||||
|
if pos < String.size data then let
|
||||||
|
val c = String.sub (data, pos)
|
||||||
|
in
|
||||||
|
SOME (c, S (buf, pos+1, c = #"\n"))
|
||||||
|
end
|
||||||
|
else (case !more
|
||||||
|
of NO => NONE
|
||||||
|
| YES buf' => getc (S (buf', 0, lastWasNL))
|
||||||
|
| UNKNOWN =>
|
||||||
|
(case input()
|
||||||
|
of "" => (more := NO; NONE)
|
||||||
|
| data' => let
|
||||||
|
val buf' = B {
|
||||||
|
data = data',
|
||||||
|
basePos = AntlrStreamPos.forward (basePos, String.size data),
|
||||||
|
more = ref UNKNOWN,
|
||||||
|
input = input
|
||||||
|
}
|
||||||
|
in
|
||||||
|
more := YES buf';
|
||||||
|
getc (S (buf', 0, lastWasNL))
|
||||||
|
end
|
||||||
|
(* end case *))
|
||||||
|
(* end case *))
|
||||||
|
|
||||||
|
fun getpos (S (B {basePos, ...}, pos, _)) = AntlrStreamPos.forward (basePos, pos)
|
||||||
|
|
||||||
|
fun subtract (new, old) = let
|
||||||
|
val (S (B {data = ndata, basePos = nbasePos, ...}, npos, _)) = new
|
||||||
|
val (S (B {data = odata, basePos = obasePos,
|
||||||
|
more, input}, opos, _)) = old
|
||||||
|
in
|
||||||
|
if nbasePos = obasePos then
|
||||||
|
Substring.substring (ndata, opos, npos-opos)
|
||||||
|
else case !more
|
||||||
|
of NO => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated"
|
||||||
|
| UNKNOWN => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated"
|
||||||
|
| YES buf =>
|
||||||
|
Substring.extract (
|
||||||
|
Substring.concat [
|
||||||
|
Substring.extract (odata, opos, NONE),
|
||||||
|
subtract (new, S (buf, 0, false))],
|
||||||
|
0, NONE)
|
||||||
|
end
|
||||||
|
|
||||||
|
fun eof s = not (isSome (getc s))
|
||||||
|
|
||||||
|
fun lastWasNL (S (_, _, lastWasNL)) = lastWasNL
|
||||||
|
|
||||||
|
end
|
|
@ -0,0 +1,110 @@
|
||||||
|
(* wrapped-strm.sml
|
||||||
|
*
|
||||||
|
* COPYRIGHT (c) 2006
|
||||||
|
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||||
|
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* "wrapped" streams, which track the number of tokens read
|
||||||
|
* and allow "prepending" a sequence of tokens.
|
||||||
|
*)
|
||||||
|
|
||||||
|
functor AntlrWrappedStream (
|
||||||
|
structure Tok : ANTLR_TOKENS
|
||||||
|
structure Lex : ANTLR_LEXER
|
||||||
|
) :> sig
|
||||||
|
|
||||||
|
type tok_pos = Int.int (* position in terms of number of tokens *)
|
||||||
|
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||||
|
|
||||||
|
type repairs
|
||||||
|
val addRepair : repairs * tok_pos * Tok.token AntlrRepair.repair -> repairs
|
||||||
|
|
||||||
|
type repair_state
|
||||||
|
val mkRepairState : unit -> repair_state
|
||||||
|
val getRepairs : repair_state -> repairs
|
||||||
|
val setRepairs : repair_state * repairs -> unit
|
||||||
|
val maxRepairPos : repair_state -> tok_pos
|
||||||
|
|
||||||
|
type wstream
|
||||||
|
val wrap : repair_state * Lex.strm * lexer -> wstream
|
||||||
|
val unwrap : wstream -> Lex.strm * Tok.token AntlrRepair.repair list
|
||||||
|
|
||||||
|
val get1 : wstream -> Tok.token * AntlrStreamPos.span * wstream
|
||||||
|
val getPos : wstream -> AntlrStreamPos.pos
|
||||||
|
val getSpan : wstream -> AntlrStreamPos.span
|
||||||
|
val getTokPos : wstream -> tok_pos
|
||||||
|
|
||||||
|
end = struct
|
||||||
|
|
||||||
|
type tok_pos = Int.int (* position in terms of number of tokens *)
|
||||||
|
type repair = tok_pos * Tok.token AntlrRepair.repair
|
||||||
|
type repairs = repair list
|
||||||
|
type repair_state = repairs ref (* invariant: at most one repair per tok_pos *)
|
||||||
|
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||||
|
|
||||||
|
datatype global_state = GS of {
|
||||||
|
lex : (Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm),
|
||||||
|
repairs : repair_state
|
||||||
|
}
|
||||||
|
|
||||||
|
datatype wstream = WSTREAM of {
|
||||||
|
curTok : tok_pos,
|
||||||
|
strm : Lex.strm,
|
||||||
|
gs : global_state
|
||||||
|
}
|
||||||
|
|
||||||
|
fun mkRepairState() = ref []
|
||||||
|
fun getRepairs repairs = !repairs
|
||||||
|
fun setRepairs (repairs, new) = repairs := new
|
||||||
|
fun maxRepairPos (ref []) = ~1
|
||||||
|
| maxRepairPos (ref ((p, _)::_)) = p
|
||||||
|
|
||||||
|
open AntlrRepair
|
||||||
|
|
||||||
|
fun addRepair (rs, pos, r) =
|
||||||
|
if pos > maxRepairPos (ref rs) then (pos, r)::rs
|
||||||
|
else raise Fail (String.concat [
|
||||||
|
"bug: repairs not monotonic adding at ",
|
||||||
|
Int.toString pos, " to a max pos of ",
|
||||||
|
Int.toString (maxRepairPos (ref rs))])
|
||||||
|
|
||||||
|
fun wrap (repairs, strm, lex) =
|
||||||
|
WSTREAM {strm = strm, curTok = 0, gs = GS {lex = lex, repairs = repairs}}
|
||||||
|
fun unwrap (WSTREAM {strm, gs = GS {repairs, ...}, ...}) =
|
||||||
|
(strm, rev (#2 (ListPair.unzip (!repairs))))
|
||||||
|
|
||||||
|
fun skip1 lex strm = let
|
||||||
|
val (_, _, strm') = lex strm
|
||||||
|
in strm' end
|
||||||
|
fun get1 (WSTREAM {strm, curTok, gs = gs as GS {lex, repairs}}) = let
|
||||||
|
fun findRepair [] = NONE
|
||||||
|
| findRepair ((pos, r)::rs) = if curTok = pos then SOME r
|
||||||
|
else findRepair rs
|
||||||
|
in case findRepair (!repairs)
|
||||||
|
of NONE => let
|
||||||
|
val (tok, span, strm') = lex strm
|
||||||
|
in
|
||||||
|
(tok, span, WSTREAM {strm = strm', curTok = curTok + 1, gs = gs})
|
||||||
|
end
|
||||||
|
| SOME (p, Insert [tok]) =>
|
||||||
|
(tok, (p, p), WSTREAM {strm = strm, curTok = curTok + 1, gs = gs})
|
||||||
|
| SOME (p, Delete toks) => let
|
||||||
|
val strm' = foldl (fn (_, s) => (skip1 lex) s) strm toks
|
||||||
|
val (tok, span, strm'') = lex strm'
|
||||||
|
in
|
||||||
|
(tok, span, WSTREAM {strm = strm'', curTok = curTok + 1, gs = gs})
|
||||||
|
end
|
||||||
|
| SOME (p, Subst {old = [old], new = [new]}) =>
|
||||||
|
(new, (p, p), WSTREAM {strm = skip1 lex strm, curTok = curTok + 1, gs = gs})
|
||||||
|
| SOME (p, FailureAt _) => raise Fail "bug: findRepair found FailureAt"
|
||||||
|
| _ => raise Fail "bug: unimplemented"
|
||||||
|
end
|
||||||
|
|
||||||
|
(* get position AFTER trimming whitespace *)
|
||||||
|
fun getPos ws = let val (_, (left, _), _) = get1 ws in left end
|
||||||
|
fun getSpan ws = (getPos ws, getPos ws)
|
||||||
|
fun getTokPos (WSTREAM {curTok, ...}) = curTok
|
||||||
|
|
||||||
|
|
||||||
|
end
|
Loading…
Reference in New Issue