bringing ml-lpt-lib 110.68 into the main branch
git-svn-id: https://projects.brucker.ch/su4sml/svn/su4sml/trunk@8251 3260e6d1-4efc-4170-b0a7-36055960796d
This commit is contained in:
parent
7b67e892cf
commit
3055396f06
|
@ -0,0 +1,19 @@
|
|||
(* antlr-lexer-sig.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Signature for the lexer argument to parser functors generated
|
||||
* by ml-antlr.
|
||||
*)
|
||||
|
||||
signature ANTLR_LEXER = sig
|
||||
|
||||
type strm
|
||||
type pos = AntlrStreamPos.pos
|
||||
|
||||
val getPos : strm -> pos
|
||||
|
||||
end
|
|
@ -0,0 +1,20 @@
|
|||
(* antlr-tokens-sig.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Signature for generated tokens module, for ml-antlr
|
||||
*)
|
||||
|
||||
signature ANTLR_TOKENS = sig
|
||||
|
||||
type token
|
||||
|
||||
val allToks : token list
|
||||
val isKW : token -> bool
|
||||
val isEOF : token -> bool
|
||||
val toString : token -> string
|
||||
|
||||
end
|
|
@ -0,0 +1,45 @@
|
|||
(* ebnf.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* EBNF combinators used for ml-antlr.
|
||||
*)
|
||||
|
||||
functor AntlrEBNF (S : sig
|
||||
type strm
|
||||
val getSpan : strm -> AntlrStreamPos.span
|
||||
end) =
|
||||
struct
|
||||
|
||||
fun optional (pred, parse, strm) =
|
||||
if pred strm
|
||||
then let
|
||||
val (y, span, strm') = parse strm
|
||||
in
|
||||
(SOME y, span, strm')
|
||||
end
|
||||
else (NONE, S.getSpan strm, strm)
|
||||
|
||||
fun closure (pred, parse, strm) = let
|
||||
fun iter (strm, (left, right), ys) =
|
||||
if pred strm
|
||||
then let
|
||||
val (y, (_, right'), strm') = parse strm
|
||||
in iter (strm', (left, right'), y::ys)
|
||||
end
|
||||
else (List.rev ys, (left, right), strm)
|
||||
in
|
||||
iter (strm, S.getSpan strm, [])
|
||||
end
|
||||
|
||||
fun posclos (pred, parse, strm) = let
|
||||
val (y, (left, _), strm') = parse strm
|
||||
val (ys, (_, right), strm'') = closure (pred, parse, strm')
|
||||
in
|
||||
(y::ys, (left, right), strm'')
|
||||
end
|
||||
|
||||
end
|
|
@ -0,0 +1,400 @@
|
|||
(* err-handler.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Error repair for ml-antlr
|
||||
*)
|
||||
|
||||
functor AntlrErrHandler (
|
||||
structure Tok : ANTLR_TOKENS
|
||||
structure Lex : ANTLR_LEXER
|
||||
) : sig
|
||||
|
||||
exception ParseError
|
||||
|
||||
type 'a err_handler
|
||||
type wstream
|
||||
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||
type 'a wreader = wstream -> 'a * AntlrStreamPos.span * wstream
|
||||
|
||||
val mkErrHandler : { get : unit -> 'a, put : 'a -> unit }
|
||||
-> 'a err_handler * Tok.token wreader
|
||||
val launch : 'a err_handler * lexer * 'b wreader * bool ->
|
||||
Lex.strm -> ('b option * Lex.strm * Tok.token AntlrRepair.repair list)
|
||||
val failure : 'a err_handler -> 'b
|
||||
|
||||
val getPos : wstream -> AntlrStreamPos.pos
|
||||
val getSpan : wstream -> AntlrStreamPos.span
|
||||
|
||||
val whileDisabled : 'b err_handler -> (unit -> 'a) -> 'a
|
||||
|
||||
(*
|
||||
val wrap : err_handler -> (R.strm -> ('a * R.strm)) -> R.strm -> ('a * R.strm)
|
||||
val tryProds : 'b err_handler -> (R.strm -> 'a) list -> R.strm -> 'a
|
||||
*)
|
||||
|
||||
end = struct
|
||||
|
||||
exception ParseError
|
||||
exception Unrepairable
|
||||
|
||||
structure AR = AntlrRepair
|
||||
|
||||
structure WS = AntlrWrappedStream(
|
||||
structure Tok = Tok
|
||||
structure Lex = Lex)
|
||||
|
||||
type wstream = WS.wstream
|
||||
val getPos = WS.getPos
|
||||
val getSpan = WS.getSpan
|
||||
|
||||
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||
type 'a wreader = wstream -> 'a * AntlrStreamPos.span * wstream
|
||||
type 'a checkpoint = 'a * unit SMLofNJ.Cont.cont * wstream
|
||||
|
||||
datatype 'a err_handler = EH of {
|
||||
checkpoints : 'a checkpoint list ref,
|
||||
maxPos : WS.tok_pos ref,
|
||||
cont : unit SMLofNJ.Cont.cont option ref,
|
||||
get : unit -> 'a,
|
||||
put : 'a -> unit,
|
||||
rs : WS.repair_state,
|
||||
enabled : bool ref
|
||||
}
|
||||
|
||||
fun getGet (EH {get, ...}) = get
|
||||
fun getPut (EH {put, ...}) = put
|
||||
fun getRS (EH {rs, ...}) = rs
|
||||
|
||||
fun getCont (EH {cont, ...}) = !cont
|
||||
fun setCont (EH {cont, ...}, new) = cont := new
|
||||
|
||||
fun getCheckpoints (EH {checkpoints, ...}) = !checkpoints
|
||||
fun setCheckpoints (EH {checkpoints, ...}, new) = checkpoints := new
|
||||
|
||||
fun getMaxPos (EH {maxPos, ...}) = !maxPos
|
||||
fun setMaxPos (EH {maxPos, ...}, new) = maxPos := new
|
||||
|
||||
fun getEnabled (EH {enabled, ...}) = !enabled
|
||||
fun setEnabled (EH {enabled, ...}, n) = enabled := n
|
||||
(*
|
||||
fun getRepairs (EH {repairs, ...}) = !repairs
|
||||
fun addRepair (EH {repairs, ...}, n) = repairs := (!repairs) @ [n] *)
|
||||
|
||||
fun mkErrHandler {get, put} = let
|
||||
val cont = ref NONE
|
||||
val checkpoints = ref []
|
||||
val maxPos = ref ~1
|
||||
val eh = EH {
|
||||
cont = cont, checkpoints = checkpoints,
|
||||
maxPos = maxPos, get = get, put = put,
|
||||
rs = WS.mkRepairState(), enabled = ref true
|
||||
}
|
||||
fun lex ws =
|
||||
if isSome (!cont)
|
||||
then (maxPos := Int.max (WS.getTokPos ws, !maxPos);
|
||||
WS.get1 ws)
|
||||
else
|
||||
if WS.getTokPos ws > !maxPos
|
||||
then let
|
||||
val () = SMLofNJ.Cont.callcc
|
||||
(fn k => (checkpoints := (get(), k, ws) :: !checkpoints;
|
||||
maxPos := WS.getTokPos ws))
|
||||
in
|
||||
WS.get1 ws
|
||||
end
|
||||
else WS.get1 ws
|
||||
in (eh, lex)
|
||||
end
|
||||
|
||||
val isEOF = Tok.isEOF o #1 o WS.get1
|
||||
|
||||
val minAdvance = 1
|
||||
|
||||
fun restoreCheckpoint (eh, (x, cont, strm)) =
|
||||
(getPut eh x; (* retore refcell data for checkpoint *)
|
||||
setMaxPos (eh, WS.getTokPos strm);
|
||||
SMLofNJ.Cont.throw cont ())
|
||||
|
||||
fun tryRepair (eh, c) = let
|
||||
val oldMax = getMaxPos eh
|
||||
val firstTime = ref true
|
||||
val () = SMLofNJ.Cont.callcc (fn k => (setCont (eh, SOME k)))
|
||||
in if !firstTime then
|
||||
(* first time through, try the repair *)
|
||||
(firstTime := false; restoreCheckpoint (eh, c))
|
||||
else
|
||||
(* second time through, return the distance achieved *)
|
||||
(setCont (eh, NONE); getMaxPos eh - oldMax)
|
||||
end
|
||||
|
||||
local
|
||||
|
||||
val allToks = List.filter (not o Tok.isEOF) Tok.allToks
|
||||
fun mkDelete strm = (WS.getPos strm, AR.Delete [#1 (WS.get1 strm)])
|
||||
fun mkInsert strm tok = (WS.getPos strm, AR.Insert [tok])
|
||||
fun mkSubst strm tok = (WS.getPos strm, AR.Subst { old = [#1 (WS.get1 strm)], new = [tok] })
|
||||
fun allRepairs strm =
|
||||
(if isEOF strm then [] else [mkDelete strm]) @
|
||||
map (mkInsert strm) allToks @
|
||||
(if isEOF strm then [] else map (mkSubst strm) allToks)
|
||||
|
||||
fun involvesKW (_, r) = (case r
|
||||
of AR.Insert toks => List.exists Tok.isKW toks
|
||||
| AR.Delete toks => List.exists Tok.isKW toks
|
||||
| AR.Subst {old,new} => List.exists Tok.isKW (old @ new)
|
||||
| AR.FailureAt _ => false
|
||||
(* end case *))
|
||||
|
||||
in
|
||||
fun trySingleToken eh = let
|
||||
val RS = getRS eh
|
||||
val oldRepairs = WS.getRepairs RS
|
||||
val oldMax = getMaxPos eh
|
||||
val oldMaxRepair = WS.maxRepairPos RS
|
||||
val oldCheckpoints = getCheckpoints eh
|
||||
fun restoreToErr() = (WS.setRepairs (RS, oldRepairs); setMaxPos (eh, oldMax))
|
||||
(* stream value for checkpoint *)
|
||||
fun strmOf (_, _, strm) = strm
|
||||
fun setupRepair (r, c::cs) =
|
||||
WS.setRepairs (RS, WS.addRepair (oldRepairs, WS.getTokPos (strmOf c), r))
|
||||
| setupRepair _ = raise Fail "bug"
|
||||
fun try (_::c::cs, [], best, n) =
|
||||
if n < 15 andalso WS.getTokPos (strmOf c) > oldMaxRepair
|
||||
then try (c::cs, allRepairs (strmOf c), best, n+1)
|
||||
else try ([], [], best, n)
|
||||
| try (c::cs, r::rs, best, n) = (
|
||||
restoreToErr(); setupRepair (r, c::cs);
|
||||
let val score = tryRepair (eh, c)
|
||||
- (if involvesKW r then 2 else 0)
|
||||
+ (case #2 r
|
||||
of AR.Insert _ => ~1
|
||||
| AR.Delete _ => 1
|
||||
| AR.Subst _ => 0
|
||||
| _ => 0)
|
||||
val oldScore = case best of NONE => 0
|
||||
| SOME (_, _, s) => s
|
||||
in if score > oldScore andalso score > minAdvance
|
||||
then try (c::cs, rs, SOME (c::cs, r, score), n)
|
||||
else try (c::cs, rs, best, n)
|
||||
end)
|
||||
| try (_, [], SOME (c::cs, r, score), _) =
|
||||
(setupRepair (r, c::cs);
|
||||
setCheckpoints (eh, c::cs);
|
||||
setMaxPos (eh, List.length cs);
|
||||
restoreCheckpoint (eh, c))
|
||||
| try _ = restoreToErr()
|
||||
val curStrm = strmOf (hd oldCheckpoints)
|
||||
in if WS.getTokPos curStrm <= WS.maxRepairPos RS then ()
|
||||
else try (oldCheckpoints, allRepairs curStrm, NONE, 1)
|
||||
end
|
||||
end
|
||||
|
||||
val maxDel = 50
|
||||
|
||||
fun tryDeletion eh = let
|
||||
fun getn (strm, 0, acc) = SOME (rev acc)
|
||||
| getn (strm, n, acc) = let
|
||||
val (tok, _, strm') = WS.get1 strm
|
||||
in
|
||||
if Tok.isEOF tok then NONE
|
||||
else getn (strm', n-1, tok::acc)
|
||||
end
|
||||
val rs = getRS eh
|
||||
val oldRepairs = WS.getRepairs rs
|
||||
val oldMax = getMaxPos eh
|
||||
val oldRepairMax = WS.maxRepairPos rs
|
||||
fun restoreToErr() = (WS.setRepairs (rs, oldRepairs); setMaxPos (eh, oldMax))
|
||||
(* stream value for checkpoint *)
|
||||
fun strmOf (_, _, strm) = strm
|
||||
val cs = getCheckpoints eh
|
||||
fun tryCS ([], n, max) = ()
|
||||
| tryCS (c::cs, n, max) =
|
||||
if WS.getTokPos (strmOf c) <= oldRepairMax
|
||||
orelse oldMax - WS.getTokPos (strmOf c) > maxDel then ()
|
||||
else
|
||||
(WS.setRepairs (rs,
|
||||
WS.addRepair (oldRepairs, WS.getTokPos (strmOf c),
|
||||
(WS.getPos (strmOf c), AR.Delete (valOf (getn (strmOf c, n, []))))));
|
||||
setMaxPos (eh, WS.getTokPos (strmOf c));
|
||||
if tryRepair (eh, c) > minAdvance + 2
|
||||
then (setCheckpoints (eh, c::cs);
|
||||
restoreCheckpoint (eh, c))
|
||||
else (restoreToErr(); tryCS (cs, n+1, max)))
|
||||
and tryN (n, c::cs, max) = (case getn (strmOf c, n, [])
|
||||
of NONE => ()
|
||||
| SOME toks => (tryCS (c::cs, n, max);
|
||||
if n > max then () else tryN (n+1, c::cs, max))
|
||||
(* end case *))
|
||||
| tryN _ = raise Fail "bug"
|
||||
in
|
||||
tryN (1, [hd cs], 5);
|
||||
tryN (1, cs, maxDel)
|
||||
end
|
||||
|
||||
fun failure eh =
|
||||
if getEnabled eh
|
||||
then (case getCont eh
|
||||
of NONE => (trySingleToken eh;
|
||||
tryDeletion eh;
|
||||
raise Unrepairable)
|
||||
| SOME k => SMLofNJ.Cont.throw k ()
|
||||
(* end case *))
|
||||
else raise ParseError
|
||||
|
||||
fun launch (eh, lex, parse, reqEOF) strm = let
|
||||
val wstrm = WS.wrap (getRS eh, strm, lex)
|
||||
in let val (result, _, wstrm') = parse wstrm
|
||||
val (strm', repairs) = WS.unwrap wstrm'
|
||||
in
|
||||
if reqEOF andalso not (isEOF wstrm') then failure eh
|
||||
else ();
|
||||
(SOME result, strm', repairs)
|
||||
end
|
||||
handle Unrepairable => let
|
||||
val (_, repairs) = WS.unwrap wstrm
|
||||
val (tok, (pos, _), _) = (WS.get1 o #3 o hd o getCheckpoints) eh
|
||||
in (NONE, strm, repairs @ [(pos, AR.FailureAt tok)]) end
|
||||
end
|
||||
|
||||
fun whileDisabled eh f = let
|
||||
val oldEnabled = getEnabled eh
|
||||
in
|
||||
setEnabled (eh, false);
|
||||
(f () handle e => (setEnabled (eh, oldEnabled);
|
||||
raise e))
|
||||
before setEnabled (eh, oldEnabled)
|
||||
end
|
||||
|
||||
(*
|
||||
fun throwIfEH (eh, t) =
|
||||
if getEnabled eh then
|
||||
Option.app (fn k => SMLofNJ.Cont.throw k (SOME t)) (getCont eh)
|
||||
else ()
|
||||
|
||||
fun wrap eh f t = if not (getEnabled eh) then f t else let
|
||||
val cont_ref : retry_cont option ref = ref NONE
|
||||
val state = (getGet eh) ()
|
||||
val t' = SMLofNJ.Cont.callcc (fn k => (cont_ref := SOME k; t))
|
||||
val retry = (t', valOf (!cont_ref))
|
||||
in
|
||||
getPut eh state;
|
||||
f t'
|
||||
handle RepairableError => (
|
||||
throwIfEH (eh, t');
|
||||
raise JumpOut [retry])
|
||||
| JumpOut stack => (
|
||||
throwIfEH (eh, t');
|
||||
raise JumpOut (retry::stack))
|
||||
end
|
||||
|
||||
fun findWindow (stack) = let
|
||||
val revStack = rev stack
|
||||
val rightMost = hd revStack
|
||||
fun TOf (t, _) = t
|
||||
fun find [] = raise (Fail "BUG: findWindow given an empty stack")
|
||||
| find [top] = (top, rightMost)
|
||||
| find (top::stack) =
|
||||
if R.farEnoughWindow {startAt = TOf top, endAt = TOf rightMost}
|
||||
then (top, rightMost)
|
||||
else find stack
|
||||
in
|
||||
find revStack
|
||||
end
|
||||
|
||||
fun tryRepair (eh, cont) t =
|
||||
(case SMLofNJ.Cont.callcc (fn k => (setCont (eh, SOME k); NONE))
|
||||
of NONE =>
|
||||
(* first time through, try the repair *)
|
||||
SMLofNJ.Cont.throw cont t
|
||||
| SOME t' =>
|
||||
(* second time through, return the new right-most strm *)
|
||||
(setCont (eh, NONE); t')
|
||||
(* end case *))
|
||||
|
||||
fun primaryRepair (eh, stack) = let
|
||||
val ((leftT, leftCont), (rightT, rightCont)) =
|
||||
findWindow stack
|
||||
val repair = R.chooseRepair {
|
||||
startAt = leftT,
|
||||
endAt = rightT,
|
||||
try = tryRepair (eh, leftCont)
|
||||
}
|
||||
in case repair
|
||||
of SOME {repair, repaired} =>
|
||||
SOME (repair, leftCont, repaired)
|
||||
| NONE => NONE
|
||||
end
|
||||
|
||||
fun secondaryRepair (eh, revStack) = let
|
||||
val stack = rev revStack
|
||||
val (errStrm, errCont) = hd stack
|
||||
fun try ((strm, cont), strm', next) = let
|
||||
val strm'' = tryRepair (eh, cont) strm'
|
||||
in case (R.tryDeletion {oldStartAt = strm,
|
||||
startAt = strm',
|
||||
endAt = strm''})
|
||||
of SOME r => SOME (r, cont, strm')
|
||||
| NONE => next()
|
||||
end
|
||||
fun rightRepair (strm, n) =
|
||||
if n = 0 then NONE
|
||||
else let
|
||||
val strm' = R.skip (strm, 1)
|
||||
in
|
||||
try (hd stack, strm', fn () => rightRepair (strm', n-1))
|
||||
end
|
||||
fun leftRightRepair (strm, []) =
|
||||
if R.isEmpty strm then
|
||||
(addRepair (eh, (R.getPos errStrm,
|
||||
Repair.FailureAt (#1 (R.get1 errStrm))));
|
||||
raise UnrepairableError)
|
||||
else leftRightRepair (R.skip (strm, 1), stack)
|
||||
| leftRightRepair (strm, top::stack) =
|
||||
try (top, strm, fn () => leftRightRepair (strm, stack))
|
||||
in case rightRepair (errStrm, 5)
|
||||
of SOME r => r
|
||||
| _ => valOf (leftRightRepair (errStrm, []))
|
||||
end
|
||||
|
||||
fun repair (eh, stack) = (case primaryRepair (eh, stack)
|
||||
of SOME r => r
|
||||
| NONE => secondaryRepair (eh, stack)
|
||||
(* end case *))
|
||||
|
||||
fun launch eh f t = let
|
||||
val (x, _, t') = wrap eh f t
|
||||
handle JumpOut stack => let
|
||||
val (r, cont, t') = repair (eh, stack)
|
||||
in
|
||||
addRepair (eh, r);
|
||||
SMLofNJ.Cont.throw cont t'
|
||||
end
|
||||
in
|
||||
throwIfEH (eh, t');
|
||||
(SOME x, t', getRepairs eh)
|
||||
end
|
||||
handle UnrepairableError =>
|
||||
(NONE, t, getRepairs eh)
|
||||
*)
|
||||
|
||||
(*
|
||||
fun tryProds eh prods strm = let
|
||||
fun try [] = raise RepairableError
|
||||
| try (prod :: prods) = let
|
||||
val state = (getGet eh) ()
|
||||
in
|
||||
whileDisabled eh (fn () => prod strm)
|
||||
handle _ =>
|
||||
(getPut eh state;
|
||||
try (prods))
|
||||
end
|
||||
in
|
||||
try prods
|
||||
end
|
||||
*)
|
||||
|
||||
end
|
|
@ -0,0 +1,41 @@
|
|||
(* ml-lpt-lib.cm
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Sources file for ml-lpt lib
|
||||
*)
|
||||
|
||||
Library
|
||||
|
||||
signature ANTLR_LEXER
|
||||
signature ANTLR_TOKENS
|
||||
|
||||
structure AntlrRepair
|
||||
structure AntlrStreamPos
|
||||
structure ULexBuffer
|
||||
|
||||
functor AntlrEBNF
|
||||
functor AntlrErrHandler
|
||||
|
||||
is
|
||||
|
||||
$/basis.cm
|
||||
$/smlnj-lib.cm
|
||||
ebnf.sml
|
||||
err-handler.sml
|
||||
antlr-lexer-sig.sml
|
||||
repair.sml
|
||||
|
||||
(*
|
||||
repairable-strm-sig.sml
|
||||
repairable-strm.sml
|
||||
*)
|
||||
wrapped-strm.sml
|
||||
|
||||
stream-pos.sml
|
||||
antlr-tokens-sig.sml
|
||||
|
||||
ulex-buffer.sml
|
|
@ -0,0 +1,38 @@
|
|||
(* ml-lpt-lib.mlb
|
||||
*
|
||||
* COPYRIGHT (c) 2008
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* MLB file for ml-lpt lib
|
||||
*)
|
||||
|
||||
local
|
||||
|
||||
$(SML_LIB)/basis/basis.mlb
|
||||
$(SML_LIB)/basis/sml-nj.mlb
|
||||
$(SML_LIB)/smlnj-lib/Util/smlnj-lib.mlb
|
||||
|
||||
stream-pos.sml
|
||||
ebnf.sml
|
||||
antlr-lexer-sig.sml
|
||||
antlr-tokens-sig.sml
|
||||
repair.sml
|
||||
wrapped-strm.sml
|
||||
err-handler.sml
|
||||
ulex-buffer.sml
|
||||
|
||||
in
|
||||
|
||||
signature ANTLR_LEXER
|
||||
signature ANTLR_TOKENS
|
||||
|
||||
structure AntlrRepair
|
||||
structure AntlrStreamPos
|
||||
structure ULexBuffer
|
||||
|
||||
functor AntlrEBNF
|
||||
functor AntlrErrHandler
|
||||
|
||||
end
|
|
@ -0,0 +1,38 @@
|
|||
(* repair.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Representation and pretty-printing of ml-antlr repair actions
|
||||
*)
|
||||
|
||||
structure AntlrRepair = struct
|
||||
|
||||
datatype 'a repair_action
|
||||
= Insert of 'a list
|
||||
| Delete of 'a list
|
||||
| Subst of {
|
||||
old : 'a list,
|
||||
new : 'a list
|
||||
}
|
||||
| FailureAt of 'a
|
||||
|
||||
type 'a repair = AntlrStreamPos.pos * 'a repair_action
|
||||
|
||||
fun actionToString tokToString repair = let
|
||||
val toksToString = (String.concatWith " ") o (map tokToString)
|
||||
in case repair
|
||||
of Insert toks => "try inserting " ^ toksToString toks
|
||||
| Delete toks => "try deleting " ^ toksToString toks
|
||||
| Subst {old, new} =>
|
||||
"try substituting " ^ toksToString new ^ " for "
|
||||
^ toksToString old
|
||||
| FailureAt tok => "syntax error at " ^ toksToString [tok]
|
||||
end
|
||||
|
||||
fun repairToString tokToString sm (pos, repair) =
|
||||
(AntlrStreamPos.toString sm pos ^ ": " ^ actionToString tokToString repair)
|
||||
|
||||
end
|
|
@ -0,0 +1,112 @@
|
|||
(* stream-pos.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Very simple position tracking and source maps for ml-ulex/ml-antlr
|
||||
*)
|
||||
|
||||
structure AntlrStreamPos :> sig
|
||||
|
||||
type pos = Position.int
|
||||
type span = pos * pos
|
||||
type sourceloc = { fileName : string option, lineNo : int, colNo : int }
|
||||
type sourcemap
|
||||
|
||||
exception PosMustIncrease
|
||||
|
||||
(* the result of moving forward an integer number of characters *)
|
||||
val forward : pos * int -> pos
|
||||
|
||||
val mkSourcemap : unit -> sourcemap
|
||||
val mkSourcemap' : string -> sourcemap
|
||||
|
||||
val same : sourcemap * sourcemap -> bool
|
||||
|
||||
(* log a new line occurence *)
|
||||
val markNewLine : sourcemap -> pos -> unit
|
||||
(* resychronize to a full source location *)
|
||||
val resynch : sourcemap -> pos * sourceloc -> unit
|
||||
|
||||
val sourceLoc : sourcemap -> pos -> sourceloc
|
||||
val fileName : sourcemap -> pos -> string option
|
||||
val lineNo : sourcemap -> pos -> int
|
||||
val colNo : sourcemap -> pos -> int
|
||||
val toString : sourcemap -> pos -> string
|
||||
val spanToString : sourcemap -> span -> string
|
||||
|
||||
end = struct
|
||||
|
||||
type pos = Position.int
|
||||
type span = pos * pos
|
||||
type sourceloc = { fileName : string option, lineNo : int, colNo : int }
|
||||
type sourcemap = (sourceloc * pos) list ref
|
||||
|
||||
exception PosMustIncrease
|
||||
|
||||
fun forward (p, i) = p + (Position.fromInt i)
|
||||
|
||||
fun mkSrcMap fileOpt = ref [
|
||||
({fileName = fileOpt, lineNo = 1, colNo = 0}, Position.fromInt ~1)
|
||||
]
|
||||
|
||||
fun mkSourcemap () = mkSrcMap NONE
|
||||
fun mkSourcemap' (fname) = mkSrcMap (SOME fname)
|
||||
|
||||
fun same (sm1 : sourcemap, sm2) = (sm1 = sm2)
|
||||
|
||||
fun markNewLine sm (newPos : pos) = let
|
||||
val ({fileName, lineNo, colNo}, pos) = hd (!sm)
|
||||
in
|
||||
if pos < newPos then
|
||||
sm := ({ fileName = fileName,
|
||||
lineNo = lineNo + 1,
|
||||
colNo = 0},
|
||||
newPos)::(!sm)
|
||||
else () (* raise PosMustIncrease *)
|
||||
end
|
||||
|
||||
fun resynch sm (newPos : pos, sourceLoc) = let
|
||||
val (_, pos) = hd (!sm)
|
||||
in
|
||||
(* if pos < newPos then *)
|
||||
sm := (sourceLoc, newPos)::(!sm)
|
||||
(* else raise PosMustIncrease *)
|
||||
end
|
||||
|
||||
fun findLB ((loc, pos)::sm, pos' : pos) =
|
||||
if pos <= pos' then (loc, pos)
|
||||
else findLB(sm, pos')
|
||||
| findLB _ = raise Fail "impossible"
|
||||
|
||||
fun sourceLoc sm pos = let
|
||||
val ({fileName, lineNo, colNo}, anchor) = findLB(!sm, pos)
|
||||
in
|
||||
{fileName = fileName, lineNo = lineNo,
|
||||
colNo = colNo + Position.toInt(pos - anchor)}
|
||||
end
|
||||
fun fileName sm pos = #fileName (sourceLoc sm pos)
|
||||
fun lineNo sm pos = #lineNo (sourceLoc sm pos)
|
||||
fun colNo sm pos = #colNo (sourceLoc sm pos)
|
||||
fun toString sm pos = String.concat [
|
||||
"[", case fileName sm pos
|
||||
of NONE => ""
|
||||
| SOME f => f ^ ":",
|
||||
Int.toString (lineNo sm pos), ".",
|
||||
Int.toString (colNo sm pos), "]"]
|
||||
fun spanToString sm (pos1, pos2) =
|
||||
if lineNo sm pos1 = lineNo sm pos2 andalso
|
||||
colNo sm pos1 = colNo sm pos2
|
||||
then toString sm pos1
|
||||
else String.concat [
|
||||
"[", case fileName sm pos1
|
||||
of NONE => ""
|
||||
| SOME f => f ^ ":",
|
||||
Int.toString (lineNo sm pos1), ".",
|
||||
Int.toString (colNo sm pos1), "-",
|
||||
Int.toString (lineNo sm pos2), ".",
|
||||
Int.toString (colNo sm pos2), "]"]
|
||||
|
||||
end
|
|
@ -0,0 +1,88 @@
|
|||
(* ulex-buffer.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* Forward-chained buffers for lexing
|
||||
*)
|
||||
|
||||
structure ULexBuffer : sig
|
||||
|
||||
type stream
|
||||
val mkStream : (AntlrStreamPos.pos * (unit -> string)) -> stream
|
||||
val getc : stream -> (Char.char * stream) option
|
||||
val getpos : stream -> AntlrStreamPos.pos
|
||||
val subtract : stream * stream -> Substring.substring
|
||||
val eof : stream -> bool
|
||||
val lastWasNL : stream -> bool
|
||||
|
||||
end = struct
|
||||
|
||||
datatype stream = S of (buf * int * bool)
|
||||
and buf = B of {
|
||||
data : string,
|
||||
basePos : AntlrStreamPos.pos,
|
||||
more : more ref,
|
||||
input : unit -> string
|
||||
}
|
||||
and more = UNKNOWN | YES of buf | NO
|
||||
|
||||
fun mkStream (pos, input) =
|
||||
(S (B {data = "", basePos = pos,
|
||||
more = ref UNKNOWN,
|
||||
input = input},
|
||||
0, true))
|
||||
|
||||
fun getc (S (buf as B {data, basePos, more, input}, pos, lastWasNL)) =
|
||||
if pos < String.size data then let
|
||||
val c = String.sub (data, pos)
|
||||
in
|
||||
SOME (c, S (buf, pos+1, c = #"\n"))
|
||||
end
|
||||
else (case !more
|
||||
of NO => NONE
|
||||
| YES buf' => getc (S (buf', 0, lastWasNL))
|
||||
| UNKNOWN =>
|
||||
(case input()
|
||||
of "" => (more := NO; NONE)
|
||||
| data' => let
|
||||
val buf' = B {
|
||||
data = data',
|
||||
basePos = AntlrStreamPos.forward (basePos, String.size data),
|
||||
more = ref UNKNOWN,
|
||||
input = input
|
||||
}
|
||||
in
|
||||
more := YES buf';
|
||||
getc (S (buf', 0, lastWasNL))
|
||||
end
|
||||
(* end case *))
|
||||
(* end case *))
|
||||
|
||||
fun getpos (S (B {basePos, ...}, pos, _)) = AntlrStreamPos.forward (basePos, pos)
|
||||
|
||||
fun subtract (new, old) = let
|
||||
val (S (B {data = ndata, basePos = nbasePos, ...}, npos, _)) = new
|
||||
val (S (B {data = odata, basePos = obasePos,
|
||||
more, input}, opos, _)) = old
|
||||
in
|
||||
if nbasePos = obasePos then
|
||||
Substring.substring (ndata, opos, npos-opos)
|
||||
else case !more
|
||||
of NO => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated"
|
||||
| UNKNOWN => raise Fail "BUG: ULexBuffer.subtract, but buffers are unrelated"
|
||||
| YES buf =>
|
||||
Substring.extract (
|
||||
Substring.concat [
|
||||
Substring.extract (odata, opos, NONE),
|
||||
subtract (new, S (buf, 0, false))],
|
||||
0, NONE)
|
||||
end
|
||||
|
||||
fun eof s = not (isSome (getc s))
|
||||
|
||||
fun lastWasNL (S (_, _, lastWasNL)) = lastWasNL
|
||||
|
||||
end
|
|
@ -0,0 +1,110 @@
|
|||
(* wrapped-strm.sml
|
||||
*
|
||||
* COPYRIGHT (c) 2006
|
||||
* John Reppy (http://www.cs.uchicago.edu/~jhr)
|
||||
* Aaron Turon (http://www.cs.uchicago.edu/~adrassi)
|
||||
* All rights reserved.
|
||||
*
|
||||
* "wrapped" streams, which track the number of tokens read
|
||||
* and allow "prepending" a sequence of tokens.
|
||||
*)
|
||||
|
||||
functor AntlrWrappedStream (
|
||||
structure Tok : ANTLR_TOKENS
|
||||
structure Lex : ANTLR_LEXER
|
||||
) :> sig
|
||||
|
||||
type tok_pos = Int.int (* position in terms of number of tokens *)
|
||||
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||
|
||||
type repairs
|
||||
val addRepair : repairs * tok_pos * Tok.token AntlrRepair.repair -> repairs
|
||||
|
||||
type repair_state
|
||||
val mkRepairState : unit -> repair_state
|
||||
val getRepairs : repair_state -> repairs
|
||||
val setRepairs : repair_state * repairs -> unit
|
||||
val maxRepairPos : repair_state -> tok_pos
|
||||
|
||||
type wstream
|
||||
val wrap : repair_state * Lex.strm * lexer -> wstream
|
||||
val unwrap : wstream -> Lex.strm * Tok.token AntlrRepair.repair list
|
||||
|
||||
val get1 : wstream -> Tok.token * AntlrStreamPos.span * wstream
|
||||
val getPos : wstream -> AntlrStreamPos.pos
|
||||
val getSpan : wstream -> AntlrStreamPos.span
|
||||
val getTokPos : wstream -> tok_pos
|
||||
|
||||
end = struct
|
||||
|
||||
type tok_pos = Int.int (* position in terms of number of tokens *)
|
||||
type repair = tok_pos * Tok.token AntlrRepair.repair
|
||||
type repairs = repair list
|
||||
type repair_state = repairs ref (* invariant: at most one repair per tok_pos *)
|
||||
type lexer = Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm
|
||||
|
||||
datatype global_state = GS of {
|
||||
lex : (Lex.strm -> Tok.token * AntlrStreamPos.span * Lex.strm),
|
||||
repairs : repair_state
|
||||
}
|
||||
|
||||
datatype wstream = WSTREAM of {
|
||||
curTok : tok_pos,
|
||||
strm : Lex.strm,
|
||||
gs : global_state
|
||||
}
|
||||
|
||||
fun mkRepairState() = ref []
|
||||
fun getRepairs repairs = !repairs
|
||||
fun setRepairs (repairs, new) = repairs := new
|
||||
fun maxRepairPos (ref []) = ~1
|
||||
| maxRepairPos (ref ((p, _)::_)) = p
|
||||
|
||||
open AntlrRepair
|
||||
|
||||
fun addRepair (rs, pos, r) =
|
||||
if pos > maxRepairPos (ref rs) then (pos, r)::rs
|
||||
else raise Fail (String.concat [
|
||||
"bug: repairs not monotonic adding at ",
|
||||
Int.toString pos, " to a max pos of ",
|
||||
Int.toString (maxRepairPos (ref rs))])
|
||||
|
||||
fun wrap (repairs, strm, lex) =
|
||||
WSTREAM {strm = strm, curTok = 0, gs = GS {lex = lex, repairs = repairs}}
|
||||
fun unwrap (WSTREAM {strm, gs = GS {repairs, ...}, ...}) =
|
||||
(strm, rev (#2 (ListPair.unzip (!repairs))))
|
||||
|
||||
fun skip1 lex strm = let
|
||||
val (_, _, strm') = lex strm
|
||||
in strm' end
|
||||
fun get1 (WSTREAM {strm, curTok, gs = gs as GS {lex, repairs}}) = let
|
||||
fun findRepair [] = NONE
|
||||
| findRepair ((pos, r)::rs) = if curTok = pos then SOME r
|
||||
else findRepair rs
|
||||
in case findRepair (!repairs)
|
||||
of NONE => let
|
||||
val (tok, span, strm') = lex strm
|
||||
in
|
||||
(tok, span, WSTREAM {strm = strm', curTok = curTok + 1, gs = gs})
|
||||
end
|
||||
| SOME (p, Insert [tok]) =>
|
||||
(tok, (p, p), WSTREAM {strm = strm, curTok = curTok + 1, gs = gs})
|
||||
| SOME (p, Delete toks) => let
|
||||
val strm' = foldl (fn (_, s) => (skip1 lex) s) strm toks
|
||||
val (tok, span, strm'') = lex strm'
|
||||
in
|
||||
(tok, span, WSTREAM {strm = strm'', curTok = curTok + 1, gs = gs})
|
||||
end
|
||||
| SOME (p, Subst {old = [old], new = [new]}) =>
|
||||
(new, (p, p), WSTREAM {strm = skip1 lex strm, curTok = curTok + 1, gs = gs})
|
||||
| SOME (p, FailureAt _) => raise Fail "bug: findRepair found FailureAt"
|
||||
| _ => raise Fail "bug: unimplemented"
|
||||
end
|
||||
|
||||
(* get position AFTER trimming whitespace *)
|
||||
fun getPos ws = let val (_, (left, _), _) = get1 ws in left end
|
||||
fun getSpan ws = (getPos ws, getPos ws)
|
||||
fun getTokPos (WSTREAM {curTok, ...}) = curTok
|
||||
|
||||
|
||||
end
|
Loading…
Reference in New Issue