315 lines
14 KiB
Standard ML
315 lines
14 KiB
Standard ML
|
|
|
|
|
|
|
|
|
|
|
|
(*--------------------------------------------------------------------------*)
|
|
(* Functor: SymbolTable *)
|
|
(* *)
|
|
(* Exceptions raised by functions in this structure: *)
|
|
(* getSymIndex : Key.InternalError *)
|
|
(* getSymKey : NoSuchSymbol *)
|
|
(* hasSymIndex : none *)
|
|
(* makeSymTable : none *)
|
|
(* nullSymTable : none *)
|
|
(* printSymTable : none *)
|
|
(* usedSymbols : none *)
|
|
(*--------------------------------------------------------------------------*)
|
|
(* A symbol table maps Keys to consecutive integers. *)
|
|
(*--------------------------------------------------------------------------*)
|
|
signature SymTable =
|
|
sig
|
|
type Key
|
|
type SymTable
|
|
|
|
exception NoSuchSymbol
|
|
|
|
val nullSymTable : string -> SymTable
|
|
val makeSymTable : string * int -> SymTable
|
|
val clearSymTable : SymTable * int option -> unit
|
|
|
|
val hasSymIndex : SymTable * Key -> int option
|
|
val getSymIndex : SymTable * Key -> int
|
|
val getSymKey : SymTable * int -> Key
|
|
val usedSymbols : SymTable -> int
|
|
|
|
val assignSymIndex : SymTable * Key * int -> unit
|
|
val reserveSymIndex : SymTable -> int
|
|
|
|
val extractSymTable : SymTable -> Key vector
|
|
val printSymTable : SymTable -> unit
|
|
end
|
|
|
|
functor SymTable (structure Key : Key) : SymTable =
|
|
struct
|
|
open UtilError UtilInt
|
|
|
|
exception NoSuchSymbol
|
|
|
|
type Key = Key.Key
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* a symbol table can have at most size MAX_WIDTH. This is because *)
|
|
(* arrays may at most have Array.maxLen elements. We only use powers *)
|
|
(* of two as sizes, so we are really only interested in the position *)
|
|
(* of maxLen's highest bit. That would be the maximal width for hash *)
|
|
(* tables, and thus we must decrease it by one for obtaining the max *)
|
|
(* table width. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun highestBit w = if w=0w0 then 0 else 1+highestBit(Word.>>(w,0w1))
|
|
val MAX_WIDTH = highestBit (Word.fromInt Array.maxLen)-1
|
|
|
|
type Bucket = (Key * int) list
|
|
val nullBucket = nil : Bucket
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* buckets are sorted - though they are probably small. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun addToBucket (ni as (key,_),bucket) =
|
|
let
|
|
fun doit nil = [ni]
|
|
| doit (nis as (ni' as (key',_))::rest) =
|
|
case Key.compare (key',key)
|
|
of LESS => ni'::doit rest
|
|
| EQUAL => ni::rest
|
|
| GREATER => ni::nis
|
|
in
|
|
doit bucket
|
|
end
|
|
fun searchBucket (key,bucket) =
|
|
let
|
|
fun doit nil = NONE
|
|
| doit ((key',i)::rest) =
|
|
case Key.compare (key',key)
|
|
of LESS => doit rest
|
|
| EQUAL => SOME i
|
|
| GREATER => NONE
|
|
in
|
|
doit bucket
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* a symbol table consists of *)
|
|
(* - an array tab holding for each index its key *)
|
|
(* - a hash table, i.e. Bucket array, of double size than tab *)
|
|
(* - a hashFun mapping Key to the range of the hash table *)
|
|
(* - an integer width for computing table sizes *)
|
|
(* - an integer size wich is the size of the value table *)
|
|
(* - an integer count holding the next free index *)
|
|
(*--------------------------------------------------------------------*)
|
|
type SymTable = {desc : string,
|
|
tab : Key array Unsynchronized.ref,
|
|
hash : Bucket array Unsynchronized.ref,
|
|
hashFun : (Key -> int) Unsynchronized.ref,
|
|
width : int Unsynchronized.ref, (* bit width *)
|
|
size : int Unsynchronized.ref, (* tab size=2^width, hash size is double *)
|
|
count : int Unsynchronized.ref (* number of entries *)
|
|
}
|
|
|
|
fun nullSymTable desc = {desc = desc,
|
|
tab = Unsynchronized.ref (Array.array(1,Key.null)),
|
|
hash = Unsynchronized.ref (Array.array(2,nullBucket)),
|
|
hashFun = Unsynchronized.ref (fn _ => 0),
|
|
count = Unsynchronized.ref 0,
|
|
size = Unsynchronized.ref 1,
|
|
width = Unsynchronized.ref 0} : SymTable
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* how many entries are in the symtable? *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun usedSymbols ({count,...}:SymTable) = !count
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* what is the table load, i.e. percentage of number of entries to *)
|
|
(* hash table size = 100*count/(2*size) = 50*count/size. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun hashRatio({count,size,...}:SymTable) = 50 * !count div !size
|
|
handle Div => 100
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* this is the hash function. Key.hash hashes data to arbitrary *)
|
|
(* words, that are mapped to the hash range by this function, where *)
|
|
(* mask is the bitmask corresponding to the size of the hash table: *)
|
|
(* 1. square the word produced by Key.hash *)
|
|
(* 2. take the width bits from the middle of the square, these are *)
|
|
(* the bit-places influenced by all input bit-places: *)
|
|
(* - shift to the right by half of the destination width *)
|
|
(* - mask out all bits to the left of destination *)
|
|
(* this is a simple strategy but experiences good results. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun square (x:word) = Word.*(x,x)
|
|
fun hashKey(half,mask) x =
|
|
Word.toInt(Word.andb(mask,Word.>>(square(Key.hash x),half)))
|
|
fun makeHashFun(size,width) =
|
|
let
|
|
val mask = Word.fromInt(2*size-1)
|
|
val half = Word.fromInt((width+1) div 2)
|
|
in
|
|
hashKey(half,mask)
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* create a new symtable for 2^w, but at least 2 and at most 2^m *)
|
|
(* entries, where m is the value of MAX_WIDTH. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun makeSymTable (desc,w) =
|
|
let
|
|
val width= Int.min(Int.max(1,w),MAX_WIDTH)
|
|
val size = Word.toInt(Word.<<(0w1,Word.fromInt(width-1)))
|
|
in {desc = desc,
|
|
tab = Unsynchronized.ref (Array.array(size,Key.null)),
|
|
hash = Unsynchronized.ref (Array.array(2*size,nullBucket)),
|
|
hashFun = Unsynchronized.ref (makeHashFun(size,width)),
|
|
width = Unsynchronized.ref width,
|
|
size = Unsynchronized.ref size,
|
|
count = Unsynchronized.ref 0}
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* clear a dictionary. If the 2nd arg is SOME w, use w for resizing. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun clearSymTable (symTab:SymTable,widthOpt) =
|
|
case widthOpt
|
|
of NONE =>
|
|
let
|
|
val {tab=Unsynchronized.ref tab,hash=Unsynchronized.ref hash,size,count,...} = symTab
|
|
val _ = appInterval (fn i => Array.update(tab,i,Key.null)) (0,!count-1)
|
|
val _ = appInterval (fn i => Array.update(hash,i,nullBucket)) (0,!size*2-1)
|
|
in
|
|
count := 0
|
|
end
|
|
| SOME w =>
|
|
let
|
|
val {tab,hash,hashFun,width,size,count,...} = symTab
|
|
val newWidth = Int.min(Int.max(1,w),MAX_WIDTH)
|
|
val newSize = Word.toInt(Word.<<(0w1,Word.fromInt(newWidth-1)))
|
|
val _ = tab := (Array.array(newSize,Key.null))
|
|
val _ = hash := (Array.array(2*newSize,nullBucket))
|
|
val _ = hashFun := (makeHashFun(newSize,newWidth))
|
|
val _ = width := newWidth
|
|
val _ = size := newSize
|
|
in
|
|
count := 0
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* grow a symtable to the double size. raise InternalError if the *)
|
|
(* table already has maximal size. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun growTable ({desc,tab,hash,hashFun,width,size,count}:SymTable) =
|
|
let
|
|
val newWidth = if !width < MAX_WIDTH then !width+1
|
|
else raise InternalError
|
|
("SymTable","growTable",
|
|
String.concat ["growing the ",desc," symbol table ",
|
|
"exceeded the system maximum size of ",
|
|
Int.toString Array.maxLen," for arrays"])
|
|
val newSize = !size*2
|
|
|
|
val oldTab = !tab
|
|
val newTab = Array.array(newSize,Key.null)
|
|
val newHash = Array.array(2*newSize,nullBucket)
|
|
val newHashFun = makeHashFun(newSize,newWidth)
|
|
|
|
fun addToNew (inv as (i,key)) =
|
|
let
|
|
val idx = newHashFun key
|
|
val _ = Array.update(newHash,idx,addToBucket((key,i),Array.sub(newHash,idx)))
|
|
val _ = Array.update(newTab,i,key)
|
|
in ()
|
|
end
|
|
val _ = Array.appi addToNew (!tab)
|
|
|
|
val _ = tab := newTab
|
|
val _ = hash := newHash
|
|
val _ = size := newSize
|
|
val _ = width := newWidth
|
|
val _ = hashFun := newHashFun
|
|
in ()
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* lookup the key for an index of the symbol table. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun getSymKey({tab,count,...}:SymTable,idx) =
|
|
if !count>idx then Array.sub(!tab,idx)
|
|
else raise NoSuchSymbol
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* map a Key to its index in the symbol table. if it is not in the *)
|
|
(* symbol table yet, add a new entry with a new index. grow the table *)
|
|
(* if there is no more free index in the table. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun getSymIndex(st as {tab,hash,hashFun,size,count,...}:SymTable,key) =
|
|
let
|
|
val idx = !hashFun key
|
|
val bucket = Array.sub(!hash,idx)
|
|
in
|
|
case searchBucket(key,bucket)
|
|
of SOME i => i
|
|
| NONE => let val i = !count
|
|
val (idx',buck') = if !size>i then (idx,bucket)
|
|
else let val _ = growTable st
|
|
val idx' = !hashFun key
|
|
val buck' = Array.sub(!hash,idx')
|
|
in (idx',buck')
|
|
end
|
|
val _ = Array.update(!hash,idx',addToBucket((key,i),buck'))
|
|
val _ = Array.update(!tab,i,key)
|
|
val _ = count := i+1
|
|
in i
|
|
end
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* does a Key have an entry in a symbol table? *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun hasSymIndex({hash,hashFun,...}:SymTable,key) =
|
|
let
|
|
val idx = !hashFun key
|
|
val buck = Array.sub(!hash,idx)
|
|
in
|
|
searchBucket(key,buck)
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* reserve an index for a (yet unknown) key. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun reserveSymIndex(st as {size,count=count as Unsynchronized.ref i,...}:SymTable) =
|
|
let
|
|
val _ = if !size>i then () else growTable st
|
|
val _ = count := i+1
|
|
in i
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* assign an index to a (previously reserved) index. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun assignSymIndex(st as {count,hash,hashFun,tab,...}:SymTable,key,i) =
|
|
if !count<=i then raise NoSuchSymbol
|
|
else let val idx = !hashFun key
|
|
val buck = Array.sub(!hash,idx)
|
|
val newBuck = addToBucket((key,i),buck)
|
|
val _ = Array.update(!hash,idx,newBuck)
|
|
val _ = Array.update(!tab,i,key)
|
|
in ()
|
|
end
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* extract the contents of a symbol table to a vector. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun extractSymTable({count,tab,...}:SymTable) =
|
|
ArraySlice.vector(ArraySlice.slice(!tab,0,SOME(!count)))
|
|
|
|
(*--------------------------------------------------------------------*)
|
|
(* print the contents of the symbol table. *)
|
|
(*--------------------------------------------------------------------*)
|
|
fun printSymTable ({desc,tab,count,...}:SymTable) =
|
|
(print (desc^" table:\n");
|
|
ArraySlice.appi
|
|
(fn (n,key) =>
|
|
print (" "^Int.toString n^": "^Key.toString key^"\n"))
|
|
(ArraySlice.slice(!tab,0,SOME (!count))))
|
|
end
|