(*--------------------------------------------------------------------------*)
(* Functor: SymTable                                                        *)
(*                                                                          *)
(* Exceptions raised by functions in this structure:                        *)
(*   assignSymIndex  : NoSuchSymbol                                         *)
(*   clearSymTable   : none                                                 *)
(*   extractSymTable : none                                                 *)
(*   getSymIndex     : InternalError                                        *)
(*   getSymKey       : NoSuchSymbol                                         *)
(*   hasSymIndex     : none                                                 *)
(*   makeSymTable    : none                                                 *)
(*   nullSymTable    : none                                                 *)
(*   printSymTable   : none                                                 *)
(*   reserveSymIndex : InternalError                                        *)
(*   usedSymbols     : none                                                 *)
(*--------------------------------------------------------------------------*)
(* A symbol table maps Keys to consecutive integers.                        *)
(*--------------------------------------------------------------------------*)
signature SymTable =
   sig
      type Key
      type SymTable

      exception NoSuchSymbol

      val nullSymTable    : string -> SymTable
      val makeSymTable    : string * int -> SymTable
      val clearSymTable   : SymTable * int option -> unit

      val hasSymIndex     : SymTable * Key -> int option
      val getSymIndex     : SymTable * Key -> int
      val getSymKey       : SymTable * int -> Key
      val usedSymbols     : SymTable -> int

      val assignSymIndex  : SymTable * Key * int -> unit
      val reserveSymIndex : SymTable -> int

      val extractSymTable : SymTable -> Key vector
      val printSymTable   : SymTable -> unit
   end

functor SymTable (structure Key : Key) : SymTable =
   struct
      open UtilError UtilInt

      exception NoSuchSymbol

      type Key = Key.Key

      (*--------------------------------------------------------------------*)
      (* a symbol table can have at most size MAX_WIDTH. This is because    *)
      (* arrays may at most have Array.maxLen elements. We only use powers  *)
      (* of two as sizes, so we are really only interested in the position  *)
      (* of maxLen's highest bit. That would be the maximal width for hash  *)
      (* tables, and thus we must decrease it by one for obtaining the max  *)
      (* table width.                                                       *)
      (*--------------------------------------------------------------------*)
      fun highestBit w = if w=0w0 then 0 else 1+highestBit(Word.>>(w,0w1))
      val MAX_WIDTH = highestBit (Word.fromInt Array.maxLen)-1

      type Bucket = (Key * int) list
      val nullBucket = nil : Bucket

      (*--------------------------------------------------------------------*)
      (* buckets are sorted by key - though they are probably small. Adding *)
      (* an entry whose key is already present replaces the old entry.      *)
      (*--------------------------------------------------------------------*)
      fun addToBucket (ni as (key,_),bucket) =
         let
            fun doit nil = [ni]
              | doit (nis as (ni' as (key',_))::rest) =
               case Key.compare (key',key) of
                  LESS    => ni'::doit rest
                | EQUAL   => ni::rest
                | GREATER => ni::nis
         in doit bucket
         end

      (*--------------------------------------------------------------------*)
      (* find the index stored for a key in a sorted bucket; the search     *)
      (* stops at the first key greater than the one looked for.            *)
      (*--------------------------------------------------------------------*)
      fun searchBucket (key,bucket) =
         let
            fun doit nil = NONE
              | doit ((key',i)::rest) =
               case Key.compare (key',key) of
                  LESS    => doit rest
                | EQUAL   => SOME i
                | GREATER => NONE
         in doit bucket
         end

      (*--------------------------------------------------------------------*)
      (* a symbol table consists of                                         *)
      (* - an array tab holding for each index its key                      *)
      (* - a hash table, i.e. a Bucket array, twice the size of tab         *)
      (* - a hashFun mapping Key to the range of the hash table             *)
      (* - an integer width for computing table sizes                       *)
      (* - an integer size which is the size of the value table             *)
      (* - an integer count holding the next free index                     *)
      (*--------------------------------------------------------------------*)
      type SymTable = {desc    : string,
                       tab     : Key array Unsynchronized.ref,
                       hash    : Bucket array Unsynchronized.ref,
                       hashFun : (Key -> int) Unsynchronized.ref,
                       width   : int Unsynchronized.ref, (* bit width of the hash table *)
                       size    : int Unsynchronized.ref, (* tab size=2^(width-1), hash size is double *)
                       count   : int Unsynchronized.ref  (* number of entries *)
                       }

      (*--------------------------------------------------------------------*)
      (* the minimal table: one slot in tab and two buckets. Both buckets   *)
      (* are served by a constant hash function until the table grows.      *)
      (* width is 1 so that size = 2^(width-1) holds as in makeSymTable;    *)
      (* otherwise growTable's MAX_WIDTH check would allow one doubling too *)
      (* many for tables that started out here.                             *)
      (*--------------------------------------------------------------------*)
      fun nullSymTable desc =
         {desc    = desc,
          tab     = Unsynchronized.ref (Array.array(1,Key.null)),
          hash    = Unsynchronized.ref (Array.array(2,nullBucket)),
          hashFun = Unsynchronized.ref (fn _ => 0),
          count   = Unsynchronized.ref 0,
          size    = Unsynchronized.ref 1,
          width   = Unsynchronized.ref 1} : SymTable

      (*--------------------------------------------------------------------*)
      (* how many entries are in the symtable?                              *)
      (*--------------------------------------------------------------------*)
      fun usedSymbols ({count,...}:SymTable) = !count

      (*--------------------------------------------------------------------*)
      (* what is the table load, i.e. percentage of number of entries to    *)
      (* hash table size = 100*count/(2*size) = 50*count/size.              *)
      (*--------------------------------------------------------------------*)
      fun hashRatio({count,size,...}:SymTable) =
         50 * !count div !size
         handle Div => 100

      (*--------------------------------------------------------------------*)
      (* this is the hash function. Key.hash hashes data to arbitrary       *)
      (* words, that are mapped to the hash range by this function, where   *)
      (* mask is the bitmask corresponding to the size of the hash table:   *)
      (* 1. square the word produced by Key.hash                            *)
      (* 2. take the width bits from the middle of the square, these are    *)
      (*    the bit-places influenced by all input bit-places:              *)
      (*    - shift to the right by half of the destination width           *)
      (*    - mask out all bits to the left of destination                  *)
      (* this is a simple strategy but gives good results in practice.      *)
      (*--------------------------------------------------------------------*)
      fun square (x:word) = Word.*(x,x)
      fun hashKey(half,mask) x =
         Word.toInt(Word.andb(mask,Word.>>(square(Key.hash x),half)))
      fun makeHashFun(size,width) =
         let
            val mask = Word.fromInt(2*size-1)
            val half = Word.fromInt((width+1) div 2)
         in
            hashKey(half,mask)
         end

      (*--------------------------------------------------------------------*)
      (* clamp a requested width to the range 1..MAX_WIDTH, and compute the *)
      (* size of tab, 2^(width-1), belonging to a width.                    *)
      (*--------------------------------------------------------------------*)
      fun clampWidth w = Int.min(Int.max(1,w),MAX_WIDTH)
      fun tabSizeOf width = Word.toInt(Word.<<(0w1,Word.fromInt(width-1)))

      (*--------------------------------------------------------------------*)
      (* create a new symtable whose hash table has 2^w, but at least 2 and *)
      (* at most 2^m buckets, where m is the value of MAX_WIDTH. The key    *)
      (* array tab has half that many slots.                                *)
      (*--------------------------------------------------------------------*)
      fun makeSymTable (desc,w) =
         let
            val width = clampWidth w
            val size  = tabSizeOf width
         in
            {desc    = desc,
             tab     = Unsynchronized.ref (Array.array(size,Key.null)),
             hash    = Unsynchronized.ref (Array.array(2*size,nullBucket)),
             hashFun = Unsynchronized.ref (makeHashFun(size,width)),
             width   = Unsynchronized.ref width,
             size    = Unsynchronized.ref size,
             count   = Unsynchronized.ref 0}
         end

      (*--------------------------------------------------------------------*)
      (* clear a dictionary. If the 2nd arg is NONE, the arrays are kept    *)
      (* and only wiped; if it is SOME w, fresh arrays for width w are      *)
      (* allocated instead.                                                 *)
      (*--------------------------------------------------------------------*)
      fun clearSymTable ({tab,hash,hashFun,width,size,count,...}:SymTable,widthOpt) =
         case widthOpt of
            NONE =>
               let
                  val keys    = !tab
                  val buckets = !hash
                  (* only the first count slots of tab can hold keys *)
                  val _ = appInterval (fn i => Array.update(keys,i,Key.null))
                                      (0,!count-1)
                  val _ = appInterval (fn i => Array.update(buckets,i,nullBucket))
                                      (0,!size*2-1)
               in
                  count := 0
               end
          | SOME w =>
               let
                  val newWidth = clampWidth w
                  val newSize  = tabSizeOf newWidth
                  val _ = tab     := Array.array(newSize,Key.null)
                  val _ = hash    := Array.array(2*newSize,nullBucket)
                  val _ = hashFun := makeHashFun(newSize,newWidth)
                  val _ = width   := newWidth
                  val _ = size    := newSize
               in
                  count := 0
               end

      (*--------------------------------------------------------------------*)
      (* grow a symtable to the double size. raise InternalError if the     *)
      (* table already has maximal size.                                    *)
      (*                                                                    *)
      (* The new hash table is filled from the old buckets, not from tab:   *)
      (* slots handed out by reserveSymIndex but not yet assigned still     *)
      (* hold Key.null in tab and must not become hash entries for          *)
      (* Key.null. An old entry is carried over only if tab still holds its *)
      (* key at its index, so entries made stale by re-assigning an index   *)
      (* are dropped.                                                       *)
      (*--------------------------------------------------------------------*)
      fun growTable ({desc,tab,hash,hashFun,width,size,...}:SymTable) =
         let
            val newWidth =
               if !width < MAX_WIDTH then !width+1
               else raise InternalError
                  ("SymTable","growTable",
                   String.concat ["growing the ",desc," symbol table ",
                                  "exceeded the system maximum size of ",
                                  Int.toString Array.maxLen," for arrays"])
            val newSize = !size*2

            val oldTab     = !tab
            val newTab     = Array.array(newSize,Key.null)
            val newHash    = Array.array(2*newSize,nullBucket)
            val newHashFun = makeHashFun(newSize,newWidth)

            fun rehash (entry as (key,i)) =
               case Key.compare (Array.sub(oldTab,i),key) of
                  EQUAL =>
                     let val idx = newHashFun key
                     in Array.update
                           (newHash,idx,addToBucket(entry,Array.sub(newHash,idx)))
                     end
                | _ => ()

            val _ = Array.copy {src=oldTab,dst=newTab,di=0}
            val _ = Array.app (List.app rehash) (!hash)

            val _ = tab     := newTab
            val _ = hash    := newHash
            val _ = size    := newSize
            val _ = width   := newWidth
            val _ = hashFun := newHashFun
         in ()
         end

      (*--------------------------------------------------------------------*)
      (* lookup the key for an index of the symbol table. raise             *)
      (* NoSuchSymbol if the index is negative or was not handed out yet.   *)
      (*--------------------------------------------------------------------*)
      fun getSymKey({tab,count,...}:SymTable,idx) =
         if idx >= 0 andalso idx < !count then Array.sub(!tab,idx)
         else raise NoSuchSymbol

      (*--------------------------------------------------------------------*)
      (* map a Key to its index in the symbol table. if it is not in the    *)
      (* symbol table yet, add a new entry with a new index. grow the table *)
      (* if there is no more free index in the table.                       *)
      (*--------------------------------------------------------------------*)
      fun getSymIndex(st as {tab,hash,hashFun,size,count,...}:SymTable,key) =
         let
            val idx    = !hashFun key
            val bucket = Array.sub(!hash,idx)
         in
            case searchBucket(key,bucket) of
               SOME i => i
             | NONE =>
                  let
                     val i = !count
                     (* growing replaces hash and hashFun, so the bucket *)
                     (* has to be looked up again afterwards             *)
                     val (idx',buck') =
                        if !size>i then (idx,bucket)
                        else
                           let
                              val _    = growTable st
                              val idx' = !hashFun key
                           in
                              (idx',Array.sub(!hash,idx'))
                           end
                     val _ = Array.update(!hash,idx',addToBucket((key,i),buck'))
                     val _ = Array.update(!tab,i,key)
                     val _ = count := i+1
                  in i
                  end
         end

      (*--------------------------------------------------------------------*)
      (* does a Key have an entry in a symbol table?                        *)
      (*--------------------------------------------------------------------*)
      fun hasSymIndex({hash,hashFun,...}:SymTable,key) =
         searchBucket(key,Array.sub(!hash,!hashFun key))

      (*--------------------------------------------------------------------*)
      (* reserve an index for a (yet unknown) key. The slot in tab keeps    *)
      (* its Key.null content until assignSymIndex is called for it.        *)
      (*--------------------------------------------------------------------*)
      fun reserveSymIndex(st as {size,count,...}:SymTable) =
         let
            val i = !count
            val _ = if !size>i then () else growTable st
            val _ = count := i+1
         in i
         end

      (*--------------------------------------------------------------------*)
      (* assign a key to a (previously reserved) index. raise NoSuchSymbol  *)
      (* if the index is negative or was not handed out yet; the check is   *)
      (* made before anything is modified, so a bad index leaves the table  *)
      (* untouched.                                                         *)
      (*--------------------------------------------------------------------*)
      fun assignSymIndex({count,hash,hashFun,tab,...}:SymTable,key,i) =
         if i < 0 orelse !count <= i then raise NoSuchSymbol
         else
            let
               val idx     = !hashFun key
               val buck    = Array.sub(!hash,idx)
               val newBuck = addToBucket((key,i),buck)
               val _ = Array.update(!hash,idx,newBuck)
               val _ = Array.update(!tab,i,key)
            in ()
            end

      (*--------------------------------------------------------------------*)
      (* extract the contents of a symbol table to a vector.                *)
      (*--------------------------------------------------------------------*)
      fun extractSymTable({count,tab,...}:SymTable) =
         ArraySlice.vector(ArraySlice.slice(!tab,0,SOME(!count)))

      (*--------------------------------------------------------------------*)
      (* print the contents of the symbol table.                            *)
      (*--------------------------------------------------------------------*)
      fun printSymTable ({desc,tab,count,...}:SymTable) =
         (print (desc^" table:\n");
          ArraySlice.appi
             (fn (n,key) => print ("  "^Int.toString n^": "^Key.toString key^"\n"))
             (ArraySlice.slice(!tab,0,SOME (!count))))
   end