| author | Christian Urban <urbanc@in.tum.de> | 
| Sun, 26 Apr 2009 00:42:49 +0200 | |
| changeset 30986 | 047fa04a9fe8 | 
| parent 28397 | 389c5e494605 | 
| child 32740 | 9dd0a2f83429 | 
| permissions | -rw-r--r-- | 
| 18449 | 1 | (* Modified for Poly/ML from SML/NJ Library version 0.2 | 
| 2 | * | |
| 3 | * COPYRIGHT (c) 1992 by AT&T Bell Laboratories. | |
| 4 | * See file mosml/copyrght/copyrght.att for details. | |
| 5 | * | |
| 6 | * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974 | |
| 18508 | 7 | * Current version from the Moscow ML distribution, copied by permission. | 
| 18449 | 8 | *) | 
| 9 | ||
| 10 | (* Polyhash -- polymorphic hashtables as in the SML/NJ Library *) | |
| 11 | ||
| 12 | signature Polyhash = | |
| 13 | sig | |
| 14 | ||
| 15 | type ('key, 'data) hash_table
 | |
| 16 | ||
| 17 | val mkTable     : ('_key -> int) * ('_key * '_key -> bool) -> int * exn 
 | |
| 18 |                   -> ('_key, '_data) hash_table
 | |
| 19 | val numItems    : ('key, 'data) hash_table -> int
 | |
| 20 | val insert      : ('_key, '_data) hash_table -> '_key * '_data -> unit
 | |
| 21 | val peekInsert  : ('_key, '_data) hash_table -> '_key * '_data 
 | |
| 22 | -> '_data option | |
| 23 | val find        : ('key, 'data) hash_table -> 'key -> 'data
 | |
| 24 | val peek        : ('key, 'data) hash_table -> 'key -> 'data option
 | |
| 25 | val remove      : ('key, 'data) hash_table -> 'key -> 'data
 | |
| 26 | val listItems   : ('key, 'data) hash_table -> ('key * 'data) list
 | |
| 27 | val apply       : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
 | |
| 28 | val map         : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table 
 | |
| 29 |                   -> ('_key, '_res) hash_table
 | |
| 30 | val filter      : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
 | |
| 31 | val transform   : ('data -> '_res) -> ('_key, 'data) hash_table 
 | |
| 32 |                   -> ('_key, '_res) hash_table
 | |
| 33 | val copy        : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
 | |
| 34 | val bucketSizes : ('key, 'data) hash_table -> int list
 | |
| 35 | ||
| 36 | (*Additions due to L. C. Paulson and Jia Meng*) | |
| 37 | val hashw : word * word -> word | |
| 38 | val hashw_char : Char.char * word -> word | |
| 20662 | 39 | val hashw_int : int * word -> word | 
| 18449 | 40 | val hashw_vector : word Vector.vector * word -> word | 
| 41 | val hashw_string : string * word -> word | |
| 42 | val hashw_strings : string list * word -> word | |
| 43 | val hash_string : string -> int | |
| 44 | ||
| 45 | (* | |
| 46 |    [('key, 'data) hash_table] is the type of hashtables with keys of type
 | |
| 47 | 'key and data values of type 'data. | |
| 48 | ||
| 49 | [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable, | |
| 50 | using hash function hashVal and equality predicate sameKey. The sz | |
| 51 | is a size hint, and exc is the exception raised by function find. | |
| 52 | It must be the case that sameKey(k1, k2) implies hashVal(k1) = | |
| 53 | hashVal(k2) for all k1,k2. | |
| 54 | ||
| 55 | [numItems htbl] is the number of items in the hash table. | |
| 56 | ||
| 57 | [insert htbl (k, d)] inserts data d for key k. If k already had an | |
| 58 | item associated with it, then the old item is overwritten. | |
| 59 | ||
| 60 | [find htbl k] returns d, where d is the data item associated with key k, | |
| 61 | or raises the exception (given at creation of htbl) if there is no such d. | |
| 62 | ||
| 63 | [peek htbl k] returns SOME d, where d is the data item associated with | |
| 64 | key k, or NONE if there is no such d. | |
| 65 | ||
| 66 | [peekInsert htbl (k, d)] inserts data d for key k, if k is not | |
| 67 | already in the table, returning NONE. If k is already in the | |
| 68 | table, and the associated data value is d', then returns SOME d' | |
| 69 | and leaves the table unmodified. | |
| 70 | ||
| 71 | [remove htbl k] returns d, where d is the data item associated with key k, | |
| 72 | removing d from the table; or raises the exception if there is no such d. | |
| 73 | ||
| 74 | [listItems htbl] returns a list of the (key, data) pairs in the hashtable. | |
| 75 | ||
| 76 | [apply f htbl] applies function f to all (key, data) pairs in the | |
| 77 | hashtable, in some order. | |
| 78 | ||
| 79 | [map f htbl] returns a new hashtable, whose data items have been | |
| 80 | obtained by applying f to the (key, data) pairs in htbl. The new | |
| 81 | tables have the same keys, hash function, equality predicate, and | |
| 82 | exception, as htbl. | |
| 83 | ||
| 84 | [filter p htbl] deletes from htbl all data items which do not | |
| 85 | satisfy predicate p. | |
| 86 | ||
| 87 | [transform f htbl] as map, but only the (old) data values are used | |
| 88 | when computing the new data values. | |
| 89 | ||
| 90 | [copy htbl] returns a complete copy of htbl. | |
| 91 | ||
| 92 | [bucketSizes htbl] returns a list of the sizes of the buckets. | |
| 93 | This is to allow users to gauge the quality of their hashing | |
| 94 | function. | |
| 95 | *) | |
| 96 | ||
| 97 | end | |
| 98 | ||
| 99 | ||
| 100 | structure Polyhash :> Polyhash = | |
| 101 | struct | |
| 102 | ||
| 103 | datatype ('key, 'data) bucket_t
 | |
| 104 | = NIL | |
| 105 |   | B of int * 'key * 'data * ('key, 'data) bucket_t
 | |
| 106 | ||
| 107 | datatype ('key, 'data) hash_table = 
 | |
| 108 |     HT of {hashVal   : 'key -> int,
 | |
| 109 | sameKey : 'key * 'key -> bool, | |
| 110 | not_found : exn, | |
| 111 | 	   table     : ('key, 'data) bucket_t Array.array ref,
 | |
| 112 | n_items : int ref} | |
| 113 | ||
| 114 | local | |
| 115 | (* | |
| 116 | prim_val andb_ : int -> int -> int = 2 "and"; | |
| 117 | prim_val lshift_ : int -> int -> int = 2 "shift_left"; | |
| 118 | *) | |
| 119 | fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y)); | |
| 120 | fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y)); | |
| 121 | in | |
| 122 | fun index (i, sz) = andb_ i (sz-1) | |
| 123 | ||
| 124 | (* find smallest power of 2 (>= 32) that is >= n *) | |
| 125 | fun roundUp n = | |
| 126 | let fun f i = if (i >= n) then i else f (lshift_ i 1) | |
| 127 | in f 32 end | |
| 128 | end; | |
| 129 | ||
| 130 | (* Create a new table; the int is a size hint and the exception | |
| 131 | * is to be raised by find. | |
| 132 | *) | |
| 133 |     fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
 | |
| 134 | hashVal=hashVal, | |
| 135 | sameKey=sameKey, | |
| 136 | not_found = notFound, | |
| 137 | table = ref (Array.array(roundUp sizeHint, NIL)), | |
| 138 | n_items = ref 0 | |
| 139 | }; | |
| 140 | ||
| 141 | (* conditionally grow a table *) | |
| 142 |     fun growTable (HT{table, n_items, ...}) = let
 | |
| 143 | val arr = !table | |
| 144 | val sz = Array.length arr | |
| 145 | in | |
| 146 | if (!n_items >= sz) | |
| 147 | then let | |
| 148 | val newSz = sz+sz | |
| 149 | val newArr = Array.array (newSz, NIL) | |
| 150 | fun copy NIL = () | |
| 151 | | copy (B(h, key, v, rest)) = let | |
| 152 | val indx = index (h, newSz) | |
| 153 | in | |
| 154 | Array.update (newArr, indx, | |
| 155 | B(h, key, v, Array.sub(newArr, indx))); | |
| 156 | copy rest | |
| 157 | end | |
| 158 | fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1)) | |
| 159 | in | |
| 28397 
389c5e494605
handle _ should be avoided (spurious Interrupt will spoil the game);
 wenzelm parents: 
20662diff
changeset | 160 | (bucket 0) handle _ => (); (* FIXME avoid handle _ *) | 
| 18449 | 161 | table := newArr | 
| 162 | end | |
| 163 | else () | |
| 164 | end (* growTable *); | |
| 165 | ||
| 166 | (* Insert an item. If the key already has an item associated with it, | |
| 167 | * then the old item is discarded. | |
| 168 | *) | |
| 169 |     fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
 | |
| 170 | let | |
| 171 | val arr = !table | |
| 172 | val sz = Array.length arr | |
| 173 | val hash = hashVal key | |
| 174 | val indx = index (hash, sz) | |
| 175 | fun look NIL = ( | |
| 176 | Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx))); | |
| 20416 | 177 | inc n_items; | 
| 18449 | 178 | growTable tbl; | 
| 179 | NIL) | |
| 180 | | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) | |
| 181 | then B(hash, key, item, r) | |
| 182 | else (case (look r) | |
| 183 | of NIL => NIL | |
| 184 | | rest => B(h, k, v, rest) | |
| 185 | (* end case *)) | |
| 186 | in | |
| 187 | case (look (Array.sub (arr, indx))) | |
| 188 | of NIL => () | |
| 189 | | b => Array.update(arr, indx, b) | |
| 190 | end; | |
| 191 | ||
| 192 | (* Insert an item if not there already; if it is there already, | |
| 193 | then return the old data value and leave the table unmodified.. | |
| 194 | *) | |
| 195 |     fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
 | |
| 196 | let val arr = !table | |
| 197 | val sz = Array.length arr | |
| 198 | val hash = hashVal key | |
| 199 | val indx = index (hash, sz) | |
| 200 | fun look NIL = | |
| 201 | (Array.update(arr, indx, B(hash, key, item, | |
| 202 | Array.sub(arr, indx))); | |
| 20416 | 203 | inc n_items; | 
| 18449 | 204 | growTable tbl; | 
| 205 | NONE) | |
| 206 | | look (B(h, k, v, r)) = | |
| 207 | if hash = h andalso sameKey(key, k) then SOME v | |
| 208 | else look r | |
| 209 | in | |
| 210 | look (Array.sub (arr, indx)) | |
| 211 | end; | |
| 212 | ||
| 213 | (* find an item, the table's exception is raised if the item doesn't exist *) | |
| 214 |     fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
 | |
| 215 | val arr = !table | |
| 216 | val sz = Array.length arr | |
| 217 | val hash = hashVal key | |
| 218 | val indx = index (hash, sz) | |
| 219 | fun look NIL = raise not_found | |
| 220 | | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) | |
| 221 | then v | |
| 222 | else look r | |
| 223 | in | |
| 224 | look (Array.sub (arr, indx)) | |
| 225 | end; | |
| 226 | ||
| 227 | (* look for an item, return NONE if the item doesn't exist *) | |
| 228 |     fun peek (HT{hashVal, sameKey, table, ...}) key = let
 | |
| 229 | val arr = !table | |
| 230 | val sz = Array.length arr | |
| 231 | val hash = hashVal key | |
| 232 | val indx = index (hash, sz) | |
| 233 | fun look NIL = NONE | |
| 234 | | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) | |
| 235 | then SOME v | |
| 236 | else look r | |
| 237 | in | |
| 238 | look (Array.sub (arr, indx)) | |
| 239 | end; | |
| 240 | ||
| 241 | (* Remove an item. The table's exception is raised if | |
| 242 | * the item doesn't exist. | |
| 243 | *) | |
| 244 |     fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
 | |
| 245 | val arr = !table | |
| 246 | val sz = Array.length arr | |
| 247 | val hash = hashVal key | |
| 248 | val indx = index (hash, sz) | |
| 249 | fun look NIL = raise not_found | |
| 250 | | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) | |
| 251 | then (v, r) | |
| 252 | else let val (item, r') = look r in (item, B(h, k, v, r')) end | |
| 253 | val (item, bucket) = look (Array.sub (arr, indx)) | |
| 254 | in | |
| 255 | Array.update (arr, indx, bucket); | |
| 256 | n_items := !n_items - 1; | |
| 257 | item | |
| 258 | end (* remove *); | |
| 259 | ||
| 260 | (* Return the number of items in the table *) | |
| 261 |    fun numItems (HT{n_items, ...}) = !n_items
 | |
| 262 | ||
| 263 | (* return a list of the items in the table *) | |
| 264 |     fun listItems (HT{table = ref arr, n_items, ...}) = let
 | |
| 265 | fun f (_, l, 0) = l | |
| 266 | | f (~1, l, _) = l | |
| 267 | | f (i, l, n) = let | |
| 268 | fun g (NIL, l, n) = f (i-1, l, n) | |
| 269 | | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1) | |
| 270 | in | |
| 271 | g (Array.sub(arr, i), l, n) | |
| 272 | end | |
| 273 | in | |
| 274 | f ((Array.length arr) - 1, [], !n_items) | |
| 275 | end (* listItems *); | |
| 276 | ||
| 277 | (* Apply a function to the entries of the table *) | |
| 278 |     fun apply f (HT{table, ...}) = let
 | |
| 279 | fun appF NIL = () | |
| 280 | | appF (B(_, key, item, rest)) = ( | |
| 281 | f (key, item); | |
| 282 | appF rest) | |
| 283 | val arr = !table | |
| 284 | val sz = Array.length arr | |
| 285 | fun appToTbl i = if (i < sz) | |
| 286 | then (appF (Array.sub (arr, i)); appToTbl(i+1)) | |
| 287 | else () | |
| 288 | in | |
| 289 | appToTbl 0 | |
| 290 | end (* apply *); | |
| 291 | ||
| 292 | (* Map a table to a new table that has the same keys and exception *) | |
| 293 |     fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
 | |
| 294 | fun mapF NIL = NIL | |
| 295 | | mapF (B(hash, key, item, rest)) = | |
| 296 | B(hash, key, f (key, item), mapF rest) | |
| 297 | val arr = !table | |
| 298 | val sz = Array.length arr | |
| 299 | val newArr = Array.array (sz, NIL) | |
| 300 | fun mapTbl i = if (i < sz) | |
| 301 | then ( | |
| 302 | Array.update(newArr, i, mapF (Array.sub(arr, i))); | |
| 303 | mapTbl (i+1)) | |
| 304 | else () | |
| 305 | in | |
| 306 | mapTbl 0; | |
| 307 | 	    HT{hashVal=hashVal,
 | |
| 308 | sameKey=sameKey, | |
| 309 | table = ref newArr, | |
| 310 | n_items = ref(!n_items), | |
| 311 | not_found = not_found} | |
| 312 | end (* transform *); | |
| 313 | ||
| 314 | (* remove any hash table items that do not satisfy the given | |
| 315 | * predicate. | |
| 316 | *) | |
| 317 |     fun filter pred (HT{table, n_items, not_found, ...}) = let
 | |
| 318 | fun filterP NIL = NIL | |
| 319 | | filterP (B(hash, key, item, rest)) = if (pred(key, item)) | |
| 320 | then B(hash, key, item, filterP rest) | |
| 321 | else filterP rest | |
| 322 | val arr = !table | |
| 323 | val sz = Array.length arr | |
| 324 | fun filterTbl i = if (i < sz) | |
| 325 | then ( | |
| 326 | Array.update (arr, i, filterP (Array.sub (arr, i))); | |
| 327 | filterTbl (i+1)) | |
| 328 | else () | |
| 329 | in | |
| 330 | filterTbl 0 | |
| 331 | end (* filter *); | |
| 332 | ||
| 333 | (* Map a table to a new table that has the same keys, exception, | |
| 334 | hash function, and equality function *) | |
| 335 | ||
| 336 |     fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
 | |
| 337 | fun mapF NIL = NIL | |
| 338 | | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest) | |
| 339 | val arr = !table | |
| 340 | val sz = Array.length arr | |
| 341 | val newArr = Array.array (sz, NIL) | |
| 342 | fun mapTbl i = if (i < sz) | |
| 343 | then ( | |
| 344 | Array.update(newArr, i, mapF (Array.sub(arr, i))); | |
| 345 | mapTbl (i+1)) | |
| 346 | else () | |
| 347 | in | |
| 348 | mapTbl 0; | |
| 349 | 	    HT{hashVal=hashVal, 
 | |
| 350 | sameKey=sameKey, | |
| 351 | table = ref newArr, | |
| 352 | n_items = ref(!n_items), | |
| 353 | not_found = not_found} | |
| 354 | end (* transform *); | |
| 355 | ||
| 356 | (* Create a copy of a hash table *) | |
| 357 |     fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
 | |
| 358 | val arr = !table | |
| 359 | val sz = Array.length arr | |
| 360 | val newArr = Array.array (sz, NIL) | |
| 361 | fun mapTbl i = ( | |
| 362 | Array.update (newArr, i, Array.sub(arr, i)); | |
| 363 | mapTbl (i+1)) | |
| 364 | in | |
| 28397 
389c5e494605
handle _ should be avoided (spurious Interrupt will spoil the game);
 wenzelm parents: 
20662diff
changeset | 365 | (mapTbl 0) handle _ => (); (* FIXME avoid handle _ *) | 
| 18449 | 366 | 	    HT{hashVal=hashVal, 
 | 
| 367 | sameKey=sameKey, | |
| 368 | table = ref newArr, | |
| 369 | n_items = ref(!n_items), | |
| 370 | not_found = not_found} | |
| 371 | end (* copy *); | |
| 372 | ||
| 373 | (* returns a list of the sizes of the various buckets. This is to | |
| 374 | * allow users to gauge the quality of their hashing function. | |
| 375 | *) | |
| 376 |     fun bucketSizes (HT{table = ref arr, ...}) = let
 | |
| 377 | fun len (NIL, n) = n | |
| 378 | | len (B(_, _, _, r), n) = len(r, n+1) | |
| 379 | fun f (~1, l) = l | |
| 380 | | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l) | |
| 381 | in | |
| 382 | f ((Array.length arr)-1, []) | |
| 383 | end | |
| 384 | ||
| 385 | (*Added by lcp. | |
| 18508 | 386 | This is essentially the described in Compilers: | 
| 18449 | 387 | Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*) | 
| 388 | ||
| 18508 | 389 | (*This hash function is recommended in Compilers: Principles, Techniques, and | 
| 390 | Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly | |
| 391 | recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*) | |
| 392 | fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w)) | |
| 18449 | 393 | |
| 394 | fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w); | |
| 395 | ||
| 20662 | 396 | fun hashw_int (i,w) = hashw (Word.fromInt i, w); | 
| 397 | ||
| 18449 | 398 | fun hashw_vector (v,w) = Vector.foldl hashw w v; | 
| 399 | ||
| 400 | fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s; | |
| 401 | ||
| 402 | fun hashw_strings (ss, w) = List.foldl hashw_string w ss; | |
| 403 | ||
| 404 | fun hash_string s = Word.toIntX (hashw_string(s,0w0)); | |
| 405 | ||
| 406 | end |