author | wenzelm |
Mon, 16 Mar 2009 18:24:30 +0100 | |
changeset 30549 | d2d7874648bd |
parent 28397 | 389c5e494605 |
child 32740 | 9dd0a2f83429 |
permissions | -rw-r--r-- |
18449 | 1 |
(* Modified for Poly/ML from SML/NJ Library version 0.2 |
2 |
* |
|
3 |
* COPYRIGHT (c) 1992 by AT&T Bell Laboratories. |
|
4 |
* See file mosml/copyrght/copyrght.att for details. |
|
5 |
* |
|
6 |
* Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974 |
|
18508 | 7 |
* Current version from the Moscow ML distribution, copied by permission. |
18449 | 8 |
*) |
9 |
||
10 |
(* Polyhash -- polymorphic hashtables as in the SML/NJ Library *) |
|
11 |
||
12 |
signature Polyhash = |
|
13 |
sig |
|
14 |
||
15 |
type ('key, 'data) hash_table |
|
16 |
||
17 |
val mkTable : ('_key -> int) * ('_key * '_key -> bool) -> int * exn |
|
18 |
-> ('_key, '_data) hash_table |
|
19 |
val numItems : ('key, 'data) hash_table -> int |
|
20 |
val insert : ('_key, '_data) hash_table -> '_key * '_data -> unit |
|
21 |
val peekInsert : ('_key, '_data) hash_table -> '_key * '_data |
|
22 |
-> '_data option |
|
23 |
val find : ('key, 'data) hash_table -> 'key -> 'data |
|
24 |
val peek : ('key, 'data) hash_table -> 'key -> 'data option |
|
25 |
val remove : ('key, 'data) hash_table -> 'key -> 'data |
|
26 |
val listItems : ('key, 'data) hash_table -> ('key * 'data) list |
|
27 |
val apply : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit |
|
28 |
val map : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table |
|
29 |
-> ('_key, '_res) hash_table |
|
30 |
val filter : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit |
|
31 |
val transform : ('data -> '_res) -> ('_key, 'data) hash_table |
|
32 |
-> ('_key, '_res) hash_table |
|
33 |
val copy : ('_key, '_data) hash_table -> ('_key, '_data) hash_table |
|
34 |
val bucketSizes : ('key, 'data) hash_table -> int list |
|
35 |
||
36 |
(*Additions due to L. C. Paulson and Jia Meng*) |
|
37 |
val hashw : word * word -> word |
|
38 |
val hashw_char : Char.char * word -> word |
|
20662 | 39 |
val hashw_int : int * word -> word |
18449 | 40 |
val hashw_vector : word Vector.vector * word -> word |
41 |
val hashw_string : string * word -> word |
|
42 |
val hashw_strings : string list * word -> word |
|
43 |
val hash_string : string -> int |
|
44 |
||
45 |
(* |
|
46 |
[('key, 'data) hash_table] is the type of hashtables with keys of type |
|
47 |
'key and data values of type 'data. |
|
48 |
||
49 |
[mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable, |
|
50 |
using hash function hashVal and equality predicate sameKey. The sz |
|
51 |
is a size hint, and exc is the exception raised by function find. |
|
52 |
It must be the case that sameKey(k1, k2) implies hashVal(k1) = |
|
53 |
hashVal(k2) for all k1,k2. |
|
54 |
||
55 |
[numItems htbl] is the number of items in the hash table. |
|
56 |
||
57 |
[insert htbl (k, d)] inserts data d for key k. If k already had an |
|
58 |
item associated with it, then the old item is overwritten. |
|
59 |
||
60 |
[find htbl k] returns d, where d is the data item associated with key k, |
|
61 |
or raises the exception (given at creation of htbl) if there is no such d. |
|
62 |
||
63 |
[peek htbl k] returns SOME d, where d is the data item associated with |
|
64 |
key k, or NONE if there is no such d. |
|
65 |
||
66 |
[peekInsert htbl (k, d)] inserts data d for key k, if k is not |
|
67 |
already in the table, returning NONE. If k is already in the |
|
68 |
table, and the associated data value is d', then returns SOME d' |
|
69 |
and leaves the table unmodified. |
|
70 |
||
71 |
[remove htbl k] returns d, where d is the data item associated with key k, |
|
72 |
removing d from the table; or raises the exception if there is no such d. |
|
73 |
||
74 |
[listItems htbl] returns a list of the (key, data) pairs in the hashtable. |
|
75 |
||
76 |
[apply f htbl] applies function f to all (key, data) pairs in the |
|
77 |
hashtable, in some order. |
|
78 |
||
79 |
[map f htbl] returns a new hashtable, whose data items have been |
|
80 |
obtained by applying f to the (key, data) pairs in htbl. The new |
|
81 |
tables have the same keys, hash function, equality predicate, and |
|
82 |
exception, as htbl. |
|
83 |
||
84 |
[filter p htbl] deletes from htbl all data items which do not |
|
85 |
satisfy predicate p. |
|
86 |
||
87 |
[transform f htbl] as map, but only the (old) data values are used |
|
88 |
when computing the new data values. |
|
89 |
||
90 |
[copy htbl] returns a complete copy of htbl. |
|
91 |
||
92 |
[bucketSizes htbl] returns a list of the sizes of the buckets. |
|
93 |
This is to allow users to gauge the quality of their hashing |
|
94 |
function. |
|
95 |
*) |
|
96 |
||
97 |
end |
|
98 |
||
99 |
||
100 |
structure Polyhash :> Polyhash = |
|
101 |
struct |
|
102 |
||
103 |
datatype ('key, 'data) bucket_t |
|
104 |
= NIL |
|
105 |
| B of int * 'key * 'data * ('key, 'data) bucket_t |
|
106 |
||
107 |
datatype ('key, 'data) hash_table = |
|
108 |
HT of {hashVal : 'key -> int, |
|
109 |
sameKey : 'key * 'key -> bool, |
|
110 |
not_found : exn, |
|
111 |
table : ('key, 'data) bucket_t Array.array ref, |
|
112 |
n_items : int ref} |
|
113 |
||
114 |
local |
|
115 |
(* |
|
116 |
prim_val andb_ : int -> int -> int = 2 "and"; |
|
117 |
prim_val lshift_ : int -> int -> int = 2 "shift_left"; |
|
118 |
*) |
|
119 |
fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y)); |
|
120 |
fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y)); |
|
121 |
in |
|
122 |
fun index (i, sz) = andb_ i (sz-1) |
|
123 |
||
124 |
(* find smallest power of 2 (>= 32) that is >= n *) |
|
125 |
fun roundUp n = |
|
126 |
let fun f i = if (i >= n) then i else f (lshift_ i 1) |
|
127 |
in f 32 end |
|
128 |
end; |
|
129 |
||
130 |
(* Create a new table; the int is a size hint and the exception |
|
131 |
* is to be raised by find. |
|
132 |
*) |
|
133 |
fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{ |
|
134 |
hashVal=hashVal, |
|
135 |
sameKey=sameKey, |
|
136 |
not_found = notFound, |
|
137 |
table = ref (Array.array(roundUp sizeHint, NIL)), |
|
138 |
n_items = ref 0 |
|
139 |
}; |
|
140 |
||
141 |
(* conditionally grow a table *) |
|
142 |
fun growTable (HT{table, n_items, ...}) = let |
|
143 |
val arr = !table |
|
144 |
val sz = Array.length arr |
|
145 |
in |
|
146 |
if (!n_items >= sz) |
|
147 |
then let |
|
148 |
val newSz = sz+sz |
|
149 |
val newArr = Array.array (newSz, NIL) |
|
150 |
fun copy NIL = () |
|
151 |
| copy (B(h, key, v, rest)) = let |
|
152 |
val indx = index (h, newSz) |
|
153 |
in |
|
154 |
Array.update (newArr, indx, |
|
155 |
B(h, key, v, Array.sub(newArr, indx))); |
|
156 |
copy rest |
|
157 |
end |
|
158 |
fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1)) |
|
159 |
in |
|
28397
389c5e494605
handle _ should be avoided (spurious Interrupt will spoil the game);
wenzelm
parents:
20662
diff
changeset
|
160 |
(bucket 0) handle _ => (); (* FIXME avoid handle _ *) |
18449 | 161 |
table := newArr |
162 |
end |
|
163 |
else () |
|
164 |
end (* growTable *); |
|
165 |
||
166 |
(* Insert an item. If the key already has an item associated with it, |
|
167 |
* then the old item is discarded. |
|
168 |
*) |
|
169 |
fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) = |
|
170 |
let |
|
171 |
val arr = !table |
|
172 |
val sz = Array.length arr |
|
173 |
val hash = hashVal key |
|
174 |
val indx = index (hash, sz) |
|
175 |
fun look NIL = ( |
|
176 |
Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx))); |
|
20416 | 177 |
inc n_items; |
18449 | 178 |
growTable tbl; |
179 |
NIL) |
|
180 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) |
|
181 |
then B(hash, key, item, r) |
|
182 |
else (case (look r) |
|
183 |
of NIL => NIL |
|
184 |
| rest => B(h, k, v, rest) |
|
185 |
(* end case *)) |
|
186 |
in |
|
187 |
case (look (Array.sub (arr, indx))) |
|
188 |
of NIL => () |
|
189 |
| b => Array.update(arr, indx, b) |
|
190 |
end; |
|
191 |
||
192 |
(* Insert an item if not there already; if it is there already, |
|
193 |
then return the old data value and leave the table unmodified.. |
|
194 |
*) |
|
195 |
fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) = |
|
196 |
let val arr = !table |
|
197 |
val sz = Array.length arr |
|
198 |
val hash = hashVal key |
|
199 |
val indx = index (hash, sz) |
|
200 |
fun look NIL = |
|
201 |
(Array.update(arr, indx, B(hash, key, item, |
|
202 |
Array.sub(arr, indx))); |
|
20416 | 203 |
inc n_items; |
18449 | 204 |
growTable tbl; |
205 |
NONE) |
|
206 |
| look (B(h, k, v, r)) = |
|
207 |
if hash = h andalso sameKey(key, k) then SOME v |
|
208 |
else look r |
|
209 |
in |
|
210 |
look (Array.sub (arr, indx)) |
|
211 |
end; |
|
212 |
||
213 |
(* find an item, the table's exception is raised if the item doesn't exist *) |
|
214 |
fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let |
|
215 |
val arr = !table |
|
216 |
val sz = Array.length arr |
|
217 |
val hash = hashVal key |
|
218 |
val indx = index (hash, sz) |
|
219 |
fun look NIL = raise not_found |
|
220 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) |
|
221 |
then v |
|
222 |
else look r |
|
223 |
in |
|
224 |
look (Array.sub (arr, indx)) |
|
225 |
end; |
|
226 |
||
227 |
(* look for an item, return NONE if the item doesn't exist *) |
|
228 |
fun peek (HT{hashVal, sameKey, table, ...}) key = let |
|
229 |
val arr = !table |
|
230 |
val sz = Array.length arr |
|
231 |
val hash = hashVal key |
|
232 |
val indx = index (hash, sz) |
|
233 |
fun look NIL = NONE |
|
234 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) |
|
235 |
then SOME v |
|
236 |
else look r |
|
237 |
in |
|
238 |
look (Array.sub (arr, indx)) |
|
239 |
end; |
|
240 |
||
241 |
(* Remove an item. The table's exception is raised if |
|
242 |
* the item doesn't exist. |
|
243 |
*) |
|
244 |
fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let |
|
245 |
val arr = !table |
|
246 |
val sz = Array.length arr |
|
247 |
val hash = hashVal key |
|
248 |
val indx = index (hash, sz) |
|
249 |
fun look NIL = raise not_found |
|
250 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k)) |
|
251 |
then (v, r) |
|
252 |
else let val (item, r') = look r in (item, B(h, k, v, r')) end |
|
253 |
val (item, bucket) = look (Array.sub (arr, indx)) |
|
254 |
in |
|
255 |
Array.update (arr, indx, bucket); |
|
256 |
n_items := !n_items - 1; |
|
257 |
item |
|
258 |
end (* remove *); |
|
259 |
||
260 |
(* Return the number of items in the table *) |
|
261 |
fun numItems (HT{n_items, ...}) = !n_items |
|
262 |
||
263 |
(* return a list of the items in the table *) |
|
264 |
fun listItems (HT{table = ref arr, n_items, ...}) = let |
|
265 |
fun f (_, l, 0) = l |
|
266 |
| f (~1, l, _) = l |
|
267 |
| f (i, l, n) = let |
|
268 |
fun g (NIL, l, n) = f (i-1, l, n) |
|
269 |
| g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1) |
|
270 |
in |
|
271 |
g (Array.sub(arr, i), l, n) |
|
272 |
end |
|
273 |
in |
|
274 |
f ((Array.length arr) - 1, [], !n_items) |
|
275 |
end (* listItems *); |
|
276 |
||
277 |
(* Apply a function to the entries of the table *) |
|
278 |
fun apply f (HT{table, ...}) = let |
|
279 |
fun appF NIL = () |
|
280 |
| appF (B(_, key, item, rest)) = ( |
|
281 |
f (key, item); |
|
282 |
appF rest) |
|
283 |
val arr = !table |
|
284 |
val sz = Array.length arr |
|
285 |
fun appToTbl i = if (i < sz) |
|
286 |
then (appF (Array.sub (arr, i)); appToTbl(i+1)) |
|
287 |
else () |
|
288 |
in |
|
289 |
appToTbl 0 |
|
290 |
end (* apply *); |
|
291 |
||
292 |
(* Map a table to a new table that has the same keys and exception *) |
|
293 |
fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let |
|
294 |
fun mapF NIL = NIL |
|
295 |
| mapF (B(hash, key, item, rest)) = |
|
296 |
B(hash, key, f (key, item), mapF rest) |
|
297 |
val arr = !table |
|
298 |
val sz = Array.length arr |
|
299 |
val newArr = Array.array (sz, NIL) |
|
300 |
fun mapTbl i = if (i < sz) |
|
301 |
then ( |
|
302 |
Array.update(newArr, i, mapF (Array.sub(arr, i))); |
|
303 |
mapTbl (i+1)) |
|
304 |
else () |
|
305 |
in |
|
306 |
mapTbl 0; |
|
307 |
HT{hashVal=hashVal, |
|
308 |
sameKey=sameKey, |
|
309 |
table = ref newArr, |
|
310 |
n_items = ref(!n_items), |
|
311 |
not_found = not_found} |
|
312 |
end (* transform *); |
|
313 |
||
314 |
(* remove any hash table items that do not satisfy the given |
|
315 |
* predicate. |
|
316 |
*) |
|
317 |
fun filter pred (HT{table, n_items, not_found, ...}) = let |
|
318 |
fun filterP NIL = NIL |
|
319 |
| filterP (B(hash, key, item, rest)) = if (pred(key, item)) |
|
320 |
then B(hash, key, item, filterP rest) |
|
321 |
else filterP rest |
|
322 |
val arr = !table |
|
323 |
val sz = Array.length arr |
|
324 |
fun filterTbl i = if (i < sz) |
|
325 |
then ( |
|
326 |
Array.update (arr, i, filterP (Array.sub (arr, i))); |
|
327 |
filterTbl (i+1)) |
|
328 |
else () |
|
329 |
in |
|
330 |
filterTbl 0 |
|
331 |
end (* filter *); |
|
332 |
||
333 |
(* Map a table to a new table that has the same keys, exception, |
|
334 |
hash function, and equality function *) |
|
335 |
||
336 |
fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let |
|
337 |
fun mapF NIL = NIL |
|
338 |
| mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest) |
|
339 |
val arr = !table |
|
340 |
val sz = Array.length arr |
|
341 |
val newArr = Array.array (sz, NIL) |
|
342 |
fun mapTbl i = if (i < sz) |
|
343 |
then ( |
|
344 |
Array.update(newArr, i, mapF (Array.sub(arr, i))); |
|
345 |
mapTbl (i+1)) |
|
346 |
else () |
|
347 |
in |
|
348 |
mapTbl 0; |
|
349 |
HT{hashVal=hashVal, |
|
350 |
sameKey=sameKey, |
|
351 |
table = ref newArr, |
|
352 |
n_items = ref(!n_items), |
|
353 |
not_found = not_found} |
|
354 |
end (* transform *); |
|
355 |
||
356 |
(* Create a copy of a hash table *) |
|
357 |
fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let |
|
358 |
val arr = !table |
|
359 |
val sz = Array.length arr |
|
360 |
val newArr = Array.array (sz, NIL) |
|
361 |
fun mapTbl i = ( |
|
362 |
Array.update (newArr, i, Array.sub(arr, i)); |
|
363 |
mapTbl (i+1)) |
|
364 |
in |
|
28397
389c5e494605
handle _ should be avoided (spurious Interrupt will spoil the game);
wenzelm
parents:
20662
diff
changeset
|
365 |
(mapTbl 0) handle _ => (); (* FIXME avoid handle _ *) |
18449 | 366 |
HT{hashVal=hashVal, |
367 |
sameKey=sameKey, |
|
368 |
table = ref newArr, |
|
369 |
n_items = ref(!n_items), |
|
370 |
not_found = not_found} |
|
371 |
end (* copy *); |
|
372 |
||
373 |
(* returns a list of the sizes of the various buckets. This is to |
|
374 |
* allow users to gauge the quality of their hashing function. |
|
375 |
*) |
|
376 |
fun bucketSizes (HT{table = ref arr, ...}) = let |
|
377 |
fun len (NIL, n) = n |
|
378 |
| len (B(_, _, _, r), n) = len(r, n+1) |
|
379 |
fun f (~1, l) = l |
|
380 |
| f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l) |
|
381 |
in |
|
382 |
f ((Array.length arr)-1, []) |
|
383 |
end |
|
384 |
||
385 |
(*Added by lcp. |
|
18508 | 386 |
This is essentially the described in Compilers: |
18449 | 387 |
Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*) |
388 |
||
18508 | 389 |
(*This hash function is recommended in Compilers: Principles, Techniques, and |
390 |
Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly |
|
391 |
recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*) |
|
392 |
fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w)) |
|
18449 | 393 |
|
394 |
fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w); |
|
395 |
||
20662 | 396 |
fun hashw_int (i,w) = hashw (Word.fromInt i, w); |
397 |
||
18449 | 398 |
fun hashw_vector (v,w) = Vector.foldl hashw w v; |
399 |
||
400 |
fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s; |
|
401 |
||
402 |
fun hashw_strings (ss, w) = List.foldl hashw_string w ss; |
|
403 |
||
404 |
fun hash_string s = Word.toIntX (hashw_string(s,0w0)); |
|
405 |
||
406 |
end |