18449
|
1 |
(* Modified for Poly/ML from SML/NJ Library version 0.2
|
|
2 |
*
|
|
3 |
* COPYRIGHT (c) 1992 by AT&T Bell Laboratories.
|
|
4 |
* See file mosml/copyrght/copyrght.att for details.
|
|
5 |
*
|
|
6 |
* Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974
|
18508
|
7 |
* Current version from the Moscow ML distribution, copied by permission.
|
18449
|
8 |
*)
|
|
9 |
|
|
10 |
(* Polyhash -- polymorphic hashtables as in the SML/NJ Library *)
|
|
11 |
|
|
12 |
signature Polyhash =
|
|
13 |
sig
|
|
14 |
|
|
15 |
type ('key, 'data) hash_table
|
|
16 |
|
|
17 |
val mkTable : ('_key -> int) * ('_key * '_key -> bool) -> int * exn
|
|
18 |
-> ('_key, '_data) hash_table
|
|
19 |
val numItems : ('key, 'data) hash_table -> int
|
|
20 |
val insert : ('_key, '_data) hash_table -> '_key * '_data -> unit
|
|
21 |
val peekInsert : ('_key, '_data) hash_table -> '_key * '_data
|
|
22 |
-> '_data option
|
|
23 |
val find : ('key, 'data) hash_table -> 'key -> 'data
|
|
24 |
val peek : ('key, 'data) hash_table -> 'key -> 'data option
|
|
25 |
val remove : ('key, 'data) hash_table -> 'key -> 'data
|
|
26 |
val listItems : ('key, 'data) hash_table -> ('key * 'data) list
|
|
27 |
val apply : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
|
|
28 |
val map : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table
|
|
29 |
-> ('_key, '_res) hash_table
|
|
30 |
val filter : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
|
|
31 |
val transform : ('data -> '_res) -> ('_key, 'data) hash_table
|
|
32 |
-> ('_key, '_res) hash_table
|
|
33 |
val copy : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
|
|
34 |
val bucketSizes : ('key, 'data) hash_table -> int list
|
|
35 |
|
|
36 |
(*Additions due to L. C. Paulson and Jia Meng*)
|
|
37 |
val hashw : word * word -> word
|
|
38 |
val hashw_char : Char.char * word -> word
|
20662
|
39 |
val hashw_int : int * word -> word
|
18449
|
40 |
val hashw_vector : word Vector.vector * word -> word
|
|
41 |
val hashw_string : string * word -> word
|
|
42 |
val hashw_strings : string list * word -> word
|
|
43 |
val hash_string : string -> int
|
|
44 |
|
|
45 |
(*
|
|
46 |
[('key, 'data) hash_table] is the type of hashtables with keys of type
|
|
47 |
'key and data values of type 'data.
|
|
48 |
|
|
49 |
[mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable,
|
|
50 |
using hash function hashVal and equality predicate sameKey. The sz
|
|
51 |
is a size hint, and exc is the exception raised by function find.
|
|
52 |
It must be the case that sameKey(k1, k2) implies hashVal(k1) =
|
|
53 |
hashVal(k2) for all k1,k2.
|
|
54 |
|
|
55 |
[numItems htbl] is the number of items in the hash table.
|
|
56 |
|
|
57 |
[insert htbl (k, d)] inserts data d for key k. If k already had an
|
|
58 |
item associated with it, then the old item is overwritten.
|
|
59 |
|
|
60 |
[find htbl k] returns d, where d is the data item associated with key k,
|
|
61 |
or raises the exception (given at creation of htbl) if there is no such d.
|
|
62 |
|
|
63 |
[peek htbl k] returns SOME d, where d is the data item associated with
|
|
64 |
key k, or NONE if there is no such d.
|
|
65 |
|
|
66 |
[peekInsert htbl (k, d)] inserts data d for key k, if k is not
|
|
67 |
already in the table, returning NONE. If k is already in the
|
|
68 |
table, and the associated data value is d', then returns SOME d'
|
|
69 |
and leaves the table unmodified.
|
|
70 |
|
|
71 |
[remove htbl k] returns d, where d is the data item associated with key k,
|
|
72 |
removing d from the table; or raises the exception if there is no such d.
|
|
73 |
|
|
74 |
[listItems htbl] returns a list of the (key, data) pairs in the hashtable.
|
|
75 |
|
|
76 |
[apply f htbl] applies function f to all (key, data) pairs in the
|
|
77 |
hashtable, in some order.
|
|
78 |
|
|
79 |
[map f htbl] returns a new hashtable, whose data items have been
|
|
80 |
obtained by applying f to the (key, data) pairs in htbl. The new
|
|
81 |
tables have the same keys, hash function, equality predicate, and
|
|
82 |
exception, as htbl.
|
|
83 |
|
|
84 |
[filter p htbl] deletes from htbl all data items which do not
|
|
85 |
satisfy predicate p.
|
|
86 |
|
|
87 |
[transform f htbl] as map, but only the (old) data values are used
|
|
88 |
when computing the new data values.
|
|
89 |
|
|
90 |
[copy htbl] returns a complete copy of htbl.
|
|
91 |
|
|
92 |
[bucketSizes htbl] returns a list of the sizes of the buckets.
|
|
93 |
This is to allow users to gauge the quality of their hashing
|
|
94 |
function.
|
|
95 |
*)
|
|
96 |
|
|
97 |
end
|
|
98 |
|
|
99 |
|
|
100 |
structure Polyhash :> Polyhash =
|
|
101 |
struct
|
|
102 |
|
|
103 |
datatype ('key, 'data) bucket_t
|
|
104 |
= NIL
|
|
105 |
| B of int * 'key * 'data * ('key, 'data) bucket_t
|
|
106 |
|
|
107 |
datatype ('key, 'data) hash_table =
|
|
108 |
HT of {hashVal : 'key -> int,
|
|
109 |
sameKey : 'key * 'key -> bool,
|
|
110 |
not_found : exn,
|
|
111 |
table : ('key, 'data) bucket_t Array.array ref,
|
|
112 |
n_items : int ref}
|
|
113 |
|
|
114 |
local
|
|
115 |
(*
|
|
116 |
prim_val andb_ : int -> int -> int = 2 "and";
|
|
117 |
prim_val lshift_ : int -> int -> int = 2 "shift_left";
|
|
118 |
*)
|
|
119 |
fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y));
|
|
120 |
fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y));
|
|
121 |
in
|
|
122 |
fun index (i, sz) = andb_ i (sz-1)
|
|
123 |
|
|
124 |
(* find smallest power of 2 (>= 32) that is >= n *)
|
|
125 |
fun roundUp n =
|
|
126 |
let fun f i = if (i >= n) then i else f (lshift_ i 1)
|
|
127 |
in f 32 end
|
|
128 |
end;
|
|
129 |
|
|
130 |
(* Create a new table; the int is a size hint and the exception
|
|
131 |
* is to be raised by find.
|
|
132 |
*)
|
|
133 |
fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
|
|
134 |
hashVal=hashVal,
|
|
135 |
sameKey=sameKey,
|
|
136 |
not_found = notFound,
|
|
137 |
table = ref (Array.array(roundUp sizeHint, NIL)),
|
|
138 |
n_items = ref 0
|
|
139 |
};
|
|
140 |
|
|
141 |
(* conditionally grow a table *)
|
|
142 |
fun growTable (HT{table, n_items, ...}) = let
|
|
143 |
val arr = !table
|
|
144 |
val sz = Array.length arr
|
|
145 |
in
|
|
146 |
if (!n_items >= sz)
|
|
147 |
then let
|
|
148 |
val newSz = sz+sz
|
|
149 |
val newArr = Array.array (newSz, NIL)
|
|
150 |
fun copy NIL = ()
|
|
151 |
| copy (B(h, key, v, rest)) = let
|
|
152 |
val indx = index (h, newSz)
|
|
153 |
in
|
|
154 |
Array.update (newArr, indx,
|
|
155 |
B(h, key, v, Array.sub(newArr, indx)));
|
|
156 |
copy rest
|
|
157 |
end
|
|
158 |
fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1))
|
|
159 |
in
|
|
160 |
(bucket 0) handle _ => ();
|
|
161 |
table := newArr
|
|
162 |
end
|
|
163 |
else ()
|
|
164 |
end (* growTable *);
|
|
165 |
|
|
166 |
(* Insert an item. If the key already has an item associated with it,
|
|
167 |
* then the old item is discarded.
|
|
168 |
*)
|
|
169 |
fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
|
|
170 |
let
|
|
171 |
val arr = !table
|
|
172 |
val sz = Array.length arr
|
|
173 |
val hash = hashVal key
|
|
174 |
val indx = index (hash, sz)
|
|
175 |
fun look NIL = (
|
|
176 |
Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx)));
|
20416
|
177 |
inc n_items;
|
18449
|
178 |
growTable tbl;
|
|
179 |
NIL)
|
|
180 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
|
|
181 |
then B(hash, key, item, r)
|
|
182 |
else (case (look r)
|
|
183 |
of NIL => NIL
|
|
184 |
| rest => B(h, k, v, rest)
|
|
185 |
(* end case *))
|
|
186 |
in
|
|
187 |
case (look (Array.sub (arr, indx)))
|
|
188 |
of NIL => ()
|
|
189 |
| b => Array.update(arr, indx, b)
|
|
190 |
end;
|
|
191 |
|
|
192 |
(* Insert an item if not there already; if it is there already,
|
|
193 |
then return the old data value and leave the table unmodified..
|
|
194 |
*)
|
|
195 |
fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
|
|
196 |
let val arr = !table
|
|
197 |
val sz = Array.length arr
|
|
198 |
val hash = hashVal key
|
|
199 |
val indx = index (hash, sz)
|
|
200 |
fun look NIL =
|
|
201 |
(Array.update(arr, indx, B(hash, key, item,
|
|
202 |
Array.sub(arr, indx)));
|
20416
|
203 |
inc n_items;
|
18449
|
204 |
growTable tbl;
|
|
205 |
NONE)
|
|
206 |
| look (B(h, k, v, r)) =
|
|
207 |
if hash = h andalso sameKey(key, k) then SOME v
|
|
208 |
else look r
|
|
209 |
in
|
|
210 |
look (Array.sub (arr, indx))
|
|
211 |
end;
|
|
212 |
|
|
213 |
(* find an item, the table's exception is raised if the item doesn't exist *)
|
|
214 |
fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
|
|
215 |
val arr = !table
|
|
216 |
val sz = Array.length arr
|
|
217 |
val hash = hashVal key
|
|
218 |
val indx = index (hash, sz)
|
|
219 |
fun look NIL = raise not_found
|
|
220 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
|
|
221 |
then v
|
|
222 |
else look r
|
|
223 |
in
|
|
224 |
look (Array.sub (arr, indx))
|
|
225 |
end;
|
|
226 |
|
|
227 |
(* look for an item, return NONE if the item doesn't exist *)
|
|
228 |
fun peek (HT{hashVal, sameKey, table, ...}) key = let
|
|
229 |
val arr = !table
|
|
230 |
val sz = Array.length arr
|
|
231 |
val hash = hashVal key
|
|
232 |
val indx = index (hash, sz)
|
|
233 |
fun look NIL = NONE
|
|
234 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
|
|
235 |
then SOME v
|
|
236 |
else look r
|
|
237 |
in
|
|
238 |
look (Array.sub (arr, indx))
|
|
239 |
end;
|
|
240 |
|
|
241 |
(* Remove an item. The table's exception is raised if
|
|
242 |
* the item doesn't exist.
|
|
243 |
*)
|
|
244 |
fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
|
|
245 |
val arr = !table
|
|
246 |
val sz = Array.length arr
|
|
247 |
val hash = hashVal key
|
|
248 |
val indx = index (hash, sz)
|
|
249 |
fun look NIL = raise not_found
|
|
250 |
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
|
|
251 |
then (v, r)
|
|
252 |
else let val (item, r') = look r in (item, B(h, k, v, r')) end
|
|
253 |
val (item, bucket) = look (Array.sub (arr, indx))
|
|
254 |
in
|
|
255 |
Array.update (arr, indx, bucket);
|
|
256 |
n_items := !n_items - 1;
|
|
257 |
item
|
|
258 |
end (* remove *);
|
|
259 |
|
|
260 |
(* Return the number of items in the table *)
|
|
261 |
fun numItems (HT{n_items, ...}) = !n_items
|
|
262 |
|
|
263 |
(* return a list of the items in the table *)
|
|
264 |
fun listItems (HT{table = ref arr, n_items, ...}) = let
|
|
265 |
fun f (_, l, 0) = l
|
|
266 |
| f (~1, l, _) = l
|
|
267 |
| f (i, l, n) = let
|
|
268 |
fun g (NIL, l, n) = f (i-1, l, n)
|
|
269 |
| g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1)
|
|
270 |
in
|
|
271 |
g (Array.sub(arr, i), l, n)
|
|
272 |
end
|
|
273 |
in
|
|
274 |
f ((Array.length arr) - 1, [], !n_items)
|
|
275 |
end (* listItems *);
|
|
276 |
|
|
277 |
(* Apply a function to the entries of the table *)
|
|
278 |
fun apply f (HT{table, ...}) = let
|
|
279 |
fun appF NIL = ()
|
|
280 |
| appF (B(_, key, item, rest)) = (
|
|
281 |
f (key, item);
|
|
282 |
appF rest)
|
|
283 |
val arr = !table
|
|
284 |
val sz = Array.length arr
|
|
285 |
fun appToTbl i = if (i < sz)
|
|
286 |
then (appF (Array.sub (arr, i)); appToTbl(i+1))
|
|
287 |
else ()
|
|
288 |
in
|
|
289 |
appToTbl 0
|
|
290 |
end (* apply *);
|
|
291 |
|
|
292 |
(* Map a table to a new table that has the same keys and exception *)
|
|
293 |
fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
|
|
294 |
fun mapF NIL = NIL
|
|
295 |
| mapF (B(hash, key, item, rest)) =
|
|
296 |
B(hash, key, f (key, item), mapF rest)
|
|
297 |
val arr = !table
|
|
298 |
val sz = Array.length arr
|
|
299 |
val newArr = Array.array (sz, NIL)
|
|
300 |
fun mapTbl i = if (i < sz)
|
|
301 |
then (
|
|
302 |
Array.update(newArr, i, mapF (Array.sub(arr, i)));
|
|
303 |
mapTbl (i+1))
|
|
304 |
else ()
|
|
305 |
in
|
|
306 |
mapTbl 0;
|
|
307 |
HT{hashVal=hashVal,
|
|
308 |
sameKey=sameKey,
|
|
309 |
table = ref newArr,
|
|
310 |
n_items = ref(!n_items),
|
|
311 |
not_found = not_found}
|
|
312 |
end (* transform *);
|
|
313 |
|
|
314 |
(* remove any hash table items that do not satisfy the given
|
|
315 |
* predicate.
|
|
316 |
*)
|
|
317 |
fun filter pred (HT{table, n_items, not_found, ...}) = let
|
|
318 |
fun filterP NIL = NIL
|
|
319 |
| filterP (B(hash, key, item, rest)) = if (pred(key, item))
|
|
320 |
then B(hash, key, item, filterP rest)
|
|
321 |
else filterP rest
|
|
322 |
val arr = !table
|
|
323 |
val sz = Array.length arr
|
|
324 |
fun filterTbl i = if (i < sz)
|
|
325 |
then (
|
|
326 |
Array.update (arr, i, filterP (Array.sub (arr, i)));
|
|
327 |
filterTbl (i+1))
|
|
328 |
else ()
|
|
329 |
in
|
|
330 |
filterTbl 0
|
|
331 |
end (* filter *);
|
|
332 |
|
|
333 |
(* Map a table to a new table that has the same keys, exception,
|
|
334 |
hash function, and equality function *)
|
|
335 |
|
|
336 |
fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
|
|
337 |
fun mapF NIL = NIL
|
|
338 |
| mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest)
|
|
339 |
val arr = !table
|
|
340 |
val sz = Array.length arr
|
|
341 |
val newArr = Array.array (sz, NIL)
|
|
342 |
fun mapTbl i = if (i < sz)
|
|
343 |
then (
|
|
344 |
Array.update(newArr, i, mapF (Array.sub(arr, i)));
|
|
345 |
mapTbl (i+1))
|
|
346 |
else ()
|
|
347 |
in
|
|
348 |
mapTbl 0;
|
|
349 |
HT{hashVal=hashVal,
|
|
350 |
sameKey=sameKey,
|
|
351 |
table = ref newArr,
|
|
352 |
n_items = ref(!n_items),
|
|
353 |
not_found = not_found}
|
|
354 |
end (* transform *);
|
|
355 |
|
|
356 |
(* Create a copy of a hash table *)
|
|
357 |
fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
|
|
358 |
val arr = !table
|
|
359 |
val sz = Array.length arr
|
|
360 |
val newArr = Array.array (sz, NIL)
|
|
361 |
fun mapTbl i = (
|
|
362 |
Array.update (newArr, i, Array.sub(arr, i));
|
|
363 |
mapTbl (i+1))
|
|
364 |
in
|
|
365 |
(mapTbl 0) handle _ => ();
|
|
366 |
HT{hashVal=hashVal,
|
|
367 |
sameKey=sameKey,
|
|
368 |
table = ref newArr,
|
|
369 |
n_items = ref(!n_items),
|
|
370 |
not_found = not_found}
|
|
371 |
end (* copy *);
|
|
372 |
|
|
373 |
(* returns a list of the sizes of the various buckets. This is to
|
|
374 |
* allow users to gauge the quality of their hashing function.
|
|
375 |
*)
|
|
376 |
fun bucketSizes (HT{table = ref arr, ...}) = let
|
|
377 |
fun len (NIL, n) = n
|
|
378 |
| len (B(_, _, _, r), n) = len(r, n+1)
|
|
379 |
fun f (~1, l) = l
|
|
380 |
| f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l)
|
|
381 |
in
|
|
382 |
f ((Array.length arr)-1, [])
|
|
383 |
end
|
|
384 |
|
|
385 |
(*Added by lcp.
|
18508
|
386 |
This is essentially the described in Compilers:
|
18449
|
387 |
Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*)
|
|
388 |
|
18508
|
389 |
(*This hash function is recommended in Compilers: Principles, Techniques, and
|
|
390 |
Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly
|
|
391 |
recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*)
|
|
392 |
fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w))
|
18449
|
393 |
|
|
394 |
fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w);
|
|
395 |
|
20662
|
396 |
fun hashw_int (i,w) = hashw (Word.fromInt i, w);
|
|
397 |
|
18449
|
398 |
fun hashw_vector (v,w) = Vector.foldl hashw w v;
|
|
399 |
|
|
400 |
fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s;
|
|
401 |
|
|
402 |
fun hashw_strings (ss, w) = List.foldl hashw_string w ss;
|
|
403 |
|
|
404 |
fun hash_string s = Word.toIntX (hashw_string(s,0w0));
|
|
405 |
|
|
406 |
end
|