| author | huffman | 
| Sat, 06 Jun 2009 10:28:34 -0700 | |
| changeset 31489 | 10080e31b294 | 
| parent 28397 | 389c5e494605 | 
| child 32740 | 9dd0a2f83429 | 
| permissions | -rw-r--r-- | 
| 18449 | 1  | 
(* Modified for Poly/ML from SML/NJ Library version 0.2  | 
2  | 
*  | 
|
3  | 
* COPYRIGHT (c) 1992 by AT&T Bell Laboratories.  | 
|
4  | 
* See file mosml/copyrght/copyrght.att for details.  | 
|
5  | 
*  | 
|
6  | 
* Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974  | 
|
| 18508 | 7  | 
* Current version from the Moscow ML distribution, copied by permission.  | 
| 18449 | 8  | 
*)  | 
9  | 
||
10  | 
(* Polyhash -- polymorphic hashtables as in the SML/NJ Library *)  | 
|
11  | 
||
12  | 
signature Polyhash =  | 
|
13  | 
sig  | 
|
14  | 
||
15  | 
type ('key, 'data) hash_table
 | 
|
16  | 
||
17  | 
val mkTable     : ('_key -> int) * ('_key * '_key -> bool) -> int * exn 
 | 
|
18  | 
                  -> ('_key, '_data) hash_table
 | 
|
19  | 
val numItems    : ('key, 'data) hash_table -> int
 | 
|
20  | 
val insert      : ('_key, '_data) hash_table -> '_key * '_data -> unit
 | 
|
21  | 
val peekInsert  : ('_key, '_data) hash_table -> '_key * '_data 
 | 
|
22  | 
-> '_data option  | 
|
23  | 
val find        : ('key, 'data) hash_table -> 'key -> 'data
 | 
|
24  | 
val peek        : ('key, 'data) hash_table -> 'key -> 'data option
 | 
|
25  | 
val remove      : ('key, 'data) hash_table -> 'key -> 'data
 | 
|
26  | 
val listItems   : ('key, 'data) hash_table -> ('key * 'data) list
 | 
|
27  | 
val apply       : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
 | 
|
28  | 
val map         : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table 
 | 
|
29  | 
                  -> ('_key, '_res) hash_table
 | 
|
30  | 
val filter      : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
 | 
|
31  | 
val transform   : ('data -> '_res) -> ('_key, 'data) hash_table 
 | 
|
32  | 
                  -> ('_key, '_res) hash_table
 | 
|
33  | 
val copy        : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
 | 
|
34  | 
val bucketSizes : ('key, 'data) hash_table -> int list
 | 
|
35  | 
||
36  | 
(*Additions due to L. C. Paulson and Jia Meng*)  | 
|
37  | 
val hashw : word * word -> word  | 
|
38  | 
val hashw_char : Char.char * word -> word  | 
|
| 20662 | 39  | 
val hashw_int : int * word -> word  | 
| 18449 | 40  | 
val hashw_vector : word Vector.vector * word -> word  | 
41  | 
val hashw_string : string * word -> word  | 
|
42  | 
val hashw_strings : string list * word -> word  | 
|
43  | 
val hash_string : string -> int  | 
|
44  | 
||
45  | 
(*  | 
|
46  | 
   [('key, 'data) hash_table] is the type of hashtables with keys of type
 | 
|
47  | 
'key and data values of type 'data.  | 
|
48  | 
||
49  | 
[mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable,  | 
|
50  | 
using hash function hashVal and equality predicate sameKey. The sz  | 
|
51  | 
is a size hint, and exc is the exception raised by function find.  | 
|
52  | 
It must be the case that sameKey(k1, k2) implies hashVal(k1) =  | 
|
53  | 
hashVal(k2) for all k1,k2.  | 
|
54  | 
||
55  | 
[numItems htbl] is the number of items in the hash table.  | 
|
56  | 
||
57  | 
[insert htbl (k, d)] inserts data d for key k. If k already had an  | 
|
58  | 
item associated with it, then the old item is overwritten.  | 
|
59  | 
||
60  | 
[find htbl k] returns d, where d is the data item associated with key k,  | 
|
61  | 
or raises the exception (given at creation of htbl) if there is no such d.  | 
|
62  | 
||
63  | 
[peek htbl k] returns SOME d, where d is the data item associated with  | 
|
64  | 
key k, or NONE if there is no such d.  | 
|
65  | 
||
66  | 
[peekInsert htbl (k, d)] inserts data d for key k, if k is not  | 
|
67  | 
already in the table, returning NONE. If k is already in the  | 
|
68  | 
table, and the associated data value is d', then returns SOME d'  | 
|
69  | 
and leaves the table unmodified.  | 
|
70  | 
||
71  | 
[remove htbl k] returns d, where d is the data item associated with key k,  | 
|
72  | 
removing d from the table; or raises the exception if there is no such d.  | 
|
73  | 
||
74  | 
[listItems htbl] returns a list of the (key, data) pairs in the hashtable.  | 
|
75  | 
||
76  | 
[apply f htbl] applies function f to all (key, data) pairs in the  | 
|
77  | 
hashtable, in some order.  | 
|
78  | 
||
79  | 
[map f htbl] returns a new hashtable, whose data items have been  | 
|
80  | 
obtained by applying f to the (key, data) pairs in htbl. The new  | 
|
81  | 
tables have the same keys, hash function, equality predicate, and  | 
|
82  | 
exception, as htbl.  | 
|
83  | 
||
84  | 
[filter p htbl] deletes from htbl all data items which do not  | 
|
85  | 
satisfy predicate p.  | 
|
86  | 
||
87  | 
[transform f htbl] as map, but only the (old) data values are used  | 
|
88  | 
when computing the new data values.  | 
|
89  | 
||
90  | 
[copy htbl] returns a complete copy of htbl.  | 
|
91  | 
||
92  | 
[bucketSizes htbl] returns a list of the sizes of the buckets.  | 
|
93  | 
This is to allow users to gauge the quality of their hashing  | 
|
94  | 
function.  | 
|
95  | 
*)  | 
|
96  | 
||
97  | 
end  | 
|
98  | 
||
99  | 
||
100  | 
structure Polyhash :> Polyhash =  | 
|
101  | 
struct  | 
|
102  | 
||
103  | 
datatype ('key, 'data) bucket_t
 | 
|
104  | 
= NIL  | 
|
105  | 
  | B of int * 'key * 'data * ('key, 'data) bucket_t
 | 
|
106  | 
||
107  | 
datatype ('key, 'data) hash_table = 
 | 
|
108  | 
    HT of {hashVal   : 'key -> int,
 | 
|
109  | 
sameKey : 'key * 'key -> bool,  | 
|
110  | 
not_found : exn,  | 
|
111  | 
	   table     : ('key, 'data) bucket_t Array.array ref,
 | 
|
112  | 
n_items : int ref}  | 
|
113  | 
||
114  | 
local  | 
|
115  | 
(*  | 
|
116  | 
prim_val andb_ : int -> int -> int = 2 "and";  | 
|
117  | 
prim_val lshift_ : int -> int -> int = 2 "shift_left";  | 
|
118  | 
*)  | 
|
119  | 
fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y));  | 
|
120  | 
fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y));  | 
|
121  | 
in  | 
|
122  | 
fun index (i, sz) = andb_ i (sz-1)  | 
|
123  | 
||
124  | 
(* find smallest power of 2 (>= 32) that is >= n *)  | 
|
125  | 
fun roundUp n =  | 
|
126  | 
let fun f i = if (i >= n) then i else f (lshift_ i 1)  | 
|
127  | 
in f 32 end  | 
|
128  | 
end;  | 
|
129  | 
||
130  | 
(* Create a new table; the int is a size hint and the exception  | 
|
131  | 
* is to be raised by find.  | 
|
132  | 
*)  | 
|
133  | 
    fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
 | 
|
134  | 
hashVal=hashVal,  | 
|
135  | 
sameKey=sameKey,  | 
|
136  | 
not_found = notFound,  | 
|
137  | 
table = ref (Array.array(roundUp sizeHint, NIL)),  | 
|
138  | 
n_items = ref 0  | 
|
139  | 
};  | 
|
140  | 
||
141  | 
(* conditionally grow a table *)  | 
|
142  | 
    fun growTable (HT{table, n_items, ...}) = let
 | 
|
143  | 
val arr = !table  | 
|
144  | 
val sz = Array.length arr  | 
|
145  | 
in  | 
|
146  | 
if (!n_items >= sz)  | 
|
147  | 
then let  | 
|
148  | 
val newSz = sz+sz  | 
|
149  | 
val newArr = Array.array (newSz, NIL)  | 
|
150  | 
fun copy NIL = ()  | 
|
151  | 
| copy (B(h, key, v, rest)) = let  | 
|
152  | 
val indx = index (h, newSz)  | 
|
153  | 
in  | 
|
154  | 
Array.update (newArr, indx,  | 
|
155  | 
B(h, key, v, Array.sub(newArr, indx)));  | 
|
156  | 
copy rest  | 
|
157  | 
end  | 
|
158  | 
fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1))  | 
|
159  | 
in  | 
|
| 
28397
 
389c5e494605
handle _ should be avoided (spurious Interrupt will spoil the game);
 
wenzelm 
parents: 
20662 
diff
changeset
 | 
160  | 
(bucket 0) handle _ => (); (* FIXME avoid handle _ *)  | 
| 18449 | 161  | 
table := newArr  | 
162  | 
end  | 
|
163  | 
else ()  | 
|
164  | 
end (* growTable *);  | 
|
165  | 
||
166  | 
(* Insert an item. If the key already has an item associated with it,  | 
|
167  | 
* then the old item is discarded.  | 
|
168  | 
*)  | 
|
169  | 
    fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
 | 
|
170  | 
let  | 
|
171  | 
val arr = !table  | 
|
172  | 
val sz = Array.length arr  | 
|
173  | 
val hash = hashVal key  | 
|
174  | 
val indx = index (hash, sz)  | 
|
175  | 
fun look NIL = (  | 
|
176  | 
Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx)));  | 
|
| 20416 | 177  | 
inc n_items;  | 
| 18449 | 178  | 
growTable tbl;  | 
179  | 
NIL)  | 
|
180  | 
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))  | 
|
181  | 
then B(hash, key, item, r)  | 
|
182  | 
else (case (look r)  | 
|
183  | 
of NIL => NIL  | 
|
184  | 
| rest => B(h, k, v, rest)  | 
|
185  | 
(* end case *))  | 
|
186  | 
in  | 
|
187  | 
case (look (Array.sub (arr, indx)))  | 
|
188  | 
of NIL => ()  | 
|
189  | 
| b => Array.update(arr, indx, b)  | 
|
190  | 
end;  | 
|
191  | 
||
192  | 
(* Insert an item if not there already; if it is there already,  | 
|
193  | 
then return the old data value and leave the table unmodified..  | 
|
194  | 
*)  | 
|
195  | 
    fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
 | 
|
196  | 
let val arr = !table  | 
|
197  | 
val sz = Array.length arr  | 
|
198  | 
val hash = hashVal key  | 
|
199  | 
val indx = index (hash, sz)  | 
|
200  | 
fun look NIL =  | 
|
201  | 
(Array.update(arr, indx, B(hash, key, item,  | 
|
202  | 
Array.sub(arr, indx)));  | 
|
| 20416 | 203  | 
inc n_items;  | 
| 18449 | 204  | 
growTable tbl;  | 
205  | 
NONE)  | 
|
206  | 
| look (B(h, k, v, r)) =  | 
|
207  | 
if hash = h andalso sameKey(key, k) then SOME v  | 
|
208  | 
else look r  | 
|
209  | 
in  | 
|
210  | 
look (Array.sub (arr, indx))  | 
|
211  | 
end;  | 
|
212  | 
||
213  | 
(* find an item, the table's exception is raised if the item doesn't exist *)  | 
|
214  | 
    fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
 | 
|
215  | 
val arr = !table  | 
|
216  | 
val sz = Array.length arr  | 
|
217  | 
val hash = hashVal key  | 
|
218  | 
val indx = index (hash, sz)  | 
|
219  | 
fun look NIL = raise not_found  | 
|
220  | 
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))  | 
|
221  | 
then v  | 
|
222  | 
else look r  | 
|
223  | 
in  | 
|
224  | 
look (Array.sub (arr, indx))  | 
|
225  | 
end;  | 
|
226  | 
||
227  | 
(* look for an item, return NONE if the item doesn't exist *)  | 
|
228  | 
    fun peek (HT{hashVal, sameKey, table, ...}) key = let
 | 
|
229  | 
val arr = !table  | 
|
230  | 
val sz = Array.length arr  | 
|
231  | 
val hash = hashVal key  | 
|
232  | 
val indx = index (hash, sz)  | 
|
233  | 
fun look NIL = NONE  | 
|
234  | 
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))  | 
|
235  | 
then SOME v  | 
|
236  | 
else look r  | 
|
237  | 
in  | 
|
238  | 
look (Array.sub (arr, indx))  | 
|
239  | 
end;  | 
|
240  | 
||
241  | 
(* Remove an item. The table's exception is raised if  | 
|
242  | 
* the item doesn't exist.  | 
|
243  | 
*)  | 
|
244  | 
    fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
 | 
|
245  | 
val arr = !table  | 
|
246  | 
val sz = Array.length arr  | 
|
247  | 
val hash = hashVal key  | 
|
248  | 
val indx = index (hash, sz)  | 
|
249  | 
fun look NIL = raise not_found  | 
|
250  | 
| look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))  | 
|
251  | 
then (v, r)  | 
|
252  | 
else let val (item, r') = look r in (item, B(h, k, v, r')) end  | 
|
253  | 
val (item, bucket) = look (Array.sub (arr, indx))  | 
|
254  | 
in  | 
|
255  | 
Array.update (arr, indx, bucket);  | 
|
256  | 
n_items := !n_items - 1;  | 
|
257  | 
item  | 
|
258  | 
end (* remove *);  | 
|
259  | 
||
260  | 
(* Return the number of items in the table *)  | 
|
261  | 
   fun numItems (HT{n_items, ...}) = !n_items
 | 
|
262  | 
||
263  | 
(* return a list of the items in the table *)  | 
|
264  | 
    fun listItems (HT{table = ref arr, n_items, ...}) = let
 | 
|
265  | 
fun f (_, l, 0) = l  | 
|
266  | 
| f (~1, l, _) = l  | 
|
267  | 
| f (i, l, n) = let  | 
|
268  | 
fun g (NIL, l, n) = f (i-1, l, n)  | 
|
269  | 
| g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1)  | 
|
270  | 
in  | 
|
271  | 
g (Array.sub(arr, i), l, n)  | 
|
272  | 
end  | 
|
273  | 
in  | 
|
274  | 
f ((Array.length arr) - 1, [], !n_items)  | 
|
275  | 
end (* listItems *);  | 
|
276  | 
||
277  | 
(* Apply a function to the entries of the table *)  | 
|
278  | 
    fun apply f (HT{table, ...}) = let
 | 
|
279  | 
fun appF NIL = ()  | 
|
280  | 
| appF (B(_, key, item, rest)) = (  | 
|
281  | 
f (key, item);  | 
|
282  | 
appF rest)  | 
|
283  | 
val arr = !table  | 
|
284  | 
val sz = Array.length arr  | 
|
285  | 
fun appToTbl i = if (i < sz)  | 
|
286  | 
then (appF (Array.sub (arr, i)); appToTbl(i+1))  | 
|
287  | 
else ()  | 
|
288  | 
in  | 
|
289  | 
appToTbl 0  | 
|
290  | 
end (* apply *);  | 
|
291  | 
||
292  | 
(* Map a table to a new table that has the same keys and exception *)  | 
|
293  | 
    fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
 | 
|
294  | 
fun mapF NIL = NIL  | 
|
295  | 
| mapF (B(hash, key, item, rest)) =  | 
|
296  | 
B(hash, key, f (key, item), mapF rest)  | 
|
297  | 
val arr = !table  | 
|
298  | 
val sz = Array.length arr  | 
|
299  | 
val newArr = Array.array (sz, NIL)  | 
|
300  | 
fun mapTbl i = if (i < sz)  | 
|
301  | 
then (  | 
|
302  | 
Array.update(newArr, i, mapF (Array.sub(arr, i)));  | 
|
303  | 
mapTbl (i+1))  | 
|
304  | 
else ()  | 
|
305  | 
in  | 
|
306  | 
mapTbl 0;  | 
|
307  | 
	    HT{hashVal=hashVal,
 | 
|
308  | 
sameKey=sameKey,  | 
|
309  | 
table = ref newArr,  | 
|
310  | 
n_items = ref(!n_items),  | 
|
311  | 
not_found = not_found}  | 
|
312  | 
end (* transform *);  | 
|
313  | 
||
314  | 
(* remove any hash table items that do not satisfy the given  | 
|
315  | 
* predicate.  | 
|
316  | 
*)  | 
|
317  | 
    fun filter pred (HT{table, n_items, not_found, ...}) = let
 | 
|
318  | 
fun filterP NIL = NIL  | 
|
319  | 
| filterP (B(hash, key, item, rest)) = if (pred(key, item))  | 
|
320  | 
then B(hash, key, item, filterP rest)  | 
|
321  | 
else filterP rest  | 
|
322  | 
val arr = !table  | 
|
323  | 
val sz = Array.length arr  | 
|
324  | 
fun filterTbl i = if (i < sz)  | 
|
325  | 
then (  | 
|
326  | 
Array.update (arr, i, filterP (Array.sub (arr, i)));  | 
|
327  | 
filterTbl (i+1))  | 
|
328  | 
else ()  | 
|
329  | 
in  | 
|
330  | 
filterTbl 0  | 
|
331  | 
end (* filter *);  | 
|
332  | 
||
333  | 
(* Map a table to a new table that has the same keys, exception,  | 
|
334  | 
hash function, and equality function *)  | 
|
335  | 
||
336  | 
    fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
 | 
|
337  | 
fun mapF NIL = NIL  | 
|
338  | 
| mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest)  | 
|
339  | 
val arr = !table  | 
|
340  | 
val sz = Array.length arr  | 
|
341  | 
val newArr = Array.array (sz, NIL)  | 
|
342  | 
fun mapTbl i = if (i < sz)  | 
|
343  | 
then (  | 
|
344  | 
Array.update(newArr, i, mapF (Array.sub(arr, i)));  | 
|
345  | 
mapTbl (i+1))  | 
|
346  | 
else ()  | 
|
347  | 
in  | 
|
348  | 
mapTbl 0;  | 
|
349  | 
	    HT{hashVal=hashVal, 
 | 
|
350  | 
sameKey=sameKey,  | 
|
351  | 
table = ref newArr,  | 
|
352  | 
n_items = ref(!n_items),  | 
|
353  | 
not_found = not_found}  | 
|
354  | 
end (* transform *);  | 
|
355  | 
||
356  | 
(* Create a copy of a hash table *)  | 
|
357  | 
    fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
 | 
|
358  | 
val arr = !table  | 
|
359  | 
val sz = Array.length arr  | 
|
360  | 
val newArr = Array.array (sz, NIL)  | 
|
361  | 
fun mapTbl i = (  | 
|
362  | 
Array.update (newArr, i, Array.sub(arr, i));  | 
|
363  | 
mapTbl (i+1))  | 
|
364  | 
in  | 
|
| 
28397
 
389c5e494605
handle _ should be avoided (spurious Interrupt will spoil the game);
 
wenzelm 
parents: 
20662 
diff
changeset
 | 
365  | 
(mapTbl 0) handle _ => (); (* FIXME avoid handle _ *)  | 
| 18449 | 366  | 
	    HT{hashVal=hashVal, 
 | 
367  | 
sameKey=sameKey,  | 
|
368  | 
table = ref newArr,  | 
|
369  | 
n_items = ref(!n_items),  | 
|
370  | 
not_found = not_found}  | 
|
371  | 
end (* copy *);  | 
|
372  | 
||
373  | 
(* returns a list of the sizes of the various buckets. This is to  | 
|
374  | 
* allow users to gauge the quality of their hashing function.  | 
|
375  | 
*)  | 
|
376  | 
    fun bucketSizes (HT{table = ref arr, ...}) = let
 | 
|
377  | 
fun len (NIL, n) = n  | 
|
378  | 
| len (B(_, _, _, r), n) = len(r, n+1)  | 
|
379  | 
fun f (~1, l) = l  | 
|
380  | 
| f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l)  | 
|
381  | 
in  | 
|
382  | 
f ((Array.length arr)-1, [])  | 
|
383  | 
end  | 
|
384  | 
||
385  | 
(*Added by lcp.  | 
|
| 18508 | 386  | 
This is essentially the described in Compilers:  | 
| 18449 | 387  | 
Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*)  | 
388  | 
||
| 18508 | 389  | 
(*This hash function is recommended in Compilers: Principles, Techniques, and  | 
390  | 
Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly  | 
|
391  | 
recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*)  | 
|
392  | 
fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w))  | 
|
| 18449 | 393  | 
|
394  | 
fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w);  | 
|
395  | 
||
| 20662 | 396  | 
fun hashw_int (i,w) = hashw (Word.fromInt i, w);  | 
397  | 
||
| 18449 | 398  | 
fun hashw_vector (v,w) = Vector.foldl hashw w v;  | 
399  | 
||
400  | 
fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s;  | 
|
401  | 
||
402  | 
fun hashw_strings (ss, w) = List.foldl hashw_string w ss;  | 
|
403  | 
||
404  | 
fun hash_string s = Word.toIntX (hashw_string(s,0w0));  | 
|
405  | 
||
406  | 
end  |