--- a/src/HOL/Tools/polyhash.ML Mon Mar 29 15:26:19 2010 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,406 +0,0 @@
-(* Modified for Poly/ML from SML/NJ Library version 0.2
- *
- * COPYRIGHT (c) 1992 by AT&T Bell Laboratories.
- * See file mosml/copyrght/copyrght.att for details.
- *
- * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974
- * Current version from the Moscow ML distribution, copied by permission.
- *)
-
-(* Polyhash -- polymorphic hashtables as in the SML/NJ Library *)
-
-signature Polyhash =
-sig
-
-type ('key, 'data) hash_table
-
-val mkTable : ('_key -> int) * ('_key * '_key -> bool) -> int * exn
- -> ('_key, '_data) hash_table
-val numItems : ('key, 'data) hash_table -> int
-val insert : ('_key, '_data) hash_table -> '_key * '_data -> unit
-val peekInsert : ('_key, '_data) hash_table -> '_key * '_data
- -> '_data option
-val find : ('key, 'data) hash_table -> 'key -> 'data
-val peek : ('key, 'data) hash_table -> 'key -> 'data option
-val remove : ('key, 'data) hash_table -> 'key -> 'data
-val listItems : ('key, 'data) hash_table -> ('key * 'data) list
-val apply : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
-val map : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table
- -> ('_key, '_res) hash_table
-val filter : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
-val transform : ('data -> '_res) -> ('_key, 'data) hash_table
- -> ('_key, '_res) hash_table
-val copy : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
-val bucketSizes : ('key, 'data) hash_table -> int list
-
-(*Additions due to L. C. Paulson and Jia Meng*)
-val hashw : word * word -> word
-val hashw_char : Char.char * word -> word
-val hashw_int : int * word -> word
-val hashw_vector : word Vector.vector * word -> word
-val hashw_string : string * word -> word
-val hashw_strings : string list * word -> word
-val hash_string : string -> int
-
-(*
- [('key, 'data) hash_table] is the type of hashtables with keys of type
- 'key and data values of type 'data.
-
- [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable,
- using hash function hashVal and equality predicate sameKey. The sz
- is a size hint, and exc is the exception raised by function find.
- It must be the case that sameKey(k1, k2) implies hashVal(k1) =
- hashVal(k2) for all k1,k2.
-
- [numItems htbl] is the number of items in the hash table.
-
- [insert htbl (k, d)] inserts data d for key k. If k already had an
- item associated with it, then the old item is overwritten.
-
- [find htbl k] returns d, where d is the data item associated with key k,
- or raises the exception (given at creation of htbl) if there is no such d.
-
- [peek htbl k] returns SOME d, where d is the data item associated with
- key k, or NONE if there is no such d.
-
- [peekInsert htbl (k, d)] inserts data d for key k, if k is not
- already in the table, returning NONE. If k is already in the
- table, and the associated data value is d', then returns SOME d'
- and leaves the table unmodified.
-
- [remove htbl k] returns d, where d is the data item associated with key k,
- removing d from the table; or raises the exception if there is no such d.
-
- [listItems htbl] returns a list of the (key, data) pairs in the hashtable.
-
- [apply f htbl] applies function f to all (key, data) pairs in the
- hashtable, in some order.
-
- [map f htbl] returns a new hashtable, whose data items have been
- obtained by applying f to the (key, data) pairs in htbl. The new
- tables have the same keys, hash function, equality predicate, and
- exception, as htbl.
-
- [filter p htbl] deletes from htbl all data items which do not
- satisfy predicate p.
-
- [transform f htbl] as map, but only the (old) data values are used
- when computing the new data values.
-
- [copy htbl] returns a complete copy of htbl.
-
- [bucketSizes htbl] returns a list of the sizes of the buckets.
- This is to allow users to gauge the quality of their hashing
- function.
-*)
-
-end
-
-
-structure Polyhash :> Polyhash =
-struct
-
-datatype ('key, 'data) bucket_t
- = NIL
- | B of int * 'key * 'data * ('key, 'data) bucket_t
-
-datatype ('key, 'data) hash_table =
- HT of {hashVal : 'key -> int,
- sameKey : 'key * 'key -> bool,
- not_found : exn,
- table : ('key, 'data) bucket_t Array.array Unsynchronized.ref,
- n_items : int Unsynchronized.ref}
-
-local
-(*
- prim_val andb_ : int -> int -> int = 2 "and";
- prim_val lshift_ : int -> int -> int = 2 "shift_left";
-*)
- fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y));
- fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y));
-in
- fun index (i, sz) = andb_ i (sz-1)
-
- (* find smallest power of 2 (>= 32) that is >= n *)
- fun roundUp n =
- let fun f i = if (i >= n) then i else f (lshift_ i 1)
- in f 32 end
-end;
-
- (* Create a new table; the int is a size hint and the exception
- * is to be raised by find.
- *)
- fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
- hashVal=hashVal,
- sameKey=sameKey,
- not_found = notFound,
- table = Unsynchronized.ref (Array.array(roundUp sizeHint, NIL)),
- n_items = Unsynchronized.ref 0
- };
-
- (* conditionally grow a table *)
- fun growTable (HT{table, n_items, ...}) = let
- val arr = !table
- val sz = Array.length arr
- in
- if (!n_items >= sz)
- then let
- val newSz = sz+sz
- val newArr = Array.array (newSz, NIL)
- fun copy NIL = ()
- | copy (B(h, key, v, rest)) = let
- val indx = index (h, newSz)
- in
- Array.update (newArr, indx,
- B(h, key, v, Array.sub(newArr, indx)));
- copy rest
- end
- fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1))
- in
- (bucket 0) handle _ => (); (* FIXME avoid handle _ *)
- table := newArr
- end
- else ()
- end (* growTable *);
-
- (* Insert an item. If the key already has an item associated with it,
- * then the old item is discarded.
- *)
- fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
- let
- val arr = !table
- val sz = Array.length arr
- val hash = hashVal key
- val indx = index (hash, sz)
- fun look NIL = (
- Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx)));
- Unsynchronized.inc n_items;
- growTable tbl;
- NIL)
- | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
- then B(hash, key, item, r)
- else (case (look r)
- of NIL => NIL
- | rest => B(h, k, v, rest)
- (* end case *))
- in
- case (look (Array.sub (arr, indx)))
- of NIL => ()
- | b => Array.update(arr, indx, b)
- end;
-
- (* Insert an item if not there already; if it is there already,
- then return the old data value and leave the table unmodified..
- *)
- fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
- let val arr = !table
- val sz = Array.length arr
- val hash = hashVal key
- val indx = index (hash, sz)
- fun look NIL =
- (Array.update(arr, indx, B(hash, key, item,
- Array.sub(arr, indx)));
- Unsynchronized.inc n_items;
- growTable tbl;
- NONE)
- | look (B(h, k, v, r)) =
- if hash = h andalso sameKey(key, k) then SOME v
- else look r
- in
- look (Array.sub (arr, indx))
- end;
-
- (* find an item, the table's exception is raised if the item doesn't exist *)
- fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
- val arr = !table
- val sz = Array.length arr
- val hash = hashVal key
- val indx = index (hash, sz)
- fun look NIL = raise not_found
- | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
- then v
- else look r
- in
- look (Array.sub (arr, indx))
- end;
-
- (* look for an item, return NONE if the item doesn't exist *)
- fun peek (HT{hashVal, sameKey, table, ...}) key = let
- val arr = !table
- val sz = Array.length arr
- val hash = hashVal key
- val indx = index (hash, sz)
- fun look NIL = NONE
- | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
- then SOME v
- else look r
- in
- look (Array.sub (arr, indx))
- end;
-
- (* Remove an item. The table's exception is raised if
- * the item doesn't exist.
- *)
- fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
- val arr = !table
- val sz = Array.length arr
- val hash = hashVal key
- val indx = index (hash, sz)
- fun look NIL = raise not_found
- | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
- then (v, r)
- else let val (item, r') = look r in (item, B(h, k, v, r')) end
- val (item, bucket) = look (Array.sub (arr, indx))
- in
- Array.update (arr, indx, bucket);
- n_items := !n_items - 1;
- item
- end (* remove *);
-
- (* Return the number of items in the table *)
- fun numItems (HT{n_items, ...}) = !n_items
-
- (* return a list of the items in the table *)
- fun listItems (HT{table = Unsynchronized.ref arr, n_items, ...}) = let
- fun f (_, l, 0) = l
- | f (~1, l, _) = l
- | f (i, l, n) = let
- fun g (NIL, l, n) = f (i-1, l, n)
- | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1)
- in
- g (Array.sub(arr, i), l, n)
- end
- in
- f ((Array.length arr) - 1, [], !n_items)
- end (* listItems *);
-
- (* Apply a function to the entries of the table *)
- fun apply f (HT{table, ...}) = let
- fun appF NIL = ()
- | appF (B(_, key, item, rest)) = (
- f (key, item);
- appF rest)
- val arr = !table
- val sz = Array.length arr
- fun appToTbl i = if (i < sz)
- then (appF (Array.sub (arr, i)); appToTbl(i+1))
- else ()
- in
- appToTbl 0
- end (* apply *);
-
- (* Map a table to a new table that has the same keys and exception *)
- fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
- fun mapF NIL = NIL
- | mapF (B(hash, key, item, rest)) =
- B(hash, key, f (key, item), mapF rest)
- val arr = !table
- val sz = Array.length arr
- val newArr = Array.array (sz, NIL)
- fun mapTbl i = if (i < sz)
- then (
- Array.update(newArr, i, mapF (Array.sub(arr, i)));
- mapTbl (i+1))
- else ()
- in
- mapTbl 0;
- HT{hashVal=hashVal,
- sameKey=sameKey,
- table = Unsynchronized.ref newArr,
- n_items = Unsynchronized.ref(!n_items),
- not_found = not_found}
- end (* transform *);
-
- (* remove any hash table items that do not satisfy the given
- * predicate.
- *)
- fun filter pred (HT{table, n_items, not_found, ...}) = let
- fun filterP NIL = NIL
- | filterP (B(hash, key, item, rest)) = if (pred(key, item))
- then B(hash, key, item, filterP rest)
- else filterP rest
- val arr = !table
- val sz = Array.length arr
- fun filterTbl i = if (i < sz)
- then (
- Array.update (arr, i, filterP (Array.sub (arr, i)));
- filterTbl (i+1))
- else ()
- in
- filterTbl 0
- end (* filter *);
-
- (* Map a table to a new table that has the same keys, exception,
- hash function, and equality function *)
-
- fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
- fun mapF NIL = NIL
- | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest)
- val arr = !table
- val sz = Array.length arr
- val newArr = Array.array (sz, NIL)
- fun mapTbl i = if (i < sz)
- then (
- Array.update(newArr, i, mapF (Array.sub(arr, i)));
- mapTbl (i+1))
- else ()
- in
- mapTbl 0;
- HT{hashVal=hashVal,
- sameKey=sameKey,
- table = Unsynchronized.ref newArr,
- n_items = Unsynchronized.ref(!n_items),
- not_found = not_found}
- end (* transform *);
-
- (* Create a copy of a hash table *)
- fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
- val arr = !table
- val sz = Array.length arr
- val newArr = Array.array (sz, NIL)
- fun mapTbl i = (
- Array.update (newArr, i, Array.sub(arr, i));
- mapTbl (i+1))
- in
- (mapTbl 0) handle _ => (); (* FIXME avoid handle _ *)
- HT{hashVal=hashVal,
- sameKey=sameKey,
- table = Unsynchronized.ref newArr,
- n_items = Unsynchronized.ref(!n_items),
- not_found = not_found}
- end (* copy *);
-
- (* returns a list of the sizes of the various buckets. This is to
- * allow users to gauge the quality of their hashing function.
- *)
- fun bucketSizes (HT{table = Unsynchronized.ref arr, ...}) = let
- fun len (NIL, n) = n
- | len (B(_, _, _, r), n) = len(r, n+1)
- fun f (~1, l) = l
- | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l)
- in
- f ((Array.length arr)-1, [])
- end
-
- (*Added by lcp.
- This is essentially the described in Compilers:
- Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*)
-
- (*This hash function is recommended in Compilers: Principles, Techniques, and
- Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly
- recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*)
- fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w))
-
- fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w);
-
- fun hashw_int (i,w) = hashw (Word.fromInt i, w);
-
- fun hashw_vector (v,w) = Vector.foldl hashw w v;
-
- fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s;
-
- fun hashw_strings (ss, w) = List.foldl hashw_string w ss;
-
- fun hash_string s = Word.toIntX (hashw_string(s,0w0));
-
-end