src/HOL/Tools/polyhash.ML
author wenzelm
Tue Sep 29 16:24:36 2009 +0200 (2009-09-29)
changeset 32740 9dd0a2f83429
parent 28397 389c5e494605
child 32960 69916a850301
permissions -rw-r--r--
explicit indication of Unsynchronized.ref;
     1 (* Modified for Poly/ML from SML/NJ Library version 0.2
     2  *
     3  * COPYRIGHT (c) 1992 by AT&T Bell Laboratories.
     4  * See file mosml/copyrght/copyrght.att for details.
     5  *
     6  * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974
     7  * Current version from the Moscow ML distribution, copied by permission.
     8  *)
     9 
    10 (* Polyhash -- polymorphic hashtables as in the SML/NJ Library *)
    11 
    12 signature Polyhash =
    13 sig
    14 
    15 type ('key, 'data) hash_table
    16 
    17 val mkTable     : ('_key -> int) * ('_key * '_key -> bool) -> int * exn 
    18                   -> ('_key, '_data) hash_table
    19 val numItems    : ('key, 'data) hash_table -> int
    20 val insert      : ('_key, '_data) hash_table -> '_key * '_data -> unit
    21 val peekInsert  : ('_key, '_data) hash_table -> '_key * '_data 
    22                   -> '_data option
    23 val find        : ('key, 'data) hash_table -> 'key -> 'data
    24 val peek        : ('key, 'data) hash_table -> 'key -> 'data option
    25 val remove      : ('key, 'data) hash_table -> 'key -> 'data
    26 val listItems   : ('key, 'data) hash_table -> ('key * 'data) list
    27 val apply       : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
    28 val map         : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table 
    29                   -> ('_key, '_res) hash_table
    30 val filter      : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
    31 val transform   : ('data -> '_res) -> ('_key, 'data) hash_table 
    32                   -> ('_key, '_res) hash_table
    33 val copy        : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
    34 val bucketSizes : ('key, 'data) hash_table -> int list
    35 
    36 (*Additions due to L. C. Paulson and Jia Meng*)
    37 val hashw : word * word -> word
    38 val hashw_char : Char.char * word -> word
    39 val hashw_int : int * word -> word
    40 val hashw_vector : word Vector.vector * word -> word
    41 val hashw_string : string * word -> word
    42 val hashw_strings : string list * word -> word
    43 val hash_string : string -> int
    44 
    45 (* 
    46    [('key, 'data) hash_table] is the type of hashtables with keys of type
    47    'key and data values of type 'data.
    48 
    49    [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable,
    50    using hash function hashVal and equality predicate sameKey.  The sz
    51    is a size hint, and exc is the exception raised by function find.
    52    It must be the case that sameKey(k1, k2) implies hashVal(k1) =
    53    hashVal(k2) for all k1,k2.
    54 
    55    [numItems htbl] is the number of items in the hash table.
    56 
    57    [insert htbl (k, d)] inserts data d for key k.  If k already had an
    58    item associated with it, then the old item is overwritten.
    59 
    60    [find htbl k] returns d, where d is the data item associated with key k, 
    61    or raises the exception (given at creation of htbl) if there is no such d.
    62 
    63    [peek htbl k] returns SOME d, where d is the data item associated with 
    64    key k, or NONE if there is no such d.
    65 
    66    [peekInsert htbl (k, d)] inserts data d for key k, if k is not
    67    already in the table, returning NONE.  If k is already in the
    68    table, and the associated data value is d', then returns SOME d'
    69    and leaves the table unmodified.
    70 
    71    [remove htbl k] returns d, where d is the data item associated with key k, 
    72    removing d from the table; or raises the exception if there is no such d.
    73 
    74    [listItems htbl] returns a list of the (key, data) pairs in the hashtable.
    75 
    76    [apply f htbl] applies function f to all (key, data) pairs in the 
    77    hashtable, in some order.
    78 
    79    [map f htbl] returns a new hashtable, whose data items have been
    80    obtained by applying f to the (key, data) pairs in htbl.  The new
    81    tables have the same keys, hash function, equality predicate, and
    82    exception, as htbl.
    83 
    84    [filter p htbl] deletes from htbl all data items which do not
    85    satisfy predicate p.
    86 
    87    [transform f htbl] as map, but only the (old) data values are used
    88    when computing the new data values.
    89 
    90    [copy htbl] returns a complete copy of htbl.
    91 
    92    [bucketSizes htbl] returns a list of the sizes of the buckets.
    93    This is to allow users to gauge the quality of their hashing
    94    function.  
    95 *)
    96 
    97 end
    98 
    99 
   100 structure Polyhash :> Polyhash =
   101 struct
   102 
   103 datatype ('key, 'data) bucket_t
   104   = NIL
   105   | B of int * 'key * 'data * ('key, 'data) bucket_t
   106 
   107 datatype ('key, 'data) hash_table = 
   108     HT of {hashVal   : 'key -> int,
   109 	   sameKey   : 'key * 'key -> bool,
   110 	   not_found : exn,
   111 	   table     : ('key, 'data) bucket_t Array.array Unsynchronized.ref,
   112 	   n_items   : int Unsynchronized.ref}
   113 
   114 local
   115 (*
   116     prim_val andb_      : int -> int -> int = 2 "and";
   117     prim_val lshift_    : int -> int -> int = 2 "shift_left";
   118 *)
   119     fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y));
   120     fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y));
   121 in 
   122     fun index (i, sz) = andb_ i (sz-1)
   123 
   124   (* find smallest power of 2 (>= 32) that is >= n *)
   125     fun roundUp n = 
   126 	let fun f i = if (i >= n) then i else f (lshift_ i 1)
   127 	in f 32 end
   128 end;
   129 
   130   (* Create a new table; the int is a size hint and the exception
   131    * is to be raised by find.
   132    *)
   133     fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
   134             hashVal=hashVal,
   135 	    sameKey=sameKey,
   136 	    not_found = notFound,
   137 	    table = Unsynchronized.ref (Array.array(roundUp sizeHint, NIL)),
   138 	    n_items = Unsynchronized.ref 0
   139 	  };
   140 
   141   (* conditionally grow a table *)
   142     fun growTable (HT{table, n_items, ...}) = let
   143 	    val arr = !table
   144 	    val sz = Array.length arr
   145 	    in
   146 	      if (!n_items >= sz)
   147 		then let
   148 		  val newSz = sz+sz
   149 		  val newArr = Array.array (newSz, NIL)
   150 		  fun copy NIL = ()
   151 		    | copy (B(h, key, v, rest)) = let
   152 			val indx = index (h, newSz)
   153 			in
   154 			  Array.update (newArr, indx,
   155 			    B(h, key, v, Array.sub(newArr, indx)));
   156 			  copy rest
   157 			end
   158 		  fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1))
   159 		  in
   160 		    (bucket 0) handle _ => ();  (* FIXME avoid handle _ *)
   161 		    table := newArr
   162 		  end
   163 		else ()
   164 	    end (* growTable *);
   165 
   166   (* Insert an item.  If the key already has an item associated with it,
   167    * then the old item is discarded.
   168    *)
   169     fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
   170 	let
   171 	  val arr = !table
   172 	  val sz = Array.length arr
   173 	  val hash = hashVal key
   174 	  val indx = index (hash, sz)
   175 	  fun look NIL = (
   176 		Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx)));
   177 		Unsynchronized.inc n_items;
   178 		growTable tbl;
   179 		NIL)
   180 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   181 		then B(hash, key, item, r)
   182 		else (case (look r)
   183 		   of NIL => NIL
   184 		    | rest => B(h, k, v, rest)
   185 		  (* end case *))
   186 	  in
   187 	    case (look (Array.sub (arr, indx)))
   188 	     of NIL => ()
   189 	      | b => Array.update(arr, indx, b)
   190 	  end;
   191 
   192   (* Insert an item if not there already; if it is there already, 
   193      then return the old data value and leave the table unmodified..
   194    *)
   195     fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
   196 	let val arr = !table
   197 	    val sz = Array.length arr
   198 	    val hash = hashVal key
   199 	    val indx = index (hash, sz)
   200 	    fun look NIL = 
   201 		(Array.update(arr, indx, B(hash, key, item, 
   202 					   Array.sub(arr, indx)));
   203 		 Unsynchronized.inc n_items;
   204 		 growTable tbl;
   205 		 NONE)
   206 	      | look (B(h, k, v, r)) = 
   207 		if hash = h andalso sameKey(key, k) then SOME v
   208 		else look r
   209 	in
   210 	    look (Array.sub (arr, indx))
   211 	end;
   212 
   213   (* find an item, the table's exception is raised if the item doesn't exist *)
   214     fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
   215 	  val arr = !table
   216 	  val sz = Array.length arr
   217 	  val hash = hashVal key
   218 	  val indx = index (hash, sz)
   219 	  fun look NIL = raise not_found
   220 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   221 		then v
   222 		else look r
   223 	  in
   224 	    look (Array.sub (arr, indx))
   225 	  end;
   226 
   227   (* look for an item, return NONE if the item doesn't exist *)
   228     fun peek (HT{hashVal, sameKey, table, ...}) key = let
   229 	  val arr = !table
   230 	  val sz = Array.length arr
   231 	  val hash = hashVal key
   232 	  val indx = index (hash, sz)
   233 	  fun look NIL = NONE
   234 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   235 		then SOME v
   236 		else look r
   237 	  in
   238 	    look (Array.sub (arr, indx))
   239 	  end;
   240 
   241   (* Remove an item.  The table's exception is raised if
   242    * the item doesn't exist.
   243    *)
   244     fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
   245 	  val arr = !table
   246 	  val sz = Array.length arr
   247 	  val hash = hashVal key
   248 	  val indx = index (hash, sz)
   249 	  fun look NIL = raise not_found
   250 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   251 		then (v, r)
   252 		else let val (item, r') = look r in (item, B(h, k, v, r')) end
   253 	  val (item, bucket) = look (Array.sub (arr, indx))
   254 	  in
   255 	    Array.update (arr, indx, bucket);
   256 	    n_items := !n_items - 1;
   257 	    item
   258 	  end (* remove *);
   259 
   260   (* Return the number of items in the table *)
   261    fun numItems (HT{n_items, ...}) = !n_items
   262 
   263   (* return a list of the items in the table *)
   264     fun listItems (HT{table = Unsynchronized.ref arr, n_items, ...}) = let
   265 	  fun f (_, l, 0) = l
   266 	    | f (~1, l, _) = l
   267 	    | f (i, l, n) = let
   268 		fun g (NIL, l, n) = f (i-1, l, n)
   269 		  | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1)
   270 		in
   271 		  g (Array.sub(arr, i), l, n)
   272 		end
   273 	  in
   274 	    f ((Array.length arr) - 1, [], !n_items)
   275 	  end (* listItems *);
   276 
   277   (* Apply a function to the entries of the table *)
   278     fun apply f (HT{table, ...}) = let
   279 	  fun appF NIL = ()
   280 	    | appF (B(_, key, item, rest)) = (
   281 		f (key, item);
   282 		appF rest)
   283 	  val arr = !table
   284 	  val sz = Array.length arr
   285 	  fun appToTbl i = if (i < sz)
   286 		then (appF (Array.sub (arr, i)); appToTbl(i+1))
   287 		else ()
   288 	  in
   289 	    appToTbl 0
   290 	  end (* apply *);
   291 
   292   (* Map a table to a new table that has the same keys and exception *)
   293     fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
   294 	  fun mapF NIL = NIL
   295 	    | mapF (B(hash, key, item, rest)) =
   296 		B(hash, key, f (key, item), mapF rest)
   297 	  val arr = !table
   298 	  val sz = Array.length arr
   299 	  val newArr = Array.array (sz, NIL)
   300 	  fun mapTbl i = if (i < sz)
   301 		then (
   302 		  Array.update(newArr, i, mapF (Array.sub(arr, i)));
   303 		  mapTbl (i+1))
   304 		else ()
   305 	  in
   306 	    mapTbl 0;
   307 	    HT{hashVal=hashVal,
   308 	       sameKey=sameKey,
   309 	       table = Unsynchronized.ref newArr, 
   310 	       n_items = Unsynchronized.ref(!n_items), 
   311 	       not_found = not_found}
   312 	  end (* transform *);
   313 
   314   (* remove any hash table items that do not satisfy the given
   315    * predicate.
   316    *)
   317     fun filter pred (HT{table, n_items, not_found, ...}) = let
   318 	  fun filterP NIL = NIL
   319 	    | filterP (B(hash, key, item, rest)) = if (pred(key, item))
   320 		then B(hash, key, item, filterP rest)
   321 		else filterP rest
   322 	  val arr = !table
   323 	  val sz = Array.length arr
   324 	  fun filterTbl i = if (i < sz)
   325 		then (
   326 		  Array.update (arr, i, filterP (Array.sub (arr, i)));
   327 		  filterTbl (i+1))
   328 		else ()
   329 	  in
   330 	    filterTbl 0
   331 	  end (* filter *);
   332 
   333   (* Map a table to a new table that has the same keys, exception,
   334      hash function, and equality function *)
   335 
   336     fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
   337 	  fun mapF NIL = NIL
   338 	    | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest)
   339 	  val arr = !table
   340 	  val sz = Array.length arr
   341 	  val newArr = Array.array (sz, NIL)
   342 	  fun mapTbl i = if (i < sz)
   343 		then (
   344 		  Array.update(newArr, i, mapF (Array.sub(arr, i)));
   345 		  mapTbl (i+1))
   346 		else ()
   347 	  in
   348 	    mapTbl 0;
   349 	    HT{hashVal=hashVal, 
   350 	       sameKey=sameKey, 
   351 	       table = Unsynchronized.ref newArr, 
   352 	       n_items = Unsynchronized.ref(!n_items), 
   353 	       not_found = not_found}
   354 	  end (* transform *);
   355 
   356   (* Create a copy of a hash table *)
   357     fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
   358 	  val arr = !table
   359 	  val sz = Array.length arr
   360 	  val newArr = Array.array (sz, NIL)
   361 	  fun mapTbl i = (
   362 		Array.update (newArr, i, Array.sub(arr, i));
   363 		mapTbl (i+1))
   364 	  in
   365 	    (mapTbl 0) handle _ => ();  (* FIXME avoid handle _ *)
   366 	    HT{hashVal=hashVal, 
   367 	       sameKey=sameKey,
   368 	       table = Unsynchronized.ref newArr, 
   369 	       n_items = Unsynchronized.ref(!n_items), 
   370 	       not_found = not_found}
   371 	  end (* copy *);
   372 
   373   (* returns a list of the sizes of the various buckets.  This is to
   374    * allow users to gauge the quality of their hashing function.
   375    *)
   376     fun bucketSizes (HT{table = Unsynchronized.ref arr, ...}) = let
   377 	  fun len (NIL, n) = n
   378 	    | len (B(_, _, _, r), n) = len(r, n+1)
   379 	  fun f (~1, l) = l
   380 	    | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l)
   381 	  in
   382 	    f ((Array.length arr)-1, [])
   383 	  end
   384 
   385    (*Added by lcp.
   386       This is essentially the  described in Compilers:
   387       Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*)
   388 
   389    (*This hash function is recommended in Compilers: Principles, Techniques, and
   390      Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly
   391      recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*)
   392    fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w))
   393 
   394    fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w);
   395    
   396    fun hashw_int (i,w) = hashw (Word.fromInt i, w);
   397    
   398    fun hashw_vector (v,w) = Vector.foldl hashw w v;
   399    
   400    fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s;
   401    
   402    fun hashw_strings (ss, w) = List.foldl hashw_string w ss;
   403 
   404    fun hash_string s = Word.toIntX (hashw_string(s,0w0));
   405 
   406 end