src/HOL/Tools/polyhash.ML
author wenzelm
Thu Aug 03 17:30:36 2006 +0200 (2006-08-03)
changeset 20328 5b240a4216b0
parent 18508 c5861e128a95
child 20416 f9cb300118ca
permissions -rw-r--r--
RuleInsts.bires_inst_tac;
     1 (* Modified for Poly/ML from SML/NJ Library version 0.2
     2  *
     3  * COPYRIGHT (c) 1992 by AT&T Bell Laboratories.
     4  * See file mosml/copyrght/copyrght.att for details.
     5  *
     6  * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974
     7  * Current version from the Moscow ML distribution, copied by permission.
     8  *)
     9 
    10 (* Polyhash -- polymorphic hashtables as in the SML/NJ Library *)
    11 
    12 signature Polyhash =
    13 sig
    14 
    15 type ('key, 'data) hash_table
    16 
    17 val mkTable     : ('_key -> int) * ('_key * '_key -> bool) -> int * exn 
    18                   -> ('_key, '_data) hash_table
    19 val numItems    : ('key, 'data) hash_table -> int
    20 val insert      : ('_key, '_data) hash_table -> '_key * '_data -> unit
    21 val peekInsert  : ('_key, '_data) hash_table -> '_key * '_data 
    22                   -> '_data option
    23 val find        : ('key, 'data) hash_table -> 'key -> 'data
    24 val peek        : ('key, 'data) hash_table -> 'key -> 'data option
    25 val remove      : ('key, 'data) hash_table -> 'key -> 'data
    26 val listItems   : ('key, 'data) hash_table -> ('key * 'data) list
    27 val apply       : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
    28 val map         : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table 
    29                   -> ('_key, '_res) hash_table
    30 val filter      : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
    31 val transform   : ('data -> '_res) -> ('_key, 'data) hash_table 
    32                   -> ('_key, '_res) hash_table
    33 val copy        : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
    34 val bucketSizes : ('key, 'data) hash_table -> int list
    35 
    36 (*Additions due to L. C. Paulson and Jia Meng*)
    37 val hashw : word * word -> word
    38 val hashw_char : Char.char * word -> word
    39 val hashw_vector : word Vector.vector * word -> word
    40 val hashw_string : string * word -> word
    41 val hashw_strings : string list * word -> word
    42 val hash_string : string -> int
    43 
    44 (* 
    45    [('key, 'data) hash_table] is the type of hashtables with keys of type
    46    'key and data values of type 'data.
    47 
    48    [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable,
    49    using hash function hashVal and equality predicate sameKey.  The sz
    50    is a size hint, and exc is the exception raised by function find.
    51    It must be the case that sameKey(k1, k2) implies hashVal(k1) =
    52    hashVal(k2) for all k1,k2.
    53 
    54    [numItems htbl] is the number of items in the hash table.
    55 
    56    [insert htbl (k, d)] inserts data d for key k.  If k already had an
    57    item associated with it, then the old item is overwritten.
    58 
    59    [find htbl k] returns d, where d is the data item associated with key k, 
    60    or raises the exception (given at creation of htbl) if there is no such d.
    61 
    62    [peek htbl k] returns SOME d, where d is the data item associated with 
    63    key k, or NONE if there is no such d.
    64 
    65    [peekInsert htbl (k, d)] inserts data d for key k, if k is not
    66    already in the table, returning NONE.  If k is already in the
    67    table, and the associated data value is d', then returns SOME d'
    68    and leaves the table unmodified.
    69 
    70    [remove htbl k] returns d, where d is the data item associated with key k, 
    71    removing d from the table; or raises the exception if there is no such d.
    72 
    73    [listItems htbl] returns a list of the (key, data) pairs in the hashtable.
    74 
    75    [apply f htbl] applies function f to all (key, data) pairs in the 
    76    hashtable, in some order.
    77 
    78    [map f htbl] returns a new hashtable, whose data items have been
    79    obtained by applying f to the (key, data) pairs in htbl.  The new
    80    tables have the same keys, hash function, equality predicate, and
    81    exception, as htbl.
    82 
    83    [filter p htbl] deletes from htbl all data items which do not
    84    satisfy predicate p.
    85 
    86    [transform f htbl] as map, but only the (old) data values are used
    87    when computing the new data values.
    88 
    89    [copy htbl] returns a complete copy of htbl.
    90 
    91    [bucketSizes htbl] returns a list of the sizes of the buckets.
    92    This is to allow users to gauge the quality of their hashing
    93    function.  
    94 *)
    95 
    96 end
    97 
    98 
    99 structure Polyhash :> Polyhash =
   100 struct
   101 
   102 datatype ('key, 'data) bucket_t
   103   = NIL
   104   | B of int * 'key * 'data * ('key, 'data) bucket_t
   105 
   106 datatype ('key, 'data) hash_table = 
   107     HT of {hashVal   : 'key -> int,
   108 	   sameKey   : 'key * 'key -> bool,
   109 	   not_found : exn,
   110 	   table     : ('key, 'data) bucket_t Array.array ref,
   111 	   n_items   : int ref}
   112 
   113 local
   114 (*
   115     prim_val andb_      : int -> int -> int = 2 "and";
   116     prim_val lshift_    : int -> int -> int = 2 "shift_left";
   117 *)
   118     fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y));
   119     fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y));
   120 in 
   121     fun index (i, sz) = andb_ i (sz-1)
   122 
   123   (* find smallest power of 2 (>= 32) that is >= n *)
   124     fun roundUp n = 
   125 	let fun f i = if (i >= n) then i else f (lshift_ i 1)
   126 	in f 32 end
   127 end;
   128 
   129   (* Create a new table; the int is a size hint and the exception
   130    * is to be raised by find.
   131    *)
   132     fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
   133             hashVal=hashVal,
   134 	    sameKey=sameKey,
   135 	    not_found = notFound,
   136 	    table = ref (Array.array(roundUp sizeHint, NIL)),
   137 	    n_items = ref 0
   138 	  };
   139 
   140   (* conditionally grow a table *)
   141     fun growTable (HT{table, n_items, ...}) = let
   142 	    val arr = !table
   143 	    val sz = Array.length arr
   144 	    in
   145 	      if (!n_items >= sz)
   146 		then let
   147 		  val newSz = sz+sz
   148 		  val newArr = Array.array (newSz, NIL)
   149 		  fun copy NIL = ()
   150 		    | copy (B(h, key, v, rest)) = let
   151 			val indx = index (h, newSz)
   152 			in
   153 			  Array.update (newArr, indx,
   154 			    B(h, key, v, Array.sub(newArr, indx)));
   155 			  copy rest
   156 			end
   157 		  fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1))
   158 		  in
   159 		    (bucket 0) handle _ => ();
   160 		    table := newArr
   161 		  end
   162 		else ()
   163 	    end (* growTable *);
   164 
   165   (* Insert an item.  If the key already has an item associated with it,
   166    * then the old item is discarded.
   167    *)
   168     fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
   169 	let
   170 	  val arr = !table
   171 	  val sz = Array.length arr
   172 	  val hash = hashVal key
   173 	  val indx = index (hash, sz)
   174 	  fun look NIL = (
   175 		Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx)));
   176 		n_items := !n_items + 1;
   177 		growTable tbl;
   178 		NIL)
   179 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   180 		then B(hash, key, item, r)
   181 		else (case (look r)
   182 		   of NIL => NIL
   183 		    | rest => B(h, k, v, rest)
   184 		  (* end case *))
   185 	  in
   186 	    case (look (Array.sub (arr, indx)))
   187 	     of NIL => ()
   188 	      | b => Array.update(arr, indx, b)
   189 	  end;
   190 
   191   (* Insert an item if not there already; if it is there already, 
   192      then return the old data value and leave the table unmodified..
   193    *)
   194     fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
   195 	let val arr = !table
   196 	    val sz = Array.length arr
   197 	    val hash = hashVal key
   198 	    val indx = index (hash, sz)
   199 	    fun look NIL = 
   200 		(Array.update(arr, indx, B(hash, key, item, 
   201 					   Array.sub(arr, indx)));
   202 		 n_items := !n_items + 1;
   203 		 growTable tbl;
   204 		 NONE)
   205 	      | look (B(h, k, v, r)) = 
   206 		if hash = h andalso sameKey(key, k) then SOME v
   207 		else look r
   208 	in
   209 	    look (Array.sub (arr, indx))
   210 	end;
   211 
   212   (* find an item, the table's exception is raised if the item doesn't exist *)
   213     fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
   214 	  val arr = !table
   215 	  val sz = Array.length arr
   216 	  val hash = hashVal key
   217 	  val indx = index (hash, sz)
   218 	  fun look NIL = raise not_found
   219 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   220 		then v
   221 		else look r
   222 	  in
   223 	    look (Array.sub (arr, indx))
   224 	  end;
   225 
   226   (* look for an item, return NONE if the item doesn't exist *)
   227     fun peek (HT{hashVal, sameKey, table, ...}) key = let
   228 	  val arr = !table
   229 	  val sz = Array.length arr
   230 	  val hash = hashVal key
   231 	  val indx = index (hash, sz)
   232 	  fun look NIL = NONE
   233 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   234 		then SOME v
   235 		else look r
   236 	  in
   237 	    look (Array.sub (arr, indx))
   238 	  end;
   239 
   240   (* Remove an item.  The table's exception is raised if
   241    * the item doesn't exist.
   242    *)
   243     fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
   244 	  val arr = !table
   245 	  val sz = Array.length arr
   246 	  val hash = hashVal key
   247 	  val indx = index (hash, sz)
   248 	  fun look NIL = raise not_found
   249 	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   250 		then (v, r)
   251 		else let val (item, r') = look r in (item, B(h, k, v, r')) end
   252 	  val (item, bucket) = look (Array.sub (arr, indx))
   253 	  in
   254 	    Array.update (arr, indx, bucket);
   255 	    n_items := !n_items - 1;
   256 	    item
   257 	  end (* remove *);
   258 
   259   (* Return the number of items in the table *)
   260    fun numItems (HT{n_items, ...}) = !n_items
   261 
   262   (* return a list of the items in the table *)
   263     fun listItems (HT{table = ref arr, n_items, ...}) = let
   264 	  fun f (_, l, 0) = l
   265 	    | f (~1, l, _) = l
   266 	    | f (i, l, n) = let
   267 		fun g (NIL, l, n) = f (i-1, l, n)
   268 		  | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1)
   269 		in
   270 		  g (Array.sub(arr, i), l, n)
   271 		end
   272 	  in
   273 	    f ((Array.length arr) - 1, [], !n_items)
   274 	  end (* listItems *);
   275 
   276   (* Apply a function to the entries of the table *)
   277     fun apply f (HT{table, ...}) = let
   278 	  fun appF NIL = ()
   279 	    | appF (B(_, key, item, rest)) = (
   280 		f (key, item);
   281 		appF rest)
   282 	  val arr = !table
   283 	  val sz = Array.length arr
   284 	  fun appToTbl i = if (i < sz)
   285 		then (appF (Array.sub (arr, i)); appToTbl(i+1))
   286 		else ()
   287 	  in
   288 	    appToTbl 0
   289 	  end (* apply *);
   290 
   291   (* Map a table to a new table that has the same keys and exception *)
   292     fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
   293 	  fun mapF NIL = NIL
   294 	    | mapF (B(hash, key, item, rest)) =
   295 		B(hash, key, f (key, item), mapF rest)
   296 	  val arr = !table
   297 	  val sz = Array.length arr
   298 	  val newArr = Array.array (sz, NIL)
   299 	  fun mapTbl i = if (i < sz)
   300 		then (
   301 		  Array.update(newArr, i, mapF (Array.sub(arr, i)));
   302 		  mapTbl (i+1))
   303 		else ()
   304 	  in
   305 	    mapTbl 0;
   306 	    HT{hashVal=hashVal,
   307 	       sameKey=sameKey,
   308 	       table = ref newArr, 
   309 	       n_items = ref(!n_items), 
   310 	       not_found = not_found}
   311 	  end (* transform *);
   312 
   313   (* remove any hash table items that do not satisfy the given
   314    * predicate.
   315    *)
   316     fun filter pred (HT{table, n_items, not_found, ...}) = let
   317 	  fun filterP NIL = NIL
   318 	    | filterP (B(hash, key, item, rest)) = if (pred(key, item))
   319 		then B(hash, key, item, filterP rest)
   320 		else filterP rest
   321 	  val arr = !table
   322 	  val sz = Array.length arr
   323 	  fun filterTbl i = if (i < sz)
   324 		then (
   325 		  Array.update (arr, i, filterP (Array.sub (arr, i)));
   326 		  filterTbl (i+1))
   327 		else ()
   328 	  in
   329 	    filterTbl 0
   330 	  end (* filter *);
   331 
   332   (* Map a table to a new table that has the same keys, exception,
   333      hash function, and equality function *)
   334 
   335     fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
   336 	  fun mapF NIL = NIL
   337 	    | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest)
   338 	  val arr = !table
   339 	  val sz = Array.length arr
   340 	  val newArr = Array.array (sz, NIL)
   341 	  fun mapTbl i = if (i < sz)
   342 		then (
   343 		  Array.update(newArr, i, mapF (Array.sub(arr, i)));
   344 		  mapTbl (i+1))
   345 		else ()
   346 	  in
   347 	    mapTbl 0;
   348 	    HT{hashVal=hashVal, 
   349 	       sameKey=sameKey, 
   350 	       table = ref newArr, 
   351 	       n_items = ref(!n_items), 
   352 	       not_found = not_found}
   353 	  end (* transform *);
   354 
   355   (* Create a copy of a hash table *)
   356     fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
   357 	  val arr = !table
   358 	  val sz = Array.length arr
   359 	  val newArr = Array.array (sz, NIL)
   360 	  fun mapTbl i = (
   361 		Array.update (newArr, i, Array.sub(arr, i));
   362 		mapTbl (i+1))
   363 	  in
   364 	    (mapTbl 0) handle _ => ();
   365 	    HT{hashVal=hashVal, 
   366 	       sameKey=sameKey,
   367 	       table = ref newArr, 
   368 	       n_items = ref(!n_items), 
   369 	       not_found = not_found}
   370 	  end (* copy *);
   371 
   372   (* returns a list of the sizes of the various buckets.  This is to
   373    * allow users to gauge the quality of their hashing function.
   374    *)
   375     fun bucketSizes (HT{table = ref arr, ...}) = let
   376 	  fun len (NIL, n) = n
   377 	    | len (B(_, _, _, r), n) = len(r, n+1)
   378 	  fun f (~1, l) = l
   379 	    | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l)
   380 	  in
   381 	    f ((Array.length arr)-1, [])
   382 	  end
   383 
   384    (*Added by lcp.
   385       This is essentially the  described in Compilers:
   386       Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*)
   387 
   388    (*This hash function is recommended in Compilers: Principles, Techniques, and
   389      Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly
   390      recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*)
   391    fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w))
   392 
   393    fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w);
   394    
   395    fun hashw_vector (v,w) = Vector.foldl hashw w v;
   396    
   397    fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s;
   398    
   399    fun hashw_strings (ss, w) = List.foldl hashw_string w ss;
   400 
   401    fun hash_string s = Word.toIntX (hashw_string(s,0w0));
   402 
   403 end