src/HOL/Tools/polyhash.ML
author wenzelm
Wed Feb 15 21:34:55 2006 +0100 (2006-02-15)
changeset 19046 bc5c6c9b114e
parent 18508 c5861e128a95
child 20416 f9cb300118ca
permissions -rw-r--r--
removed distinct, renamed gen_distinct to distinct;
paulson@18449
     1
(* Modified for Poly/ML from SML/NJ Library version 0.2
paulson@18449
     2
 *
paulson@18449
     3
 * COPYRIGHT (c) 1992 by AT&T Bell Laboratories.
paulson@18449
     4
 * See file mosml/copyrght/copyrght.att for details.
paulson@18449
     5
 *
paulson@18449
     6
 * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974
paulson@18508
     7
 * Current version from the Moscow ML distribution, copied by permission.
paulson@18449
     8
 *)
paulson@18449
     9
paulson@18449
    10
(* Polyhash -- polymorphic hashtables as in the SML/NJ Library *)
paulson@18449
    11
paulson@18449
    12
signature Polyhash =
paulson@18449
    13
sig
paulson@18449
    14
paulson@18449
    15
type ('key, 'data) hash_table
paulson@18449
    16
paulson@18449
    17
val mkTable     : ('_key -> int) * ('_key * '_key -> bool) -> int * exn 
paulson@18449
    18
                  -> ('_key, '_data) hash_table
paulson@18449
    19
val numItems    : ('key, 'data) hash_table -> int
paulson@18449
    20
val insert      : ('_key, '_data) hash_table -> '_key * '_data -> unit
paulson@18449
    21
val peekInsert  : ('_key, '_data) hash_table -> '_key * '_data 
paulson@18449
    22
                  -> '_data option
paulson@18449
    23
val find        : ('key, 'data) hash_table -> 'key -> 'data
paulson@18449
    24
val peek        : ('key, 'data) hash_table -> 'key -> 'data option
paulson@18449
    25
val remove      : ('key, 'data) hash_table -> 'key -> 'data
paulson@18449
    26
val listItems   : ('key, 'data) hash_table -> ('key * 'data) list
paulson@18449
    27
val apply       : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
paulson@18449
    28
val map         : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table 
paulson@18449
    29
                  -> ('_key, '_res) hash_table
paulson@18449
    30
val filter      : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
paulson@18449
    31
val transform   : ('data -> '_res) -> ('_key, 'data) hash_table 
paulson@18449
    32
                  -> ('_key, '_res) hash_table
paulson@18449
    33
val copy        : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
paulson@18449
    34
val bucketSizes : ('key, 'data) hash_table -> int list
paulson@18449
    35
paulson@18449
    36
(*Additions due to L. C. Paulson and Jia Meng*)
paulson@18449
    37
val hashw : word * word -> word
paulson@18449
    38
val hashw_char : Char.char * word -> word
paulson@18449
    39
val hashw_vector : word Vector.vector * word -> word
paulson@18449
    40
val hashw_string : string * word -> word
paulson@18449
    41
val hashw_strings : string list * word -> word
paulson@18449
    42
val hash_string : string -> int
paulson@18449
    43
paulson@18449
    44
(* 
paulson@18449
    45
   [('key, 'data) hash_table] is the type of hashtables with keys of type
paulson@18449
    46
   'key and data values of type 'data.
paulson@18449
    47
paulson@18449
    48
   [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable,
paulson@18449
    49
   using hash function hashVal and equality predicate sameKey.  The sz
paulson@18449
    50
   is a size hint, and exc is the exception raised by function find.
paulson@18449
    51
   It must be the case that sameKey(k1, k2) implies hashVal(k1) =
paulson@18449
    52
   hashVal(k2) for all k1,k2.
paulson@18449
    53
paulson@18449
    54
   [numItems htbl] is the number of items in the hash table.
paulson@18449
    55
paulson@18449
    56
   [insert htbl (k, d)] inserts data d for key k.  If k already had an
paulson@18449
    57
   item associated with it, then the old item is overwritten.
paulson@18449
    58
paulson@18449
    59
   [find htbl k] returns d, where d is the data item associated with key k, 
paulson@18449
    60
   or raises the exception (given at creation of htbl) if there is no such d.
paulson@18449
    61
paulson@18449
    62
   [peek htbl k] returns SOME d, where d is the data item associated with 
paulson@18449
    63
   key k, or NONE if there is no such d.
paulson@18449
    64
paulson@18449
    65
   [peekInsert htbl (k, d)] inserts data d for key k, if k is not
paulson@18449
    66
   already in the table, returning NONE.  If k is already in the
paulson@18449
    67
   table, and the associated data value is d', then returns SOME d'
paulson@18449
    68
   and leaves the table unmodified.
paulson@18449
    69
paulson@18449
    70
   [remove htbl k] returns d, where d is the data item associated with key k, 
paulson@18449
    71
   removing d from the table; or raises the exception if there is no such d.
paulson@18449
    72
paulson@18449
    73
   [listItems htbl] returns a list of the (key, data) pairs in the hashtable.
paulson@18449
    74
paulson@18449
    75
   [apply f htbl] applies function f to all (key, data) pairs in the 
paulson@18449
    76
   hashtable, in some order.
paulson@18449
    77
paulson@18449
    78
   [map f htbl] returns a new hashtable, whose data items have been
paulson@18449
    79
   obtained by applying f to the (key, data) pairs in htbl.  The new
paulson@18449
    80
   tables have the same keys, hash function, equality predicate, and
paulson@18449
    81
   exception, as htbl.
paulson@18449
    82
paulson@18449
    83
   [filter p htbl] deletes from htbl all data items which do not
paulson@18449
    84
   satisfy predicate p.
paulson@18449
    85
paulson@18449
    86
   [transform f htbl] as map, but only the (old) data values are used
paulson@18449
    87
   when computing the new data values.
paulson@18449
    88
paulson@18449
    89
   [copy htbl] returns a complete copy of htbl.
paulson@18449
    90
paulson@18449
    91
   [bucketSizes htbl] returns a list of the sizes of the buckets.
paulson@18449
    92
   This is to allow users to gauge the quality of their hashing
paulson@18449
    93
   function.  
paulson@18449
    94
*)
paulson@18449
    95
paulson@18449
    96
end
paulson@18449
    97
paulson@18449
    98
paulson@18449
    99
structure Polyhash :> Polyhash =
paulson@18449
   100
struct
paulson@18449
   101
paulson@18449
   102
datatype ('key, 'data) bucket_t
paulson@18449
   103
  = NIL
paulson@18449
   104
  | B of int * 'key * 'data * ('key, 'data) bucket_t
paulson@18449
   105
paulson@18449
   106
datatype ('key, 'data) hash_table = 
paulson@18449
   107
    HT of {hashVal   : 'key -> int,
paulson@18449
   108
	   sameKey   : 'key * 'key -> bool,
paulson@18449
   109
	   not_found : exn,
paulson@18449
   110
	   table     : ('key, 'data) bucket_t Array.array ref,
paulson@18449
   111
	   n_items   : int ref}
paulson@18449
   112
paulson@18449
   113
local
paulson@18449
   114
(*
paulson@18449
   115
    prim_val andb_      : int -> int -> int = 2 "and";
paulson@18449
   116
    prim_val lshift_    : int -> int -> int = 2 "shift_left";
paulson@18449
   117
*)
paulson@18449
   118
    fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y));
paulson@18449
   119
    fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y));
paulson@18449
   120
in 
paulson@18449
   121
    fun index (i, sz) = andb_ i (sz-1)
paulson@18449
   122
paulson@18449
   123
  (* find smallest power of 2 (>= 32) that is >= n *)
paulson@18449
   124
    fun roundUp n = 
paulson@18449
   125
	let fun f i = if (i >= n) then i else f (lshift_ i 1)
paulson@18449
   126
	in f 32 end
paulson@18449
   127
end;
paulson@18449
   128
paulson@18449
   129
  (* Create a new table; the int is a size hint and the exception
paulson@18449
   130
   * is to be raised by find.
paulson@18449
   131
   *)
paulson@18449
   132
    fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
paulson@18449
   133
            hashVal=hashVal,
paulson@18449
   134
	    sameKey=sameKey,
paulson@18449
   135
	    not_found = notFound,
paulson@18449
   136
	    table = ref (Array.array(roundUp sizeHint, NIL)),
paulson@18449
   137
	    n_items = ref 0
paulson@18449
   138
	  };
paulson@18449
   139
paulson@18449
   140
  (* conditionally grow a table *)
paulson@18449
   141
    fun growTable (HT{table, n_items, ...}) = let
paulson@18449
   142
	    val arr = !table
paulson@18449
   143
	    val sz = Array.length arr
paulson@18449
   144
	    in
paulson@18449
   145
	      if (!n_items >= sz)
paulson@18449
   146
		then let
paulson@18449
   147
		  val newSz = sz+sz
paulson@18449
   148
		  val newArr = Array.array (newSz, NIL)
paulson@18449
   149
		  fun copy NIL = ()
paulson@18449
   150
		    | copy (B(h, key, v, rest)) = let
paulson@18449
   151
			val indx = index (h, newSz)
paulson@18449
   152
			in
paulson@18449
   153
			  Array.update (newArr, indx,
paulson@18449
   154
			    B(h, key, v, Array.sub(newArr, indx)));
paulson@18449
   155
			  copy rest
paulson@18449
   156
			end
paulson@18449
   157
		  fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1))
paulson@18449
   158
		  in
paulson@18449
   159
		    (bucket 0) handle _ => ();
paulson@18449
   160
		    table := newArr
paulson@18449
   161
		  end
paulson@18449
   162
		else ()
paulson@18449
   163
	    end (* growTable *);
paulson@18449
   164
paulson@18449
   165
  (* Insert an item.  If the key already has an item associated with it,
paulson@18449
   166
   * then the old item is discarded.
paulson@18449
   167
   *)
paulson@18449
   168
    fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
paulson@18449
   169
	let
paulson@18449
   170
	  val arr = !table
paulson@18449
   171
	  val sz = Array.length arr
paulson@18449
   172
	  val hash = hashVal key
paulson@18449
   173
	  val indx = index (hash, sz)
paulson@18449
   174
	  fun look NIL = (
paulson@18449
   175
		Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx)));
paulson@18449
   176
		n_items := !n_items + 1;
paulson@18449
   177
		growTable tbl;
paulson@18449
   178
		NIL)
paulson@18449
   179
	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
paulson@18449
   180
		then B(hash, key, item, r)
paulson@18449
   181
		else (case (look r)
paulson@18449
   182
		   of NIL => NIL
paulson@18449
   183
		    | rest => B(h, k, v, rest)
paulson@18449
   184
		  (* end case *))
paulson@18449
   185
	  in
paulson@18449
   186
	    case (look (Array.sub (arr, indx)))
paulson@18449
   187
	     of NIL => ()
paulson@18449
   188
	      | b => Array.update(arr, indx, b)
paulson@18449
   189
	  end;
paulson@18449
   190
paulson@18449
   191
  (* Insert an item if not there already; if it is there already, 
paulson@18449
   192
     then return the old data value and leave the table unmodified..
paulson@18449
   193
   *)
paulson@18449
   194
    fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
paulson@18449
   195
	let val arr = !table
paulson@18449
   196
	    val sz = Array.length arr
paulson@18449
   197
	    val hash = hashVal key
paulson@18449
   198
	    val indx = index (hash, sz)
paulson@18449
   199
	    fun look NIL = 
paulson@18449
   200
		(Array.update(arr, indx, B(hash, key, item, 
paulson@18449
   201
					   Array.sub(arr, indx)));
paulson@18449
   202
		 n_items := !n_items + 1;
paulson@18449
   203
		 growTable tbl;
paulson@18449
   204
		 NONE)
paulson@18449
   205
	      | look (B(h, k, v, r)) = 
paulson@18449
   206
		if hash = h andalso sameKey(key, k) then SOME v
paulson@18449
   207
		else look r
paulson@18449
   208
	in
paulson@18449
   209
	    look (Array.sub (arr, indx))
paulson@18449
   210
	end;
paulson@18449
   211
paulson@18449
   212
  (* find an item, the table's exception is raised if the item doesn't exist *)
paulson@18449
   213
    fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
paulson@18449
   214
	  val arr = !table
paulson@18449
   215
	  val sz = Array.length arr
paulson@18449
   216
	  val hash = hashVal key
paulson@18449
   217
	  val indx = index (hash, sz)
paulson@18449
   218
	  fun look NIL = raise not_found
paulson@18449
   219
	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
paulson@18449
   220
		then v
paulson@18449
   221
		else look r
paulson@18449
   222
	  in
paulson@18449
   223
	    look (Array.sub (arr, indx))
paulson@18449
   224
	  end;
paulson@18449
   225
paulson@18449
   226
  (* look for an item, return NONE if the item doesn't exist *)
paulson@18449
   227
    fun peek (HT{hashVal, sameKey, table, ...}) key = let
paulson@18449
   228
	  val arr = !table
paulson@18449
   229
	  val sz = Array.length arr
paulson@18449
   230
	  val hash = hashVal key
paulson@18449
   231
	  val indx = index (hash, sz)
paulson@18449
   232
	  fun look NIL = NONE
paulson@18449
   233
	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
paulson@18449
   234
		then SOME v
paulson@18449
   235
		else look r
paulson@18449
   236
	  in
paulson@18449
   237
	    look (Array.sub (arr, indx))
paulson@18449
   238
	  end;
paulson@18449
   239
paulson@18449
   240
  (* Remove an item.  The table's exception is raised if
paulson@18449
   241
   * the item doesn't exist.
paulson@18449
   242
   *)
paulson@18449
   243
    fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
paulson@18449
   244
	  val arr = !table
paulson@18449
   245
	  val sz = Array.length arr
paulson@18449
   246
	  val hash = hashVal key
paulson@18449
   247
	  val indx = index (hash, sz)
paulson@18449
   248
	  fun look NIL = raise not_found
paulson@18449
   249
	    | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
paulson@18449
   250
		then (v, r)
paulson@18449
   251
		else let val (item, r') = look r in (item, B(h, k, v, r')) end
paulson@18449
   252
	  val (item, bucket) = look (Array.sub (arr, indx))
paulson@18449
   253
	  in
paulson@18449
   254
	    Array.update (arr, indx, bucket);
paulson@18449
   255
	    n_items := !n_items - 1;
paulson@18449
   256
	    item
paulson@18449
   257
	  end (* remove *);
paulson@18449
   258
paulson@18449
   259
  (* Return the number of items in the table *)
paulson@18449
   260
   fun numItems (HT{n_items, ...}) = !n_items
paulson@18449
   261
paulson@18449
   262
  (* return a list of the items in the table *)
paulson@18449
   263
    fun listItems (HT{table = ref arr, n_items, ...}) = let
paulson@18449
   264
	  fun f (_, l, 0) = l
paulson@18449
   265
	    | f (~1, l, _) = l
paulson@18449
   266
	    | f (i, l, n) = let
paulson@18449
   267
		fun g (NIL, l, n) = f (i-1, l, n)
paulson@18449
   268
		  | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1)
paulson@18449
   269
		in
paulson@18449
   270
		  g (Array.sub(arr, i), l, n)
paulson@18449
   271
		end
paulson@18449
   272
	  in
paulson@18449
   273
	    f ((Array.length arr) - 1, [], !n_items)
paulson@18449
   274
	  end (* listItems *);
paulson@18449
   275
paulson@18449
   276
  (* Apply a function to the entries of the table *)
paulson@18449
   277
    fun apply f (HT{table, ...}) = let
paulson@18449
   278
	  fun appF NIL = ()
paulson@18449
   279
	    | appF (B(_, key, item, rest)) = (
paulson@18449
   280
		f (key, item);
paulson@18449
   281
		appF rest)
paulson@18449
   282
	  val arr = !table
paulson@18449
   283
	  val sz = Array.length arr
paulson@18449
   284
	  fun appToTbl i = if (i < sz)
paulson@18449
   285
		then (appF (Array.sub (arr, i)); appToTbl(i+1))
paulson@18449
   286
		else ()
paulson@18449
   287
	  in
paulson@18449
   288
	    appToTbl 0
paulson@18449
   289
	  end (* apply *);
paulson@18449
   290
paulson@18449
   291
  (* Map a table to a new table that has the same keys and exception *)
paulson@18449
   292
    fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
paulson@18449
   293
	  fun mapF NIL = NIL
paulson@18449
   294
	    | mapF (B(hash, key, item, rest)) =
paulson@18449
   295
		B(hash, key, f (key, item), mapF rest)
paulson@18449
   296
	  val arr = !table
paulson@18449
   297
	  val sz = Array.length arr
paulson@18449
   298
	  val newArr = Array.array (sz, NIL)
paulson@18449
   299
	  fun mapTbl i = if (i < sz)
paulson@18449
   300
		then (
paulson@18449
   301
		  Array.update(newArr, i, mapF (Array.sub(arr, i)));
paulson@18449
   302
		  mapTbl (i+1))
paulson@18449
   303
		else ()
paulson@18449
   304
	  in
paulson@18449
   305
	    mapTbl 0;
paulson@18449
   306
	    HT{hashVal=hashVal,
paulson@18449
   307
	       sameKey=sameKey,
paulson@18449
   308
	       table = ref newArr, 
paulson@18449
   309
	       n_items = ref(!n_items), 
paulson@18449
   310
	       not_found = not_found}
paulson@18449
   311
	  end (* transform *);
paulson@18449
   312
paulson@18449
   313
  (* remove any hash table items that do not satisfy the given
paulson@18449
   314
   * predicate.
paulson@18449
   315
   *)
paulson@18449
   316
    fun filter pred (HT{table, n_items, not_found, ...}) = let
paulson@18449
   317
	  fun filterP NIL = NIL
paulson@18449
   318
	    | filterP (B(hash, key, item, rest)) = if (pred(key, item))
paulson@18449
   319
		then B(hash, key, item, filterP rest)
paulson@18449
   320
		else filterP rest
paulson@18449
   321
	  val arr = !table
paulson@18449
   322
	  val sz = Array.length arr
paulson@18449
   323
	  fun filterTbl i = if (i < sz)
paulson@18449
   324
		then (
paulson@18449
   325
		  Array.update (arr, i, filterP (Array.sub (arr, i)));
paulson@18449
   326
		  filterTbl (i+1))
paulson@18449
   327
		else ()
paulson@18449
   328
	  in
paulson@18449
   329
	    filterTbl 0
paulson@18449
   330
	  end (* filter *);
paulson@18449
   331
paulson@18449
   332
  (* Map a table to a new table that has the same keys, exception,
paulson@18449
   333
     hash function, and equality function *)
paulson@18449
   334
paulson@18449
   335
    fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
paulson@18449
   336
	  fun mapF NIL = NIL
paulson@18449
   337
	    | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest)
paulson@18449
   338
	  val arr = !table
paulson@18449
   339
	  val sz = Array.length arr
paulson@18449
   340
	  val newArr = Array.array (sz, NIL)
paulson@18449
   341
	  fun mapTbl i = if (i < sz)
paulson@18449
   342
		then (
paulson@18449
   343
		  Array.update(newArr, i, mapF (Array.sub(arr, i)));
paulson@18449
   344
		  mapTbl (i+1))
paulson@18449
   345
		else ()
paulson@18449
   346
	  in
paulson@18449
   347
	    mapTbl 0;
paulson@18449
   348
	    HT{hashVal=hashVal, 
paulson@18449
   349
	       sameKey=sameKey, 
paulson@18449
   350
	       table = ref newArr, 
paulson@18449
   351
	       n_items = ref(!n_items), 
paulson@18449
   352
	       not_found = not_found}
paulson@18449
   353
	  end (* transform *);
paulson@18449
   354
paulson@18449
   355
  (* Create a copy of a hash table *)
paulson@18449
   356
    fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
paulson@18449
   357
	  val arr = !table
paulson@18449
   358
	  val sz = Array.length arr
paulson@18449
   359
	  val newArr = Array.array (sz, NIL)
paulson@18449
   360
	  fun mapTbl i = (
paulson@18449
   361
		Array.update (newArr, i, Array.sub(arr, i));
paulson@18449
   362
		mapTbl (i+1))
paulson@18449
   363
	  in
paulson@18449
   364
	    (mapTbl 0) handle _ => ();
paulson@18449
   365
	    HT{hashVal=hashVal, 
paulson@18449
   366
	       sameKey=sameKey,
paulson@18449
   367
	       table = ref newArr, 
paulson@18449
   368
	       n_items = ref(!n_items), 
paulson@18449
   369
	       not_found = not_found}
paulson@18449
   370
	  end (* copy *);
paulson@18449
   371
paulson@18449
   372
  (* returns a list of the sizes of the various buckets.  This is to
paulson@18449
   373
   * allow users to gauge the quality of their hashing function.
paulson@18449
   374
   *)
paulson@18449
   375
    fun bucketSizes (HT{table = ref arr, ...}) = let
paulson@18449
   376
	  fun len (NIL, n) = n
paulson@18449
   377
	    | len (B(_, _, _, r), n) = len(r, n+1)
paulson@18449
   378
	  fun f (~1, l) = l
paulson@18449
   379
	    | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l)
paulson@18449
   380
	  in
paulson@18449
   381
	    f ((Array.length arr)-1, [])
paulson@18449
   382
	  end
paulson@18449
   383
paulson@18449
   384
   (*Added by lcp.
paulson@18508
   385
      This is essentially the  described in Compilers:
paulson@18449
   386
      Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*)
paulson@18449
   387
paulson@18508
   388
   (*This hash function is recommended in Compilers: Principles, Techniques, and
paulson@18508
   389
     Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly
paulson@18508
   390
     recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*)
paulson@18508
   391
   fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w))
paulson@18449
   392
paulson@18449
   393
   fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w);
paulson@18449
   394
   
paulson@18449
   395
   fun hashw_vector (v,w) = Vector.foldl hashw w v;
paulson@18449
   396
   
paulson@18449
   397
   fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s;
paulson@18449
   398
   
paulson@18449
   399
   fun hashw_strings (ss, w) = List.foldl hashw_string w ss;
paulson@18449
   400
paulson@18449
   401
   fun hash_string s = Word.toIntX (hashw_string(s,0w0));
paulson@18449
   402
paulson@18449
   403
end