src/HOL/Tools/polyhash.ML
author boehmes
Sat Mar 27 02:10:00 2010 +0100 (2010-03-27)
changeset 35983 27e2fa7d4ce7
parent 32960 69916a850301
permissions -rw-r--r--
slightly more general simproc (avoids errors of linarith)
     1 (* Modified for Poly/ML from SML/NJ Library version 0.2
     2  *
     3  * COPYRIGHT (c) 1992 by AT&T Bell Laboratories.
     4  * See file mosml/copyrght/copyrght.att for details.
     5  *
     6  * Original author: John Reppy, AT&T Bell Laboratories, Murray Hill, NJ 07974
     7  * Current version from the Moscow ML distribution, copied by permission.
     8  *)
     9 
    10 (* Polyhash -- polymorphic hashtables as in the SML/NJ Library *)
    11 
    12 signature Polyhash =
    13 sig
    14 
    15 type ('key, 'data) hash_table
    16 
    17 val mkTable     : ('_key -> int) * ('_key * '_key -> bool) -> int * exn 
    18                   -> ('_key, '_data) hash_table
    19 val numItems    : ('key, 'data) hash_table -> int
    20 val insert      : ('_key, '_data) hash_table -> '_key * '_data -> unit
    21 val peekInsert  : ('_key, '_data) hash_table -> '_key * '_data 
    22                   -> '_data option
    23 val find        : ('key, 'data) hash_table -> 'key -> 'data
    24 val peek        : ('key, 'data) hash_table -> 'key -> 'data option
    25 val remove      : ('key, 'data) hash_table -> 'key -> 'data
    26 val listItems   : ('key, 'data) hash_table -> ('key * 'data) list
    27 val apply       : ('key * 'data -> unit) -> ('key, 'data) hash_table -> unit
    28 val map         : ('_key * 'data -> '_res) -> ('_key, 'data) hash_table 
    29                   -> ('_key, '_res) hash_table
    30 val filter      : ('key * 'data -> bool) -> ('key, 'data) hash_table -> unit
    31 val transform   : ('data -> '_res) -> ('_key, 'data) hash_table 
    32                   -> ('_key, '_res) hash_table
    33 val copy        : ('_key, '_data) hash_table -> ('_key, '_data) hash_table
    34 val bucketSizes : ('key, 'data) hash_table -> int list
    35 
    36 (*Additions due to L. C. Paulson and Jia Meng*)
    37 val hashw : word * word -> word
    38 val hashw_char : Char.char * word -> word
    39 val hashw_int : int * word -> word
    40 val hashw_vector : word Vector.vector * word -> word
    41 val hashw_string : string * word -> word
    42 val hashw_strings : string list * word -> word
    43 val hash_string : string -> int
    44 
    45 (* 
    46    [('key, 'data) hash_table] is the type of hashtables with keys of type
    47    'key and data values of type 'data.
    48 
    49    [mkTable (hashVal, sameKey) (sz, exc)] returns a new hashtable,
    50    using hash function hashVal and equality predicate sameKey.  The sz
    51    is a size hint, and exc is the exception raised by function find.
    52    It must be the case that sameKey(k1, k2) implies hashVal(k1) =
    53    hashVal(k2) for all k1,k2.
    54 
    55    [numItems htbl] is the number of items in the hash table.
    56 
    57    [insert htbl (k, d)] inserts data d for key k.  If k already had an
    58    item associated with it, then the old item is overwritten.
    59 
    60    [find htbl k] returns d, where d is the data item associated with key k, 
    61    or raises the exception (given at creation of htbl) if there is no such d.
    62 
    63    [peek htbl k] returns SOME d, where d is the data item associated with 
    64    key k, or NONE if there is no such d.
    65 
    66    [peekInsert htbl (k, d)] inserts data d for key k, if k is not
    67    already in the table, returning NONE.  If k is already in the
    68    table, and the associated data value is d', then returns SOME d'
    69    and leaves the table unmodified.
    70 
    71    [remove htbl k] returns d, where d is the data item associated with key k, 
    72    removing d from the table; or raises the exception if there is no such d.
    73 
    74    [listItems htbl] returns a list of the (key, data) pairs in the hashtable.
    75 
    76    [apply f htbl] applies function f to all (key, data) pairs in the 
    77    hashtable, in some order.
    78 
    79    [map f htbl] returns a new hashtable, whose data items have been
    80    obtained by applying f to the (key, data) pairs in htbl.  The new
    81    tables have the same keys, hash function, equality predicate, and
    82    exception, as htbl.
    83 
    84    [filter p htbl] deletes from htbl all data items which do not
    85    satisfy predicate p.
    86 
    87    [transform f htbl] as map, but only the (old) data values are used
    88    when computing the new data values.
    89 
    90    [copy htbl] returns a complete copy of htbl.
    91 
    92    [bucketSizes htbl] returns a list of the sizes of the buckets.
    93    This is to allow users to gauge the quality of their hashing
    94    function.  
    95 *)
    96 
    97 end
    98 
    99 
   100 structure Polyhash :> Polyhash =
   101 struct
   102 
   103 datatype ('key, 'data) bucket_t
   104   = NIL
   105   | B of int * 'key * 'data * ('key, 'data) bucket_t
   106 
   107 datatype ('key, 'data) hash_table = 
   108     HT of {hashVal   : 'key -> int,
   109            sameKey   : 'key * 'key -> bool,
   110            not_found : exn,
   111            table     : ('key, 'data) bucket_t Array.array Unsynchronized.ref,
   112            n_items   : int Unsynchronized.ref}
   113 
   114 local
   115 (*
   116     prim_val andb_      : int -> int -> int = 2 "and";
   117     prim_val lshift_    : int -> int -> int = 2 "shift_left";
   118 *)
   119     fun andb_ x y = Word.toInt (Word.andb (Word.fromInt x, Word.fromInt y));
   120     fun lshift_ x y = Word.toInt (Word.<< (Word.fromInt x, Word.fromInt y));
   121 in 
   122     fun index (i, sz) = andb_ i (sz-1)
   123 
   124   (* find smallest power of 2 (>= 32) that is >= n *)
   125     fun roundUp n = 
   126         let fun f i = if (i >= n) then i else f (lshift_ i 1)
   127         in f 32 end
   128 end;
   129 
   130   (* Create a new table; the int is a size hint and the exception
   131    * is to be raised by find.
   132    *)
   133     fun mkTable (hashVal, sameKey) (sizeHint, notFound) = HT{
   134             hashVal=hashVal,
   135             sameKey=sameKey,
   136             not_found = notFound,
   137             table = Unsynchronized.ref (Array.array(roundUp sizeHint, NIL)),
   138             n_items = Unsynchronized.ref 0
   139           };
   140 
   141   (* conditionally grow a table *)
   142     fun growTable (HT{table, n_items, ...}) = let
   143             val arr = !table
   144             val sz = Array.length arr
   145             in
   146               if (!n_items >= sz)
   147                 then let
   148                   val newSz = sz+sz
   149                   val newArr = Array.array (newSz, NIL)
   150                   fun copy NIL = ()
   151                     | copy (B(h, key, v, rest)) = let
   152                         val indx = index (h, newSz)
   153                         in
   154                           Array.update (newArr, indx,
   155                             B(h, key, v, Array.sub(newArr, indx)));
   156                           copy rest
   157                         end
   158                   fun bucket n = (copy (Array.sub(arr, n)); bucket (n+1))
   159                   in
   160                     (bucket 0) handle _ => ();  (* FIXME avoid handle _ *)
   161                     table := newArr
   162                   end
   163                 else ()
   164             end (* growTable *);
   165 
   166   (* Insert an item.  If the key already has an item associated with it,
   167    * then the old item is discarded.
   168    *)
   169     fun insert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
   170         let
   171           val arr = !table
   172           val sz = Array.length arr
   173           val hash = hashVal key
   174           val indx = index (hash, sz)
   175           fun look NIL = (
   176                 Array.update(arr, indx, B(hash, key, item, Array.sub(arr, indx)));
   177                 Unsynchronized.inc n_items;
   178                 growTable tbl;
   179                 NIL)
   180             | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   181                 then B(hash, key, item, r)
   182                 else (case (look r)
   183                    of NIL => NIL
   184                     | rest => B(h, k, v, rest)
   185                   (* end case *))
   186           in
   187             case (look (Array.sub (arr, indx)))
   188              of NIL => ()
   189               | b => Array.update(arr, indx, b)
   190           end;
   191 
   192   (* Insert an item if not there already; if it is there already, 
   193      then return the old data value and leave the table unmodified..
   194    *)
   195     fun peekInsert (tbl as HT{hashVal, sameKey, table, n_items, ...}) (key, item) =
   196         let val arr = !table
   197             val sz = Array.length arr
   198             val hash = hashVal key
   199             val indx = index (hash, sz)
   200             fun look NIL = 
   201                 (Array.update(arr, indx, B(hash, key, item, 
   202                                            Array.sub(arr, indx)));
   203                  Unsynchronized.inc n_items;
   204                  growTable tbl;
   205                  NONE)
   206               | look (B(h, k, v, r)) = 
   207                 if hash = h andalso sameKey(key, k) then SOME v
   208                 else look r
   209         in
   210             look (Array.sub (arr, indx))
   211         end;
   212 
   213   (* find an item, the table's exception is raised if the item doesn't exist *)
   214     fun find (HT{hashVal, sameKey, table, not_found, ...}) key = let
   215           val arr = !table
   216           val sz = Array.length arr
   217           val hash = hashVal key
   218           val indx = index (hash, sz)
   219           fun look NIL = raise not_found
   220             | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   221                 then v
   222                 else look r
   223           in
   224             look (Array.sub (arr, indx))
   225           end;
   226 
   227   (* look for an item, return NONE if the item doesn't exist *)
   228     fun peek (HT{hashVal, sameKey, table, ...}) key = let
   229           val arr = !table
   230           val sz = Array.length arr
   231           val hash = hashVal key
   232           val indx = index (hash, sz)
   233           fun look NIL = NONE
   234             | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   235                 then SOME v
   236                 else look r
   237           in
   238             look (Array.sub (arr, indx))
   239           end;
   240 
   241   (* Remove an item.  The table's exception is raised if
   242    * the item doesn't exist.
   243    *)
   244     fun remove (HT{hashVal, sameKey, not_found, table, n_items}) key = let
   245           val arr = !table
   246           val sz = Array.length arr
   247           val hash = hashVal key
   248           val indx = index (hash, sz)
   249           fun look NIL = raise not_found
   250             | look (B(h, k, v, r)) = if ((hash = h) andalso sameKey(key, k))
   251                 then (v, r)
   252                 else let val (item, r') = look r in (item, B(h, k, v, r')) end
   253           val (item, bucket) = look (Array.sub (arr, indx))
   254           in
   255             Array.update (arr, indx, bucket);
   256             n_items := !n_items - 1;
   257             item
   258           end (* remove *);
   259 
   260   (* Return the number of items in the table *)
   261    fun numItems (HT{n_items, ...}) = !n_items
   262 
   263   (* return a list of the items in the table *)
   264     fun listItems (HT{table = Unsynchronized.ref arr, n_items, ...}) = let
   265           fun f (_, l, 0) = l
   266             | f (~1, l, _) = l
   267             | f (i, l, n) = let
   268                 fun g (NIL, l, n) = f (i-1, l, n)
   269                   | g (B(_, k, v, r), l, n) = g(r, (k, v)::l, n-1)
   270                 in
   271                   g (Array.sub(arr, i), l, n)
   272                 end
   273           in
   274             f ((Array.length arr) - 1, [], !n_items)
   275           end (* listItems *);
   276 
   277   (* Apply a function to the entries of the table *)
   278     fun apply f (HT{table, ...}) = let
   279           fun appF NIL = ()
   280             | appF (B(_, key, item, rest)) = (
   281                 f (key, item);
   282                 appF rest)
   283           val arr = !table
   284           val sz = Array.length arr
   285           fun appToTbl i = if (i < sz)
   286                 then (appF (Array.sub (arr, i)); appToTbl(i+1))
   287                 else ()
   288           in
   289             appToTbl 0
   290           end (* apply *);
   291 
   292   (* Map a table to a new table that has the same keys and exception *)
   293     fun map f (HT{hashVal, sameKey, table, n_items, not_found}) = let
   294           fun mapF NIL = NIL
   295             | mapF (B(hash, key, item, rest)) =
   296                 B(hash, key, f (key, item), mapF rest)
   297           val arr = !table
   298           val sz = Array.length arr
   299           val newArr = Array.array (sz, NIL)
   300           fun mapTbl i = if (i < sz)
   301                 then (
   302                   Array.update(newArr, i, mapF (Array.sub(arr, i)));
   303                   mapTbl (i+1))
   304                 else ()
   305           in
   306             mapTbl 0;
   307             HT{hashVal=hashVal,
   308                sameKey=sameKey,
   309                table = Unsynchronized.ref newArr, 
   310                n_items = Unsynchronized.ref(!n_items), 
   311                not_found = not_found}
   312           end (* transform *);
   313 
   314   (* remove any hash table items that do not satisfy the given
   315    * predicate.
   316    *)
   317     fun filter pred (HT{table, n_items, not_found, ...}) = let
   318           fun filterP NIL = NIL
   319             | filterP (B(hash, key, item, rest)) = if (pred(key, item))
   320                 then B(hash, key, item, filterP rest)
   321                 else filterP rest
   322           val arr = !table
   323           val sz = Array.length arr
   324           fun filterTbl i = if (i < sz)
   325                 then (
   326                   Array.update (arr, i, filterP (Array.sub (arr, i)));
   327                   filterTbl (i+1))
   328                 else ()
   329           in
   330             filterTbl 0
   331           end (* filter *);
   332 
   333   (* Map a table to a new table that has the same keys, exception,
   334      hash function, and equality function *)
   335 
   336     fun transform f (HT{hashVal, sameKey, table, n_items, not_found}) = let
   337           fun mapF NIL = NIL
   338             | mapF (B(hash, key, item, rest)) = B(hash, key, f item, mapF rest)
   339           val arr = !table
   340           val sz = Array.length arr
   341           val newArr = Array.array (sz, NIL)
   342           fun mapTbl i = if (i < sz)
   343                 then (
   344                   Array.update(newArr, i, mapF (Array.sub(arr, i)));
   345                   mapTbl (i+1))
   346                 else ()
   347           in
   348             mapTbl 0;
   349             HT{hashVal=hashVal, 
   350                sameKey=sameKey, 
   351                table = Unsynchronized.ref newArr, 
   352                n_items = Unsynchronized.ref(!n_items), 
   353                not_found = not_found}
   354           end (* transform *);
   355 
   356   (* Create a copy of a hash table *)
   357     fun copy (HT{hashVal, sameKey, table, n_items, not_found}) = let
   358           val arr = !table
   359           val sz = Array.length arr
   360           val newArr = Array.array (sz, NIL)
   361           fun mapTbl i = (
   362                 Array.update (newArr, i, Array.sub(arr, i));
   363                 mapTbl (i+1))
   364           in
   365             (mapTbl 0) handle _ => ();  (* FIXME avoid handle _ *)
   366             HT{hashVal=hashVal, 
   367                sameKey=sameKey,
   368                table = Unsynchronized.ref newArr, 
   369                n_items = Unsynchronized.ref(!n_items), 
   370                not_found = not_found}
   371           end (* copy *);
   372 
   373   (* returns a list of the sizes of the various buckets.  This is to
   374    * allow users to gauge the quality of their hashing function.
   375    *)
   376     fun bucketSizes (HT{table = Unsynchronized.ref arr, ...}) = let
   377           fun len (NIL, n) = n
   378             | len (B(_, _, _, r), n) = len(r, n+1)
   379           fun f (~1, l) = l
   380             | f (i, l) = f (i-1, len (Array.sub (arr, i), 0) :: l)
   381           in
   382             f ((Array.length arr)-1, [])
   383           end
   384 
   385    (*Added by lcp.
   386       This is essentially the  described in Compilers:
   387       Principles, Techniques, and Tools, by Aho, Sethi and Ullman.*)
   388 
   389    (*This hash function is recommended in Compilers: Principles, Techniques, and
   390      Tools, by Aho, Sethi and Ullman. The hashpjw function, which they particularly
   391      recommend, triggers a bug in versions of Poly/ML up to 4.2.0.*)
   392    fun hashw (u,w) = Word.+ (u, Word.*(0w65599,w))
   393 
   394    fun hashw_char (c,w) = hashw (Word.fromInt (Char.ord c), w);
   395    
   396    fun hashw_int (i,w) = hashw (Word.fromInt i, w);
   397    
   398    fun hashw_vector (v,w) = Vector.foldl hashw w v;
   399    
   400    fun hashw_string (s:string, w) = CharVector.foldl hashw_char w s;
   401    
   402    fun hashw_strings (ss, w) = List.foldl hashw_string w ss;
   403 
   404    fun hash_string s = Word.toIntX (hashw_string(s,0w0));
   405 
   406 end