compact representation of sets of integers;
authorwenzelm
Wed, 29 Nov 2023 00:07:54 +0100
changeset 79074 7f24c5be57bd
parent 79071 7ab8b3f1d84b
child 79075 e671abce8f8e
compact representation of sets of integers;
src/Pure/General/bitset.ML
src/Pure/ROOT.ML
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Pure/General/bitset.ML	Wed Nov 29 00:07:54 2023 +0100
@@ -0,0 +1,216 @@
+(*  Title:      Pure/General/bitset.ML
+    Author:     Makarius
+
+Compact representation of sets of integers (see also Pure/General/table.ML and
+Pure/General/set.ML).
+*)
+
+signature BITSET =
+sig
+  type T
+  val empty: T
+  val build: (T -> T) -> T
+  val is_empty: T -> bool
+  val fold: (int -> 'a -> 'a) -> T -> 'a -> 'a
+  val fold_rev: (int -> 'a -> 'a) -> T -> 'a -> 'a
+  val dest: T -> int list
+  val min: T -> int option
+  val max: T -> int option
+  val get_first: (int -> 'a option) -> T -> 'a option
+  val exists: (int -> bool) -> T -> bool
+  val forall: (int -> bool) -> T -> bool
+  val member: T -> int -> bool
+  val subset: T * T -> bool
+  val eq_set: T * T -> bool
+  val insert: int -> T -> T
+  val make: int list -> T
+  val merge: T * T -> T
+  val merges: T list -> T
+  val remove: int -> T -> T
+  val subtract: T -> T -> T
+  val restrict: (int -> bool) -> T -> T
+  val inter: T -> T -> T
+  val union: T -> T -> T
+end;
+
+structure Bitset: BITSET =
+struct
+
+(* bits and words *)
+
+exception BAD of int;
+
+val word_size = Word.wordSize;
+
+val min_bit = 0;
+val max_bit = word_size - 1;
+fun check_bit n = min_bit <= n andalso n <= max_bit;
+
+fun make_bit n = if check_bit n then Word.<< (0w1, Word.fromInt n) else raise BAD n;
+val mimimum_bit = make_bit min_bit;
+val maximum_bit = make_bit max_bit;
+
+fun make_int m n = if check_bit n then m * word_size + n else raise BAD n;
+fun dest_int x = Integer.div_mod x word_size;
+
+fun add_bits v w = Word.orb (v, w);
+fun del_bits v w = Word.andb (Word.notb v, w);
+fun incl_bits v w = add_bits v w = w;
+
+fun fold_bits f w =
+  let
+    fun bits n b a =
+      let val a' = if incl_bits b w then f n a else a in
+        if n = max_bit then a'
+        else bits (n + 1) (Word.<< (b, 0w1)) a'
+      end;
+  in bits min_bit mimimum_bit end;
+
+fun fold_rev_bits f w =
+  let
+    fun bits n b a =
+      let val a' = if incl_bits b w then f n a else a in
+        if n = min_bit then a'
+        else bits (n - 1) (Word.>> (b, 0w1)) a'
+      end;
+  in bits max_bit maximum_bit end;
+
+
+(* datatype *)
+
+datatype T = Bitset of word Inttab.table;
+
+val empty = Bitset Inttab.empty;
+
+fun build (f: T -> T) = f empty;
+
+fun is_empty (Bitset t) = Inttab.is_empty t;
+
+
+(* fold combinators *)
+
+fun fold_set f (Bitset t) =
+  Inttab.fold (fn (m, w) =>
+    (if m < 0 then fold_rev_bits else fold_bits) (f o make_int m) w) t;
+
+fun fold_rev_set f (Bitset t) =
+  Inttab.fold_rev (fn (m, w) =>
+    (if m < 0 then fold_bits else fold_rev_bits) (f o make_int m) w) t;
+
+val dest = Library.build o fold_rev_set cons;
+
+
+(* min/max entries *)
+
+fun min (Bitset t) =
+  Inttab.min t |> Option.map (fn (m, w) =>
+    make_int m (fold_bits Integer.min w max_bit));
+
+fun max (Bitset t) =
+  Inttab.max t |> Option.map (fn (m, w) =>
+    make_int m (fold_bits Integer.max w min_bit));
+
+
+(* linear search *)
+
+fun get_first f set =
+  let exception FOUND of 'a in
+    fold_set (fn x => fn a => (case f x of SOME b => raise FOUND b | NONE => a)) set NONE
+      handle FOUND b => SOME b
+  end;
+
+fun exists pred = is_some o get_first (fn x => if pred x then SOME x else NONE);
+fun forall pred = not o exists (not o pred);
+
+
+(* member *)
+
+fun member (Bitset t) x =
+  let val (m, n) = dest_int x in
+    (case Inttab.lookup t m of
+      NONE => false
+    | SOME w => incl_bits (make_bit n) w)
+  end;
+
+
+(* subset *)
+
+fun subset (Bitset t1, Bitset t2) =
+  pointer_eq (t1, t2) orelse
+    Inttab.size t1 <= Inttab.size t2 andalso
+    t1 |> Inttab.forall (fn (m, w1) =>
+      (case Inttab.lookup t2 m of
+        NONE => false
+      | SOME w2 => incl_bits w1 w2));
+
+fun eq_set (set1, set2) =
+  pointer_eq (set1, set2) orelse subset (set1, set2) andalso subset (set2, set1);
+
+
+(* insert *)
+
+fun insert x (Bitset t) =
+  let val (m, n) = dest_int x
+  in Bitset (Inttab.map_default (m, 0w0) (add_bits (make_bit n)) t) end;
+
+fun make xs = build (fold insert xs);
+
+
+(* merge *)
+
+fun join_bits (w1, w2) =
+  let val w = add_bits w2 w1
+  in if w = w1 then raise Inttab.SAME else w end;
+
+fun merge (set1 as Bitset t1, set2 as Bitset t2) =
+  if pointer_eq (set1, set2) then set1
+  else if is_empty set1 then set2
+  else if is_empty set2 then set1
+  else Bitset (Inttab.join (K join_bits) (t1, t2));
+
+fun merges sets = Library.foldl merge (empty, sets);
+
+
+(* remove *)
+
+fun remove x (set as Bitset t) =
+  let val (m, n) = dest_int x in
+    (case Inttab.lookup t m of
+      NONE => set
+    | SOME w =>
+        let val w' = del_bits (make_bit n) w in
+          if w = w' then set
+          else if w' = 0w0 then Bitset (Inttab.delete m t)
+          else Bitset (Inttab.update (m, w') t)
+        end)
+  end;
+
+val subtract = fold_set remove;
+
+
+(* conventional set operations *)
+
+fun restrict pred set =
+  fold_set (fn x => not (pred x) ? remove x) set set;
+
+fun inter set1 set2 =
+  if pointer_eq (set1, set2) then set1
+  else if is_empty set1 orelse is_empty set2 then empty
+  else restrict (member set1) set2;
+
+fun union set1 set2 = merge (set2, set1);
+
+
+(* ML pretty-printing *)
+
+val _ =
+  ML_system_pp (fn depth => fn _ => fn set =>
+    ML_Pretty.to_polyml
+      (ML_Pretty.enum "," "{" "}" (ML_Pretty.from_polyml o ML_system_pretty) (dest set, depth)));
+
+
+(*final declarations of this structure!*)
+val fold = fold_set;
+val fold_rev = fold_rev_set;
+
+end;
--- a/src/Pure/ROOT.ML	Sun Nov 26 21:06:47 2023 +0000
+++ b/src/Pure/ROOT.ML	Wed Nov 29 00:07:54 2023 +0100
@@ -46,6 +46,7 @@
 ML_file "General/integer.ML";
 ML_file "General/alist.ML";
 ML_file "General/table.ML";
+ML_file "General/bitset.ML";
 ML_file "General/set.ML";
 
 ML_file "General/random.ML";