improved scanning
authorobua
Thu, 16 Feb 2006 23:30:47 +0100
changeset 19089 2e487fe9593a
parent 19088 7870cf61c4b3
child 19090 c442f3362f7a
improved scanning
src/HOL/Import/ImportRecorder.thy
src/HOL/Import/lazy_scan.ML
src/HOL/Import/lazy_seq.ML
src/HOL/Import/scan.ML
src/HOL/Import/seq.ML
src/HOL/Import/xml.ML
src/HOL/Import/xmlconv.ML
--- a/src/HOL/Import/ImportRecorder.thy	Thu Feb 16 21:15:38 2006 +0100
+++ b/src/HOL/Import/ImportRecorder.thy	Thu Feb 16 23:30:47 2006 +0100
@@ -1,4 +1,4 @@
 theory ImportRecorder imports Main 
-uses  "susp.ML" "lazy_seq.ML" "lazy_scan.ML" "xml.ML" "xmlconv.ML" "importrecorder.ML"
+uses  "seq.ML" "scan.ML" "susp.ML" "lazy_seq.ML" "xml.ML" "xmlconv.ML" "importrecorder.ML"
 begin
 end
\ No newline at end of file
--- a/src/HOL/Import/lazy_scan.ML	Thu Feb 16 21:15:38 2006 +0100
+++ b/src/HOL/Import/lazy_scan.ML	Thu Feb 16 23:30:47 2006 +0100
@@ -29,6 +29,7 @@
     val option   : ('a,'b) scanner -> ('a,'b option) scanner
     val repeat   : ('a,'b) scanner -> ('a,'b list) scanner
     val repeat1  : ('a,'b) scanner -> ('a,'b list) scanner
+    val repeat_fixed : int -> ('a, 'b) scanner -> ('a, 'b list) scanner  
     val ahead    : ('a,'b) scanner -> ('a,'b) scanner
     val unless   : ('a, 'b) scanner -> ('a,'c) scanner -> ('a,'c) scanner
     val $$       : ''a -> (''a,''a) scanner
@@ -36,6 +37,8 @@
     val scan_full: ('a,'b) scanner -> 'a LazySeq.seq -> 'b LazySeq.seq
 
     val scan_id : (string, string) scanner
+    val scan_nat : (string, int) scanner
+
     val this : ''a list -> (''a, ''a list) scanner
     val this_string : string -> (string, string) scanner
 end
@@ -175,6 +178,15 @@
 	(x::xs,toks3)
     end
 
+fun repeat_fixed n sc =
+    let
+	fun R n xs toks =
+	    if (n <= 0) then (List.rev xs, toks)
+	    else case (sc toks) of (x, toks2) => R (n-1) (x::xs) toks2
+    in
+	R n []
+    end
+
 fun ahead (sc:'a->'b*'a) toks = (#1 (sc toks),toks)
 
 fun unless test sc toks =
@@ -208,6 +220,10 @@
 
 val scan_id = one Symbol.is_letter ^^ (any Symbol.is_letdig >> implode);
 
+val nat_of_list = the o Int.fromString o implode 
+
+val scan_nat = repeat1 (one Symbol.is_digit) >> nat_of_list 
+
 fun this [] = (fn toks => ([], toks))
   | this (xs' as (x::xs)) = one (fn y => x=y) -- this xs >> K xs'
 
--- a/src/HOL/Import/lazy_seq.ML	Thu Feb 16 21:15:38 2006 +0100
+++ b/src/HOL/Import/lazy_seq.ML	Thu Feb 16 23:30:47 2006 +0100
@@ -8,12 +8,11 @@
 signature LAZY_SEQ =
 sig
 
-    type 'a seq
+    include EXTENDED_SCANNER_SEQ
 
     (* From LIST *)
 
-    val null : 'a seq -> bool
-    val length : 'a seq -> int
+    val fromString : string -> string seq
     val @ : 'a seq * 'a seq -> 'a seq
     val hd : 'a seq -> 'a
     val tl : 'a seq -> 'a seq
@@ -21,7 +20,6 @@
     val getItem : 'a seq -> ('a * 'a seq) option
     val nth : 'a seq * int -> 'a
     val take : 'a seq * int -> 'a seq
-    val take_at_most : 'a seq * int -> 'a list
     val drop : 'a seq * int -> 'a seq
     val rev : 'a seq -> 'a seq
     val concat : 'a seq seq -> 'a seq
@@ -89,6 +87,7 @@
 exception Empty
 
 fun getItem (Seq s) = force s
+val pull = getItem
 fun make f = Seq (delay f)
 
 fun null s = is_none (getItem s)
@@ -149,11 +148,11 @@
     else F' s n
 end
 
-fun take_at_most (s,n) = 
+fun take_at_most s n = 
     if n <= 0 then [] else
     case getItem s of 
 	NONE => []
-      | SOME (x,s') => x::(take_at_most (s',n-1))
+      | SOME (x,s') => x::(take_at_most s' (n-1))
 
 local
     fun F s 0 = s
@@ -547,4 +546,9 @@
 	F e (getItem s)
     end
 
+fun fromString s = of_list (Symbol.explode s)
+
 end
+
+structure LazyScan = Scanner (structure Seq = LazySeq)
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Import/scan.ML	Thu Feb 16 23:30:47 2006 +0100
@@ -0,0 +1,220 @@
+(*  Title:      HOL/Import/lazy_scan.ML
+    ID:         $Id$
+    Author:     Sebastian Skalberg, TU Muenchen / Steven Obua, TU Muenchen
+
+Scanner combinators for sequences.
+*)
+
+signature SCANNER =
+sig
+
+    include SCANNER_SEQ
+
+    exception SyntaxError
+
+    type ('a,'b) scanner = 'a seq -> 'b * 'a seq
+
+    val :--      : ('a,'b) scanner * ('b -> ('a,'c) scanner)
+		   -> ('a,'b*'c) scanner
+    val --       : ('a,'b) scanner * ('a,'c) scanner -> ('a,'b*'c) scanner
+    val >>       : ('a,'b) scanner * ('b -> 'c) -> ('a,'c) scanner
+    val --|      : ('a,'b) scanner * ('a,'c) scanner -> ('a,'b) scanner
+    val |--      : ('a,'b) scanner * ('a,'c) scanner -> ('a,'c) scanner
+    val ^^       : ('a,string) scanner * ('a,string) scanner
+		   -> ('a,string) scanner 
+    val ||       : ('a,'b) scanner * ('a,'b) scanner -> ('a,'b) scanner
+    val one      : ('a -> bool) -> ('a,'a) scanner
+    val anyone   : ('a,'a) scanner
+    val succeed  : 'b -> ('a,'b) scanner
+    val any      : ('a -> bool) -> ('a,'a list) scanner
+    val any1     : ('a -> bool) -> ('a,'a list) scanner
+    val optional : ('a,'b) scanner -> 'b -> ('a,'b) scanner
+    val option   : ('a,'b) scanner -> ('a,'b option) scanner
+    val repeat   : ('a,'b) scanner -> ('a,'b list) scanner
+    val repeat1  : ('a,'b) scanner -> ('a,'b list) scanner
+    val repeat_fixed : int -> ('a, 'b) scanner -> ('a, 'b list) scanner  
+    val ahead    : ('a,'b) scanner -> ('a,'b) scanner
+    val unless   : ('a, 'b) scanner -> ('a,'c) scanner -> ('a,'c) scanner
+    val $$       : ''a -> (''a,''a) scanner
+    val !!       : ('a seq -> string) -> ('a,'b) scanner -> ('a,'b) scanner
+    
+    val scan_id : (string, string) scanner
+    val scan_nat : (string, int) scanner
+
+    val this : ''a list -> (''a, ''a list) scanner
+    val this_string : string -> (string, string) scanner
+end
+
+functor Scanner (structure Seq : SCANNER_SEQ) : SCANNER =
+struct
+
+infix 7 |-- --|
+infix 5 :-- -- ^^
+infix 3 >>
+infix 0 ||
+
+exception SyntaxError
+exception Fail of string
+
+type 'a seq = 'a Seq.seq
+type ('a,'b) scanner = 'a seq -> 'b * 'a seq
+
+val pull = Seq.pull
+
+fun (sc1 :-- sc2) toks =
+    let
+	val (x,toks2) = sc1 toks
+	val (y,toks3) = sc2 x toks2
+    in
+	((x,y),toks3)
+    end
+
+fun (sc1 -- sc2) toks =
+    let
+	val (x,toks2) = sc1 toks
+	val (y,toks3) = sc2 toks2
+    in
+	((x,y),toks3)
+    end
+
+fun (sc >> f) toks =
+    let
+	val (x,toks2) = sc toks
+    in
+	(f x,toks2)
+    end
+
+fun (sc1 --| sc2) toks =
+    let
+	val (x,toks2) = sc1 toks
+	val (_,toks3) = sc2 toks2
+    in
+	(x,toks3)
+    end
+
+fun (sc1 |-- sc2) toks =
+    let
+	val (_,toks2) = sc1 toks
+    in
+	sc2 toks2
+    end
+
+fun (sc1 ^^ sc2) toks =
+    let
+	val (x,toks2) = sc1 toks
+	val (y,toks3) = sc2 toks2
+    in
+	(x^y,toks3)
+    end
+
+fun (sc1 || sc2) toks =
+    (sc1 toks)
+    handle SyntaxError => sc2 toks
+
+fun anyone toks = case pull toks of NONE => raise SyntaxError | SOME x => x
+
+fun one p toks = case anyone toks of x as (t, toks) => if p t then x else raise SyntaxError
+
+fun succeed e toks = (e,toks)
+
+fun any p toks =
+    case pull toks of
+	NONE =>  ([],toks)
+      | SOME(x,toks2) => if p x
+			 then
+			     let
+				 val (xs,toks3) = any p toks2
+			     in
+				 (x::xs,toks3)
+			     end
+			 else ([],toks)
+
+fun any1 p toks =
+    let
+	val (x,toks2) = one p toks
+	val (xs,toks3) = any p toks2
+    in
+	(x::xs,toks3)
+    end
+
+fun optional sc def =  sc || succeed def
+fun option sc = (sc >> SOME) || succeed NONE
+
+(*
+fun repeat sc =
+    let
+	fun R toks =
+	    let
+		val (x,toks2) = sc toks
+		val (xs,toks3) = R toks2
+	    in
+		(x::xs,toks3)
+	    end
+	    handle SyntaxError => ([],toks)
+    in
+	R
+    end
+*)
+
+(* A tail-recursive version of repeat.  It is (ever so) slightly slower
+ * than the above, non-tail-recursive version (due to the garbage generation
+ * associated with the reversal of the list).  However,  this version will be
+ * able to process input where the former version must give up (due to stack
+ * overflow).  The slowdown seems to be around the one percent mark.
+ *)
+fun repeat sc =
+    let
+	fun R xs toks =
+	    case SOME (sc toks) handle SyntaxError => NONE of
+		SOME (x,toks2) => R (x::xs) toks2
+	      | NONE => (List.rev xs,toks)
+    in
+	R []
+    end
+
+fun repeat1 sc toks =
+    let
+	val (x,toks2) = sc toks
+	val (xs,toks3) = repeat sc toks2
+    in
+	(x::xs,toks3)
+    end
+
+fun repeat_fixed n sc =
+    let
+	fun R n xs toks =
+	    if (n <= 0) then (List.rev xs, toks)
+	    else case (sc toks) of (x, toks2) => R (n-1) (x::xs) toks2
+    in
+	R n []
+    end
+
+fun ahead (sc:'a->'b*'a) toks = (#1 (sc toks),toks)
+
+fun unless test sc toks =
+    let
+	val test_failed = (test toks;false) handle SyntaxError => true
+    in
+	if test_failed
+	then sc toks
+	else raise SyntaxError
+    end
+
+fun $$ arg = one (fn x => x = arg)
+
+fun !! f sc toks = (sc toks
+		    handle SyntaxError => raise Fail (f toks))
+
+val scan_id = one Symbol.is_letter ^^ (any Symbol.is_letdig >> implode);
+
+val nat_of_list = the o Int.fromString o implode 
+
+val scan_nat = repeat1 (one Symbol.is_digit) >> nat_of_list 
+
+fun this [] = (fn toks => ([], toks))
+  | this (xs' as (x::xs)) = one (fn y => x=y) -- this xs >> K xs'
+
+fun this_string s = this (explode s) >> K s
+
+end
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/Import/seq.ML	Thu Feb 16 23:30:47 2006 +0100
@@ -0,0 +1,99 @@
+signature SCANNER_SEQ =
+sig
+    type 'a seq
+    
+    val pull : 'a seq -> ('a * 'a seq) option 
+end
+
+signature EXTENDED_SCANNER_SEQ =
+sig
+
+  include SCANNER_SEQ
+
+  val null : 'a seq -> bool
+  val length : 'a seq -> int
+  val take_at_most : 'a seq -> int -> 'a list
+
+end
+
+functor ExtendScannerSeq (structure Seq : SCANNER_SEQ) : EXTENDED_SCANNER_SEQ =
+struct
+  
+type 'a seq = 'a Seq.seq
+
+val pull = Seq.pull
+
+fun null s = is_none (pull s)
+
+fun take_at_most s n = 
+    if n <= 0 then [] else
+    case pull s of 
+	NONE => []
+      | SOME (x,s') => x::(take_at_most s' (n-1))
+
+fun length' s n = 
+    case pull s of
+	NONE => n
+      | SOME (_, s') => length' s' (n+1)
+
+fun length s = length' s 0
+		
+end  
+
+signature VECTOR_SCANNER_SEQ = 
+sig
+    include EXTENDED_SCANNER_SEQ
+
+    val fromString : string -> string seq
+    val fromVector : 'a Vector.vector -> 'a seq
+end
+
+structure VectorScannerSeq :> VECTOR_SCANNER_SEQ =
+struct
+  
+structure Incubator : SCANNER_SEQ =
+struct
+
+type 'a seq = 'a Vector.vector * int * int
+fun pull (v, len, i) = if i >= len then NONE else SOME (Vector.sub (v, i), (v, len, i+1))
+
+end
+
+structure Extended = ExtendScannerSeq (structure Seq = Incubator)
+
+open Extended
+
+fun fromVector v = (v, Vector.length v, 0)
+fun vector_from_string s = Vector.tabulate (String.size s, fn i => String.str (String.sub (s, i)))
+fun fromString s = fromVector (vector_from_string s)
+
+end
+
+signature LIST_SCANNER_SEQ =
+sig
+    include EXTENDED_SCANNER_SEQ
+    
+    val fromString : string -> string seq
+    val fromList : 'a list -> 'a seq
+end
+
+structure ListScannerSeq :> LIST_SCANNER_SEQ =
+struct
+     
+structure Incubator : SCANNER_SEQ =
+struct
+
+type 'a seq = 'a list
+fun pull [] = NONE
+  | pull (x::xs) = SOME (x, xs)
+
+end
+
+structure Extended = ExtendScannerSeq (structure Seq = Incubator)
+
+open Extended
+
+val fromList = I
+val fromString = Symbol.explode
+
+end
--- a/src/HOL/Import/xml.ML	Thu Feb 16 21:15:38 2006 +0100
+++ b/src/HOL/Import/xml.ML	Thu Feb 16 23:30:47 2006 +0100
@@ -12,24 +12,29 @@
   val text_charref: string -> string
   val cdata: string -> string
   val element: string -> (string * string) list -> string list -> string
+  
   datatype tree =
       Elem of string * (string * string) list * tree list
     | Text of string
+  
   val string_of_tree: tree -> string
   val tree_of_string: string -> tree
+
+  val encoded_string_of_tree : tree -> string
+  val tree_of_encoded_string : string -> tree
 end;
 
-structure XML =
+structure XML :> XML =
 struct
 
-structure Scan = LazyScan
+structure Seq = ListScannerSeq
+structure Scan = Scanner (structure Seq = Seq)
 open Scan
 
 (** string based representation (small scale) **)
 
 val header = "<?xml version=\"1.0\"?>\n";
 
-
 (* text and character data *)
 
 fun decode "&lt;" = "<"
@@ -89,7 +94,7 @@
 
 (** XML parsing **)
 
-fun beginning n xs = Symbol.beginning n (LazySeq.take_at_most (xs, n))
+fun beginning n xs = Symbol.beginning n (Seq.take_at_most xs n)
 
 fun err s xs =
   "XML parsing error: " ^ s ^ "\nfound: " ^ quote (beginning 100 xs) ;
@@ -149,11 +154,44 @@
 
 fun tree_of_string s =
     let
-	val seq = LazySeq.of_list (Symbol.explode s)
+	val seq = Seq.fromString s
 	val scanner = !! (err "Malformed element") (scan_whspc |-- parse_elem --| scan_whspc)
 	val (x, toks) = scanner seq
     in
-	if LazySeq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
+	if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
     end
-	
+
+(* More efficient saving and loading of xml trees employing a proprietary external format *)
+
+fun write_lstring s buf = buf |> Buffer.add (string_of_int (size s)) |> Buffer.add " " |> Buffer.add s
+fun parse_lstring toks = (scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i (one (K true))) >> snd >> implode) toks
+
+fun write_list w l buf = buf |> Buffer.add (string_of_int (length l)) |> Buffer.add " " |> fold w l     
+fun parse_list sc = scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i sc) >> snd
+
+fun write_tree (Text s) buf = buf |> Buffer.add "T" |> write_lstring s
+  | write_tree (Elem (name, attrs, children)) buf = 
+    buf |> Buffer.add "E" 
+	|> write_lstring name 
+	|> write_list (fn (a,b) => fn buf => buf |> write_lstring a |> write_lstring b) attrs 
+	|> write_list write_tree children
+
+fun parse_tree toks = (one (K true) :-- (fn "T" => parse_lstring >> Text | "E" => parse_elem | _ => raise SyntaxError) >> snd) toks
+and parse_elem toks = (parse_lstring -- parse_list (parse_lstring -- parse_lstring) -- parse_list parse_tree >> (fn ((n, a), c) => Elem (n,a,c))) toks
+
+fun encoded_string_of_tree tree = Buffer.content (write_tree tree Buffer.empty)
+
+fun tree_of_encoded_string s = 
+    let
+	fun print (a,b) = writeln (a^" "^(string_of_int b))
+	val _ = print ("length of encoded string: ", size s)
+	val _ = writeln "Seq.fromString..."
+	val seq = timeit (fn () => Seq.fromString s)
+	val  _ = print ("length of sequence", timeit (fn () => Seq.length seq))
+	val scanner = !! (err "Malformed encoded xml") parse_tree
+	val (x, toks) = scanner seq
+    in
+	if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
+    end	       
+
 end;
--- a/src/HOL/Import/xmlconv.ML	Thu Feb 16 21:15:38 2006 +0100
+++ b/src/HOL/Import/xmlconv.ML	Thu Feb 16 23:30:47 2006 +0100
@@ -57,6 +57,9 @@
 
   val write_to_file: 'a output -> string -> 'a -> unit
   val read_from_file: 'a input -> string -> 'a
+
+  val serialize_to_file : 'a output -> string -> 'a -> unit
+  val deserialize_from_file : 'a input -> string -> 'a
 end;
 
 structure XMLConv : XML_CONV =
@@ -240,14 +243,14 @@
     ) e
 
 
-fun write_to_file output fname x = File.write (Path.unpack fname) (XML.string_of_tree (output x))
+fun to_file f output fname x = File.write (Path.unpack fname) (f (output x))
  
-fun read_from_file input fname = 
+fun from_file f input fname = 
     let
 	val _ = writeln "read_from_file enter"
 	val s = File.read (Path.unpack fname)
 	val _ = writeln "done: read file"
-	val tree = timeit (fn () => XML.tree_of_string s)
+	val tree = timeit (fn () => f s)
 	val _ = writeln "done: tree"
 	val x = timeit (fn () => input tree)
 	val _ = writeln "done: input"
@@ -255,6 +258,12 @@
 	x
     end
 
+fun write_to_file x = to_file XML.string_of_tree x
+fun read_from_file x = timeit (fn () => (writeln "read_from_file"; from_file XML.tree_of_string x))
+
+fun serialize_to_file x = to_file XML.encoded_string_of_tree x
+fun deserialize_from_file x = timeit (fn () => (writeln "deserialize_from_file"; from_file XML.tree_of_encoded_string x))
+
 end;
 
 structure XMLConvOutput =