# HG changeset patch # User obua # Date 1140129047 -3600 # Node ID 2e487fe9593a7eef6413175fa3ead0e7d7baa9a4 # Parent 7870cf61c4b361fa0ba15b98ccfd919a327aed5a improved scanning diff -r 7870cf61c4b3 -r 2e487fe9593a src/HOL/Import/ImportRecorder.thy --- a/src/HOL/Import/ImportRecorder.thy Thu Feb 16 21:15:38 2006 +0100 +++ b/src/HOL/Import/ImportRecorder.thy Thu Feb 16 23:30:47 2006 +0100 @@ -1,4 +1,4 @@ theory ImportRecorder imports Main -uses "susp.ML" "lazy_seq.ML" "lazy_scan.ML" "xml.ML" "xmlconv.ML" "importrecorder.ML" +uses "seq.ML" "scan.ML" "susp.ML" "lazy_seq.ML" "xml.ML" "xmlconv.ML" "importrecorder.ML" begin end \ No newline at end of file diff -r 7870cf61c4b3 -r 2e487fe9593a src/HOL/Import/lazy_scan.ML --- a/src/HOL/Import/lazy_scan.ML Thu Feb 16 21:15:38 2006 +0100 +++ b/src/HOL/Import/lazy_scan.ML Thu Feb 16 23:30:47 2006 +0100 @@ -29,6 +29,7 @@ val option : ('a,'b) scanner -> ('a,'b option) scanner val repeat : ('a,'b) scanner -> ('a,'b list) scanner val repeat1 : ('a,'b) scanner -> ('a,'b list) scanner + val repeat_fixed : int -> ('a, 'b) scanner -> ('a, 'b list) scanner val ahead : ('a,'b) scanner -> ('a,'b) scanner val unless : ('a, 'b) scanner -> ('a,'c) scanner -> ('a,'c) scanner val $$ : ''a -> (''a,''a) scanner @@ -36,6 +37,8 @@ val scan_full: ('a,'b) scanner -> 'a LazySeq.seq -> 'b LazySeq.seq val scan_id : (string, string) scanner + val scan_nat : (string, int) scanner + val this : ''a list -> (''a, ''a list) scanner val this_string : string -> (string, string) scanner end @@ -175,6 +178,15 @@ (x::xs,toks3) end +fun repeat_fixed n sc = + let + fun R n xs toks = + if (n <= 0) then (List.rev xs, toks) + else case (sc toks) of (x, toks2) => R (n-1) (x::xs) toks2 + in + R n [] + end + fun ahead (sc:'a->'b*'a) toks = (#1 (sc toks),toks) fun unless test sc toks = @@ -208,6 +220,10 @@ val scan_id = one Symbol.is_letter ^^ (any Symbol.is_letdig >> implode); +val nat_of_list = the o Int.fromString o implode + +val scan_nat = repeat1 (one Symbol.is_digit) >> nat_of_list + fun this [] = (fn toks => ([], toks)) | this (xs' as (x::xs)) = one (fn y => x=y) -- this xs >> K xs' diff -r 7870cf61c4b3 -r 2e487fe9593a src/HOL/Import/lazy_seq.ML --- a/src/HOL/Import/lazy_seq.ML Thu Feb 16 21:15:38 2006 +0100 +++ b/src/HOL/Import/lazy_seq.ML Thu Feb 16 23:30:47 2006 +0100 @@ -8,12 +8,11 @@ signature LAZY_SEQ = sig - type 'a seq + include EXTENDED_SCANNER_SEQ (* From LIST *) - val null : 'a seq -> bool - val length : 'a seq -> int + val fromString : string -> string seq val @ : 'a seq * 'a seq -> 'a seq val hd : 'a seq -> 'a val tl : 'a seq -> 'a seq @@ -21,7 +20,6 @@ val getItem : 'a seq -> ('a * 'a seq) option val nth : 'a seq * int -> 'a val take : 'a seq * int -> 'a seq - val take_at_most : 'a seq * int -> 'a list val drop : 'a seq * int -> 'a seq val rev : 'a seq -> 'a seq val concat : 'a seq seq -> 'a seq @@ -89,6 +87,7 @@ exception Empty fun getItem (Seq s) = force s +val pull = getItem fun make f = Seq (delay f) fun null s = is_none (getItem s) @@ -149,11 +148,11 @@ else F' s n end -fun take_at_most (s,n) = +fun take_at_most s n = if n <= 0 then [] else case getItem s of NONE => [] - | SOME (x,s') => x::(take_at_most (s',n-1)) + | SOME (x,s') => x::(take_at_most s' (n-1)) local fun F s 0 = s @@ -547,4 +546,9 @@ F e (getItem s) end +fun fromString s = of_list (Symbol.explode s) + end + +structure LazyScan = Scanner (structure Seq = LazySeq) + diff -r 7870cf61c4b3 -r 2e487fe9593a src/HOL/Import/scan.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Import/scan.ML Thu Feb 16 23:30:47 2006 +0100 @@ -0,0 +1,220 @@ +(* Title: HOL/Import/lazy_scan.ML + ID: $Id$ + Author: Sebastian Skalberg, TU Muenchen / Steven Obua, TU Muenchen + +Scanner combinators for sequences. +*) + +signature SCANNER = +sig + + include SCANNER_SEQ + + exception SyntaxError + + type ('a,'b) scanner = 'a seq -> 'b * 'a seq + + val :-- : ('a,'b) scanner * ('b -> ('a,'c) scanner) + -> ('a,'b*'c) scanner + val -- : ('a,'b) scanner * ('a,'c) scanner -> ('a,'b*'c) scanner + val >> : ('a,'b) scanner * ('b -> 'c) -> ('a,'c) scanner + val --| : ('a,'b) scanner * ('a,'c) scanner -> ('a,'b) scanner + val |-- : ('a,'b) scanner * ('a,'c) scanner -> ('a,'c) scanner + val ^^ : ('a,string) scanner * ('a,string) scanner + -> ('a,string) scanner + val || : ('a,'b) scanner * ('a,'b) scanner -> ('a,'b) scanner + val one : ('a -> bool) -> ('a,'a) scanner + val anyone : ('a,'a) scanner + val succeed : 'b -> ('a,'b) scanner + val any : ('a -> bool) -> ('a,'a list) scanner + val any1 : ('a -> bool) -> ('a,'a list) scanner + val optional : ('a,'b) scanner -> 'b -> ('a,'b) scanner + val option : ('a,'b) scanner -> ('a,'b option) scanner + val repeat : ('a,'b) scanner -> ('a,'b list) scanner + val repeat1 : ('a,'b) scanner -> ('a,'b list) scanner + val repeat_fixed : int -> ('a, 'b) scanner -> ('a, 'b list) scanner + val ahead : ('a,'b) scanner -> ('a,'b) scanner + val unless : ('a, 'b) scanner -> ('a,'c) scanner -> ('a,'c) scanner + val $$ : ''a -> (''a,''a) scanner + val !! : ('a seq -> string) -> ('a,'b) scanner -> ('a,'b) scanner + + val scan_id : (string, string) scanner + val scan_nat : (string, int) scanner + + val this : ''a list -> (''a, ''a list) scanner + val this_string : string -> (string, string) scanner +end + +functor Scanner (structure Seq : SCANNER_SEQ) : SCANNER = +struct + +infix 7 |-- --| +infix 5 :-- -- ^^ +infix 3 >> +infix 0 || + +exception SyntaxError +exception Fail of string + +type 'a seq = 'a Seq.seq +type ('a,'b) scanner = 'a seq -> 'b * 'a seq + +val pull = Seq.pull + +fun (sc1 :-- sc2) toks = + let + val (x,toks2) = sc1 toks + val (y,toks3) = sc2 x toks2 + in + ((x,y),toks3) + end + +fun (sc1 -- sc2) toks = + let + val (x,toks2) = sc1 toks + val (y,toks3) = sc2 toks2 + in + ((x,y),toks3) + end + +fun (sc >> f) toks = + let + val (x,toks2) = sc toks + in + (f x,toks2) + end + +fun (sc1 --| sc2) toks = + let + val (x,toks2) = sc1 toks + val (_,toks3) = sc2 toks2 + in + (x,toks3) + end + +fun (sc1 |-- sc2) toks = + let + val (_,toks2) = sc1 toks + in + sc2 toks2 + end + +fun (sc1 ^^ sc2) toks = + let + val (x,toks2) = sc1 toks + val (y,toks3) = sc2 toks2 + in + (x^y,toks3) + end + +fun (sc1 || sc2) toks = + (sc1 toks) + handle SyntaxError => sc2 toks + +fun anyone toks = case pull toks of NONE => raise SyntaxError | SOME x => x + +fun one p toks = case anyone toks of x as (t, toks) => if p t then x else raise SyntaxError + +fun succeed e toks = (e,toks) + +fun any p toks = + case pull toks of + NONE => ([],toks) + | SOME(x,toks2) => if p x + then + let + val (xs,toks3) = any p toks2 + in + (x::xs,toks3) + end + else ([],toks) + +fun any1 p toks = + let + val (x,toks2) = one p toks + val (xs,toks3) = any p toks2 + in + (x::xs,toks3) + end + +fun optional sc def = sc || succeed def +fun option sc = (sc >> SOME) || succeed NONE + +(* +fun repeat sc = + let + fun R toks = + let + val (x,toks2) = sc toks + val (xs,toks3) = R toks2 + in + (x::xs,toks3) + end + handle SyntaxError => ([],toks) + in + R + end +*) + +(* A tail-recursive version of repeat. It is (ever so) slightly slower + * than the above, non-tail-recursive version (due to the garbage generation + * associated with the reversal of the list). However, this version will be + * able to process input where the former version must give up (due to stack + * overflow). The slowdown seems to be around the one percent mark. + *) +fun repeat sc = + let + fun R xs toks = + case SOME (sc toks) handle SyntaxError => NONE of + SOME (x,toks2) => R (x::xs) toks2 + | NONE => (List.rev xs,toks) + in + R [] + end + +fun repeat1 sc toks = + let + val (x,toks2) = sc toks + val (xs,toks3) = repeat sc toks2 + in + (x::xs,toks3) + end + +fun repeat_fixed n sc = + let + fun R n xs toks = + if (n <= 0) then (List.rev xs, toks) + else case (sc toks) of (x, toks2) => R (n-1) (x::xs) toks2 + in + R n [] + end + +fun ahead (sc:'a->'b*'a) toks = (#1 (sc toks),toks) + +fun unless test sc toks = + let + val test_failed = (test toks;false) handle SyntaxError => true + in + if test_failed + then sc toks + else raise SyntaxError + end + +fun $$ arg = one (fn x => x = arg) + +fun !! f sc toks = (sc toks + handle SyntaxError => raise Fail (f toks)) + +val scan_id = one Symbol.is_letter ^^ (any Symbol.is_letdig >> implode); + +val nat_of_list = the o Int.fromString o implode + +val scan_nat = repeat1 (one Symbol.is_digit) >> nat_of_list + +fun this [] = (fn toks => ([], toks)) + | this (xs' as (x::xs)) = one (fn y => x=y) -- this xs >> K xs' + +fun this_string s = this (explode s) >> K s + +end + diff -r 7870cf61c4b3 -r 2e487fe9593a src/HOL/Import/seq.ML --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/HOL/Import/seq.ML Thu Feb 16 23:30:47 2006 +0100 @@ -0,0 +1,99 @@ +signature SCANNER_SEQ = +sig + type 'a seq + + val pull : 'a seq -> ('a * 'a seq) option +end + +signature EXTENDED_SCANNER_SEQ = +sig + + include SCANNER_SEQ + + val null : 'a seq -> bool + val length : 'a seq -> int + val take_at_most : 'a seq -> int -> 'a list + +end + +functor ExtendScannerSeq (structure Seq : SCANNER_SEQ) : EXTENDED_SCANNER_SEQ = +struct + +type 'a seq = 'a Seq.seq + +val pull = Seq.pull + +fun null s = is_none (pull s) + +fun take_at_most s n = + if n <= 0 then [] else + case pull s of + NONE => [] + | SOME (x,s') => x::(take_at_most s' (n-1)) + +fun length' s n = + case pull s of + NONE => n + | SOME (_, s') => length' s' (n+1) + +fun length s = length' s 0 + +end + +signature VECTOR_SCANNER_SEQ = +sig + include EXTENDED_SCANNER_SEQ + + val fromString : string -> string seq + val fromVector : 'a Vector.vector -> 'a seq +end + +structure VectorScannerSeq :> VECTOR_SCANNER_SEQ = +struct + +structure Incubator : SCANNER_SEQ = +struct + +type 'a seq = 'a Vector.vector * int * int +fun pull (v, len, i) = if i >= len then NONE else SOME (Vector.sub (v, i), (v, len, i+1)) + +end + +structure Extended = ExtendScannerSeq (structure Seq = Incubator) + +open Extended + +fun fromVector v = (v, Vector.length v, 0) +fun vector_from_string s = Vector.tabulate (String.size s, fn i => String.str (String.sub (s, i))) +fun fromString s = fromVector (vector_from_string s) + +end + +signature LIST_SCANNER_SEQ = +sig + include EXTENDED_SCANNER_SEQ + + val fromString : string -> string seq + val fromList : 'a list -> 'a seq +end + +structure ListScannerSeq :> LIST_SCANNER_SEQ = +struct + +structure Incubator : SCANNER_SEQ = +struct + +type 'a seq = 'a list +fun pull [] = NONE + | pull (x::xs) = SOME (x, xs) + +end + +structure Extended = ExtendScannerSeq (structure Seq = Incubator) + +open Extended + +val fromList = I +val fromString = Symbol.explode + +end diff -r 7870cf61c4b3 -r 2e487fe9593a src/HOL/Import/xml.ML --- a/src/HOL/Import/xml.ML Thu Feb 16 21:15:38 2006 +0100 +++ b/src/HOL/Import/xml.ML Thu Feb 16 23:30:47 2006 +0100 @@ -12,24 +12,29 @@ val text_charref: string -> string val cdata: string -> string val element: string -> (string * string) list -> string list -> string + datatype tree = Elem of string * (string * string) list * tree list | Text of string + val string_of_tree: tree -> string val tree_of_string: string -> tree + + val encoded_string_of_tree : tree -> string + val tree_of_encoded_string : string -> tree end; -structure XML = +structure XML :> XML = struct -structure Scan = LazyScan +structure Seq = ListScannerSeq +structure Scan = Scanner (structure Seq = Seq) open Scan (** string based representation (small scale) **) val header = "\n"; - (* text and character data *) fun decode "<" = "<" @@ -89,7 +94,7 @@ (** XML parsing **) -fun beginning n xs = Symbol.beginning n (LazySeq.take_at_most (xs, n)) +fun beginning n xs = Symbol.beginning n (Seq.take_at_most xs n) fun err s xs = "XML parsing error: " ^ s ^ "\nfound: " ^ quote (beginning 100 xs) ; @@ -149,11 +154,44 @@ fun tree_of_string s = let - val seq = LazySeq.of_list (Symbol.explode s) + val seq = Seq.fromString s val scanner = !! (err "Malformed element") (scan_whspc |-- parse_elem --| scan_whspc) val (x, toks) = scanner seq in - if LazySeq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'") + if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'") end - + +(* More efficient saving and loading of xml trees employing a proprietary external format *) + +fun write_lstring s buf = buf |> Buffer.add (string_of_int (size s)) |> Buffer.add " " |> Buffer.add s +fun parse_lstring toks = (scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i (one (K true))) >> snd >> implode) toks + +fun write_list w l buf = buf |> Buffer.add (string_of_int (length l)) |> Buffer.add " " |> fold w l +fun parse_list sc = scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i sc) >> snd + +fun write_tree (Text s) buf = buf |> Buffer.add "T" |> write_lstring s + | write_tree (Elem (name, attrs, children)) buf = + buf |> Buffer.add "E" + |> write_lstring name + |> write_list (fn (a,b) => fn buf => buf |> write_lstring a |> write_lstring b) attrs + |> write_list write_tree children + +fun parse_tree toks = (one (K true) :-- (fn "T" => parse_lstring >> Text | "E" => parse_elem | _ => raise SyntaxError) >> snd) toks +and parse_elem toks = (parse_lstring -- parse_list (parse_lstring -- parse_lstring) -- parse_list parse_tree >> (fn ((n, a), c) => Elem (n,a,c))) toks + +fun encoded_string_of_tree tree = Buffer.content (write_tree tree Buffer.empty) + +fun tree_of_encoded_string s = + let + fun print (a,b) = writeln (a^" "^(string_of_int b)) + val _ = print ("length of encoded string: ", size s) + val _ = writeln "Seq.fromString..." + val seq = timeit (fn () => Seq.fromString s) + val _ = print ("length of sequence", timeit (fn () => Seq.length seq)) + val scanner = !! (err "Malformed encoded xml") parse_tree + val (x, toks) = scanner seq + in + if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'") + end + end; diff -r 7870cf61c4b3 -r 2e487fe9593a src/HOL/Import/xmlconv.ML --- a/src/HOL/Import/xmlconv.ML Thu Feb 16 21:15:38 2006 +0100 +++ b/src/HOL/Import/xmlconv.ML Thu Feb 16 23:30:47 2006 +0100 @@ -57,6 +57,9 @@ val write_to_file: 'a output -> string -> 'a -> unit val read_from_file: 'a input -> string -> 'a + + val serialize_to_file : 'a output -> string -> 'a -> unit + val deserialize_from_file : 'a input -> string -> 'a end; structure XMLConv : XML_CONV = @@ -240,14 +243,14 @@ ) e -fun write_to_file output fname x = File.write (Path.unpack fname) (XML.string_of_tree (output x)) +fun to_file f output fname x = File.write (Path.unpack fname) (f (output x)) -fun read_from_file input fname = +fun from_file f input fname = let val _ = writeln "read_from_file enter" val s = File.read (Path.unpack fname) val _ = writeln "done: read file" - val tree = timeit (fn () => XML.tree_of_string s) + val tree = timeit (fn () => f s) val _ = writeln "done: tree" val x = timeit (fn () => input tree) val _ = writeln "done: input" @@ -255,6 +258,12 @@ x end +fun write_to_file x = to_file XML.string_of_tree x +fun read_from_file x = timeit (fn () => (writeln "read_from_file"; from_file XML.tree_of_string x)) + +fun serialize_to_file x = to_file XML.encoded_string_of_tree x +fun deserialize_from_file x = timeit (fn () => (writeln "deserialize_from_file"; from_file XML.tree_of_encoded_string x)) + end; structure XMLConvOutput =