src/HOL/Import/xml.ML
author wenzelm
Wed, 15 Apr 2009 11:14:48 +0200
changeset 30895 bad26d8f0adf
parent 26537 188961eb1f08
child 32952 aeb1e44fbc19
permissions -rw-r--r--
updated for Isabelle2009;
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24584
01e83ffa6c54 fixed title
haftmann
parents: 23784
diff changeset
     1
(*  Title:      HOL/Import/xml.ML
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     2
    ID:         $Id$
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     3
26537
188961eb1f08 tuned comments;
wenzelm
parents: 24584
diff changeset
     4
Yet another version of XML support.
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     5
*)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     6
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     7
signature XML =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     8
sig
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     9
  val header: string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    10
  val text: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    11
  val text_charref: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    12
  val cdata: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    13
  val element: string -> (string * string) list -> string list -> string
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    14
  
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    15
  datatype tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    16
      Elem of string * (string * string) list * tree list
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    17
    | Text of string
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    18
  
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    19
  val string_of_tree: tree -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    20
  val tree_of_string: string -> tree
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    21
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    22
  val encoded_string_of_tree : tree -> string
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    23
  val tree_of_encoded_string : string -> tree
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    24
end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    25
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    26
structure XML :> XML =
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    27
struct
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    28
19093
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    29
(*structure Seq = VectorScannerSeq
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    30
structure Scan = Scanner (structure Seq = Seq)*)
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    31
structure Seq = StringScannerSeq
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    32
structure Scan = StringScanner
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    33
19095
9497f7b174be replaced Symbol.explode by explode
obua
parents: 19093
diff changeset
    34
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    35
open Scan
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    36
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    37
(** string based representation (small scale) **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    38
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    39
val header = "<?xml version=\"1.0\"?>\n";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    40
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    41
(* text and character data *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    42
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    43
fun decode "&lt;" = "<"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    44
  | decode "&gt;" = ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    45
  | decode "&amp;" = "&"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    46
  | decode "&apos;" = "'"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    47
  | decode "&quot;" = "\""
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    48
  | decode c = c;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    49
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    50
fun encode "<" = "&lt;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    51
  | encode ">" = "&gt;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    52
  | encode "&" = "&amp;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    53
  | encode "'" = "&apos;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    54
  | encode "\"" = "&quot;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    55
  | encode c = c;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    56
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    57
fun encode_charref c = "&#" ^ Int.toString (ord c) ^ ";"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    58
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    59
val text = Library.translate_string encode
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    60
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    61
val text_charref = translate_string encode_charref;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    62
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    63
val cdata = enclose "<![CDATA[" "]]>\n"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    64
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    65
(* elements *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    66
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    67
fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    68
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    69
fun element name atts cs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    70
  let val elem = space_implode " " (name :: map attribute atts) in
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    71
    if null cs then enclose "<" "/>" elem
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    72
    else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    73
  end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    74
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    75
(** explicit XML trees **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    76
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    77
datatype tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    78
    Elem of string * (string * string) list * tree list
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    79
  | Text of string;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    80
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    81
fun string_of_tree tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    82
  let
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    83
    fun string_of (Elem (name, atts, ts)) buf =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    84
        let val buf' =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    85
          buf |> Buffer.add "<"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    86
          |> fold Buffer.add (separate " " (name :: map attribute atts))
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    87
        in
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    88
          if null ts then
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    89
            buf' |> Buffer.add "/>"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    90
          else
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    91
            buf' |> Buffer.add ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    92
            |> fold string_of ts
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    93
            |> Buffer.add "</" |> Buffer.add name |> Buffer.add ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    94
        end
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    95
      | string_of (Text s) buf = Buffer.add (text s) buf;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    96
  in Buffer.content (string_of tree Buffer.empty) end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    97
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    98
(** XML parsing **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    99
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   100
fun beginning n xs = Symbol.beginning n (Seq.take_at_most xs n)
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   101
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   102
fun err s xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   103
  "XML parsing error: " ^ s ^ "\nfound: " ^ quote (beginning 100 xs) ;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   104
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   105
val scan_whspc = Scan.any Symbol.is_blank;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   106
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   107
val scan_special = $$ "&" ^^ scan_id ^^ $$ ";" >> decode;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   108
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   109
val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   110
  (scan_special || Scan.one Symbol.is_regular)) >> implode;
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   111
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   112
val parse_cdata = Scan.this_string "<![CDATA[" |--
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   113
  (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   114
    implode) --| Scan.this_string "]]>";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   115
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   116
val parse_att =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   117
    scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   118
    (($$ "\"" || $$ "'") :-- (fn s => (Scan.repeat (Scan.unless ($$ s)
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   119
    (scan_special || Scan.one Symbol.is_regular)) >> implode) --| $$ s) >> snd);
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   120
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   121
val parse_comment = Scan.this_string "<!--" --
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   122
  Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   123
  Scan.this_string "-->";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   124
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   125
val scan_comment_whspc = 
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   126
    (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   127
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   128
val parse_pi = Scan.this_string "<?" |--
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   129
  Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   130
  Scan.this_string "?>";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   131
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   132
fun parse_content xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   133
  ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   134
    (Scan.repeat ((* scan_whspc |-- *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   135
       (   parse_elem >> single
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   136
        || parse_cdata >> (single o Text)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   137
        || parse_pi >> K []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   138
        || parse_comment >> K []) --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   139
       Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   140
         >> op @) >> List.concat) >> op @)(* --| scan_whspc*)) xs
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   141
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   142
and parse_elem xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   143
  ($$ "<" |-- scan_id --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   144
    Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   145
      !! (err "Expected > or />")
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   146
        (Scan.this_string "/>" >> K []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   147
         || $$ ">" |-- parse_content --|
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   148
            !! (err ("Expected </" ^ s ^ ">"))
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   149
              (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   150
    (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   151
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   152
val parse_document =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   153
  Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |--
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   154
    (Scan.repeat (Scan.unless ($$ ">")
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   155
      (Scan.one Symbol.is_regular)) >> implode) --| $$ ">" --| scan_whspc) --
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   156
  parse_elem;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   157
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   158
fun tree_of_string s =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   159
    let
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   160
	val seq = Seq.fromString s
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   161
	val scanner = !! (err "Malformed element") (scan_whspc |-- parse_elem --| scan_whspc)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   162
	val (x, toks) = scanner seq
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   163
    in
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   164
	if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   165
    end
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   166
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   167
(* More efficient saving and loading of xml trees employing a proprietary external format *)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   168
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   169
fun write_lstring s buf = buf |> Buffer.add (string_of_int (size s)) |> Buffer.add " " |> Buffer.add s
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   170
fun parse_lstring toks = (scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i (one (K true))) >> snd >> implode) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   171
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   172
fun write_list w l buf = buf |> Buffer.add (string_of_int (length l)) |> Buffer.add " " |> fold w l     
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   173
fun parse_list sc = scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i sc) >> snd
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   174
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   175
fun write_tree (Text s) buf = buf |> Buffer.add "T" |> write_lstring s
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   176
  | write_tree (Elem (name, attrs, children)) buf = 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   177
    buf |> Buffer.add "E" 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   178
	|> write_lstring name 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   179
	|> write_list (fn (a,b) => fn buf => buf |> write_lstring a |> write_lstring b) attrs 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   180
	|> write_list write_tree children
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   181
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   182
fun parse_tree toks = (one (K true) :-- (fn "T" => parse_lstring >> Text | "E" => parse_elem | _ => raise SyntaxError) >> snd) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   183
and parse_elem toks = (parse_lstring -- parse_list (parse_lstring -- parse_lstring) -- parse_list parse_tree >> (fn ((n, a), c) => Elem (n,a,c))) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   184
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   185
fun encoded_string_of_tree tree = Buffer.content (write_tree tree Buffer.empty)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   186
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   187
fun tree_of_encoded_string s = 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   188
    let
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   189
	fun print (a,b) = writeln (a^" "^(string_of_int b))
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   190
	val _ = print ("length of encoded string: ", size s)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   191
	val _ = writeln "Seq.fromString..."
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   192
	val seq = timeit (fn () => Seq.fromString s)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   193
	val  _ = print ("length of sequence", timeit (fn () => Seq.length seq))
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   194
	val scanner = !! (err "Malformed encoded xml") parse_tree
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   195
	val (x, toks) = scanner seq
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   196
    in
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   197
	if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   198
    end	       
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   199
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   200
end;