src/HOL/Import/xml.ML
author haftmann
Sat, 15 Sep 2007 19:27:35 +0200
changeset 24584 01e83ffa6c54
parent 23784 75e6b9dd5336
child 26537 188961eb1f08
permissions -rw-r--r--
fixed title
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24584
01e83ffa6c54 fixed title
haftmann
parents: 23784
diff changeset
     1
(*  Title:      HOL/Import/xml.ML
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     2
    ID:         $Id$
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     3
    Author:     David Aspinall, Stefan Berghofer and Markus Wenzel
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     4
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     5
Basic support for XML.
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     6
*)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     7
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     8
signature XML =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     9
sig
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    10
  val header: string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    11
  val text: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    12
  val text_charref: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    13
  val cdata: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    14
  val element: string -> (string * string) list -> string list -> string
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    15
  
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    16
  datatype tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    17
      Elem of string * (string * string) list * tree list
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    18
    | Text of string
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    19
  
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    20
  val string_of_tree: tree -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    21
  val tree_of_string: string -> tree
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    22
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    23
  val encoded_string_of_tree : tree -> string
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    24
  val tree_of_encoded_string : string -> tree
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    25
end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    26
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    27
structure XML :> XML =
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    28
struct
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    29
19093
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    30
(*structure Seq = VectorScannerSeq
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    31
structure Scan = Scanner (structure Seq = Seq)*)
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    32
structure Seq = StringScannerSeq
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    33
structure Scan = StringScanner
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    34
19095
9497f7b174be replaced Symbol.explode by explode
obua
parents: 19093
diff changeset
    35
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    36
open Scan
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    37
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    38
(** string based representation (small scale) **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    39
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    40
val header = "<?xml version=\"1.0\"?>\n";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    41
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    42
(* text and character data *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    43
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    44
fun decode "&lt;" = "<"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    45
  | decode "&gt;" = ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    46
  | decode "&amp;" = "&"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    47
  | decode "&apos;" = "'"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    48
  | decode "&quot;" = "\""
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    49
  | decode c = c;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    50
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    51
fun encode "<" = "&lt;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    52
  | encode ">" = "&gt;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    53
  | encode "&" = "&amp;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    54
  | encode "'" = "&apos;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    55
  | encode "\"" = "&quot;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    56
  | encode c = c;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    57
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    58
fun encode_charref c = "&#" ^ Int.toString (ord c) ^ ";"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    59
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    60
val text = Library.translate_string encode
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    61
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    62
val text_charref = translate_string encode_charref;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    63
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    64
val cdata = enclose "<![CDATA[" "]]>\n"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    65
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    66
(* elements *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    67
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    68
fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    69
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    70
fun element name atts cs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    71
  let val elem = space_implode " " (name :: map attribute atts) in
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    72
    if null cs then enclose "<" "/>" elem
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    73
    else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    74
  end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    75
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    76
(** explicit XML trees **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    77
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    78
datatype tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    79
    Elem of string * (string * string) list * tree list
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    80
  | Text of string;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    81
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    82
fun string_of_tree tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    83
  let
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    84
    fun string_of (Elem (name, atts, ts)) buf =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    85
        let val buf' =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    86
          buf |> Buffer.add "<"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    87
          |> fold Buffer.add (separate " " (name :: map attribute atts))
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    88
        in
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    89
          if null ts then
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    90
            buf' |> Buffer.add "/>"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    91
          else
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    92
            buf' |> Buffer.add ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    93
            |> fold string_of ts
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    94
            |> Buffer.add "</" |> Buffer.add name |> Buffer.add ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    95
        end
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    96
      | string_of (Text s) buf = Buffer.add (text s) buf;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    97
  in Buffer.content (string_of tree Buffer.empty) end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    98
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    99
(** XML parsing **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   100
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   101
fun beginning n xs = Symbol.beginning n (Seq.take_at_most xs n)
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   102
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   103
fun err s xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   104
  "XML parsing error: " ^ s ^ "\nfound: " ^ quote (beginning 100 xs) ;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   105
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   106
val scan_whspc = Scan.any Symbol.is_blank;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   107
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   108
val scan_special = $$ "&" ^^ scan_id ^^ $$ ";" >> decode;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   109
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   110
val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   111
  (scan_special || Scan.one Symbol.is_regular)) >> implode;
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   112
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   113
val parse_cdata = Scan.this_string "<![CDATA[" |--
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   114
  (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   115
    implode) --| Scan.this_string "]]>";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   116
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   117
val parse_att =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   118
    scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   119
    (($$ "\"" || $$ "'") :-- (fn s => (Scan.repeat (Scan.unless ($$ s)
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   120
    (scan_special || Scan.one Symbol.is_regular)) >> implode) --| $$ s) >> snd);
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   121
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   122
val parse_comment = Scan.this_string "<!--" --
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   123
  Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   124
  Scan.this_string "-->";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   125
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   126
val scan_comment_whspc = 
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   127
    (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   128
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   129
val parse_pi = Scan.this_string "<?" |--
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   130
  Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   131
  Scan.this_string "?>";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   132
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   133
fun parse_content xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   134
  ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   135
    (Scan.repeat ((* scan_whspc |-- *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   136
       (   parse_elem >> single
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   137
        || parse_cdata >> (single o Text)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   138
        || parse_pi >> K []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   139
        || parse_comment >> K []) --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   140
       Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   141
         >> op @) >> List.concat) >> op @)(* --| scan_whspc*)) xs
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   142
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   143
and parse_elem xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   144
  ($$ "<" |-- scan_id --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   145
    Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   146
      !! (err "Expected > or />")
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   147
        (Scan.this_string "/>" >> K []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   148
         || $$ ">" |-- parse_content --|
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   149
            !! (err ("Expected </" ^ s ^ ">"))
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   150
              (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   151
    (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   152
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   153
val parse_document =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   154
  Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |--
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   155
    (Scan.repeat (Scan.unless ($$ ">")
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   156
      (Scan.one Symbol.is_regular)) >> implode) --| $$ ">" --| scan_whspc) --
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   157
  parse_elem;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   158
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   159
fun tree_of_string s =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   160
    let
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   161
	val seq = Seq.fromString s
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   162
	val scanner = !! (err "Malformed element") (scan_whspc |-- parse_elem --| scan_whspc)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   163
	val (x, toks) = scanner seq
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   164
    in
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   165
	if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   166
    end
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   167
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   168
(* More efficient saving and loading of xml trees employing a proprietary external format *)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   169
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   170
fun write_lstring s buf = buf |> Buffer.add (string_of_int (size s)) |> Buffer.add " " |> Buffer.add s
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   171
fun parse_lstring toks = (scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i (one (K true))) >> snd >> implode) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   172
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   173
fun write_list w l buf = buf |> Buffer.add (string_of_int (length l)) |> Buffer.add " " |> fold w l     
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   174
fun parse_list sc = scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i sc) >> snd
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   175
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   176
fun write_tree (Text s) buf = buf |> Buffer.add "T" |> write_lstring s
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   177
  | write_tree (Elem (name, attrs, children)) buf = 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   178
    buf |> Buffer.add "E" 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   179
	|> write_lstring name 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   180
	|> write_list (fn (a,b) => fn buf => buf |> write_lstring a |> write_lstring b) attrs 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   181
	|> write_list write_tree children
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   182
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   183
fun parse_tree toks = (one (K true) :-- (fn "T" => parse_lstring >> Text | "E" => parse_elem | _ => raise SyntaxError) >> snd) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   184
and parse_elem toks = (parse_lstring -- parse_list (parse_lstring -- parse_lstring) -- parse_list parse_tree >> (fn ((n, a), c) => Elem (n,a,c))) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   185
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   186
fun encoded_string_of_tree tree = Buffer.content (write_tree tree Buffer.empty)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   187
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   188
fun tree_of_encoded_string s = 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   189
    let
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   190
	fun print (a,b) = writeln (a^" "^(string_of_int b))
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   191
	val _ = print ("length of encoded string: ", size s)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   192
	val _ = writeln "Seq.fromString..."
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   193
	val seq = timeit (fn () => Seq.fromString s)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   194
	val  _ = print ("length of sequence", timeit (fn () => Seq.length seq))
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   195
	val scanner = !! (err "Malformed encoded xml") parse_tree
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   196
	val (x, toks) = scanner seq
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   197
    in
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   198
	if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   199
    end	       
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   200
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   201
end;