src/HOL/Import/xml.ML
author haftmann
Wed, 11 Aug 2010 17:59:33 +0200
changeset 38383 1ad96229b455
parent 32960 69916a850301
child 41491 a2ad5b824051
permissions -rw-r--r--
tuned whitespace
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24584
01e83ffa6c54 fixed title
haftmann
parents: 23784
diff changeset
     1
(*  Title:      HOL/Import/xml.ML
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     2
26537
188961eb1f08 tuned comments;
wenzelm
parents: 24584
diff changeset
     3
Yet another version of XML support.
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     4
*)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     5
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     6
signature XML =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     7
sig
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     8
  val header: string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
     9
  val text: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    10
  val text_charref: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    11
  val cdata: string -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    12
  val element: string -> (string * string) list -> string list -> string
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    13
  
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    14
  datatype tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    15
      Elem of string * (string * string) list * tree list
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    16
    | Text of string
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    17
  
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    18
  val string_of_tree: tree -> string
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    19
  val tree_of_string: string -> tree
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    20
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    21
  val encoded_string_of_tree : tree -> string
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    22
  val tree_of_encoded_string : string -> tree
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    23
end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    24
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    25
structure XML :> XML =
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    26
struct
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    27
19093
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    28
(*structure Seq = VectorScannerSeq
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    29
structure Scan = Scanner (structure Seq = Seq)*)
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    30
structure Seq = StringScannerSeq
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    31
structure Scan = StringScanner
6d584f9d2021 use monomorphic sequences / scanners
obua
parents: 19091
diff changeset
    32
19095
9497f7b174be replaced Symbol.explode by explode
obua
parents: 19093
diff changeset
    33
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    34
open Scan
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    35
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    36
(** string based representation (small scale) **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    37
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    38
val header = "<?xml version=\"1.0\"?>\n";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    39
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    40
(* text and character data *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    41
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    42
fun decode "&lt;" = "<"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    43
  | decode "&gt;" = ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    44
  | decode "&amp;" = "&"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    45
  | decode "&apos;" = "'"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    46
  | decode "&quot;" = "\""
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    47
  | decode c = c;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    48
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    49
fun encode "<" = "&lt;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    50
  | encode ">" = "&gt;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    51
  | encode "&" = "&amp;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    52
  | encode "'" = "&apos;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    53
  | encode "\"" = "&quot;"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    54
  | encode c = c;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    55
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    56
fun encode_charref c = "&#" ^ Int.toString (ord c) ^ ";"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    57
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    58
val text = Library.translate_string encode
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    59
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    60
val text_charref = translate_string encode_charref;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    61
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    62
val cdata = enclose "<![CDATA[" "]]>\n"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    63
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    64
(* elements *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    65
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    66
fun attribute (a, x) = a ^ " = \"" ^ text x ^ "\"";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    67
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    68
fun element name atts cs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    69
  let val elem = space_implode " " (name :: map attribute atts) in
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    70
    if null cs then enclose "<" "/>" elem
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    71
    else enclose "<" ">" elem ^ implode cs ^ enclose "</" ">" name
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    72
  end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    73
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    74
(** explicit XML trees **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    75
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    76
datatype tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    77
    Elem of string * (string * string) list * tree list
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    78
  | Text of string;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    79
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    80
fun string_of_tree tree =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    81
  let
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    82
    fun string_of (Elem (name, atts, ts)) buf =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    83
        let val buf' =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    84
          buf |> Buffer.add "<"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    85
          |> fold Buffer.add (separate " " (name :: map attribute atts))
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    86
        in
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    87
          if null ts then
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    88
            buf' |> Buffer.add "/>"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    89
          else
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    90
            buf' |> Buffer.add ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    91
            |> fold string_of ts
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    92
            |> Buffer.add "</" |> Buffer.add name |> Buffer.add ">"
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    93
        end
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    94
      | string_of (Text s) buf = Buffer.add (text s) buf;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    95
  in Buffer.content (string_of tree Buffer.empty) end;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    96
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    97
(** XML parsing **)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
    98
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
    99
fun beginning n xs = Symbol.beginning n (Seq.take_at_most xs n)
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   100
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   101
fun err s xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   102
  "XML parsing error: " ^ s ^ "\nfound: " ^ quote (beginning 100 xs) ;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   103
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   104
val scan_whspc = Scan.any Symbol.is_blank;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   105
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   106
val scan_special = $$ "&" ^^ scan_id ^^ $$ ";" >> decode;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   107
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   108
val parse_chars = Scan.repeat1 (Scan.unless ((* scan_whspc -- *)$$ "<")
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   109
  (scan_special || Scan.one Symbol.is_regular)) >> implode;
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   110
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   111
val parse_cdata = Scan.this_string "<![CDATA[" |--
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   112
  (Scan.repeat (Scan.unless (Scan.this_string "]]>") (Scan.one Symbol.is_regular)) >>
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   113
    implode) --| Scan.this_string "]]>";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   114
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   115
val parse_att =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   116
    scan_id --| scan_whspc --| $$ "=" --| scan_whspc --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   117
    (($$ "\"" || $$ "'") :-- (fn s => (Scan.repeat (Scan.unless ($$ s)
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   118
    (scan_special || Scan.one Symbol.is_regular)) >> implode) --| $$ s) >> snd);
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   119
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   120
val parse_comment = Scan.this_string "<!--" --
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   121
  Scan.repeat (Scan.unless (Scan.this_string "-->") (Scan.one Symbol.is_regular)) --
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   122
  Scan.this_string "-->";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   123
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   124
val scan_comment_whspc = 
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   125
    (scan_whspc >> K()) --| (Scan.repeat (parse_comment |-- (scan_whspc >> K())));
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   126
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   127
val parse_pi = Scan.this_string "<?" |--
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   128
  Scan.repeat (Scan.unless (Scan.this_string "?>") (Scan.one Symbol.is_regular)) --|
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   129
  Scan.this_string "?>";
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   130
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   131
fun parse_content xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   132
  ((Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) [] --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   133
    (Scan.repeat ((* scan_whspc |-- *)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   134
       (   parse_elem >> single
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   135
        || parse_cdata >> (single o Text)
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   136
        || parse_pi >> K []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   137
        || parse_comment >> K []) --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   138
       Scan.optional ((* scan_whspc |-- *) parse_chars >> (single o Text)) []
32952
aeb1e44fbc19 replaced String.concat by implode;
wenzelm
parents: 26537
diff changeset
   139
         >> op @) >> flat) >> op @)(* --| scan_whspc*)) xs
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   140
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   141
and parse_elem xs =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   142
  ($$ "<" |-- scan_id --
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   143
    Scan.repeat (scan_whspc |-- parse_att) --| scan_whspc :-- (fn (s, _) =>
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   144
      !! (err "Expected > or />")
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   145
        (Scan.this_string "/>" >> K []
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   146
         || $$ ">" |-- parse_content --|
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   147
            !! (err ("Expected </" ^ s ^ ">"))
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   148
              (Scan.this_string ("</" ^ s) --| scan_whspc --| $$ ">"))) >>
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   149
    (fn ((s, atts), ts) => Elem (s, atts, ts))) xs;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   150
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   151
val parse_document =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   152
  Scan.option (Scan.this_string "<!DOCTYPE" -- scan_whspc |--
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   153
    (Scan.repeat (Scan.unless ($$ ">")
23784
75e6b9dd5336 Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents: 19095
diff changeset
   154
      (Scan.one Symbol.is_regular)) >> implode) --| $$ ">" --| scan_whspc) --
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   155
  parse_elem;
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   156
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   157
fun tree_of_string s =
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   158
    let
32960
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   159
        val seq = Seq.fromString s
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   160
        val scanner = !! (err "Malformed element") (scan_whspc |-- parse_elem --| scan_whspc)
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   161
        val (x, toks) = scanner seq
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   162
    in
32960
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   163
        if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   164
    end
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   165
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   166
(* More efficient saving and loading of xml trees employing a proprietary external format *)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   167
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   168
fun write_lstring s buf = buf |> Buffer.add (string_of_int (size s)) |> Buffer.add " " |> Buffer.add s
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   169
fun parse_lstring toks = (scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i (one (K true))) >> snd >> implode) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   170
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   171
fun write_list w l buf = buf |> Buffer.add (string_of_int (length l)) |> Buffer.add " " |> fold w l     
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   172
fun parse_list sc = scan_nat --| one Symbol.is_blank :-- (fn i => repeat_fixed i sc) >> snd
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   173
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   174
fun write_tree (Text s) buf = buf |> Buffer.add "T" |> write_lstring s
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   175
  | write_tree (Elem (name, attrs, children)) buf = 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   176
    buf |> Buffer.add "E" 
32960
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   177
        |> write_lstring name 
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   178
        |> write_list (fn (a,b) => fn buf => buf |> write_lstring a |> write_lstring b) attrs 
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   179
        |> write_list write_tree children
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   180
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   181
fun parse_tree toks = (one (K true) :-- (fn "T" => parse_lstring >> Text | "E" => parse_elem | _ => raise SyntaxError) >> snd) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   182
and parse_elem toks = (parse_lstring -- parse_list (parse_lstring -- parse_lstring) -- parse_list parse_tree >> (fn ((n, a), c) => Elem (n,a,c))) toks
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   183
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   184
fun encoded_string_of_tree tree = Buffer.content (write_tree tree Buffer.empty)
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   185
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   186
fun tree_of_encoded_string s = 
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   187
    let
32960
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   188
        fun print (a,b) = writeln (a^" "^(string_of_int b))
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   189
        val _ = print ("length of encoded string: ", size s)
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   190
        val _ = writeln "Seq.fromString..."
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   191
        val seq = timeit (fn () => Seq.fromString s)
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   192
        val  _ = print ("length of sequence", timeit (fn () => Seq.length seq))
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   193
        val scanner = !! (err "Malformed encoded xml") parse_tree
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   194
        val (x, toks) = scanner seq
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   195
    in
32960
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   196
        if Seq.null toks then x else error ("Unprocessed input: '"^(beginning 100 toks)^"'")
69916a850301 eliminated hard tabulators, guessing at each author's individual tab-width;
wenzelm
parents: 32952
diff changeset
   197
    end        
19089
2e487fe9593a improved scanning
obua
parents: 19064
diff changeset
   198
19064
bf19cc5a7899 fixed bugs, added caching
obua
parents:
diff changeset
   199
end;