# HG changeset patch # User noschinl # Date 1302722610 -7200 # Node ID 7a1655920fe8f589061c3bb07b580f3167159a0f # Parent 782991e4180dc97905463b4c1326ffe6d9e90031 Add YXML.parse_file to parse and process big data files diff -r 782991e4180d -r 7a1655920fe8 src/Pure/General/yxml.ML --- a/src/Pure/General/yxml.ML Wed Apr 13 20:43:00 2011 +0200 +++ b/src/Pure/General/yxml.ML Wed Apr 13 21:23:30 2011 +0200 @@ -106,6 +106,7 @@ fun pop ((("", _), _) :: _) = err_unbalanced "" | pop ((markup, body) :: pending) = add (XML.Elem (markup, rev body)) pending; +val stack_init = [(("", []), [])]; (* parsing *) @@ -119,10 +120,16 @@ | parse_chunk ("" :: name :: atts) = push name (map parse_attrib atts) | parse_chunk txts = fold (add o XML.Text) txts; +fun preparse_chunk _ "" x = x + | preparse_chunk f str (pending, results) = + (case parse_chunk (String.fields (is_char Y) str) pending of + [(("", _), [result])] => (stack_init, f result :: results) + | foo => (foo, results)); + in fun parse_body source = - (case fold parse_chunk (split_string source) [(("", []), [])] of + (case fold parse_chunk (split_string source) stack_init of [(("", _), result)] => rev result | ((name, _), _) :: _ => err_unbalanced name); @@ -132,6 +139,12 @@ | [] => XML.Text "" | _ => err "multiple results"); +fun parse_file' f path = + (case File.fold_fields (is_char X) (preparse_chunk f) + path (stack_init, []) of + ([(("", _), [])], result) => rev result + | (((name, _), _) :: _, _) => err_unbalanced name); + end; end;