# HG changeset patch # User wenzelm # Date 1278444794 -7200 # Node ID 5d2b3e8273718144ca21ab5477e2ec94887f55b7 # Parent 8244558af8a55148b3c3ce8f0067795024110fa8 implode pseudo utf8, i.e. decode byte-stuffed low ASCII characters; diff -r 8244558af8a5 -r 5d2b3e827371 src/Pure/General/symbol.ML --- a/src/Pure/General/symbol.ML Tue Jul 06 10:02:24 2010 +0200 +++ b/src/Pure/General/symbol.ML Tue Jul 06 21:33:14 2010 +0200 @@ -432,6 +432,10 @@ fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192; +fun implode_pseudo_utf8 (cs as ["\192", c]) = + if ord c < 160 then chr (ord c - 128) else implode cs + | implode_pseudo_utf8 cs = implode cs; + val scan_encoded_newline = $$ "\^M" -- $$ "\n" >> K "\n" || $$ "\^M" >> K "\n" || @@ -443,7 +447,7 @@ val scan = Scan.one is_plain || - Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode || + Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 || scan_encoded_newline || ($$ "\\" ^^ $$ "<" ^^ !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs)))