# HG changeset patch # User wenzelm # Date 1261054695 -3600 # Node ID c2f176a38448181ba14e6900700589c694de9ad4 # Parent 61e19e96828f1fcc6f417719f44bdd566b3cfac4 robust representation of low ASCII control characters within XML/YXML text; diff -r 61e19e96828f -r c2f176a38448 src/Pure/General/symbol.ML --- a/src/Pure/General/symbol.ML Wed Dec 16 15:15:39 2009 +0100 +++ b/src/Pure/General/symbol.ML Thu Dec 17 13:58:15 2009 +0100 @@ -34,6 +34,7 @@ val is_ascii_hex: symbol -> bool val is_ascii_quasi: symbol -> bool val is_ascii_blank: symbol -> bool + val is_ascii_control: symbol -> bool val is_ascii_lower: symbol -> bool val is_ascii_upper: symbol -> bool val to_ascii_lower: symbol -> symbol @@ -163,6 +164,8 @@ fn " " => true | "\t" => true | "\n" => true | "\^K" => true | "\^L" => true | "\^M" => true | _ => false; +fun is_ascii_control s = is_char s andalso ord s < 32 andalso not (is_ascii_blank s); + fun is_ascii_lower s = is_char s andalso (ord "a" <= ord s andalso ord s <= ord "z"); fun is_ascii_upper s = is_char s andalso (ord "A" <= ord s andalso ord s <= ord "Z"); diff -r 61e19e96828f -r c2f176a38448 src/Pure/General/yxml.ML --- a/src/Pure/General/yxml.ML Wed Dec 16 15:15:39 2009 +0100 +++ b/src/Pure/General/yxml.ML Thu Dec 17 13:58:15 2009 +0100 @@ -15,6 +15,7 @@ signature YXML = sig + val binary_text: string -> string val output_markup: Markup.T -> string * string val element: string -> XML.attributes -> string list -> string val string_of: XML.tree -> string @@ -27,6 +28,19 @@ (** string representation **) +(* binary_text -- idempotent recoding *) + +fun pseudo_utf8 c = + if Symbol.is_ascii_control c + then chr 192 ^ chr (128 + ord c) + else c; + +fun binary_text str = + if exists_string Symbol.is_ascii_control str + then translate_string pseudo_utf8 str + else str; + + (* markers *) val X = Symbol.ENQ;