robust representation of low ASCII control characters within XML/YXML text;
authorwenzelm
Thu, 17 Dec 2009 13:58:15 +0100
changeset 34095 c2f176a38448
parent 34094 61e19e96828f
child 34096 e438a5875c16
robust representation of low ASCII control characters within XML/YXML text;
src/Pure/General/symbol.ML
src/Pure/General/yxml.ML
--- a/src/Pure/General/symbol.ML	Wed Dec 16 15:15:39 2009 +0100
+++ b/src/Pure/General/symbol.ML	Thu Dec 17 13:58:15 2009 +0100
@@ -34,6 +34,7 @@
   val is_ascii_hex: symbol -> bool
   val is_ascii_quasi: symbol -> bool
   val is_ascii_blank: symbol -> bool
+  val is_ascii_control: symbol -> bool
   val is_ascii_lower: symbol -> bool
   val is_ascii_upper: symbol -> bool
   val to_ascii_lower: symbol -> symbol
@@ -163,6 +164,8 @@
   fn " " => true | "\t" => true | "\n" => true | "\^K" => true | "\^L" => true | "\^M" => true
     | _ => false;
 
+fun is_ascii_control s = is_char s andalso ord s < 32 andalso not (is_ascii_blank s);
+
 fun is_ascii_lower s = is_char s andalso (ord "a" <= ord s andalso ord s <= ord "z");
 fun is_ascii_upper s = is_char s andalso (ord "A" <= ord s andalso ord s <= ord "Z");
 
--- a/src/Pure/General/yxml.ML	Wed Dec 16 15:15:39 2009 +0100
+++ b/src/Pure/General/yxml.ML	Thu Dec 17 13:58:15 2009 +0100
@@ -15,6 +15,7 @@
 
 signature YXML =
 sig
+  val binary_text: string -> string
   val output_markup: Markup.T -> string * string
   val element: string -> XML.attributes -> string list -> string
   val string_of: XML.tree -> string
@@ -27,6 +28,19 @@
 
 (** string representation **)
 
+(* binary_text -- idempotent recoding *)
+
+fun pseudo_utf8 c =
+  if Symbol.is_ascii_control c
+  then chr 192 ^ chr (128 + ord c)
+  else c;
+
+fun binary_text str =
+  if exists_string Symbol.is_ascii_control str
+  then translate_string pseudo_utf8 str
+  else str;
+
+
 (* markers *)
 
 val X = Symbol.ENQ;