merged
authorblanchet
Mon, 28 Jun 2010 08:55:46 +0200
changeset 37586 a209fff74792
parent 37561 19fca77829ea (diff)
parent 37585 c2ed8112ce57 (current diff)
child 37587 466031e057a0
merged
src/HOL/Tools/Sledgehammer/sledgehammer_fact_preprocessor.ML
src/HOL/Tools/Sledgehammer/sledgehammer_fol_clause.ML
src/HOL/Tools/Sledgehammer/sledgehammer_hol_clause.ML
--- a/NEWS	Fri Jun 25 23:35:14 2010 +0200
+++ b/NEWS	Mon Jun 28 08:55:46 2010 +0200
@@ -4,6 +4,17 @@
 New in this Isabelle version
 ----------------------------
 
+*** General ***
+
+* Explicit treatment of UTF8 sequences as Isabelle symbols, such that
+a Unicode character is treated as a single symbol, not a sequence of
+non-ASCII bytes as before.  Since Isabelle/ML string literals may
+contain symbols without further backslash escapes, Unicode can now be
+used here as well.  Recall that Symbol.explode in ML provides a
+consistent view on symbols, while raw explode (or String.explode)
+merely give a byte-oriented representation.
+
+
 *** HOL ***
 
 * Some previously unqualified names have been qualified:
--- a/doc-src/IsarImplementation/Thy/Prelim.thy	Fri Jun 25 23:35:14 2010 +0200
+++ b/doc-src/IsarImplementation/Thy/Prelim.thy	Mon Jun 28 08:55:46 2010 +0200
@@ -527,8 +527,10 @@
 
   \begin{enumerate}
 
-  \item a single ASCII character ``@{text "c"}'' or raw byte in the
-  range of 128\dots 255, for example ``\verb,a,'',
+  \item a single ASCII character ``@{text "c"}'', for example
+  ``\verb,a,'',
+
+  \item a codepoint according to UTF8 (non-ASCII byte sequence),
 
   \item a regular symbol ``\verb,\,\verb,<,@{text "ident"}\verb,>,'',
   for example ``\verb,\,\verb,<alpha>,'',
@@ -555,19 +557,17 @@
   ``\verb,\,\verb,<alpha>,'' is classified as a letter, which means it
   may occur within regular Isabelle identifiers.
 
-  Since the character set underlying Isabelle symbols is 7-bit ASCII
-  and 8-bit characters are passed through transparently, Isabelle can
-  also process Unicode/UCS data in UTF-8 encoding.\footnote{When
-  counting precise source positions internally, bytes in the range of
-  128\dots 191 are ignored.  In UTF-8 encoding, this interval covers
-  the additional trailer bytes, so Isabelle happens to count Unicode
-  characters here, not bytes in memory.  In ISO-Latin encoding, the
-  ignored range merely includes some extra punctuation characters that
-  even have replacements within the standard collection of Isabelle
-  symbols; the accented letters range is counted properly.} Unicode
-  provides its own collection of mathematical symbols, but within the
-  core Isabelle/ML world there is no link to the standard collection
-  of Isabelle regular symbols.
+  The character set underlying Isabelle symbols is 7-bit ASCII, but
+  8-bit character sequences are passed-through unchanged.  Unicode/UCS
+  data in UTF-8 encoding is processed in a non-strict fashion, such
+  that well-formed code sequences are recognized
+  accordingly.\footnote{Note that ISO-Latin-1 differs from UTF-8 only
+  in some special punctuation characters that even have replacements
+  within the standard collection of Isabelle symbols.  Text consisting
+  of ASCII plus accented letters can be processed in either encoding.}
+  Unicode provides its own collection of mathematical symbols, but
+  within the core Isabelle/ML world there is no link to the standard
+  collection of Isabelle regular symbols.
 
   \medskip Output of Isabelle symbols depends on the print mode
   (\secref{print-mode}).  For example, the standard {\LaTeX} setup of
@@ -612,8 +612,8 @@
 
   \item @{ML_type "Symbol.sym"} is a concrete datatype that represents
   the different kinds of symbols explicitly, with constructors @{ML
-  "Symbol.Char"}, @{ML "Symbol.Sym"}, @{ML "Symbol.Ctrl"}, @{ML
-  "Symbol.Raw"}.
+  "Symbol.Char"}, @{ML "Symbol.Sym"}, @{ML "Symbol.UTF8"}, @{ML
+  "Symbol.Ctrl"}, @{ML "Symbol.Raw"}.
 
   \item @{ML "Symbol.decode"} converts the string representation of a
   symbol into the datatype version.
--- a/doc-src/IsarImplementation/Thy/document/Prelim.tex	Fri Jun 25 23:35:14 2010 +0200
+++ b/doc-src/IsarImplementation/Thy/document/Prelim.tex	Mon Jun 28 08:55:46 2010 +0200
@@ -648,8 +648,10 @@
 
   \begin{enumerate}
 
-  \item a single ASCII character ``\isa{c}'' or raw byte in the
-  range of 128\dots 255, for example ``\verb,a,'',
+  \item a single ASCII character ``\isa{c}'', for example
+  ``\verb,a,'',
+
+  \item a codepoint according to UTF8 (non-ASCII byte sequence),
 
   \item a regular symbol ``\verb,\,\verb,<,\isa{ident}\verb,>,'',
   for example ``\verb,\,\verb,<alpha>,'',
@@ -673,19 +675,17 @@
   ``\verb,\,\verb,<alpha>,'' is classified as a letter, which means it
   may occur within regular Isabelle identifiers.
 
-  Since the character set underlying Isabelle symbols is 7-bit ASCII
-  and 8-bit characters are passed through transparently, Isabelle can
-  also process Unicode/UCS data in UTF-8 encoding.\footnote{When
-  counting precise source positions internally, bytes in the range of
-  128\dots 191 are ignored.  In UTF-8 encoding, this interval covers
-  the additional trailer bytes, so Isabelle happens to count Unicode
-  characters here, not bytes in memory.  In ISO-Latin encoding, the
-  ignored range merely includes some extra punctuation characters that
-  even have replacements within the standard collection of Isabelle
-  symbols; the accented letters range is counted properly.} Unicode
-  provides its own collection of mathematical symbols, but within the
-  core Isabelle/ML world there is no link to the standard collection
-  of Isabelle regular symbols.
+  The character set underlying Isabelle symbols is 7-bit ASCII, but
+  8-bit character sequences are passed-through unchanged.  Unicode/UCS
+  data in UTF-8 encoding is processed in a non-strict fashion, such
+  that well-formed code sequences are recognized
+  accordingly.\footnote{Note that ISO-Latin-1 differs from UTF-8 only
+  in some special punctuation characters that even have replacements
+  within the standard collection of Isabelle symbols.  Text consisting
+  of ASCII plus accented letters can be processed in either encoding.}
+  Unicode provides its own collection of mathematical symbols, but
+  within the core Isabelle/ML world there is no link to the standard
+  collection of Isabelle regular symbols.
 
   \medskip Output of Isabelle symbols depends on the print mode
   (\secref{print-mode}).  For example, the standard {\LaTeX} setup of
@@ -733,7 +733,7 @@
   \cite{isabelle-isar-ref}.
 
   \item \verb|Symbol.sym| is a concrete datatype that represents
-  the different kinds of symbols explicitly, with constructors \verb|Symbol.Char|, \verb|Symbol.Sym|, \verb|Symbol.Ctrl|, \verb|Symbol.Raw|.
+  the different kinds of symbols explicitly, with constructors \verb|Symbol.Char|, \verb|Symbol.Sym|, \verb|Symbol.UTF8|, \verb|Symbol.Ctrl|, \verb|Symbol.Raw|.
 
   \item \verb|Symbol.decode| converts the string representation of a
   symbol into the datatype version.
--- a/src/HOL/Tools/Predicate_Compile/predicate_compile_core.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/HOL/Tools/Predicate_Compile/predicate_compile_core.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -2535,8 +2535,8 @@
       THEN (rtac pred_intro_rule
       (* How to handle equality correctly? *)
       THEN_ALL_NEW (K (print_tac options "state before assumption matching")
-      THEN' (fn i => REPEAT (atac i ORELSE (CHANGED (asm_full_simp_tac split_ss i)
-        THEN print_tac options "state after pre-simplification:")))
+      THEN' (atac ORELSE' ((CHANGED o asm_full_simp_tac split_ss) THEN' (TRY o atac)))
+        THEN' (K (print_tac options "state after pre-simplification:"))
       THEN' (K (print_tac options "state after assumption matching:")))) 1)
     | prove_prems2 out_ts ((p, deriv) :: ps) =
       let
--- a/src/HOL/Tools/Quotient/quotient_def.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/HOL/Tools/Quotient/quotient_def.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -12,7 +12,7 @@
   val quotdef_cmd: (binding option * mixfix) * (Attrib.binding * (string * string)) ->
     local_theory -> Quotient_Info.qconsts_info * local_theory
 
-  val lift_raw_const: typ list -> string * term -> local_theory ->
+  val lift_raw_const: typ list -> (string * term * mixfix) -> local_theory ->
     Quotient_Info.qconsts_info * local_theory
 end;
 
@@ -88,13 +88,13 @@
 end
 
 (* a wrapper for automatically lifting a raw constant *)
-fun lift_raw_const qtys (qconst_name, rconst) ctxt =
+fun lift_raw_const qtys (qconst_name, rconst, mx) ctxt =
 let
   val rty = fastype_of rconst
   val qty = derive_qtyp qtys rty ctxt
+  val lhs = Free (qconst_name, qty)
 in
-  quotient_def ((NONE, NoSyn), (Attrib.empty_binding,
-    (Free (qconst_name, qty), rconst))) ctxt
+  quotient_def ((NONE, mx), (Attrib.empty_binding, (lhs, rconst))) ctxt
 end
 
 (* parser and command *)
--- a/src/HOL/Tools/Quotient/quotient_tacs.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/HOL/Tools/Quotient/quotient_tacs.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -649,13 +649,21 @@
      so we do it both in the original theorem *)
   val thm' = Drule.eta_contraction_rule thm
   val ((_, [thm'']), ctxt') = Variable.import false [thm'] ctxt
-  val goal = (quotient_lift_all qtys ctxt' o prop_of) thm''
+  val goal = derive_qtrm qtys (prop_of thm'') ctxt' 
 in
   Goal.prove ctxt' [] [] goal (K (lift_tac ctxt' [thm'] 1))
   |> singleton (ProofContext.export ctxt' ctxt)
 end;
 
-(* An Attribute which automatically constructs the qthm *)
-val lifted_attrib = Thm.rule_attribute (fn ctxt => lifted [] (Context.proof_of ctxt))
+(* An attribute which automatically constructs the qthm 
+   using all quotients defined so far.
+*)
+val lifted_attrib = Thm.rule_attribute (fn context => 
+       let
+         val ctxt = Context.proof_of context
+         val qtys = map #qtyp (Quotient_Info.quotdata_dest ctxt)
+       in
+         lifted qtys ctxt
+       end)
 
 end; (* structure *)
--- a/src/HOL/Tools/Quotient/quotient_term.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/HOL/Tools/Quotient/quotient_term.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -27,7 +27,7 @@
   val inj_repabs_trm_chk: Proof.context -> term * term -> term
 
   val derive_qtyp: typ list -> typ -> Proof.context -> typ
-  val quotient_lift_all: typ list -> Proof.context -> term -> term
+  val derive_qtrm: typ list -> term -> Proof.context -> term
 end;
 
 structure Quotient_Term: QUOTIENT_TERM =
@@ -692,94 +692,85 @@
 
 (* subst_tys takes a list of (rty, qty) substitution pairs
    and replaces all occurences of rty in the given type
-   by appropriate qty, with substitution *)
-fun subst_ty thy ty (rty, qty) r =
-  if r <> NONE then r else
-  case try (Sign.typ_match thy (rty, ty)) Vartab.empty of
-    SOME inst => SOME (Envir.subst_type inst qty)
-  | NONE => NONE
-fun subst_tys thy substs ty =
-  case fold (subst_ty thy ty) substs NONE of
-    SOME ty => ty
-  | NONE =>
-      (case ty of
-        Type (s, tys) => Type (s, map (subst_tys thy substs) tys)
-      | x => x)
+   by an appropriate qty
+*)
+fun subst_rtyp ctxt ty_subst rty =
+  case rty of
+    Type (s, rtys) =>
+      let
+        val thy = ProofContext.theory_of ctxt
+        val rty' = Type (s, map (subst_rtyp ctxt ty_subst) rtys)
+
+        fun matches [] = rty'
+          | matches ((rty, qty)::tail) =
+              case try (Sign.typ_match thy (rty, rty')) Vartab.empty of
+                NONE => matches tail
+              | SOME inst => Envir.subst_type inst qty
+      in
+        matches ty_subst 
+      end 
+  | _ => rty
+
+(* subst_trms takes a list of (rconst, qconst) substitution pairs,
+   as well as (rty, qty) substitution pairs, and replaces all
+   occurences of rconst and rty by appropriate instances of
+   qconst and qty
+*)
+fun subst_rtrm ctxt ty_subst trm_subst rtrm =
+  case rtrm of
+    t1 $ t2 => (subst_rtrm ctxt ty_subst trm_subst t1) $ (subst_rtrm ctxt ty_subst trm_subst t2)
+  | Abs (x, ty, t) => Abs (x, subst_rtyp ctxt ty_subst ty, subst_rtrm ctxt ty_subst trm_subst t)
+  | Free(n, ty) => Free(n, subst_rtyp ctxt ty_subst ty)
+  | Var(n, ty) => Var(n, subst_rtyp ctxt ty_subst ty)
+  | Bound i => Bound i
+  | Const (a, ty) => 
+      let
+        val thy = ProofContext.theory_of ctxt
 
-(* subst_trms takes a list of (rtrm, qtrm) substitution pairs
-   and if the given term matches any of the raw terms it
-   returns the appropriate qtrm instantiated. If none of
-   them matched it returns NONE. *)
-fun subst_trm thy t (rtrm, qtrm) s =
-  if s <> NONE then s else
-    case try (Pattern.match thy (rtrm, t)) (Vartab.empty, Vartab.empty) of
-      SOME inst => SOME (Envir.subst_term inst qtrm)
-    | NONE => NONE;
-fun subst_trms thy substs t = fold (subst_trm thy t) substs NONE
+        fun matches [] = Const (a, subst_rtyp ctxt ty_subst ty)
+          | matches ((rconst, qconst)::tail) =
+              case try (Pattern.match thy (rconst, rtrm)) (Vartab.empty, Vartab.empty) of
+                NONE => matches tail
+              | SOME inst => Envir.subst_term inst qconst
+      in
+        matches trm_subst
+      end
+
+(* generates type substitutions out of the
+   types involved in a quotient
+*)
+fun get_ty_subst qtys ctxt =
+  Quotient_Info.quotdata_dest ctxt
+   |> map (fn x => (#rtyp x, #qtyp x))
+   |> filter (fn (_, qty) => member (op =) qtys qty)
 
-(* prepares type and term substitution pairs to be used by above
-   functions that let replace all raw constructs by appropriate
-   lifted counterparts. *)
-fun get_ty_trm_substs qtys ctxt =
-let
-  val thy = ProofContext.theory_of ctxt
-  val quot_infos  = Quotient_Info.quotdata_dest ctxt
-  val const_infos = Quotient_Info.qconsts_dest ctxt
-  val all_ty_substs = map (fn ri => (#rtyp ri, #qtyp ri)) quot_infos
-  val ty_substs =
-    if qtys = [] then all_ty_substs else
-    filter (fn (_, qty) => member (op =) qtys qty) all_ty_substs
-  val const_substs = map (fn ci => (#rconst ci, #qconst ci)) const_infos
-  fun rel_eq rel = HOLogic.eq_const (subst_tys thy ty_substs (domain_type (fastype_of rel)))
-  val rel_substs = map (fn ri => (#equiv_rel ri, rel_eq (#equiv_rel ri))) quot_infos
-  fun valid_trm_subst (rt, qt) = (subst_tys thy ty_substs (fastype_of rt) = fastype_of qt)
-  val all_trm_substs = const_substs @ rel_substs
+(* generates term substitutions out of the qconst 
+   definitions and relations in a quotient
+*)
+fun get_trm_subst qtys ctxt =
+let  
+  val subst_rtyp' = subst_rtyp ctxt (get_ty_subst qtys ctxt)
+  fun proper (rt, qt) = (subst_rtyp' (fastype_of rt) = fastype_of qt)
+  
+  val const_substs = 
+    Quotient_Info.qconsts_dest ctxt
+     |> map (fn x => (#rconst x, #qconst x)) 
+
+  val rel_substs = 
+    Quotient_Info.quotdata_dest ctxt
+     |> map (fn x => (#equiv_rel x, HOLogic.eq_const (#qtyp x)))
 in
-  (ty_substs, filter valid_trm_subst all_trm_substs)
+  filter proper (const_substs @ rel_substs)
 end
 
+(* derives a qtyp and qtrm out of a rtyp and rtrm,
+   respectively 
+*)
 fun derive_qtyp qtys rty ctxt =
-let
-  val thy = ProofContext.theory_of ctxt
-  val (ty_substs, _) = get_ty_trm_substs qtys ctxt
-in
-  subst_tys thy ty_substs rty
-end
-
-(*
-Takes a term and
-
-* replaces raw constants by the quotient constants
-
-* replaces equivalence relations by equalities
-
-* replaces raw types by the quotient types
-
-*)
+  subst_rtyp ctxt (get_ty_subst qtys ctxt) rty
 
-fun quotient_lift_all qtys ctxt t =
-let
-  val thy = ProofContext.theory_of ctxt
-  val (ty_substs, substs) = get_ty_trm_substs qtys ctxt
-  fun lift_aux t =
-    case subst_trms thy substs t of
-      SOME x => x
-    | NONE =>
-      (case t of
-        a $ b => lift_aux a $ lift_aux b
-      | Abs(a, ty, s) =>
-          let
-            val (y, s') = Term.dest_abs (a, ty, s)
-            val nty = subst_tys thy ty_substs ty
-          in
-            Abs(y, nty, abstract_over (Free (y, nty), lift_aux s'))
-          end
-      | Free(n, ty) => Free(n, subst_tys thy ty_substs ty)
-      | Var(n, ty) => Var(n, subst_tys thy ty_substs ty)
-      | Bound i => Bound i
-      | Const(s, ty) => Const(s, subst_tys thy ty_substs ty))
-in
-  lift_aux t
-end
+fun derive_qtrm qtys rtrm ctxt =
+  subst_rtrm ctxt (get_ty_subst qtys ctxt) (get_trm_subst qtys ctxt) rtrm
+
 
 end; (* structure *)
--- a/src/Pure/General/position.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/Pure/General/position.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -67,8 +67,8 @@
 fun advance_count "\n" (i: int, j: int, k: int) =
       (if_valid i (i + 1), if_valid j 1, if_valid k (k + 1))
   | advance_count s (i, j, k) =
-      if Symbol.is_regular s andalso not (Symbol.is_utf8_trailer s)
-      then (i, if_valid j (j + 1), if_valid k (k + 1)) else (i, j, k);
+      if Symbol.is_regular s then (i, if_valid j (j + 1), if_valid k (k + 1))
+      else (i, j, k);
 
 fun invalid_count (i, j, k) =
   not (valid i orelse valid j orelse valid k);
--- a/src/Pure/General/symbol.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/Pure/General/symbol.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -15,9 +15,9 @@
   val space: symbol
   val spaces: int -> string
   val is_char: symbol -> bool
+  val is_utf8: symbol -> bool
   val is_symbolic: symbol -> bool
   val is_printable: symbol -> bool
-  val is_utf8_trailer: symbol -> bool
   val eof: symbol
   val is_eof: symbol -> bool
   val not_eof: symbol -> bool
@@ -42,7 +42,7 @@
   val is_raw: symbol -> bool
   val decode_raw: symbol -> string
   val encode_raw: string -> string
-  datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string
+  datatype sym = Char of string | UTF8 of string | Sym of string | Ctrl of string | Raw of string
   val decode: symbol -> sym
   datatype kind = Letter | Digit | Quasi | Blank | Other
   val kind: symbol -> kind
@@ -108,14 +108,14 @@
 
 fun is_char s = size s = 1;
 
+fun is_utf8 s = size s > 0 andalso forall_string (fn c => ord c >= 128) s;
+
 fun is_symbolic s =
   String.isPrefix "\\<" s andalso not (String.isPrefix "\\<^" s);
 
 fun is_printable s =
   if is_char s then ord space <= ord s andalso ord s <= ord "~"
-  else not (String.isPrefix "\\<^" s);
-
-fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;
+  else is_utf8 s orelse not (String.isPrefix "\\<^" s);
 
 
 (* input source control *)
@@ -224,10 +224,12 @@
 
 (* symbol variants *)
 
-datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string;
+datatype sym =
+  Char of string | UTF8 of string | Sym of string | Ctrl of string | Raw of string;
 
 fun decode s =
   if is_char s then Char s
+  else if is_utf8 s then UTF8 s
   else if is_raw s then Raw (decode_raw s)
   else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))
   else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))
@@ -426,7 +428,9 @@
 
 local
 
-fun is_plain s = s <> "\^M" andalso s <> "\\" andalso not_eof s;
+fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\";
+
+fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;
 
 val scan_encoded_newline =
   $$ "\^M" -- $$ "\n" >> K "\n" ||
@@ -439,6 +443,7 @@
 
 val scan =
   Scan.one is_plain ||
+  Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode ||
   scan_encoded_newline ||
   ($$ "\\" ^^ $$ "<" ^^
     !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs)))
@@ -471,7 +476,7 @@
 fun no_explode [] = true
   | no_explode ("\\" :: "<" :: _) = false
   | no_explode ("\^M" :: _) = false
-  | no_explode (_ :: cs) = no_explode cs;
+  | no_explode (c :: cs) = is_ascii c andalso no_explode cs;
 
 in
 
@@ -486,7 +491,11 @@
 
 (* escape *)
 
-val esc = fn s => if is_char s then s else "\\" ^ s;
+val esc = fn s =>
+  if is_char s then s
+  else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s
+  else "\\" ^ s;
+
 val escape = implode o map esc o sym_explode;
 
 
--- a/src/Pure/ML/ml_syntax.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/Pure/ML/ml_syntax.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -26,6 +26,7 @@
   val print_sort: sort -> string
   val print_typ: typ -> string
   val print_term: term -> string
+  val pretty_string: string -> Pretty.T
 end;
 
 structure ML_Syntax: ML_SYNTAX =
@@ -92,4 +93,15 @@
       "Abs (" ^ print_string s ^ ", " ^ print_typ T ^ ", " ^ print_term t ^ ")"
   | print_term (t1 $ t2) = atomic (print_term t1) ^ " $ " ^ atomic (print_term t2);
 
+
+(* toplevel pretty printing *)
+
+fun pretty_string raw_str =
+  let
+    val str =
+      implode (map (fn XML.Elem _ => "<markup>" | XML.Text s => s) (YXML.parse_body raw_str))
+        handle Fail _ => raw_str;
+    val syms = Symbol.explode str handle ERROR _ => explode str;
+  in Pretty.str (quote (implode (map print_char syms))) end;
+
 end;
--- a/src/Pure/Thy/latex.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/Pure/Thy/latex.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -77,6 +77,7 @@
 fun output_known_sym (known_sym, known_ctrl) sym =
   (case Symbol.decode sym of
     Symbol.Char s => output_chr s
+  | Symbol.UTF8 s => s
   | Symbol.Sym s => if known_sym s then enclose "{\\isasym" "}" s else output_chrs sym
   | Symbol.Ctrl s => if known_ctrl s then enclose "\\isactrl" " " s else output_chrs sym
   | Symbol.Raw s => s);
--- a/src/Pure/pure_setup.ML	Fri Jun 25 23:35:14 2010 +0200
+++ b/src/Pure/pure_setup.ML	Mon Jun 28 08:55:46 2010 +0200
@@ -18,6 +18,7 @@
 
 (* ML toplevel pretty printing *)
 
+toplevel_pp ["String", "string"] "ML_Syntax.pretty_string";
 toplevel_pp ["Pretty", "T"] "(fn _: Pretty.T => Pretty.str \"<pretty>\")";
 toplevel_pp ["Task_Queue", "task"] "Pretty.str o Task_Queue.str_of_task";
 toplevel_pp ["Task_Queue", "group"] "Pretty.str o Task_Queue.str_of_group";