support for nested text cartouches;
author wenzelm
Sat, 18 Jan 2014 19:15:12 +0100
changeset 55033 8e8243975860
parent 55032 b49366215417
child 55034 04b443d54aee
support for nested text cartouches; clarified Symbol.is_symbolic: exclude \<open> and \<close>;
etc/isabelle.css
etc/isar-keywords.el
etc/symbols
lib/texinputs/isabelle.sty
lib/texinputs/isabellesym.sty
src/Doc/IsarRef/Inner_Syntax.thy
src/Doc/IsarRef/Outer_Syntax.thy
src/HOL/ROOT
src/HOL/ex/Cartouche_Examples.thy
src/Pure/General/pretty.ML
src/Pure/General/scan.scala
src/Pure/General/symbol.ML
src/Pure/General/symbol.scala
src/Pure/General/symbol_pos.ML
src/Pure/Isar/args.ML
src/Pure/Isar/parse.ML
src/Pure/Isar/token.ML
src/Pure/Isar/token.scala
src/Pure/PIDE/markup.ML
src/Pure/PIDE/markup.scala
src/Pure/Syntax/lexicon.ML
src/Pure/Thy/html.ML
src/Pure/Thy/latex.ML
src/Pure/Thy/thy_syntax.ML
src/Pure/library.ML
src/Pure/library.scala
src/Pure/pure_thy.ML
src/Tools/jEdit/etc/options
src/Tools/jEdit/src/rendering.scala
--- a/etc/isabelle.css	Fri Jan 17 20:51:36 2014 +0100
+++ b/etc/isabelle.css	Sat Jan 18 19:15:12 2014 +0100
@@ -30,6 +30,7 @@
 .literal        { font-weight: bold; }
 .delimiter      { }
 .inner_string   { color: #D2691E; }
+.inner_cartouche { color: #CC6600; }
 .inner_comment  { color: #8B0000; }
 
 .bold           { font-weight: bold; }
@@ -40,6 +41,7 @@
 .string         { color: #008B00; }
 .altstring      { color: #8B8B00; }
 .verbatim       { color: #00008B; }
+.cartouche      { color: #CC6600; }
 .comment        { color: #8B0000; }
 .control        { background-color: #FF6A6A; }
 .bad            { background-color: #FF6A6A; }
--- a/etc/isar-keywords.el	Fri Jan 17 20:51:36 2014 +0100
+++ b/etc/isar-keywords.el	Sat Jan 18 19:15:12 2014 +0100
@@ -38,6 +38,7 @@
     "bundle"
     "by"
     "cannot_undo"
+    "cartouche"
     "case"
     "case_of_simps"
     "cd"
@@ -389,6 +390,7 @@
 (defconst isar-keywords-diag
   '("ML_command"
     "ML_val"
+    "cartouche"
     "class_deps"
     "code_deps"
     "code_thms"
--- a/etc/symbols	Fri Jan 17 20:51:36 2014 +0100
+++ b/etc/symbols	Sat Jan 18 19:15:12 2014 +0100
@@ -348,6 +348,8 @@
 \<hungarumlaut>         code: 0x0002dd
 \<some>                 code: 0x0003f5
 \<newline>              code: 0x0023ce
+\<open>                 code: 0x002039  abbrev: <<  font: IsabelleText
+\<close>                code: 0x00203a  abbrev: >>  font: IsabelleText
 \<^sub>                 code: 0x0021e9  group: control  font: IsabelleText
 \<^sup>                 code: 0x0021e7  group: control  font: IsabelleText
 \<^bold>                code: 0x002759  group: control  font: IsabelleText
--- a/lib/texinputs/isabelle.sty	Fri Jan 17 20:51:36 2014 +0100
+++ b/lib/texinputs/isabelle.sty	Sat Jan 18 19:15:12 2014 +0100
@@ -97,6 +97,8 @@
 \chardef\isachartilde=`\~%
 \def\isacharverbatimopen{\isacharbraceleft\isacharasterisk}%
 \def\isacharverbatimclose{\isacharasterisk\isacharbraceright}%
+\def\isacartoucheopen{\isatext{\raise.3ex\hbox{$\scriptscriptstyle\langle$}}}%
+\def\isacartoucheclose{\isatext{\raise.3ex\hbox{$\scriptscriptstyle\rangle$}}}%
 }
 
 
--- a/lib/texinputs/isabellesym.sty	Fri Jan 17 20:51:36 2014 +0100
+++ b/lib/texinputs/isabellesym.sty	Sat Jan 18 19:15:12 2014 +0100
@@ -354,3 +354,6 @@
 \newcommand{\isasymbind}{\isamath{\mathbin{>\!\!\!>\mkern-6.7mu=}}}
 \newcommand{\isasymthen}{\isamath{\mathbin{>\!\!\!>}}}
 \newcommand{\isasymsome}{\isamath{\epsilon\,}}
+\newcommand{\isasymopen}{\isatext{\raise.3ex\hbox{$\scriptscriptstyle\langle$}}}
+\newcommand{\isasymclose}{\isatext{\raise.3ex\hbox{$\scriptscriptstyle\rangle$}}}
+
--- a/src/Doc/IsarRef/Inner_Syntax.thy	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Doc/IsarRef/Inner_Syntax.thy	Sat Jan 18 19:15:12 2014 +0100
@@ -633,14 +633,16 @@
     @{syntax_def (inner) xnum_token} & = & @{verbatim "#"}@{syntax_ref nat}@{text "  |  "}@{verbatim "#-"}@{syntax_ref nat} \\
 
     @{syntax_def (inner) str_token} & = & @{verbatim "''"} @{text "\<dots>"} @{verbatim "''"} \\
+    @{syntax_def (inner) cartouche} & = & @{verbatim "\<open>"} @{text "\<dots>"} @{verbatim "\<close>"} \\
   \end{supertabular}
   \end{center}
 
   The token categories @{syntax (inner) num_token}, @{syntax (inner)
-  float_token}, @{syntax (inner) xnum_token}, and @{syntax (inner)
-  str_token} are not used in Pure.  Object-logics may implement numerals
-  and string constants by adding appropriate syntax declarations,
-  together with some translation functions (e.g.\ see Isabelle/HOL).
+  float_token}, @{syntax (inner) xnum_token}, @{syntax (inner)
+  str_token}, and @{syntax (inner) cartouche} are not used in Pure.
+  Object-logics may implement numerals and string literals by adding
+  appropriate syntax declarations, together with some translation
+  functions (e.g.\ see Isabelle/HOL).
 
   The derived categories @{syntax_def (inner) num_const}, @{syntax_def
   (inner) float_const}, and @{syntax_def (inner) num_const} provide
--- a/src/Doc/IsarRef/Outer_Syntax.thy	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Doc/IsarRef/Outer_Syntax.thy	Sat Jan 18 19:15:12 2014 +0100
@@ -147,6 +147,7 @@
     @{syntax_def typevar} & = & @{verbatim "?"}@{text "typefree  |  "}@{verbatim "?"}@{text typefree}@{verbatim "."}@{text nat} \\
     @{syntax_def string} & = & @{verbatim "\""} @{text "\<dots>"} @{verbatim "\""} \\
     @{syntax_def altstring} & = & @{verbatim "`"} @{text "\<dots>"} @{verbatim "`"} \\
+    @{syntax_def cartouche} & = & @{verbatim "\<open>"} @{text "\<dots>"} @{verbatim "\<close>"} \\
     @{syntax_def verbatim} & = & @{verbatim "{*"} @{text "\<dots>"} @{verbatim "*"}@{verbatim "}"} \\[1ex]
 
     @{text letter} & = & @{text "latin  |  "}@{verbatim "\\"}@{verbatim "<"}@{text latin}@{verbatim ">"}@{text "  |  "}@{verbatim "\\"}@{verbatim "<"}@{text "latin latin"}@{verbatim ">"}@{text "  |  greek  |"} \\
@@ -191,6 +192,11 @@
   text is {\LaTeX} source, one may usually add some blank or comment
   to avoid the critical character sequence.
 
+  A @{syntax_ref cartouche} consists of arbitrary text, with properly
+  balanced blocks of ``@{verbatim "\<open>"}~@{text "\<dots>"}~@{verbatim
+  "\<close>"}''.  Note that the rendering of cartouche delimiters is
+  usually like this: ``@{text "\<open> \<dots> \<close>"}''.
+
   Source comments take the form @{verbatim "(*"}~@{text
   "\<dots>"}~@{verbatim "*)"} and may be nested, although the user-interface
   might prevent this.  Note that this form indicates source comments
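The new documentation paragraph above defines a cartouche as arbitrary text with properly balanced blocks of \<open> ... \<close>. As a minimal illustration of that balancing condition only (not code from this changeset), the following Scala sketch checks balance over an already decoded string, assuming the delimiters appear as the Unicode characters U+2039 and U+203A declared in etc/symbols:

    // Hedged sketch: check that cartouche delimiters are properly balanced,
    // i.e. the nesting depth never drops below zero and ends at zero.
    object CartoucheBalance {
      val Open = '\u2039'   // decoded \<open>
      val Close = '\u203a'  // decoded \<close>

      def balanced(text: String): Boolean = {
        var depth = 0
        var ok = true
        for (c <- text) {
          if (c == Open) depth += 1
          else if (c == Close) { depth -= 1; if (depth < 0) ok = false }
        }
        ok && depth == 0
      }

      def main(args: Array[String]): Unit = {
        println(balanced("\u2039abc \u2039xyz\u203a more\u203a"))  // true: properly nested
        println(balanced("\u2039unclosed"))                        // false: depth ends at 1
      }
    }
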
--- a/src/HOL/ROOT	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/HOL/ROOT	Sat Jan 18 19:15:12 2014 +0100
@@ -562,6 +562,7 @@
     SVC_Oracle
     Simps_Case_Conv_Examples
     ML
+    Cartouche_Examples
   theories [skip_proofs = false]
     Meson_Test
   theories [condition = SVC_HOME]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/HOL/ex/Cartouche_Examples.thy	Sat Jan 18 19:15:12 2014 +0100
@@ -0,0 +1,61 @@
+header {* Some examples with text cartouches *}
+
+theory Cartouche_Examples
+imports Main
+keywords "cartouche" :: diag
+begin
+
+subsection {* Outer syntax *}
+
+ML {*
+  Outer_Syntax.improper_command @{command_spec "cartouche"} ""
+    (Parse.cartouche >> (fn s =>
+      Toplevel.imperative (fn () => writeln s)))
+*}
+
+cartouche \<open>abc\<close>
+cartouche \<open>abc \<open>\<alpha>\<beta>\<gamma>\<close> xzy\<close>
+
+
+subsection {* Inner syntax *}
+
+syntax "_string_cartouche" :: "cartouche_position \<Rightarrow> string"  ("STR _")
+
+parse_translation {*
+  let
+    val mk_nib =
+      Syntax.const o Lexicon.mark_const o
+        fst o Term.dest_Const o HOLogic.mk_nibble;
+
+    fun mk_char (s, pos) =
+      let
+        val c =
+          if Symbol.is_ascii s then ord s
+          else if s = "" then 10
+          else error ("String literal contains illegal symbol: " ^ quote s ^ Position.here pos);
+      in Syntax.const @{const_syntax Char} $ mk_nib (c div 16) $ mk_nib (c mod 16) end;
+
+    fun mk_string [] = Const (@{const_syntax Nil}, @{typ string})
+      | mk_string (s :: ss) =
+          Syntax.const @{const_syntax Cons} $ mk_char s $ mk_string ss;
+
+    fun string_tr ctxt args =
+      let fun err () = raise TERM ("string_tr", []) in
+        (case args of
+          [(c as Const (@{syntax_const "_constrain"}, _)) $ Free (s, _) $ p] =>
+            (case Term_Position.decode_position p of
+              SOME (pos, _) =>
+                c $ mk_string (Symbol_Pos.cartouche_content (Symbol_Pos.explode (s, pos))) $ p
+            | NONE => err ())
+        | _ => err ())
+      end;
+  in
+    [(@{syntax_const "_string_cartouche"}, string_tr)]
+  end
+*}
+
+term "STR \<open>\<close>"
+term "STR \<open>abc\<close>"
+lemma "STR \<open>abc\<close> @ STR \<open>xyz\<close> = STR \<open>abcxyz\<close>" by simp
+
+end
--- a/src/Pure/General/pretty.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/General/pretty.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -51,6 +51,7 @@
   val block_enclose: T * T -> T list -> T
   val quote: T -> T
   val backquote: T -> T
+  val cartouche: T -> T
   val separate: string -> T list -> T list
   val commas: T list -> T list
   val enclose: string -> string -> T list -> T
@@ -182,6 +183,7 @@
 
 fun quote prt = blk (1, [str "\"", prt, str "\""]);
 fun backquote prt = blk (1, [str "`", prt, str "`"]);
+fun cartouche prt = blk (1, [str "\\<open>", prt, str "\\<close>"]);
 
 fun separate sep prts =
   flat (Library.separate [str sep, brk 1] (map single prts));
--- a/src/Pure/General/scan.scala	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/General/scan.scala	Sat Jan 18 19:15:12 2014 +0100
@@ -23,6 +23,7 @@
   case object Finished extends Context
   case class Quoted(quote: String) extends Context
   case object Verbatim extends Context
+  case class Cartouche(depth: Int) extends Context
   case class Comment(depth: Int) extends Context
 
 
@@ -274,6 +275,72 @@
       "{*" ~ verbatim_body ^^ { case x ~ y => x + y }
 
 
+    /* nested text cartouches */
+
+    private def cartouche_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)]
+    {
+      require(depth >= 0)
+
+      def apply(in: Input) =
+      {
+        val start = in.offset
+        val end = in.source.length
+        val matcher = new Symbol.Matcher(in.source)
+
+        var i = start
+        var d = depth
+        var finished = false
+        while (!finished) {
+          if (i < end) {
+            val n = matcher(i, end)
+            val sym = in.source.subSequence(i, i + n).toString
+
+            if (Symbol.is_open(sym)) { i += n; d += 1 }
+            else if (d > 0) {
+              i += n
+              if (Symbol.is_close(sym)) d -= 1
+            }
+            else finished = true
+          }
+          else finished = true
+        }
+        if (i == start) Failure("bad input", in)
+        else Success((in.source.subSequence(start, i).toString, d), in.drop(i - start))
+      }
+    }.named("cartouche_depth")
+
+    def cartouche: Parser[String] =
+      cartouche_depth(0) ^? { case (x, d) if d == 0 => x }
+
+    def cartouche_context(ctxt: Context): Parser[(String, Context)] =
+    {
+      val depth =
+        ctxt match {
+          case Finished => 0
+          case Cartouche(d) => d
+          case _ => -1
+        }
+      if (depth >= 0)
+        cartouche_depth(depth) ^^
+          { case (x, 0) => (x, Finished)
+            case (x, d) => (x, Cartouche(d)) }
+      else failure("")
+    }
+
+    val recover_cartouche: Parser[String] =
+      cartouche_depth(0) ^^ (_._1)
+
+    def cartouche_content(source: String): String =
+    {
+      def err(): Nothing = error("Malformed text cartouche: " + quote(source))
+      val source1 =
+        Library.try_unprefix(Symbol.open_decoded, source) orElse
+          Library.try_unprefix(Symbol.open, source) getOrElse err()
+      Library.try_unsuffix(Symbol.close_decoded, source1) orElse
+        Library.try_unsuffix(Symbol.close, source1) getOrElse err()
+    }
+
+
     /* nested comments */
 
     private def comment_depth(depth: Int): Parser[(String, Int)] = new Parser[(String, Int)]
@@ -309,11 +376,6 @@
     def comment: Parser[String] =
       comment_depth(0) ^? { case (x, d) if d == 0 => x }
 
-    def comment_content(source: String): String =
-    {
-      require(parseAll(comment, source).successful)
-      source.substring(2, source.length - 2)
-    }
     def comment_context(ctxt: Context): Parser[(String, Context)] =
     {
       val depth =
@@ -332,6 +394,12 @@
     val recover_comment: Parser[String] =
       comment_depth(0) ^^ (_._1)
 
+    def comment_content(source: String): String =
+    {
+      require(parseAll(comment, source).successful)
+      source.substring(2, source.length - 2)
+    }
+
 
     /* outer syntax tokens */
 
@@ -340,9 +408,10 @@
       val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
       val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
       val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
+      val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x))
       val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
 
-      string | (alt_string | (verb | cmt))
+      string | (alt_string | (verb | (cart | cmt)))
     }
 
     private def other_token(is_command: String => Boolean)
@@ -380,8 +449,10 @@
       val space = many1(Symbol.is_blank) ^^ (x => Token(Token.Kind.SPACE, x))
 
       val recover_delimited =
-        (recover_quoted("\"") | (recover_quoted("`") | (recover_verbatim | recover_comment))) ^^
-          (x => Token(Token.Kind.ERROR, x))
+        (recover_quoted("\"") |
+          (recover_quoted("`") |
+            (recover_verbatim |
+              (recover_cartouche | recover_comment)))) ^^ (x => Token(Token.Kind.ERROR, x))
 
       val bad = one(_ => true) ^^ (x => Token(Token.Kind.ERROR, x))
 
@@ -400,10 +471,11 @@
       val alt_string =
         quoted_context("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
       val verb = verbatim_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
+      val cart = cartouche_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) }
       val cmt = comment_context(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
       val other = other_token(is_command) ^^ { case x => (x, Finished) }
 
-      string | (alt_string | (verb | (cmt | other)))
+      string | (alt_string | (verb | (cart | (cmt | other))))
     }
   }
 }
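The cartouche_depth parser added above threads the current nesting depth through the scan, so that a Cartouche(depth) context lets the token scanner resume inside an unfinished cartouche. The following stripped-down sketch shows the same depth-threading idea over a pre-exploded list of decoded symbols; the name scanCartouche and the simplifications are mine, not part of this changeset:

    // Hedged sketch: consume nested cartouche text starting from a given depth;
    // return the consumed symbols, the remaining depth, and the rest of the input.
    def scanCartouche(syms: List[String], depth: Int): (List[String], Int, List[String]) = {
      require(depth >= 0)
      var d = depth
      var taken = List.empty[String]
      var rest = syms
      var finished = false
      while (!finished) {
        rest match {
          case s :: tail if s == "\u2039" =>   // open delimiter: go one level deeper
            d += 1; taken = s :: taken; rest = tail
          case s :: tail if d > 0 =>           // inside a cartouche: consume, maybe close
            if (s == "\u203a") d -= 1
            taken = s :: taken; rest = tail
          case _ => finished = true            // depth 0 before an open, or end of input
        }
      }
      (taken.reverse, d, rest)
    }

With depth 0 this consumes exactly one balanced cartouche and stops; a positive result depth corresponds to the Cartouche(d) context above, which is what allows incremental re-parsing of partially edited text.
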
--- a/src/Pure/General/symbol.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/General/symbol.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -13,6 +13,7 @@
   val is_char: symbol -> bool
   val is_utf8: symbol -> bool
   val is_symbolic: symbol -> bool
+  val is_symbolic_char: symbol -> bool
   val is_printable: symbol -> bool
   val eof: symbol
   val is_eof: symbol -> bool
@@ -98,12 +99,17 @@
 
 fun is_utf8 s = size s > 0 andalso forall_string (fn c => ord c >= 128) s;
 
+fun raw_symbolic s =
+  String.isPrefix "\\<" s andalso String.isSuffix ">" s andalso not (String.isPrefix "\\<^" s);
+
 fun is_symbolic s =
-  String.isPrefix "\\<" s andalso String.isSuffix ">" s andalso not (String.isPrefix "\\<^" s);
+  s <> "\\<open>" andalso s <> "\\<close>" andalso raw_symbolic s;
+
+val is_symbolic_char = member (op =) (raw_explode "!#$%&*+-/<=>?@^_|~");
 
 fun is_printable s =
   if is_char s then ord space <= ord s andalso ord s <= ord "~"
-  else is_utf8 s orelse is_symbolic s;
+  else is_utf8 s orelse raw_symbolic s;
 
 
 (* input source control *)
@@ -517,7 +523,7 @@
 
 fun symbolic_end (_ :: "\\<^sub>" :: _) = true
   | symbolic_end ("'" :: ss) = symbolic_end ss
-  | symbolic_end (s :: _) = is_symbolic s
+  | symbolic_end (s :: _) = raw_symbolic s
   | symbolic_end [] = false;
 
 fun bump_init str =
--- a/src/Pure/General/symbol.scala	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/General/symbol.scala	Sat Jan 18 19:15:12 2014 +0100
@@ -366,6 +366,12 @@
     val symbolic = recode_set((for { (sym, _) <- symbols; if raw_symbolic(sym) } yield sym): _*)
 
 
+    /* cartouches */
+
+    val open_decoded = decode(open)
+    val close_decoded = decode(close)
+
+
     /* control symbols */
 
     val ctrl_decoded: Set[Symbol] =
@@ -420,12 +426,29 @@
   def is_letdig(sym: Symbol): Boolean = is_letter(sym) || is_digit(sym) || is_quasi(sym)
   def is_blank(sym: Symbol): Boolean = symbols.blanks.contains(sym)
 
-  def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
-  def is_symbolic(sym: Symbol): Boolean = raw_symbolic(sym) || symbols.symbolic.contains(sym)
+
+  /* cartouches */
+
+  val open = "\\<open>"
+  val close = "\\<close>"
+
+  def open_decoded: Symbol = symbols.open_decoded
+  def close_decoded: Symbol = symbols.close_decoded
+
+  def is_open(sym: Symbol): Boolean = sym == open_decoded || sym == open
+  def is_close(sym: Symbol): Boolean = sym == close_decoded || sym == close
+
+
+  /* symbols for symbolic identifiers */
 
   private def raw_symbolic(sym: Symbol): Boolean =
     sym.startsWith("\\<") && sym.endsWith(">") && !sym.startsWith("\\<^")
 
+  def is_symbolic(sym: Symbol): Boolean =
+    !is_open(sym) && !is_close(sym) && (raw_symbolic(sym) || symbols.symbolic.contains(sym))
+
+  def is_symbolic_char(sym: Symbol): Boolean = symbols.sym_chars.contains(sym)
+
 
   /* control symbols */
 
@@ -433,7 +456,7 @@
     sym.startsWith("\\<^") || symbols.ctrl_decoded.contains(sym)
 
   def is_controllable(sym: Symbol): Boolean =
-    !is_blank(sym) && !is_ctrl(sym) && !is_malformed(sym)
+    !is_blank(sym) && !is_ctrl(sym) && !is_open(sym) && !is_close(sym) && !is_malformed(sym)
 
   def sub_decoded: Symbol = symbols.sub_decoded
   def sup_decoded: Symbol = symbols.sup_decoded
--- a/src/Pure/General/symbol_pos.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/General/symbol_pos.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -13,6 +13,7 @@
   val $$$ : Symbol.symbol -> T list -> T list * T list
   val ~$$$ : Symbol.symbol -> T list -> T list * T list
   val content: T list -> string
+  val range: T list -> Position.range
   val is_eof: T -> bool
   val stopper: T Scan.stopper
   val !!! : Scan.message -> (T list -> 'a) -> T list -> 'a
@@ -27,6 +28,12 @@
   val quote_string_q: string -> string
   val quote_string_qq: string -> string
   val quote_string_bq: string -> string
+  val scan_cartouche: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
+    T list -> T list * T list
+  val scan_cartouche_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
+    T list -> T list * T list
+  val recover_cartouche: T list -> T list * T list
+  val cartouche_content: T list -> T list
   val scan_comment: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
     T list -> T list * T list
   val scan_comment_body: (string -> (T list -> T list * T list) -> T list -> T list * T list) ->
@@ -36,7 +43,6 @@
     (T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source
   type text = string
   val implode: T list -> text
-  val range: T list -> Position.range
   val implode_range: Position.T -> Position.T -> T list -> text * Position.range
   val explode: text * Position.T -> T list
   val scan_ident: T list -> T list * T list
@@ -54,6 +60,11 @@
 
 val content = implode o map symbol;
 
+fun range (syms as (_, pos) :: _) =
+      let val pos' = List.last syms |-> Position.advance
+      in Position.range pos pos' end
+  | range [] = Position.no_range;
+
 
 (* stopper *)
 
@@ -81,6 +92,7 @@
 
 fun change_prompt scan = Scan.prompt "# " scan;
 
+fun $$ s = Scan.one (fn x => symbol x = s);
 fun $$$ s = Scan.one (fn x => symbol x = s) >> single;
 fun ~$$$ s = Scan.one (fn x => symbol x <> s) >> single;
 
@@ -147,6 +159,47 @@
 end;
 
 
+(* nested text cartouches *)
+
+local
+
+val scan_cart =
+  Scan.depend (fn (d: int) => $$ "\\<open>" >> pair (d + 1)) ||
+  Scan.depend (fn 0 => Scan.fail | d => $$ "\\<close>" >> pair (d - 1)) ||
+  Scan.lift (Scan.one (fn (s, _) => s <> "\\<close>" andalso Symbol.is_regular s));
+
+val scan_carts = Scan.pass 0 (Scan.repeat scan_cart);
+
+val scan_body = change_prompt scan_carts;
+
+in
+
+fun scan_cartouche cut =
+  $$ "\\<open>" ::: cut "unclosed text cartouche" (scan_body @@@ $$$ "\\<close>");
+
+fun scan_cartouche_body cut =
+  $$ "\\<open>" |-- cut "unclosed text cartouche" (scan_body --| $$ "\\<close>");
+
+val recover_cartouche =
+  $$$ "\\<open>" @@@ scan_carts;
+
+end;
+
+fun cartouche_content syms =
+  let
+    fun err () =
+      error ("Malformed text cartouche: " ^ quote (content syms) ^
+        Position.here (Position.set_range (range syms)));
+  in
+    (case syms of
+      ("\\<open>", _) :: rest =>
+        (case rev rest of
+          ("\\<close>", _) :: rrest => rev rrest
+        | _ => err ())
+    | _ => err ())
+  end;
+
+
 (* ML-style comments *)
 
 local
@@ -196,11 +249,6 @@
 
 val implode = implode o pad;
 
-fun range (syms as (_, pos) :: _) =
-      let val pos' = List.last syms |-> Position.advance
-      in Position.range pos pos' end
-  | range [] = Position.no_range;
-
 fun implode_range pos1 pos2 syms =
   let val syms' = (("", pos1) :: syms @ [("", pos2)])
   in (implode syms', range syms') end;
--- a/src/Pure/Isar/args.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Isar/args.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -227,7 +227,7 @@
 
 val argument_kinds =
  [Token.Ident, Token.LongIdent, Token.SymIdent, Token.Var, Token.TypeIdent, Token.TypeVar,
-  Token.Nat, Token.Float, Token.String, Token.AltString, Token.Verbatim];
+  Token.Nat, Token.Float, Token.String, Token.AltString, Token.Cartouche, Token.Verbatim];
 
 fun parse_args is_symid =
   let
--- a/src/Pure/Isar/parse.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Isar/parse.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -32,6 +32,7 @@
   val string: string parser
   val alt_string: string parser
   val verbatim: string parser
+  val cartouche: string parser
   val sync: string parser
   val eof: string parser
   val command_name: string -> string parser
@@ -185,6 +186,7 @@
 val string = kind Token.String;
 val alt_string = kind Token.AltString;
 val verbatim = kind Token.Verbatim;
+val cartouche = kind Token.Cartouche;
 val sync = kind Token.Sync;
 val eof = kind Token.EOF;
 
--- a/src/Pure/Isar/token.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Isar/token.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -8,7 +8,7 @@
 sig
   datatype kind =
     Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
-    Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue |
+    Nat | Float | String | AltString | Verbatim | Cartouche | Space | Comment | InternalValue |
     Error of string | Sync | EOF
   type file = {src_path: Path.T, text: string, pos: Position.T}
   datatype value =
@@ -100,7 +100,7 @@
 
 datatype kind =
   Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |
-  Nat | Float | String | AltString | Verbatim | Space | Comment | InternalValue |
+  Nat | Float | String | AltString | Verbatim | Cartouche | Space | Comment | InternalValue |
   Error of string | Sync | EOF;
 
 datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot;
@@ -119,6 +119,7 @@
   | String => "string"
   | AltString => "back-quoted string"
   | Verbatim => "verbatim text"
+  | Cartouche => "cartouche"
   | Space => "white space"
   | Comment => "comment text"
   | InternalValue => "internal value"
@@ -221,6 +222,7 @@
     String => Symbol_Pos.quote_string_qq x
   | AltString => Symbol_Pos.quote_string_bq x
   | Verbatim => enclose "{*" "*}" x
+  | Cartouche => cartouche x
   | Comment => enclose "(*" "*)" x
   | Sync => ""
   | EOF => ""
@@ -300,16 +302,14 @@
 
 (* scan symbolic idents *)
 
-val is_sym_char = member (op =) (raw_explode "!#$%&*+-/<=>?@^_|~");
-
 val scan_symid =
-  Scan.many1 (is_sym_char o Symbol_Pos.symbol) ||
+  Scan.many1 (Symbol.is_symbolic_char o Symbol_Pos.symbol) ||
   Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single;
 
 fun is_symid str =
   (case try Symbol.explode str of
-    SOME [s] => Symbol.is_symbolic s orelse is_sym_char s
-  | SOME ss => forall is_sym_char ss
+    SOME [s] => Symbol.is_symbolic s orelse Symbol.is_symbolic_char s
+  | SOME ss => forall Symbol.is_symbolic_char ss
   | _ => false);
 
 fun ident_or_symbolic "begin" = false
@@ -333,6 +333,12 @@
   $$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat);
 
 
+(* scan cartouche *)
+
+val scan_cartouche =
+  Symbol_Pos.scan_pos -- (Symbol_Pos.scan_cartouche_body !!! -- Symbol_Pos.scan_pos);
+
+
 (* scan space *)
 
 fun space_symbol (s, _) = Symbol.is_blank s andalso s <> "\n";
@@ -367,6 +373,7 @@
   (Symbol_Pos.scan_string_qq err_prefix >> token_range String ||
     Symbol_Pos.scan_string_bq err_prefix >> token_range AltString ||
     scan_verbatim >> token_range Verbatim ||
+    scan_cartouche >> token_range Cartouche ||
     scan_comment >> token_range Comment ||
     scan_space >> token Space ||
     Scan.one (Symbol.is_sync o Symbol_Pos.symbol) >> (token Sync o single) ||
@@ -387,6 +394,7 @@
   (Symbol_Pos.recover_string_qq ||
     Symbol_Pos.recover_string_bq ||
     recover_verbatim ||
+    Symbol_Pos.recover_cartouche ||
     Symbol_Pos.recover_comment ||
     Scan.one (Symbol.is_regular o Symbol_Pos.symbol) >> single)
   >> (single o token (Error msg));
--- a/src/Pure/Isar/token.scala	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Isar/token.scala	Sat Jan 18 19:15:12 2014 +0100
@@ -26,6 +26,7 @@
     val STRING = Value("string")
     val ALT_STRING = Value("back-quoted string")
     val VERBATIM = Value("verbatim text")
+    val CARTOUCHE = Value("cartouche")
     val SPACE = Value("white space")
     val COMMENT = Value("comment text")
     val ERROR = Value("bad input")
@@ -74,6 +75,7 @@
     kind == Token.Kind.STRING ||
     kind == Token.Kind.ALT_STRING ||
     kind == Token.Kind.VERBATIM ||
+    kind == Token.Kind.CARTOUCHE ||
     kind == Token.Kind.COMMENT
   def is_ident: Boolean = kind == Token.Kind.IDENT
   def is_sym_ident: Boolean = kind == Token.Kind.SYM_IDENT
@@ -107,6 +109,7 @@
     if (kind == Token.Kind.STRING) Scan.Lexicon.empty.quoted_content("\"", source)
     else if (kind == Token.Kind.ALT_STRING) Scan.Lexicon.empty.quoted_content("`", source)
     else if (kind == Token.Kind.VERBATIM) Scan.Lexicon.empty.verbatim_content(source)
+    else if (kind == Token.Kind.CARTOUCHE) Scan.Lexicon.empty.cartouche_content(source)
     else if (kind == Token.Kind.COMMENT) Scan.Lexicon.empty.comment_content(source)
     else source
 
--- a/src/Pure/PIDE/markup.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/PIDE/markup.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -59,6 +59,7 @@
   val literalN: string val literal: T
   val delimiterN: string val delimiter: T
   val inner_stringN: string val inner_string: T
+  val inner_cartoucheN: string val inner_cartouche: T
   val inner_commentN: string val inner_comment: T
   val token_rangeN: string val token_range: T
   val sortN: string val sort: T
@@ -92,6 +93,7 @@
   val stringN: string val string: T
   val altstringN: string val altstring: T
   val verbatimN: string val verbatim: T
+  val cartoucheN: string val cartouche: T
   val commentN: string val comment: T
   val controlN: string val control: T
   val tokenN: string val token: Properties.T -> T
@@ -307,6 +309,7 @@
 val (literalN, literal) = markup_elem "literal";
 val (delimiterN, delimiter) = markup_elem "delimiter";
 val (inner_stringN, inner_string) = markup_elem "inner_string";
+val (inner_cartoucheN, inner_cartouche) = markup_elem "inner_cartouche";
 val (inner_commentN, inner_comment) = markup_elem "inner_comment";
 
 val (token_rangeN, token_range) = markup_elem "token_range";
@@ -361,6 +364,7 @@
 val (stringN, string) = markup_elem "string";
 val (altstringN, altstring) = markup_elem "altstring";
 val (verbatimN, verbatim) = markup_elem "verbatim";
+val (cartoucheN, cartouche) = markup_elem "cartouche";
 val (commentN, comment) = markup_elem "comment";
 val (controlN, control) = markup_elem "control";
 
--- a/src/Pure/PIDE/markup.scala	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/PIDE/markup.scala	Sat Jan 18 19:15:12 2014 +0100
@@ -133,6 +133,7 @@
   val LITERAL = "literal"
   val DELIMITER = "delimiter"
   val INNER_STRING = "inner_string"
+  val INNER_CARTOUCHE = "inner_cartouche"
   val INNER_COMMENT = "inner_comment"
 
   val TOKEN_RANGE = "token_range"
@@ -190,6 +191,7 @@
   val STRING = "string"
   val ALTSTRING = "altstring"
   val VERBATIM = "verbatim"
+  val CARTOUCHE = "cartouche"
   val COMMENT = "comment"
   val CONTROL = "control"
 
--- a/src/Pure/Syntax/lexicon.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Syntax/lexicon.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -25,7 +25,7 @@
   val is_tid: string -> bool
   datatype token_kind =
     Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
-    NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF
+    NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF
   datatype token = Token of token_kind * string * Position.range
   val str_of_token: token -> string
   val pos_of_token: token -> Position.T
@@ -120,7 +120,7 @@
 
 datatype token_kind =
   Literal | IdentSy | LongIdentSy | VarSy | TFreeSy | TVarSy |
-  NumSy | FloatSy | XNumSy | StrSy | Space | Comment | EOF;
+  NumSy | FloatSy | XNumSy | StrSy | Cartouche | Space | Comment | EOF;
 
 datatype token = Token of token_kind * string * Position.range;
 
@@ -160,7 +160,8 @@
   ("num_token", NumSy),
   ("float_token", FloatSy),
   ("xnum_token", XNumSy),
-  ("str_token", StrSy)];
+  ("str_token", StrSy),
+  ("cartouche", Cartouche)];
 
 val terminals = map #1 terminal_kinds;
 val is_terminal = member (op =) terminals;
@@ -175,13 +176,14 @@
 
 val token_kind_markup =
  fn TFreeSy => Markup.tfree
-  | TVarSy  => Markup.tvar
-  | NumSy   => Markup.numeral
+  | TVarSy => Markup.tvar
+  | NumSy => Markup.numeral
   | FloatSy => Markup.numeral
-  | XNumSy  => Markup.numeral
-  | StrSy   => Markup.inner_string
+  | XNumSy => Markup.numeral
+  | StrSy => Markup.inner_string
+  | Cartouche => Markup.inner_cartouche
   | Comment => Markup.inner_comment
-  | _       => Markup.empty;
+  | _ => Markup.empty;
 
 fun report_of_token (Token (kind, s, (pos, _))) =
   let val markup = if kind = Literal then literal_markup s else token_kind_markup kind
@@ -267,6 +269,7 @@
     val scan_lit = Scan.literal lex >> token Literal;
 
     val scan_token =
+      Symbol_Pos.scan_cartouche !!! >> token Cartouche ||
       Symbol_Pos.scan_comment !!! >> token Comment ||
       Scan.max token_leq scan_lit scan_val ||
       scan_str >> token StrSy ||
--- a/src/Pure/Thy/html.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Thy/html.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -188,6 +188,8 @@
     ("\\<longleftrightarrow>", (3, "&lt;-&gt;")),
     ("\\<longrightarrow>", (3, "--&gt;")),
     ("\\<rightarrow>", (2, "-&gt;")),
+    ("\\<open>", (1, "&#8249;")),
+    ("\\<close>", (1, "&#8250;")),
     ("\\<^bsub>", (0, hidden "&#8664;" ^ "<sub>")),
     ("\\<^esub>", (0, hidden "&#8665;" ^ "</sub>")),
     ("\\<^bsup>", (0, hidden "&#8663;" ^ "<sup>")),
--- a/src/Pure/Thy/latex.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Thy/latex.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -125,6 +125,8 @@
         val ants = Antiquote.read (Symbol_Pos.explode (txt, pos), pos);
         val out = implode (map output_syms_antiq ants);
       in enclose "{\\isacharverbatimopen}" "{\\isacharverbatimclose}" out end
+    else if Token.is_kind Token.Cartouche tok then
+      enclose "{\\isacartoucheopen}" "{\\isacartoucheclose}" (output_syms s)
     else output_syms s
   end;
 
--- a/src/Pure/Thy/thy_syntax.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/Thy/thy_syntax.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -56,6 +56,7 @@
   | Token.String        => (Markup.string, "")
   | Token.AltString     => (Markup.altstring, "")
   | Token.Verbatim      => (Markup.verbatim, "")
+  | Token.Cartouche     => (Markup.cartouche, "")
   | Token.Space         => (Markup.empty, "")
   | Token.Comment       => (Markup.comment, "")
   | Token.InternalValue => (Markup.empty, "")
--- a/src/Pure/library.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/library.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -139,6 +139,7 @@
   val enclose: string -> string -> string -> string
   val unenclose: string -> string
   val quote: string -> string
+  val cartouche: string -> string
   val space_implode: string -> string list -> string
   val commas: string list -> string
   val commas_quote: string list -> string
@@ -729,6 +730,8 @@
 (*simple quoting (does not escape special chars)*)
 val quote = enclose "\"" "\"";
 
+val cartouche = enclose "\\<open>" "\\<close>";
+
 fun space_implode a bs = implode (separate a bs);
 
 val commas = space_implode ", ";
--- a/src/Pure/library.scala	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/library.scala	Sat Jan 18 19:15:12 2014 +0100
@@ -110,6 +110,9 @@
   def try_unprefix(prfx: String, s: String): Option[String] =
     if (s.startsWith(prfx)) Some(s.substring(prfx.length)) else None
 
+  def try_unsuffix(sffx: String, s: String): Option[String] =
+    if (s.endsWith(sffx)) Some(s.substring(0, s.length - sffx.length)) else None
+
   def trim_line(s: String): String =
     if (s.endsWith("\r\n")) s.substring(0, s.length - 2)
     else if (s.endsWith("\r") || s.endsWith("\n")) s.substring(0, s.length - 1)
--- a/src/Pure/pure_thy.ML	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Pure/pure_thy.ML	Sat Jan 18 19:15:12 2014 +0100
@@ -71,7 +71,7 @@
         "any", "prop'", "num_const", "float_const", "xnum_const", "num_position",
         "float_position", "xnum_position", "index", "struct", "tid_position",
         "tvar_position", "id_position", "longid_position", "var_position", "str_position",
-        "type_name", "class_name"]))
+        "cartouche_position", "type_name", "class_name"]))
   #> Sign.add_syntax_i (map (fn x => (x, typ "'a", NoSyn)) token_markers)
   #> Sign.add_syntax_i
    [("",            typ "prop' => prop",               Delimfix "_"),
@@ -155,6 +155,7 @@
     ("_position",   typ "longid => longid_position",   Delimfix "_"),
     ("_position",   typ "var => var_position",         Delimfix "_"),
     ("_position",   typ "str_token => str_position",   Delimfix "_"),
+    ("_position",   typ "cartouche => cartouche_position", Delimfix "_"),
     ("_type_constraint_", typ "'a",                    NoSyn),
     ("_context_const", typ "id_position => logic",     Delimfix "CONST _"),
     ("_context_const", typ "id_position => aprop",     Delimfix "CONST _"),
--- a/src/Tools/jEdit/etc/options	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Tools/jEdit/etc/options	Sat Jan 18 19:15:12 2014 +0100
@@ -92,6 +92,7 @@
 option var_color : string = "00009BFF"
 option inner_numeral_color : string = "FF0000FF"
 option inner_quoted_color : string = "D2691EFF"
+option inner_cartouche_color : string = "CC6600FF"
 option inner_comment_color : string = "8B0000FF"
 option dynamic_color : string = "7BA428FF"
 
--- a/src/Tools/jEdit/src/rendering.scala	Fri Jan 17 20:51:36 2014 +0100
+++ b/src/Tools/jEdit/src/rendering.scala	Sat Jan 18 19:15:12 2014 +0100
@@ -99,6 +99,7 @@
       Token.Kind.STRING -> LITERAL1,
       Token.Kind.ALT_STRING -> LITERAL2,
       Token.Kind.VERBATIM -> COMMENT3,
+      Token.Kind.CARTOUCHE -> COMMENT4,
       Token.Kind.SPACE -> NULL,
       Token.Kind.COMMENT -> COMMENT1,
       Token.Kind.ERROR -> INVALID
@@ -156,6 +157,7 @@
   val var_color = color_value("var_color")
   val inner_numeral_color = color_value("inner_numeral_color")
   val inner_quoted_color = color_value("inner_quoted_color")
+  val inner_cartouche_color = color_value("inner_cartouche_color")
   val inner_comment_color = color_value("inner_comment_color")
   val dynamic_color = color_value("dynamic_color")
 
@@ -593,6 +595,7 @@
       Markup.BOUND -> bound_color,
       Markup.VAR -> var_color,
       Markup.INNER_STRING -> inner_quoted_color,
+      Markup.INNER_CARTOUCHE -> inner_cartouche_color,
       Markup.INNER_COMMENT -> inner_comment_color,
       Markup.DYNAMIC_FACT -> dynamic_color,
       Markup.ML_KEYWORD -> keyword1_color,