discontinued raw symbols;
authorwenzelm
Thu, 22 Sep 2016 11:25:27 +0200
changeset 63936 b87784e19a77
parent 63935 aa1fe1103ab8
child 63937 45ed7d0aeb6f
discontinued raw symbols; discontinued Symbol.source; use initial Symbol.explode;
NEWS
src/Doc/Implementation/ML.thy
src/HOL/Tools/Sledgehammer/sledgehammer_util.ML
src/Pure/General/symbol.ML
src/Pure/General/symbol.scala
src/Pure/General/symbol_explode.ML
src/Pure/Isar/outer_syntax.ML
src/Pure/Isar/token.ML
src/Pure/ML/ml_lex.ML
src/Pure/Thy/latex.ML
src/Pure/Thy/thy_header.ML
--- a/NEWS	Thu Sep 22 00:12:17 2016 +0200
+++ b/NEWS	Thu Sep 22 11:25:27 2016 +0200
@@ -37,6 +37,8 @@
 symbol \<^raw:...>. INCOMPATIBILITY, notably for LaTeXsugar.thy and its
 derivatives.
 
+* \<^raw:...> symbols are no longer supported.
+
 * New symbol \<circle>, e.g. for temporal operator.
 
 * Old 'header' command is no longer supported (legacy since
--- a/src/Doc/Implementation/ML.thy	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Doc/Implementation/ML.thy	Thu Sep 22 11:25:27 2016 +0200
@@ -1217,13 +1217,6 @@
 
     \<^enum> a control symbol ``\<^verbatim>\<open>\<^ident>\<close>'', for example ``\<^verbatim>\<open>\<^bold>\<close>'',
 
-    \<^enum> a raw symbol ``\<^verbatim>\<open>\\<close>\<^verbatim>\<open><^raw:\<close>\<open>text\<close>\<^verbatim>\<open>>\<close>'' where \<open>text\<close> consists of
-    printable characters excluding ``\<^verbatim>\<open>.\<close>'' and ``\<^verbatim>\<open>>\<close>'', for example
-    ``\<^verbatim>\<open>\<^raw:$\sum_{i = 1}^n$>\<close>'',
-
-    \<^enum> a numbered raw control symbol ``\<^verbatim>\<open>\\<close>\<^verbatim>\<open><^raw\<close>\<open>n\<close>\<^verbatim>\<open>>\<close>, where \<open>n\<close> consists
-    of digits, for example ``\<^verbatim>\<open>\<^raw42>\<close>''.
-
   The \<open>ident\<close> syntax for symbol names is \<open>letter (letter | digit)\<^sup>*\<close>, where
   \<open>letter = A..Za..z\<close> and \<open>digit = 0..9\<close>. There are infinitely many regular
   symbols and control symbols, but a fixed collection of standard symbols is
@@ -1275,7 +1268,7 @@
   \<^descr> Type @{ML_type "Symbol.sym"} is a concrete datatype that represents the
   different kinds of symbols explicitly, with constructors @{ML
   "Symbol.Char"}, @{ML "Symbol.UTF8"}, @{ML "Symbol.Sym"}, @{ML
-  "Symbol.Control"}, @{ML "Symbol.Raw"}, @{ML "Symbol.Malformed"}.
+  "Symbol.Control"}, @{ML "Symbol.Malformed"}.
 
   \<^descr> @{ML "Symbol.decode"} converts the string representation of a symbol into
   the datatype version.
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -125,8 +125,8 @@
     val get = maps (Proof_Context.get_fact ctxt o fst)
     val keywords = Thy_Header.get_keywords' ctxt
   in
-    Source.of_string name
-    |> Symbol.source
+    Symbol.explode name
+    |> Source.of_list
     |> Token.source keywords Position.start
     |> Token.source_proper
     |> Source.source Token.stopper (Parse.thms1 >> get)
--- a/src/Pure/General/symbol.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/General/symbol.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -1,5 +1,5 @@
 (*  Title:      Pure/General/symbol.ML
-    Author:     Markus Wenzel, TU Muenchen
+    Author:     Makarius
 
 Generalized characters with infinitely many named symbols.
 *)
@@ -7,6 +7,7 @@
 signature SYMBOL =
 sig
   type symbol = string
+  val explode: string -> symbol list
   val spaces: int -> string
   val STX: symbol
   val DEL: symbol
@@ -41,12 +42,8 @@
   val to_ascii_upper: symbol -> symbol
   val is_ascii_identifier: string -> bool
   val scan_ascii_id: string list -> string * string list
-  val is_raw: symbol -> bool
-  val decode_raw: symbol -> string
-  val encode_raw: string -> string
   datatype sym =
-    Char of string | UTF8 of string | Sym of string | Control of string | Raw of string |
-    Malformed of string | EOF
+    Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF
   val decode: symbol -> sym
   datatype kind = Letter | Digit | Quasi | Blank | Other
   val kind: symbol -> kind
@@ -58,8 +55,6 @@
   val is_quasi_letter: symbol -> bool
   val is_letdig: symbol -> bool
   val beginning: int -> symbol list -> string
-  val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source
-  val explode: string -> symbol list
   val esc: symbol -> string
   val escape: string -> string
   val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
@@ -81,11 +76,10 @@
 (*Symbols, which are considered the smallest entities of any Isabelle
   string, may be of the following form:
 
-    (1) ASCII symbols: a
+    (1) ASCII: a
+    (2) UTF-8: รค
     (2) regular symbols: \<ident>
     (3) control symbols: \<^ident>
-    (4) raw control symbols: \<^raw:...>, where "..." may be any printable
-        character (excluding ".", ">"), or \<^raw000>
 
   Output is subject to the print_mode variable (default: verbatim),
   actual interpretation in display is up to front-end tools.
@@ -191,31 +185,6 @@
 val scan_ascii_id = Scan.one is_ascii_letter ^^ (Scan.many is_ascii_letdig >> implode);
 
 
-(* encode_raw *)
-
-fun raw_chr c =
-  is_char c andalso
-    (ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"
-      orelse ord c >= 128);
-
-fun encode_raw "" = ""
-  | encode_raw str =
-      let
-        val raw0 = enclose "\092<^raw:" ">";
-        val raw1 = raw0 o implode;
-        val raw2 = enclose "\092<^raw" ">" o string_of_int o ord;
-
-        fun encode cs = enc (take_prefix raw_chr cs)
-        and enc ([], []) = []
-          | enc (cs, []) = [raw1 cs]
-          | enc ([], d :: ds) = raw2 d :: encode ds
-          | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
-      in
-        if exists_string (not o raw_chr) str then implode (encode (raw_explode str))
-        else raw0 str
-      end;
-
-
 (* diagnostics *)
 
 fun beginning n cs =
@@ -230,28 +199,15 @@
   end;
 
 
-(* decode_raw *)
-
-fun is_raw s =
-  String.isPrefix "\092<^raw" s andalso String.isSuffix ">" s;
-
-fun decode_raw s =
-  if not (is_raw s) then error (malformed_msg s)
-  else if String.isPrefix "\092<^raw:" s then String.substring (s, 7, size s - 8)
-  else chr (#1 (Library.read_int (raw_explode (String.substring (s, 6, size s - 7)))));
-
-
 (* symbol variants *)
 
 datatype sym =
-  Char of string | UTF8 of string | Sym of string | Control of string | Raw of string |
-  Malformed of string | EOF;
+  Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF;
 
 fun decode s =
   if s = "" then EOF
   else if is_char s then Char s
   else if is_utf8 s then UTF8 s
-  else if is_raw s then Raw (decode_raw s)
   else if is_malformed s then Malformed s
   else if is_control s then Control (String.substring (s, 3, size s - 4))
   else Sym (String.substring (s, 2, size s - 3));
@@ -429,65 +385,6 @@
 fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;
 
 
-
-(** symbol input **)
-
-(* source *)
-
-local
-
-fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\";
-
-fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;
-
-fun implode_pseudo_utf8 (cs as ["\192", c]) =
-      if ord c < 160 then chr (ord c - 128) else implode cs
-  | implode_pseudo_utf8 cs = implode cs;
-
-val scan_encoded_newline =
-  $$ "\^M" -- $$ "\n" >> K "\n" ||
-  $$ "\^M" >> K "\n";
-
-val scan_raw =
-  Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
-  Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
-
-val scan_total =
-  Scan.one is_plain ||
-  Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 ||
-  scan_encoded_newline ||
-  ($$ "\\" ^^ $$ "<" ^^
-    (($$ "^" ^^ Scan.optional (scan_raw || scan_ascii_id) "" || Scan.optional scan_ascii_id "") ^^
-      Scan.optional ($$ ">") "")) ||
-  Scan.one not_eof;
-
-in
-
-fun source src = Source.source stopper (Scan.bulk scan_total) src;
-
-end;
-
-
-(* explode *)
-
-local
-
-fun no_explode [] = true
-  | no_explode ("\\" :: "<" :: _) = false
-  | no_explode ("\^M" :: _) = false
-  | no_explode (c :: cs) = is_ascii c andalso no_explode cs;
-
-in
-
-fun sym_explode str =
-  let val chs = raw_explode str in
-    if no_explode chs then chs
-    else Source.exhaust (source (Source.of_list chs))
-  end;
-
-end;
-
-
 (* escape *)
 
 val esc = fn s =>
@@ -495,7 +392,7 @@
   else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s
   else "\\" ^ s;
 
-val escape = implode o map esc o sym_explode;
+val escape = implode o map esc o Symbol.explode;
 
 
 
@@ -523,12 +420,12 @@
 
 val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I);
 
-val explode_words = split_words o sym_explode;
+val explode_words = split_words o Symbol.explode;
 
 
 (* blanks *)
 
-val trim_blanks = sym_explode #> trim is_blank #> implode;
+val trim_blanks = Symbol.explode #> trim is_blank #> implode;
 
 
 (* bump string -- treat as base 26 or base 1 numbers *)
@@ -539,7 +436,7 @@
   | symbolic_end [] = false;
 
 fun bump_init str =
-  if symbolic_end (rev (sym_explode str)) then str ^ "'"
+  if symbolic_end (rev (Symbol.explode str)) then str ^ "'"
   else str ^ "a";
 
 fun bump_string str =
@@ -551,7 +448,7 @@
           then chr (ord s + 1) :: ss
           else "a" :: s :: ss;
 
-    val (ss, qs) = apfst rev (take_suffix is_quasi (sym_explode str));
+    val (ss, qs) = apfst rev (take_suffix is_quasi (Symbol.explode str));
     val ss' = if symbolic_end ss then "'" :: ss else bump ss;
   in implode (rev ss' @ qs) end;
 
@@ -574,11 +471,11 @@
 
 val xsymbolsN = "xsymbols";
 
-fun output s = (s, sym_length (sym_explode s));
+fun output s = (s, sym_length (Symbol.explode s));
 
 
 (*final declarations of this structure!*)
-val explode = sym_explode;
+val explode = Symbol.explode;
 val length = sym_length;
 
 end;
--- a/src/Pure/General/symbol.scala	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/General/symbol.scala	Thu Sep 22 11:25:27 2016 +0200
@@ -64,9 +64,7 @@
   /* symbol matching */
 
   private val symbol_total = new Regex("""(?xs)
-    [\ud800-\udbff][\udc00-\udfff] | \r\n |
-    \\ < (?: \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* | \^? ([A-Za-z][A-Za-z0-9_']*)? ) >? |
-    .""")
+    [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
 
   private def is_plain(c: Char): Boolean =
     !(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
--- a/src/Pure/General/symbol_explode.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/General/symbol_explode.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -1,19 +1,12 @@
-(*  Title:      Pure/General/symbol.ML
+(*  Title:      Pure/General/symbol_explode.ML
     Author:     Makarius
 
-String explode operation for Isabelle symbols.
+String explode operation for Isabelle symbols (see also symbol.ML).
 *)
 
-signature SYMBOL_EXPLODE =
-sig
-  val symbol_explode: string -> string list
-end;
-
-structure Symbol_Explode: SYMBOL_EXPLODE =
+structure Symbol: sig val explode: string -> string list end =
 struct
 
-local
-
 fun is_ascii_letter c = #"A" <= c andalso c <= #"Z" orelse #"a" <= c andalso c <= #"z";
 fun is_ascii_letdig c =
   is_ascii_letter c orelse #"0" <= c andalso c <= #"9" orelse c = #"_" orelse c = #"'";
@@ -22,9 +15,7 @@
 fun is_utf8_trailer c = #"\128" <= c andalso c < #"\192";
 fun is_utf8_control c = #"\128" <= c andalso c < #"\160";
 
-in
-
-fun symbol_explode string =
+fun explode string =
   let
     fun char i = String.sub (string, i);
     fun string_range i j = String.substring (string, i, j - i);
@@ -36,29 +27,27 @@
     fun maybe_char c = maybe (fn c' => c = c');
     fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i;
 
-    fun explode i =
+    fun scan i =
       if i < n then
         let val ch = char i in
           (*encoded newline*)
-          if ch = #"\^M" then "\n" :: explode (maybe_char #"\n" (i + 1))
+          if ch = #"\^M" then "\n" :: scan (maybe_char #"\n" (i + 1))
           (*pseudo utf8: encoded ascii control*)
           else if ch = #"\192" andalso
             test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2))
-          then chr (Char.ord (char (i + 1)) - 128) :: explode (i + 2)
+          then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2)
           (*utf8*)
           else if is_utf8 ch then
             let val j = many is_utf8_trailer (i + 1)
-            in string_range i j :: explode j end
+            in string_range i j :: scan j end
           (*named symbol*)
           else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then
             let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">"
-            in string_range i j :: explode j end
+            in string_range i j :: scan j end
           (*single character*)
-          else String.str ch :: explode (i + 1)
+          else String.str ch :: scan (i + 1)
         end
       else [];
-  in explode 0 end;
+  in scan 0 end;
 
 end;
-
-end;
--- a/src/Pure/Isar/outer_syntax.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Isar/outer_syntax.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -220,17 +220,17 @@
 
 in
 
-fun parse thy pos str =
-  Source.of_string str
-  |> Symbol.source
-  |> Token.source (Thy_Header.get_keywords thy) pos
-  |> commands_source thy
-  |> Source.exhaust;
+fun parse thy pos =
+  Symbol.explode
+  #> Source.of_list
+  #> Token.source (Thy_Header.get_keywords thy) pos
+  #> commands_source thy
+  #> Source.exhaust;
 
-fun parse_tokens thy toks =
-  Source.of_list toks
-  |> commands_source thy
-  |> Source.exhaust;
+fun parse_tokens thy =
+  Source.of_list
+  #> commands_source thy
+  #> Source.exhaust;
 
 end;
 
--- a/src/Pure/Isar/token.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Isar/token.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -650,8 +650,8 @@
 (* explode *)
 
 fun explode keywords pos =
-  Source.of_string #>
-  Symbol.source #>
+  Symbol.explode #>
+  Source.of_list #>
   source keywords pos #>
   Source.exhaust;
 
--- a/src/Pure/ML/ml_lex.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/ML/ml_lex.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -351,7 +351,7 @@
   Symbol_Pos.source (Position.line 1) src
   |> Source.source Symbol_Pos.stopper (Scan.recover (Scan.bulk (!!! "bad input" scan_ml)) recover);
 
-val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust;
+val tokenize = Symbol.explode #> Source.of_list #> source #> Source.exhaust;
 
 val read_pos = gen_read false;
 
--- a/src/Pure/Thy/latex.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Thy/latex.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -103,7 +103,6 @@
   | Symbol.UTF8 s => s
   | Symbol.Sym s => if known_sym s then enclose "{\\isasym" "}" s else output_chrs sym
   | Symbol.Control s => if known_ctrl s then enclose "\\isactrl" " " s else output_chrs sym
-  | Symbol.Raw _ => error "Bad raw symbol"
   | Symbol.Malformed s => error (Symbol.malformed_msg s)
   | Symbol.EOF => error "Bad EOF symbol");
 
--- a/src/Pure/Thy/thy_header.ML	Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Thy/thy_header.ML	Thu Sep 22 11:25:27 2016 +0200
@@ -168,11 +168,10 @@
 val header =
   (Scan.repeat heading -- Parse.command theoryN -- Parse.tags) |-- Parse.!!! args;
 
-fun token_source pos str =
-  str
-  |> Source.of_string_limited 8000
-  |> Symbol.source
-  |> Token.source_strict bootstrap_keywords pos;
+fun token_source pos =
+  Symbol.explode
+  #> Source.of_list
+  #> Token.source_strict bootstrap_keywords pos;
 
 fun read_source pos source =
   let val res =