discontinued raw symbols;
discontinued Symbol.source;
use initial Symbol.explode;
--- a/NEWS Thu Sep 22 00:12:17 2016 +0200
+++ b/NEWS Thu Sep 22 11:25:27 2016 +0200
@@ -37,6 +37,8 @@
symbol \<^raw:...>. INCOMPATIBILITY, notably for LaTeXsugar.thy and its
derivatives.
+* \<^raw:...> symbols are no longer supported.
+
* New symbol \<circle>, e.g. for temporal operator.
* Old 'header' command is no longer supported (legacy since
--- a/src/Doc/Implementation/ML.thy Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Doc/Implementation/ML.thy Thu Sep 22 11:25:27 2016 +0200
@@ -1217,13 +1217,6 @@
\<^enum> a control symbol ``\<^verbatim>\<open>\<^ident>\<close>'', for example ``\<^verbatim>\<open>\<^bold>\<close>'',
- \<^enum> a raw symbol ``\<^verbatim>\<open>\\<close>\<^verbatim>\<open><^raw:\<close>\<open>text\<close>\<^verbatim>\<open>>\<close>'' where \<open>text\<close> consists of
- printable characters excluding ``\<^verbatim>\<open>.\<close>'' and ``\<^verbatim>\<open>>\<close>'', for example
- ``\<^verbatim>\<open>\<^raw:$\sum_{i = 1}^n$>\<close>'',
-
- \<^enum> a numbered raw control symbol ``\<^verbatim>\<open>\\<close>\<^verbatim>\<open><^raw\<close>\<open>n\<close>\<^verbatim>\<open>>\<close>, where \<open>n\<close> consists
- of digits, for example ``\<^verbatim>\<open>\<^raw42>\<close>''.
-
The \<open>ident\<close> syntax for symbol names is \<open>letter (letter | digit)\<^sup>*\<close>, where
\<open>letter = A..Za..z\<close> and \<open>digit = 0..9\<close>. There are infinitely many regular
symbols and control symbols, but a fixed collection of standard symbols is
@@ -1275,7 +1268,7 @@
\<^descr> Type @{ML_type "Symbol.sym"} is a concrete datatype that represents the
different kinds of symbols explicitly, with constructors @{ML
"Symbol.Char"}, @{ML "Symbol.UTF8"}, @{ML "Symbol.Sym"}, @{ML
- "Symbol.Control"}, @{ML "Symbol.Raw"}, @{ML "Symbol.Malformed"}.
+ "Symbol.Control"}, @{ML "Symbol.Malformed"}.
\<^descr> @{ML "Symbol.decode"} converts the string representation of a symbol into
the datatype version.
--- a/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML Thu Sep 22 11:25:27 2016 +0200
@@ -125,8 +125,8 @@
val get = maps (Proof_Context.get_fact ctxt o fst)
val keywords = Thy_Header.get_keywords' ctxt
in
- Source.of_string name
- |> Symbol.source
+ Symbol.explode name
+ |> Source.of_list
|> Token.source keywords Position.start
|> Token.source_proper
|> Source.source Token.stopper (Parse.thms1 >> get)
--- a/src/Pure/General/symbol.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/General/symbol.ML Thu Sep 22 11:25:27 2016 +0200
@@ -1,5 +1,5 @@
(* Title: Pure/General/symbol.ML
- Author: Markus Wenzel, TU Muenchen
+ Author: Makarius
Generalized characters with infinitely many named symbols.
*)
@@ -7,6 +7,7 @@
signature SYMBOL =
sig
type symbol = string
+ val explode: string -> symbol list
val spaces: int -> string
val STX: symbol
val DEL: symbol
@@ -41,12 +42,8 @@
val to_ascii_upper: symbol -> symbol
val is_ascii_identifier: string -> bool
val scan_ascii_id: string list -> string * string list
- val is_raw: symbol -> bool
- val decode_raw: symbol -> string
- val encode_raw: string -> string
datatype sym =
- Char of string | UTF8 of string | Sym of string | Control of string | Raw of string |
- Malformed of string | EOF
+ Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF
val decode: symbol -> sym
datatype kind = Letter | Digit | Quasi | Blank | Other
val kind: symbol -> kind
@@ -58,8 +55,6 @@
val is_quasi_letter: symbol -> bool
val is_letdig: symbol -> bool
val beginning: int -> symbol list -> string
- val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source
- val explode: string -> symbol list
val esc: symbol -> string
val escape: string -> string
val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a
@@ -81,11 +76,10 @@
(*Symbols, which are considered the smallest entities of any Isabelle
string, may be of the following form:
- (1) ASCII symbols: a
+ (1) ASCII: a
+ (2) UTF-8: รค
(2) regular symbols: \<ident>
(3) control symbols: \<^ident>
- (4) raw control symbols: \<^raw:...>, where "..." may be any printable
- character (excluding ".", ">"), or \<^raw000>
Output is subject to the print_mode variable (default: verbatim),
actual interpretation in display is up to front-end tools.
@@ -191,31 +185,6 @@
val scan_ascii_id = Scan.one is_ascii_letter ^^ (Scan.many is_ascii_letdig >> implode);
-(* encode_raw *)
-
-fun raw_chr c =
- is_char c andalso
- (ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"
- orelse ord c >= 128);
-
-fun encode_raw "" = ""
- | encode_raw str =
- let
- val raw0 = enclose "\092<^raw:" ">";
- val raw1 = raw0 o implode;
- val raw2 = enclose "\092<^raw" ">" o string_of_int o ord;
-
- fun encode cs = enc (take_prefix raw_chr cs)
- and enc ([], []) = []
- | enc (cs, []) = [raw1 cs]
- | enc ([], d :: ds) = raw2 d :: encode ds
- | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;
- in
- if exists_string (not o raw_chr) str then implode (encode (raw_explode str))
- else raw0 str
- end;
-
-
(* diagnostics *)
fun beginning n cs =
@@ -230,28 +199,15 @@
end;
-(* decode_raw *)
-
-fun is_raw s =
- String.isPrefix "\092<^raw" s andalso String.isSuffix ">" s;
-
-fun decode_raw s =
- if not (is_raw s) then error (malformed_msg s)
- else if String.isPrefix "\092<^raw:" s then String.substring (s, 7, size s - 8)
- else chr (#1 (Library.read_int (raw_explode (String.substring (s, 6, size s - 7)))));
-
-
(* symbol variants *)
datatype sym =
- Char of string | UTF8 of string | Sym of string | Control of string | Raw of string |
- Malformed of string | EOF;
+ Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF;
fun decode s =
if s = "" then EOF
else if is_char s then Char s
else if is_utf8 s then UTF8 s
- else if is_raw s then Raw (decode_raw s)
else if is_malformed s then Malformed s
else if is_control s then Control (String.substring (s, 3, size s - 4))
else Sym (String.substring (s, 2, size s - 3));
@@ -429,65 +385,6 @@
fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;
-
-(** symbol input **)
-
-(* source *)
-
-local
-
-fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\";
-
-fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;
-
-fun implode_pseudo_utf8 (cs as ["\192", c]) =
- if ord c < 160 then chr (ord c - 128) else implode cs
- | implode_pseudo_utf8 cs = implode cs;
-
-val scan_encoded_newline =
- $$ "\^M" -- $$ "\n" >> K "\n" ||
- $$ "\^M" >> K "\n";
-
-val scan_raw =
- Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||
- Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);
-
-val scan_total =
- Scan.one is_plain ||
- Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 ||
- scan_encoded_newline ||
- ($$ "\\" ^^ $$ "<" ^^
- (($$ "^" ^^ Scan.optional (scan_raw || scan_ascii_id) "" || Scan.optional scan_ascii_id "") ^^
- Scan.optional ($$ ">") "")) ||
- Scan.one not_eof;
-
-in
-
-fun source src = Source.source stopper (Scan.bulk scan_total) src;
-
-end;
-
-
-(* explode *)
-
-local
-
-fun no_explode [] = true
- | no_explode ("\\" :: "<" :: _) = false
- | no_explode ("\^M" :: _) = false
- | no_explode (c :: cs) = is_ascii c andalso no_explode cs;
-
-in
-
-fun sym_explode str =
- let val chs = raw_explode str in
- if no_explode chs then chs
- else Source.exhaust (source (Source.of_list chs))
- end;
-
-end;
-
-
(* escape *)
val esc = fn s =>
@@ -495,7 +392,7 @@
else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s
else "\\" ^ s;
-val escape = implode o map esc o sym_explode;
+val escape = implode o map esc o Symbol.explode;
@@ -523,12 +420,12 @@
val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I);
-val explode_words = split_words o sym_explode;
+val explode_words = split_words o Symbol.explode;
(* blanks *)
-val trim_blanks = sym_explode #> trim is_blank #> implode;
+val trim_blanks = Symbol.explode #> trim is_blank #> implode;
(* bump string -- treat as base 26 or base 1 numbers *)
@@ -539,7 +436,7 @@
| symbolic_end [] = false;
fun bump_init str =
- if symbolic_end (rev (sym_explode str)) then str ^ "'"
+ if symbolic_end (rev (Symbol.explode str)) then str ^ "'"
else str ^ "a";
fun bump_string str =
@@ -551,7 +448,7 @@
then chr (ord s + 1) :: ss
else "a" :: s :: ss;
- val (ss, qs) = apfst rev (take_suffix is_quasi (sym_explode str));
+ val (ss, qs) = apfst rev (take_suffix is_quasi (Symbol.explode str));
val ss' = if symbolic_end ss then "'" :: ss else bump ss;
in implode (rev ss' @ qs) end;
@@ -574,11 +471,11 @@
val xsymbolsN = "xsymbols";
-fun output s = (s, sym_length (sym_explode s));
+fun output s = (s, sym_length (Symbol.explode s));
(*final declarations of this structure!*)
-val explode = sym_explode;
+val explode = Symbol.explode;
val length = sym_length;
end;
--- a/src/Pure/General/symbol.scala Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/General/symbol.scala Thu Sep 22 11:25:27 2016 +0200
@@ -64,9 +64,7 @@
/* symbol matching */
private val symbol_total = new Regex("""(?xs)
- [\ud800-\udbff][\udc00-\udfff] | \r\n |
- \\ < (?: \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* | \^? ([A-Za-z][A-Za-z0-9_']*)? ) >? |
- .""")
+ [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""")
private def is_plain(c: Char): Boolean =
!(c == '\r' || c == '\\' || Character.isHighSurrogate(c))
--- a/src/Pure/General/symbol_explode.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/General/symbol_explode.ML Thu Sep 22 11:25:27 2016 +0200
@@ -1,19 +1,12 @@
-(* Title: Pure/General/symbol.ML
+(* Title: Pure/General/symbol_explode.ML
Author: Makarius
-String explode operation for Isabelle symbols.
+String explode operation for Isabelle symbols (see also symbol.ML).
*)
-signature SYMBOL_EXPLODE =
-sig
- val symbol_explode: string -> string list
-end;
-
-structure Symbol_Explode: SYMBOL_EXPLODE =
+structure Symbol: sig val explode: string -> string list end =
struct
-local
-
fun is_ascii_letter c = #"A" <= c andalso c <= #"Z" orelse #"a" <= c andalso c <= #"z";
fun is_ascii_letdig c =
is_ascii_letter c orelse #"0" <= c andalso c <= #"9" orelse c = #"_" orelse c = #"'";
@@ -22,9 +15,7 @@
fun is_utf8_trailer c = #"\128" <= c andalso c < #"\192";
fun is_utf8_control c = #"\128" <= c andalso c < #"\160";
-in
-
-fun symbol_explode string =
+fun explode string =
let
fun char i = String.sub (string, i);
fun string_range i j = String.substring (string, i, j - i);
@@ -36,29 +27,27 @@
fun maybe_char c = maybe (fn c' => c = c');
fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i;
- fun explode i =
+ fun scan i =
if i < n then
let val ch = char i in
(*encoded newline*)
- if ch = #"\^M" then "\n" :: explode (maybe_char #"\n" (i + 1))
+ if ch = #"\^M" then "\n" :: scan (maybe_char #"\n" (i + 1))
(*pseudo utf8: encoded ascii control*)
else if ch = #"\192" andalso
test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2))
- then chr (Char.ord (char (i + 1)) - 128) :: explode (i + 2)
+ then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2)
(*utf8*)
else if is_utf8 ch then
let val j = many is_utf8_trailer (i + 1)
- in string_range i j :: explode j end
+ in string_range i j :: scan j end
(*named symbol*)
else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then
let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">"
- in string_range i j :: explode j end
+ in string_range i j :: scan j end
(*single character*)
- else String.str ch :: explode (i + 1)
+ else String.str ch :: scan (i + 1)
end
else [];
- in explode 0 end;
+ in scan 0 end;
end;
-
-end;
--- a/src/Pure/Isar/outer_syntax.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Isar/outer_syntax.ML Thu Sep 22 11:25:27 2016 +0200
@@ -220,17 +220,17 @@
in
-fun parse thy pos str =
- Source.of_string str
- |> Symbol.source
- |> Token.source (Thy_Header.get_keywords thy) pos
- |> commands_source thy
- |> Source.exhaust;
+fun parse thy pos =
+ Symbol.explode
+ #> Source.of_list
+ #> Token.source (Thy_Header.get_keywords thy) pos
+ #> commands_source thy
+ #> Source.exhaust;
-fun parse_tokens thy toks =
- Source.of_list toks
- |> commands_source thy
- |> Source.exhaust;
+fun parse_tokens thy =
+ Source.of_list
+ #> commands_source thy
+ #> Source.exhaust;
end;
--- a/src/Pure/Isar/token.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Isar/token.ML Thu Sep 22 11:25:27 2016 +0200
@@ -650,8 +650,8 @@
(* explode *)
fun explode keywords pos =
- Source.of_string #>
- Symbol.source #>
+ Symbol.explode #>
+ Source.of_list #>
source keywords pos #>
Source.exhaust;
--- a/src/Pure/ML/ml_lex.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/ML/ml_lex.ML Thu Sep 22 11:25:27 2016 +0200
@@ -351,7 +351,7 @@
Symbol_Pos.source (Position.line 1) src
|> Source.source Symbol_Pos.stopper (Scan.recover (Scan.bulk (!!! "bad input" scan_ml)) recover);
-val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust;
+val tokenize = Symbol.explode #> Source.of_list #> source #> Source.exhaust;
val read_pos = gen_read false;
--- a/src/Pure/Thy/latex.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Thy/latex.ML Thu Sep 22 11:25:27 2016 +0200
@@ -103,7 +103,6 @@
| Symbol.UTF8 s => s
| Symbol.Sym s => if known_sym s then enclose "{\\isasym" "}" s else output_chrs sym
| Symbol.Control s => if known_ctrl s then enclose "\\isactrl" " " s else output_chrs sym
- | Symbol.Raw _ => error "Bad raw symbol"
| Symbol.Malformed s => error (Symbol.malformed_msg s)
| Symbol.EOF => error "Bad EOF symbol");
--- a/src/Pure/Thy/thy_header.ML Thu Sep 22 00:12:17 2016 +0200
+++ b/src/Pure/Thy/thy_header.ML Thu Sep 22 11:25:27 2016 +0200
@@ -168,11 +168,10 @@
val header =
(Scan.repeat heading -- Parse.command theoryN -- Parse.tags) |-- Parse.!!! args;
-fun token_source pos str =
- str
- |> Source.of_string_limited 8000
- |> Symbol.source
- |> Token.source_strict bootstrap_keywords pos;
+fun token_source pos =
+ Symbol.explode
+ #> Source.of_list
+ #> Token.source_strict bootstrap_keywords pos;
fun read_source pos source =
let val res =