# HG changeset patch # User wenzelm # Date 1474536327 -7200 # Node ID b87784e19a7762643b37cdad8bfd1471cc0da28c # Parent aa1fe1103ab8bad99cca7eaa207d6a00d2a7df6d discontinued raw symbols; discontinued Symbol.source; use initial Symbol.explode; diff -r aa1fe1103ab8 -r b87784e19a77 NEWS --- a/NEWS Thu Sep 22 00:12:17 2016 +0200 +++ b/NEWS Thu Sep 22 11:25:27 2016 +0200 @@ -37,6 +37,8 @@ symbol \<^raw:...>. INCOMPATIBILITY, notably for LaTeXsugar.thy and its derivatives. +* \<^raw:...> symbols are no longer supported. + * New symbol \, e.g. for temporal operator. * Old 'header' command is no longer supported (legacy since diff -r aa1fe1103ab8 -r b87784e19a77 src/Doc/Implementation/ML.thy --- a/src/Doc/Implementation/ML.thy Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Doc/Implementation/ML.thy Thu Sep 22 11:25:27 2016 +0200 @@ -1217,13 +1217,6 @@ \<^enum> a control symbol ``\<^verbatim>\\<^ident>\'', for example ``\<^verbatim>\\<^bold>\'', - \<^enum> a raw symbol ``\<^verbatim>\\\\<^verbatim>\<^raw:\\text\\<^verbatim>\>\'' where \text\ consists of - printable characters excluding ``\<^verbatim>\.\'' and ``\<^verbatim>\>\'', for example - ``\<^verbatim>\\<^raw:$\sum_{i = 1}^n$>\'', - - \<^enum> a numbered raw control symbol ``\<^verbatim>\\\\<^verbatim>\<^raw\\n\\<^verbatim>\>\, where \n\ consists - of digits, for example ``\<^verbatim>\\<^raw42>\''. - The \ident\ syntax for symbol names is \letter (letter | digit)\<^sup>*\, where \letter = A..Za..z\ and \digit = 0..9\. There are infinitely many regular symbols and control symbols, but a fixed collection of standard symbols is @@ -1275,7 +1268,7 @@ \<^descr> Type @{ML_type "Symbol.sym"} is a concrete datatype that represents the different kinds of symbols explicitly, with constructors @{ML "Symbol.Char"}, @{ML "Symbol.UTF8"}, @{ML "Symbol.Sym"}, @{ML - "Symbol.Control"}, @{ML "Symbol.Raw"}, @{ML "Symbol.Malformed"}. + "Symbol.Control"}, @{ML "Symbol.Malformed"}. \<^descr> @{ML "Symbol.decode"} converts the string representation of a symbol into the datatype version. diff -r aa1fe1103ab8 -r b87784e19a77 src/HOL/Tools/Sledgehammer/sledgehammer_util.ML --- a/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/HOL/Tools/Sledgehammer/sledgehammer_util.ML Thu Sep 22 11:25:27 2016 +0200 @@ -125,8 +125,8 @@ val get = maps (Proof_Context.get_fact ctxt o fst) val keywords = Thy_Header.get_keywords' ctxt in - Source.of_string name - |> Symbol.source + Symbol.explode name + |> Source.of_list |> Token.source keywords Position.start |> Token.source_proper |> Source.source Token.stopper (Parse.thms1 >> get) diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/General/symbol.ML --- a/src/Pure/General/symbol.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/General/symbol.ML Thu Sep 22 11:25:27 2016 +0200 @@ -1,5 +1,5 @@ (* Title: Pure/General/symbol.ML - Author: Markus Wenzel, TU Muenchen + Author: Makarius Generalized characters with infinitely many named symbols. *) @@ -7,6 +7,7 @@ signature SYMBOL = sig type symbol = string + val explode: string -> symbol list val spaces: int -> string val STX: symbol val DEL: symbol @@ -41,12 +42,8 @@ val to_ascii_upper: symbol -> symbol val is_ascii_identifier: string -> bool val scan_ascii_id: string list -> string * string list - val is_raw: symbol -> bool - val decode_raw: symbol -> string - val encode_raw: string -> string datatype sym = - Char of string | UTF8 of string | Sym of string | Control of string | Raw of string | - Malformed of string | EOF + Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF val decode: symbol -> sym datatype kind = Letter | Digit | Quasi | Blank | Other val kind: symbol -> kind @@ -58,8 +55,6 @@ val is_quasi_letter: symbol -> bool val is_letdig: symbol -> bool val beginning: int -> symbol list -> string - val source: (string, 'a) Source.source -> (symbol, (string, 'a) Source.source) Source.source - val explode: string -> symbol list val esc: symbol -> string val escape: string -> string val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a @@ -81,11 +76,10 @@ (*Symbols, which are considered the smallest entities of any Isabelle string, may be of the following form: - (1) ASCII symbols: a + (1) ASCII: a + (2) UTF-8: รค (2) regular symbols: \ (3) control symbols: \<^ident> - (4) raw control symbols: \<^raw:...>, where "..." may be any printable - character (excluding ".", ">"), or \<^raw000> Output is subject to the print_mode variable (default: verbatim), actual interpretation in display is up to front-end tools. @@ -191,31 +185,6 @@ val scan_ascii_id = Scan.one is_ascii_letter ^^ (Scan.many is_ascii_letdig >> implode); -(* encode_raw *) - -fun raw_chr c = - is_char c andalso - (ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">" - orelse ord c >= 128); - -fun encode_raw "" = "" - | encode_raw str = - let - val raw0 = enclose "\092<^raw:" ">"; - val raw1 = raw0 o implode; - val raw2 = enclose "\092<^raw" ">" o string_of_int o ord; - - fun encode cs = enc (take_prefix raw_chr cs) - and enc ([], []) = [] - | enc (cs, []) = [raw1 cs] - | enc ([], d :: ds) = raw2 d :: encode ds - | enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds; - in - if exists_string (not o raw_chr) str then implode (encode (raw_explode str)) - else raw0 str - end; - - (* diagnostics *) fun beginning n cs = @@ -230,28 +199,15 @@ end; -(* decode_raw *) - -fun is_raw s = - String.isPrefix "\092<^raw" s andalso String.isSuffix ">" s; - -fun decode_raw s = - if not (is_raw s) then error (malformed_msg s) - else if String.isPrefix "\092<^raw:" s then String.substring (s, 7, size s - 8) - else chr (#1 (Library.read_int (raw_explode (String.substring (s, 6, size s - 7))))); - - (* symbol variants *) datatype sym = - Char of string | UTF8 of string | Sym of string | Control of string | Raw of string | - Malformed of string | EOF; + Char of string | UTF8 of string | Sym of string | Control of string | Malformed of string | EOF; fun decode s = if s = "" then EOF else if is_char s then Char s else if is_utf8 s then UTF8 s - else if is_raw s then Raw (decode_raw s) else if is_malformed s then Malformed s else if is_control s then Control (String.substring (s, 3, size s - 4)) else Sym (String.substring (s, 2, size s - 3)); @@ -429,65 +385,6 @@ fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end; - -(** symbol input **) - -(* source *) - -local - -fun is_plain s = is_ascii s andalso s <> "\^M" andalso s <> "\\"; - -fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192; - -fun implode_pseudo_utf8 (cs as ["\192", c]) = - if ord c < 160 then chr (ord c - 128) else implode cs - | implode_pseudo_utf8 cs = implode cs; - -val scan_encoded_newline = - $$ "\^M" -- $$ "\n" >> K "\n" || - $$ "\^M" >> K "\n"; - -val scan_raw = - Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) || - Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode); - -val scan_total = - Scan.one is_plain || - Scan.one is_utf8 ::: Scan.many is_utf8_trailer >> implode_pseudo_utf8 || - scan_encoded_newline || - ($$ "\\" ^^ $$ "<" ^^ - (($$ "^" ^^ Scan.optional (scan_raw || scan_ascii_id) "" || Scan.optional scan_ascii_id "") ^^ - Scan.optional ($$ ">") "")) || - Scan.one not_eof; - -in - -fun source src = Source.source stopper (Scan.bulk scan_total) src; - -end; - - -(* explode *) - -local - -fun no_explode [] = true - | no_explode ("\\" :: "<" :: _) = false - | no_explode ("\^M" :: _) = false - | no_explode (c :: cs) = is_ascii c andalso no_explode cs; - -in - -fun sym_explode str = - let val chs = raw_explode str in - if no_explode chs then chs - else Source.exhaust (source (Source.of_list chs)) - end; - -end; - - (* escape *) val esc = fn s => @@ -495,7 +392,7 @@ else if is_utf8 s then translate_string (fn c => "\\" ^ string_of_int (ord c)) s else "\\" ^ s; -val escape = implode o map esc o sym_explode; +val escape = implode o map esc o Symbol.explode; @@ -523,12 +420,12 @@ val split_words = scanner "Bad text" (Scan.repeat scan_word >> map_filter I); -val explode_words = split_words o sym_explode; +val explode_words = split_words o Symbol.explode; (* blanks *) -val trim_blanks = sym_explode #> trim is_blank #> implode; +val trim_blanks = Symbol.explode #> trim is_blank #> implode; (* bump string -- treat as base 26 or base 1 numbers *) @@ -539,7 +436,7 @@ | symbolic_end [] = false; fun bump_init str = - if symbolic_end (rev (sym_explode str)) then str ^ "'" + if symbolic_end (rev (Symbol.explode str)) then str ^ "'" else str ^ "a"; fun bump_string str = @@ -551,7 +448,7 @@ then chr (ord s + 1) :: ss else "a" :: s :: ss; - val (ss, qs) = apfst rev (take_suffix is_quasi (sym_explode str)); + val (ss, qs) = apfst rev (take_suffix is_quasi (Symbol.explode str)); val ss' = if symbolic_end ss then "'" :: ss else bump ss; in implode (rev ss' @ qs) end; @@ -574,11 +471,11 @@ val xsymbolsN = "xsymbols"; -fun output s = (s, sym_length (sym_explode s)); +fun output s = (s, sym_length (Symbol.explode s)); (*final declarations of this structure!*) -val explode = sym_explode; +val explode = Symbol.explode; val length = sym_length; end; diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/General/symbol.scala --- a/src/Pure/General/symbol.scala Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/General/symbol.scala Thu Sep 22 11:25:27 2016 +0200 @@ -64,9 +64,7 @@ /* symbol matching */ private val symbol_total = new Regex("""(?xs) - [\ud800-\udbff][\udc00-\udfff] | \r\n | - \\ < (?: \^raw: [\x20-\x7e\u0100-\uffff && [^.>]]* | \^? ([A-Za-z][A-Za-z0-9_']*)? ) >? | - .""") + [\ud800-\udbff][\udc00-\udfff] | \r\n | \\ < \^? ([A-Za-z][A-Za-z0-9_']*)? >? | .""") private def is_plain(c: Char): Boolean = !(c == '\r' || c == '\\' || Character.isHighSurrogate(c)) diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/General/symbol_explode.ML --- a/src/Pure/General/symbol_explode.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/General/symbol_explode.ML Thu Sep 22 11:25:27 2016 +0200 @@ -1,19 +1,12 @@ -(* Title: Pure/General/symbol.ML +(* Title: Pure/General/symbol_explode.ML Author: Makarius -String explode operation for Isabelle symbols. +String explode operation for Isabelle symbols (see also symbol.ML). *) -signature SYMBOL_EXPLODE = -sig - val symbol_explode: string -> string list -end; - -structure Symbol_Explode: SYMBOL_EXPLODE = +structure Symbol: sig val explode: string -> string list end = struct -local - fun is_ascii_letter c = #"A" <= c andalso c <= #"Z" orelse #"a" <= c andalso c <= #"z"; fun is_ascii_letdig c = is_ascii_letter c orelse #"0" <= c andalso c <= #"9" orelse c = #"_" orelse c = #"'"; @@ -22,9 +15,7 @@ fun is_utf8_trailer c = #"\128" <= c andalso c < #"\192"; fun is_utf8_control c = #"\128" <= c andalso c < #"\160"; -in - -fun symbol_explode string = +fun explode string = let fun char i = String.sub (string, i); fun string_range i j = String.substring (string, i, j - i); @@ -36,29 +27,27 @@ fun maybe_char c = maybe (fn c' => c = c'); fun maybe_ascii_id i = if test is_ascii_letter i then many is_ascii_letdig (i + 1) else i; - fun explode i = + fun scan i = if i < n then let val ch = char i in (*encoded newline*) - if ch = #"\^M" then "\n" :: explode (maybe_char #"\n" (i + 1)) + if ch = #"\^M" then "\n" :: scan (maybe_char #"\n" (i + 1)) (*pseudo utf8: encoded ascii control*) else if ch = #"\192" andalso test is_utf8_control (i + 1) andalso not (test is_utf8 (i + 2)) - then chr (Char.ord (char (i + 1)) - 128) :: explode (i + 2) + then chr (Char.ord (char (i + 1)) - 128) :: scan (i + 2) (*utf8*) else if is_utf8 ch then let val j = many is_utf8_trailer (i + 1) - in string_range i j :: explode j end + in string_range i j :: scan j end (*named symbol*) else if ch = #"\\" andalso test (fn c => c = #"<") (i + 1) then let val j = (i + 2) |> maybe_char #"^" |> maybe_ascii_id |> maybe_char #">" - in string_range i j :: explode j end + in string_range i j :: scan j end (*single character*) - else String.str ch :: explode (i + 1) + else String.str ch :: scan (i + 1) end else []; - in explode 0 end; + in scan 0 end; end; - -end; diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/Isar/outer_syntax.ML --- a/src/Pure/Isar/outer_syntax.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/Isar/outer_syntax.ML Thu Sep 22 11:25:27 2016 +0200 @@ -220,17 +220,17 @@ in -fun parse thy pos str = - Source.of_string str - |> Symbol.source - |> Token.source (Thy_Header.get_keywords thy) pos - |> commands_source thy - |> Source.exhaust; +fun parse thy pos = + Symbol.explode + #> Source.of_list + #> Token.source (Thy_Header.get_keywords thy) pos + #> commands_source thy + #> Source.exhaust; -fun parse_tokens thy toks = - Source.of_list toks - |> commands_source thy - |> Source.exhaust; +fun parse_tokens thy = + Source.of_list + #> commands_source thy + #> Source.exhaust; end; diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/Isar/token.ML --- a/src/Pure/Isar/token.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/Isar/token.ML Thu Sep 22 11:25:27 2016 +0200 @@ -650,8 +650,8 @@ (* explode *) fun explode keywords pos = - Source.of_string #> - Symbol.source #> + Symbol.explode #> + Source.of_list #> source keywords pos #> Source.exhaust; diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/ML/ml_lex.ML --- a/src/Pure/ML/ml_lex.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/ML/ml_lex.ML Thu Sep 22 11:25:27 2016 +0200 @@ -351,7 +351,7 @@ Symbol_Pos.source (Position.line 1) src |> Source.source Symbol_Pos.stopper (Scan.recover (Scan.bulk (!!! "bad input" scan_ml)) recover); -val tokenize = Source.of_string #> Symbol.source #> source #> Source.exhaust; +val tokenize = Symbol.explode #> Source.of_list #> source #> Source.exhaust; val read_pos = gen_read false; diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/Thy/latex.ML --- a/src/Pure/Thy/latex.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/Thy/latex.ML Thu Sep 22 11:25:27 2016 +0200 @@ -103,7 +103,6 @@ | Symbol.UTF8 s => s | Symbol.Sym s => if known_sym s then enclose "{\\isasym" "}" s else output_chrs sym | Symbol.Control s => if known_ctrl s then enclose "\\isactrl" " " s else output_chrs sym - | Symbol.Raw _ => error "Bad raw symbol" | Symbol.Malformed s => error (Symbol.malformed_msg s) | Symbol.EOF => error "Bad EOF symbol"); diff -r aa1fe1103ab8 -r b87784e19a77 src/Pure/Thy/thy_header.ML --- a/src/Pure/Thy/thy_header.ML Thu Sep 22 00:12:17 2016 +0200 +++ b/src/Pure/Thy/thy_header.ML Thu Sep 22 11:25:27 2016 +0200 @@ -168,11 +168,10 @@ val header = (Scan.repeat heading -- Parse.command theoryN -- Parse.tags) |-- Parse.!!! args; -fun token_source pos str = - str - |> Source.of_string_limited 8000 - |> Symbol.source - |> Token.source_strict bootstrap_keywords pos; +fun token_source pos = + Symbol.explode + #> Source.of_list + #> Token.source_strict bootstrap_keywords pos; fun read_source pos source = let val res =