| author | wenzelm | 
| Thu, 04 Jun 2009 17:31:37 +0200 | |
| changeset 31425 | e8d5417a1831 | 
| parent 31013 | 69a476d6fea6 | 
| child 31545 | 5f1f0a20af4d | 
| permissions | -rw-r--r-- | 
| 6118 | 1  | 
(* Title: Pure/General/symbol.ML  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
2  | 
Author: Markus Wenzel, TU Muenchen  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
3  | 
|
| 21897 | 4  | 
Generalized characters with infinitely many named symbols.  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
5  | 
*)  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
6  | 
|
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
7  | 
signature SYMBOL =  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
8  | 
sig  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
9  | 
type symbol  | 
| 26524 | 10  | 
val SOH: symbol  | 
11  | 
val STX: symbol  | 
|
| 
26538
 
d65504ffb47d
replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
 
wenzelm 
parents: 
26524 
diff
changeset
 | 
12  | 
val ENQ: symbol  | 
| 
 
d65504ffb47d
replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
 
wenzelm 
parents: 
26524 
diff
changeset
 | 
13  | 
val ACK: symbol  | 
| 26524 | 14  | 
val DEL: symbol  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
15  | 
val space: symbol  | 
| 23618 | 16  | 
val spaces: int -> string  | 
| 14678 | 17  | 
val is_char: symbol -> bool  | 
18  | 
val is_symbolic: symbol -> bool  | 
|
19  | 
val is_printable: symbol -> bool  | 
|
| 26632 | 20  | 
val is_utf8_trailer: symbol -> bool  | 
| 31013 | 21  | 
val name_of: symbol -> string  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
22  | 
val eof: symbol  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
23  | 
val is_eof: symbol -> bool  | 
| 27766 | 24  | 
val not_eof: symbol -> bool  | 
| 27732 | 25  | 
val stopper: symbol Scan.stopper  | 
| 14678 | 26  | 
val sync: symbol  | 
27  | 
val is_sync: symbol -> bool  | 
|
28  | 
val malformed: symbol  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
29  | 
val end_malformed: symbol  | 
| 25641 | 30  | 
val separate_chars: string -> string  | 
| 
23784
 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23728 
diff
changeset
 | 
31  | 
val is_regular: symbol -> bool  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
32  | 
val is_ascii: symbol -> bool  | 
| 14678 | 33  | 
val is_ascii_letter: symbol -> bool  | 
34  | 
val is_ascii_digit: symbol -> bool  | 
|
| 
24580
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
35  | 
val is_ascii_hex: symbol -> bool  | 
| 14678 | 36  | 
val is_ascii_quasi: symbol -> bool  | 
37  | 
val is_ascii_blank: symbol -> bool  | 
|
| 20200 | 38  | 
val is_ascii_lower: symbol -> bool  | 
39  | 
val is_ascii_upper: symbol -> bool  | 
|
40  | 
val to_ascii_lower: symbol -> symbol  | 
|
41  | 
val to_ascii_upper: symbol -> symbol  | 
|
| 14834 | 42  | 
val is_raw: symbol -> bool  | 
43  | 
val decode_raw: symbol -> string  | 
|
| 
14977
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
44  | 
val encode_raw: string -> string  | 
| 14873 | 45  | 
datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string  | 
46  | 
val decode: symbol -> sym  | 
|
| 14678 | 47  | 
datatype kind = Letter | Digit | Quasi | Blank | Other  | 
48  | 
val kind: symbol -> kind  | 
|
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
49  | 
val is_letter: symbol -> bool  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
50  | 
val is_digit: symbol -> bool  | 
| 12904 | 51  | 
val is_quasi: symbol -> bool  | 
| 14678 | 52  | 
val is_blank: symbol -> bool  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
53  | 
val is_quasi_letter: symbol -> bool  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
54  | 
val is_letdig: symbol -> bool  | 
| 16138 | 55  | 
val is_ident: symbol list -> bool  | 
| 14728 | 56  | 
val beginning: int -> symbol list -> string  | 
| 14678 | 57  | 
val scanner: string -> (string list -> 'a * string list) -> symbol list -> 'a  | 
| 13730 | 58  | 
val scan_id: string list -> string * string list  | 
| 
27835
 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 
wenzelm 
parents: 
27766 
diff
changeset
 | 
59  | 
  val source: {do_recover: bool} -> (string, 'a) Source.source ->
 | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
60  | 
(symbol, (string, 'a) Source.source) Source.source  | 
| 6272 | 61  | 
val explode: string -> symbol list  | 
| 31425 | 62  | 
val esc: symbol -> string  | 
| 
14977
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
63  | 
val escape: string -> string  | 
| 14678 | 64  | 
val strip_blanks: string -> string  | 
65  | 
val bump_init: string -> string  | 
|
| 12904 | 66  | 
val bump_string: string -> string  | 
| 14678 | 67  | 
val length: symbol list -> int  | 
| 6692 | 68  | 
val xsymbolsN: string  | 
| 29324 | 69  | 
val output: string -> output * int  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
70  | 
end;  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
71  | 
|
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
72  | 
structure Symbol: SYMBOL =  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
73  | 
struct  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
74  | 
|
| 14678 | 75  | 
(** type symbol **)  | 
| 6272 | 76  | 
|
| 14678 | 77  | 
(*Symbols, which are considered the smallest entities of any Isabelle  | 
| 6272 | 78  | 
string, may be of the following form:  | 
| 14678 | 79  | 
|
| 14834 | 80  | 
(1) ASCII symbols: a  | 
| 17823 | 81  | 
(2) regular symbols: \<ident>  | 
| 14834 | 82  | 
(3) control symbols: \<^ident>  | 
83  | 
(4) raw control symbols: \<^raw:...>, where "..." may be any printable  | 
|
| 
20205
 
7b2958d3d575
raw symbols: disallow dot to avoid confusion in NameSpace.unpack;
 
wenzelm 
parents: 
20200 
diff
changeset
 | 
84  | 
character (excluding ".", ">"), or \<^raw000>  | 
| 6272 | 85  | 
|
| 14678 | 86  | 
Output is subject to the print_mode variable (default: verbatim),  | 
87  | 
actual interpretation in display is up to front-end tools.  | 
|
| 6272 | 88  | 
*)  | 
89  | 
||
90  | 
type symbol = string;  | 
|
91  | 
||
| 26524 | 92  | 
val SOH = chr 1;  | 
93  | 
val STX = chr 2;  | 
|
| 
26538
 
d65504ffb47d
replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
 
wenzelm 
parents: 
26524 
diff
changeset
 | 
94  | 
val ENQ = chr 5;  | 
| 
 
d65504ffb47d
replaced ETX/EOT by ENQ/ACK, which are less likely to be interpreted by tty etc.;
 
wenzelm 
parents: 
26524 
diff
changeset
 | 
95  | 
val ACK = chr 6;  | 
| 26524 | 96  | 
val DEL = chr 127;  | 
97  | 
||
98  | 
val space = chr 32;  | 
|
| 17063 | 99  | 
|
100  | 
local  | 
|
101  | 
val small_spaces = Vector.tabulate (65, fn i => Library.replicate_string i space);  | 
|
102  | 
in  | 
|
103  | 
fun spaces k =  | 
|
104  | 
if k < 64 then Vector.sub (small_spaces, k)  | 
|
105  | 
else Library.replicate_string (k div 64) (Vector.sub (small_spaces, 64)) ^  | 
|
106  | 
Vector.sub (small_spaces, k mod 64);  | 
|
107  | 
end;  | 
|
| 14678 | 108  | 
|
109  | 
fun is_char s = size s = 1;  | 
|
110  | 
||
111  | 
fun is_symbolic s =  | 
|
112  | 
String.isPrefix "\\<" s andalso not (String.isPrefix "\\<^" s);  | 
|
113  | 
||
114  | 
fun is_printable s =  | 
|
115  | 
if is_char s then ord space <= ord s andalso ord s <= ord "~"  | 
|
116  | 
else not (String.isPrefix "\\<^" s);  | 
|
| 6272 | 117  | 
|
| 26632 | 118  | 
fun is_utf8_trailer s = is_char s andalso 128 <= ord s andalso ord s < 192;  | 
119  | 
||
| 6272 | 120  | 
|
| 14678 | 121  | 
(* input source control *)  | 
| 6272 | 122  | 
|
| 14678 | 123  | 
val eof = "";  | 
| 6272 | 124  | 
fun is_eof s = s = eof;  | 
125  | 
fun not_eof s = s <> eof;  | 
|
| 27732 | 126  | 
val stopper = Scan.stopper (K eof) is_eof;  | 
| 6272 | 127  | 
|
| 14678 | 128  | 
val sync = "\\<^sync>";  | 
129  | 
fun is_sync s = s = sync;  | 
|
130  | 
||
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
131  | 
val malformed = "[[";  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
132  | 
val end_malformed = "]]";  | 
| 25641 | 133  | 
|
134  | 
val separate_chars = explode #> space_implode space;  | 
|
135  | 
fun malformed_msg s = "Malformed symbolic character: " ^ quote (separate_chars s);  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
136  | 
|
| 
23784
 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23728 
diff
changeset
 | 
137  | 
fun is_regular s =  | 
| 
 
75e6b9dd5336
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23728 
diff
changeset
 | 
138  | 
not_eof s andalso s <> sync andalso s <> malformed andalso s <> end_malformed;  | 
| 14678 | 139  | 
|
| 31013 | 140  | 
fun name_of s = if is_symbolic s  | 
141  | 
then (unsuffix ">" o unprefix "\\<") s  | 
|
142  | 
else error (malformed_msg s);  | 
|
143  | 
||
| 14678 | 144  | 
|
145  | 
(* ascii symbols *)  | 
|
146  | 
||
147  | 
fun is_ascii s = is_char s andalso ord s < 128;  | 
|
148  | 
||
149  | 
fun is_ascii_letter s =  | 
|
150  | 
is_char s andalso  | 
|
151  | 
(ord "A" <= ord s andalso ord s <= ord "Z" orelse  | 
|
152  | 
ord "a" <= ord s andalso ord s <= ord "z");  | 
|
153  | 
||
154  | 
fun is_ascii_digit s =  | 
|
155  | 
is_char s andalso ord "0" <= ord s andalso ord s <= ord "9";  | 
|
156  | 
||
| 
24580
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
157  | 
fun is_ascii_hex s =  | 
| 
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
158  | 
is_char s andalso  | 
| 
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
159  | 
(ord "0" <= ord s andalso ord s <= ord "9" orelse  | 
| 
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
160  | 
ord "A" <= ord s andalso ord s <= ord "F" orelse  | 
| 
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
161  | 
ord "a" <= ord s andalso ord s <= ord "f");  | 
| 
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
162  | 
|
| 14678 | 163  | 
fun is_ascii_quasi "_" = true  | 
164  | 
| is_ascii_quasi "'" = true  | 
|
165  | 
| is_ascii_quasi _ = false;  | 
|
166  | 
||
167  | 
val is_ascii_blank =  | 
|
| 
24580
 
916259859344
replaced Symbol.is_hex_letter to Symbol.is_ascii_hex;
 
wenzelm 
parents: 
23784 
diff
changeset
 | 
168  | 
fn " " => true | "\t" => true | "\n" => true | "\^K" => true | "\^L" => true | "\^M" => true  | 
| 14678 | 169  | 
| _ => false;  | 
170  | 
||
| 20200 | 171  | 
fun is_ascii_lower s = is_char s andalso (ord "a" <= ord s andalso ord s <= ord "z");  | 
172  | 
fun is_ascii_upper s = is_char s andalso (ord "A" <= ord s andalso ord s <= ord "Z");  | 
|
173  | 
||
174  | 
fun to_ascii_lower s = if is_ascii_upper s then chr (ord s + ord "a" - ord "A") else s;  | 
|
175  | 
fun to_ascii_upper s = if is_ascii_lower s then chr (ord s + ord "A" - ord "a") else s;  | 
|
176  | 
||
| 14678 | 177  | 
|
| 
14956
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
178  | 
(* encode_raw *)  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
179  | 
|
| 
20205
 
7b2958d3d575
raw symbols: disallow dot to avoid confusion in NameSpace.unpack;
 
wenzelm 
parents: 
20200 
diff
changeset
 | 
180  | 
fun raw_chr c =  | 
| 
 
7b2958d3d575
raw symbols: disallow dot to avoid confusion in NameSpace.unpack;
 
wenzelm 
parents: 
20200 
diff
changeset
 | 
181  | 
ord space <= ord c andalso ord c <= ord "~" andalso c <> "." andalso c <> ">"  | 
| 17823 | 182  | 
orelse ord c >= 128;  | 
| 
14956
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
183  | 
|
| 29324 | 184  | 
fun encode_raw "" = ""  | 
185  | 
| encode_raw str =  | 
|
186  | 
let  | 
|
187  | 
val raw0 = enclose "\\<^raw:" ">";  | 
|
188  | 
val raw1 = raw0 o implode;  | 
|
189  | 
val raw2 = enclose "\\<^raw" ">" o string_of_int o ord;  | 
|
190  | 
||
191  | 
fun encode cs = enc (Library.take_prefix raw_chr cs)  | 
|
192  | 
and enc ([], []) = []  | 
|
193  | 
| enc (cs, []) = [raw1 cs]  | 
|
194  | 
| enc ([], d :: ds) = raw2 d :: encode ds  | 
|
195  | 
| enc (cs, d :: ds) = raw1 cs :: raw2 d :: encode ds;  | 
|
196  | 
in  | 
|
197  | 
if exists_string (not o raw_chr) str then implode (encode (explode str))  | 
|
198  | 
else raw0 str  | 
|
199  | 
end;  | 
|
| 
14956
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
200  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
201  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
202  | 
(* diagnostics *)  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
203  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
204  | 
fun beginning n cs =  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
205  | 
let  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
206  | 
val drop_blanks = #1 o Library.take_suffix is_ascii_blank;  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
207  | 
val all_cs = drop_blanks cs;  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
208  | 
val dots = if length all_cs > n then " ..." else "";  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
209  | 
in  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
210  | 
(drop_blanks (Library.take (n, all_cs))  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
211  | 
|> map (fn c => if is_ascii_blank c then space else c)  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
212  | 
|> implode) ^ dots  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
213  | 
end;  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
214  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
215  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
216  | 
(* decode_raw *)  | 
| 14834 | 217  | 
|
218  | 
fun is_raw s =  | 
|
| 17063 | 219  | 
String.isPrefix "\\<^raw" s andalso String.isSuffix ">" s;  | 
| 14834 | 220  | 
|
221  | 
fun decode_raw s =  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
222  | 
if not (is_raw s) then error (malformed_msg s)  | 
| 14834 | 223  | 
else if String.isPrefix "\\<^raw:" s then String.substring (s, 7, size s - 8)  | 
224  | 
else chr (#1 (Library.read_int (explode (String.substring (s, 6, size s - 7)))));  | 
|
225  | 
||
226  | 
||
| 14873 | 227  | 
(* symbol variants *)  | 
228  | 
||
229  | 
datatype sym = Char of string | Sym of string | Ctrl of string | Raw of string;  | 
|
230  | 
||
231  | 
fun decode s =  | 
|
232  | 
if is_char s then Char s  | 
|
233  | 
else if is_raw s then Raw (decode_raw s)  | 
|
234  | 
else if String.isPrefix "\\<^" s then Ctrl (String.substring (s, 3, size s - 4))  | 
|
235  | 
else if String.isPrefix "\\<" s then Sym (String.substring (s, 2, size s - 3))  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
236  | 
else error (malformed_msg s);  | 
| 14873 | 237  | 
|
238  | 
||
| 14678 | 239  | 
(* standard symbol kinds *)  | 
240  | 
||
241  | 
datatype kind = Letter | Digit | Quasi | Blank | Other;  | 
|
| 6272 | 242  | 
|
| 
14171
 
0cab06e3bbd0
Extended the notion of letter and digit, such that now one may use greek,
 
skalberg 
parents: 
13730 
diff
changeset
 | 
243  | 
local  | 
| 14678 | 244  | 
val symbol_kinds = Symtab.make  | 
245  | 
   [("\\<A>", Letter),
 | 
|
246  | 
    ("\\<B>", Letter),
 | 
|
247  | 
    ("\\<C>", Letter),
 | 
|
248  | 
    ("\\<D>", Letter),
 | 
|
249  | 
    ("\\<E>", Letter),
 | 
|
250  | 
    ("\\<F>", Letter),
 | 
|
251  | 
    ("\\<G>", Letter),
 | 
|
252  | 
    ("\\<H>", Letter),
 | 
|
253  | 
    ("\\<I>", Letter),
 | 
|
254  | 
    ("\\<J>", Letter),
 | 
|
255  | 
    ("\\<K>", Letter),
 | 
|
256  | 
    ("\\<L>", Letter),
 | 
|
257  | 
    ("\\<M>", Letter),
 | 
|
258  | 
    ("\\<N>", Letter),
 | 
|
259  | 
    ("\\<O>", Letter),
 | 
|
260  | 
    ("\\<P>", Letter),
 | 
|
261  | 
    ("\\<Q>", Letter),
 | 
|
262  | 
    ("\\<R>", Letter),
 | 
|
263  | 
    ("\\<S>", Letter),
 | 
|
264  | 
    ("\\<T>", Letter),
 | 
|
265  | 
    ("\\<U>", Letter),
 | 
|
266  | 
    ("\\<V>", Letter),
 | 
|
267  | 
    ("\\<W>", Letter),
 | 
|
268  | 
    ("\\<X>", Letter),
 | 
|
269  | 
    ("\\<Y>", Letter),
 | 
|
270  | 
    ("\\<Z>", Letter),
 | 
|
271  | 
    ("\\<a>", Letter),
 | 
|
272  | 
    ("\\<b>", Letter),
 | 
|
273  | 
    ("\\<c>", Letter),
 | 
|
274  | 
    ("\\<d>", Letter),
 | 
|
275  | 
    ("\\<e>", Letter),
 | 
|
276  | 
    ("\\<f>", Letter),
 | 
|
277  | 
    ("\\<g>", Letter),
 | 
|
278  | 
    ("\\<h>", Letter),
 | 
|
279  | 
    ("\\<i>", Letter),
 | 
|
280  | 
    ("\\<j>", Letter),
 | 
|
281  | 
    ("\\<k>", Letter),
 | 
|
282  | 
    ("\\<l>", Letter),
 | 
|
283  | 
    ("\\<m>", Letter),
 | 
|
284  | 
    ("\\<n>", Letter),
 | 
|
285  | 
    ("\\<o>", Letter),
 | 
|
286  | 
    ("\\<p>", Letter),
 | 
|
287  | 
    ("\\<q>", Letter),
 | 
|
288  | 
    ("\\<r>", Letter),
 | 
|
289  | 
    ("\\<s>", Letter),
 | 
|
290  | 
    ("\\<t>", Letter),
 | 
|
291  | 
    ("\\<u>", Letter),
 | 
|
292  | 
    ("\\<v>", Letter),
 | 
|
293  | 
    ("\\<w>", Letter),
 | 
|
294  | 
    ("\\<x>", Letter),
 | 
|
295  | 
    ("\\<y>", Letter),
 | 
|
296  | 
    ("\\<z>", Letter),
 | 
|
297  | 
    ("\\<AA>", Letter),
 | 
|
298  | 
    ("\\<BB>", Letter),
 | 
|
299  | 
    ("\\<CC>", Letter),
 | 
|
300  | 
    ("\\<DD>", Letter),
 | 
|
301  | 
    ("\\<EE>", Letter),
 | 
|
302  | 
    ("\\<FF>", Letter),
 | 
|
303  | 
    ("\\<GG>", Letter),
 | 
|
304  | 
    ("\\<HH>", Letter),
 | 
|
305  | 
    ("\\<II>", Letter),
 | 
|
306  | 
    ("\\<JJ>", Letter),
 | 
|
307  | 
    ("\\<KK>", Letter),
 | 
|
308  | 
    ("\\<LL>", Letter),
 | 
|
309  | 
    ("\\<MM>", Letter),
 | 
|
310  | 
    ("\\<NN>", Letter),
 | 
|
311  | 
    ("\\<OO>", Letter),
 | 
|
312  | 
    ("\\<PP>", Letter),
 | 
|
313  | 
    ("\\<QQ>", Letter),
 | 
|
314  | 
    ("\\<RR>", Letter),
 | 
|
315  | 
    ("\\<SS>", Letter),
 | 
|
316  | 
    ("\\<TT>", Letter),
 | 
|
317  | 
    ("\\<UU>", Letter),
 | 
|
318  | 
    ("\\<VV>", Letter),
 | 
|
319  | 
    ("\\<WW>", Letter),
 | 
|
320  | 
    ("\\<XX>", Letter),
 | 
|
321  | 
    ("\\<YY>", Letter),
 | 
|
322  | 
    ("\\<ZZ>", Letter),
 | 
|
323  | 
    ("\\<aa>", Letter),
 | 
|
324  | 
    ("\\<bb>", Letter),
 | 
|
325  | 
    ("\\<cc>", Letter),
 | 
|
326  | 
    ("\\<dd>", Letter),
 | 
|
327  | 
    ("\\<ee>", Letter),
 | 
|
328  | 
    ("\\<ff>", Letter),
 | 
|
329  | 
    ("\\<gg>", Letter),
 | 
|
330  | 
    ("\\<hh>", Letter),
 | 
|
331  | 
    ("\\<ii>", Letter),
 | 
|
332  | 
    ("\\<jj>", Letter),
 | 
|
333  | 
    ("\\<kk>", Letter),
 | 
|
334  | 
    ("\\<ll>", Letter),
 | 
|
335  | 
    ("\\<mm>", Letter),
 | 
|
336  | 
    ("\\<nn>", Letter),
 | 
|
337  | 
    ("\\<oo>", Letter),
 | 
|
338  | 
    ("\\<pp>", Letter),
 | 
|
339  | 
    ("\\<qq>", Letter),
 | 
|
340  | 
    ("\\<rr>", Letter),
 | 
|
341  | 
    ("\\<ss>", Letter),
 | 
|
342  | 
    ("\\<tt>", Letter),
 | 
|
343  | 
    ("\\<uu>", Letter),
 | 
|
344  | 
    ("\\<vv>", Letter),
 | 
|
345  | 
    ("\\<ww>", Letter),
 | 
|
346  | 
    ("\\<xx>", Letter),
 | 
|
347  | 
    ("\\<yy>", Letter),
 | 
|
348  | 
    ("\\<zz>", Letter),
 | 
|
349  | 
    ("\\<alpha>", Letter),
 | 
|
350  | 
    ("\\<beta>", Letter),
 | 
|
351  | 
    ("\\<gamma>", Letter),
 | 
|
352  | 
    ("\\<delta>", Letter),
 | 
|
353  | 
    ("\\<epsilon>", Letter),
 | 
|
354  | 
    ("\\<zeta>", Letter),
 | 
|
355  | 
    ("\\<eta>", Letter),
 | 
|
356  | 
    ("\\<theta>", Letter),
 | 
|
357  | 
    ("\\<iota>", Letter),
 | 
|
358  | 
    ("\\<kappa>", Letter),
 | 
|
359  | 
    ("\\<lambda>", Other),      (*sic!*)
 | 
|
360  | 
    ("\\<mu>", Letter),
 | 
|
361  | 
    ("\\<nu>", Letter),
 | 
|
362  | 
    ("\\<xi>", Letter),
 | 
|
363  | 
    ("\\<pi>", Letter),
 | 
|
364  | 
    ("\\<rho>", Letter),
 | 
|
365  | 
    ("\\<sigma>", Letter),
 | 
|
366  | 
    ("\\<tau>", Letter),
 | 
|
367  | 
    ("\\<upsilon>", Letter),
 | 
|
368  | 
    ("\\<phi>", Letter),
 | 
|
| 25521 | 369  | 
    ("\\<chi>", Letter),
 | 
| 14678 | 370  | 
    ("\\<psi>", Letter),
 | 
371  | 
    ("\\<omega>", Letter),
 | 
|
372  | 
    ("\\<Gamma>", Letter),
 | 
|
373  | 
    ("\\<Delta>", Letter),
 | 
|
374  | 
    ("\\<Theta>", Letter),
 | 
|
375  | 
    ("\\<Lambda>", Letter),
 | 
|
376  | 
    ("\\<Xi>", Letter),
 | 
|
377  | 
    ("\\<Pi>", Letter),
 | 
|
378  | 
    ("\\<Sigma>", Letter),
 | 
|
379  | 
    ("\\<Upsilon>", Letter),
 | 
|
380  | 
    ("\\<Phi>", Letter),
 | 
|
381  | 
    ("\\<Psi>", Letter),
 | 
|
382  | 
    ("\\<Omega>", Letter),
 | 
|
| 14961 | 383  | 
    ("\\<^isub>", Letter),
 | 
384  | 
    ("\\<^isup>", Letter),
 | 
|
| 14678 | 385  | 
    ("\\<spacespace>", Blank)];
 | 
| 
14171
 
0cab06e3bbd0
Extended the notion of letter and digit, such that now one may use greek,
 
skalberg 
parents: 
13730 
diff
changeset
 | 
386  | 
in  | 
| 14678 | 387  | 
fun kind s =  | 
388  | 
if is_ascii_letter s then Letter  | 
|
389  | 
else if is_ascii_digit s then Digit  | 
|
390  | 
else if is_ascii_quasi s then Quasi  | 
|
391  | 
else if is_ascii_blank s then Blank  | 
|
392  | 
else if is_char s then Other  | 
|
| 18939 | 393  | 
else the_default Other (Symtab.lookup symbol_kinds s);  | 
| 14678 | 394  | 
end;  | 
| 14173 | 395  | 
|
| 14678 | 396  | 
fun is_letter s = kind s = Letter;  | 
397  | 
fun is_digit s = kind s = Digit;  | 
|
398  | 
fun is_quasi s = kind s = Quasi;  | 
|
399  | 
fun is_blank s = kind s = Blank;  | 
|
| 6272 | 400  | 
|
| 14678 | 401  | 
fun is_quasi_letter s = let val k = kind s in k = Letter orelse k = Quasi end;  | 
402  | 
fun is_letdig s = let val k = kind s in k = Letter orelse k = Digit orelse k = Quasi end;  | 
|
| 11010 | 403  | 
|
| 16138 | 404  | 
fun is_ident [] = false  | 
405  | 
| is_ident (c :: cs) = is_letter c andalso forall is_letdig cs;  | 
|
406  | 
||
| 6272 | 407  | 
|
408  | 
||
| 14678 | 409  | 
(** symbol input **)  | 
410  | 
||
411  | 
(* scanning through symbols *)  | 
|
| 6272 | 412  | 
|
| 6640 | 413  | 
fun scanner msg scan chs =  | 
414  | 
let  | 
|
| 15531 | 415  | 
fun message (cs, NONE) = msg ^ ": " ^ quote (beginning 10 cs)  | 
416  | 
| message (cs, SOME msg') = msg ^ ", " ^ msg' ^ ": " ^ quote (beginning 10 cs);  | 
|
| 14961 | 417  | 
val fin_scan = Scan.error (Scan.finite stopper (!! message scan));  | 
| 6640 | 418  | 
in  | 
419  | 
(case fin_scan chs of  | 
|
420  | 
(result, []) => result  | 
|
| 15531 | 421  | 
| (_, rest) => error (message (rest, NONE)))  | 
| 6640 | 422  | 
end;  | 
423  | 
||
| 
21858
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21495 
diff
changeset
 | 
424  | 
val scan_id = Scan.one is_letter ^^ (Scan.many is_letdig >> implode);  | 
| 14678 | 425  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
426  | 
|
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
427  | 
(* source *)  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
428  | 
|
| 14678 | 429  | 
local  | 
| 
14561
 
c53396af770e
* raw control symbols are of the form \<^raw:...> now.
 
schirmer 
parents: 
14559 
diff
changeset
 | 
430  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
431  | 
fun is_plain s = s <> "\^M" andalso s <> "\\" andalso not_eof s;  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
432  | 
|
| 14678 | 433  | 
val scan_encoded_newline =  | 
| 17756 | 434  | 
$$ "\^M" -- $$ "\n" >> K "\n" ||  | 
435  | 
$$ "\^M" >> K "\n" ||  | 
|
| 
14956
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
436  | 
$$ "\\" -- Scan.optional ($$ "\\") "" -- Scan.this_string "<^newline>" >> K "\n";  | 
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
437  | 
|
| 
 
70ec4b8aef8d
prevent looping of error messages involving malformed symbols;
 
wenzelm 
parents: 
14908 
diff
changeset
 | 
438  | 
val scan_raw =  | 
| 
21858
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21495 
diff
changeset
 | 
439  | 
Scan.this_string "raw:" ^^ (Scan.many raw_chr >> implode) ||  | 
| 
 
05f57309170c
avoid conflict with Alice keywords: renamed pack -> implode, unpack -> explode, any -> many, avoided assert;
 
wenzelm 
parents: 
21495 
diff
changeset
 | 
440  | 
Scan.this_string "raw" ^^ (Scan.many1 is_ascii_digit >> implode);  | 
| 14678 | 441  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
442  | 
val scan =  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
443  | 
Scan.one is_plain ||  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
444  | 
scan_encoded_newline ||  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
445  | 
(($$ "\\" --| Scan.optional ($$ "\\") "") ^^ $$ "<" ^^  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
446  | 
    !! (fn (cs, _) => malformed_msg (beginning 10 ("\\" :: "<" :: cs)))
 | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
447  | 
(($$ "^" ^^ (scan_raw || scan_id) || scan_id) ^^ $$ ">")) ||  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
448  | 
Scan.one not_eof;  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
449  | 
|
| 
27745
 
31899d977a89
improved recover: also resync on symbol/comment/verbatim delimiters;
 
wenzelm 
parents: 
27732 
diff
changeset
 | 
450  | 
val scan_resync =  | 
| 
 
31899d977a89
improved recover: also resync on symbol/comment/verbatim delimiters;
 
wenzelm 
parents: 
27732 
diff
changeset
 | 
451  | 
Scan.one is_ascii_blank || $$ "\"" || $$ "`" || $$ "\\" ||  | 
| 
 
31899d977a89
improved recover: also resync on symbol/comment/verbatim delimiters;
 
wenzelm 
parents: 
27732 
diff
changeset
 | 
452  | 
Scan.this_string "(*" || Scan.this_string "*)" ||  | 
| 
 
31899d977a89
improved recover: also resync on symbol/comment/verbatim delimiters;
 
wenzelm 
parents: 
27732 
diff
changeset
 | 
453  | 
  Scan.this_string "{*" || Scan.this_string "*}";
 | 
| 
 
31899d977a89
improved recover: also resync on symbol/comment/verbatim delimiters;
 
wenzelm 
parents: 
27732 
diff
changeset
 | 
454  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
455  | 
val recover =  | 
| 27903 | 456  | 
(Scan.this (explode "\\\\<") || Scan.this (explode "\\<")) @@@  | 
| 
27745
 
31899d977a89
improved recover: also resync on symbol/comment/verbatim delimiters;
 
wenzelm 
parents: 
27732 
diff
changeset
 | 
457  | 
Scan.repeat (Scan.unless scan_resync (Scan.one not_eof))  | 
| 
 
31899d977a89
improved recover: also resync on symbol/comment/verbatim delimiters;
 
wenzelm 
parents: 
27732 
diff
changeset
 | 
458  | 
>> (fn ss => malformed :: ss @ [end_malformed]);  | 
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
459  | 
|
| 14678 | 460  | 
in  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
461  | 
|
| 
27835
 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 
wenzelm 
parents: 
27766 
diff
changeset
 | 
462  | 
fun source {do_recover} src =
 | 
| 
23682
 
cf4773532006
nested source: explicit interactive flag for recover avoids duplicate errors;
 
wenzelm 
parents: 
23676 
diff
changeset
 | 
463  | 
Source.source stopper (Scan.bulk scan)  | 
| 
 
cf4773532006
nested source: explicit interactive flag for recover avoids duplicate errors;
 
wenzelm 
parents: 
23676 
diff
changeset
 | 
464  | 
(if do_recover then SOME (false, K recover) else NONE) src;  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
465  | 
|
| 14678 | 466  | 
end;  | 
467  | 
||
| 
14562
 
980da32f4617
proper handling of lines terminated by CRLF or CR;
 
wenzelm 
parents: 
14561 
diff
changeset
 | 
468  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
469  | 
(* explode *)  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
470  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
471  | 
local  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
472  | 
|
| 
14562
 
980da32f4617
proper handling of lines terminated by CRLF or CR;
 
wenzelm 
parents: 
14561 
diff
changeset
 | 
473  | 
fun no_explode [] = true  | 
| 
 
980da32f4617
proper handling of lines terminated by CRLF or CR;
 
wenzelm 
parents: 
14561 
diff
changeset
 | 
474  | 
  | no_explode ("\\" :: "<" :: _) = false
 | 
| 17756 | 475  | 
  | no_explode ("\^M" :: _) = false
 | 
| 
14562
 
980da32f4617
proper handling of lines terminated by CRLF or CR;
 
wenzelm 
parents: 
14561 
diff
changeset
 | 
476  | 
| no_explode (_ :: cs) = no_explode cs;  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
477  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
478  | 
in  | 
| 
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
479  | 
|
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
480  | 
fun sym_explode str =  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
481  | 
let val chs = explode str in  | 
| 
14562
 
980da32f4617
proper handling of lines terminated by CRLF or CR;
 
wenzelm 
parents: 
14561 
diff
changeset
 | 
482  | 
if no_explode chs then chs  | 
| 
27835
 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 
wenzelm 
parents: 
27766 
diff
changeset
 | 
483  | 
    else Source.exhaust (source {do_recover = false} (Source.of_list chs))
 | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
484  | 
end;  | 
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
485  | 
|
| 
23676
 
ea9b7e9c2301
scan: changed treatment of malformed symbols, passed to next stage;
 
wenzelm 
parents: 
23618 
diff
changeset
 | 
486  | 
end;  | 
| 14994 | 487  | 
|
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
488  | 
|
| 
14977
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
489  | 
(* escape *)  | 
| 
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
490  | 
|
| 31425 | 491  | 
val esc = fn s => if is_char s then s else "\\" ^ s;  | 
492  | 
val escape = implode o map esc o sym_explode;  | 
|
| 
14977
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
493  | 
|
| 
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
494  | 
|
| 14678 | 495  | 
(* blanks *)  | 
496  | 
||
497  | 
fun strip_blanks s =  | 
|
498  | 
sym_explode s  | 
|
499  | 
|> Library.take_prefix is_blank |> #2  | 
|
500  | 
|> Library.take_suffix is_blank |> #1  | 
|
501  | 
|> implode;  | 
|
502  | 
||
503  | 
||
504  | 
(* bump string -- treat as base 26 or base 1 numbers *)  | 
|
505  | 
||
| 15979 | 506  | 
fun symbolic_end (_ :: "\\<^isub>" :: _) = true  | 
507  | 
| symbolic_end (_ :: "\\<^isup>" :: _) = true  | 
|
| 14908 | 508  | 
| symbolic_end (s :: _) = is_symbolic s  | 
509  | 
| symbolic_end [] = false;  | 
|
| 14678 | 510  | 
|
511  | 
fun bump_init str =  | 
|
| 14908 | 512  | 
if symbolic_end (rev (sym_explode str)) then str ^ "'"  | 
| 14678 | 513  | 
else str ^ "a";  | 
| 12904 | 514  | 
|
515  | 
fun bump_string str =  | 
|
516  | 
let  | 
|
517  | 
fun bump [] = ["a"]  | 
|
518  | 
      | bump ("z" :: ss) = "a" :: bump ss
 | 
|
519  | 
| bump (s :: ss) =  | 
|
| 14678 | 520  | 
if is_char s andalso ord "a" <= ord s andalso ord s < ord "z"  | 
| 12904 | 521  | 
then chr (ord s + 1) :: ss  | 
522  | 
else "a" :: s :: ss;  | 
|
| 14678 | 523  | 
|
524  | 
val (ss, qs) = apfst rev (Library.take_suffix is_quasi (sym_explode str));  | 
|
| 14908 | 525  | 
val ss' = if symbolic_end ss then "'" :: ss else bump ss;  | 
| 14678 | 526  | 
in implode (rev ss' @ qs) end;  | 
527  | 
||
| 12904 | 528  | 
|
| 6272 | 529  | 
|
| 29324 | 530  | 
(** symbol output **)  | 
| 
14977
 
77d88064991a
added escape, export encode_raw, default mode now trivial, tuned;
 
wenzelm 
parents: 
14961 
diff
changeset
 | 
531  | 
|
| 29324 | 532  | 
(* length *)  | 
| 6272 | 533  | 
|
| 14678 | 534  | 
fun sym_len s =  | 
| 
24593
 
1547ea587f5a
added some int constraints (ML_Parse.fix_ints not active here);
 
wenzelm 
parents: 
24580 
diff
changeset
 | 
535  | 
if not (is_printable s) then (0: int)  | 
| 14678 | 536  | 
else if String.isPrefix "\\<long" s then 2  | 
537  | 
else if String.isPrefix "\\<Long" s then 2  | 
|
538  | 
else if s = "\\<spacespace>" then 2  | 
|
539  | 
else 1;  | 
|
540  | 
||
| 19473 | 541  | 
fun sym_length ss = fold (fn s => fn n => sym_len s + n) ss 0;  | 
| 14678 | 542  | 
|
| 29324 | 543  | 
|
544  | 
(* print mode *)  | 
|
545  | 
||
546  | 
val xsymbolsN = "xsymbols";  | 
|
547  | 
||
548  | 
fun output s = (s, sym_length (sym_explode s));  | 
|
549  | 
||
550  | 
||
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
551  | 
(*final declarations of this structure!*)  | 
| 29324 | 552  | 
val explode = sym_explode;  | 
| 6272 | 553  | 
val length = sym_length;  | 
| 
6116
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
554  | 
|
| 
 
8ba2f25610f7
files scan.ML, source.ML, symbol.ML, pretty.ML moved to Pure/General;
 
wenzelm 
parents:  
diff
changeset
 | 
555  | 
end;  |