| author | wenzelm | 
| Tue, 30 Mar 2010 00:47:52 +0200 | |
| changeset 36016 | 4f5c7a19ebe0 | 
| parent 32738 | 15bb09ca0378 | 
| permissions | -rw-r--r-- | 
| 5825 | 1  | 
(* Title: Pure/Isar/outer_lex.ML  | 
2  | 
Author: Markus Wenzel, TU Muenchen  | 
|
3  | 
||
4  | 
Outer lexical syntax for Isabelle/Isar.  | 
|
5  | 
*)  | 
|
6  | 
||
7  | 
signature OUTER_LEX =  | 
|
8  | 
sig  | 
|
9  | 
datatype token_kind =  | 
|
| 27814 | 10  | 
Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |  | 
11  | 
Nat | String | AltString | Verbatim | Space | Comment | InternalValue |  | 
|
12  | 
Malformed | Error of string | Sync | EOF  | 
|
13  | 
datatype value =  | 
|
14  | 
Text of string | Typ of typ | Term of term | Fact of thm list |  | 
|
15  | 
Attribute of morphism -> attribute  | 
|
16  | 
type token  | 
|
| 5825 | 17  | 
val str_of_kind: token_kind -> string  | 
18  | 
val position_of: token -> Position.T  | 
|
| 
27752
 
ea7d573e565f
removed obsolete range_of (already included in position);
 
wenzelm 
parents: 
27747 
diff
changeset
 | 
19  | 
val end_position_of: token -> Position.T  | 
| 5825 | 20  | 
val pos_of: token -> string  | 
| 
27733
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
21  | 
val eof: token  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
22  | 
val is_eof: token -> bool  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
23  | 
val not_eof: token -> bool  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
24  | 
val not_sync: token -> bool  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
25  | 
val stopper: token Scan.stopper  | 
| 23721 | 26  | 
val kind_of: token -> token_kind  | 
| 5825 | 27  | 
val is_kind: token_kind -> token -> bool  | 
| 7026 | 28  | 
val keyword_with: (string -> bool) -> token -> bool  | 
| 16029 | 29  | 
val ident_with: (string -> bool) -> token -> bool  | 
| 5825 | 30  | 
val is_proper: token -> bool  | 
| 9130 | 31  | 
val is_semicolon: token -> bool  | 
| 17069 | 32  | 
val is_comment: token -> bool  | 
| 8580 | 33  | 
val is_begin_ignore: token -> bool  | 
34  | 
val is_end_ignore: token -> bool  | 
|
| 17069 | 35  | 
val is_blank: token -> bool  | 
| 8651 | 36  | 
val is_newline: token -> bool  | 
| 
27747
 
d41abb7bc08a
token: maintain of source, which retains original position information;
 
wenzelm 
parents: 
27733 
diff
changeset
 | 
37  | 
val source_of: token -> string  | 
| 30573 | 38  | 
val source_position_of: token -> Symbol_Pos.text * Position.T  | 
| 27814 | 39  | 
val content_of: token -> string  | 
| 14991 | 40  | 
val unparse: token -> string  | 
| 
23788
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
41  | 
val text_of: token -> string * string  | 
| 27814 | 42  | 
val get_value: token -> value option  | 
43  | 
val map_value: (value -> value) -> token -> token  | 
|
44  | 
val mk_text: string -> token  | 
|
45  | 
val mk_typ: typ -> token  | 
|
46  | 
val mk_term: term -> token  | 
|
47  | 
val mk_fact: thm list -> token  | 
|
48  | 
val mk_attribute: (morphism -> attribute) -> token  | 
|
49  | 
val assignable: token -> token  | 
|
50  | 
val assign: value option -> token -> unit  | 
|
51  | 
val closure: token -> token  | 
|
52  | 
val ident_or_symbolic: string -> bool  | 
|
| 30573 | 53  | 
val !!! : string -> (Symbol_Pos.T list -> 'a) -> Symbol_Pos.T list -> 'a  | 
| 27769 | 54  | 
val source_proper: (token, 'a) Source.source -> (token, (token, 'a) Source.source) Source.source  | 
| 
27835
 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 
wenzelm 
parents: 
27814 
diff
changeset
 | 
55  | 
  val source': {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 30573 | 56  | 
(Symbol_Pos.T, 'a) Source.source -> (token, (Symbol_Pos.T, 'a) Source.source) Source.source  | 
| 
27835
 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 
wenzelm 
parents: 
27814 
diff
changeset
 | 
57  | 
  val source: {do_recover: bool Option.option} -> (unit -> Scan.lexicon * Scan.lexicon) ->
 | 
| 27769 | 58  | 
Position.T -> (Symbol.symbol, 'a) Source.source -> (token,  | 
| 30573 | 59  | 
(Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
60  | 
val read_antiq: Scan.lexicon -> (token list -> 'a * token list) ->  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
61  | 
Symbol_Pos.T list * Position.T -> 'a  | 
| 5825 | 62  | 
end;  | 
63  | 
||
64  | 
structure OuterLex: OUTER_LEX =  | 
|
65  | 
struct  | 
|
66  | 
||
67  | 
(** tokens **)  | 
|
68  | 
||
| 27814 | 69  | 
(* token values *)  | 
70  | 
||
71  | 
(*The value slot assigns an (optional) internal value to a token,  | 
|
72  | 
usually as a side-effect of special scanner setup (see also  | 
|
73  | 
args.ML). Note that an assignable ref designates an intermediate  | 
|
74  | 
state of internalization -- it is NOT meant to persist.*)  | 
|
75  | 
||
76  | 
datatype value =  | 
|
77  | 
Text of string |  | 
|
78  | 
Typ of typ |  | 
|
79  | 
Term of term |  | 
|
80  | 
Fact of thm list |  | 
|
81  | 
Attribute of morphism -> attribute;  | 
|
82  | 
||
83  | 
datatype slot =  | 
|
84  | 
Slot |  | 
|
85  | 
Value of value option |  | 
|
| 32738 | 86  | 
Assignable of value option Unsynchronized.ref;  | 
| 27814 | 87  | 
|
88  | 
||
| 5825 | 89  | 
(* datatype token *)  | 
90  | 
||
91  | 
datatype token_kind =  | 
|
| 27814 | 92  | 
Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar |  | 
93  | 
Nat | String | AltString | Verbatim | Space | Comment | InternalValue |  | 
|
94  | 
Malformed | Error of string | Sync | EOF;  | 
|
| 5825 | 95  | 
|
| 30573 | 96  | 
datatype token = Token of (Symbol_Pos.text * Position.range) * (token_kind * string) * slot;  | 
| 5825 | 97  | 
|
98  | 
val str_of_kind =  | 
|
| 7026 | 99  | 
fn Command => "command"  | 
100  | 
| Keyword => "keyword"  | 
|
| 5825 | 101  | 
| Ident => "identifier"  | 
102  | 
| LongIdent => "long identifier"  | 
|
103  | 
| SymIdent => "symbolic identifier"  | 
|
104  | 
| Var => "schematic variable"  | 
|
105  | 
| TypeIdent => "type variable"  | 
|
106  | 
| TypeVar => "schematic type variable"  | 
|
107  | 
| Nat => "number"  | 
|
108  | 
| String => "string"  | 
|
| 
17164
 
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
 
wenzelm 
parents: 
17069 
diff
changeset
 | 
109  | 
| AltString => "back-quoted string"  | 
| 5825 | 110  | 
| Verbatim => "verbatim text"  | 
| 
7682
 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 
wenzelm 
parents: 
7477 
diff
changeset
 | 
111  | 
| Space => "white space"  | 
| 
 
46de8064c93c
added Space, Comment token kinds (keep actual text);
 
wenzelm 
parents: 
7477 
diff
changeset
 | 
112  | 
| Comment => "comment text"  | 
| 27814 | 113  | 
| InternalValue => "internal value"  | 
| 
23729
 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 
wenzelm 
parents: 
23721 
diff
changeset
 | 
114  | 
| Malformed => "malformed symbolic character"  | 
| 
 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 
wenzelm 
parents: 
23721 
diff
changeset
 | 
115  | 
| Error _ => "bad input"  | 
| 
23788
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
116  | 
| Sync => "sync marker"  | 
| 5825 | 117  | 
| EOF => "end-of-file";  | 
118  | 
||
119  | 
||
| 
27733
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
120  | 
(* position *)  | 
| 5825 | 121  | 
|
| 27814 | 122  | 
fun position_of (Token ((_, (pos, _)), _, _)) = pos;  | 
123  | 
fun end_position_of (Token ((_, (_, pos)), _, _)) = pos;  | 
|
| 27663 | 124  | 
|
| 5825 | 125  | 
val pos_of = Position.str_of o position_of;  | 
126  | 
||
127  | 
||
| 
27733
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
128  | 
(* control tokens *)  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
129  | 
|
| 27814 | 130  | 
fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot);
 | 
| 
27733
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
131  | 
val eof = mk_eof Position.none;  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
132  | 
|
| 27814 | 133  | 
fun is_eof (Token (_, (EOF, _), _)) = true  | 
| 
27733
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
134  | 
| is_eof _ = false;  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
135  | 
|
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
136  | 
val not_eof = not o is_eof;  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
137  | 
|
| 27814 | 138  | 
fun not_sync (Token (_, (Sync, _), _)) = false  | 
| 
27733
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
139  | 
| not_sync _ = true;  | 
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
140  | 
|
| 
27752
 
ea7d573e565f
removed obsolete range_of (already included in position);
 
wenzelm 
parents: 
27747 
diff
changeset
 | 
141  | 
val stopper =  | 
| 
 
ea7d573e565f
removed obsolete range_of (already included in position);
 
wenzelm 
parents: 
27747 
diff
changeset
 | 
142  | 
Scan.stopper (fn [] => eof | toks => mk_eof (end_position_of (List.last toks))) is_eof;  | 
| 
27733
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
143  | 
|
| 
 
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
 
wenzelm 
parents: 
27663 
diff
changeset
 | 
144  | 
|
| 5825 | 145  | 
(* kind of token *)  | 
146  | 
||
| 27814 | 147  | 
fun kind_of (Token (_, (k, _), _)) = k;  | 
148  | 
fun is_kind k (Token (_, (k', _), _)) = k = k';  | 
|
| 5825 | 149  | 
|
| 27814 | 150  | 
fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x  | 
| 7026 | 151  | 
| keyword_with _ _ = false;  | 
| 5825 | 152  | 
|
| 27814 | 153  | 
fun ident_with pred (Token (_, (Ident, x), _)) = pred x  | 
| 16029 | 154  | 
| ident_with _ _ = false;  | 
155  | 
||
| 27814 | 156  | 
fun is_proper (Token (_, (Space, _), _)) = false  | 
157  | 
| is_proper (Token (_, (Comment, _), _)) = false  | 
|
| 5825 | 158  | 
| is_proper _ = true;  | 
159  | 
||
| 27814 | 160  | 
fun is_semicolon (Token (_, (Keyword, ";"), _)) = true  | 
| 9130 | 161  | 
| is_semicolon _ = false;  | 
162  | 
||
| 27814 | 163  | 
fun is_comment (Token (_, (Comment, _), _)) = true  | 
| 17069 | 164  | 
| is_comment _ = false;  | 
165  | 
||
| 27814 | 166  | 
fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true  | 
| 8580 | 167  | 
| is_begin_ignore _ = false;  | 
168  | 
||
| 27814 | 169  | 
fun is_end_ignore (Token (_, (Comment, ">"), _)) = true  | 
| 8580 | 170  | 
| is_end_ignore _ = false;  | 
171  | 
||
| 8651 | 172  | 
|
| 17069 | 173  | 
(* blanks and newlines -- space tokens obey lines *)  | 
| 8651 | 174  | 
|
| 27814 | 175  | 
fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x)  | 
| 17069 | 176  | 
| is_blank _ = false;  | 
177  | 
||
| 27814 | 178  | 
fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x  | 
| 8651 | 179  | 
| is_newline _ = false;  | 
180  | 
||
| 5825 | 181  | 
|
| 14991 | 182  | 
(* token content *)  | 
| 9155 | 183  | 
|
| 27814 | 184  | 
fun source_of (Token ((source, (pos, _)), _, _)) =  | 
185  | 
YXML.string_of (XML.Elem (Markup.tokenN, Position.properties_of pos, [XML.Text source]));  | 
|
| 
25642
 
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
 
wenzelm 
parents: 
25582 
diff
changeset
 | 
186  | 
|
| 27885 | 187  | 
fun source_position_of (Token ((source, (pos, _)), _, _)) = (source, pos);  | 
| 27873 | 188  | 
|
| 27814 | 189  | 
fun content_of (Token (_, (_, x), _)) = x;  | 
| 
27747
 
d41abb7bc08a
token: maintain of source, which retains original position information;
 
wenzelm 
parents: 
27733 
diff
changeset
 | 
190  | 
|
| 
 
d41abb7bc08a
token: maintain of source, which retains original position information;
 
wenzelm 
parents: 
27733 
diff
changeset
 | 
191  | 
|
| 
 
d41abb7bc08a
token: maintain of source, which retains original position information;
 
wenzelm 
parents: 
27733 
diff
changeset
 | 
192  | 
(* unparse *)  | 
| 
 
d41abb7bc08a
token: maintain of source, which retains original position information;
 
wenzelm 
parents: 
27733 
diff
changeset
 | 
193  | 
|
| 18547 | 194  | 
fun escape q =  | 
195  | 
implode o map (fn s => if s = q orelse s = "\\" then "\\" ^ s else s) o Symbol.explode;  | 
|
196  | 
||
| 27814 | 197  | 
fun unparse (Token (_, (kind, x), _)) =  | 
| 14991 | 198  | 
(case kind of  | 
| 18547 | 199  | 
String => x |> quote o escape "\""  | 
200  | 
| AltString => x |> enclose "`" "`" o escape "`"  | 
|
| 14991 | 201  | 
  | Verbatim => x |> enclose "{*" "*}"
 | 
202  | 
| Comment => x |> enclose "(*" "*)"  | 
|
| 
28034
 
33050286e65d
text_of Malformed: explode with spaces -- Output.output/escape ineffective by default;
 
wenzelm 
parents: 
27885 
diff
changeset
 | 
203  | 
| Malformed => space_implode " " (explode x)  | 
| 
23729
 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 
wenzelm 
parents: 
23721 
diff
changeset
 | 
204  | 
| Sync => ""  | 
| 
 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 
wenzelm 
parents: 
23721 
diff
changeset
 | 
205  | 
| EOF => ""  | 
| 14991 | 206  | 
| _ => x);  | 
207  | 
||
| 
23788
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
208  | 
fun text_of tok =  | 
| 
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
209  | 
  if is_semicolon tok then ("terminator", "")
 | 
| 
23729
 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 
wenzelm 
parents: 
23721 
diff
changeset
 | 
210  | 
else  | 
| 
23788
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
211  | 
let  | 
| 
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
212  | 
val k = str_of_kind (kind_of tok);  | 
| 
25642
 
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
 
wenzelm 
parents: 
25582 
diff
changeset
 | 
213  | 
val s = unparse tok  | 
| 27814 | 214  | 
handle ERROR _ => Symbol.separate_chars (content_of tok);  | 
| 
23788
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
215  | 
in  | 
| 
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
216  | 
if s = "" then (k, "")  | 
| 
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
217  | 
else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) then (k ^ " " ^ s, "")  | 
| 
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
218  | 
else (k, s)  | 
| 
 
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
 
wenzelm 
parents: 
23729 
diff
changeset
 | 
219  | 
end;  | 
| 
23729
 
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
 
wenzelm 
parents: 
23721 
diff
changeset
 | 
220  | 
|
| 5825 | 221  | 
|
222  | 
||
| 27814 | 223  | 
(** associated values **)  | 
224  | 
||
225  | 
(* access values *)  | 
|
226  | 
||
227  | 
fun get_value (Token (_, _, Value v)) = v  | 
|
228  | 
| get_value _ = NONE;  | 
|
229  | 
||
230  | 
fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v)))  | 
|
231  | 
| map_value _ tok = tok;  | 
|
232  | 
||
233  | 
||
234  | 
(* make values *)  | 
|
235  | 
||
236  | 
fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v));  | 
|
237  | 
||
238  | 
val mk_text = mk_value "<text>" o Text;  | 
|
239  | 
val mk_typ = mk_value "<typ>" o Typ;  | 
|
240  | 
val mk_term = mk_value "<term>" o Term;  | 
|
241  | 
val mk_fact = mk_value "<fact>" o Fact;  | 
|
242  | 
val mk_attribute = mk_value "<attribute>" o Attribute;  | 
|
243  | 
||
244  | 
||
245  | 
(* static binding *)  | 
|
246  | 
||
247  | 
(*1st stage: make empty slots assignable*)  | 
|
| 32738 | 248  | 
fun assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE))  | 
| 27814 | 249  | 
| assignable tok = tok;  | 
250  | 
||
251  | 
(*2nd stage: assign values as side-effect of scanning*)  | 
|
252  | 
fun assign v (Token (_, _, Assignable r)) = r := v  | 
|
253  | 
| assign _ _ = ();  | 
|
254  | 
||
255  | 
(*3rd stage: static closure of final values*)  | 
|
| 32738 | 256  | 
fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v)  | 
| 27814 | 257  | 
| closure tok = tok;  | 
258  | 
||
259  | 
||
260  | 
||
| 5825 | 261  | 
(** scanners **)  | 
262  | 
||
| 30573 | 263  | 
open Basic_Symbol_Pos;  | 
| 5825 | 264  | 
|
| 30573 | 265  | 
fun !!! msg = Symbol_Pos.!!! ("Outer lexical error: " ^ msg);
 | 
| 5825 | 266  | 
|
267  | 
||
268  | 
(* scan symbolic idents *)  | 
|
269  | 
||
| 20664 | 270  | 
val is_sym_char = member (op =) (explode "!#$%&*+-/<=>?@^_|~");  | 
| 5825 | 271  | 
|
| 8231 | 272  | 
val scan_symid =  | 
| 27769 | 273  | 
Scan.many1 (is_sym_char o symbol) ||  | 
274  | 
Scan.one (Symbol.is_symbolic o symbol) >> single;  | 
|
| 5825 | 275  | 
|
| 8231 | 276  | 
fun is_symid str =  | 
277  | 
(case try Symbol.explode str of  | 
|
| 15531 | 278  | 
SOME [s] => Symbol.is_symbolic s orelse is_sym_char s  | 
279  | 
| SOME ss => forall is_sym_char ss  | 
|
| 8231 | 280  | 
| _ => false);  | 
281  | 
||
| 27814 | 282  | 
fun ident_or_symbolic "begin" = false  | 
283  | 
| ident_or_symbolic ":" = true  | 
|
284  | 
| ident_or_symbolic "::" = true  | 
|
285  | 
| ident_or_symbolic s = Syntax.is_identifier s orelse is_symid s;  | 
|
| 5825 | 286  | 
|
287  | 
||
288  | 
(* scan verbatim text *)  | 
|
289  | 
||
290  | 
val scan_verb =  | 
|
| 27769 | 291  | 
$$$ "*" --| Scan.ahead (~$$$ "}") ||  | 
292  | 
Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single;  | 
|
| 5825 | 293  | 
|
294  | 
val scan_verbatim =  | 
|
| 30573 | 295  | 
  (Symbol_Pos.scan_pos --| $$$ "{" --| $$$ "*") -- !!! "missing end of verbatim text"
 | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
296  | 
(Symbol_Pos.change_prompt  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
297  | 
((Scan.repeat scan_verb >> flat) -- ($$$ "*" |-- $$$ "}" |-- Symbol_Pos.scan_pos)));  | 
| 5825 | 298  | 
|
299  | 
||
300  | 
(* scan space *)  | 
|
301  | 
||
| 19305 | 302  | 
fun is_space s = Symbol.is_blank s andalso s <> "\n";  | 
| 5825 | 303  | 
|
304  | 
val scan_space =  | 
|
| 27769 | 305  | 
Scan.many1 (is_space o symbol) @@@ Scan.optional ($$$ "\n") [] ||  | 
306  | 
Scan.many (is_space o symbol) @@@ $$$ "\n";  | 
|
| 5825 | 307  | 
|
308  | 
||
| 
27780
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
309  | 
(* scan comment *)  | 
| 5825 | 310  | 
|
311  | 
val scan_comment =  | 
|
| 30573 | 312  | 
Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body !!! -- Symbol_Pos.scan_pos);  | 
| 5825 | 313  | 
|
314  | 
||
| 23678 | 315  | 
(* scan malformed symbols *)  | 
316  | 
||
317  | 
val scan_malformed =  | 
|
| 27769 | 318  | 
$$$ Symbol.malformed |--  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
319  | 
Symbol_Pos.change_prompt (Scan.many (Symbol.is_regular o symbol))  | 
| 27769 | 320  | 
--| Scan.option ($$$ Symbol.end_malformed);  | 
| 
27752
 
ea7d573e565f
removed obsolete range_of (already included in position);
 
wenzelm 
parents: 
27747 
diff
changeset
 | 
321  | 
|
| 
 
ea7d573e565f
removed obsolete range_of (already included in position);
 
wenzelm 
parents: 
27747 
diff
changeset
 | 
322  | 
|
| 27663 | 323  | 
|
| 27769 | 324  | 
(** token sources **)  | 
| 5825 | 325  | 
|
| 27769 | 326  | 
fun source_proper src = src |> Source.filter is_proper;  | 
| 5825 | 327  | 
|
| 23678 | 328  | 
local  | 
329  | 
||
| 27769 | 330  | 
fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2;  | 
| 
27780
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
331  | 
|
| 27799 | 332  | 
fun token k ss =  | 
| 30573 | 333  | 
Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.untabify_content ss), Slot);  | 
| 27799 | 334  | 
|
335  | 
fun token_range k (pos1, (ss, pos2)) =  | 
|
| 30573 | 336  | 
Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.untabify_content ss), Slot);  | 
| 23678 | 337  | 
|
| 27769 | 338  | 
fun scan (lex1, lex2) = !!! "bad input"  | 
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
339  | 
(Symbol_Pos.scan_string >> token_range String ||  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
340  | 
Symbol_Pos.scan_alt_string >> token_range AltString ||  | 
| 27799 | 341  | 
scan_verbatim >> token_range Verbatim ||  | 
342  | 
scan_comment >> token_range Comment ||  | 
|
| 
27780
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
343  | 
scan_space >> token Space ||  | 
| 
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
344  | 
scan_malformed >> token Malformed ||  | 
| 
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
345  | 
Scan.one (Symbol.is_sync o symbol) >> (token Sync o single) ||  | 
| 
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
346  | 
(Scan.max token_leq  | 
| 27769 | 347  | 
(Scan.max token_leq  | 
348  | 
(Scan.literal lex2 >> pair Command)  | 
|
349  | 
(Scan.literal lex1 >> pair Keyword))  | 
|
350  | 
(Syntax.scan_longid >> pair LongIdent ||  | 
|
351  | 
Syntax.scan_id >> pair Ident ||  | 
|
352  | 
Syntax.scan_var >> pair Var ||  | 
|
353  | 
Syntax.scan_tid >> pair TypeIdent ||  | 
|
354  | 
Syntax.scan_tvar >> pair TypeVar ||  | 
|
355  | 
Syntax.scan_nat >> pair Nat ||  | 
|
| 
27780
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
356  | 
scan_symid >> pair SymIdent) >> uncurry token));  | 
| 27769 | 357  | 
|
358  | 
fun recover msg =  | 
|
359  | 
Scan.many ((Symbol.is_regular andf (not o Symbol.is_blank)) o symbol)  | 
|
| 
27780
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
360  | 
>> (single o token (Error msg));  | 
| 23678 | 361  | 
|
362  | 
in  | 
|
| 5825 | 363  | 
|
| 
27835
 
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
 
wenzelm 
parents: 
27814 
diff
changeset
 | 
364  | 
fun source' {do_recover} get_lex =
 | 
| 30573 | 365  | 
Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs))  | 
| 
27780
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
366  | 
(Option.map (rpair recover) do_recover);  | 
| 
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
367  | 
|
| 5825 | 368  | 
fun source do_recover get_lex pos src =  | 
| 30573 | 369  | 
Symbol_Pos.source pos src  | 
| 
27780
 
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
 
wenzelm 
parents: 
27769 
diff
changeset
 | 
370  | 
|> source' do_recover get_lex;  | 
| 23678 | 371  | 
|
372  | 
end;  | 
|
| 5825 | 373  | 
|
| 
30586
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
374  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
375  | 
(* read_antiq *)  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
376  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
377  | 
fun read_antiq lex scan (syms, pos) =  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
378  | 
let  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
379  | 
    fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.str_of pos ^ ":\n" ^
 | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
380  | 
      "@{" ^ Symbol_Pos.content syms ^ "}");
 | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
381  | 
|
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
382  | 
val res =  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
383  | 
Source.of_list syms  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
384  | 
      |> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon))
 | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
385  | 
|> source_proper  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
386  | 
|> Source.source stopper (Scan.error (Scan.bulk scan)) NONE  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
387  | 
|> Source.exhaust;  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
388  | 
in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end;  | 
| 
 
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
 
wenzelm 
parents: 
30573 
diff
changeset
 | 
389  | 
|
| 5825 | 390  | 
end;  |