author | wenzelm |
Thu, 05 Mar 2015 13:28:04 +0100 | |
changeset 59616 | eb59c6968219 |
parent 59125 | ee19c92ae8b4 |
child 59646 | 48d400469bcb |
permissions | -rw-r--r-- |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
1 |
(* Title: Pure/Isar/token.ML |
5825 | 2 |
Author: Markus Wenzel, TU Muenchen |
3 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
4 |
Outer token syntax for Isabelle/Isar. |
5825 | 5 |
*) |
6 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
7 |
signature TOKEN = |
5825 | 8 |
sig |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
9 |
datatype kind = |
59081 | 10 |
(*immediate source*) |
11 |
Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat | |
|
12 |
Float | Space | |
|
13 |
(*delimited content*) |
|
14 |
String | Alt_String | Verbatim | Cartouche | Comment | |
|
15 |
(*special content*) |
|
16 |
Error of string | Internal_Value | EOF |
|
58012 | 17 |
val str_of_kind: kind -> string |
55788
67699e08e969
store blobs / inlined files as separate text lines: smaller values are more healthy for the Poly/ML RTS and allow implicit sharing;
wenzelm
parents:
55750
diff
changeset
|
18 |
type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T} |
58012 | 19 |
type T |
20 |
type src |
|
27814 | 21 |
datatype value = |
58012 | 22 |
Source of src | |
57944
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
23 |
Literal of bool * Markup.T | |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
24 |
Name of string * morphism | |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
25 |
Typ of typ | |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
26 |
Term of term | |
57942
e5bec882fdd0
more informative Token.Fact: retain name of dynamic fact (without selection);
wenzelm
parents:
56202
diff
changeset
|
27 |
Fact of string option * thm list | |
57944
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
28 |
Attribute of morphism -> attribute | |
58017 | 29 |
Declaration of declaration | |
57944
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
30 |
Files of file Exn.result list |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
31 |
val name0: string -> value |
55708 | 32 |
val pos_of: T -> Position.T |
55709
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
33 |
val range_of: T list -> Position.range |
58012 | 34 |
val src: xstring * Position.T -> T list -> src |
35 |
val name_of_src: src -> string * Position.T |
|
36 |
val args_of_src: src -> T list |
|
37 |
val range_of_src: src -> Position.T |
|
38 |
val check_src: Proof.context -> 'a Name_Space.table -> src -> src * 'a |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
39 |
val eof: T |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
40 |
val is_eof: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
41 |
val not_eof: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
42 |
val stopper: T Scan.stopper |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
43 |
val kind_of: T -> kind |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
44 |
val is_kind: kind -> T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
45 |
val keyword_with: (string -> bool) -> T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
46 |
val ident_with: (string -> bool) -> T -> bool |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
47 |
val is_command: T -> bool |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
48 |
val is_name: T -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
49 |
val is_proper: T -> bool |
51266
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
wenzelm
parents:
50239
diff
changeset
|
50 |
val is_improper: T -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
51 |
val is_comment: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
52 |
val is_begin_ignore: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
53 |
val is_end_ignore: T -> bool |
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
54 |
val is_error: T -> bool |
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
55 |
val is_space: T -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
56 |
val is_blank: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
57 |
val is_newline: T -> bool |
55111 | 58 |
val inner_syntax_of: T -> string |
59064 | 59 |
val source_position_of: T -> Input.source |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
60 |
val content_of: T -> string |
56202 | 61 |
val keyword_markup: bool * Markup.T -> string -> Markup.T |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
62 |
val completion_report: T -> Position.report_text list |
59125 | 63 |
val reports: Keyword.keywords -> T -> Position.report_text list |
64 |
val markups: Keyword.keywords -> T -> Markup.T list |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
65 |
val unparse: T -> string |
58012 | 66 |
val unparse_src: src -> string list |
55745 | 67 |
val print: T -> string |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
68 |
val text_of: T -> string * string |
54520
cee77d2e9582
release file errors at runtime: Command.eval instead of Command.read;
wenzelm
parents:
54519
diff
changeset
|
69 |
val get_files: T -> file Exn.result list |
cee77d2e9582
release file errors at runtime: Command.eval instead of Command.read;
wenzelm
parents:
54519
diff
changeset
|
70 |
val put_files: file Exn.result list -> T -> T |
58025 | 71 |
val make_value: string -> value -> T |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
72 |
val get_value: T -> value option |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
73 |
val map_value: (value -> value) -> T -> T |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
74 |
val reports_of_value: T -> Position.report list |
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
75 |
val transform: morphism -> T -> T |
58012 | 76 |
val transform_src: morphism -> src -> src |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
77 |
val init_assignable: T -> T |
58012 | 78 |
val init_assignable_src: src -> src |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
79 |
val assign: value option -> T -> unit |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
80 |
val closure: T -> T |
58012 | 81 |
val closure_src: src -> src |
82 |
val pretty_value: Proof.context -> T -> Pretty.T |
|
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
83 |
val pretty_src: Proof.context -> src -> Pretty.T |
27814 | 84 |
val ident_or_symbolic: string -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
85 |
val source_proper: (T, 'a) Source.source -> (T, (T, 'a) Source.source) Source.source |
58904
f49c4f883c58
eliminated pointless dynamic keywords (TTY legacy);
wenzelm
parents:
58903
diff
changeset
|
86 |
val source: Keyword.keywords -> |
58864 | 87 |
Position.T -> (Symbol.symbol, 'a) Source.source -> (T, |
88 |
(Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source |
|
58904
f49c4f883c58
eliminated pointless dynamic keywords (TTY legacy);
wenzelm
parents:
58903
diff
changeset
|
89 |
val source_strict: Keyword.keywords -> |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
90 |
Position.T -> (Symbol.symbol, 'a) Source.source -> (T, |
30573 | 91 |
(Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source |
59083 | 92 |
val explode: Keyword.keywords -> Position.T -> string -> T list |
59085
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
93 |
val make: (int * int) * string -> Position.T -> T * Position.T |
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
94 |
type 'a parser = T list -> 'a * T list |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
95 |
type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list) |
58903 | 96 |
val read_no_commands: Keyword.keywords -> 'a parser -> Symbol_Pos.T list -> 'a list |
97 |
val read_antiq: Keyword.keywords -> 'a parser -> Symbol_Pos.T list * Position.T -> 'a |
|
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
98 |
val syntax_generic: 'a context_parser -> src -> Context.generic -> 'a * Context.generic |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
99 |
val syntax: 'a context_parser -> src -> Proof.context -> 'a * Proof.context |
5825 | 100 |
end; |
101 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
102 |
structure Token: TOKEN = |
5825 | 103 |
struct |
104 |
||
105 |
(** tokens **) |
|
106 |
||
58012 | 107 |
(* token kind *) |
5825 | 108 |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
109 |
datatype kind = |
59081 | 110 |
(*immediate source*) |
111 |
Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat | |
|
112 |
Float | Space | |
|
113 |
(*delimited content*) |
|
114 |
String | Alt_String | Verbatim | Cartouche | Comment | |
|
115 |
(*special content*) |
|
116 |
Error of string | Internal_Value | EOF; |
|
5825 | 117 |
|
118 |
val str_of_kind = |
|
7026 | 119 |
fn Command => "command" |
120 |
| Keyword => "keyword" |
|
5825 | 121 |
| Ident => "identifier" |
59081 | 122 |
| Long_Ident => "long identifier" |
123 |
| Sym_Ident => "symbolic identifier" |
|
5825 | 124 |
| Var => "schematic variable" |
59081 | 125 |
| Type_Ident => "type variable" |
126 |
| Type_Var => "schematic type variable" |
|
40290
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38229
diff
changeset
|
127 |
| Nat => "natural number" |
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38229
diff
changeset
|
128 |
| Float => "floating-point number" |
59081 | 129 |
| Space => "white space" |
55103 | 130 |
| String => "quoted string" |
59081 | 131 |
| Alt_String => "back-quoted string" |
5825 | 132 |
| Verbatim => "verbatim text" |
55103 | 133 |
| Cartouche => "text cartouche" |
7682
46de8064c93c
added Space, Comment token kinds (keep actual text);
wenzelm
parents:
7477
diff
changeset
|
134 |
| Comment => "comment text" |
59081 | 135 |
| Internal_Value => "internal value" |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
136 |
| Error _ => "bad input" |
48911
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
wenzelm
parents:
48905
diff
changeset
|
137 |
| EOF => "end-of-input"; |
5825 | 138 |
|
59085
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
139 |
val immediate_kinds = |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
140 |
Vector.fromList |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
141 |
[Command, Keyword, Ident, Long_Ident, Sym_Ident, Var, Type_Ident, Type_Var, Nat, Float, Space]; |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
142 |
|
59081 | 143 |
val delimited_kind = member (op =) [String, Alt_String, Verbatim, Cartouche, Comment]; |
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
144 |
|
5825 | 145 |
|
58012 | 146 |
(* datatype token *) |
147 |
||
148 |
(*The value slot assigns an (optional) internal value to a token, |
|
149 |
usually as a side-effect of special scanner setup (see also |
|
150 |
args.ML). Note that an assignable ref designates an intermediate |
|
151 |
state of internalization -- it is NOT meant to persist.*) |
|
152 |
||
153 |
type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T}; |
|
154 |
||
155 |
datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot |
|
156 |
||
157 |
and src = |
|
158 |
Src of |
|
159 |
{name: string * Position.T, |
|
160 |
args: T list, |
|
161 |
output_info: (string * Markup.T) option} |
|
162 |
||
163 |
and slot = |
|
164 |
Slot | |
|
165 |
Value of value option | |
|
166 |
Assignable of value option Unsynchronized.ref |
|
167 |
||
168 |
and value = |
|
169 |
Source of src | |
|
170 |
Literal of bool * Markup.T | |
|
171 |
Name of string * morphism | |
|
172 |
Typ of typ | |
|
173 |
Term of term | |
|
174 |
Fact of string option * thm list | (*optional name for dynamic fact, i.e. fact "variable"*) |
|
175 |
Attribute of morphism -> attribute | |
|
58017 | 176 |
Declaration of declaration | |
58012 | 177 |
Files of file Exn.result list; |
178 |
||
179 |
fun name0 a = Name (a, Morphism.identity); |
|
180 |
||
181 |
||
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
182 |
(* position *) |
5825 | 183 |
|
55708 | 184 |
fun pos_of (Token ((_, (pos, _)), _, _)) = pos; |
185 |
fun end_pos_of (Token ((_, (_, pos)), _, _)) = pos; |
|
27663 | 186 |
|
55709
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
187 |
fun range_of (toks as tok :: _) = |
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
188 |
let val pos' = end_pos_of (List.last toks) |
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
189 |
in Position.range (pos_of tok) pos' end |
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
190 |
| range_of [] = Position.no_range; |
5825 | 191 |
|
192 |
||
58012 | 193 |
(* src *) |
194 |
||
195 |
fun src name args = Src {name = name, args = args, output_info = NONE}; |
|
196 |
||
197 |
fun map_args f (Src {name, args, output_info}) = |
|
198 |
Src {name = name, args = map f args, output_info = output_info}; |
|
199 |
||
200 |
fun name_of_src (Src {name, ...}) = name; |
|
201 |
fun args_of_src (Src {args, ...}) = args; |
|
202 |
||
203 |
fun range_of_src (Src {name = (_, pos), args, ...}) = |
|
204 |
if null args then pos |
|
205 |
else Position.set_range (pos, #2 (range_of args)); |
|
206 |
||
207 |
fun check_src ctxt table (Src {name = (xname, pos), args, output_info = _}) = |
|
208 |
let |
|
209 |
val (name, x) = Name_Space.check (Context.Proof ctxt) table (xname, pos); |
|
210 |
val space = Name_Space.space_of_table table; |
|
211 |
val kind = Name_Space.kind_of space; |
|
212 |
val markup = Name_Space.markup space name; |
|
213 |
in (Src {name = (name, pos), args = args, output_info = SOME (kind, markup)}, x) end; |
|
214 |
||
215 |
||
58855 | 216 |
(* stopper *) |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
217 |
|
27814 | 218 |
fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot); |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
219 |
val eof = mk_eof Position.none; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
220 |
|
27814 | 221 |
fun is_eof (Token (_, (EOF, _), _)) = true |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
222 |
| is_eof _ = false; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
223 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
224 |
val not_eof = not o is_eof; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
225 |
|
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
226 |
val stopper = |
55708 | 227 |
Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
228 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
229 |
|
5825 | 230 |
(* kind of token *) |
231 |
||
27814 | 232 |
fun kind_of (Token (_, (k, _), _)) = k; |
233 |
fun is_kind k (Token (_, (k', _), _)) = k = k'; |
|
5825 | 234 |
|
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
235 |
val is_command = is_kind Command; |
59123
e68e44836d04
imitate command markup and rendering of Isabelle/jEdit in HTML output;
wenzelm
parents:
59112
diff
changeset
|
236 |
|
59081 | 237 |
val is_name = is_kind Ident orf is_kind Sym_Ident orf is_kind String orf is_kind Nat; |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
238 |
|
27814 | 239 |
fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x |
7026 | 240 |
| keyword_with _ _ = false; |
5825 | 241 |
|
27814 | 242 |
fun ident_with pred (Token (_, (Ident, x), _)) = pred x |
16029 | 243 |
| ident_with _ _ = false; |
244 |
||
27814 | 245 |
fun is_proper (Token (_, (Space, _), _)) = false |
246 |
| is_proper (Token (_, (Comment, _), _)) = false |
|
5825 | 247 |
| is_proper _ = true; |
248 |
||
51266
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
wenzelm
parents:
50239
diff
changeset
|
249 |
val is_improper = not o is_proper; |
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
wenzelm
parents:
50239
diff
changeset
|
250 |
|
27814 | 251 |
fun is_comment (Token (_, (Comment, _), _)) = true |
17069 | 252 |
| is_comment _ = false; |
253 |
||
27814 | 254 |
fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true |
8580 | 255 |
| is_begin_ignore _ = false; |
256 |
||
27814 | 257 |
fun is_end_ignore (Token (_, (Comment, ">"), _)) = true |
8580 | 258 |
| is_end_ignore _ = false; |
259 |
||
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
260 |
fun is_error (Token (_, (Error _, _), _)) = true |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
261 |
| is_error _ = false; |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
262 |
|
8651 | 263 |
|
17069 | 264 |
(* blanks and newlines -- space tokens obey lines *) |
8651 | 265 |
|
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
266 |
fun is_space (Token (_, (Space, _), _)) = true |
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
267 |
| is_space _ = false; |
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
268 |
|
27814 | 269 |
fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) |
17069 | 270 |
| is_blank _ = false; |
271 |
||
27814 | 272 |
fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x |
8651 | 273 |
| is_newline _ = false; |
274 |
||
5825 | 275 |
|
14991 | 276 |
(* token content *) |
9155 | 277 |
|
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
278 |
fun inner_syntax_of (Token ((source, (pos, _)), (kind, x), _)) = |
43731
70072780e095
inner syntax supports inlined YXML according to Term_XML (particularly useful for producing text under program control);
wenzelm
parents:
43709
diff
changeset
|
279 |
if YXML.detect x then x |
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
280 |
else |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
281 |
let |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
282 |
val delimited = delimited_kind kind; |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
283 |
val tree = XML.Elem (Markup.token delimited (Position.properties_of pos), [XML.Text source]); |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
284 |
in YXML.string_of tree end; |
25642
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
wenzelm
parents:
25582
diff
changeset
|
285 |
|
58978
e42da880c61e
more position information, e.g. relevant for errors in generated ML source;
wenzelm
parents:
58904
diff
changeset
|
286 |
fun source_position_of (Token ((source, range), (kind, _), _)) = |
59064 | 287 |
Input.source (delimited_kind kind) source range; |
27873 | 288 |
|
27814 | 289 |
fun content_of (Token (_, (_, x), _)) = x; |
27747
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
290 |
|
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
291 |
|
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
292 |
(* markup reports *) |
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
293 |
|
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
294 |
local |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
295 |
|
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
296 |
val token_kind_markup = |
59124 | 297 |
fn Var => (Markup.var, "") |
59081 | 298 |
| Type_Ident => (Markup.tfree, "") |
299 |
| Type_Var => (Markup.tvar, "") |
|
300 |
| String => (Markup.string, "") |
|
301 |
| Alt_String => (Markup.alt_string, "") |
|
302 |
| Verbatim => (Markup.verbatim, "") |
|
303 |
| Cartouche => (Markup.cartouche, "") |
|
304 |
| Comment => (Markup.comment, "") |
|
305 |
| Error msg => (Markup.bad, msg) |
|
59124 | 306 |
| _ => (Markup.empty, ""); |
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
307 |
|
59125 | 308 |
fun keyword_reports tok = map (fn markup => ((pos_of tok, markup), "")); |
59123
e68e44836d04
imitate command markup and rendering of Isabelle/jEdit in HTML output;
wenzelm
parents:
59112
diff
changeset
|
309 |
|
59125 | 310 |
fun command_markups keywords x = |
311 |
if Keyword.is_theory_end keywords x then [Markup.keyword2] |
|
312 |
else if Keyword.is_proof_asm keywords x then [Markup.keyword3] |
|
313 |
else if Keyword.is_improper keywords x then [Markup.keyword1, Markup.improper] |
|
314 |
else [Markup.keyword1]; |
|
59123
e68e44836d04
imitate command markup and rendering of Isabelle/jEdit in HTML output;
wenzelm
parents:
59112
diff
changeset
|
315 |
|
56063 | 316 |
in |
317 |
||
56202 | 318 |
fun keyword_markup (important, keyword) x = |
319 |
if important orelse Symbol.is_ascii_identifier x then keyword else Markup.delimiter; |
|
55919
2eb8c13339a5
more explicit quasi_keyword markup, for Args.$$$ material, which is somewhere in between of outer and inner syntax;
wenzelm
parents:
55916
diff
changeset
|
320 |
|
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
321 |
fun completion_report tok = |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
322 |
if is_kind Keyword tok |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
323 |
then map (fn m => ((pos_of tok, m), "")) (Completion.suppress_abbrevs (content_of tok)) |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
324 |
else []; |
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
325 |
|
59125 | 326 |
fun reports keywords tok = |
59123
e68e44836d04
imitate command markup and rendering of Isabelle/jEdit in HTML output;
wenzelm
parents:
59112
diff
changeset
|
327 |
if is_command tok then |
59125 | 328 |
keyword_reports tok (command_markups keywords (content_of tok)) |
59123
e68e44836d04
imitate command markup and rendering of Isabelle/jEdit in HTML output;
wenzelm
parents:
59112
diff
changeset
|
329 |
else if is_kind Keyword tok then |
59125 | 330 |
keyword_reports tok [keyword_markup (false, Markup.keyword2) (content_of tok)] |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
331 |
else |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
332 |
let val (m, text) = token_kind_markup (kind_of tok) |
59125 | 333 |
in [((pos_of tok, m), text)] end; |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
334 |
|
59125 | 335 |
fun markups keywords = map (#2 o #1) o reports keywords; |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
336 |
|
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
337 |
end; |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
338 |
|
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
339 |
|
27747
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
340 |
(* unparse *) |
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
341 |
|
27814 | 342 |
fun unparse (Token (_, (kind, x), _)) = |
14991 | 343 |
(case kind of |
43773 | 344 |
String => Symbol_Pos.quote_string_qq x |
59081 | 345 |
| Alt_String => Symbol_Pos.quote_string_bq x |
43773 | 346 |
| Verbatim => enclose "{*" "*}" x |
55033 | 347 |
| Cartouche => cartouche x |
43773 | 348 |
| Comment => enclose "(*" "*)" x |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
349 |
| EOF => "" |
14991 | 350 |
| _ => x); |
351 |
||
58012 | 352 |
fun unparse_src (Src {args, ...}) = map unparse args; |
353 |
||
59125 | 354 |
fun print tok = Markup.markups (markups Keyword.empty_keywords tok) (unparse tok); |
55745 | 355 |
|
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
356 |
fun text_of tok = |
58861
5ff61774df11
command-line terminator ";" is no longer accepted;
wenzelm
parents:
58855
diff
changeset
|
357 |
let |
5ff61774df11
command-line terminator ";" is no longer accepted;
wenzelm
parents:
58855
diff
changeset
|
358 |
val k = str_of_kind (kind_of tok); |
59125 | 359 |
val ms = markups Keyword.empty_keywords tok; |
58861
5ff61774df11
command-line terminator ";" is no longer accepted;
wenzelm
parents:
58855
diff
changeset
|
360 |
val s = unparse tok; |
5ff61774df11
command-line terminator ";" is no longer accepted;
wenzelm
parents:
58855
diff
changeset
|
361 |
in |
5ff61774df11
command-line terminator ";" is no longer accepted;
wenzelm
parents:
58855
diff
changeset
|
362 |
if s = "" then (k, "") |
5ff61774df11
command-line terminator ";" is no longer accepted;
wenzelm
parents:
58855
diff
changeset
|
363 |
else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) |
59125 | 364 |
then (k ^ " " ^ Markup.markups ms s, "") |
365 |
else (k, Markup.markups ms s) |
|
58861
5ff61774df11
command-line terminator ";" is no longer accepted;
wenzelm
parents:
58855
diff
changeset
|
366 |
end; |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
367 |
|
5825 | 368 |
|
369 |
||
27814 | 370 |
(** associated values **) |
371 |
||
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
372 |
(* inlined file content *) |
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
373 |
|
54519
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
374 |
fun get_files (Token (_, _, Value (SOME (Files files)))) = files |
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
375 |
| get_files _ = []; |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
376 |
|
54519
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
377 |
fun put_files [] tok = tok |
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
378 |
| put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files))) |
55708 | 379 |
| put_files _ tok = raise Fail ("Cannot put inlined files here" ^ Position.here (pos_of tok)); |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
380 |
|
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
381 |
|
27814 | 382 |
(* access values *) |
383 |
||
58025 | 384 |
fun make_value name v = |
59081 | 385 |
Token ((name, Position.no_range), (Internal_Value, name), Value (SOME v)); |
58025 | 386 |
|
27814 | 387 |
fun get_value (Token (_, _, Value v)) = v |
388 |
| get_value _ = NONE; |
|
389 |
||
390 |
fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) |
|
391 |
| map_value _ tok = tok; |
|
392 |
||
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
393 |
fun reports_of_value tok = |
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
394 |
(case get_value tok of |
56063 | 395 |
SOME (Literal markup) => |
396 |
let |
|
397 |
val pos = pos_of tok; |
|
398 |
val x = content_of tok; |
|
399 |
in |
|
400 |
if Position.is_reported pos then |
|
401 |
map (pair pos) (keyword_markup markup x :: Completion.suppress_abbrevs x) |
|
402 |
else [] |
|
403 |
end |
|
404 |
| _ => []); |
|
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
405 |
|
27814 | 406 |
|
58012 | 407 |
(* transform *) |
408 |
||
409 |
fun transform phi = |
|
410 |
map_value (fn v => |
|
411 |
(case v of |
|
412 |
Source src => Source (transform_src phi src) |
|
413 |
| Literal _ => v |
|
414 |
| Name (a, psi) => Name (a, psi $> phi) |
|
415 |
| Typ T => Typ (Morphism.typ phi T) |
|
416 |
| Term t => Term (Morphism.term phi t) |
|
417 |
| Fact (a, ths) => Fact (a, Morphism.fact phi ths) |
|
418 |
| Attribute att => Attribute (Morphism.transform phi att) |
|
58017 | 419 |
| Declaration decl => Declaration (Morphism.transform phi decl) |
58012 | 420 |
| Files _ => v)) |
421 |
and transform_src phi = map_args (transform phi); |
|
422 |
||
423 |
||
424 |
(* static binding *) |
|
425 |
||
426 |
(*1st stage: initialize assignable slots*) |
|
427 |
fun init_assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE)) |
|
428 |
| init_assignable (tok as Token (_, _, Assignable r)) = (r := NONE; tok) |
|
429 |
| init_assignable tok = tok; |
|
430 |
||
431 |
val init_assignable_src = map_args init_assignable; |
|
432 |
||
433 |
(*2nd stage: assign values as side-effect of scanning*) |
|
434 |
fun assign v (Token (_, _, Assignable r)) = r := v |
|
435 |
| assign _ _ = (); |
|
436 |
||
437 |
(*3rd stage: static closure of final values*) |
|
438 |
fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) |
|
439 |
| closure tok = tok; |
|
440 |
||
441 |
val closure_src = map_args closure; |
|
442 |
||
443 |
||
444 |
(* pretty *) |
|
57944
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
445 |
|
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
446 |
fun pretty_value ctxt tok = |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
447 |
(case get_value tok of |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
448 |
SOME (Literal markup) => |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
449 |
let val x = content_of tok |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
450 |
in Pretty.mark_str (keyword_markup markup x, x) end |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
451 |
| SOME (Name (a, _)) => Pretty.str (quote a) |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
452 |
| SOME (Typ T) => Syntax.pretty_typ ctxt T |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
453 |
| SOME (Term t) => Syntax.pretty_term ctxt t |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
454 |
| SOME (Fact (_, ths)) => |
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
455 |
Pretty.enclose "(" ")" (Pretty.breaks (map (Pretty.backquote o Display.pretty_thm ctxt) ths)) |
59125 | 456 |
| _ => Pretty.marks_str (markups Keyword.empty_keywords tok, unparse tok)); |
57944
fff8d328da56
more informative Token.Name with history of morphisms;
wenzelm
parents:
57942
diff
changeset
|
457 |
|
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
458 |
fun pretty_src ctxt src = |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
459 |
let |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
460 |
val Src {name = (name, _), args, output_info} = src; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
461 |
val prt_name = |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
462 |
(case output_info of |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
463 |
NONE => Pretty.str name |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
464 |
| SOME (_, markup) => Pretty.mark_str (markup, name)); |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
465 |
val prt_arg = pretty_value ctxt; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
466 |
in Pretty.block (Pretty.breaks (prt_name :: map prt_arg args)) end; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
467 |
|
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
468 |
|
59125 | 469 |
|
5825 | 470 |
(** scanners **) |
471 |
||
30573 | 472 |
open Basic_Symbol_Pos; |
5825 | 473 |
|
48764 | 474 |
val err_prefix = "Outer lexical error: "; |
475 |
||
476 |
fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); |
|
5825 | 477 |
|
478 |
||
479 |
(* scan symbolic idents *) |
|
480 |
||
8231 | 481 |
val scan_symid = |
55033 | 482 |
Scan.many1 (Symbol.is_symbolic_char o Symbol_Pos.symbol) || |
40525
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents:
40523
diff
changeset
|
483 |
Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single; |
5825 | 484 |
|
8231 | 485 |
fun is_symid str = |
486 |
(case try Symbol.explode str of |
|
55033 | 487 |
SOME [s] => Symbol.is_symbolic s orelse Symbol.is_symbolic_char s |
488 |
| SOME ss => forall Symbol.is_symbolic_char ss |
|
8231 | 489 |
| _ => false); |
490 |
||
27814 | 491 |
fun ident_or_symbolic "begin" = false |
492 |
| ident_or_symbolic ":" = true |
|
493 |
| ident_or_symbolic "::" = true |
|
50239 | 494 |
| ident_or_symbolic s = Symbol_Pos.is_identifier s orelse is_symid s; |
5825 | 495 |
|
496 |
||
497 |
(* scan verbatim text *) |
|
498 |
||
499 |
val scan_verb = |
|
55107 | 500 |
$$$ "*" --| Scan.ahead (~$$ "}") || |
58854 | 501 |
Scan.one (fn (s, _) => s <> "*" andalso Symbol.not_eof s) >> single; |
5825 | 502 |
|
503 |
val scan_verbatim = |
|
55107 | 504 |
Scan.ahead ($$ "{" -- $$ "*") |-- |
55106 | 505 |
!!! "unclosed verbatim text" |
55107 | 506 |
((Symbol_Pos.scan_pos --| $$ "{" --| $$ "*") -- |
58850 | 507 |
((Scan.repeat scan_verb >> flat) -- ($$ "*" |-- $$ "}" |-- Symbol_Pos.scan_pos))); |
5825 | 508 |
|
48743
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
509 |
val recover_verbatim = |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
510 |
$$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat); |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
511 |
|
5825 | 512 |
|
55033 | 513 |
(* scan cartouche *) |
514 |
||
515 |
val scan_cartouche = |
|
55104
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
wenzelm
parents:
55103
diff
changeset
|
516 |
Symbol_Pos.scan_pos -- |
55105 | 517 |
((Symbol_Pos.scan_cartouche err_prefix >> Symbol_Pos.cartouche_content) -- Symbol_Pos.scan_pos); |
55033 | 518 |
|
519 |
||
5825 | 520 |
(* scan space *) |
521 |
||
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
522 |
fun space_symbol (s, _) = Symbol.is_blank s andalso s <> "\n"; |
5825 | 523 |
|
524 |
val scan_space = |
|
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
525 |
Scan.many1 space_symbol @@@ Scan.optional ($$$ "\n") [] || |
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
526 |
Scan.many space_symbol @@@ $$$ "\n"; |
5825 | 527 |
|
528 |
||
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
529 |
(* scan comment *) |
5825 | 530 |
|
531 |
val scan_comment = |
|
55105 | 532 |
Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body err_prefix -- Symbol_Pos.scan_pos); |
5825 | 533 |
|
534 |
||
27663 | 535 |
|
27769 | 536 |
(** token sources **) |
5825 | 537 |
|
27769 | 538 |
fun source_proper src = src |> Source.filter is_proper; |
5825 | 539 |
|
23678 | 540 |
local |
541 |
||
27769 | 542 |
fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
543 |
|
27799 | 544 |
fun token k ss = |
43709
717e96cf9527
discontinued special treatment of hard tabulators;
wenzelm
parents:
42503
diff
changeset
|
545 |
Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.content ss), Slot); |
27799 | 546 |
|
547 |
fun token_range k (pos1, (ss, pos2)) = |
|
59112 | 548 |
Token (Symbol_Pos.implode_range (pos1, pos2) ss, (k, Symbol_Pos.content ss), Slot); |
23678 | 549 |
|
59083 | 550 |
fun scan_token keywords = !!! "bad input" |
48764 | 551 |
(Symbol_Pos.scan_string_qq err_prefix >> token_range String || |
59081 | 552 |
Symbol_Pos.scan_string_bq err_prefix >> token_range Alt_String || |
27799 | 553 |
scan_verbatim >> token_range Verbatim || |
55033 | 554 |
scan_cartouche >> token_range Cartouche || |
27799 | 555 |
scan_comment >> token_range Comment || |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
556 |
scan_space >> token Space || |
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
557 |
(Scan.max token_leq |
27769 | 558 |
(Scan.max token_leq |
58903 | 559 |
(Scan.literal (Keyword.major_keywords keywords) >> pair Command) |
560 |
(Scan.literal (Keyword.minor_keywords keywords) >> pair Keyword)) |
|
59081 | 561 |
(Lexicon.scan_longid >> pair Long_Ident || |
42290
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
562 |
Lexicon.scan_id >> pair Ident || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
563 |
Lexicon.scan_var >> pair Var || |
59081 | 564 |
Lexicon.scan_tid >> pair Type_Ident || |
565 |
Lexicon.scan_tvar >> pair Type_Var || |
|
42290
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
566 |
Lexicon.scan_float >> pair Float || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
567 |
Lexicon.scan_nat >> pair Nat || |
59081 | 568 |
scan_symid >> pair Sym_Ident) >> uncurry token)); |
27769 | 569 |
|
570 |
fun recover msg = |
|
48743
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
571 |
(Symbol_Pos.recover_string_qq || |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
572 |
Symbol_Pos.recover_string_bq || |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
573 |
recover_verbatim || |
55033 | 574 |
Symbol_Pos.recover_cartouche || |
48743
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
575 |
Symbol_Pos.recover_comment || |
58854 | 576 |
Scan.one (Symbol.not_eof o Symbol_Pos.symbol) >> single) |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
577 |
>> (single o token (Error msg)); |
23678 | 578 |
|
579 |
in |
|
5825 | 580 |
|
58904
f49c4f883c58
eliminated pointless dynamic keywords (TTY legacy);
wenzelm
parents:
58903
diff
changeset
|
581 |
fun source' strict keywords = |
58864 | 582 |
let |
59083 | 583 |
val scan_strict = Scan.bulk (scan_token keywords); |
58864 | 584 |
val scan = if strict then scan_strict else Scan.recover scan_strict recover; |
585 |
in Source.source Symbol_Pos.stopper scan end; |
|
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
586 |
|
58904
f49c4f883c58
eliminated pointless dynamic keywords (TTY legacy);
wenzelm
parents:
58903
diff
changeset
|
587 |
fun source keywords pos src = Symbol_Pos.source pos src |> source' false keywords; |
f49c4f883c58
eliminated pointless dynamic keywords (TTY legacy);
wenzelm
parents:
58903
diff
changeset
|
588 |
fun source_strict keywords pos src = Symbol_Pos.source pos src |> source' true keywords; |
23678 | 589 |
|
590 |
end; |
|
5825 | 591 |
|
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
592 |
|
59083 | 593 |
(* explode *) |
594 |
||
595 |
fun explode keywords pos = |
|
596 |
Source.of_string #> |
|
597 |
Symbol.source #> |
|
598 |
source keywords pos #> |
|
599 |
Source.exhaust; |
|
600 |
||
601 |
||
59085
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
602 |
(* make *) |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
603 |
|
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
604 |
fun make ((k, n), s) pos = |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
605 |
let |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
606 |
val pos' = Position.advance_offset n pos; |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
607 |
val range = Position.range pos pos'; |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
608 |
val tok = |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
609 |
if k < Vector.length immediate_kinds then |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
610 |
Token ((s, range), (Vector.sub (immediate_kinds, k), s), Slot) |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
611 |
else |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
612 |
(case explode Keyword.empty_keywords pos s of |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
613 |
[tok] => tok |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
614 |
| _ => Token ((s, range), (Error (err_prefix ^ "exactly one token expected"), s), Slot)) |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
615 |
in (tok, pos') end; |
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
616 |
|
08a6901eb035
clarified define_command: send tokens more directly, without requiring keywords in ML;
wenzelm
parents:
59083
diff
changeset
|
617 |
|
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
618 |
|
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
619 |
(** parsers **) |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
620 |
|
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
621 |
type 'a parser = T list -> 'a * T list; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
622 |
type 'a context_parser = Context.generic * T list -> 'a * (Context.generic * T list); |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
623 |
|
58045 | 624 |
|
625 |
(* read source *) |
|
626 |
||
58903 | 627 |
fun read_no_commands keywords scan syms = |
58045 | 628 |
Source.of_list syms |
58904
f49c4f883c58
eliminated pointless dynamic keywords (TTY legacy);
wenzelm
parents:
58903
diff
changeset
|
629 |
|> source' true (Keyword.no_command_keywords keywords) |
58045 | 630 |
|> source_proper |
58864 | 631 |
|> Source.source stopper (Scan.error (Scan.bulk scan)) |
58045 | 632 |
|> Source.exhaust; |
633 |
||
58903 | 634 |
fun read_antiq keywords scan (syms, pos) = |
58045 | 635 |
let |
636 |
fun err msg = |
|
637 |
cat_error msg ("Malformed antiquotation" ^ Position.here pos ^ ":\n" ^ |
|
638 |
"@{" ^ Symbol_Pos.content syms ^ "}"); |
|
58903 | 639 |
val res = read_no_commands keywords scan syms handle ERROR msg => err msg; |
58045 | 640 |
in (case res of [x] => x | _ => err "") end; |
641 |
||
642 |
||
643 |
(* wrapped syntax *) |
|
644 |
||
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
645 |
fun syntax_generic scan (Src {name = (name, pos), args = args0, output_info}) context = |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
646 |
let |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
647 |
val args1 = map init_assignable args0; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
648 |
fun reported_text () = |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
649 |
if Context_Position.is_visible_generic context then |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
650 |
((pos, Markup.operator) :: maps (reports_of_value o closure) args1) |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
651 |
|> map (fn (p, m) => Position.reported_text p m "") |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
652 |
else []; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
653 |
in |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
654 |
(case Scan.error (Scan.finite' stopper (Scan.option scan)) (context, args1) of |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
655 |
(SOME x, (context', [])) => |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
656 |
let val _ = Output.report (reported_text ()) |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
657 |
in (x, context') end |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
658 |
| (_, (_, args2)) => |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
659 |
let |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
660 |
val print_name = |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
661 |
(case output_info of |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
662 |
NONE => quote name |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
663 |
| SOME (kind, markup) => plain_words kind ^ " " ^ quote (Markup.markup markup name)); |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
664 |
val print_args = |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
665 |
if null args2 then "" else ":\n " ^ space_implode " " (map print args2); |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
666 |
in |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
667 |
error ("Bad arguments for " ^ print_name ^ Position.here pos ^ print_args ^ |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
668 |
Markup.markup_report (implode (reported_text ()))) |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
669 |
end) |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
670 |
end; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
671 |
|
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
672 |
fun syntax scan src = apsnd Context.the_proof o syntax_generic scan src o Context.Proof; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
673 |
|
5825 | 674 |
end; |
58011
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
675 |
|
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
676 |
type 'a parser = 'a Token.parser; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
677 |
type 'a context_parser = 'a Token.context_parser; |
bc6bced136e5
tuned signature -- moved type src to Token, without aliases;
wenzelm
parents:
57944
diff
changeset
|
678 |