author | wenzelm |
Fri, 01 Aug 2014 20:15:00 +0200 | |
changeset 57835 | 43ff8638c02c |
parent 56202 | 0a11d17eeeff |
child 57942 | e5bec882fdd0 |
permissions | -rw-r--r-- |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
1 |
(* Title: Pure/Isar/token.ML |
5825 | 2 |
Author: Markus Wenzel, TU Muenchen |
3 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
4 |
Outer token syntax for Isabelle/Isar. |
5825 | 5 |
*) |
6 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
7 |
signature TOKEN = |
5825 | 8 |
sig |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
9 |
datatype kind = |
27814 | 10 |
Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | |
55033 | 11 |
Nat | Float | String | AltString | Verbatim | Cartouche | Space | Comment | InternalValue | |
40958
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
wenzelm
parents:
40627
diff
changeset
|
12 |
Error of string | Sync | EOF |
55788
67699e08e969
store blobs / inlined files as separate text lines: smaller values are more healthy for the Poly/ML RTS and allow implicit sharing;
wenzelm
parents:
55750
diff
changeset
|
13 |
type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T} |
27814 | 14 |
datatype value = |
56202 | 15 |
Literal of bool * Markup.T | Text of string | Typ of typ | Term of term | Fact of thm list | |
54520
cee77d2e9582
release file errors at runtime: Command.eval instead of Command.read;
wenzelm
parents:
54519
diff
changeset
|
16 |
Attribute of morphism -> attribute | Files of file Exn.result list |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
17 |
type T |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
18 |
val str_of_kind: kind -> string |
55708 | 19 |
val pos_of: T -> Position.T |
55709
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
20 |
val range_of: T list -> Position.range |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
21 |
val eof: T |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
22 |
val is_eof: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
23 |
val not_eof: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
24 |
val not_sync: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
25 |
val stopper: T Scan.stopper |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
26 |
val kind_of: T -> kind |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
27 |
val is_kind: kind -> T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
28 |
val keyword_with: (string -> bool) -> T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
29 |
val ident_with: (string -> bool) -> T -> bool |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
30 |
val is_command: T -> bool |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
31 |
val is_name: T -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
32 |
val is_proper: T -> bool |
51266
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
wenzelm
parents:
50239
diff
changeset
|
33 |
val is_improper: T -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
34 |
val is_semicolon: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
35 |
val is_comment: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
36 |
val is_begin_ignore: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
37 |
val is_end_ignore: T -> bool |
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
38 |
val is_error: T -> bool |
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
39 |
val is_space: T -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
40 |
val is_blank: T -> bool |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
41 |
val is_newline: T -> bool |
55111 | 42 |
val inner_syntax_of: T -> string |
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
43 |
val source_position_of: T -> Symbol_Pos.source |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
44 |
val content_of: T -> string |
56202 | 45 |
val keyword_markup: bool * Markup.T -> string -> Markup.T |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
46 |
val completion_report: T -> Position.report_text list |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
47 |
val report: T -> Position.report_text |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
48 |
val markup: T -> Markup.T |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
49 |
val unparse: T -> string |
55745 | 50 |
val print: T -> string |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
51 |
val text_of: T -> string * string |
54520
cee77d2e9582
release file errors at runtime: Command.eval instead of Command.read;
wenzelm
parents:
54519
diff
changeset
|
52 |
val get_files: T -> file Exn.result list |
cee77d2e9582
release file errors at runtime: Command.eval instead of Command.read;
wenzelm
parents:
54519
diff
changeset
|
53 |
val put_files: file Exn.result list -> T -> T |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
54 |
val get_value: T -> value option |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
55 |
val map_value: (value -> value) -> T -> T |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
56 |
val reports_of_value: T -> Position.report list |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
57 |
val mk_text: string -> T |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
58 |
val mk_typ: typ -> T |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
59 |
val mk_term: term -> T |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
60 |
val mk_fact: thm list -> T |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
61 |
val mk_attribute: (morphism -> attribute) -> T |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
62 |
val init_assignable: T -> T |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
63 |
val assign: value option -> T -> unit |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
64 |
val closure: T -> T |
27814 | 65 |
val ident_or_symbolic: string -> bool |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
66 |
val source_proper: (T, 'a) Source.source -> (T, (T, 'a) Source.source) Source.source |
48741 | 67 |
val source': {do_recover: bool option} -> (unit -> Scan.lexicon * Scan.lexicon) -> |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
68 |
(Symbol_Pos.T, 'a) Source.source -> (T, (Symbol_Pos.T, 'a) Source.source) Source.source |
48741 | 69 |
val source: {do_recover: bool option} -> (unit -> Scan.lexicon * Scan.lexicon) -> |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
70 |
Position.T -> (Symbol.symbol, 'a) Source.source -> (T, |
30573 | 71 |
(Symbol_Pos.T, Position.T * (Symbol.symbol, 'a) Source.source) Source.source) Source.source |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
72 |
val read_antiq: Scan.lexicon -> (T list -> 'a * T list) -> Symbol_Pos.T list * Position.T -> 'a |
5825 | 73 |
end; |
74 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
75 |
structure Token: TOKEN = |
5825 | 76 |
struct |
77 |
||
78 |
(** tokens **) |
|
79 |
||
27814 | 80 |
(* token values *) |
81 |
||
82 |
(*The value slot assigns an (optional) internal value to a token, |
|
83 |
usually as a side-effect of special scanner setup (see also |
|
84 |
args.ML). Note that an assignable ref designates an intermediate |
|
85 |
state of internalization -- it is NOT meant to persist.*) |
|
86 |
||
55788
67699e08e969
store blobs / inlined files as separate text lines: smaller values are more healthy for the Poly/ML RTS and allow implicit sharing;
wenzelm
parents:
55750
diff
changeset
|
87 |
type file = {src_path: Path.T, lines: string list, digest: SHA1.digest, pos: Position.T}; |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
88 |
|
27814 | 89 |
datatype value = |
56202 | 90 |
Literal of bool * Markup.T | |
27814 | 91 |
Text of string | |
92 |
Typ of typ | |
|
93 |
Term of term | |
|
94 |
Fact of thm list | |
|
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
95 |
Attribute of morphism -> attribute | |
54520
cee77d2e9582
release file errors at runtime: Command.eval instead of Command.read;
wenzelm
parents:
54519
diff
changeset
|
96 |
Files of file Exn.result list; |
27814 | 97 |
|
98 |
datatype slot = |
|
99 |
Slot | |
|
100 |
Value of value option | |
|
32738 | 101 |
Assignable of value option Unsynchronized.ref; |
27814 | 102 |
|
103 |
||
5825 | 104 |
(* datatype token *) |
105 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
106 |
datatype kind = |
27814 | 107 |
Command | Keyword | Ident | LongIdent | SymIdent | Var | TypeIdent | TypeVar | |
55033 | 108 |
Nat | Float | String | AltString | Verbatim | Cartouche | Space | Comment | InternalValue | |
40958
755f8fe7ced9
eliminated obsolete Token.Malformed -- subsumed by Token.Error;
wenzelm
parents:
40627
diff
changeset
|
109 |
Error of string | Sync | EOF; |
5825 | 110 |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
32738
diff
changeset
|
111 |
datatype T = Token of (Symbol_Pos.text * Position.range) * (kind * string) * slot; |
5825 | 112 |
|
113 |
val str_of_kind = |
|
7026 | 114 |
fn Command => "command" |
115 |
| Keyword => "keyword" |
|
5825 | 116 |
| Ident => "identifier" |
117 |
| LongIdent => "long identifier" |
|
118 |
| SymIdent => "symbolic identifier" |
|
119 |
| Var => "schematic variable" |
|
120 |
| TypeIdent => "type variable" |
|
121 |
| TypeVar => "schematic type variable" |
|
40290
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38229
diff
changeset
|
122 |
| Nat => "natural number" |
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
38229
diff
changeset
|
123 |
| Float => "floating-point number" |
55103 | 124 |
| String => "quoted string" |
17164
a786e1a1ce02
added AltString token (delimited by ASCII back-quotes);
wenzelm
parents:
17069
diff
changeset
|
125 |
| AltString => "back-quoted string" |
5825 | 126 |
| Verbatim => "verbatim text" |
55103 | 127 |
| Cartouche => "text cartouche" |
7682
46de8064c93c
added Space, Comment token kinds (keep actual text);
wenzelm
parents:
7477
diff
changeset
|
128 |
| Space => "white space" |
46de8064c93c
added Space, Comment token kinds (keep actual text);
wenzelm
parents:
7477
diff
changeset
|
129 |
| Comment => "comment text" |
27814 | 130 |
| InternalValue => "internal value" |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
131 |
| Error _ => "bad input" |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
132 |
| Sync => "sync marker" |
48911
5debc3e4fa81
tuned messages: end-of-input rarely means physical end-of-file from the past;
wenzelm
parents:
48905
diff
changeset
|
133 |
| EOF => "end-of-input"; |
5825 | 134 |
|
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
135 |
val delimited_kind = member (op =) [String, AltString, Verbatim, Cartouche, Comment]; |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
136 |
|
5825 | 137 |
|
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
138 |
(* position *) |
5825 | 139 |
|
55708 | 140 |
fun pos_of (Token ((_, (pos, _)), _, _)) = pos; |
141 |
fun end_pos_of (Token ((_, (_, pos)), _, _)) = pos; |
|
27663 | 142 |
|
55709
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
143 |
fun range_of (toks as tok :: _) = |
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
144 |
let val pos' = end_pos_of (List.last toks) |
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
145 |
in Position.range (pos_of tok) pos' end |
4e5a83a46ded
clarified Token.range_of in accordance to Symbol_Pos.range;
wenzelm
parents:
55708
diff
changeset
|
146 |
| range_of [] = Position.no_range; |
5825 | 147 |
|
148 |
||
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
149 |
(* control tokens *) |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
150 |
|
27814 | 151 |
fun mk_eof pos = Token (("", (pos, Position.none)), (EOF, ""), Slot); |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
152 |
val eof = mk_eof Position.none; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
153 |
|
27814 | 154 |
fun is_eof (Token (_, (EOF, _), _)) = true |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
155 |
| is_eof _ = false; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
156 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
157 |
val not_eof = not o is_eof; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
158 |
|
27814 | 159 |
fun not_sync (Token (_, (Sync, _), _)) = false |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
160 |
| not_sync _ = true; |
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
161 |
|
27752
ea7d573e565f
removed obsolete range_of (already included in position);
wenzelm
parents:
27747
diff
changeset
|
162 |
val stopper = |
55708 | 163 |
Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; |
27733
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
164 |
|
d3d7038fb7b5
abstract type Scan.stopper, position taken from last input token;
wenzelm
parents:
27663
diff
changeset
|
165 |
|
5825 | 166 |
(* kind of token *) |
167 |
||
27814 | 168 |
fun kind_of (Token (_, (k, _), _)) = k; |
169 |
fun is_kind k (Token (_, (k', _), _)) = k = k'; |
|
5825 | 170 |
|
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
171 |
val is_command = is_kind Command; |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
172 |
val is_name = is_kind Ident orf is_kind SymIdent orf is_kind String orf is_kind Nat; |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
173 |
|
27814 | 174 |
fun keyword_with pred (Token (_, (Keyword, x), _)) = pred x |
7026 | 175 |
| keyword_with _ _ = false; |
5825 | 176 |
|
27814 | 177 |
fun ident_with pred (Token (_, (Ident, x), _)) = pred x |
16029 | 178 |
| ident_with _ _ = false; |
179 |
||
27814 | 180 |
fun is_proper (Token (_, (Space, _), _)) = false |
181 |
| is_proper (Token (_, (Comment, _), _)) = false |
|
5825 | 182 |
| is_proper _ = true; |
183 |
||
51266
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
wenzelm
parents:
50239
diff
changeset
|
184 |
val is_improper = not o is_proper; |
3007d0bc9cb1
unified Command.is_proper in ML with Scala (see also 123be08eed88);
wenzelm
parents:
50239
diff
changeset
|
185 |
|
27814 | 186 |
fun is_semicolon (Token (_, (Keyword, ";"), _)) = true |
9130 | 187 |
| is_semicolon _ = false; |
188 |
||
27814 | 189 |
fun is_comment (Token (_, (Comment, _), _)) = true |
17069 | 190 |
| is_comment _ = false; |
191 |
||
27814 | 192 |
fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true |
8580 | 193 |
| is_begin_ignore _ = false; |
194 |
||
27814 | 195 |
fun is_end_ignore (Token (_, (Comment, ">"), _)) = true |
8580 | 196 |
| is_end_ignore _ = false; |
197 |
||
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
198 |
fun is_error (Token (_, (Error _, _), _)) = true |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
199 |
| is_error _ = false; |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
48743
diff
changeset
|
200 |
|
8651 | 201 |
|
17069 | 202 |
(* blanks and newlines -- space tokens obey lines *) |
8651 | 203 |
|
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
204 |
fun is_space (Token (_, (Space, _), _)) = true |
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
205 |
| is_space _ = false; |
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
206 |
|
27814 | 207 |
fun is_blank (Token (_, (Space, x), _)) = not (String.isSuffix "\n" x) |
17069 | 208 |
| is_blank _ = false; |
209 |
||
27814 | 210 |
fun is_newline (Token (_, (Space, x), _)) = String.isSuffix "\n" x |
8651 | 211 |
| is_newline _ = false; |
212 |
||
5825 | 213 |
|
14991 | 214 |
(* token content *) |
9155 | 215 |
|
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
216 |
fun inner_syntax_of (Token ((source, (pos, _)), (kind, x), _)) = |
43731
70072780e095
inner syntax supports inlined YXML according to Term_XML (particularly useful for producing text under program control);
wenzelm
parents:
43709
diff
changeset
|
217 |
if YXML.detect x then x |
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
218 |
else |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
219 |
let |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
220 |
val delimited = delimited_kind kind; |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
221 |
val tree = XML.Elem (Markup.token delimited (Position.properties_of pos), [XML.Text source]); |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
222 |
in YXML.string_of tree end; |
25642
ebdff0dca2a5
text_of: made even more robust against recurrent errors;
wenzelm
parents:
25582
diff
changeset
|
223 |
|
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
224 |
fun source_position_of (Token ((source, (pos, _)), (kind, _), _)) = |
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55788
diff
changeset
|
225 |
{delimited = delimited_kind kind, text = source, pos = pos}; |
27873 | 226 |
|
27814 | 227 |
fun content_of (Token (_, (_, x), _)) = x; |
27747
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
228 |
|
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
229 |
|
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
230 |
(* markup reports *) |
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
231 |
|
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
232 |
local |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
233 |
|
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
234 |
val token_kind_markup = |
55750
baa7a1e57f4a
back to Markup.command for actual tokens (amending 4a4e5686e091) -- avoid conflict of jEdit token marker with Rendering.text_colors;
wenzelm
parents:
55745
diff
changeset
|
235 |
fn Command => (Markup.command, "") |
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
236 |
| Keyword => (Markup.keyword2, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
237 |
| Ident => (Markup.empty, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
238 |
| LongIdent => (Markup.empty, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
239 |
| SymIdent => (Markup.empty, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
240 |
| Var => (Markup.var, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
241 |
| TypeIdent => (Markup.tfree, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
242 |
| TypeVar => (Markup.tvar, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
243 |
| Nat => (Markup.empty, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
244 |
| Float => (Markup.empty, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
245 |
| String => (Markup.string, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
246 |
| AltString => (Markup.altstring, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
247 |
| Verbatim => (Markup.verbatim, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
248 |
| Cartouche => (Markup.cartouche, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
249 |
| Space => (Markup.empty, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
250 |
| Comment => (Markup.comment, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
251 |
| InternalValue => (Markup.empty, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
252 |
| Error msg => (Markup.bad, msg) |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
253 |
| Sync => (Markup.control, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
254 |
| EOF => (Markup.control, ""); |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
255 |
|
56063 | 256 |
in |
257 |
||
56202 | 258 |
fun keyword_markup (important, keyword) x = |
259 |
if important orelse Symbol.is_ascii_identifier x then keyword else Markup.delimiter; |
|
55919
2eb8c13339a5
more explicit quasi_keyword markup, for Args.$$$ material, which is somewhere in between of outer and inner syntax;
wenzelm
parents:
55916
diff
changeset
|
260 |
|
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
261 |
fun completion_report tok = |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
262 |
if is_kind Keyword tok |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
263 |
then map (fn m => ((pos_of tok, m), "")) (Completion.suppress_abbrevs (content_of tok)) |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
264 |
else []; |
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
265 |
|
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
266 |
fun report tok = |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
267 |
if is_kind Keyword tok then |
56202 | 268 |
((pos_of tok, keyword_markup (false, Markup.keyword2) (content_of tok)), "") |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
269 |
else |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
270 |
let val (m, text) = token_kind_markup (kind_of tok) |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
271 |
in ((pos_of tok, m), text) end; |
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
272 |
|
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
273 |
val markup = #2 o #1 o report; |
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
274 |
|
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
275 |
end; |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
276 |
|
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
277 |
|
27747
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
278 |
(* unparse *) |
d41abb7bc08a
token: maintain of source, which retains original position information;
wenzelm
parents:
27733
diff
changeset
|
279 |
|
27814 | 280 |
fun unparse (Token (_, (kind, x), _)) = |
14991 | 281 |
(case kind of |
43773 | 282 |
String => Symbol_Pos.quote_string_qq x |
283 |
| AltString => Symbol_Pos.quote_string_bq x |
|
284 |
| Verbatim => enclose "{*" "*}" x |
|
55033 | 285 |
| Cartouche => cartouche x |
43773 | 286 |
| Comment => enclose "(*" "*)" x |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
287 |
| Sync => "" |
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
288 |
| EOF => "" |
14991 | 289 |
| _ => x); |
290 |
||
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
291 |
fun print tok = Markup.markup (markup tok) (unparse tok); |
55745 | 292 |
|
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
293 |
fun text_of tok = |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
294 |
if is_semicolon tok then ("terminator", "") |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
295 |
else |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
296 |
let |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
297 |
val k = str_of_kind (kind_of tok); |
55915
607948c90bf0
suppress short abbreviations more uniformly, for outer and quasi-outer syntax;
wenzelm
parents:
55914
diff
changeset
|
298 |
val m = markup tok; |
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
299 |
val s = unparse tok; |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
300 |
in |
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
301 |
if s = "" then (k, "") |
55744
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
302 |
else if size s < 40 andalso not (exists_string (fn c => c = "\n") s) |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
303 |
then (k ^ " " ^ Markup.markup m s, "") |
4a4e5686e091
clarified token markup: keyword1/keyword2 is for syntax, and "command" the entity kind;
wenzelm
parents:
55709
diff
changeset
|
304 |
else (k, Markup.markup m s) |
23788
54ce229dc858
Symbol.not_eof/sync is superceded by Symbol.is_regular (rules out further control symbols);
wenzelm
parents:
23729
diff
changeset
|
305 |
end; |
23729
d1ba656978c5
separated Malformed (symbolic char) from Error (bad input);
wenzelm
parents:
23721
diff
changeset
|
306 |
|
5825 | 307 |
|
308 |
||
27814 | 309 |
(** associated values **) |
310 |
||
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
311 |
(* inlined file content *) |
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
312 |
|
54519
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
313 |
fun get_files (Token (_, _, Value (SOME (Files files)))) = files |
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
314 |
| get_files _ = []; |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
315 |
|
54519
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
316 |
fun put_files [] tok = tok |
5fed81762406
maintain blobs within document state: digest + text in ML, digest-only in Scala;
wenzelm
parents:
51266
diff
changeset
|
317 |
| put_files files (Token (x, y, Slot)) = Token (x, y, Value (SOME (Files files))) |
55708 | 318 |
| put_files _ tok = raise Fail ("Cannot put inlined files here" ^ Position.here (pos_of tok)); |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
319 |
|
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48771
diff
changeset
|
320 |
|
27814 | 321 |
(* access values *) |
322 |
||
323 |
fun get_value (Token (_, _, Value v)) = v |
|
324 |
| get_value _ = NONE; |
|
325 |
||
326 |
fun map_value f (Token (x, y, Value (SOME v))) = Token (x, y, Value (SOME (f v))) |
|
327 |
| map_value _ tok = tok; |
|
328 |
||
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
329 |
fun reports_of_value tok = |
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
330 |
(case get_value tok of |
56063 | 331 |
SOME (Literal markup) => |
332 |
let |
|
333 |
val pos = pos_of tok; |
|
334 |
val x = content_of tok; |
|
335 |
in |
|
336 |
if Position.is_reported pos then |
|
337 |
map (pair pos) (keyword_markup markup x :: Completion.suppress_abbrevs x) |
|
338 |
else [] |
|
339 |
end |
|
340 |
| _ => []); |
|
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
341 |
|
27814 | 342 |
|
343 |
(* make values *) |
|
344 |
||
345 |
fun mk_value k v = Token ((k, Position.no_range), (InternalValue, k), Value (SOME v)); |
|
346 |
||
347 |
val mk_text = mk_value "<text>" o Text; |
|
348 |
val mk_typ = mk_value "<typ>" o Typ; |
|
349 |
val mk_term = mk_value "<term>" o Term; |
|
350 |
val mk_fact = mk_value "<fact>" o Fact; |
|
351 |
val mk_attribute = mk_value "<attribute>" o Attribute; |
|
352 |
||
353 |
||
354 |
(* static binding *) |
|
355 |
||
55914
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
356 |
(*1st stage: initialize assignable slots*) |
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
357 |
fun init_assignable (Token (x, y, Slot)) = Token (x, y, Assignable (Unsynchronized.ref NONE)) |
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
358 |
| init_assignable (tok as Token (_, _, Assignable r)) = (r := NONE; tok) |
c5b752d549e3
clarified init_assignable: make double-sure that initial values are reset;
wenzelm
parents:
55828
diff
changeset
|
359 |
| init_assignable tok = tok; |
27814 | 360 |
|
361 |
(*2nd stage: assign values as side-effect of scanning*) |
|
362 |
fun assign v (Token (_, _, Assignable r)) = r := v |
|
363 |
| assign _ _ = (); |
|
364 |
||
365 |
(*3rd stage: static closure of final values*) |
|
32738 | 366 |
fun closure (Token (x, y, Assignable (Unsynchronized.ref v))) = Token (x, y, Value v) |
27814 | 367 |
| closure tok = tok; |
368 |
||
369 |
||
370 |
||
5825 | 371 |
(** scanners **) |
372 |
||
30573 | 373 |
open Basic_Symbol_Pos; |
5825 | 374 |
|
48764 | 375 |
val err_prefix = "Outer lexical error: "; |
376 |
||
377 |
fun !!! msg = Symbol_Pos.!!! (fn () => err_prefix ^ msg); |
|
5825 | 378 |
|
379 |
||
380 |
(* scan symbolic idents *) |
|
381 |
||
8231 | 382 |
val scan_symid = |
55033 | 383 |
Scan.many1 (Symbol.is_symbolic_char o Symbol_Pos.symbol) || |
40525
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents:
40523
diff
changeset
|
384 |
Scan.one (Symbol.is_symbolic o Symbol_Pos.symbol) >> single; |
5825 | 385 |
|
8231 | 386 |
fun is_symid str = |
387 |
(case try Symbol.explode str of |
|
55033 | 388 |
SOME [s] => Symbol.is_symbolic s orelse Symbol.is_symbolic_char s |
389 |
| SOME ss => forall Symbol.is_symbolic_char ss |
|
8231 | 390 |
| _ => false); |
391 |
||
27814 | 392 |
fun ident_or_symbolic "begin" = false |
393 |
| ident_or_symbolic ":" = true |
|
394 |
| ident_or_symbolic "::" = true |
|
50239 | 395 |
| ident_or_symbolic s = Symbol_Pos.is_identifier s orelse is_symid s; |
5825 | 396 |
|
397 |
||
398 |
(* scan verbatim text *) |
|
399 |
||
400 |
val scan_verb = |
|
55107 | 401 |
$$$ "*" --| Scan.ahead (~$$ "}") || |
27769 | 402 |
Scan.one (fn (s, _) => s <> "*" andalso Symbol.is_regular s) >> single; |
5825 | 403 |
|
404 |
val scan_verbatim = |
|
55107 | 405 |
Scan.ahead ($$ "{" -- $$ "*") |-- |
55106 | 406 |
!!! "unclosed verbatim text" |
55107 | 407 |
((Symbol_Pos.scan_pos --| $$ "{" --| $$ "*") -- |
55106 | 408 |
Symbol_Pos.change_prompt |
55107 | 409 |
((Scan.repeat scan_verb >> flat) -- ($$ "*" |-- $$ "}" |-- Symbol_Pos.scan_pos))); |
5825 | 410 |
|
48743
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
411 |
val recover_verbatim = |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
412 |
$$$ "{" @@@ $$$ "*" @@@ (Scan.repeat scan_verb >> flat); |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
413 |
|
5825 | 414 |
|
55033 | 415 |
(* scan cartouche *) |
416 |
||
417 |
val scan_cartouche = |
|
55104
8284c0d5bf52
clarified scan_cartouche_depth, according to Scala version;
wenzelm
parents:
55103
diff
changeset
|
418 |
Symbol_Pos.scan_pos -- |
55105 | 419 |
((Symbol_Pos.scan_cartouche err_prefix >> Symbol_Pos.cartouche_content) -- Symbol_Pos.scan_pos); |
55033 | 420 |
|
421 |
||
5825 | 422 |
(* scan space *) |
423 |
||
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
424 |
fun space_symbol (s, _) = Symbol.is_blank s andalso s <> "\n"; |
5825 | 425 |
|
426 |
val scan_space = |
|
48771
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
427 |
Scan.many1 space_symbol @@@ Scan.optional ($$$ "\n") [] || |
2ea997196d04
clarified Command.range vs. Command.proper_range according to Scala version, which is potentially relevant for command status markup;
wenzelm
parents:
48764
diff
changeset
|
428 |
Scan.many space_symbol @@@ $$$ "\n"; |
5825 | 429 |
|
430 |
||
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
431 |
(* scan comment *) |
5825 | 432 |
|
433 |
val scan_comment = |
|
55105 | 434 |
Symbol_Pos.scan_pos -- (Symbol_Pos.scan_comment_body err_prefix -- Symbol_Pos.scan_pos); |
5825 | 435 |
|
436 |
||
27663 | 437 |
|
27769 | 438 |
(** token sources **) |
5825 | 439 |
|
27769 | 440 |
fun source_proper src = src |> Source.filter is_proper; |
5825 | 441 |
|
23678 | 442 |
local |
443 |
||
27769 | 444 |
fun token_leq ((_, syms1), (_, syms2)) = length syms1 <= length syms2; |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
445 |
|
27799 | 446 |
fun token k ss = |
43709
717e96cf9527
discontinued special treatment of hard tabulators;
wenzelm
parents:
42503
diff
changeset
|
447 |
Token ((Symbol_Pos.implode ss, Symbol_Pos.range ss), (k, Symbol_Pos.content ss), Slot); |
27799 | 448 |
|
449 |
fun token_range k (pos1, (ss, pos2)) = |
|
43709
717e96cf9527
discontinued special treatment of hard tabulators;
wenzelm
parents:
42503
diff
changeset
|
450 |
Token (Symbol_Pos.implode_range pos1 pos2 ss, (k, Symbol_Pos.content ss), Slot); |
23678 | 451 |
|
27769 | 452 |
fun scan (lex1, lex2) = !!! "bad input" |
48764 | 453 |
(Symbol_Pos.scan_string_qq err_prefix >> token_range String || |
454 |
Symbol_Pos.scan_string_bq err_prefix >> token_range AltString || |
|
27799 | 455 |
scan_verbatim >> token_range Verbatim || |
55033 | 456 |
scan_cartouche >> token_range Cartouche || |
27799 | 457 |
scan_comment >> token_range Comment || |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
458 |
scan_space >> token Space || |
40525
14a2e686bdac
eliminated slightly odd pervasive Symbol_Pos.symbol;
wenzelm
parents:
40523
diff
changeset
|
459 |
Scan.one (Symbol.is_sync o Symbol_Pos.symbol) >> (token Sync o single) || |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
460 |
(Scan.max token_leq |
27769 | 461 |
(Scan.max token_leq |
462 |
(Scan.literal lex2 >> pair Command) |
|
463 |
(Scan.literal lex1 >> pair Keyword)) |
|
42290
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
464 |
(Lexicon.scan_longid >> pair LongIdent || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
465 |
Lexicon.scan_id >> pair Ident || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
466 |
Lexicon.scan_var >> pair Var || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
467 |
Lexicon.scan_tid >> pair TypeIdent || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
468 |
Lexicon.scan_tvar >> pair TypeVar || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
469 |
Lexicon.scan_float >> pair Float || |
b1f544c84040
discontinued special treatment of structure Lexicon;
wenzelm
parents:
40958
diff
changeset
|
470 |
Lexicon.scan_nat >> pair Nat || |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
471 |
scan_symid >> pair SymIdent) >> uncurry token)); |
27769 | 472 |
|
473 |
fun recover msg = |
|
48743
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
474 |
(Symbol_Pos.recover_string_qq || |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
475 |
Symbol_Pos.recover_string_bq || |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
476 |
recover_verbatim || |
55033 | 477 |
Symbol_Pos.recover_cartouche || |
48743
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
478 |
Symbol_Pos.recover_comment || |
a72f8ffecf31
refined recovery of scan errors: longest prefix of delimited token after failure, otherwise just one symbol;
wenzelm
parents:
48741
diff
changeset
|
479 |
Scan.one (Symbol.is_regular o Symbol_Pos.symbol) >> single) |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
480 |
>> (single o token (Error msg)); |
23678 | 481 |
|
482 |
in |
|
5825 | 483 |
|
27835
ff8b8513965a
Symbol.source/OuterLex.source: more explicit do_recover argument;
wenzelm
parents:
27814
diff
changeset
|
484 |
fun source' {do_recover} get_lex = |
30573 | 485 |
Source.source Symbol_Pos.stopper (Scan.bulk (fn xs => scan (get_lex ()) xs)) |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
486 |
(Option.map (rpair recover) do_recover); |
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
487 |
|
5825 | 488 |
fun source do_recover get_lex pos src = |
30573 | 489 |
Symbol_Pos.source pos src |
27780
7d0910f662f7
more precise positions due to SymbolsPos.implode_delim;
wenzelm
parents:
27769
diff
changeset
|
490 |
|> source' do_recover get_lex; |
23678 | 491 |
|
492 |
end; |
|
5825 | 493 |
|
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
494 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
495 |
(* read_antiq *) |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
496 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
497 |
fun read_antiq lex scan (syms, pos) = |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
498 |
let |
48992 | 499 |
fun err msg = cat_error msg ("Malformed antiquotation" ^ Position.here pos ^ ":\n" ^ |
30586
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
500 |
"@{" ^ Symbol_Pos.content syms ^ "}"); |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
501 |
|
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
502 |
val res = |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
503 |
Source.of_list syms |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
504 |
|> source' {do_recover = NONE} (K (lex, Scan.empty_lexicon)) |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
505 |
|> source_proper |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
506 |
|> Source.source stopper (Scan.error (Scan.bulk scan)) NONE |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
507 |
|> Source.exhaust; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
508 |
in (case res of [x] => x | _ => err "") handle ERROR msg => err msg end; |
9674f64a0702
moved basic change_prompt, scan_string, scan_alt_string, scan_quoted to symbol_pos.ML;
wenzelm
parents:
30573
diff
changeset
|
509 |
|
5825 | 510 |
end; |