author | wenzelm |
Sun, 09 Oct 2011 16:47:58 +0200 | |
changeset 45107 | 76fef3e57004 |
parent 44736 | c2a3f1c84179 |
child 45666 | d83797ef0d2d |
permissions | -rw-r--r-- |
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
1 |
(* Title: Pure/Thy/thy_syntax.ML |
23726 | 2 |
Author: Makarius |
3 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
4 |
Superficial theory syntax: tokens and spans. |
23726 | 5 |
*) |
6 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
7 |
signature THY_SYNTAX =
sig
  (*tokens: scanning, presentation, position reports*)
  val token_source: Scan.lexicon * Scan.lexicon -> Position.T -> (string, 'a) Source.source ->
    (Token.T, (Symbol_Pos.T, Position.T * (Symbol.symbol, (string, 'a)
      Source.source) Source.source) Source.source) Source.source
  val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list
  val present_token: Token.T -> Output.output
  val reports_of_token: Token.T -> Position.report list

  (*spans: token sequences classified as command, ignored, or malformed*)
  datatype span_kind = Command of string | Ignored | Malformed
  type span
  val span_kind: span -> span_kind
  val span_content: span -> Token.T list
  val span_source: (Token.T, 'a) Source.source -> (span, (Token.T, 'a) Source.source) Source.source
  val parse_spans: Scan.lexicon * Scan.lexicon -> Position.T -> string -> span list
  val present_span: span -> Output.output

  (*elements: a head span together with its proof spans*)
  type element = {head: span, proof: span list, proper_proof: bool}
  val element_source: (span, 'a) Source.source ->
    (element, (span, 'a) Source.source) Source.source
end;
26 |
||
37216
3165bc303f66
modernized some structure names, keeping a few legacy aliases;
wenzelm
parents:
37197
diff
changeset
|
27 |
structure Thy_Syntax: THY_SYNTAX = |
23726 | 28 |
struct |
29 |
||
23803 | 30 |
(** tokens **) |
31 |
||
32 |
(* parse *) |
|
23726 | 33 |
|
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
34 |
(*turn a source of strings into a source of tokens: first symbols, then tokens
  scanned with the given lexicons at the given start position (no recovery)*)
fun token_source lexs pos src =
  src
  |> Symbol.source
  |> Token.source {do_recover = SOME false} (K lexs) pos;
23726 | 36 |
|
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
37 |
(*scan a complete string into its list of tokens*)
fun parse_tokens lexs pos str =
  Source.of_string str
  |> token_source lexs pos
  |> Source.exhaust;
23726 | 39 |
|
40 |
||
23803 | 41 |
(* present *) |
23726 | 42 |
|
43 |
local

(*basic markup determined by the token kind alone; several kinds map to Markup.empty*)
fun token_kind_markup kind =
  (case kind of
    Token.Command => Markup.command
  | Token.Keyword => Markup.keyword
  | Token.Ident => Markup.empty
  | Token.LongIdent => Markup.empty
  | Token.SymIdent => Markup.empty
  | Token.Var => Markup.var
  | Token.TypeIdent => Markup.tfree
  | Token.TypeVar => Markup.tvar
  | Token.Nat => Markup.empty
  | Token.Float => Markup.empty
  | Token.String => Markup.string
  | Token.AltString => Markup.altstring
  | Token.Verbatim => Markup.verbatim
  | Token.Space => Markup.empty
  | Token.Comment => Markup.comment
  | Token.InternalValue => Markup.empty
  | Token.Error _ => Markup.malformed
  | Token.Sync => Markup.control
  | Token.EOF => Markup.control);

(*markup of a concrete token: keywords that are not ASCII identifiers are marked
  as operators; command tokens get their content attached as name property*)
fun token_markup tok =
  if Token.keyword_with (not o Lexicon.is_ascii_identifier) tok then Markup.operator
  else
    let
      val kind = Token.kind_of tok;
      val markup = token_kind_markup kind;
    in
      (case kind of
        Token.Command => Markup.properties [(Markup.nameN, Token.content_of tok)] markup
      | _ => markup)
    end;

(*a malformed symbol yields one report at its position, any other symbol none*)
fun reports_of_symbol (sym, pos) =
  if not (Symbol.is_malformed sym) then []
  else [(pos, Markup.malformed)];

in

(*unparsed token text, enclosed in its markup*)
fun present_token tok =
  let
    val markup = token_markup tok;
    val text = Output.output (Token.unparse tok);
  in Markup.enclose markup text end;

(*report for the token as a whole, followed by reports for malformed symbols
  within its source*)
fun reports_of_token tok =
  let
    val token_report = (Token.position_of tok, token_markup tok);
    val symbol_reports =
      maps reports_of_symbol (Symbol_Pos.explode (Token.source_position_of tok));
  in token_report :: symbol_reports end;

end;
90 |
||
91 |
||
92 |
||
27665 | 93 |
(** spans **) |
94 |
||
27842 | 95 |
(* type span *) |
96 |
||
97 |
(*classification of a span; Command carries the command name*)
datatype span_kind = Command of string | Ignored | Malformed;

(*a span is a kind together with the tokens it covers*)
datatype span = Span of span_kind * Token.T list;

(*projections*)
fun span_kind (Span (kind, _)) = kind;
fun span_content (Span (_, content)) = content;
|
102 |
||
23803 | 103 |
|
104 |
(* parse *) |
|
23726 | 105 |
|
23803 | 106 |
local

(*runs of improper tokens: possibly empty / non-empty*)
val is_improper = not o Token.is_proper;
val whitespace = Scan.many is_improper;
val whitespace1 = Scan.many1 is_improper;

(*one token of a span body: any non-EOF token, unless only improper tokens
  remain before the next command or EOF*)
val body = Scan.unless (whitespace -- Scan.ahead (Parse.command || Parse.eof)) Parse.not_eof;

(*command token (named via look-ahead) followed by its body tokens*)
val command_span =
  Scan.ahead Parse.command -- Parse.not_eof -- Scan.repeat body
    >> (fn ((name, tok), toks) => Span (Command name, tok :: toks));

(*non-empty run of improper tokens*)
val ignored_span = whitespace1 >> (fn toks => Span (Ignored, toks));

(*non-empty body that does not start with a command*)
val malformed_span = Scan.repeat1 body >> (fn toks => Span (Malformed, toks));

(*alternatives are tried in this order*)
val span = command_span || ignored_span || malformed_span;

in

fun span_source src = Source.source Token.stopper (Scan.bulk span) NONE src;

end;
|
124 |
||
27842 | 125 |
(*scan a complete string into tokens and group these into spans*)
fun parse_spans lexs pos str =
  let
    val tokens = token_source lexs pos (Source.of_string str);
  in Source.exhaust (span_source tokens) end;
|
23803 | 130 |
|
131 |
||
132 |
(* present *) |
|
133 |
||
134 |
local

(*markup for the span as a whole, according to its kind*)
fun kind_markup kind =
  (case kind of
    Command name => Markup.command_span name
  | Ignored => Markup.ignored_span
  | Malformed => Markup.malformed_span);

in

(*concatenated presentation of all tokens, enclosed in the span markup*)
fun present_span (Span (kind, toks)) =
  let
    val markup = kind_markup kind;
    val text = implode (map present_token toks);
  in Markup.enclose markup text end;

end;
146 |
||
28434 | 147 |
|
148 |
||
43621 | 149 |
(** specification elements: commands with optional proof **) |
150 |
||
151 |
type element = {head: span, proof: span list, proper_proof: bool};

(*record constructor with positional arguments*)
fun make_element head proof proper_proof : element =
  {head = head,
   proof = proof,
   proper_proof = proper_proof};
|
155 |
||
28434 | 156 |
|
157 |
(* scanning spans *) |
|
158 |
||
159 |
(*artificial end-of-input span: a command with empty name and no tokens*)
val eof = Span (Command "", []);

(*any command span with empty name counts as end-of-input*)
fun is_eof (Span (Command name, _)) = (name = "")
  | is_eof _ = false;

fun not_eof span = not (is_eof span);

val stopper = Scan.stopper (K eof) is_eof;
|
167 |
||
168 |
||
43621 | 169 |
(* element_source *) |
28434 | 170 |
|
171 |
local

(*scan a single command span whose name satisfies pred*)
fun command_with pred = Scan.one (fn (Span (Command name, _)) => pred name | _ => false);

(*Proof spans following a goal.  The scanner state d is an integer depth,
  initially 1; each alternative pairs the next depth with the span just read:
    global qed   -- depth becomes ~1
    proof goal   -- depth d + 1
    qed          -- depth d - 1
    other span   -- depth unchanged (theory commands and EOF are not consumed)
  Scanning stops as soon as d <= 0.  Since that guard already excludes d = 0,
  the qed alternative needs no separate depth test.  The result is the list of
  spans together with the final depth.*)
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d =>
  if d <= 0 then Scan.fail
  else
    command_with Keyword.is_qed_global >> pair ~1 ||
    command_with Keyword.is_proof_goal >> pair (d + 1) ||
    command_with Keyword.is_qed >> pair (d - 1) ||
    Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state);

(*a theory goal with its proof spans (proper_proof iff the final depth is
  non-negative), or else any single non-EOF span without proof*)
val element =
  command_with Keyword.is_theory_goal -- proof
    >> (fn (head, (spans, depth)) => make_element head spans (depth >= 0)) ||
  Scan.one not_eof >> (fn head => make_element head [] true);

in

fun element_source src = Source.source stopper (Scan.bulk element) NONE src;

end;
28434 | 193 |
|
194 |
end; |