author | wenzelm |
Fri, 10 Aug 2012 10:18:07 +0200 | |
changeset 48751 | dc3bbdda4bc8 |
parent 48749 | c197b3c3e7fa |
child 48752 | 8a81ef0bc790 |
permissions | -rw-r--r-- |
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
1 |
(* Title: Pure/Thy/thy_syntax.ML |
23726 | 2 |
Author: Makarius |
3 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
4 |
Superficial theory syntax: tokens and spans. |
23726 | 5 |
*) |
6 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
7 |
signature THY_SYNTAX = |
23726 | 8 |
sig |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
9 |
val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list |
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
10 |
val reports_of_tokens: Token.T list -> bool * Position.report list |
40131
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
wenzelm
parents:
39507
diff
changeset
|
11 |
val present_token: Token.T -> Output.output |
27842 | 12 |
datatype span_kind = Command of string | Ignored | Malformed |
13 |
type span |
|
14 |
val span_kind: span -> span_kind |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
15 |
val span_content: span -> Token.T list |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
16 |
val parse_spans: Token.T list -> span list |
40131
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
wenzelm
parents:
39507
diff
changeset
|
17 |
val present_span: span -> Output.output |
43621 | 18 |
type element = {head: span, proof: span list, proper_proof: bool} |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
19 |
val parse_elements: span list -> element list |
23726 | 20 |
end; |
21 |
||
37216
3165bc303f66
modernized some structure names, keeping a few legacy aliases;
wenzelm
parents:
37197
diff
changeset
|
22 |
structure Thy_Syntax: THY_SYNTAX = |
23726 | 23 |
struct |
24 |
||
23803 | 25 |
(** tokens **) |
26 |
||
27 |
(* parse *) |
|
23726 | 28 |
|
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
29 |
fun parse_tokens lexs pos = |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
30 |
Source.of_string #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
31 |
Symbol.source #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
32 |
Token.source {do_recover = SOME false} (K lexs) pos #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
33 |
Source.exhaust; |
23726 | 34 |
|
35 |
||
23803 | 36 |
(* present *) |
23726 | 37 |
|
38 |
local |
|
39 |
||
40 |
val token_kind_markup = |
|
45666 | 41 |
fn Token.Command => Isabelle_Markup.command |
42 |
| Token.Keyword => Isabelle_Markup.keyword |
|
44706
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
wenzelm
parents:
44658
diff
changeset
|
43 |
| Token.Ident => Markup.empty |
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
wenzelm
parents:
44658
diff
changeset
|
44 |
| Token.LongIdent => Markup.empty |
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
wenzelm
parents:
44658
diff
changeset
|
45 |
| Token.SymIdent => Markup.empty |
45666 | 46 |
| Token.Var => Isabelle_Markup.var |
47 |
| Token.TypeIdent => Isabelle_Markup.tfree |
|
48 |
| Token.TypeVar => Isabelle_Markup.tvar |
|
44706
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
wenzelm
parents:
44658
diff
changeset
|
49 |
| Token.Nat => Markup.empty |
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
wenzelm
parents:
44658
diff
changeset
|
50 |
| Token.Float => Markup.empty |
45666 | 51 |
| Token.String => Isabelle_Markup.string |
52 |
| Token.AltString => Isabelle_Markup.altstring |
|
53 |
| Token.Verbatim => Isabelle_Markup.verbatim |
|
38474
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents:
38471
diff
changeset
|
54 |
| Token.Space => Markup.empty |
45666 | 55 |
| Token.Comment => Isabelle_Markup.comment |
38474
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents:
38471
diff
changeset
|
56 |
| Token.InternalValue => Markup.empty |
48751 | 57 |
| Token.Error _ => Isabelle_Markup.bad |
45666 | 58 |
| Token.Sync => Isabelle_Markup.control |
59 |
| Token.EOF => Isabelle_Markup.control; |
|
23726 | 60 |
|
37192 | 61 |
fun token_markup tok = |
45666 | 62 |
if Token.keyword_with (not o Lexicon.is_ascii_identifier) tok then Isabelle_Markup.operator |
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
63 |
else |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
64 |
let |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
65 |
val kind = Token.kind_of tok; |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
66 |
val props = |
38471
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
67 |
if kind = Token.Command |
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
68 |
then Markup.properties [(Markup.nameN, Token.content_of tok)] |
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
69 |
else I; |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
70 |
in props (token_kind_markup kind) end; |
37192 | 71 |
|
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
72 |
fun reports_of_token tok = |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
73 |
let |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
74 |
val malformed_symbols = |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
75 |
Symbol_Pos.explode (Token.source_position_of tok) |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
76 |
|> map_filter (fn (sym, pos) => |
48751 | 77 |
if Symbol.is_malformed sym then SOME (pos, Isabelle_Markup.bad) else NONE); |
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
78 |
val is_malformed = Token.is_error tok orelse not (null malformed_symbols); |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
79 |
val reports = (Token.position_of tok, token_markup tok) :: malformed_symbols; |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
80 |
in (is_malformed, reports) end; |
40528 | 81 |
|
23803 | 82 |
in |
83 |
||
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
84 |
fun reports_of_tokens toks = |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
85 |
let val results = map reports_of_token toks |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
86 |
in (exists fst results, maps snd results) end; |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
87 |
|
23726 | 88 |
fun present_token tok = |
37192 | 89 |
Markup.enclose (token_markup tok) (Output.output (Token.unparse tok)); |
23726 | 90 |
|
23803 | 91 |
end; |
92 |
||
93 |
||
94 |
||
27665 | 95 |
(** spans **) |
96 |
||
27842 | 97 |
(* type span *) |
98 |
||
99 |
datatype span_kind = Command of string | Ignored | Malformed; |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
100 |
datatype span = Span of span_kind * Token.T list; |
23803 | 101 |
|
27842 | 102 |
fun span_kind (Span (k, _)) = k; |
103 |
fun span_content (Span (_, toks)) = toks; |
|
104 |
||
23803 | 105 |
|
106 |
(* parse *) |
|
23726 | 107 |
|
23803 | 108 |
local |
109 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
110 |
fun make_span toks = |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
111 |
if not (null toks) andalso Token.is_command (hd toks) then |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
112 |
Span (Command (Token.content_of (hd toks)), toks) |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
113 |
else if forall (not o Token.is_proper) toks then Span (Ignored, toks) |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
114 |
else Span (Malformed, toks); |
23726 | 115 |
|
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
116 |
fun flush (result, span) = if null span then (result, span) else (rev span :: result, []); |
23726 | 117 |
|
118 |
in |
|
119 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
120 |
fun parse_spans toks = |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
121 |
fold (fn tok => Token.is_command tok ? flush #> apsnd (cons tok)) toks ([], []) |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
122 |
|> flush |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
123 |
|> #1 |> rev |> map make_span; |
23803 | 124 |
|
125 |
end; |
|
126 |
||
127 |
||
128 |
(* present *) |
|
129 |
||
130 |
local |
|
131 |
||
45666 | 132 |
fun kind_markup (Command name) = Isabelle_Markup.command_span name |
133 |
| kind_markup Ignored = Isabelle_Markup.ignored_span |
|
134 |
| kind_markup Malformed = Isabelle_Markup.malformed_span; |
|
23803 | 135 |
|
136 |
in |
|
137 |
||
27842 | 138 |
fun present_span span = |
139 |
Markup.enclose (kind_markup (span_kind span)) (implode (map present_token (span_content span))); |
|
140 |
||
23803 | 141 |
end; |
142 |
||
28434 | 143 |
|
144 |
||
43621 | 145 |
(** specification elements: commands with optional proof **) |
146 |
||
147 |
type element = {head: span, proof: span list, proper_proof: bool}; |
|
148 |
||
149 |
fun make_element head proof proper_proof = |
|
150 |
{head = head, proof = proof, proper_proof = proper_proof}; |
|
151 |
||
28434 | 152 |
|
153 |
(* scanning spans *) |
|
154 |
||
155 |
val eof = Span (Command "", []); |
|
156 |
||
157 |
fun is_eof (Span (Command "", _)) = true |
|
158 |
| is_eof _ = false; |
|
159 |
||
160 |
val not_eof = not o is_eof; |
|
161 |
||
162 |
val stopper = Scan.stopper (K eof) is_eof; |
|
163 |
||
164 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
165 |
(* parse *) |
28434 | 166 |
|
167 |
local |
|
168 |
||
169 |
fun command_with pred = Scan.one (fn (Span (Command name, _)) => pred name | _ => false); |
|
170 |
||
171 |
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d => |
|
28454
c63168db774c
unit_source: more rigid parsing, stop after final qed;
wenzelm
parents:
28438
diff
changeset
|
172 |
if d <= 0 then Scan.fail |
28434 | 173 |
else |
36950 | 174 |
command_with Keyword.is_qed_global >> pair ~1 || |
175 |
command_with Keyword.is_proof_goal >> pair (d + 1) || |
|
176 |
(if d = 0 then Scan.fail else command_with Keyword.is_qed >> pair (d - 1)) || |
|
177 |
Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state); |
|
28434 | 178 |
|
43621 | 179 |
val element = |
180 |
command_with Keyword.is_theory_goal -- proof |
|
181 |
>> (fn (a, (bs, d)) => make_element a bs (d >= 0)) || |
|
182 |
Scan.one not_eof >> (fn a => make_element a [] true); |
|
28434 | 183 |
|
184 |
in |
|
185 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
186 |
val parse_elements = |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
187 |
Source.of_list #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
188 |
Source.source stopper (Scan.bulk element) NONE #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
189 |
Source.exhaust; |
28434 | 190 |
|
23726 | 191 |
end; |
28434 | 192 |
|
193 |
end; |