author | wenzelm |
Thu, 03 Jan 2013 14:12:12 +0100 | |
changeset 50700 | e1df173b12a1 |
parent 50238 | 98d35a7368bd |
child 51225 | 3fe0d8d55975 |
permissions | -rw-r--r-- |
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
1 |
(* Title: Pure/Thy/thy_syntax.ML |
23726 | 2 |
Author: Makarius |
3 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
4 |
Superficial theory syntax: tokens and spans. |
23726 | 5 |
*) |
6 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
7 |
signature THY_SYNTAX = |
23726 | 8 |
sig |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
9 |
val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
10 |
val reports_of_tokens: Token.T list -> bool * (Position.report * string) list |
40131
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
wenzelm
parents:
39507
diff
changeset
|
11 |
val present_token: Token.T -> Output.output |
48878 | 12 |
datatype span_kind = Command of string * Position.T | Ignored | Malformed |
48867
e9beabf045ab
some support for inlining file content into outer syntax token language;
wenzelm
parents:
48865
diff
changeset
|
13 |
datatype span = Span of span_kind * Token.T list |
27842 | 14 |
val span_kind: span -> span_kind |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
15 |
val span_content: span -> Token.T list |
48865 | 16 |
val present_span: span -> Output.output |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
17 |
val parse_spans: Token.T list -> span list |
43621 | 18 |
type element = {head: span, proof: span list, proper_proof: bool} |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
19 |
val parse_elements: span list -> element list |
23726 | 20 |
end; |
21 |
||
37216
3165bc303f66
modernized some structure names, keeping a few legacy aliases;
wenzelm
parents:
37197
diff
changeset
|
22 |
structure Thy_Syntax: THY_SYNTAX = |
23726 | 23 |
struct |
24 |
||
23803 | 25 |
(** tokens **) |
26 |
||
27 |
(* parse *) |
|
23726 | 28 |
|
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
29 |
fun parse_tokens lexs pos = |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
30 |
Source.of_string #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
31 |
Symbol.source #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
32 |
Token.source {do_recover = SOME false} (K lexs) pos #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
33 |
Source.exhaust; |
23726 | 34 |
|
35 |
||
23803 | 36 |
(* present *) |
23726 | 37 |
|
38 |
local |
|
39 |
||
40 |
val token_kind_markup = |
|
50201
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
41 |
fn Token.Command => (Markup.command, "") |
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
42 |
| Token.Keyword => (Markup.keyword, "") |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
43 |
| Token.Ident => (Markup.empty, "") |
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
44 |
| Token.LongIdent => (Markup.empty, "") |
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
45 |
| Token.SymIdent => (Markup.empty, "") |
50201
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
46 |
| Token.Var => (Markup.var, "") |
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
47 |
| Token.TypeIdent => (Markup.tfree, "") |
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
48 |
| Token.TypeVar => (Markup.tvar, "") |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
49 |
| Token.Nat => (Markup.empty, "") |
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
50 |
| Token.Float => (Markup.empty, "") |
50201
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
51 |
| Token.String => (Markup.string, "") |
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
52 |
| Token.AltString => (Markup.altstring, "") |
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
53 |
| Token.Verbatim => (Markup.verbatim, "") |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
54 |
| Token.Space => (Markup.empty, "") |
50201
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
55 |
| Token.Comment => (Markup.comment, "") |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
56 |
| Token.InternalValue => (Markup.empty, "") |
50201
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
57 |
| Token.Error msg => (Markup.bad, msg) |
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
58 |
| Token.Sync => (Markup.control, "") |
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
59 |
| Token.EOF => (Markup.control, ""); |
23726 | 60 |
|
37192 | 61 |
fun token_markup tok = |
50238
98d35a7368bd
more uniform Symbol.is_ascii_identifier in ML/Scala;
wenzelm
parents:
50201
diff
changeset
|
62 |
if Token.keyword_with (not o Symbol.is_ascii_identifier) tok |
50201
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
63 |
then (Markup.operator, "") |
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
64 |
else |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
65 |
let |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
66 |
val kind = Token.kind_of tok; |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
67 |
val props = |
38471
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
68 |
if kind = Token.Command |
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
69 |
then Markup.properties [(Markup.nameN, Token.content_of tok)] |
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
70 |
else I; |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
71 |
val (markup, txt) = token_kind_markup kind; |
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
72 |
in (props markup, txt) end; |
37192 | 73 |
|
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
74 |
fun reports_of_token tok = |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
75 |
let |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
76 |
val malformed_symbols = |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
77 |
Symbol_Pos.explode (Token.source_position_of tok) |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
78 |
|> map_filter (fn (sym, pos) => |
48756
1c843142758e
sneak message into "bad" markup as property -- to be displayed after YXML parsing;
wenzelm
parents:
48752
diff
changeset
|
79 |
if Symbol.is_malformed sym |
50201
c26369c9eda6
Isabelle-specific implementation of quasi-abstract markup elements -- back to module arrangement before d83797ef0d2d;
wenzelm
parents:
48878
diff
changeset
|
80 |
then SOME ((pos, Markup.bad), "Malformed symbolic character") else NONE); |
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
81 |
val is_malformed = Token.is_error tok orelse not (null malformed_symbols); |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
82 |
val (markup, txt) = token_markup tok; |
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
83 |
val reports = ((Token.position_of tok, markup), txt) :: malformed_symbols; |
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
84 |
in (is_malformed, reports) end; |
40528 | 85 |
|
23803 | 86 |
in |
87 |
||
48749
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
88 |
fun reports_of_tokens toks = |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
89 |
let val results = map reports_of_token toks |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
90 |
in (exists fst results, maps snd results) end; |
c197b3c3e7fa
some attempts to keep malformed syntax errors focussed, without too much red spilled onto the document view;
wenzelm
parents:
46811
diff
changeset
|
91 |
|
23726 | 92 |
fun present_token tok = |
48768
abc45de5bb22
clarified "bad" markup: proper body text, invent missing serial on Scala side (counting backwards);
wenzelm
parents:
48756
diff
changeset
|
93 |
Markup.enclose (fst (token_markup tok)) (Output.output (Token.unparse tok)); |
23726 | 94 |
|
23803 | 95 |
end; |
96 |
||
97 |
||
98 |
||
27665 | 99 |
(** spans **) |
100 |
||
27842 | 101 |
(* type span *) |
102 |
||
48878 | 103 |
datatype span_kind = Command of string * Position.T | Ignored | Malformed; |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
104 |
datatype span = Span of span_kind * Token.T list; |
23803 | 105 |
|
27842 | 106 |
fun span_kind (Span (k, _)) = k; |
107 |
fun span_content (Span (_, toks)) = toks; |
|
108 |
||
48865 | 109 |
val present_span = implode o map present_token o span_content; |
110 |
||
23803 | 111 |
|
112 |
(* parse *) |
|
23726 | 113 |
|
23803 | 114 |
local |
115 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
116 |
fun make_span toks = |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
117 |
if not (null toks) andalso Token.is_command (hd toks) then |
48878 | 118 |
Span (Command (Token.content_of (hd toks), Token.position_of (hd toks)), toks) |
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
119 |
else if forall (not o Token.is_proper) toks then Span (Ignored, toks) |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
120 |
else Span (Malformed, toks); |
23726 | 121 |
|
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
122 |
fun flush (result, span) = if null span then (result, span) else (rev span :: result, []); |
23726 | 123 |
|
124 |
in |
|
125 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
126 |
fun parse_spans toks = |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
127 |
fold (fn tok => Token.is_command tok ? flush #> apsnd (cons tok)) toks ([], []) |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
128 |
|> flush |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
129 |
|> #1 |> rev |> map make_span; |
23803 | 130 |
|
131 |
end; |
|
132 |
||
133 |
||
28434 | 134 |
|
43621 | 135 |
(** specification elements: commands with optional proof **) |
136 |
||
137 |
type element = {head: span, proof: span list, proper_proof: bool}; |
|
138 |
||
139 |
fun make_element head proof proper_proof = |
|
140 |
{head = head, proof = proof, proper_proof = proper_proof}; |
|
141 |
||
28434 | 142 |
|
143 |
(* scanning spans *) |
|
144 |
||
48878 | 145 |
val eof = Span (Command ("", Position.none), []); |
28434 | 146 |
|
48878 | 147 |
fun is_eof (Span (Command ("", _), _)) = true |
28434 | 148 |
| is_eof _ = false; |
149 |
||
150 |
val not_eof = not o is_eof; |
|
151 |
||
152 |
val stopper = Scan.stopper (K eof) is_eof; |
|
153 |
||
154 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
155 |
(* parse *) |
28434 | 156 |
|
157 |
local |
|
158 |
||
48878 | 159 |
fun command_with pred = |
160 |
Scan.one (fn (Span (Command (name, _), _)) => pred name | _ => false); |
|
28434 | 161 |
|
162 |
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d => |
|
28454
c63168db774c
unit_source: more rigid parsing, stop after final qed;
wenzelm
parents:
28438
diff
changeset
|
163 |
if d <= 0 then Scan.fail |
28434 | 164 |
else |
36950 | 165 |
command_with Keyword.is_qed_global >> pair ~1 || |
166 |
command_with Keyword.is_proof_goal >> pair (d + 1) || |
|
167 |
(if d = 0 then Scan.fail else command_with Keyword.is_qed >> pair (d - 1)) || |
|
168 |
Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state); |
|
28434 | 169 |
|
43621 | 170 |
val element = |
171 |
command_with Keyword.is_theory_goal -- proof |
|
172 |
>> (fn (a, (bs, d)) => make_element a bs (d >= 0)) || |
|
173 |
Scan.one not_eof >> (fn a => make_element a [] true); |
|
28434 | 174 |
|
175 |
in |
|
176 |
||
46811
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
177 |
val parse_elements = |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
178 |
Source.of_list #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
179 |
Source.source stopper (Scan.bulk element) NONE #> |
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
wenzelm
parents:
45666
diff
changeset
|
180 |
Source.exhaust; |
28434 | 181 |
|
23726 | 182 |
end; |
28434 | 183 |
|
184 |
end; |