author | wenzelm |
Sun, 30 May 2010 18:23:50 +0200 | |
changeset 37197 | 953fc4983439 |
parent 37193 | a4b2bb0dab08 |
child 37216 | 3165bc303f66 |
permissions | -rw-r--r-- |
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
1 |
(* Title: Pure/Thy/thy_syntax.ML |
23726 | 2 |
Author: Makarius |
3 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
4 |
Superficial theory syntax: tokens and spans. |
23726 | 5 |
*) |
6 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
7 |
signature THY_SYNTAX =
sig
  (*tokens*)
  val token_source: Scan.lexicon * Scan.lexicon -> Position.T -> (string, 'a) Source.source ->
    (Token.T, (Symbol_Pos.T, Position.T * (Symbol.symbol, (string, 'a)
      Source.source) Source.source) Source.source) Source.source
  val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list
  val present_token: Token.T -> output
  val report_token: Token.T -> unit

  (*spans*)
  datatype span_kind = Command of string | Ignored | Malformed
  type span
  val span_kind: span -> span_kind
  val span_content: span -> Token.T list
  val span_range: span -> Position.range
  val span_source: (Token.T, 'a) Source.source -> (span, (Token.T, 'a) Source.source) Source.source
  val parse_spans: Scan.lexicon * Scan.lexicon -> Position.T -> string -> span list
  val present_span: span -> output
  val report_span: span -> unit

  (*units: a span, the spans of its proof, and a flag computed by unit_source*)
  val unit_source: (span, 'a) Source.source ->
    (span * span list * bool, (span, 'a) Source.source) Source.source
end;
27 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
28 |
structure ThySyntax: THY_SYNTAX = |
23726 | 29 |
struct |
30 |
||
23803 | 31 |
(** tokens **) |
32 |
||
33 |
(* parse *) |
|
23726 | 34 |
|
27842 | 35 |
(*turn a source of strings into a source of tokens: first symbols, then tokens*)
fun token_source lexs pos src =
  let
    val symbols = Symbol.source {do_recover = true} src;
  in Token.source {do_recover = SOME false} (K lexs) pos symbols end;
23726 | 38 |
|
27842 | 39 |
(*tokenize a complete string, starting at the given position*)
fun parse_tokens lexs pos str =
  Source.exhaust (token_source lexs pos (Source.of_string str));
|
23726 | 43 |
|
44 |
||
23803 | 45 |
(* present *) |
23726 | 46 |
|
47 |
local |
|
48 |
||
49 |
(*markup associated with each kind of token*)
fun token_kind_markup kind =
  (case kind of
    (*keywords*)
    Token.Command => Markup.command
  | Token.Keyword => Markup.keyword
    (*identifiers and numerals*)
  | Token.Ident => Markup.ident
  | Token.LongIdent => Markup.ident
  | Token.SymIdent => Markup.ident
  | Token.Nat => Markup.ident
    (*variables*)
  | Token.Var => Markup.var
  | Token.TypeIdent => Markup.tfree
  | Token.TypeVar => Markup.tvar
    (*quoted material*)
  | Token.String => Markup.string
  | Token.AltString => Markup.altstring
  | Token.Verbatim => Markup.verbatim
    (*comments*)
  | Token.Comment => Markup.comment
    (*no markup*)
  | Token.Space => Markup.none
  | Token.InternalValue => Markup.none
    (*bad input*)
  | Token.Malformed => Markup.malformed
  | Token.Error _ => Markup.malformed
    (*control tokens*)
  | Token.Sync => Markup.control
  | Token.EOF => Markup.control);
23726 | 69 |
|
37192 | 70 |
(*markup of a single token: keywords that are not identifiers count as
  operators; command tokens additionally carry the kind of the command
  keyword as property, provided the keyword is known*)
fun token_markup tok =
  let
    fun with_command_kind markup =
      (case Keyword.command_keyword (Token.content_of tok) of
        NONE => markup
      | SOME k => Markup.properties [(Markup.kindN, Keyword.kind_of k)] markup);
  in
    if Token.keyword_with (not o Syntax.is_identifier) tok then Markup.operator
    else
      (case Token.kind_of tok of
        Token.Command => with_command_kind (token_kind_markup Token.Command)
      | kind => token_kind_markup kind)
  end;
37192 | 82 |
|
23803 | 83 |
in |
84 |
||
23726 | 85 |
(*output text of a token, enclosed in its markup*)
fun present_token tok =
  let
    val markup = token_markup tok;
    val text = Output.output (Token.unparse tok);
  in Markup.enclose markup text end;
23726 | 87 |
|
27842 | 88 |
(*report the markup of a token at the token's position*)
fun report_token tok =
  let
    val markup = token_markup tok;
    val pos = Token.position_of tok;
  in Position.report markup pos end;
27842 | 90 |
|
23803 | 91 |
end; |
92 |
||
93 |
||
94 |
||
27665 | 95 |
(** spans **) |
96 |
||
27842 | 97 |
(* type span *) |
98 |
||
99 |
(*kind of a span: a command with its name, ignored material, or malformed input*)
datatype span_kind =
  Command of string |
  Ignored |
  Malformed;

(*a span: its kind together with the tokens it consists of*)
datatype span =
  Span of span_kind * Token.T list;
23803 | 101 |
|
27842 | 102 |
(*first component of a span: its kind*)
fun span_kind (Span span) = #1 span;
103 |
(*second component of a span: its tokens*)
fun span_content (Span span) = #2 span;
|
104 |
||
105 |
(*range from the position of the first token to the end position of the
  last token; an empty span yields a pair of Position.none*)
fun span_range span =
  (case span_content span of
    [] => (Position.none, Position.none)
  | toks as first :: _ =>
      (Token.position_of first, Token.end_position_of (List.last toks)));
23803 | 113 |
|
114 |
||
115 |
(* parse *) |
|
23726 | 116 |
|
23803 | 117 |
local |
118 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
119 |
(*whitespace tokens: blank space and comments*)
fun is_whitespace tok =
  Token.is_kind Token.Space tok orelse Token.is_kind Token.Comment tok;
27665 | 120 |
|
36950 | 121 |
(*one token of a span body: any non-EOF token, unless only whitespace
  separates the current point from the next command or the end of input*)
val body =
  let
    val command_or_eof_ahead = Scan.ahead (Parse.command || Parse.eof);
    val span_end = Scan.many is_whitespace -- command_or_eof_ahead;
  in Scan.unless span_end Parse.not_eof end;
|
23726 | 123 |
|
27665 | 124 |
(*scan one span; the alternatives are tried in this order: command span,
  span of whitespace only, malformed span*)
val span =
  let
    val command_span =
      Scan.ahead Parse.command -- Parse.not_eof -- Scan.repeat body
        >> (fn ((name, cmd), rest) => Span (Command name, cmd :: rest));
    val ignored_span =
      Scan.many1 is_whitespace >> (fn blanks => Span (Ignored, blanks));
    val malformed_span =
      Scan.repeat1 body >> (fn junk => Span (Malformed, junk));
  in command_span || ignored_span || malformed_span end;
|
23726 | 129 |
|
130 |
in |
|
131 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
132 |
(*turn a source of tokens into a source of spans*)
fun span_source src =
  src |> Source.source Token.stopper (Scan.bulk span) NONE;
23803 | 133 |
|
134 |
end; |
|
135 |
||
27842 | 136 |
(*split a complete string into spans, starting at the given position*)
fun parse_spans lexs pos str =
  let
    val tokens = token_source lexs pos (Source.of_string str);
  in Source.exhaust (span_source tokens) end;
|
23803 | 141 |
|
142 |
||
143 |
(* present *) |
|
144 |
||
145 |
local |
|
146 |
||
27665 | 147 |
(*markup associated with each kind of span*)
fun kind_markup kind =
  (case kind of
    Command name => Markup.command_span name
  | Ignored => Markup.ignored_span
  | Malformed => Markup.malformed_span);
23803 | 150 |
|
151 |
in |
|
152 |
||
27842 | 153 |
(*concatenated presentation of all tokens of a span, enclosed in the span markup*)
fun present_span span =
  let
    val markup = kind_markup (span_kind span);
    val text = span_content span |> map present_token |> implode;
  in Markup.enclose markup text end;
|
155 |
||
156 |
(*report the markup of a span at its encoded range*)
fun report_span span =
  let
    val markup = kind_markup (span_kind span);
    val pos = Position.encode_range (span_range span);
  in Position.report markup pos end;
|
23803 | 158 |
|
159 |
end; |
|
160 |
||
28434 | 161 |
|
162 |
||
163 |
(** units: commands with proof **) |
|
164 |
||
165 |
(* scanning spans *) |
|
166 |
||
167 |
(*end-of-input marker for span scanning: a command span with empty name*)
val eof = Span (Command "", []);

(*any command span with empty name counts as end-of-input*)
fun is_eof (Span (kind, _)) =
  (case kind of
    Command "" => true
  | _ => false);

fun not_eof span = not (is_eof span);

val stopper = Scan.stopper (fn _ => eof) is_eof;
|
175 |
||
176 |
||
177 |
(* unit_source *) |
|
178 |
||
179 |
local |
|
180 |
||
181 |
(*scan one command span whose name satisfies the given predicate*)
fun command_with pred =
  let
    fun accepts (Span (kind, _)) =
      (case kind of
        Command name => pred name
      | _ => false);
  in Scan.one accepts end;
|
182 |
||
183 |
(*Scan the spans that make up a proof.

  An integer d is threaded through the repetition by Scan.depend, with
  initial value 1 given to Scan.pass; the final value is returned next to
  the scanned spans via Scan.state.  Presumably d is the nesting depth of
  the proof -- confirm against the Scan library.

  Each alternative pairs the scanned span with the next value of d:
    - a span accepted by Keyword.is_qed_global sets d to ~1;
    - a span accepted by Keyword.is_proof_goal increments d;
    - a span accepted by Keyword.is_qed decrements d;
    - any other span that is neither EOF nor accepted by Keyword.is_theory
      leaves d unchanged.*)
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d =>
  if d <= 0 then Scan.fail  (*stop repeating once d has dropped to 0 or below*)
  else
    (*d > 0 holds in this branch, so no further test of d against 0 is needed*)
    command_with Keyword.is_qed_global >> pair ~1 ||
    command_with Keyword.is_proof_goal >> pair (d + 1) ||
    command_with Keyword.is_qed >> pair (d - 1) ||
    Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state);
|
28434 | 190 |
|
28438
32bb6b4eb390
unit_source: explicit treatment of 'oops' proofs;
wenzelm
parents:
28434
diff
changeset
|
191 |
(*scan one unit: either a span accepted by Keyword.is_theory_goal followed
  by its proof spans, with a flag telling whether the final proof state
  value is non-negative, or any single non-EOF span with empty proof and
  flag true*)
val unit =
  let
    val goal_with_proof =
      command_with Keyword.is_theory_goal -- proof
        >> (fn (goal, (proof_spans, depth)) => (goal, proof_spans, depth >= 0));
    val single_span =
      Scan.one not_eof >> (fn other => (other, [], true));
  in goal_with_proof || single_span end;
28434 | 194 |
|
195 |
in |
|
196 |
||
197 |
(*turn a source of spans into a source of units*)
fun unit_source src =
  src |> Source.source stopper (Scan.bulk unit) NONE;
|
198 |
||
23726 | 199 |
end; |
28434 | 200 |
|
201 |
end; |