author | wenzelm |
Wed, 18 Aug 2010 11:02:47 +0200 | |
changeset 38474 | e498dc2eb576 |
parent 38471 | 0924654b8163 |
child 39507 | 839873937ddd |
permissions | -rw-r--r-- |
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
1 |
(* Title: Pure/Thy/thy_syntax.ML |
23726 | 2 |
Author: Makarius |
3 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
4 |
Superficial theory syntax: tokens and spans. |
23726 | 5 |
*) |
6 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
7 |
signature THY_SYNTAX = |
23726 | 8 |
sig |
27842 | 9 |
val token_source: Scan.lexicon * Scan.lexicon -> Position.T -> (string, 'a) Source.source -> |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
10 |
(Token.T, (Symbol_Pos.T, Position.T * (Symbol.symbol, (string, 'a) |
27770 | 11 |
Source.source) Source.source) Source.source) Source.source |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
12 |
val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
13 |
val present_token: Token.T -> output |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
14 |
val report_token: Token.T -> unit |
27842 | 15 |
datatype span_kind = Command of string | Ignored | Malformed |
16 |
type span |
|
17 |
val span_kind: span -> span_kind |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
18 |
val span_content: span -> Token.T list |
27665 | 19 |
val span_range: span -> Position.range |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
20 |
val span_source: (Token.T, 'a) Source.source -> (span, (Token.T, 'a) Source.source) Source.source |
27842 | 21 |
val parse_spans: Scan.lexicon * Scan.lexicon -> Position.T -> string -> span list |
27665 | 22 |
val present_span: span -> output |
27842 | 23 |
val report_span: span -> unit |
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
24 |
val atom_source: (span, 'a) Source.source -> |
28438
32bb6b4eb390
unit_source: explicit treatment of 'oops' proofs;
wenzelm
parents:
28434
diff
changeset
|
25 |
(span * span list * bool, (span, 'a) Source.source) Source.source |
23726 | 26 |
end; |
27 |
||
37216
3165bc303f66
modernized some structure names, keeping a few legacy aliases;
wenzelm
parents:
37197
diff
changeset
|
28 |
structure Thy_Syntax: THY_SYNTAX = |
23726 | 29 |
struct |
30 |
||
23803 | 31 |
(** tokens **) |
32 |
||
33 |
(* parse *) |
|
23726 | 34 |
|
27842 | 35 |
fun token_source lexs pos src = |
36 |
Symbol.source {do_recover = true} src |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
37 |
|> Token.source {do_recover = SOME false} (K lexs) pos; |
23726 | 38 |
|
27842 | 39 |
fun parse_tokens lexs pos str = |
40 |
Source.of_string str |
|
41 |
|> token_source lexs pos |
|
42 |
|> Source.exhaust; |
|
23726 | 43 |
|
44 |
||
23803 | 45 |
(* present *) |
23726 | 46 |
|
47 |
local |
|
48 |
||
49 |
val token_kind_markup = |
|
37193 | 50 |
fn Token.Command => Markup.command |
51 |
| Token.Keyword => Markup.keyword |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
52 |
| Token.Ident => Markup.ident |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
53 |
| Token.LongIdent => Markup.ident |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
54 |
| Token.SymIdent => Markup.ident |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
55 |
| Token.Var => Markup.var |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
56 |
| Token.TypeIdent => Markup.tfree |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
57 |
| Token.TypeVar => Markup.tvar |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
58 |
| Token.Nat => Markup.ident |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
59 |
| Token.String => Markup.string |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
60 |
| Token.AltString => Markup.altstring |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
61 |
| Token.Verbatim => Markup.verbatim |
38474
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents:
38471
diff
changeset
|
62 |
| Token.Space => Markup.empty |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
63 |
| Token.Comment => Markup.comment |
38474
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents:
38471
diff
changeset
|
64 |
| Token.InternalValue => Markup.empty |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
65 |
| Token.Malformed => Markup.malformed |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
66 |
| Token.Error _ => Markup.malformed |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
67 |
| Token.Sync => Markup.control |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
68 |
| Token.EOF => Markup.control; |
23726 | 69 |
|
37192 | 70 |
fun token_markup tok = |
71 |
if Token.keyword_with (not o Syntax.is_identifier) tok then Markup.operator |
|
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
72 |
else |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
73 |
let |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
74 |
val kind = Token.kind_of tok; |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
75 |
val props = |
38471
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
76 |
if kind = Token.Command |
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
77 |
then Markup.properties [(Markup.nameN, Token.content_of tok)] |
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
78 |
else I; |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
79 |
in props (token_kind_markup kind) end; |
37192 | 80 |
|
23803 | 81 |
in |
82 |
||
23726 | 83 |
fun present_token tok = |
37192 | 84 |
Markup.enclose (token_markup tok) (Output.output (Token.unparse tok)); |
23726 | 85 |
|
27842 | 86 |
fun report_token tok = |
37192 | 87 |
Position.report (token_markup tok) (Token.position_of tok); |
27842 | 88 |
|
23803 | 89 |
end; |
90 |
||
91 |
||
92 |
||
27665 | 93 |
(** spans **) |
94 |
||
27842 | 95 |
(* type span *) |
96 |
||
97 |
datatype span_kind = Command of string | Ignored | Malformed; |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
98 |
datatype span = Span of span_kind * Token.T list; |
23803 | 99 |
|
27842 | 100 |
fun span_kind (Span (k, _)) = k; |
101 |
fun span_content (Span (_, toks)) = toks; |
|
102 |
||
103 |
fun span_range span = |
|
104 |
(case span_content span of |
|
105 |
[] => (Position.none, Position.none) |
|
106 |
| toks => |
|
27665 | 107 |
let |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
108 |
val start_pos = Token.position_of (hd toks); |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
109 |
val end_pos = Token.end_position_of (List.last toks); |
27842 | 110 |
in (start_pos, end_pos) end); |
23803 | 111 |
|
112 |
||
113 |
(* parse *) |
|
23726 | 114 |
|
23803 | 115 |
local |
116 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
117 |
val is_whitespace = Token.is_kind Token.Space orf Token.is_kind Token.Comment; |
27665 | 118 |
|
36950 | 119 |
val body = |
120 |
Scan.unless (Scan.many is_whitespace -- Scan.ahead (Parse.command || Parse.eof)) Parse.not_eof; |
|
23726 | 121 |
|
27665 | 122 |
val span = |
36950 | 123 |
Scan.ahead Parse.command -- Parse.not_eof -- Scan.repeat body |
27842 | 124 |
>> (fn ((name, c), bs) => Span (Command name, c :: bs)) || |
125 |
Scan.many1 is_whitespace >> (fn toks => Span (Ignored, toks)) || |
|
126 |
Scan.repeat1 body >> (fn toks => Span (Malformed, toks)); |
|
23726 | 127 |
|
128 |
in |
|
129 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
130 |
fun span_source src = Source.source Token.stopper (Scan.bulk span) NONE src; |
23803 | 131 |
|
132 |
end; |
|
133 |
||
27842 | 134 |
fun parse_spans lexs pos str = |
135 |
Source.of_string str |
|
136 |
|> token_source lexs pos |
|
137 |
|> span_source |
|
138 |
|> Source.exhaust; |
|
23803 | 139 |
|
140 |
||
141 |
(* present *) |
|
142 |
||
143 |
local |
|
144 |
||
27665 | 145 |
fun kind_markup (Command name) = Markup.command_span name |
146 |
| kind_markup Ignored = Markup.ignored_span |
|
27842 | 147 |
| kind_markup Malformed = Markup.malformed_span; |
23803 | 148 |
|
149 |
in |
|
150 |
||
27842 | 151 |
fun present_span span = |
152 |
Markup.enclose (kind_markup (span_kind span)) (implode (map present_token (span_content span))); |
|
153 |
||
154 |
fun report_span span = |
|
155 |
Position.report (kind_markup (span_kind span)) (Position.encode_range (span_range span)); |
|
23803 | 156 |
|
157 |
end; |
|
158 |
||
28434 | 159 |
|
160 |
||
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
161 |
(** specification atoms: commands with optional proof **) |
28434 | 162 |
|
163 |
(* scanning spans *) |
|
164 |
||
165 |
val eof = Span (Command "", []); |
|
166 |
||
167 |
fun is_eof (Span (Command "", _)) = true |
|
168 |
| is_eof _ = false; |
|
169 |
||
170 |
val not_eof = not o is_eof; |
|
171 |
||
172 |
val stopper = Scan.stopper (K eof) is_eof; |
|
173 |
||
174 |
||
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
175 |
(* atom_source *) |
28434 | 176 |
|
177 |
local |
|
178 |
||
179 |
fun command_with pred = Scan.one (fn (Span (Command name, _)) => pred name | _ => false); |
|
180 |
||
181 |
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d => |
|
28454
c63168db774c
unit_source: more rigid parsing, stop after final qed;
wenzelm
parents:
28438
diff
changeset
|
182 |
if d <= 0 then Scan.fail |
28434 | 183 |
else |
36950 | 184 |
command_with Keyword.is_qed_global >> pair ~1 || |
185 |
command_with Keyword.is_proof_goal >> pair (d + 1) || |
|
186 |
(if d = 0 then Scan.fail else command_with Keyword.is_qed >> pair (d - 1)) || |
|
187 |
Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state); |
|
28434 | 188 |
|
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
189 |
val atom = |
36950 | 190 |
command_with Keyword.is_theory_goal -- proof >> (fn (a, (bs, d)) => (a, bs, d >= 0)) || |
28438
32bb6b4eb390
unit_source: explicit treatment of 'oops' proofs;
wenzelm
parents:
28434
diff
changeset
|
191 |
Scan.one not_eof >> (fn a => (a, [], true)); |
28434 | 192 |
|
193 |
in |
|
194 |
||
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
195 |
fun atom_source src = Source.source stopper (Scan.bulk atom) NONE src; |
28434 | 196 |
|
23726 | 197 |
end; |
28434 | 198 |
|
199 |
end; |