author | wenzelm |
Sun, 09 Jan 2011 20:30:47 +0100 | |
changeset 41484 | 51310e1ccd6f |
parent 40958 | 755f8fe7ced9 |
child 42290 | b1f544c84040 |
permissions | -rw-r--r-- |
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
1 |
(* Title: Pure/Thy/thy_syntax.ML |
23726 | 2 |
Author: Makarius |
3 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
4 |
Superficial theory syntax: tokens and spans. |
23726 | 5 |
*) |
6 |
||
29315
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
wenzelm
parents:
28454
diff
changeset
|
7 |
signature THY_SYNTAX = |
23726 | 8 |
sig |
27842 | 9 |
val token_source: Scan.lexicon * Scan.lexicon -> Position.T -> (string, 'a) Source.source -> |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
10 |
(Token.T, (Symbol_Pos.T, Position.T * (Symbol.symbol, (string, 'a) |
27770 | 11 |
Source.source) Source.source) Source.source) Source.source |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
12 |
val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list |
40131
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
wenzelm
parents:
39507
diff
changeset
|
13 |
val present_token: Token.T -> Output.output |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
14 |
val report_token: Token.T -> unit |
27842 | 15 |
datatype span_kind = Command of string | Ignored | Malformed |
16 |
type span |
|
17 |
val span_kind: span -> span_kind |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
18 |
val span_content: span -> Token.T list |
27665 | 19 |
val span_range: span -> Position.range |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
20 |
val span_source: (Token.T, 'a) Source.source -> (span, (Token.T, 'a) Source.source) Source.source |
27842 | 21 |
val parse_spans: Scan.lexicon * Scan.lexicon -> Position.T -> string -> span list |
40131
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
wenzelm
parents:
39507
diff
changeset
|
22 |
val present_span: span -> Output.output |
27842 | 23 |
val report_span: span -> unit |
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
24 |
val atom_source: (span, 'a) Source.source -> |
28438
32bb6b4eb390
unit_source: explicit treatment of 'oops' proofs;
wenzelm
parents:
28434
diff
changeset
|
25 |
(span * span list * bool, (span, 'a) Source.source) Source.source |
23726 | 26 |
end; |
27 |
||
37216
3165bc303f66
modernized some structure names, keeping a few legacy aliases;
wenzelm
parents:
37197
diff
changeset
|
28 |
structure Thy_Syntax: THY_SYNTAX = |
23726 | 29 |
struct |
30 |
||
23803 | 31 |
(** tokens **) |
32 |
||
33 |
(* parse *) |
|
23726 | 34 |
|
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
35 |
fun token_source lexs pos = |
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
36 |
Symbol.source #> Token.source {do_recover = SOME false} (K lexs) pos; |
23726 | 37 |
|
40523
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
38 |
fun parse_tokens lexs pos = |
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
wenzelm
parents:
40290
diff
changeset
|
39 |
Source.of_string #> token_source lexs pos #> Source.exhaust; |
23726 | 40 |
|
41 |
||
23803 | 42 |
(* present *) |
23726 | 43 |
|
44 |
local |
|
45 |
||
46 |
val token_kind_markup = |
|
37193 | 47 |
fn Token.Command => Markup.command |
48 |
| Token.Keyword => Markup.keyword |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
49 |
| Token.Ident => Markup.ident |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
50 |
| Token.LongIdent => Markup.ident |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
51 |
| Token.SymIdent => Markup.ident |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
52 |
| Token.Var => Markup.var |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
53 |
| Token.TypeIdent => Markup.tfree |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
54 |
| Token.TypeVar => Markup.tvar |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
55 |
| Token.Nat => Markup.ident |
40290
47f572aff50a
support for floating-point tokens in outer syntax (coinciding with inner syntax version);
wenzelm
parents:
40131
diff
changeset
|
56 |
| Token.Float => Markup.ident |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
57 |
| Token.String => Markup.string |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
58 |
| Token.AltString => Markup.altstring |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
59 |
| Token.Verbatim => Markup.verbatim |
38474
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents:
38471
diff
changeset
|
60 |
| Token.Space => Markup.empty |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
61 |
| Token.Comment => Markup.comment |
38474
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
wenzelm
parents:
38471
diff
changeset
|
62 |
| Token.InternalValue => Markup.empty |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
63 |
| Token.Error _ => Markup.malformed |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
64 |
| Token.Sync => Markup.control |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
65 |
| Token.EOF => Markup.control; |
23726 | 66 |
|
37192 | 67 |
fun token_markup tok = |
68 |
if Token.keyword_with (not o Syntax.is_identifier) tok then Markup.operator |
|
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
69 |
else |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
70 |
let |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
71 |
val kind = Token.kind_of tok; |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
72 |
val props = |
38471
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
73 |
if kind = Token.Command |
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
wenzelm
parents:
38422
diff
changeset
|
74 |
then Markup.properties [(Markup.nameN, Token.content_of tok)] |
37197
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
75 |
else I; |
953fc4983439
more detailed token markup, including command kind as sub_kind;
wenzelm
parents:
37193
diff
changeset
|
76 |
in props (token_kind_markup kind) end; |
37192 | 77 |
|
40528 | 78 |
fun report_symbol (sym, pos) = |
79 |
if Symbol.is_malformed sym then Position.report pos Markup.malformed else (); |
|
80 |
||
23803 | 81 |
in |
82 |
||
23726 | 83 |
fun present_token tok = |
37192 | 84 |
Markup.enclose (token_markup tok) (Output.output (Token.unparse tok)); |
23726 | 85 |
|
27842 | 86 |
fun report_token tok = |
40528 | 87 |
(Position.report (Token.position_of tok) (token_markup tok); |
88 |
List.app report_symbol (Symbol_Pos.explode (Token.source_position_of tok))); |
|
27842 | 89 |
|
23803 | 90 |
end; |
91 |
||
92 |
||
93 |
||
27665 | 94 |
(** spans **) |
95 |
||
27842 | 96 |
(* type span *) |
97 |
||
98 |
datatype span_kind = Command of string | Ignored | Malformed; |
|
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
99 |
datatype span = Span of span_kind * Token.T list; |
23803 | 100 |
|
27842 | 101 |
fun span_kind (Span (k, _)) = k; |
102 |
fun span_content (Span (_, toks)) = toks; |
|
103 |
||
104 |
fun span_range span = |
|
105 |
(case span_content span of |
|
106 |
[] => (Position.none, Position.none) |
|
107 |
| toks => |
|
27665 | 108 |
let |
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
109 |
val start_pos = Token.position_of (hd toks); |
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
110 |
val end_pos = Token.end_position_of (List.last toks); |
27842 | 111 |
in (start_pos, end_pos) end); |
23803 | 112 |
|
113 |
||
114 |
(* parse *) |
|
23726 | 115 |
|
23803 | 116 |
local |
117 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
118 |
val is_whitespace = Token.is_kind Token.Space orf Token.is_kind Token.Comment; |
27665 | 119 |
|
36950 | 120 |
val body = |
121 |
Scan.unless (Scan.many is_whitespace -- Scan.ahead (Parse.command || Parse.eof)) Parse.not_eof; |
|
23726 | 122 |
|
27665 | 123 |
val span = |
36950 | 124 |
Scan.ahead Parse.command -- Parse.not_eof -- Scan.repeat body |
27842 | 125 |
>> (fn ((name, c), bs) => Span (Command name, c :: bs)) || |
126 |
Scan.many1 is_whitespace >> (fn toks => Span (Ignored, toks)) || |
|
127 |
Scan.repeat1 body >> (fn toks => Span (Malformed, toks)); |
|
23726 | 128 |
|
129 |
in |
|
130 |
||
36959
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
wenzelm
parents:
36950
diff
changeset
|
131 |
fun span_source src = Source.source Token.stopper (Scan.bulk span) NONE src; |
23803 | 132 |
|
133 |
end; |
|
134 |
||
27842 | 135 |
fun parse_spans lexs pos str = |
136 |
Source.of_string str |
|
137 |
|> token_source lexs pos |
|
138 |
|> span_source |
|
139 |
|> Source.exhaust; |
|
23803 | 140 |
|
141 |
||
142 |
(* present *) |
|
143 |
||
144 |
local |
|
145 |
||
27665 | 146 |
fun kind_markup (Command name) = Markup.command_span name |
147 |
| kind_markup Ignored = Markup.ignored_span |
|
27842 | 148 |
| kind_markup Malformed = Markup.malformed_span; |
23803 | 149 |
|
150 |
in |
|
151 |
||
27842 | 152 |
fun present_span span = |
153 |
Markup.enclose (kind_markup (span_kind span)) (implode (map present_token (span_content span))); |
|
154 |
||
155 |
fun report_span span = |
|
41484 | 156 |
Position.report (Position.set_range (span_range span)) (kind_markup (span_kind span)); |
23803 | 157 |
|
158 |
end; |
|
159 |
||
28434 | 160 |
|
161 |
||
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
162 |
(** specification atoms: commands with optional proof **) |
28434 | 163 |
|
164 |
(* scanning spans *) |
|
165 |
||
166 |
val eof = Span (Command "", []); |
|
167 |
||
168 |
fun is_eof (Span (Command "", _)) = true |
|
169 |
| is_eof _ = false; |
|
170 |
||
171 |
val not_eof = not o is_eof; |
|
172 |
||
173 |
val stopper = Scan.stopper (K eof) is_eof; |
|
174 |
||
175 |
||
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
176 |
(* atom_source *) |
28434 | 177 |
|
178 |
local |
|
179 |
||
180 |
fun command_with pred = Scan.one (fn (Span (Command name, _)) => pred name | _ => false); |
|
181 |
||
182 |
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d => |
|
28454
c63168db774c
unit_source: more rigid parsing, stop after final qed;
wenzelm
parents:
28438
diff
changeset
|
183 |
if d <= 0 then Scan.fail |
28434 | 184 |
else |
36950 | 185 |
command_with Keyword.is_qed_global >> pair ~1 || |
186 |
command_with Keyword.is_proof_goal >> pair (d + 1) || |
|
187 |
(if d = 0 then Scan.fail else command_with Keyword.is_qed >> pair (d - 1)) || |
|
188 |
Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state); |
|
28434 | 189 |
|
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
190 |
val atom = |
36950 | 191 |
command_with Keyword.is_theory_goal -- proof >> (fn (a, (bs, d)) => (a, bs, d >= 0)) || |
28438
32bb6b4eb390
unit_source: explicit treatment of 'oops' proofs;
wenzelm
parents:
28434
diff
changeset
|
192 |
Scan.one not_eof >> (fn a => (a, [], true)); |
28434 | 193 |
|
194 |
in |
|
195 |
||
38422
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
wenzelm
parents:
37216
diff
changeset
|
196 |
fun atom_source src = Source.source stopper (Scan.bulk atom) NONE src; |
28434 | 197 |
|
23726 | 198 |
end; |
28434 | 199 |
|
200 |
end; |