| author | haftmann | 
| Tue, 31 Aug 2010 14:21:06 +0200 | |
| changeset 38926 | 24f82786cc57 | 
| parent 38474 | e498dc2eb576 | 
| child 39507 | 839873937ddd | 
| permissions | -rw-r--r-- | 
| 
29315
 
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
 
wenzelm 
parents: 
28454 
diff
changeset
 | 
1  | 
(* Title: Pure/Thy/thy_syntax.ML  | 
| 23726 | 2  | 
Author: Makarius  | 
3  | 
||
| 
29315
 
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
 
wenzelm 
parents: 
28454 
diff
changeset
 | 
4  | 
Superficial theory syntax: tokens and spans.  | 
| 23726 | 5  | 
*)  | 
6  | 
||
| 
29315
 
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
 
wenzelm 
parents: 
28454 
diff
changeset
 | 
7  | 
signature THY_SYNTAX =  | 
| 23726 | 8  | 
sig  | 
| 27842 | 9  | 
val token_source: Scan.lexicon * Scan.lexicon -> Position.T -> (string, 'a) Source.source ->  | 
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
10  | 
(Token.T, (Symbol_Pos.T, Position.T * (Symbol.symbol, (string, 'a)  | 
| 27770 | 11  | 
Source.source) Source.source) Source.source) Source.source  | 
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
12  | 
val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
13  | 
val present_token: Token.T -> output  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
14  | 
val report_token: Token.T -> unit  | 
| 27842 | 15  | 
datatype span_kind = Command of string | Ignored | Malformed  | 
16  | 
type span  | 
|
17  | 
val span_kind: span -> span_kind  | 
|
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
18  | 
val span_content: span -> Token.T list  | 
| 27665 | 19  | 
val span_range: span -> Position.range  | 
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
20  | 
val span_source: (Token.T, 'a) Source.source -> (span, (Token.T, 'a) Source.source) Source.source  | 
| 27842 | 21  | 
val parse_spans: Scan.lexicon * Scan.lexicon -> Position.T -> string -> span list  | 
| 27665 | 22  | 
val present_span: span -> output  | 
| 27842 | 23  | 
val report_span: span -> unit  | 
| 
38422
 
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
 
wenzelm 
parents: 
37216 
diff
changeset
 | 
24  | 
val atom_source: (span, 'a) Source.source ->  | 
| 
28438
 
32bb6b4eb390
unit_source: explicit treatment of 'oops' proofs;
 
wenzelm 
parents: 
28434 
diff
changeset
 | 
25  | 
(span * span list * bool, (span, 'a) Source.source) Source.source  | 
| 23726 | 26  | 
end;  | 
27  | 
||
| 
37216
 
3165bc303f66
modernized some structure names, keeping a few legacy aliases;
 
wenzelm 
parents: 
37197 
diff
changeset
 | 
28  | 
structure Thy_Syntax: THY_SYNTAX =  | 
| 23726 | 29  | 
struct  | 
30  | 
||
| 23803 | 31  | 
(** tokens **)  | 
32  | 
||
33  | 
(* parse *)  | 
|
| 23726 | 34  | 
|
| 27842 | 35  | 
fun token_source lexs pos src =  | 
36  | 
  Symbol.source {do_recover = true} src
 | 
|
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
37  | 
  |> Token.source {do_recover = SOME false} (K lexs) pos;
 | 
| 23726 | 38  | 
|
| 27842 | 39  | 
fun parse_tokens lexs pos str =  | 
40  | 
Source.of_string str  | 
|
41  | 
|> token_source lexs pos  | 
|
42  | 
|> Source.exhaust;  | 
|
| 23726 | 43  | 
|
44  | 
||
| 23803 | 45  | 
(* present *)  | 
| 23726 | 46  | 
|
47  | 
local  | 
|
48  | 
||
49  | 
val token_kind_markup =  | 
|
| 37193 | 50  | 
fn Token.Command => Markup.command  | 
51  | 
| Token.Keyword => Markup.keyword  | 
|
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
52  | 
| Token.Ident => Markup.ident  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
53  | 
| Token.LongIdent => Markup.ident  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
54  | 
| Token.SymIdent => Markup.ident  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
55  | 
| Token.Var => Markup.var  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
56  | 
| Token.TypeIdent => Markup.tfree  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
57  | 
| Token.TypeVar => Markup.tvar  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
58  | 
| Token.Nat => Markup.ident  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
59  | 
| Token.String => Markup.string  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
60  | 
| Token.AltString => Markup.altstring  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
61  | 
| Token.Verbatim => Markup.verbatim  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
62  | 
| Token.Space => Markup.empty  | 
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
63  | 
| Token.Comment => Markup.comment  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
64  | 
| Token.InternalValue => Markup.empty  | 
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
65  | 
| Token.Malformed => Markup.malformed  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
66  | 
| Token.Error _ => Markup.malformed  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
67  | 
| Token.Sync => Markup.control  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
68  | 
| Token.EOF => Markup.control;  | 
| 23726 | 69  | 
|
| 37192 | 70  | 
fun token_markup tok =  | 
71  | 
if Token.keyword_with (not o Syntax.is_identifier) tok then Markup.operator  | 
|
| 
37197
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
72  | 
else  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
73  | 
let  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
74  | 
val kind = Token.kind_of tok;  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
75  | 
val props =  | 
| 
38471
 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 
wenzelm 
parents: 
38422 
diff
changeset
 | 
76  | 
if kind = Token.Command  | 
| 
 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 
wenzelm 
parents: 
38422 
diff
changeset
 | 
77  | 
then Markup.properties [(Markup.nameN, Token.content_of tok)]  | 
| 
37197
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
78  | 
else I;  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
79  | 
in props (token_kind_markup kind) end;  | 
| 37192 | 80  | 
|
| 23803 | 81  | 
in  | 
82  | 
||
| 23726 | 83  | 
fun present_token tok =  | 
| 37192 | 84  | 
Markup.enclose (token_markup tok) (Output.output (Token.unparse tok));  | 
| 23726 | 85  | 
|
| 27842 | 86  | 
fun report_token tok =  | 
| 37192 | 87  | 
Position.report (token_markup tok) (Token.position_of tok);  | 
| 27842 | 88  | 
|
| 23803 | 89  | 
end;  | 
90  | 
||
91  | 
||
92  | 
||
| 27665 | 93  | 
(** spans **)  | 
94  | 
||
| 27842 | 95  | 
(* type span *)  | 
96  | 
||
97  | 
datatype span_kind = Command of string | Ignored | Malformed;  | 
|
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
98  | 
datatype span = Span of span_kind * Token.T list;  | 
| 23803 | 99  | 
|
| 27842 | 100  | 
fun span_kind (Span (k, _)) = k;  | 
101  | 
fun span_content (Span (_, toks)) = toks;  | 
|
102  | 
||
103  | 
fun span_range span =  | 
|
104  | 
(case span_content span of  | 
|
105  | 
[] => (Position.none, Position.none)  | 
|
106  | 
| toks =>  | 
|
| 27665 | 107  | 
let  | 
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
108  | 
val start_pos = Token.position_of (hd toks);  | 
| 
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
109  | 
val end_pos = Token.end_position_of (List.last toks);  | 
| 27842 | 110  | 
in (start_pos, end_pos) end);  | 
| 23803 | 111  | 
|
112  | 
||
113  | 
(* parse *)  | 
|
| 23726 | 114  | 
|
| 23803 | 115  | 
local  | 
116  | 
||
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
117  | 
val is_whitespace = Token.is_kind Token.Space orf Token.is_kind Token.Comment;  | 
| 27665 | 118  | 
|
| 36950 | 119  | 
val body =  | 
120  | 
Scan.unless (Scan.many is_whitespace -- Scan.ahead (Parse.command || Parse.eof)) Parse.not_eof;  | 
|
| 23726 | 121  | 
|
| 27665 | 122  | 
val span =  | 
| 36950 | 123  | 
Scan.ahead Parse.command -- Parse.not_eof -- Scan.repeat body  | 
| 27842 | 124  | 
>> (fn ((name, c), bs) => Span (Command name, c :: bs)) ||  | 
125  | 
Scan.many1 is_whitespace >> (fn toks => Span (Ignored, toks)) ||  | 
|
126  | 
Scan.repeat1 body >> (fn toks => Span (Malformed, toks));  | 
|
| 23726 | 127  | 
|
128  | 
in  | 
|
129  | 
||
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
130  | 
fun span_source src = Source.source Token.stopper (Scan.bulk span) NONE src;  | 
| 23803 | 131  | 
|
132  | 
end;  | 
|
133  | 
||
| 27842 | 134  | 
fun parse_spans lexs pos str =  | 
135  | 
Source.of_string str  | 
|
136  | 
|> token_source lexs pos  | 
|
137  | 
|> span_source  | 
|
138  | 
|> Source.exhaust;  | 
|
| 23803 | 139  | 
|
140  | 
||
141  | 
(* present *)  | 
|
142  | 
||
143  | 
local  | 
|
144  | 
||
| 27665 | 145  | 
fun kind_markup (Command name) = Markup.command_span name  | 
146  | 
| kind_markup Ignored = Markup.ignored_span  | 
|
| 27842 | 147  | 
| kind_markup Malformed = Markup.malformed_span;  | 
| 23803 | 148  | 
|
149  | 
in  | 
|
150  | 
||
| 27842 | 151  | 
fun present_span span =  | 
152  | 
Markup.enclose (kind_markup (span_kind span)) (implode (map present_token (span_content span)));  | 
|
153  | 
||
154  | 
fun report_span span =  | 
|
155  | 
Position.report (kind_markup (span_kind span)) (Position.encode_range (span_range span));  | 
|
| 23803 | 156  | 
|
157  | 
end;  | 
|
158  | 
||
| 28434 | 159  | 
|
160  | 
||
| 
38422
 
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
 
wenzelm 
parents: 
37216 
diff
changeset
 | 
161  | 
(** specification atoms: commands with optional proof **)  | 
| 28434 | 162  | 
|
163  | 
(* scanning spans *)  | 
|
164  | 
||
165  | 
val eof = Span (Command "", []);  | 
|
166  | 
||
167  | 
fun is_eof (Span (Command "", _)) = true  | 
|
168  | 
| is_eof _ = false;  | 
|
169  | 
||
170  | 
val not_eof = not o is_eof;  | 
|
171  | 
||
172  | 
val stopper = Scan.stopper (K eof) is_eof;  | 
|
173  | 
||
174  | 
||
| 
38422
 
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
 
wenzelm 
parents: 
37216 
diff
changeset
 | 
175  | 
(* atom_source *)  | 
| 28434 | 176  | 
|
177  | 
local  | 
|
178  | 
||
179  | 
fun command_with pred = Scan.one (fn (Span (Command name, _)) => pred name | _ => false);  | 
|
180  | 
||
181  | 
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d =>  | 
|
| 
28454
 
c63168db774c
unit_source: more rigid parsing, stop after final qed;
 
wenzelm 
parents: 
28438 
diff
changeset
 | 
182  | 
if d <= 0 then Scan.fail  | 
| 28434 | 183  | 
else  | 
| 36950 | 184  | 
command_with Keyword.is_qed_global >> pair ~1 ||  | 
185  | 
command_with Keyword.is_proof_goal >> pair (d + 1) ||  | 
|
186  | 
(if d = 0 then Scan.fail else command_with Keyword.is_qed >> pair (d - 1)) ||  | 
|
187  | 
Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state);  | 
|
| 28434 | 188  | 
|
| 
38422
 
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
 
wenzelm 
parents: 
37216 
diff
changeset
 | 
189  | 
val atom =  | 
| 36950 | 190  | 
command_with Keyword.is_theory_goal -- proof >> (fn (a, (bs, d)) => (a, bs, d >= 0)) ||  | 
| 
28438
 
32bb6b4eb390
unit_source: explicit treatment of 'oops' proofs;
 
wenzelm 
parents: 
28434 
diff
changeset
 | 
191  | 
Scan.one not_eof >> (fn a => (a, [], true));  | 
| 28434 | 192  | 
|
193  | 
in  | 
|
194  | 
||
| 
38422
 
f96394dba335
rename "unit" to "atom", to avoid confusion with the unit type;
 
wenzelm 
parents: 
37216 
diff
changeset
 | 
195  | 
fun atom_source src = Source.source stopper (Scan.bulk atom) NONE src;  | 
| 28434 | 196  | 
|
| 23726 | 197  | 
end;  | 
| 28434 | 198  | 
|
199  | 
end;  |