| author | huffman | 
| Thu, 29 Mar 2012 14:47:31 +0200 | |
| changeset 47196 | 6012241abe93 | 
| parent 46811 | 03a2dc9e0624 | 
| child 48749 | c197b3c3e7fa | 
| permissions | -rw-r--r-- | 
| 
29315
 
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
 
wenzelm 
parents: 
28454 
diff
changeset
 | 
1  | 
(* Title: Pure/Thy/thy_syntax.ML  | 
| 23726 | 2  | 
Author: Makarius  | 
3  | 
||
| 
29315
 
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
 
wenzelm 
parents: 
28454 
diff
changeset
 | 
4  | 
Superficial theory syntax: tokens and spans.  | 
| 23726 | 5  | 
*)  | 
6  | 
||
| 
29315
 
b074c05f00ad
renamed ThyEdit (in thy_edit.ML) to ThySyntax (in thy_syntax.ML);
 
wenzelm 
parents: 
28454 
diff
changeset
 | 
7  | 
signature THY_SYNTAX =  | 
| 23726 | 8  | 
sig  | 
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
9  | 
val parse_tokens: Scan.lexicon * Scan.lexicon -> Position.T -> string -> Token.T list  | 
| 
40131
 
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
10  | 
val present_token: Token.T -> Output.output  | 
| 44736 | 11  | 
val reports_of_token: Token.T -> Position.report list  | 
| 27842 | 12  | 
datatype span_kind = Command of string | Ignored | Malformed  | 
13  | 
type span  | 
|
14  | 
val span_kind: span -> span_kind  | 
|
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
15  | 
val span_content: span -> Token.T list  | 
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
16  | 
val parse_spans: Token.T list -> span list  | 
| 
40131
 
7cbebd636e79
explicitly qualify type Output.output, which is a slightly odd internal feature;
 
wenzelm 
parents: 
39507 
diff
changeset
 | 
17  | 
val present_span: span -> Output.output  | 
| 43621 | 18  | 
  type element = {head: span, proof: span list, proper_proof: bool}
 | 
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
19  | 
val parse_elements: span list -> element list  | 
| 23726 | 20  | 
end;  | 
21  | 
||
| 
37216
 
3165bc303f66
modernized some structure names, keeping a few legacy aliases;
 
wenzelm 
parents: 
37197 
diff
changeset
 | 
22  | 
structure Thy_Syntax: THY_SYNTAX =  | 
| 23726 | 23  | 
struct  | 
24  | 
||
| 23803 | 25  | 
(** tokens **)  | 
26  | 
||
27  | 
(* parse *)  | 
|
| 23726 | 28  | 
|
| 
40523
 
1050315f6ee2
simplified/robustified treatment of malformed symbols, which are now fully internalized (total Symbol.explode etc.);
 
wenzelm 
parents: 
40290 
diff
changeset
 | 
29  | 
fun parse_tokens lexs pos =  | 
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
30  | 
Source.of_string #>  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
31  | 
Symbol.source #>  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
32  | 
  Token.source {do_recover = SOME false} (K lexs) pos #>
 | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
33  | 
Source.exhaust;  | 
| 23726 | 34  | 
|
35  | 
||
| 23803 | 36  | 
(* present *)  | 
| 23726 | 37  | 
|
38  | 
local  | 
|
39  | 
||
40  | 
val token_kind_markup =  | 
|
| 45666 | 41  | 
fn Token.Command => Isabelle_Markup.command  | 
42  | 
| Token.Keyword => Isabelle_Markup.keyword  | 
|
| 
44706
 
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
 
wenzelm 
parents: 
44658 
diff
changeset
 | 
43  | 
| Token.Ident => Markup.empty  | 
| 
 
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
 
wenzelm 
parents: 
44658 
diff
changeset
 | 
44  | 
| Token.LongIdent => Markup.empty  | 
| 
 
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
 
wenzelm 
parents: 
44658 
diff
changeset
 | 
45  | 
| Token.SymIdent => Markup.empty  | 
| 45666 | 46  | 
| Token.Var => Isabelle_Markup.var  | 
47  | 
| Token.TypeIdent => Isabelle_Markup.tfree  | 
|
48  | 
| Token.TypeVar => Isabelle_Markup.tvar  | 
|
| 
44706
 
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
 
wenzelm 
parents: 
44658 
diff
changeset
 | 
49  | 
| Token.Nat => Markup.empty  | 
| 
 
fe319b45315c
eliminated markup for plain identifiers (frequent but insignificant);
 
wenzelm 
parents: 
44658 
diff
changeset
 | 
50  | 
| Token.Float => Markup.empty  | 
| 45666 | 51  | 
| Token.String => Isabelle_Markup.string  | 
52  | 
| Token.AltString => Isabelle_Markup.altstring  | 
|
53  | 
| Token.Verbatim => Isabelle_Markup.verbatim  | 
|
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
54  | 
| Token.Space => Markup.empty  | 
| 45666 | 55  | 
| Token.Comment => Isabelle_Markup.comment  | 
| 
38474
 
e498dc2eb576
uniform Markup.empty/Markup.Empty in ML and Scala;
 
wenzelm 
parents: 
38471 
diff
changeset
 | 
56  | 
| Token.InternalValue => Markup.empty  | 
| 45666 | 57  | 
| Token.Error _ => Isabelle_Markup.malformed  | 
58  | 
| Token.Sync => Isabelle_Markup.control  | 
|
59  | 
| Token.EOF => Isabelle_Markup.control;  | 
|
| 23726 | 60  | 
|
| 37192 | 61  | 
fun token_markup tok =  | 
| 45666 | 62  | 
if Token.keyword_with (not o Lexicon.is_ascii_identifier) tok then Isabelle_Markup.operator  | 
| 
37197
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
63  | 
else  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
64  | 
let  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
65  | 
val kind = Token.kind_of tok;  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
66  | 
val props =  | 
| 
38471
 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 
wenzelm 
parents: 
38422 
diff
changeset
 | 
67  | 
if kind = Token.Command  | 
| 
 
0924654b8163
report command token name instead of kind, which can be retrieved later via Outer_Syntax.keyword_kind;
 
wenzelm 
parents: 
38422 
diff
changeset
 | 
68  | 
then Markup.properties [(Markup.nameN, Token.content_of tok)]  | 
| 
37197
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
69  | 
else I;  | 
| 
 
953fc4983439
more detailed token markup, including command kind as sub_kind;
 
wenzelm 
parents: 
37193 
diff
changeset
 | 
70  | 
in props (token_kind_markup kind) end;  | 
| 37192 | 71  | 
|
| 44736 | 72  | 
fun reports_of_symbol (sym, pos) =  | 
| 45666 | 73  | 
if Symbol.is_malformed sym then [(pos, Isabelle_Markup.malformed)] else [];  | 
| 40528 | 74  | 
|
| 23803 | 75  | 
in  | 
76  | 
||
| 23726 | 77  | 
fun present_token tok =  | 
| 37192 | 78  | 
Markup.enclose (token_markup tok) (Output.output (Token.unparse tok));  | 
| 23726 | 79  | 
|
| 44736 | 80  | 
fun reports_of_token tok =  | 
81  | 
(Token.position_of tok, token_markup tok) ::  | 
|
82  | 
maps reports_of_symbol (Symbol_Pos.explode (Token.source_position_of tok));  | 
|
| 27842 | 83  | 
|
| 23803 | 84  | 
end;  | 
85  | 
||
86  | 
||
87  | 
||
| 27665 | 88  | 
(** spans **)  | 
89  | 
||
| 27842 | 90  | 
(* type span *)  | 
91  | 
||
92  | 
datatype span_kind = Command of string | Ignored | Malformed;  | 
|
| 
36959
 
f5417836dbea
renamed structure OuterLex to Token and type token to Token.T, keeping legacy aliases for some time;
 
wenzelm 
parents: 
36950 
diff
changeset
 | 
93  | 
datatype span = Span of span_kind * Token.T list;  | 
| 23803 | 94  | 
|
| 27842 | 95  | 
fun span_kind (Span (k, _)) = k;  | 
96  | 
fun span_content (Span (_, toks)) = toks;  | 
|
97  | 
||
| 23803 | 98  | 
|
99  | 
(* parse *)  | 
|
| 23726 | 100  | 
|
| 23803 | 101  | 
local  | 
102  | 
||
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
103  | 
fun make_span toks =  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
104  | 
if not (null toks) andalso Token.is_command (hd toks) then  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
105  | 
Span (Command (Token.content_of (hd toks)), toks)  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
106  | 
else if forall (not o Token.is_proper) toks then Span (Ignored, toks)  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
107  | 
else Span (Malformed, toks);  | 
| 23726 | 108  | 
|
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
109  | 
fun flush (result, span) = if null span then (result, span) else (rev span :: result, []);  | 
| 23726 | 110  | 
|
111  | 
in  | 
|
112  | 
||
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
113  | 
fun parse_spans toks =  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
114  | 
fold (fn tok => Token.is_command tok ? flush #> apsnd (cons tok)) toks ([], [])  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
115  | 
|> flush  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
116  | 
|> #1 |> rev |> map make_span;  | 
| 23803 | 117  | 
|
118  | 
end;  | 
|
119  | 
||
120  | 
||
121  | 
(* present *)  | 
|
122  | 
||
123  | 
local  | 
|
124  | 
||
| 45666 | 125  | 
fun kind_markup (Command name) = Isabelle_Markup.command_span name  | 
126  | 
| kind_markup Ignored = Isabelle_Markup.ignored_span  | 
|
127  | 
| kind_markup Malformed = Isabelle_Markup.malformed_span;  | 
|
| 23803 | 128  | 
|
129  | 
in  | 
|
130  | 
||
| 27842 | 131  | 
fun present_span span =  | 
132  | 
Markup.enclose (kind_markup (span_kind span)) (implode (map present_token (span_content span)));  | 
|
133  | 
||
| 23803 | 134  | 
end;  | 
135  | 
||
| 28434 | 136  | 
|
137  | 
||
| 43621 | 138  | 
(** specification elements: commands with optional proof **)  | 
139  | 
||
140  | 
type element = {head: span, proof: span list, proper_proof: bool};
 | 
|
141  | 
||
142  | 
fun make_element head proof proper_proof =  | 
|
143  | 
  {head = head, proof = proof, proper_proof = proper_proof};
 | 
|
144  | 
||
| 28434 | 145  | 
|
146  | 
(* scanning spans *)  | 
|
147  | 
||
148  | 
val eof = Span (Command "", []);  | 
|
149  | 
||
150  | 
fun is_eof (Span (Command "", _)) = true  | 
|
151  | 
| is_eof _ = false;  | 
|
152  | 
||
153  | 
val not_eof = not o is_eof;  | 
|
154  | 
||
155  | 
val stopper = Scan.stopper (K eof) is_eof;  | 
|
156  | 
||
157  | 
||
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
158  | 
(* parse *)  | 
| 28434 | 159  | 
|
160  | 
local  | 
|
161  | 
||
162  | 
fun command_with pred = Scan.one (fn (Span (Command name, _)) => pred name | _ => false);  | 
|
163  | 
||
164  | 
val proof = Scan.pass 1 (Scan.repeat (Scan.depend (fn d =>  | 
|
| 
28454
 
c63168db774c
unit_source: more rigid parsing, stop after final qed;
 
wenzelm 
parents: 
28438 
diff
changeset
 | 
165  | 
if d <= 0 then Scan.fail  | 
| 28434 | 166  | 
else  | 
| 36950 | 167  | 
command_with Keyword.is_qed_global >> pair ~1 ||  | 
168  | 
command_with Keyword.is_proof_goal >> pair (d + 1) ||  | 
|
169  | 
(if d = 0 then Scan.fail else command_with Keyword.is_qed >> pair (d - 1)) ||  | 
|
170  | 
Scan.unless (command_with Keyword.is_theory) (Scan.one not_eof) >> pair d)) -- Scan.state);  | 
|
| 28434 | 171  | 
|
| 43621 | 172  | 
val element =  | 
173  | 
command_with Keyword.is_theory_goal -- proof  | 
|
174  | 
>> (fn (a, (bs, d)) => make_element a bs (d >= 0)) ||  | 
|
175  | 
Scan.one not_eof >> (fn a => make_element a [] true);  | 
|
| 28434 | 176  | 
|
177  | 
in  | 
|
178  | 
||
| 
46811
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
179  | 
val parse_elements =  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
180  | 
Source.of_list #>  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
181  | 
Source.source stopper (Scan.bulk element) NONE #>  | 
| 
 
03a2dc9e0624
clarified command span: include trailing whitespace/comments and thus reduce number of ignored spans with associated transactions and states (factor 2);
 
wenzelm 
parents: 
45666 
diff
changeset
 | 
182  | 
Source.exhaust;  | 
| 28434 | 183  | 
|
| 23726 | 184  | 
end;  | 
| 28434 | 185  | 
|
186  | 
end;  |