author | wenzelm |
Sat, 01 Mar 2014 22:46:31 +0100 | |
changeset 55828 | 42ac3cfb89f6 |
parent 55613 | ad446b45efff |
child 56163 | 331f4aba14b3 |
permissions | -rw-r--r-- |
55030 | 1 |
(*  Title:      Pure/Tools/rail.ML
    Author:     Michael Kerscher, TU München
    Author:     Makarius

Railroad diagrams in LaTeX.
*)
|
7 |
||
8 |
structure Rail: sig end = |
|
9 |
struct |
|
10 |
||
11 |
(** lexical syntax **) |
|
12 |
||
13 |
(* datatype token *) |
|
14 |
||
42508
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
wenzelm
parents:
42507
diff
changeset
|
15 |
(*token kinds; an antiquotation token keeps its scanned Antiquote.antiq*)
datatype kind =
  Keyword |
  Ident |
  String |
  Antiq of Antiquote.antiq |
  EOF;

(*token: source range together with its kind and textual content*)
datatype token =
  Token of Position.range * (kind * string);
|
19 |
||
20 |
(*first and second component of the token's position range*)
fun pos_of (Token (range, _)) = #1 range;
fun end_pos_of (Token (range, _)) = #2 range;

(*kind and textual content of a token*)
fun kind_of (Token (_, body)) = #1 body;
fun content_of (Token (_, body)) = #2 body;
|
25 |
||
26 |
||
27 |
(* diagnostics *) |
|
28 |
||
29 |
(*human-readable name of a token kind, as used in error messages*)
fun print_kind Keyword = "rail keyword"
  | print_kind Ident = "identifier"
  | print_kind String = "single-quoted string"
  | print_kind (Antiq _) = "antiquotation"
  | print_kind EOF = "end-of-input";
42504 | 35 |
|
36 |
(*describe a token: kind name, quoted content (omitted for EOF),
  followed by the position markup of its start position*)
fun print (Token ((pos, _), (k, x))) =
  let
    val description =
      (case k of
        EOF => print_kind k
      | _ => print_kind k ^ " " ^ quote x);
  in description ^ Position.here pos end;

(*describe a keyword that is merely expected, i.e. without position*)
fun print_keyword x = space_implode " " [print_kind Keyword, quote x];
|
41 |
||
55613 | 42 |
(*position reports for a token: strings are marked as inner strings,
  antiquotations contribute their own reports, other kinds nothing*)
fun reports_of_token (Token ((pos, _), (kind, _))) =
  (case kind of
    String => [(pos, Markup.inner_string)]
  | Antiq antiq => Antiquote.antiq_reports antiq
  | _ => []);
|
45 |
||
42504 | 46 |
|
47 |
(* stopper *) |
|
48 |
||
49 |
(*EOF token starting at the given position, with empty content*)
fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));

(*EOF token without position information*)
val eof = mk_eof Position.none;

fun is_eof tok =
  (case tok of
    Token (_, (EOF, _)) => true
  | _ => false);

(*stopper for token lists: plain eof for the empty list, otherwise an
  EOF token located at the end position of the last token*)
val stopper =
  let
    fun stop [] = eof
      | stop toks = mk_eof (end_pos_of (List.last toks));
  in Scan.stopper stop is_eof end;
|
57 |
||
58 |
||
59 |
(* tokenize *) |
|
60 |
||
61 |
local |
|
62 |
||
63 |
(*singleton token list of kind k, with range and content taken from
  the positioned symbols ss*)
fun token k ss =
  let
    val range = Symbol_Pos.range ss;
    val content = Symbol_Pos.content ss;
  in [Token (range, (k, content))] end;

(*non-empty sequence of blank symbols*)
val scan_space = Scan.many1 (fn sp => Symbol.is_blank (Symbol_Pos.symbol sp));
|
66 |
||
67 |
(*single-symbol keywords of the rail language*)
val keywords = ["|", "*", "+", "?", "(", ")", "\<newline>", ";", ":", "@"];

(*exactly one positioned symbol that is a keyword*)
val scan_keyword =
  Scan.one (fn sp => member (op =) keywords (Symbol_Pos.symbol sp));

(*common prefix of lexical error messages*)
val err_prefix = "Rail lexical error: ";
72 |
||
42504 | 73 |
(*scan one lexical item, yielding a list of tokens: blanks produce no
  token, every other alternative exactly one; alternatives are tried in
  the order given*)
val scan_token =
  let
    fun antiq_token (antiq as (ss, _)) = token (Antiq antiq) ss;

    (*the string token spans from pos1 to pos2, content without quotes
      as delivered by the string scanner*)
    fun string_token (pos1, (ss, pos2)) =
      [Token (Position.range pos1 pos2, (String, Symbol_Pos.content ss))];
  in
    scan_space >> K [] ||
    Antiquote.scan_antiq >> antiq_token ||
    scan_keyword >> (token Keyword o single) ||
    Lexicon.scan_id >> token Ident ||
    Symbol_Pos.scan_string_q err_prefix >> string_token
  end;
|
42504 | 80 |
|
42506
876887b07e8d
more robust error handling (NB: Source.source requires total scanner or recover);
wenzelm
parents:
42504
diff
changeset
|
81 |
(*scan all tokens, then insist (without consuming it) that the next
  symbol is the end-of-input marker; anything else is reported as
  "bad input" with the lexical error prefix*)
val scan =
  let
    val tokens = Scan.repeat scan_token >> flat;
    val at_end = Scan.ahead (Scan.one Symbol_Pos.is_eof);
  in tokens --| Symbol_Pos.!!! (fn () => err_prefix ^ "bad input") at_end end;
876887b07e8d
more robust error handling (NB: Source.source requires total scanner or recover);
wenzelm
parents:
42504
diff
changeset
|
85 |
|
42504 | 86 |
in |
87 |
||
55613 | 88 |
(*turn a list of positioned symbols into rail tokens; the remaining
  input returned by the finite scanner is discarded*)
fun tokenize syms =
  #1 (Scan.error (Scan.finite Symbol_Pos.stopper scan) syms);
42504 | 89 |
|
90 |
end; |
|
91 |
||
92 |
||
93 |
||
94 |
(** parsing **) |
|
95 |
||
96 |
(*wrap a scanner via Scan.!! so that its failure message starts with
  "Rail syntax error" plus the position of the next token; messages
  that already carry this prefix are passed through unchanged*)
fun !!! scan =
  let
    val prefix = "Rail syntax error";

    fun get_pos toks =
      (case toks of
        [] => " (end-of-input)"
      | tok :: _ => Position.here (pos_of tok));

    (*the message is produced lazily: nothing is computed before the
      resulting thunk is applied to ()*)
    fun err (toks, opt_msg) () =
      (case opt_msg of
        NONE => prefix ^ get_pos toks
      | SOME msg =>
          let val s = msg () in
            if String.isPrefix prefix s then s
            else prefix ^ get_pos toks ^ ": " ^ s
          end);
  in Scan.!! err scan end;
111 |
||
112 |
(*scan the keyword token with content x; otherwise fail with a lazily
  produced message naming what was expected and what was found*)
fun $$$ x =
  let
    fun matches tok =
      (case kind_of tok of
        Keyword => content_of tok = x
      | _ => false);

    fun mismatch toks () =
      print_keyword x ^ " expected,\nbut " ^
        (case toks of
          [] => "end-of-input"
        | tok :: _ => print tok) ^ " was found";
  in Scan.one matches || Scan.fail_with mismatch end;
42504 | 117 |
|
118 |
(*non-empty enumeration: one item, then any number of further items
  each preceded by keyword sep; after sep an item is mandatory*)
fun enum1 sep scan =
  let val more = Scan.repeat ($$$ sep |-- !!! scan)
  in scan ::: more end;

(*possibly empty enumeration*)
fun enum sep scan =
  enum1 sep scan ||
  Scan.succeed [];
|
120 |
||
42508
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
wenzelm
parents:
42507
diff
changeset
|
121 |
(*content of an identifier token*)
val ident = Scan.some (fn Token (_, (Ident, s)) => SOME s | _ => NONE);

(*content of a string token*)
val string = Scan.some (fn Token (_, (String, s)) => SOME s | _ => NONE);

(*antiquotation carried by an antiquotation token*)
val antiq = Scan.some (fn Token (_, (Antiq a, _)) => SOME a | _ => NONE);
42504 | 125 |
|
126 |
||
127 |
||
128 |
(** rail expressions **) |
|
129 |
||
130 |
(* datatype *) |
|
131 |
||
132 |
(*rails: a sequence of rail items; the int is 0 when built via cat and
  is replaced by a vertical position during layout (vertical_range_cat).
  rail: alternatives, repetition, line break (with vertical position),
  or an atom; the bool of Terminal/Antiquote stems from an "@" prefix*)
datatype rails =
  Cat of int * rail list
and rail =
  Bar of rails list
| Plus of rails * rails
| Newline of int
| Nonterminal of string
| Terminal of bool * string
| Antiquote of bool * Antiquote.antiq;
42504 | 141 |
|
142 |
(*mirror a rail expression: every sequence is reversed, recursively
  through alternatives and repetitions; atoms and newlines are kept*)
fun reverse_cat (Cat (y, rails)) = Cat (y, map reverse (rev rails))
and reverse rail =
  (case rail of
    Bar cats => Bar (map reverse_cat cats)
  | Plus (cat1, cat2) => Plus (reverse_cat cat1, reverse_cat cat2)
  | _ => rail);
|
146 |
||
147 |
(*sequence with vertical position not yet assigned*)
fun cat rails = Cat (0, rails);

(*the empty sequence*)
val empty = cat [];

fun is_empty (Cat (_, rails)) = null rails;

fun is_newline rail =
  (case rail of
    Newline _ => true
  | _ => false);
|
153 |
||
154 |
(*alternatives; a single alternative consisting of a single rail
  collapses to that rail*)
fun bar cats =
  (case cats of
    [Cat (_, [rail])] => rail
  | _ => Bar cats);

(*repetition of cat1 with cat2 on the way back, hence mirrored*)
fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);

(*zero or more repetitions: without separator cat2 the body cat1 itself
  sits on the return path, otherwise the whole loop becomes optional*)
fun star cat1 cat2 =
  if not (is_empty cat2) then bar [empty, cat [plus cat1 cat2]]
  else plus empty cat1;

(*optional rail: choice between nothing and the rail*)
fun maybe rail = bar [empty, cat [rail]];
|
164 |
||
165 |
||
166 |
(* read *) |
|
167 |
||
168 |
local |
|
169 |
||
42516 | 170 |
(*optional "@" keyword: true iff it is present*)
val at_mode = Scan.option ($$$ "@") >> is_some;
171 |
||
42504 | 172 |
(*grammar of rail bodies, from loosest to tightest binding:
    body, body0  alternatives separated by "|" (body0 may be empty)
    body1        sequence, optionally followed by "*" or "+" and an
                 optional separator item
    body2        non-empty sequence of items
    body3        item with optional "?" suffix
    body4        parenthesized body, newline, identifier, string or
                 antiquotation (the latter two with optional "@")
    body4e       optional body4, as a sequence of zero or one rails
  all functions are eta-expanded to permit the mutual recursion*)
fun body x = (enum1 "|" body1 >> bar) x
and body0 x = (enum "|" body1 >> bar) x
and body1 x =
  let
    fun repetition a =
      $$$ "*" |-- !!! body4e >> (fn b => cat [star a b]) ||
      $$$ "+" |-- !!! body4e >> (fn b => cat [plus a b]) ||
      Scan.succeed a;
  in (body2 :|-- repetition) x end
and body2 x = (Scan.repeat1 body3 >> cat) x
and body3 x =
  let
    fun optional a =
      $$$ "?" >> K (maybe a) ||
      Scan.succeed a;
  in (body4 :|-- optional) x end
and body4 x =
  let
    val parens = $$$ "(" |-- !!! (body0 --| $$$ ")");
    val newline = $$$ "\<newline>" >> K (Newline 0);
    val nonterminal = ident >> Nonterminal;
    val terminal = at_mode -- string >> Terminal;
    val antiquote = at_mode -- antiq >> Antiquote;
  in (parens || newline || nonterminal || terminal || antiquote) x end
and body4e x = (Scan.option body4 >> (cat o the_list)) x;
188 |
||
42516 | 189 |
(*rule name: plain identifier or antiquotation*)
val rule_name =
  ident >> Antiquote.Text ||
  antiq >> Antiquote.Antiq;

(*rule: "name : body", or a bare body which gets the empty text name*)
val rule =
  let
    val named = rule_name -- ($$$ ":" |-- !!! body);
    val anonymous = body >> (fn rail => (Antiquote.Text "", rail));
  in named || anonymous end;

(*rules separated by ";", where empty positions are dropped*)
val rules = enum1 ";" (Scan.option rule) >> map_filter I;
192 |
||
193 |
in |
|
194 |
||
55828
42ac3cfb89f6
clarified language markup: added "delimited" property;
wenzelm
parents:
55613
diff
changeset
|
195 |
(*read rail source: report the language markup for the source position,
  tokenize, report token markup, then parse all rules up to the
  end-of-input token; yields the list of (name, rail) pairs*)
fun read ctxt (source: Symbol_Pos.source) =
  let
    val _ = Context_Position.report ctxt (#pos source) Markup.language_rail;
    val toks = tokenize (Symbol_Pos.explode (#text source, #pos source));
    val _ = Context_Position.reports ctxt (maps reports_of_token toks);

    val at_end = !!! (Scan.ahead (Scan.one is_eof));
    val parse = Scan.error (Scan.finite stopper (rules --| at_end));
    val (result, _) = parse toks;
  in result end;
|
42504 | 202 |
|
203 |
end; |
|
204 |
||
205 |
||
206 |
(* latex output *) |
|
207 |
||
208 |
local |
|
209 |
||
210 |
(*assign vertical positions, starting from y; the result is the
  annotated expression together with the next free vertical position.

  Within a sequence the state is (base, top): items are laid out at
  base, top tracks the maximum extent seen so far; a newline is placed
  at top + 1 and moves the base there*)
fun vertical_range_cat (Cat (_, rails)) y =
  let
    fun step rail (base, top) =
      (case rail of
        Newline _ => (Newline (top + 1), (top + 1, top + 2))
      | _ =>
          let val (rail', top') = vertical_range rail base
          in (rail', (base, Int.max (top', top))) end);
    val (rails', (_, y')) = fold_map step rails (y, y + 1);
  in (Cat (y, rails'), y') end

and vertical_range rail y =
  (case rail of
    Bar cats =>
      let val (cats', y') = fold_map vertical_range_cat cats y
      in (Bar cats', Int.max (y + 1, y')) end
  | Plus (cat1, cat2) =>
      let
        (*second branch starts where the first one ends*)
        val (cat1', y1) = vertical_range_cat cat1 y;
        val (cat2', y') = vertical_range_cat cat2 y1;
      in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
  | Newline _ => (Newline (y + 2), y + 3)
  | atom => (atom, y + 1));
|
227 |
||
42508
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
wenzelm
parents:
42507
diff
changeset
|
228 |
(*LaTeX source of a railoutput environment for the given rules, each
  being a pair of name and rail expression; antiquotations are
  evaluated via Thy_Output.eval_antiq wrt. the given state*)
fun output_rules state rules =
  let
    val output_antiq = Thy_Output.eval_antiq (#1 (Keyword.get_lexicons ())) state;

    (*plain text inside \isa{...}, additionally marked as keyword if b*)
    fun output_text b s =
      let val txt = Output.output s
      in enclose "\\isa{" "}" (if b then enclose "\\isakeyword{" "}" txt else txt) end;

    (*atomic node: c is a prefix of the macro name, tag is "nont" or "term"*)
    fun node c tag txt = "\\rail@" ^ c ^ tag ^ "{" ^ txt ^ "}[]\n";

    (*the prefix c is only passed on to a sequence consisting of a
      single rail; longer sequences are output with empty prefix*)
    fun output_cat c (Cat (_, rails)) = outputs c rails
    and outputs c rails =
      (case rails of
        [rail] => output c rail
      | _ => implode (map (output "") rails))
    and output c rail =
      (case rail of
        Bar [] => ""
      | Bar [cat1] => output_cat c cat1
      | Bar (cat1 :: cats) =>
          let
            fun next_bar (Cat (y, rails)) =
              "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails;
          in
            "\\rail@bar\n" ^ output_cat "" cat1 ^
            implode (map next_bar cats) ^
            "\\rail@endbar\n"
          end
      | Plus (cat1, Cat (y, rails)) =>
          "\\rail@plus\n" ^ output_cat c cat1 ^
          "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
          "\\rail@endplus\n"
      | Newline y => "\\rail@cr{" ^ string_of_int y ^ "}\n"
      | Nonterminal s => node c "nont" (output_text false s)
      | Terminal (b, s) => node c "term" (output_text b s)
      | Antiquote (b, a) => node c (if b then "term" else "nont") (output_antiq a));

    (*single rule: layout is computed first, then the name is output,
      then the body*)
    fun output_rule (name, rail) =
      let
        val (rail', y') = vertical_range rail 0;
        val out_name =
          (case name of
            Antiquote.Text "" => ""
          | Antiquote.Text s => output_text false s
          | Antiquote.Antiq a => output_antiq a);
        val header = "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n";
      in header ^ output "" rail' ^ "\\rail@end\n" end;
  in
    "\\begin{railoutput}\n" ^
    implode (map output_rule rules) ^
    "\\end{railoutput}\n"
  end;
274 |
||
275 |
in |
|
276 |
||
53171 | 277 |
(*document antiquotation @{rail}: the argument is a string or cartouche
  with its source position, which is read as rail rules and output as
  LaTeX*)
val _ =
  Theory.setup
    (Thy_Output.antiquotation @{binding rail}
      (Scan.lift (Parse.source_position (Parse.string || Parse.cartouche)))
      (fn {state, context, ...} => fn source =>
        output_rules state (read context source)));
42504 | 281 |
|
282 |
end; |
|
283 |
||
284 |
end; |
|
285 |