| author | haftmann | 
| Fri, 30 Mar 2012 09:04:29 +0200 | |
| changeset 47202 | 69cee87927f0 | 
| parent 43947 | 9b00f09f7721 | 
| child 48764 | 4fe0920d5049 | 
| permissions | -rw-r--r-- | 
| 42504 | 1 | (* Title: Pure/Thy/rail.ML | 
| 2 | Author: Michael Kerscher, TU München | |
| 3 | Author: Makarius | |
| 4 | ||
| 5 | Railroad diagrams in LaTeX. | |
| 6 | *) | |
| 7 | ||
| 8 | structure Rail: sig end = | |
| 9 | struct | |
| 10 | ||
| 11 | (** lexical syntax **) | |
| 12 | ||
| 13 | (* datatype token *) | |
| 14 | ||
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 15 | datatype kind = | 
| 42516 | 16 | Keyword | Ident | String | Antiq of Symbol_Pos.T list * Position.range | EOF; | 
| 42504 | 17 | |
| 18 | datatype token = Token of Position.range * (kind * string); | |
| 19 | ||
| 20 | fun pos_of (Token ((pos, _), _)) = pos; | |
| 21 | fun end_pos_of (Token ((_, pos), _)) = pos; | |
| 22 | ||
| 23 | fun kind_of (Token (_, (k, _))) = k; | |
| 24 | fun content_of (Token (_, (_, x))) = x; | |
| 25 | ||
| 26 | ||
| 27 | (* diagnostics *) | |
| 28 | ||
| 29 | val print_kind = | |
| 30 | fn Keyword => "rail keyword" | |
| 31 | | Ident => "identifier" | |
| 32 | | String => "single-quoted string" | |
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 33 | | Antiq _ => "antiquotation" | 
| 42504 | 34 | | EOF => "end-of-file"; | 
| 35 | ||
| 36 | fun print (Token ((pos, _), (k, x))) = | |
| 37 | (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^ | |
| 38 | Position.str_of pos; | |
| 39 | ||
| 40 | fun print_keyword x = print_kind Keyword ^ " " ^ quote x; | |
| 41 | ||
| 42 | ||
| 43 | (* stopper *) | |
| 44 | ||
| 45 | fun mk_eof pos = Token ((pos, Position.none), (EOF, "")); | |
| 46 | val eof = mk_eof Position.none; | |
| 47 | ||
| 48 | fun is_eof (Token (_, (EOF, _))) = true | |
| 49 | | is_eof _ = false; | |
| 50 | ||
| 51 | val stopper = | |
| 52 | Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof; | |
| 53 | ||
| 54 | ||
| 55 | (* tokenize *) | |
| 56 | ||
| 57 | local | |
| 58 | ||
| 59 | fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))]; | |
| 60 | ||
| 61 | val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol); | |
| 62 | ||
| 63 | val scan_keyword = | |
| 42516 | 64 |   Scan.one (member (op =) ["|", "*", "+", "?", "(", ")", "\\", ";", ":", "@"] o Symbol_Pos.symbol);
 | 
| 42504 | 65 | |
| 66 | val scan_token = | |
| 67 | scan_space >> K [] || | |
| 42516 | 68 | Antiquote.scan_antiq >> (fn antiq as (ss, _) => token (Antiq antiq) ss) || | 
| 42504 | 69 | scan_keyword >> (token Keyword o single) || | 
| 70 | Lexicon.scan_id >> token Ident || | |
| 42516 | 71 | Symbol_Pos.scan_string_q >> (token String o #1 o #2); | 
| 42504 | 72 | |
| 42506 
876887b07e8d
more robust error handling (NB: Source.source requires total scanner or recover);
 wenzelm parents: 
42504diff
changeset | 73 | val scan = | 
| 
876887b07e8d
more robust error handling (NB: Source.source requires total scanner or recover);
 wenzelm parents: 
42504diff
changeset | 74 | (Scan.repeat scan_token >> flat) --| | 
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 75 | Symbol_Pos.!!! (fn () => "Rail lexical error: bad input") | 
| 42506 
876887b07e8d
more robust error handling (NB: Source.source requires total scanner or recover);
 wenzelm parents: 
42504diff
changeset | 76 | (Scan.ahead (Scan.one Symbol_Pos.is_eof)); | 
| 
876887b07e8d
more robust error handling (NB: Source.source requires total scanner or recover);
 wenzelm parents: 
42504diff
changeset | 77 | |
| 42504 | 78 | in | 
| 79 | ||
| 42507 | 80 | val tokenize = #1 o Scan.error (Scan.finite Symbol_Pos.stopper scan) o Symbol_Pos.explode; | 
| 42504 | 81 | |
| 82 | end; | |
| 83 | ||
| 84 | ||
| 85 | ||
| 86 | (** parsing **) | |
| 87 | ||
| 88 | fun !!! scan = | |
| 89 | let | |
| 90 | val prefix = "Rail syntax error"; | |
| 91 | ||
| 92 | fun get_pos [] = " (past end-of-file!)" | |
| 93 | | get_pos (tok :: _) = Position.str_of (pos_of tok); | |
| 94 | ||
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 95 | fun err (toks, NONE) = (fn () => prefix ^ get_pos toks) | 
| 42504 | 96 | | err (toks, SOME msg) = | 
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 97 | (fn () => | 
| 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 98 | let val s = msg () in | 
| 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 99 | if String.isPrefix prefix s then s | 
| 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 100 | else prefix ^ get_pos toks ^ ": " ^ s | 
| 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 101 | end); | 
| 42504 | 102 | in Scan.!! err scan end; | 
| 103 | ||
| 104 | fun $$$ x = | |
| 105 | Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) || | |
| 106 | Scan.fail_with | |
| 43947 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 107 | (fn [] => (fn () => print_keyword x ^ " expected (past end-of-file!)") | 
| 
9b00f09f7721
defer evaluation of Scan.message, for improved performance in the frequent situation where failure is handled later (e.g. via ||);
 wenzelm parents: 
43564diff
changeset | 108 | | tok :: _ => (fn () => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found")); | 
| 42504 | 109 | |
| 110 | fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan); | |
| 111 | fun enum sep scan = enum1 sep scan || Scan.succeed []; | |
| 112 | ||
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 113 | val ident = Scan.some (fn tok => if kind_of tok = Ident then SOME (content_of tok) else NONE); | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 114 | val string = Scan.some (fn tok => if kind_of tok = String then SOME (content_of tok) else NONE); | 
| 42504 | 115 | |
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 116 | val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE)); | 
| 42504 | 117 | |
| 118 | ||
| 119 | ||
| 120 | (** rail expressions **) | |
| 121 | ||
| 122 | (* datatype *) | |
| 123 | ||
| 124 | datatype rails = | |
| 125 | Cat of int * rail list | |
| 126 | and rail = | |
| 127 | Bar of rails list | | |
| 128 | Plus of rails * rails | | |
| 129 | Newline of int | | |
| 130 | Nonterminal of string | | |
| 42516 | 131 | Terminal of bool * string | | 
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 132 | Antiquote of bool * (Symbol_Pos.T list * Position.range); | 
| 42504 | 133 | |
| 134 | fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails)) | |
| 135 | and reverse (Bar cats) = Bar (map reverse_cat cats) | |
| 136 | | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2) | |
| 137 | | reverse x = x; | |
| 138 | ||
| 139 | fun cat rails = Cat (0, rails); | |
| 140 | ||
| 141 | val empty = cat []; | |
| 142 | fun is_empty (Cat (_, [])) = true | is_empty _ = false; | |
| 143 | ||
| 144 | fun is_newline (Newline _) = true | is_newline _ = false; | |
| 145 | ||
| 146 | fun bar [Cat (_, [rail])] = rail | |
| 147 | | bar cats = Bar cats; | |
| 148 | ||
| 149 | fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2); | |
| 150 | ||
| 151 | fun star cat1 cat2 = | |
| 152 | if is_empty cat2 then plus empty cat1 | |
| 153 | else bar [empty, cat [plus cat1 cat2]]; | |
| 154 | ||
| 155 | fun maybe rail = bar [empty, cat [rail]]; | |
| 156 | ||
| 157 | ||
| 158 | (* read *) | |
| 159 | ||
| 160 | local | |
| 161 | ||
| 42516 | 162 | val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true); | 
| 163 | ||
| 42504 | 164 | fun body x = (enum1 "|" body1 >> bar) x | 
| 165 | and body0 x = (enum "|" body1 >> bar) x | |
| 166 | and body1 x = | |
| 167 | (body2 :|-- (fn a => | |
| 168 | $$$ "*" |-- !!! body4e >> (cat o single o star a) || | |
| 169 | $$$ "+" |-- !!! body4e >> (cat o single o plus a) || | |
| 170 | Scan.succeed a)) x | |
| 171 | and body2 x = (Scan.repeat1 body3 >> cat) x | |
| 172 | and body3 x = (body4 :|-- (fn a => $$$ "?" >> K (maybe a) || Scan.succeed a)) x | |
| 173 | and body4 x = | |
| 174 |  ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
 | |
| 175 | $$$ "\\" >> K (Newline 0) || | |
| 176 | ident >> Nonterminal || | |
| 42516 | 177 | at_mode -- string >> Terminal || | 
| 178 | at_mode -- antiq >> Antiquote) x | |
| 42504 | 179 | and body4e x = (Scan.option body4 >> (cat o the_list)) x; | 
| 180 | ||
| 42516 | 181 | val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq; | 
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 182 | val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text ""); | 
| 42504 | 183 | val rules = enum1 ";" (Scan.option rule) >> map_filter I; | 
| 184 | ||
| 185 | in | |
| 186 | ||
| 42507 | 187 | val read = | 
| 188 | #1 o Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) o tokenize; | |
| 42504 | 189 | |
| 190 | end; | |
| 191 | ||
| 192 | ||
| 193 | (* latex output *) | |
| 194 | ||
| 195 | local | |
| 196 | ||
| 197 | fun vertical_range_cat (Cat (_, rails)) y = | |
| 198 | let val (rails', (_, y')) = | |
| 199 | fold_map (fn rail => fn (y0, y') => | |
| 200 | if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2)) | |
| 201 | else | |
| 202 | let val (rail', y0') = vertical_range rail y0; | |
| 203 | in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1) | |
| 204 | in (Cat (y, rails'), y') end | |
| 205 | ||
| 206 | and vertical_range (Bar cats) y = | |
| 207 | let val (cats', y') = fold_map vertical_range_cat cats y | |
| 208 | in (Bar cats', Int.max (y + 1, y')) end | |
| 209 | | vertical_range (Plus (cat1, cat2)) y = | |
| 210 | let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y; | |
| 211 | in (Plus (cat1', cat2'), Int.max (y + 1, y')) end | |
| 212 | | vertical_range (Newline _) y = (Newline (y + 2), y + 3) | |
| 213 | | vertical_range atom y = (atom, y + 1); | |
| 214 | ||
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 215 | fun output_rules state rules = | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 216 | let | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 217 | val output_antiq = Thy_Output.eval_antiq (#1 (Keyword.get_lexicons ())) state; | 
| 42516 | 218 | fun output_text b s = | 
| 219 | Output.output s | |
| 220 |       |> b ? enclose "\\isakeyword{" "}"
 | |
| 221 |       |> enclose "\\isa{" "}";
 | |
| 42504 | 222 | |
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 223 | fun output_cat c (Cat (_, rails)) = outputs c rails | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 224 | and outputs c [rail] = output c rail | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 225 | | outputs _ rails = implode (map (output "") rails) | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 226 | and output _ (Bar []) = "" | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 227 | | output c (Bar [cat]) = output_cat c cat | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 228 | | output _ (Bar (cat :: cats)) = | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 229 | "\\rail@bar\n" ^ output_cat "" cat ^ | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 230 | implode (map (fn Cat (y, rails) => | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 231 |               "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
 | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 232 | "\\rail@endbar\n" | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 233 | | output c (Plus (cat, Cat (y, rails))) = | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 234 | "\\rail@plus\n" ^ output_cat c cat ^ | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 235 |           "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
 | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 236 | "\\rail@endplus\n" | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 237 |       | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
 | 
| 42516 | 238 |       | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n"
 | 
| 239 |       | output c (Terminal (b, s)) = "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n"
 | |
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 240 | | output c (Antiquote (b, a)) = | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 241 |           "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n";
 | 
| 42504 | 242 | |
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 243 | fun output_rule (name, rail) = | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 244 | let | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 245 | val (rail', y') = vertical_range rail 0; | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 246 | val out_name = | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 247 | (case name of | 
| 42661 
824d3f1d8de6
proper treatment of empty name -- avoid excessive vertical space;
 wenzelm parents: 
42657diff
changeset | 248 | Antiquote.Text "" => "" | 
| 
824d3f1d8de6
proper treatment of empty name -- avoid excessive vertical space;
 wenzelm parents: 
42657diff
changeset | 249 | | Antiquote.Text s => output_text false s | 
| 42508 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 250 | | Antiquote.Antiq a => output_antiq a); | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 251 | in | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 252 |         "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n" ^
 | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 253 | output "" rail' ^ | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 254 | "\\rail@end\n" | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 255 | end; | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 256 | in | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 257 |     "\\begin{railoutput}\n" ^
 | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 258 | implode (map output_rule rules) ^ | 
| 
e21362bf1d93
allow nested @{antiq} (nonterminal) and @@{antiq} terminal;
 wenzelm parents: 
42507diff
changeset | 259 |     "\\end{railoutput}\n"
 | 
| 42504 | 260 | end; | 
| 261 | ||
| 262 | in | |
| 263 | ||
| 264 | val _ = | |
| 43564 
9864182c6bad
document antiquotations are managed as theory data, with proper name space and entity markup;
 wenzelm parents: 
42661diff
changeset | 265 | Context.>> (Context.map_theory | 
| 
9864182c6bad
document antiquotations are managed as theory data, with proper name space and entity markup;
 wenzelm parents: 
42661diff
changeset | 266 | (Thy_Output.antiquotation (Binding.name "rail") | 
| 
9864182c6bad
document antiquotations are managed as theory data, with proper name space and entity markup;
 wenzelm parents: 
42661diff
changeset | 267 | (Scan.lift (Parse.source_position Parse.string)) | 
| 
9864182c6bad
document antiquotations are managed as theory data, with proper name space and entity markup;
 wenzelm parents: 
42661diff
changeset | 268 |       (fn {state, ...} => output_rules state o read)));
 | 
| 42504 | 269 | |
| 270 | end; | |
| 271 | ||
| 272 | end; | |
| 273 |