src/Pure/Tools/rail.ML
author wenzelm
Sun Oct 18 21:30:01 2015 +0200 (2015-10-18)
changeset 61476 1884c40f1539
parent 61473 34d1913f0b20
child 62748 aa0084adce1f
permissions -rw-r--r--
tuned signature;
     1 (*  Title:      Pure/Tools/rail.ML
     2     Author:     Michael Kerscher, TU M√ľnchen
     3     Author:     Makarius
     4 
     5 Railroad diagrams in LaTeX.
     6 *)
     7 
     8 structure Rail: sig end =
     9 struct
    10 
    11 (** lexical syntax **)
    12 
    13 (* singleton keywords *)
    14 
    15 val keywords =
    16   Symtab.make [
    17     ("|", Markup.keyword3),
    18     ("*", Markup.keyword3),
    19     ("+", Markup.keyword3),
    20     ("?", Markup.keyword3),
    21     ("(", Markup.empty),
    22     (")", Markup.empty),
    23     ("\<newline>", Markup.keyword2),
    24     (";", Markup.keyword2),
    25     (":", Markup.keyword2),
    26     ("@", Markup.keyword1)];
    27 
    28 
    29 (* datatype token *)
    30 
    31 datatype kind =
    32   Keyword | Ident | String | Antiq of Antiquote.antiq | EOF;
    33 
    34 datatype token = Token of Position.range * (kind * string);
    35 
    36 fun pos_of (Token ((pos, _), _)) = pos;
    37 fun end_pos_of (Token ((_, pos), _)) = pos;
    38 
    39 fun range_of (toks as tok :: _) =
    40       let val pos' = end_pos_of (List.last toks)
    41       in Position.range (pos_of tok) pos' end
    42   | range_of [] = Position.no_range;
    43 
    44 fun kind_of (Token (_, (k, _))) = k;
    45 fun content_of (Token (_, (_, x))) = x;
    46 
    47 
    48 (* diagnostics *)
    49 
    50 val print_kind =
    51  fn Keyword => "rail keyword"
    52   | Ident => "identifier"
    53   | String => "single-quoted string"
    54   | Antiq _ => "antiquotation"
    55   | EOF => "end-of-input";
    56 
    57 fun print (Token ((pos, _), (k, x))) =
    58   (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^
    59   Position.here pos;
    60 
    61 fun print_keyword x = print_kind Keyword ^ " " ^ quote x;
    62 
    63 fun reports_of_token (Token ((pos, _), (String, _))) = [(pos, Markup.inner_string)]
    64   | reports_of_token (Token ((pos, _), (Keyword, x))) =
    65       map (pair pos) (the_list (Symtab.lookup keywords x) @ Completion.suppress_abbrevs x)
    66   | reports_of_token (Token (_, (Antiq antiq, _))) = Antiquote.antiq_reports [Antiquote.Antiq antiq]
    67   | reports_of_token _ = [];
    68 
    69 
    70 (* stopper *)
    71 
    72 fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
    73 val eof = mk_eof Position.none;
    74 
    75 fun is_eof (Token (_, (EOF, _))) = true
    76   | is_eof _ = false;
    77 
    78 val stopper =
    79   Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;
    80 
    81 
    82 (* tokenize *)
    83 
    84 local
    85 
    86 fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];
    87 
    88 fun antiq_token antiq =
    89   [Token (#range antiq, (Antiq antiq, Symbol_Pos.content (#body antiq)))];
    90 
    91 val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol);
    92 
    93 val scan_keyword =
    94   Scan.one (Symtab.defined keywords o Symbol_Pos.symbol);
    95 
    96 val err_prefix = "Rail lexical error: ";
    97 
    98 val scan_token =
    99   scan_space >> K [] ||
   100   Antiquote.scan_antiq >> antiq_token ||
   101   scan_keyword >> (token Keyword o single) ||
   102   Lexicon.scan_id >> token Ident ||
   103   Symbol_Pos.scan_string_q err_prefix >> (fn (pos1, (ss, pos2)) =>
   104     [Token (Position.range pos1 pos2, (String, Symbol_Pos.content ss))]);
   105 
   106 val scan =
   107   Scan.repeats scan_token --|
   108     Symbol_Pos.!!! (fn () => err_prefix ^ "bad input")
   109       (Scan.ahead (Scan.one Symbol_Pos.is_eof));
   110 
   111 in
   112 
   113 val tokenize = #1 o Scan.error (Scan.finite Symbol_Pos.stopper scan);
   114 
   115 end;
   116 
   117 
   118 
   119 (** parsing **)
   120 
   121 (* parser combinators *)
   122 
   123 fun !!! scan =
   124   let
   125     val prefix = "Rail syntax error";
   126 
   127     fun get_pos [] = " (end-of-input)"
   128       | get_pos (tok :: _) = Position.here (pos_of tok);
   129 
   130     fun err (toks, NONE) = (fn () => prefix ^ get_pos toks)
   131       | err (toks, SOME msg) =
   132           (fn () =>
   133             let val s = msg () in
   134               if String.isPrefix prefix s then s
   135               else prefix ^ get_pos toks ^ ": " ^ s
   136             end);
   137   in Scan.!! err scan end;
   138 
   139 fun $$$ x =
   140   Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) ||
   141   Scan.fail_with
   142     (fn [] => (fn () => print_keyword x ^ " expected,\nbut end-of-input was found")
   143       | tok :: _ => (fn () => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found"));
   144 
   145 fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan);
   146 fun enum sep scan = enum1 sep scan || Scan.succeed [];
   147 
   148 val ident = Scan.some (fn tok => if kind_of tok = Ident then SOME (content_of tok) else NONE);
   149 val string = Scan.some (fn tok => if kind_of tok = String then SOME (content_of tok) else NONE);
   150 
   151 val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE));
   152 
   153 fun RANGE scan = Scan.trace scan >> apsnd range_of;
   154 fun RANGE_APP scan = RANGE scan >> (fn (f, r) => f r);
   155 
   156 
   157 (* parse trees *)
   158 
   159 datatype trees =
   160   CAT of tree list * Position.range
   161 and tree =
   162   BAR of trees list * Position.range |
   163   STAR of (trees * trees) * Position.range |
   164   PLUS of (trees * trees) * Position.range |
   165   MAYBE of tree * Position.range |
   166   NEWLINE of Position.range |
   167   NONTERMINAL of string * Position.range |
   168   TERMINAL of (bool * string) * Position.range |
   169   ANTIQUOTE of (bool * Antiquote.antiq) * Position.range;
   170 
   171 fun reports_of_tree ctxt =
   172   if Context_Position.is_visible ctxt then
   173     let
   174       fun reports r =
   175         if r = Position.no_range then []
   176         else [(Position.set_range r, Markup.expression)];
   177       fun trees (CAT (ts, r)) = reports r @ maps tree ts
   178       and tree (BAR (Ts, r)) = reports r @ maps trees Ts
   179         | tree (STAR ((T1, T2), r)) = reports r @ trees T1 @ trees T2
   180         | tree (PLUS ((T1, T2), r)) = reports r @ trees T1 @ trees T2
   181         | tree (MAYBE (t, r)) = reports r @ tree t
   182         | tree (NEWLINE r) = reports r
   183         | tree (NONTERMINAL (_, r)) = reports r
   184         | tree (TERMINAL (_, r)) = reports r
   185         | tree (ANTIQUOTE (_, r)) = reports r;
   186     in distinct (op =) o tree end
   187   else K [];
   188 
   189 local
   190 
   191 val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true);
   192 
   193 fun body x = (RANGE (enum1 "|" body1) >> BAR) x
   194 and body0 x = (RANGE (enum "|" body1) >> BAR) x
   195 and body1 x =
   196  (RANGE_APP (body2 :|-- (fn a =>
   197    $$$ "*" |-- !!! body4e >> (fn b => fn r => CAT ([STAR ((a, b), r)], r)) ||
   198    $$$ "+" |-- !!! body4e >> (fn b => fn r => CAT ([PLUS ((a, b), r)], r)) ||
   199    Scan.succeed (K a)))) x
   200 and body2 x = (RANGE (Scan.repeat1 body3) >> CAT) x
   201 and body3 x =
   202  (RANGE_APP (body4 :|-- (fn a =>
   203    $$$ "?" >> K (curry MAYBE a) ||
   204    Scan.succeed (K a)))) x
   205 and body4 x =
   206  ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
   207   RANGE_APP
   208    ($$$ "\<newline>" >> K NEWLINE ||
   209     ident >> curry NONTERMINAL ||
   210     at_mode -- string >> curry TERMINAL ||
   211     at_mode -- antiq >> curry ANTIQUOTE)) x
   212 and body4e x =
   213   (RANGE (Scan.option body4) >> (fn (a, r) => CAT (the_list a, r))) x;
   214 
   215 val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;
   216 val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text "");
   217 val rules = enum1 ";" (Scan.option rule) >> map_filter I;
   218 
   219 in
   220 
   221 fun parse_rules toks =
   222   #1 (Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) toks);
   223 
   224 end;
   225 
   226 
   227 (** rail expressions **)
   228 
   229 (* datatype *)
   230 
   231 datatype rails =
   232   Cat of int * rail list
   233 and rail =
   234   Bar of rails list |
   235   Plus of rails * rails |
   236   Newline of int |
   237   Nonterminal of string |
   238   Terminal of bool * string |
   239   Antiquote of bool * Antiquote.antiq;
   240 
   241 fun is_newline (Newline _) = true | is_newline _ = false;
   242 
   243 
   244 (* prepare *)
   245 
   246 local
   247 
   248 fun cat rails = Cat (0, rails);
   249 
   250 val empty = cat [];
   251 fun is_empty (Cat (_, [])) = true | is_empty _ = false;
   252 
   253 fun bar [Cat (_, [rail])] = rail
   254   | bar cats = Bar cats;
   255 
   256 fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
   257 and reverse (Bar cats) = Bar (map reverse_cat cats)
   258   | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
   259   | reverse x = x;
   260 
   261 fun plus (cat1, cat2) = Plus (cat1, reverse_cat cat2);
   262 
   263 in
   264 
   265 fun prepare_trees (CAT (ts, _)) = Cat (0, map prepare_tree ts)
   266 and prepare_tree (BAR (Ts, _)) = bar (map prepare_trees Ts)
   267   | prepare_tree (STAR (Ts, _)) =
   268       let val (cat1, cat2) = apply2 prepare_trees Ts in
   269         if is_empty cat2 then plus (empty, cat1)
   270         else bar [empty, cat [plus (cat1, cat2)]]
   271       end
   272   | prepare_tree (PLUS (Ts, _)) = plus (apply2 prepare_trees Ts)
   273   | prepare_tree (MAYBE (t, _)) = bar [empty, cat [prepare_tree t]]
   274   | prepare_tree (NEWLINE _) = Newline 0
   275   | prepare_tree (NONTERMINAL (a, _)) = Nonterminal a
   276   | prepare_tree (TERMINAL (a, _)) = Terminal a
   277   | prepare_tree (ANTIQUOTE (a, _)) = Antiquote a;
   278 
   279 end;
   280 
   281 
   282 (* read *)
   283 
   284 fun read ctxt source =
   285   let
   286     val _ = Context_Position.report ctxt (Input.pos_of source) Markup.language_rail;
   287     val toks = tokenize (Input.source_explode source);
   288     val _ = Context_Position.reports ctxt (maps reports_of_token toks);
   289     val rules = parse_rules toks;
   290     val _ = Position.reports (maps (reports_of_tree ctxt o #2) rules);
   291   in map (apsnd prepare_tree) rules end;
   292 
   293 
   294 (* latex output *)
   295 
   296 local
   297 
   298 fun vertical_range_cat (Cat (_, rails)) y =
   299   let val (rails', (_, y')) =
   300     fold_map (fn rail => fn (y0, y') =>
   301       if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2))
   302       else
   303         let val (rail', y0') = vertical_range rail y0;
   304         in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1)
   305   in (Cat (y, rails'), y') end
   306 
   307 and vertical_range (Bar cats) y =
   308       let val (cats', y') = fold_map vertical_range_cat cats y
   309       in (Bar cats', Int.max (y + 1, y')) end
   310   | vertical_range (Plus (cat1, cat2)) y =
   311       let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y;
   312       in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
   313   | vertical_range (Newline _) y = (Newline (y + 2), y + 3)
   314   | vertical_range atom y = (atom, y + 1);
   315 
   316 fun output_rules state rules =
   317   let
   318     val output_antiq = Thy_Output.eval_antiquote state o Antiquote.Antiq;
   319     fun output_text b s =
   320       Output.output s
   321       |> b ? enclose "\\isakeyword{" "}"
   322       |> enclose "\\isa{" "}";
   323 
   324     fun output_cat c (Cat (_, rails)) = outputs c rails
   325     and outputs c [rail] = output c rail
   326       | outputs _ rails = implode (map (output "") rails)
   327     and output _ (Bar []) = ""
   328       | output c (Bar [cat]) = output_cat c cat
   329       | output _ (Bar (cat :: cats)) =
   330           "\\rail@bar\n" ^ output_cat "" cat ^
   331           implode (map (fn Cat (y, rails) =>
   332               "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
   333           "\\rail@endbar\n"
   334       | output c (Plus (cat, Cat (y, rails))) =
   335           "\\rail@plus\n" ^ output_cat c cat ^
   336           "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
   337           "\\rail@endplus\n"
   338       | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
   339       | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n"
   340       | output c (Terminal (b, s)) = "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n"
   341       | output c (Antiquote (b, a)) =
   342           "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n";
   343 
   344     fun output_rule (name, rail) =
   345       let
   346         val (rail', y') = vertical_range rail 0;
   347         val out_name =
   348           (case name of
   349             Antiquote.Text "" => ""
   350           | Antiquote.Text s => output_text false s
   351           | Antiquote.Antiq a => output_antiq a);
   352       in
   353         "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n" ^
   354         output "" rail' ^
   355         "\\rail@end\n"
   356       end;
   357   in Latex.environment "railoutput" (implode (map output_rule rules)) end;
   358 
   359 in
   360 
   361 val _ = Theory.setup
   362   (Thy_Output.antiquotation @{binding rail} (Scan.lift Args.text_input)
   363     (fn {state, context, ...} => output_rules state o read context));
   364 
   365 end;
   366 
   367 end;
   368