src/Pure/Tools/rail.ML
author wenzelm
Sat Nov 04 15:24:40 2017 +0100 (21 months ago)
changeset 67003 49850a679c2c
parent 62806 de9bf8171626
child 67147 dea94b1aabc3
permissions -rw-r--r--
more robust sorted_entries;
     1 (*  Title:      Pure/Tools/rail.ML
     2     Author:     Michael Kerscher, TU München
     3     Author:     Makarius
     4 
     5 Railroad diagrams in LaTeX.
     6 *)
     7 
     (*Public interface of the rail module: the prepared rail expressions, the
       reader for rail source text, and the LaTeX printer for parsed rules.*)
     signature RAIL =
     sig
       (*Cat (y, rails): a sequence of rails together with an integer position.
         Newline y: line break, y is printed as the argument of the rail@cr macro.
         Terminal (b, s): b = true wraps the text in isakeyword on output.
         Antiquote (b, a): b = true prints as "term", otherwise as "nont".*)
       datatype rails =
         Cat of int * rail list
       and rail =
           Bar of rails list
         | Plus of rails * rails
         | Newline of int
         | Nonterminal of string
         | Terminal of bool * string
         | Antiquote of bool * Antiquote.antiq

       (*tokenize and parse rail source into named rules, reporting markup*)
       val read: Proof.context -> Input.source -> (string Antiquote.antiquote * rail) list

       (*render rules as the content of a "railoutput" LaTeX environment*)
       val output_rules: Toplevel.state -> (string Antiquote.antiquote * rail) list -> string
     end;
    22 
    23 structure Rail: RAIL =
    24 struct
    25 
    26 (** lexical syntax **)
    27 
    28 (* singleton keywords *)
    29 
    (*Table of the single-symbol rail keywords, each mapped to the markup that
      reports_of_token emits for it.  Entries are grouped by markup.*)
    val keywords =
      let
        fun entries markup = map (fn sym => (sym, markup));
      in
        Symtab.make
          (entries Markup.keyword3 ["|", "*", "+", "?"] @
           entries Markup.empty ["(", ")"] @
           entries Markup.keyword2 ["\<newline>", ";", ":"] @
           entries Markup.keyword1 ["@"])
      end;
    42 
    43 
    44 (* datatype token *)
    45 
    (*token classes produced by the rail lexer*)
    datatype kind =
        Keyword
      | Ident
      | String
      | Antiq of Antiquote.antiq
      | EOF;

    (*a token carries its source range, its kind, and its textual content*)
    datatype token = Token of Position.range * (kind * string);

    (*start and end position of a single token*)
    fun pos_of (Token (range, _)) = #1 range;
    fun end_pos_of (Token (range, _)) = #2 range;

    (*range from the start of the first token to the end of the last one;
      no_range for the empty list*)
    fun range_of [] = Position.no_range
      | range_of (toks as first :: _) =
          Position.range (pos_of first, end_pos_of (List.last toks));

    fun kind_of (Token (_, (kind, _))) = kind;
    fun content_of (Token (_, (_, content))) = content;
    61 
    62 
    63 (* diagnostics *)
    64 
    (*human-readable name of a token kind, used in error messages*)
    fun print_kind Keyword = "rail keyword"
      | print_kind Ident = "identifier"
      | print_kind String = "single-quoted string"
      | print_kind (Antiq _) = "antiquotation"
      | print_kind EOF = "end-of-input";

    (*describe a token with its position; the content is omitted for EOF*)
    fun print (Token ((pos, _), (kind, text))) =
      let
        val descr =
          (case kind of
            EOF => print_kind kind
          | _ => print_kind kind ^ " " ^ quote text);
      in descr ^ Position.here pos end;

    (*describe a keyword that is not (yet) a token*)
    fun print_keyword name = print_kind Keyword ^ " " ^ quote name;

    (*markup reports for one token: strings and keywords are reported at the
      token start position, antiquotations via Antiquote.antiq_reports,
      everything else yields no report*)
    fun reports_of_token (Token ((pos, _), (kind, text))) =
      (case kind of
        String => [(pos, Markup.inner_string)]
      | Keyword =>
          let
            val markups =
              the_list (Symtab.lookup keywords text) @ Completion.suppress_abbrevs text;
          in map (pair pos) markups end
      | Antiq antiq => Antiquote.antiq_reports [Antiquote.Antiq antiq]
      | _ => []);
    83 
    84 
    85 (* stopper *)
    86 
    (*EOF token starting at the given position, with empty content*)
    fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
    val eof = mk_eof Position.none;

    fun is_eof tok =
      (case kind_of tok of
        EOF => true
      | _ => false);

    (*stopper for token lists: the EOF token is placed at the end position of
      the last token, or at Position.none for empty input*)
    val stopper =
      let
        fun stop [] = eof
          | stop toks = mk_eof (end_pos_of (List.last toks));
      in Scan.stopper stop is_eof end;
    95 
    96 
    97 (* tokenize *)
    98 
    local

    val err_prefix = "Rail lexical error: ";

    (*singleton token list from scanned symbols of the given kind*)
    fun token kind syms = [Token (Symbol_Pos.range syms, (kind, Symbol_Pos.content syms))];

    (*antiquotation token: range and content are taken from the antiquotation*)
    fun antiq_token (antiq as {range, body, ...}: Antiquote.antiq) =
      [Token (range, (Antiq antiq, Symbol_Pos.content body))];

    (*string token from the result of Symbol_Pos.scan_string_q*)
    fun string_token (pos1, (syms, pos2)) =
      [Token (Position.range (pos1, pos2), (String, Symbol_Pos.content syms))];

    val scan_space = Scan.many1 (fn sym => Symbol.is_blank (Symbol_Pos.symbol sym));

    val scan_keyword = Scan.one (fn sym => Symtab.defined keywords (Symbol_Pos.symbol sym));

    (*one lexical item; blank space produces no token*)
    val scan_token =
      scan_space >> (fn _ => []) ||
      Antiquote.scan_antiq >> antiq_token ||
      scan_keyword >> (token Keyword o single) ||
      Lexicon.scan_id >> token Ident ||
      Symbol_Pos.scan_string_q err_prefix >> string_token;

    (*after the last token only end-of-input may follow, otherwise "bad input"*)
    val scan_end = Scan.ahead (Scan.one Symbol_Pos.is_eof);

    val scan =
      Scan.repeats scan_token --|
        Symbol_Pos.!!! (fn () => err_prefix ^ "bad input") scan_end;

    in

    (*turn positioned symbols into rail tokens; lexical failures become errors*)
    fun tokenize syms = #1 (Scan.error (Scan.finite Symbol_Pos.stopper scan) syms);

    end;
   131 
   132 
   133 
   134 (** parsing **)
   135 
   136 (* parser combinators *)
   137 
   (*Cut: a failure of scan becomes a "Rail syntax error" located at the next
     token.  A nested message that already starts with the prefix is passed
     through unchanged, so the prefix and position are not added twice.*)
   fun !!! scan =
     let
       val prefix = "Rail syntax error";

       fun here [] = " (end-of-input)"
         | here (tok :: _) = Position.here (pos_of tok);

       (*the message stays lazy: nothing is computed before () is supplied*)
       fun message toks opt_msg () =
         (case opt_msg of
           NONE => prefix ^ here toks
         | SOME msg =>
             let val text = msg () in
               if String.isPrefix prefix text then text
               else prefix ^ here toks ^ ": " ^ text
             end);
     in Scan.!! (fn (toks, opt_msg) => message toks opt_msg) scan end;
   153 
   (*Accept exactly the keyword token x; otherwise fail with a message that
     names the expected keyword and what was found instead.*)
   fun $$$ x =
     let
       fun is_keyword tok = kind_of tok = Keyword andalso content_of tok = x;

       fun expected [] () = print_keyword x ^ " expected,\nbut end-of-input was found"
         | expected (tok :: _) () =
             print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found";
     in Scan.one is_keyword || Scan.fail_with expected end;
   159 
   (*non-empty list of items separated by keyword sep; after a separator the
     next item is mandatory*)
   fun enum1 sep scan = scan -- Scan.repeat ($$$ sep |-- !!! scan) >> (op ::);

   (*possibly empty variant of enum1*)
   fun enum sep scan = enum1 sep scan || Scan.succeed [];

   (*content of an identifier token / string token*)
   val ident =
     Scan.some (fn tok => (case kind_of tok of Ident => SOME (content_of tok) | _ => NONE));
   val string =
     Scan.some (fn tok => (case kind_of tok of String => SOME (content_of tok) | _ => NONE));

   (*antiquotation carried by an antiquotation token*)
   val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE));

   (*pair the result of scan with the source range of the consumed tokens*)
   fun RANGE scan = Scan.trace scan >> (fn (result, toks) => (result, range_of toks));

   (*scan yields a function, which is applied to the range of the consumed tokens*)
   fun RANGE_APP scan = RANGE scan >> (fn (f, range) => f range);
   170 
   171 
   172 (* parse trees *)
   173 
   (*Parse trees: every node is paired with the source range it was parsed
     from; reports_of_tree turns these ranges into markup reports.*)
   datatype trees =
     CAT of tree list * Position.range
   and tree =
       BAR of trees list * Position.range
     | STAR of (trees * trees) * Position.range
     | PLUS of (trees * trees) * Position.range
     | MAYBE of tree * Position.range
     | NEWLINE of Position.range
     | NONTERMINAL of string * Position.range
     | TERMINAL of (bool * string) * Position.range
     | ANTIQUOTE of (bool * Antiquote.antiq) * Position.range;
   185 
   (*Markup reports (Markup.expression "") for all nodes of a parse tree that
     carry a proper range, without duplicates.  Yields no reports at all when
     the context is not visible.*)
   fun reports_of_tree ctxt =
     if not (Context_Position.is_visible ctxt) then K []
     else
       let
         fun report range =
           if range = Position.no_range then []
           else [(Position.range_position range, Markup.expression "")];

         fun trees (CAT (ts, range)) = report range @ maps tree ts
         and tree t =
           (case t of
             BAR (Ts, range) => report range @ maps trees Ts
           | STAR ((T1, T2), range) => report range @ trees T1 @ trees T2
           | PLUS ((T1, T2), range) => report range @ trees T1 @ trees T2
           | MAYBE (t', range) => report range @ tree t'
           | NEWLINE range => report range
           | NONTERMINAL (_, range) => report range
           | TERMINAL (_, range) => report range
           | ANTIQUOTE (_, range) => report range);
       in fn t => distinct (op =) (tree t) end;
   203 
   local

   (*optional "@" prefix; the result is true when the prefix is present*)
   val at_mode = Scan.option ($$$ "@") >> Option.isSome;

   (*result of "items * sep" or "items + sep": the repetition node, wrapped
     into a singleton CAT over the same range*)
   fun repetition node (items, sep) range = CAT ([node ((items, sep), range)], range);

   (*atomic items; each alternative still expects the source range*)
   val atom =
     $$$ "\<newline>" >> (fn _ => NEWLINE) ||
     ident >> (fn name => fn range => NONTERMINAL (name, range)) ||
     at_mode -- string >> (fn arg => fn range => TERMINAL (arg, range)) ||
     at_mode -- antiq >> (fn arg => fn range => ANTIQUOTE (arg, range));

   (*body: non-empty alternatives; body0: possibly empty alternatives;
     body1: sequence with optional "*" or "+" suffix; body2: non-empty
     sequence; body3: item with optional "?"; body4: parenthesized body0 or
     atom; body4e: optional body4 as a (possibly empty) sequence*)
   fun body toks = (RANGE (enum1 "|" body1) >> BAR) toks
   and body0 toks = (RANGE (enum "|" body1) >> BAR) toks
   and body1 toks =
     let
       fun suffix items =
         $$$ "*" |-- !!! body4e >> (fn sep => repetition STAR (items, sep)) ||
         $$$ "+" |-- !!! body4e >> (fn sep => repetition PLUS (items, sep)) ||
         Scan.succeed (fn _ => items);
     in RANGE_APP (body2 :|-- suffix) toks end
   and body2 toks = (RANGE (Scan.repeat1 body3) >> CAT) toks
   and body3 toks =
     let
       fun suffix item =
         $$$ "?" >> (fn _ => fn range => MAYBE (item, range)) ||
         Scan.succeed (fn _ => item);
     in RANGE_APP (body4 :|-- suffix) toks end
   and body4 toks =
     ($$$ "(" |-- !!! (body0 --| $$$ ")") || RANGE_APP atom) toks
   and body4e toks =
     (RANGE (Scan.option body4) >> (fn (opt, range) => CAT (the_list opt, range))) toks;

   val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;

   (*"name : body", or a bare body under the empty name*)
   val named_rule = rule_name -- ($$$ ":" |-- !!! body);
   val anonymous_rule = body >> (fn t => (Antiquote.Text "", t));
   val rule = named_rule || anonymous_rule;

   (*rules separated by ";"; empty slots between separators are dropped*)
   val rules = enum1 ";" (Scan.option rule) >> (fn opts => map_filter I opts);

   in

   (*parse a complete token list into rules; anything left before
     end-of-input is a syntax error*)
   fun parse_rules toks =
     let
       val end_of_input = !!! (Scan.ahead (Scan.one is_eof));
       val (result, _) = Scan.error (Scan.finite stopper (rules --| end_of_input)) toks;
     in result end;

   end;
   240 
   241 
   242 (** rail expressions **)
   243 
   244 (* datatype *)
   245 
   (*Prepared rail expressions.  The integers of Cat and Newline are 0 after
     prepare_tree and are filled in by vertical_range before LaTeX output.*)
   datatype rails =
     Cat of int * rail list
   and rail =
       Bar of rails list
     | Plus of rails * rails
     | Newline of int
     | Nonterminal of string
     | Terminal of bool * string
     | Antiquote of bool * Antiquote.antiq;

   fun is_newline rail =
     (case rail of
       Newline _ => true
     | _ => false);
   257 
   258 
   259 (* prepare *)
   260 
   local

   fun cat rails = Cat (0, rails);

   val empty = cat [];

   (*alternative between nothing and the given rail*)
   fun optional rail = Bar [empty, cat [rail]];

   (*a single alternative that holds a single rail collapses to that rail*)
   fun bar cats =
     (case cats of
       [Cat (_, [rail])] => rail
     | _ => Bar cats);

   (*reverse the order of items, recursively through Bar and Plus*)
   fun reverse_cat (Cat (y, rails)) = Cat (y, map reverse (rev rails))
   and reverse rail =
     (case rail of
       Bar cats => Bar (map reverse_cat cats)
     | Plus (cat1, cat2) => Plus (reverse_cat cat1, reverse_cat cat2)
     | _ => rail);

   (*the second component of Plus is stored in reversed order*)
   fun plus (forward, backward) = Plus (forward, reverse_cat backward);

   in

   (*Translate parse trees into rail expressions, dropping source ranges.
     STAR with empty separator becomes a Plus of nothing over the items;
     otherwise STAR is an optional Plus.  MAYBE is an optional item.*)
   fun prepare_trees (CAT (ts, _)) = Cat (0, map prepare_tree ts)
   and prepare_tree t =
     (case t of
       BAR (Ts, _) => bar (map prepare_trees Ts)
     | STAR ((T1, T2), _) =>
         let
           val items = prepare_trees T1;
           val sep = prepare_trees T2;
         in
           (case sep of
             Cat (_, []) => plus (empty, items)
           | _ => optional (plus (items, sep)))
         end
     | PLUS ((T1, T2), _) => plus (prepare_trees T1, prepare_trees T2)
     | MAYBE (t', _) => optional (prepare_tree t')
     | NEWLINE _ => Newline 0
     | NONTERMINAL (a, _) => Nonterminal a
     | TERMINAL (a, _) => Terminal a
     | ANTIQUOTE (a, _) => Antiquote a);

   end;
   295 
   296 
   297 (* read *)
   298 
   (*Read rail source: report the language markup, tokenize and report token
     markup, parse the rules and report tree markup, then return the rules
     with their bodies converted by prepare_tree.*)
   fun read ctxt source =
     let
       val _ = Context_Position.report ctxt (Input.pos_of source) Markup.language_rail;
       val toks = source |> Input.source_explode |> tokenize;
       val _ = toks |> maps reports_of_token |> Context_Position.reports ctxt;
       val rules = parse_rules toks;
       val _ = rules |> maps (fn (_, t) => reports_of_tree ctxt t) |> Position.reports;
     in map (fn (name, t) => (name, prepare_tree t)) rules end;
   307 
   308 
   309 (* latex output *)
   310 
   311 local
   312 
   (*Assign integer positions to Cat and Newline nodes.  Both functions take
     a start position y and return the updated node together with the next
     position.*)
   fun vertical_range_cat (Cat (_, rails)) y =
     let
       (*The threaded pair holds the position passed to each rail and the
         running maximum of the positions returned so far.  A Newline takes
         the position just after that maximum and restarts from there.*)
       fun step (Newline _) (_, below) = (Newline (below + 1), (below + 1, below + 2))
         | step rail (row, below) =
             let val (rail', below') = vertical_range rail row
             in (rail', (row, Int.max (below', below))) end;

       val (rails', (_, y')) = fold_map step rails (y, y + 1);
     in (Cat (y, rails'), y') end

   and vertical_range rail y =
     (case rail of
       Bar cats =>
         let val (cats', y') = fold_map vertical_range_cat cats y
         in (Bar cats', Int.max (y + 1, y')) end
     | Plus (cat1, cat2) =>
         let
           (*the second part starts where the first part ended*)
           val (cat1', y1) = vertical_range_cat cat1 y;
           val (cat2', y2) = vertical_range_cat cat2 y1;
         in (Plus (cat1', cat2'), Int.max (y + 1, y2)) end
     | Newline _ => (Newline (y + 2), y + 3)
     | _ => (rail, y + 1));
   330 
   331 in
   332 
   (*Render rules as the content of a "railoutput" LaTeX environment.
     Antiquotations are evaluated via Thy_Output.eval_antiquote in the given
     toplevel state; plain text is escaped via Output.output.*)
   fun output_rules state rules =
     let
       fun output_antiq a = Thy_Output.eval_antiquote state (Antiquote.Antiq a);

       (*escaped text inside isa, additionally inside isakeyword on request*)
       fun output_text is_keyword text =
         let
           val raw = Output.output text;
           val inner = if is_keyword then enclose "\\isakeyword{" "}" raw else raw;
         in enclose "\\isa{" "}" inner end;

       (*macro call for an atomic item; c is inserted into the macro name
         ("" or "c"), kind is "nont{" or "term{"*)
       fun atom c kind body = "\\rail@" ^ c ^ kind ^ body ^ "}[]\n";

       fun output_cat c (Cat (_, rails)) = outputs c rails
       and outputs c rails =
         (case rails of
           [rail] => output c rail  (*only a single rail keeps the prefix c*)
         | _ => implode (map (output "") rails))
       and output c rail =
         (case rail of
           Bar [] => ""
         | Bar [cat] => output_cat c cat
         | Bar (cat :: cats) =>
             let
               fun next_bar (Cat (y, rails)) =
                 "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails;
             in
               "\\rail@bar\n" ^ output_cat "" cat ^
               implode (map next_bar cats) ^
               "\\rail@endbar\n"
             end
         | Plus (cat, Cat (y, rails)) =>
             "\\rail@plus\n" ^ output_cat c cat ^
             "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
             "\\rail@endplus\n"
         | Newline y => "\\rail@cr{" ^ string_of_int y ^ "}\n"
         | Nonterminal s => atom c "nont{" (output_text false s)
         | Terminal (b, s) => atom c "term{" (output_text b s)
         | Antiquote (b, a) => atom c (if b then "term{" else "nont{") (output_antiq a));

       (*one rule: positions are assigned starting from 0, and the resulting
         end position becomes the first argument of rail@begin*)
       fun output_rule (name, rail) =
         let
           val (rail', y_end) = vertical_range rail 0;
           val out_name =
             (case name of
               Antiquote.Text "" => ""
             | Antiquote.Text s => output_text false s
             | Antiquote.Antiq a => output_antiq a);
           val header = "\\rail@begin{" ^ string_of_int y_end ^ "}{" ^ out_name ^ "}\n";
         in header ^ output "" rail' ^ "\\rail@end\n" end;
     in
       rules |> map output_rule |> implode |> Latex.environment "railoutput"
     end;
   375 
   (*register the document antiquotation "rail": its text argument is read as
     rail source and printed via output_rules*)
   val _ =
     Theory.setup
       (Thy_Output.antiquotation @{binding rail} (Scan.lift Args.text_input)
         (fn {state, context = ctxt, ...} => fn source =>
           output_rules state (read ctxt source)));
   379 
   380 end;
   381 
   382 end;