src/Pure/Tools/rail.ML
author wenzelm
Sat Mar 15 16:54:32 2014 +0100 (2014-03-15)
changeset 56165 dd89ce51d2c8
parent 56163 331f4aba14b3
child 58465 bd06c6479748
permissions -rw-r--r--
tuned markup;
     1 (*  Title:      Pure/Tools/rail.ML
     2     Author:     Michael Kerscher, TU München
     3     Author:     Makarius
     4 
     5 Railroad diagrams in LaTeX.
     6 *)
     7 
     8 structure Rail: sig end =
     9 struct
    10 
    11 (** lexical syntax **)
    12 
    13 (* singleton keywords *)
    14 
    15 val keywords =
    16   Symtab.make [
    17     ("|", Markup.keyword3),
    18     ("*", Markup.keyword3),
    19     ("+", Markup.keyword3),
    20     ("?", Markup.keyword3),
    21     ("(", Markup.empty),
    22     (")", Markup.empty),
    23     ("\<newline>", Markup.keyword2),
    24     (";", Markup.keyword2),
    25     (":", Markup.keyword2),
    26     ("@", Markup.keyword1)];
    27 
    28 
    29 (* datatype token *)
    30 
    31 datatype kind =
    32   Keyword | Ident | String | Antiq of Antiquote.antiq | EOF;
    33 
    34 datatype token = Token of Position.range * (kind * string);
    35 
    36 fun pos_of (Token ((pos, _), _)) = pos;
    37 fun end_pos_of (Token ((_, pos), _)) = pos;
    38 
    39 fun kind_of (Token (_, (k, _))) = k;
    40 fun content_of (Token (_, (_, x))) = x;
    41 
    42 
    43 (* diagnostics *)
    44 
    45 val print_kind =
    46  fn Keyword => "rail keyword"
    47   | Ident => "identifier"
    48   | String => "single-quoted string"
    49   | Antiq _ => "antiquotation"
    50   | EOF => "end-of-input";
    51 
    52 fun print (Token ((pos, _), (k, x))) =
    53   (if k = EOF then print_kind k else print_kind k ^ " " ^ quote x) ^
    54   Position.here pos;
    55 
    56 fun print_keyword x = print_kind Keyword ^ " " ^ quote x;
    57 
    58 fun reports_of_token (Token ((pos, _), (String, _))) = [(pos, Markup.inner_string)]
    59   | reports_of_token (Token ((pos, _), (Keyword, x))) =
    60       map (pair pos) (the_list (Symtab.lookup keywords x) @ Completion.suppress_abbrevs x)
    61   | reports_of_token (Token (_, (Antiq antiq, _))) = Antiquote.antiq_reports antiq
    62   | reports_of_token _ = [];
    63 
    64 
    65 (* stopper *)
    66 
    67 fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
    68 val eof = mk_eof Position.none;
    69 
    70 fun is_eof (Token (_, (EOF, _))) = true
    71   | is_eof _ = false;
    72 
    73 val stopper =
    74   Scan.stopper (fn [] => eof | toks => mk_eof (end_pos_of (List.last toks))) is_eof;
    75 
    76 
    77 (* tokenize *)
    78 
    79 local
    80 
    81 fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];
    82 
    83 val scan_space = Scan.many1 (Symbol.is_blank o Symbol_Pos.symbol);
    84 
    85 val scan_keyword =
    86   Scan.one (Symtab.defined keywords o Symbol_Pos.symbol);
    87 
    88 val err_prefix = "Rail lexical error: ";
    89 
    90 val scan_token =
    91   scan_space >> K [] ||
    92   Antiquote.scan_antiq >> (fn antiq as (ss, _) => token (Antiq antiq) ss) ||
    93   scan_keyword >> (token Keyword o single) ||
    94   Lexicon.scan_id >> token Ident ||
    95   Symbol_Pos.scan_string_q err_prefix >> (fn (pos1, (ss, pos2)) =>
    96     [Token (Position.range pos1 pos2, (String, Symbol_Pos.content ss))]);
    97 
    98 val scan =
    99   (Scan.repeat scan_token >> flat) --|
   100     Symbol_Pos.!!! (fn () => err_prefix ^ "bad input")
   101       (Scan.ahead (Scan.one Symbol_Pos.is_eof));
   102 
   103 in
   104 
   105 val tokenize = #1 o Scan.error (Scan.finite Symbol_Pos.stopper scan);
   106 
   107 end;
   108 
   109 
   110 
   111 (** parsing **)
   112 
   113 fun !!! scan =
   114   let
   115     val prefix = "Rail syntax error";
   116 
   117     fun get_pos [] = " (end-of-input)"
   118       | get_pos (tok :: _) = Position.here (pos_of tok);
   119 
   120     fun err (toks, NONE) = (fn () => prefix ^ get_pos toks)
   121       | err (toks, SOME msg) =
   122           (fn () =>
   123             let val s = msg () in
   124               if String.isPrefix prefix s then s
   125               else prefix ^ get_pos toks ^ ": " ^ s
   126             end);
   127   in Scan.!! err scan end;
   128 
   129 fun $$$ x =
   130   Scan.one (fn tok => kind_of tok = Keyword andalso content_of tok = x) ||
   131   Scan.fail_with
   132     (fn [] => (fn () => print_keyword x ^ " expected,\nbut end-of-input was found")
   133       | tok :: _ => (fn () => print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found"));
   134 
   135 fun enum1 sep scan = scan ::: Scan.repeat ($$$ sep |-- !!! scan);
   136 fun enum sep scan = enum1 sep scan || Scan.succeed [];
   137 
   138 val ident = Scan.some (fn tok => if kind_of tok = Ident then SOME (content_of tok) else NONE);
   139 val string = Scan.some (fn tok => if kind_of tok = String then SOME (content_of tok) else NONE);
   140 
   141 val antiq = Scan.some (fn tok => (case kind_of tok of Antiq a => SOME a | _ => NONE));
   142 
   143 
   144 
   145 (** rail expressions **)
   146 
   147 (* datatype *)
   148 
   149 datatype rails =
   150   Cat of int * rail list
   151 and rail =
   152   Bar of rails list |
   153   Plus of rails * rails |
   154   Newline of int |
   155   Nonterminal of string |
   156   Terminal of bool * string |
   157   Antiquote of bool * Antiquote.antiq;
   158 
   159 fun reverse_cat (Cat (y, rails)) = Cat (y, rev (map reverse rails))
   160 and reverse (Bar cats) = Bar (map reverse_cat cats)
   161   | reverse (Plus (cat1, cat2)) = Plus (reverse_cat cat1, reverse_cat cat2)
   162   | reverse x = x;
   163 
   164 fun cat rails = Cat (0, rails);
   165 
   166 val empty = cat [];
   167 fun is_empty (Cat (_, [])) = true | is_empty _ = false;
   168 
   169 fun is_newline (Newline _) = true | is_newline _ = false;
   170 
   171 fun bar [Cat (_, [rail])] = rail
   172   | bar cats = Bar cats;
   173 
   174 fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);
   175 
   176 fun star cat1 cat2 =
   177   if is_empty cat2 then plus empty cat1
   178   else bar [empty, cat [plus cat1 cat2]];
   179 
   180 fun maybe rail = bar [empty, cat [rail]];
   181 
   182 
   183 (* read *)
   184 
   185 local
   186 
   187 val at_mode = Scan.option ($$$ "@") >> (fn NONE => false | _ => true);
   188 
   189 fun body x = (enum1 "|" body1 >> bar) x
   190 and body0 x = (enum "|" body1 >> bar) x
   191 and body1 x =
   192  (body2 :|-- (fn a =>
   193    $$$ "*" |-- !!! body4e >> (cat o single o star a) ||
   194    $$$ "+" |-- !!! body4e >> (cat o single o plus a) ||
   195    Scan.succeed a)) x
   196 and body2 x = (Scan.repeat1 body3 >> cat) x
   197 and body3 x = (body4 :|-- (fn a => $$$ "?" >> K (maybe a) || Scan.succeed a)) x
   198 and body4 x =
   199  ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
   200   $$$ "\<newline>" >> K (Newline 0) ||
   201   ident >> Nonterminal ||
   202   at_mode -- string >> Terminal ||
   203   at_mode -- antiq >> Antiquote) x
   204 and body4e x = (Scan.option body4 >> (cat o the_list)) x;
   205 
   206 val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;
   207 val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text "");
   208 val rules = enum1 ";" (Scan.option rule) >> map_filter I;
   209 
   210 in
   211 
   212 fun read ctxt (source: Symbol_Pos.source) =
   213   let
   214     val {text, pos, ...} = source;
   215     val _ = Context_Position.report ctxt pos Markup.language_rail;
   216     val toks = tokenize (Symbol_Pos.explode (text, pos));
   217     val _ = Context_Position.reports ctxt (maps reports_of_token toks);
   218   in #1 (Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof)))) toks) end;
   219 
   220 end;
   221 
   222 
   223 (* latex output *)
   224 
   225 local
   226 
   227 fun vertical_range_cat (Cat (_, rails)) y =
   228   let val (rails', (_, y')) =
   229     fold_map (fn rail => fn (y0, y') =>
   230       if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2))
   231       else
   232         let val (rail', y0') = vertical_range rail y0;
   233         in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1)
   234   in (Cat (y, rails'), y') end
   235 
   236 and vertical_range (Bar cats) y =
   237       let val (cats', y') = fold_map vertical_range_cat cats y
   238       in (Bar cats', Int.max (y + 1, y')) end
   239   | vertical_range (Plus (cat1, cat2)) y =
   240       let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y;
   241       in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
   242   | vertical_range (Newline _) y = (Newline (y + 2), y + 3)
   243   | vertical_range atom y = (atom, y + 1);
   244 
   245 fun output_rules state rules =
   246   let
   247     val output_antiq = Thy_Output.eval_antiq (#1 (Keyword.get_lexicons ())) state;
   248     fun output_text b s =
   249       Output.output s
   250       |> b ? enclose "\\isakeyword{" "}"
   251       |> enclose "\\isa{" "}";
   252 
   253     fun output_cat c (Cat (_, rails)) = outputs c rails
   254     and outputs c [rail] = output c rail
   255       | outputs _ rails = implode (map (output "") rails)
   256     and output _ (Bar []) = ""
   257       | output c (Bar [cat]) = output_cat c cat
   258       | output _ (Bar (cat :: cats)) =
   259           "\\rail@bar\n" ^ output_cat "" cat ^
   260           implode (map (fn Cat (y, rails) =>
   261               "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
   262           "\\rail@endbar\n"
   263       | output c (Plus (cat, Cat (y, rails))) =
   264           "\\rail@plus\n" ^ output_cat c cat ^
   265           "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
   266           "\\rail@endplus\n"
   267       | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
   268       | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n"
   269       | output c (Terminal (b, s)) = "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n"
   270       | output c (Antiquote (b, a)) =
   271           "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n";
   272 
   273     fun output_rule (name, rail) =
   274       let
   275         val (rail', y') = vertical_range rail 0;
   276         val out_name =
   277           (case name of
   278             Antiquote.Text "" => ""
   279           | Antiquote.Text s => output_text false s
   280           | Antiquote.Antiq a => output_antiq a);
   281       in
   282         "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n" ^
   283         output "" rail' ^
   284         "\\rail@end\n"
   285       end;
   286   in
   287     "\\begin{railoutput}\n" ^
   288     implode (map output_rule rules) ^
   289     "\\end{railoutput}\n"
   290   end;
   291 
   292 in
   293 
   294 val _ = Theory.setup
   295   (Thy_Output.antiquotation @{binding rail}
   296     (Scan.lift (Parse.source_position (Parse.string || Parse.cartouche)))
   297     (fn {state, context, ...} => output_rules state o read context));
   298 
   299 end;
   300 
   301 end;
   302