src/Pure/Tools/rail.ML
author wenzelm
Sat Mar 01 22:46:31 2014 +0100 (2014-03-01)
changeset 55828 42ac3cfb89f6
parent 55613 ad446b45efff
child 56163 331f4aba14b3
permissions -rw-r--r--
clarified language markup: added "delimited" property;
type Symbol_Pos.source preserves information about delimited outer tokens (e.g. string, cartouche);
observe Completion.Language_Context only for delimited languages, which is important to complete keywords after undelimited inner tokens, e.g. "lemma A pro";
wenzelm@55030
     1
(*  Title:      Pure/Tools/rail.ML
wenzelm@42504
     2
    Author:     Michael Kerscher, TU München
wenzelm@42504
     3
    Author:     Makarius
wenzelm@42504
     4
wenzelm@42504
     5
Railroad diagrams in LaTeX.
wenzelm@42504
     6
*)
wenzelm@42504
     7
wenzelm@42504
     8
structure Rail: sig end =
wenzelm@42504
     9
struct
wenzelm@42504
    10
wenzelm@42504
    11
(** lexical syntax **)
wenzelm@42504
    12
wenzelm@42504
    13
(* datatype token *)
wenzelm@42504
    14
wenzelm@42508
    15
(* lexical category of a rail token; Antiq carries the scanned antiquotation *)
datatype kind =
wenzelm@55526
    16
  Keyword | Ident | String | Antiq of Antiquote.antiq | EOF;
wenzelm@42504
    17
wenzelm@42504
    18
(* a token: source position range paired with its kind and textual content *)
datatype token = Token of Position.range * (kind * string);
wenzelm@42504
    19
wenzelm@42504
    20
(* first component of the token's position range *)
fun pos_of (Token (range, _)) = #1 range;
wenzelm@42504
    21
(* second component of the token's position range *)
fun end_pos_of (Token (range, _)) = #2 range;
wenzelm@42504
    22
wenzelm@42504
    23
(* kind component of a token *)
fun kind_of (Token (_, payload)) = #1 payload;
wenzelm@42504
    24
(* textual content component of a token *)
fun content_of (Token (_, payload)) = #2 payload;
wenzelm@42504
    25
wenzelm@42504
    26
wenzelm@42504
    27
(* diagnostics *)
wenzelm@42504
    28
wenzelm@42504
    29
(* human-readable name of a token kind, used in diagnostic messages *)
fun print_kind Keyword = "rail keyword"
  | print_kind Ident = "identifier"
  | print_kind String = "single-quoted string"
  | print_kind (Antiq _) = "antiquotation"
  | print_kind EOF = "end-of-input";
wenzelm@42504
    35
wenzelm@42504
    36
(* diagnostic rendering of a token: kind name, quoted content (omitted for EOF),
   followed by the start position *)
fun print (Token ((pos, _), (k, x))) =
  let
    val described =
      (case k of
        EOF => print_kind k
      | _ => print_kind k ^ " " ^ quote x);
  in described ^ Position.here pos end;
wenzelm@42504
    39
wenzelm@42504
    40
(* diagnostic rendering of an expected keyword: kind name plus quoted text *)
fun print_keyword x =
  let val kind_name = print_kind Keyword
  in kind_name ^ " " ^ quote x end;
wenzelm@42504
    41
wenzelm@55613
    42
(* markup reports for a token: inner_string markup at the start position of a
   String token, the antiquotation's own reports for Antiq, nothing otherwise *)
fun reports_of_token (Token ((pos, _), (kind, _))) =
  (case kind of
    String => [(pos, Markup.inner_string)]
  | Antiq antiq => Antiquote.antiq_reports antiq
  | _ => []);
wenzelm@55613
    45
wenzelm@42504
    46
wenzelm@42504
    47
(* stopper *)
wenzelm@42504
    48
wenzelm@42504
    49
(* EOF token with empty content, starting at pos and without end position *)
fun mk_eof pos =
  let val range = (pos, Position.none)
  in Token (range, (EOF, "")) end;
wenzelm@42504
    50
(* EOF token without any position information *)
val eof = Token ((Position.none, Position.none), (EOF, ""));
wenzelm@42504
    51
wenzelm@42504
    52
(* recognize the EOF token by its kind *)
fun is_eof tok =
  (case kind_of tok of
    EOF => true
  | _ => false);
wenzelm@42504
    54
wenzelm@42504
    55
(* token stopper: the EOF token is placed at the end position of the last
   token, or carries no position when the token list is empty *)
val stopper =
  let
    fun make_stopper [] = eof
      | make_stopper toks = mk_eof (end_pos_of (List.last toks));
  in Scan.stopper make_stopper is_eof end;
wenzelm@42504
    57
wenzelm@42504
    58
wenzelm@42504
    59
(* tokenize *)
wenzelm@42504
    60
wenzelm@42504
    61
local
wenzelm@42504
    62
wenzelm@42504
    63
(* singleton token list of kind k built from positioned symbols ss *)
fun token k ss =
  let
    val range = Symbol_Pos.range ss;
    val text = Symbol_Pos.content ss;
  in [Token (range, (k, text))] end;
wenzelm@42504
    64
wenzelm@42504
    65
(* one or more blank symbols *)
val scan_space =
  Scan.many1 (fn sym_pos => Symbol.is_blank (Symbol_Pos.symbol sym_pos));
wenzelm@42504
    66
wenzelm@42504
    67
(* a single symbol that is one of the rail keywords *)
val scan_keyword =
  let
    val keywords = ["|", "*", "+", "?", "(", ")", "\<newline>", ";", ":", "@"];
    fun is_keyword sym_pos = member (op =) keywords (Symbol_Pos.symbol sym_pos);
  in Scan.one is_keyword end;
wenzelm@42504
    70
wenzelm@48764
    71
(* common prefix of error messages produced while tokenizing rail source *)
val err_prefix = "Rail lexical error: ";
wenzelm@48764
    72
wenzelm@42504
    73
(* Scan one lexical item and return it as a token list. Alternatives are tried
   in order: blanks (yielding no token), antiquotation, keyword symbol,
   identifier, single-quoted string. The String token takes its range from the
   two positions returned by the string scanner. *)
val scan_token =
wenzelm@42504
    74
  scan_space >> K [] ||
wenzelm@42516
    75
  Antiquote.scan_antiq >> (fn antiq as (ss, _) => token (Antiq antiq) ss) ||
wenzelm@42504
    76
  scan_keyword >> (token Keyword o single) ||
wenzelm@42504
    77
  Lexicon.scan_id >> token Ident ||
wenzelm@55613
    78
  Symbol_Pos.scan_string_q err_prefix >> (fn (pos1, (ss, pos2)) =>
wenzelm@55613
    79
    [Token (Position.range pos1 pos2, (String, Symbol_Pos.content ss))]);
wenzelm@42504
    80
wenzelm@42506
    81
(* Scan all tokens of the input and flatten the per-item token lists; then
   require (via look-ahead, without consuming it) that the symbol stopper
   follows, otherwise fail with the "bad input" lexical error. *)
val scan =
wenzelm@42506
    82
  (Scan.repeat scan_token >> flat) --|
wenzelm@48764
    83
    Symbol_Pos.!!! (fn () => err_prefix ^ "bad input")
wenzelm@42506
    84
      (Scan.ahead (Scan.one Symbol_Pos.is_eof));
wenzelm@42506
    85
wenzelm@42504
    86
in
wenzelm@42504
    87
wenzelm@55613
    88
(* tokenize a list of positioned symbols: run scan as a finite scanner wrapped
   in Scan.error and return only the resulting token list *)
fun tokenize syms = #1 (Scan.error (Scan.finite Symbol_Pos.stopper scan) syms);
wenzelm@42504
    89
wenzelm@42504
    90
end;
wenzelm@42504
    91
wenzelm@42504
    92
wenzelm@42504
    93
wenzelm@42504
    94
(** parsing **)
wenzelm@42504
    95
wenzelm@42504
    96
(* Wrap scan via Scan.!! with a lazily computed "Rail syntax error" message.
   The message is located at the first remaining token, or marked as
   end-of-input. An inner message that already starts with the prefix is
   passed through unchanged; any other inner message is appended after the
   prefix and position. *)
fun !!! scan =
  let
    val prefix = "Rail syntax error";

    fun here toks =
      (case toks of
        [] => " (end-of-input)"
      | tok :: _ => Position.here (pos_of tok));

    fun err (toks, opt_msg) () =
      (case opt_msg of
        NONE => prefix ^ here toks
      | SOME msg =>
          let val s = msg () in
            if String.isPrefix prefix s then s
            else prefix ^ here toks ^ ": " ^ s
          end);
  in Scan.!! err scan end;
wenzelm@42504
   111
wenzelm@42504
   112
(* Accept exactly the keyword token with content x. Otherwise fail with a lazy
   message naming the expected keyword and what was found instead: the next
   token, or end-of-input when no tokens remain. *)
fun $$$ x =
  let
    fun is_wanted tok =
      (case kind_of tok of
        Keyword => content_of tok = x
      | _ => false);

    fun complain [] () = print_keyword x ^ " expected,\nbut end-of-input was found"
      | complain (tok :: _) () =
          print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found";
  in Scan.one is_wanted || Scan.fail_with complain end;
wenzelm@42504
   117
wenzelm@42504
   118
(* non-empty sequence of scan items separated by keyword sep; after each
   separator the next item is wrapped in !!! *)
fun enum1 sep scan =
  let val more = Scan.repeat ($$$ sep |-- !!! scan)
  in scan ::: more end;
wenzelm@42504
   119
(* like enum1, but succeeds with the empty list when no item can be scanned *)
fun enum sep scan =
  let val nonempty = enum1 sep scan
  in nonempty || Scan.succeed [] end;
wenzelm@42504
   120
wenzelm@42508
   121
(* content of an Ident token *)
val ident =
  Scan.some (fn tok => (case kind_of tok of Ident => SOME (content_of tok) | _ => NONE));
wenzelm@42508
   122
(* content of a String token *)
val string =
  Scan.some (fn tok => (case kind_of tok of String => SOME (content_of tok) | _ => NONE));
wenzelm@42504
   123
wenzelm@42508
   124
(* antiquotation carried by an Antiq token *)
val antiq = Scan.some (fn Token (_, (Antiq a, _)) => SOME a | _ => NONE);
wenzelm@42504
   125
wenzelm@42504
   126
wenzelm@42504
   127
wenzelm@42504
   128
(** rail expressions **)
wenzelm@42504
   129
wenzelm@42504
   130
(* datatype *)
wenzelm@42504
   131
wenzelm@42504
   132
(* Rail expressions. Cat is a sequence of rails tagged with an integer that
   vertical_range_cat later sets to a vertical offset (emitted as the argument
   of \rail@nextbar / \rail@nextplus); Newline carries such an offset as well
   (emitted by \rail@cr). The bool of Terminal and Antiquote comes from the
   optional "@" prefix parsed by at_mode. *)
datatype rails =
wenzelm@42504
   133
  Cat of int * rail list
wenzelm@42504
   134
and rail =
wenzelm@42504
   135
  Bar of rails list |
wenzelm@42504
   136
  Plus of rails * rails |
wenzelm@42504
   137
  Newline of int |
wenzelm@42504
   138
  Nonterminal of string |
wenzelm@42516
   139
  Terminal of bool * string |
wenzelm@55526
   140
  Antiquote of bool * Antiquote.antiq;
wenzelm@42504
   141
wenzelm@42504
   142
(* reverse the order of all sequences, recursively through Bar and Plus;
   other rails are left unchanged *)
fun reverse_cat (Cat (y, rails)) = Cat (y, map reverse (rev rails))
and reverse rail =
  (case rail of
    Bar cats => Bar (map reverse_cat cats)
  | Plus (cat1, cat2) => Plus (reverse_cat cat1, reverse_cat cat2)
  | _ => rail);
wenzelm@42504
   146
wenzelm@42504
   147
(* sequence of rails with initial tag 0 *)
val cat = fn rails => Cat (0, rails);
wenzelm@42504
   148
wenzelm@42504
   149
(* the empty sequence *)
val empty = Cat (0, []);
wenzelm@42504
   150
(* test for a sequence without any rails, regardless of its tag *)
fun is_empty (Cat (_, rails)) = null rails;
wenzelm@42504
   151
wenzelm@42504
   152
(* test for a Newline rail *)
fun is_newline rail =
  (case rail of
    Newline _ => true
  | _ => false);
wenzelm@42504
   153
wenzelm@42504
   154
(* alternatives; a single alternative consisting of exactly one rail collapses
   to that rail *)
fun bar cats =
  (case cats of
    [Cat (_, [rail])] => rail
  | _ => Bar cats);
wenzelm@42504
   156
wenzelm@42504
   157
(* Plus node whose second branch is stored in reversed order *)
fun plus cat1 cat2 =
  let val backwards = reverse_cat cat2
  in Plus (cat1, backwards) end;
wenzelm@42504
   158
wenzelm@42504
   159
(* star construct: with an empty cat2 this is plus of the empty sequence and
   cat1; otherwise a choice between the empty sequence and plus cat1 cat2 *)
fun star cat1 cat2 =
  (case cat2 of
    Cat (_, []) => plus empty cat1
  | _ => bar [empty, cat [plus cat1 cat2]]);
wenzelm@42504
   162
wenzelm@42504
   163
(* optional rail: choice between the empty sequence and the rail itself *)
fun maybe rail =
  let val alternatives = [empty, cat [rail]]
  in bar alternatives end;
wenzelm@42504
   164
wenzelm@42504
   165
wenzelm@42504
   166
(* read *)
wenzelm@42504
   167
wenzelm@42504
   168
local
wenzelm@42504
   169
wenzelm@42516
   170
(* optional "@" keyword, returned as a flag that tells whether it was present *)
val at_mode = Scan.option ($$$ "@") >> is_some;
wenzelm@42516
   171
wenzelm@42504
   172
(* Grammar of rail bodies, as mutually recursive token parsers:
   body / body0: alternatives separated by "|", combined with bar
     (body0 also accepts no alternative at all);
   body1: a body2 sequence, optionally followed by "*" or "+" and an
     optional body4 element, combined with star or plus;
   body2: non-empty sequence of body3 items;
   body3: a body4 element, optionally followed by "?" (maybe);
   body4: parenthesized body0, newline keyword, identifier as Nonterminal,
     string as Terminal, or antiquotation, the latter two with at_mode flag;
   body4e: optional body4, as a sequence of zero or one rails. *)
fun body x = (enum1 "|" body1 >> bar) x
wenzelm@42504
   173
and body0 x = (enum "|" body1 >> bar) x
wenzelm@42504
   174
and body1 x =
wenzelm@42504
   175
 (body2 :|-- (fn a =>
wenzelm@42504
   176
   $$$ "*" |-- !!! body4e >> (cat o single o star a) ||
wenzelm@42504
   177
   $$$ "+" |-- !!! body4e >> (cat o single o plus a) ||
wenzelm@42504
   178
   Scan.succeed a)) x
wenzelm@42504
   179
and body2 x = (Scan.repeat1 body3 >> cat) x
wenzelm@42504
   180
and body3 x = (body4 :|-- (fn a => $$$ "?" >> K (maybe a) || Scan.succeed a)) x
wenzelm@42504
   181
and body4 x =
wenzelm@42504
   182
 ($$$ "(" |-- !!! (body0 --| $$$ ")") ||
wenzelm@55030
   183
  $$$ "\<newline>" >> K (Newline 0) ||
wenzelm@42504
   184
  ident >> Nonterminal ||
wenzelm@42516
   185
  at_mode -- string >> Terminal ||
wenzelm@42516
   186
  at_mode -- antiq >> Antiquote) x
wenzelm@42504
   187
and body4e x = (Scan.option body4 >> (cat o the_list)) x;
wenzelm@42504
   188
wenzelm@42516
   189
(* rule name: an identifier as plain text, or an antiquotation *)
val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;
wenzelm@42508
   190
(* a named rule (name, then ":", then a mandatory body), or otherwise a bare
   body paired with the empty text name *)
val rule = rule_name -- ($$$ ":" |-- !!! body) || body >> pair (Antiquote.Text "");
wenzelm@42504
   191
(* rules separated by ";"; positions where no rule could be parsed are dropped *)
val rules = enum1 ";" (Scan.option rule) >> map_filter I;
wenzelm@42504
   192
wenzelm@42504
   193
in
wenzelm@42504
   194
wenzelm@55828
   195
(* Read rail rules from source text. Reports the rail language markup at the
   source position, tokenizes the exploded text, reports the token markup,
   then parses all rules, requiring that only the EOF token remains. *)
fun read ctxt (source: Symbol_Pos.source) =
  let
    val text = #text source;
    val pos = #pos source;
    val _ = Context_Position.report ctxt pos Markup.language_rail;
    val toks = tokenize (Symbol_Pos.explode (text, pos));
    val _ = Context_Position.reports ctxt (maps reports_of_token toks);
    val parse_all = rules --| !!! (Scan.ahead (Scan.one is_eof));
    val (result, _) = Scan.error (Scan.finite stopper parse_all) toks;
  in result end;
wenzelm@42504
   202
wenzelm@42504
   203
end;
wenzelm@42504
   204
wenzelm@42504
   205
wenzelm@42504
   206
(* latex output *)
wenzelm@42504
   207
wenzelm@42504
   208
local
wenzelm@42504
   209
wenzelm@42504
   210
(* Assign vertical offsets to a rail expression, starting from offset y.
   Both functions return the updated expression together with a new offset.

   vertical_range_cat threads a pair (y0, y') through the rails of a sequence:
   y0 is the offset handed to vertical_range for each rail, y' the maximum
   offset reached so far (initially y + 1). A Newline inside the sequence
   becomes Newline (y' + 1) and resets the pair to (y' + 1, y' + 2). The
   result is the sequence re-tagged with the incoming y, plus the final y'.

   vertical_range handles single rails: Bar and Plus thread the offset through
   their sequences via fold_map and return at least y + 1; a Newline reaching
   this function directly becomes Newline (y + 2) with offset y + 3; any other
   rail is unchanged and yields y + 1. *)
fun vertical_range_cat (Cat (_, rails)) y =
wenzelm@42504
   211
  let val (rails', (_, y')) =
wenzelm@42504
   212
    fold_map (fn rail => fn (y0, y') =>
wenzelm@42504
   213
      if is_newline rail then (Newline (y' + 1), (y' + 1, y' + 2))
wenzelm@42504
   214
      else
wenzelm@42504
   215
        let val (rail', y0') = vertical_range rail y0;
wenzelm@42504
   216
        in (rail', (y0, Int.max (y0', y'))) end) rails (y, y + 1)
wenzelm@42504
   217
  in (Cat (y, rails'), y') end
wenzelm@42504
   218
wenzelm@42504
   219
and vertical_range (Bar cats) y =
wenzelm@42504
   220
      let val (cats', y') = fold_map vertical_range_cat cats y
wenzelm@42504
   221
      in (Bar cats', Int.max (y + 1, y')) end
wenzelm@42504
   222
  | vertical_range (Plus (cat1, cat2)) y =
wenzelm@42504
   223
      let val ([cat1', cat2'], y') = fold_map vertical_range_cat [cat1, cat2] y;
wenzelm@42504
   224
      in (Plus (cat1', cat2'), Int.max (y + 1, y')) end
wenzelm@42504
   225
  | vertical_range (Newline _) y = (Newline (y + 2), y + 3)
wenzelm@42504
   226
  | vertical_range atom y = (atom, y + 1);
wenzelm@42504
   227
wenzelm@42508
   228
(* Render parsed rules as LaTeX text for the railoutput environment.
   Each rule becomes \rail@begin{h}{name} ... \rail@end, where h is the offset
   computed by vertical_range for the rule body starting at 0, and name is
   empty, the rule's text name, or its evaluated antiquotation.
   The string c threaded through output / outputs / output_cat is spliced into
   macro names (\rail@<c>nont, \rail@<c>term); it is "" except for the second
   branch of a Plus, which is emitted with "c". It is only passed on to a
   sequence that consists of a single rail. *)
fun output_rules state rules =
wenzelm@42508
   229
  let
wenzelm@42508
   230
    val output_antiq = Thy_Output.eval_antiq (#1 (Keyword.get_lexicons ())) state;
wenzelm@42516
   231
    (* pass s through Output.output, wrap it in \isakeyword{...} when b holds,
       then always in \isa{...} *)
    fun output_text b s =
wenzelm@42516
   232
      Output.output s
wenzelm@42516
   233
      |> b ? enclose "\\isakeyword{" "}"
wenzelm@42516
   234
      |> enclose "\\isa{" "}";
wenzelm@42504
   235
wenzelm@42508
   236
    (* output of sequences (output_cat, outputs) and of single rails (output) *)
    fun output_cat c (Cat (_, rails)) = outputs c rails
wenzelm@42508
   237
    and outputs c [rail] = output c rail
wenzelm@42508
   238
      | outputs _ rails = implode (map (output "") rails)
wenzelm@42508
   239
    and output _ (Bar []) = ""
wenzelm@42508
   240
      | output c (Bar [cat]) = output_cat c cat
wenzelm@42508
   241
      | output _ (Bar (cat :: cats)) =
wenzelm@42508
   242
          "\\rail@bar\n" ^ output_cat "" cat ^
wenzelm@42508
   243
          implode (map (fn Cat (y, rails) =>
wenzelm@42508
   244
              "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails) cats) ^
wenzelm@42508
   245
          "\\rail@endbar\n"
wenzelm@42508
   246
      | output c (Plus (cat, Cat (y, rails))) =
wenzelm@42508
   247
          "\\rail@plus\n" ^ output_cat c cat ^
wenzelm@42508
   248
          "\\rail@nextplus{" ^ string_of_int y ^ "}\n" ^ outputs "c" rails ^
wenzelm@42508
   249
          "\\rail@endplus\n"
wenzelm@42508
   250
      | output _ (Newline y) = "\\rail@cr{" ^ string_of_int y ^ "}\n"
wenzelm@42516
   251
      | output c (Nonterminal s) = "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n"
wenzelm@42516
   252
      | output c (Terminal (b, s)) = "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n"
wenzelm@42508
   253
      | output c (Antiquote (b, a)) =
wenzelm@42508
   254
          "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n";
wenzelm@42504
   255
wenzelm@42508
   256
    (* output of one rule: header with total offset and name, body, footer *)
    fun output_rule (name, rail) =
wenzelm@42508
   257
      let
wenzelm@42508
   258
        val (rail', y') = vertical_range rail 0;
wenzelm@42508
   259
        val out_name =
wenzelm@42508
   260
          (case name of
wenzelm@42661
   261
            Antiquote.Text "" => ""
wenzelm@42661
   262
          | Antiquote.Text s => output_text false s
wenzelm@42508
   263
          | Antiquote.Antiq a => output_antiq a);
wenzelm@42508
   264
      in
wenzelm@42508
   265
        "\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n" ^
wenzelm@42508
   266
        output "" rail' ^
wenzelm@42508
   267
        "\\rail@end\n"
wenzelm@42508
   268
      end;
wenzelm@42508
   269
  in
wenzelm@42508
   270
    "\\begin{railoutput}\n" ^
wenzelm@42508
   271
    implode (map output_rule rules) ^
wenzelm@42508
   272
    "\\end{railoutput}\n"
wenzelm@42504
   273
  end;
wenzelm@42504
   274
wenzelm@42504
   275
in
wenzelm@42504
   276
wenzelm@53171
   277
(* Register the document antiquotation with binding rail: its argument is a
   string or cartouche together with its source position; the source is parsed
   by read in the given context and rendered by output_rules. *)
val _ = Theory.setup
wenzelm@55030
   278
  (Thy_Output.antiquotation @{binding rail}
wenzelm@55112
   279
    (Scan.lift (Parse.source_position (Parse.string || Parse.cartouche)))
wenzelm@55613
   280
    (fn {state, context, ...} => output_rules state o read context));
wenzelm@42504
   281
wenzelm@42504
   282
end;
wenzelm@42504
   283
wenzelm@42504
   284
end;
wenzelm@42504
   285