src/Pure/Tools/rail.ML
changeset 55030 9a9049d12e21
parent 55029 61a6bf7d4b02
child 55112 b1a5d603fd12
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/Pure/Tools/rail.ML	Fri Jan 17 20:31:39 2014 +0100
     1.3 @@ -0,0 +1,275 @@
     1.4 +(*  Title:      Pure/Tools/rail.ML
     1.5 +    Author:     Michael Kerscher, TU München
     1.6 +    Author:     Makarius
     1.7 +
     1.8 +Railroad diagrams in LaTeX.
     1.9 +*)
    1.10 +
    1.11 +structure Rail: sig end =
    1.12 +struct
    1.13 +
    1.14 +(** lexical syntax **)
    1.15 +
    1.16 +(* datatype token *)
    1.17 +
    (*Token kinds of the rail language.  An antiquotation token keeps its
      positioned symbols together with its source range.*)
    datatype kind =
        Keyword
      | Ident
      | String
      | Antiq of Symbol_Pos.T list * Position.range
      | EOF;

    (*A token pairs its source range with its kind and its text.*)
    datatype token =
      Token of Position.range * (kind * string);
    1.22 +
    (*Projections on tokens: first and second component of the range,
      then the kind and the text.*)
    fun pos_of (Token (range, _)) = fst range;
    fun end_pos_of (Token (range, _)) = snd range;

    fun kind_of (Token (_, info)) = fst info;
    fun content_of (Token (_, info)) = snd info;
    1.28 +
    1.29 +
    1.30 +(* diagnostics *)
    1.31 +
    (*Human-readable name of a token kind, as used in error messages.*)
    fun print_kind Keyword = "rail keyword"
      | print_kind Ident = "identifier"
      | print_kind String = "single-quoted string"
      | print_kind (Antiq _) = "antiquotation"
      | print_kind EOF = "end-of-input";

    (*Describe a token: its kind, its quoted text (omitted for EOF), and
      the markup for its start position.*)
    fun print (Token ((pos, _), (k, x))) =
      let
        val what =
          (case k of
            EOF => print_kind k
          | _ => print_kind k ^ " " ^ quote x);
      in what ^ Position.here pos end;

    (*Describe the keyword x without any position information.*)
    fun print_keyword x = space_implode " " [print_kind Keyword, quote x];
    1.44 +
    1.45 +
    1.46 +(* stopper *)
    1.47 +
    (*EOF token starting at pos, with no end position and empty text.*)
    fun mk_eof pos = Token ((pos, Position.none), (EOF, ""));
    val eof = mk_eof Position.none;

    fun is_eof tok =
      (case kind_of tok of
        EOF => true
      | _ => false);

    (*Stopper for token lists: the EOF token is placed at the end position
      of the last token, or has no position for empty input.*)
    val stopper =
      let
        fun final_eof [] = eof
          | final_eof toks = mk_eof (end_pos_of (List.last toks));
      in Scan.stopper final_eof is_eof end;
    1.56 +
    1.57 +
    1.58 +(* tokenize *)
    1.59 +
    local

    val err_prefix = "Rail lexical error: ";

    (*Single symbols that are recognized as rail keywords.*)
    val keyword_symbols = ["|", "*", "+", "?", "(", ")", "\<newline>", ";", ":", "@"];

    (*Singleton token list of kind k built from the positioned symbols ss.*)
    fun token k ss = [Token (Symbol_Pos.range ss, (k, Symbol_Pos.content ss))];

    val scan_space = Scan.many1 (fn sp => Symbol.is_blank (Symbol_Pos.symbol sp));

    val scan_keyword =
      Scan.one (fn sp => member (op =) keyword_symbols (Symbol_Pos.symbol sp));

    (*One lexical item; blank space yields no token at all.*)
    val scan_token =
      scan_space >> K [] ||
      Antiquote.scan_antiq >> (fn antiq => token (Antiq antiq) (fst antiq)) ||
      scan_keyword >> (fn sp => token Keyword [sp]) ||
      Lexicon.scan_id >> token Ident ||
      Symbol_Pos.scan_string_q err_prefix >> (token String o #1 o #2);

    (*All tokens of the input; whatever follows them must be the
      end-of-input symbol, otherwise "bad input" is reported.*)
    val scan =
      let
        val end_of_input =
          Symbol_Pos.!!! (fn () => err_prefix ^ "bad input")
            (Scan.ahead (Scan.one Symbol_Pos.is_eof));
      in (Scan.repeat scan_token >> flat) --| end_of_input end;

    in

    (*Explode the source into positioned symbols and scan them into tokens.*)
    fun tokenize source =
      Symbol_Pos.explode source
      |> Scan.error (Scan.finite Symbol_Pos.stopper scan)
      |> #1;

    end;
    1.89 +
    1.90 +
    1.91 +
    1.92 +(** parsing **)
    1.93 +
    (*Wrap scan via Scan.!! so that its failure message starts with
      "Rail syntax error" and the position of the next token.  A message that
      already begins with that prefix is passed through unchanged.*)
    fun !!! scan =
      let
        val prefix = "Rail syntax error";

        fun located toks =
          prefix ^
            (case toks of
              [] => " (end-of-input)"
            | tok :: _ => Position.here (pos_of tok));

        fun err (toks, opt_msg) () =
          (case opt_msg of
            NONE => located toks
          | SOME msg =>
              let val s = msg () in
                if String.isPrefix prefix s then s
                else located toks ^ ": " ^ s
              end);
      in Scan.!! err scan end;
   1.109 +
   (*Scan the keyword token with text x; otherwise fail with a message
     that names the expected keyword and what was found instead.*)
   fun $$$ x =
     let
       fun is_wanted (Token (_, (Keyword, y))) = y = x
         | is_wanted _ = false;

       fun unexpected [] () =
             print_keyword x ^ " expected,\nbut end-of-input was found"
         | unexpected (tok :: _) () =
             print_keyword x ^ " expected,\nbut " ^ print tok ^ " was found";
     in Scan.one is_wanted || Scan.fail_with unexpected end;
   1.115 +
   (*Non-empty list of scan items separated by keyword sep; after a
     separator the next item is mandatory.*)
   fun enum1 sep scan = scan -- Scan.repeat ($$$ sep |-- !!! scan) >> (op ::);

   (*Like enum1, but yields the empty list when enum1 does not apply.*)
   fun enum sep scan = Scan.optional (enum1 sep scan) [];

   (*Text of an identifier token or of a string token.*)
   val ident = Scan.some (fn Token (_, (Ident, x)) => SOME x | _ => NONE);
   val string = Scan.some (fn Token (_, (String, x)) => SOME x | _ => NONE);

   (*Payload of an antiquotation token.*)
   val antiq = Scan.some (fn Token (_, (Antiq a, _)) => SOME a | _ => NONE);
   1.123 +
   1.124 +
   1.125 +
   1.126 +(** rail expressions **)
   1.127 +
   1.128 +(* datatype *)
   1.129 +
   (*Rail expressions.  The int of Cat and Newline is 0 when produced by the
     parser and is filled in later by the vertical_range functions.  The
     bool of Terminal and Antiquote comes from an optional "@" in the input
     and affects how the item is rendered in the LaTeX output.*)
   datatype rails =
       Cat of int * rail list
   and rail =
       Bar of rails list
     | Plus of rails * rails
     | Newline of int
     | Nonterminal of string
     | Terminal of bool * string
     | Antiquote of bool * (Symbol_Pos.T list * Position.range);
   1.139 +
   (*Mirror a rail expression: the items of every Cat are put in opposite
     order, recursively through Bar and Plus; other rails stay as they are.*)
   fun reverse_cat (Cat (y, rails)) = Cat (y, rails |> map reverse |> rev)
   and reverse rail =
     (case rail of
       Bar cats => Bar (map reverse_cat cats)
     | Plus (cat1, cat2) => Plus (reverse_cat cat1, reverse_cat cat2)
     | atom => atom);
   1.144 +
   (*Concatenation with its int component still set to 0.*)
   fun cat rails = Cat (0, rails);

   val empty = Cat (0, []);
   fun is_empty (Cat (_, rails)) = null rails;

   fun is_newline rail =
     (case rail of
       Newline _ => true
     | _ => false);

   (*Alternatives; a single alternative holding a single rail collapses to
     that rail.*)
   fun bar cats =
     (case cats of
       [Cat (_, [rail])] => rail
     | _ => Bar cats);

   (*Plus node whose second component is stored mirrored.*)
   fun plus cat1 cat2 = Plus (cat1, reverse_cat cat2);

   (*Choice between nothing and the given rail.*)
   fun maybe rail = bar [empty, cat [rail]];

   (*Star: with an empty second argument this is a Plus of the empty Cat
     and cat1; otherwise it is an optional Plus of cat1 and cat2.*)
   fun star cat1 cat2 =
     (case cat2 of
       Cat (_, []) => plus empty cat1
     | _ => maybe (plus cat1 cat2));
   1.162 +
   1.163 +
   1.164 +(* read *)
   1.165 +
   local

   (*True iff the optional keyword "@" is present.*)
   val at_mode = Scan.option ($$$ "@") >> is_some;

   (*Grammar of rail bodies.  Every function takes the token list
     explicitly, so that the mutually recursive parsers can refer to each
     other.*)
   fun body toks = (enum1 "|" body1 >> bar) toks
   and body0 toks = (enum "|" body1 >> bar) toks
   and body1 toks =
     let
       (*optional "*" or "+" suffix applied to the sequence parsed so far*)
       fun repetition seq =
         $$$ "*" |-- !!! body4e >> (fn arg => cat [star seq arg]) ||
         $$$ "+" |-- !!! body4e >> (fn arg => cat [plus seq arg]) ||
         Scan.succeed seq;
     in (body2 :|-- repetition) toks end
   and body2 toks = (Scan.repeat1 body3 >> cat) toks
   and body3 toks =
     (body4 :|-- (fn rail => Scan.optional ($$$ "?" >> K (maybe rail)) rail)) toks
   and body4 toks =
     let
       val group = $$$ "(" |-- !!! (body0 --| $$$ ")");
       val newline = $$$ "\<newline>" >> K (Newline 0);
       val nonterminal = ident >> Nonterminal;
       val terminal = at_mode -- string >> Terminal;
       val antiquote = at_mode -- antiq >> Antiquote;
     in (group || newline || nonterminal || terminal || antiquote) toks end
   and body4e toks =
     (Scan.option body4 >> (fn NONE => empty | SOME rail => cat [rail])) toks;

   (*A rule is "name : body" or a bare body, which gets the empty text as
     its name.*)
   val rule_name = ident >> Antiquote.Text || antiq >> Antiquote.Antiq;

   val rule =
     let
       val named = rule_name -- ($$$ ":" |-- !!! body);
       val anonymous = body >> (fn rail => (Antiquote.Text "", rail));
     in named || anonymous end;

   (*Rules separated by ";", where individual rules may be missing.*)
   val rules = enum1 ";" (Scan.option rule) >> map_filter I;

   in

   (*Tokenize the source and parse it into a list of named rules; after
     the rules only end-of-input is accepted.*)
   fun read source =
     tokenize source
     |> Scan.error (Scan.finite stopper (rules --| !!! (Scan.ahead (Scan.one is_eof))))
     |> #1;

   end;
   1.197 +
   1.198 +
   1.199 +(* latex output *)
   1.200 +
   1.201 +local
   1.202 +
   (*Fill in the int components of Cat and Newline nodes, starting from
     the int y.  Each function returns the updated expression together with
     the resulting int.  Within a Cat a pair of ints is threaded through the
     items; a Newline item restarts the pair from its second component.*)
   fun vertical_range_cat (Cat (_, rails)) y =
     let
       fun step (Newline _) (_, next) = (Newline (next + 1), (next + 1, next + 2))
         | step rail (row, next) =
             let val (rail', next') = vertical_range rail row
             in (rail', (row, Int.max (next', next))) end;

       val (rails', (_, y')) = fold_map step rails (y, y + 1);
     in (Cat (y, rails'), y') end

   and vertical_range rail y =
     (case rail of
       Bar cats =>
         let val (cats', y') = fold_map vertical_range_cat cats y
         in (Bar cats', Int.max (y + 1, y')) end
     | Plus (cat1, cat2) =>
         let
           (*cat2 continues from the int reached by cat1*)
           val (cat1', y1) = vertical_range_cat cat1 y;
           val (cat2', y2) = vertical_range_cat cat2 y1;
         in (Plus (cat1', cat2'), Int.max (y + 1, y2)) end
     | Newline _ => (Newline (y + 2), y + 3)
     | atom => (atom, y + 1));
   1.220 +
   (*Render a list of named rules as the body of a LaTeX "railoutput"
     environment.  Antiquotations are evaluated via Thy_Output.eval_antiq
     in the given state.*)
   fun output_rules state rules =
     let
       val output_antiq = Thy_Output.eval_antiq (#1 (Keyword.get_lexicons ())) state;

       (*text inside \isa{...}, additionally inside \isakeyword{...} if b*)
       fun output_text b s =
         let val text = Output.output s
         in enclose "\\isa{" "}" (if b then enclose "\\isakeyword{" "}" text else text) end;

       (*c is inserted into the macro name of an atomic item; it is passed
         on only to a sole item and is reset to "" otherwise*)
       fun output_cat c (Cat (_, rails)) = outputs c rails
       and outputs c rails =
         (case rails of
           [rail] => output c rail
         | _ => implode (map (output "") rails))
       and output c rail =
         (case rail of
           Bar [] => ""
         | Bar [cat] => output_cat c cat
         | Bar (cat :: cats) =>
             let
               fun next_bar (Cat (y, rails)) =
                 "\\rail@nextbar{" ^ string_of_int y ^ "}\n" ^ outputs "" rails;
             in
               implode
                 ("\\rail@bar\n" :: output_cat "" cat :: map next_bar cats @ ["\\rail@endbar\n"])
             end
         | Plus (cat, Cat (y, rails)) =>
             implode
              ["\\rail@plus\n", output_cat c cat,
               "\\rail@nextplus{" ^ string_of_int y ^ "}\n", outputs "c" rails,
               "\\rail@endplus\n"]
         | Newline y => enclose "\\rail@cr{" "}\n" (string_of_int y)
         | Nonterminal s => "\\rail@" ^ c ^ "nont{" ^ output_text false s ^ "}[]\n"
         | Terminal (b, s) => "\\rail@" ^ c ^ "term{" ^ output_text b s ^ "}[]\n"
         | Antiquote (b, a) =>
             "\\rail@" ^ c ^ (if b then "term{" else "nont{") ^ output_antiq a ^ "}[]\n");

       (*rule name as LaTeX text; the empty text stays empty*)
       fun output_name (Antiquote.Text "") = ""
         | output_name (Antiquote.Text s) = output_text false s
         | output_name (Antiquote.Antiq a) = output_antiq a;

       fun output_rule (name, rail) =
         let
           val (rail', y') = vertical_range rail 0;
           val out_name = output_name name;
         in
           implode
            ["\\rail@begin{" ^ string_of_int y' ^ "}{" ^ out_name ^ "}\n",
             output "" rail',
             "\\rail@end\n"]
         end;
     in
       enclose "\\begin{railoutput}\n" "\\end{railoutput}\n" (implode (map output_rule rules))
     end;
   1.267 +
   1.268 +in
   1.269 +
   (*Register the document antiquotation "rail": its argument is a string
     with source position, which is read as rail rules and rendered as
     LaTeX in the current state.*)
   val _ =
     Theory.setup
       (Thy_Output.antiquotation @{binding rail}
         (Scan.lift (Parse.source_position Parse.string))
         (fn {state, ...} => fn source => output_rules state (read source)));
   1.274 +
   1.275 +end;
   1.276 +
   1.277 +end;
   1.278 +