doc-src/rail.ML
author bulwahn
Fri Mar 18 18:19:42 2011 +0100 (2011-03-18)
changeset 42020 2da02764d523
parent 38767 d8da44a8dd25
child 42361 23f352990944
permissions -rw-r--r--
translating bash output in quickcheck_narrowing to handle special characters; adding simple test cases
     1 (*  Title:      doc-src/rail.ML
     2     Author:     Michael Kerscher, TUM
     3 
     4 Railroad diagrams for LaTeX.
     5 *)
     6 
     7 structure Rail =
     8 struct
     9 
    10 datatype token =
    11   Identifier of string |
    12   Special_Identifier of string |
    13   Text of string |
    14   Anot of string |
    15   Symbol of string |
    16   EOF;
    17 
    18 fun is_identifier (Identifier _) = true
    19   | is_identifier (Special_Identifier _ ) = true
    20   | is_identifier _ = false;
    21 
    22 fun is_text (Text _) = true
    23   | is_text _ = false;
    24 
    25 fun is_anot (Anot _) = true
    26   | is_anot _ = false;
    27 
    28 fun is_symbol (Symbol _) = true
    29   | is_symbol _ = false;
    30 
    31 fun is_ str = (fn s => s = Symbol str);
    32 
    33 
    34 local (* copied from antiquote-setup... *)
    35 fun translate f = Symbol.explode #> map f #> implode;
    36 
    37 fun clean_name "\<dots>" = "dots"
    38   | clean_name ".." = "ddot"
    39   | clean_name "." = "dot"
    40   | clean_name "_" = "underscore"
    41   | clean_name "{" = "braceleft"
    42   | clean_name "}" = "braceright"
    43   | clean_name s = s |> translate (fn "_" => "-" | "\<dash>" => "-" | c => c);
    44 
    45 fun no_check _ _ = true;
    46 
    47 fun false_check _ _ = false;
    48 
    49 fun thy_check intern defined ctxt =
    50   let val thy = ProofContext.theory_of ctxt
    51   in defined thy o intern thy end;
    52 
    53 fun check_tool name =
    54   File.exists (Path.append (Path.explode "~~/lib/Tools") (Path.basic name));
    55 
    56 val arg = enclose "{" "}";
    57 
    58 val markup_table =
    59 (*  [(kind, (markup, check, style))*)
    60   Symtab.make [
    61   ("syntax", ("", no_check, true)),
    62   ("command", ("isacommand", K (is_some o Keyword.command_keyword), true)),
    63   ("keyword", ("isakeyword", K Keyword.is_keyword, true)),
    64   ("element", ("isakeyword", K Keyword.is_keyword, true)),
    65   ("method", ("", thy_check Method.intern Method.defined, true)),
    66   ("attribute", ("", thy_check Attrib.intern Attrib.defined, true)),
    67   ("fact", ("", no_check, true)),
    68   ("variable", ("", no_check, true)),
    69   ("case", ("", no_check, true)),
    70   ("antiquotation", ("", K Thy_Output.defined_command, true)),
    71   ("antiquotation option", ("", K Thy_Output.defined_option, true)), (* kann mein scanner nicht erkennen *)
    72   ("setting", ("isatt", (fn _ => fn name => is_some (OS.Process.getEnv name)), true)),
    73   ("inference", ("", no_check, true)),
    74   ("executable", ("isatt", no_check, true)),
    75   ("tool", ("isatt", K check_tool, true)),
    76   ("file", ("isatt", K (File.exists o Path.explode), true)),
    77   ("theory", ("", K (can Thy_Info.get_theory), true))
    78   ];
    79 
    80 in
    81 
    82 fun decode_link ctxt (kind,index,logic,name) =
    83   let
    84   val (markup, check, style) = case Symtab.lookup markup_table kind of
    85     SOME x => x
    86   | NONE => ("", false_check, false);
    87   val hyper_name =
    88     "{" ^ Long_Name.append kind (Long_Name.append logic (clean_name name)) ^ "}";
    89   val hyper =
    90     enclose ("\\hyperlink" ^ hyper_name ^ "{") "}" #>
    91     index = "def" ? enclose ("\\hypertarget" ^ hyper_name ^ "{") "}";
    92   val idx =
    93     if index = "" then ""
    94     else "\\index" ^ index ^ arg logic ^ arg kind ^ arg name;
    95   in
    96   if check ctxt name then
    97     (idx ^
    98     (Output.output name
    99       |> (if markup = "" then I else enclose ("\\" ^ markup ^ "{") "}")
   100       |> (if Config.get ctxt Thy_Output.quotes then quote else I)
   101       |> (if Config.get ctxt Thy_Output.display
   102           then enclose "\\begin{isabelle}%\n" "%\n\\end{isabelle}"
   103           else hyper o enclose "\\mbox{\\isa{" "}}")), style)
   104   else ("Bad " ^ kind ^ " " ^ name, false)
   105   end;
   106 end;
   107 
   108 val blanks =
   109   Scan.many Symbol.is_blank >> implode;
   110 
   111 val scan_symbol =
   112   $$ ";" || $$ ":"|| $$ "("|| $$ ")"|| $$ "+"|| $$ "|"|| $$ "*"|| $$ "?"|| $$ "\\";
   113 
   114 (* escaped version *)
   115 val scan_link = (* @{kind{_def|_ref (logic) name} *)
   116   let
   117     fun pseudo_antiquote inner_scan = ($$ "@" ^^ $$ "{") |-- inner_scan --| (blanks -- $$ "}");
   118     fun parens scan = $$ "(" |-- scan --| $$ ")";
   119     fun opt_quotes scan = $$ "'" |-- scan --| $$ "'" || scan;
   120     val letters = Scan.many Symbol.is_letter >> implode;
   121     val kind_name = letters;
   122     val opt_kind_type = Scan.optional (
   123       $$ "_" |-- (Scan.this_string "def" || Scan.this_string "ref")) "";
   124     val logic_name = letters;
   125     val escaped_singlequote = $$ "\\" |-- $$ "'";
   126     val text = Scan.repeat (Scan.one Symbol.is_letter || escaped_singlequote) >> implode;
   127   in
   128    pseudo_antiquote (
   129     kind_name -- opt_kind_type --
   130     (blanks |-- Scan.optional ( parens logic_name ) "") --
   131     (blanks |-- opt_quotes text) )
   132     >> (fn (((kind,index),logic),name) => (kind, index, logic, name))
   133 end;
   134 
   135 (* escaped version *)
   136 fun scan_identifier ctxt =
   137   let fun is_identifier_start s =
   138     Symbol.is_letter s orelse
   139     s = "_"
   140   fun is_identifier_rest s =
   141     Symbol.is_letter s orelse
   142     Symbol.is_digit s orelse
   143     s = "_" orelse
   144     s = "."
   145   in
   146   (Scan.one is_identifier_start :::
   147     Scan.repeat (Scan.one is_identifier_rest || ($$ "\\" |-- $$ "'"))
   148   ) >> (Identifier o enclose "\\isa{" "}" o Output.output o implode) ||
   149   scan_link >> (decode_link ctxt) >>
   150     (fn (txt, style) =>
   151         if style then Special_Identifier(txt)
   152         else Identifier(txt))
   153 end;
   154 
   155 fun scan_anot ctxt =
   156   let val scan_anot =
   157     Scan.many (fn s =>
   158       s <> "\n" andalso
   159       s <> "\t" andalso
   160       s <> "]" andalso
   161       Symbol.is_regular s) >> implode
   162   in
   163   $$ "[" |-- scan_link --| $$ "]" >> (fst o (decode_link ctxt)) ||
   164   $$ "[" |-- scan_anot --| $$ "]" >> Output.output
   165 end;
   166 
   167 (* escaped version *)
   168 fun scan_text ctxt =
   169   let
   170     val text_sq =
   171       Scan.repeat (
   172         Scan.one (fn s =>
   173           s <> "\n" andalso
   174           s <> "\t" andalso
   175           s <> "'" andalso
   176           s <> "\\" andalso
   177           Symbol.is_regular s) ||
   178         ($$ "\\" |-- $$ "'")
   179       ) >> implode
   180   fun quoted scan = $$ "'" |-- scan --| $$ "'";
   181   in
   182   quoted scan_link >> (fst o (decode_link ctxt)) ||
   183   quoted text_sq >> (enclose "\\isa{" "}" o Output.output)
   184 end;
   185 
   186 fun scan_rail ctxt =
   187   Scan.repeat ( blanks |-- (
   188     scan_identifier ctxt ||
   189     scan_anot ctxt >> Anot ||
   190     scan_text ctxt >> Text ||
   191     scan_symbol >> Symbol)
   192   ) --| blanks;
   193 
   194 fun lex_rail txt ctxt = (* Symbol_Pos fuer spaeter durchgereicht *)
   195   Symbol.scanner "Malformed rail-declaration" (scan_rail ctxt) (map fst (Symbol_Pos.explode txt));
   196 
   197 val lex = lex_rail;
   198 
   199 datatype id_kind = UNKNOWN | TOKEN | TERM | NTERM;
   200 
   201 datatype id_type =
   202   Id of string * id_kind |
   203   Null_Id;
   204 
   205 datatype body_kind =
   206   CAT | BAR | PLUS |
   207   CR | EMPTY | ANNOTE | IDENT | STRING |
   208   Null_Kind;
   209 
   210 datatype body_type =
   211   Body of body_kind * string * string * id_type * body_type list |
   212   Body_Pos of body_kind * string * string * id_type * body_type list * int * int |
   213   Empty_Body |
   214   Null_Body;
   215 
   216 datatype rule =
   217   Rule of id_type * body_type;
   218 
   219 fun new_id id kind = Id (id, kind);
   220 
   221 fun is_empty (Body(kind,_,_,_,_)) = kind = EMPTY;
   222 
   223 fun new_body (kind, Null_Body, Null_Body) = Body (kind, "", "", Null_Id, [])
   224   | new_body (kind, body1, body2) = Body (kind, "", "", Null_Id, body1 :: [body2]);
   225 
   226 fun is_kind_of kind (Body(bodyKind,_,_,_,_)) = kind = bodyKind
   227   | is_kind_of _ _ = false;
   228 
   229 fun add_list (Body(kind, text, annot, id, bodies), body) =
   230   Body(kind, text, annot, id, bodies @ [body]);
   231 
   232 fun cat_body_lists (Body(kind, text, annot, id, bodies1), Body(_,_,_,_, bodies2)) =
   233       Body(kind, text, annot, id, bodies1 @ bodies2);
   234 
   235 fun add_body (kind, body1 as Body(kind1,_,_,_,_), body2 as Body(kind2,_,_,_,_)) =
   236   if kind = kind1 andalso kind <> BAR then
   237     if kind = kind2 then
   238       cat_body_lists(body1, body2)
   239     else (* kind <> kind2 *)
   240       add_list(body1, body2)
   241   else (* kind <> kind1 orelse kind = BAR *)
   242     if kind = kind2 then
   243       cat_body_lists(add_list(new_body(kind,Null_Body,Null_Body), body1), body2)
   244     else (* kind <> kind2 *)
   245       add_list(add_list(new_body(kind,Null_Body,Null_Body), body1), body2);
   246 
   247 fun rev_body (body as Body (kind, text, annot, id, [])) = body
   248   | rev_body (Body (CAT, text, annot, id, bodies)) =
   249       Body(CAT, text, annot, id, map rev_body (rev bodies))
   250   | rev_body (Body (kind, text, annot, id, bodies)) =
   251       Body(kind, text, annot, id, map rev_body bodies)
   252   | rev_body body = body;
   253 
   254 fun set_body_text text (Body(k,_,a,i,b)) = Body(k,text,a,i,b);
   255 fun set_body_anot anot (Body(k,t,_,i,b)) = Body(k,t,anot,i,b);
   256 fun set_body_ident id (Body(k,t,a,_,b)) = Body(k,t,a, new_id id TOKEN,b);
   257 fun set_body_special_ident id (Body(k,t,a,_,b)) = Body(k,t,a, new_id id TERM,b);
   258 
   259 
   260 fun mk_eof _ = EOF;
   261 fun is_eof s = s = EOF;
   262 val stopper = Scan.stopper mk_eof is_eof;
   263 
   264 (* TODO: change this, so the next or next two tokens are printed *)
   265 fun lex_err msg (cs, _) = "rail grammar error: " ^ msg cs;
   266 fun !!! msg scan = Scan.!! (lex_err (K msg)) scan;
   267 fun $$$ tok = Scan.one (is_ tok);
   268 
   269 
   270 local
   271 fun new_bar_body([], body2) = body2
   272   | new_bar_body(body1::bodies, body2) =
   273       add_body(BAR, body1, new_bar_body(bodies, body2));
   274 
   275 fun new_cat_body(body::[]) = body
   276   | new_cat_body(body1::bodies) = add_body(CAT, body1, new_cat_body(bodies));
   277 
   278 fun new_annote_body (Anot anot) =
   279   set_body_text anot (new_body(ANNOTE, Empty_Body, Empty_Body));
   280 
   281 fun new_text_annote_body (Text text, Anot anot) =
   282   set_body_anot anot (set_body_text text (new_body(STRING, Empty_Body, Empty_Body)));
   283 
   284 fun new_ident_body (Identifier ident, Anot anot) =
   285       set_body_anot anot (set_body_ident ident (new_body(IDENT, Empty_Body, Empty_Body)))
   286   | new_ident_body (Special_Identifier ident, Anot anot) =
   287       set_body_anot anot (set_body_special_ident ident (new_body(IDENT, Empty_Body, Empty_Body)));
   288 
   289 val new_empty_body = new_body(EMPTY, Null_Body, Null_Body);
   290 in
   291 
   292 fun parse_body x =
   293   (
   294   Scan.repeat1 (parse_body0 --| $$$ "|") -- !!! "body0 expected" (parse_body0) >>
   295     new_bar_body ||
   296   parse_body0
   297   ) x
   298 and parse_body0 x =
   299   (
   300   Scan.one is_anot -- !!! "body1 expected" (parse_body1) >>
   301     (fn (anot, body) => add_body(CAT, new_annote_body(anot), body))  ||
   302   parse_body1
   303   ) x
   304 and parse_body1 x =
   305   (
   306   parse_body2 -- ($$$ "*" |-- !!! "body4e expected" (parse_body4e)) >>
   307     (fn (body1, body2) =>
   308       if is_empty body2 then
   309         add_body(PLUS, new_empty_body, rev_body body1)
   310       else
   311         add_body(BAR, new_empty_body, add_body (PLUS, body1, rev_body body2)) ) ||
   312   parse_body2 -- ($$$ "+" |-- !!! "body4e expected" (parse_body4e)) >>
   313     (fn (body1, body2) => new_body (PLUS, body1, rev_body body2) ) ||
   314   parse_body2e
   315   ) x
   316 and parse_body2e x =
   317   (
   318   parse_body2 ||
   319   (fn toks => (new_empty_body, toks))
   320   ) x
   321 and parse_body2 x =
   322   (
   323   Scan.repeat1 (parse_body3) >> new_cat_body
   324   ) x
   325 and parse_body3 x =
   326   (
   327   parse_body4 --| $$$ "?" >> (fn body => new_body (BAR, new_empty_body, body) ) ||
   328   parse_body4
   329   ) x
   330 and parse_body4e x =
   331   (
   332   parse_body4 ||
   333   (fn toks => (new_empty_body, toks))
   334   ) x
   335 and parse_body4 x =
   336   (
   337   $$$ "(" |-- !!! "body0 or ')' expected" (parse_body --| $$$ ")") ||
   338   Scan.one is_text -- (Scan.optional (Scan.one is_anot) (Anot(""))) >>
   339     (fn (text, anot) => new_text_annote_body (text,anot)) ||
   340   Scan.one is_identifier -- (Scan.optional (Scan.one is_anot) (Anot(""))) >>
   341     (fn (id, anot) => new_ident_body (id,anot)) ||
   342   $$$ "\\" >> (fn _ => new_body (CR, Null_Body, Null_Body))
   343   ) x;
   344 end;
   345 
   346 fun new_named_rule (Identifier name, body) = Rule(Id(name, UNKNOWN), body)
   347   | new_named_rule (Special_Identifier name, body) = Rule(Id(name, UNKNOWN), body);
   348 fun new_anonym_rule body = Rule(Null_Id, body);
   349 
   350 val parse_rule =
   351   (Scan.one (is_identifier) -- ($$$ ":" |-- !!! "body expected" (parse_body)) ) >>
   352     new_named_rule ||
   353   parse_body >> new_anonym_rule;
   354 
   355 val parse_rules =
   356   Scan.repeat ( parse_rule --| $$$ ";") @@@ Scan.single parse_rule;
   357 
   358 fun parse_rail s =
   359   Scan.read stopper parse_rules s;
   360 
   361 val parse = parse_rail;
   362 
   363 fun getystart (Body_Pos(_,_,_,_,_,ystart,_)) = ystart;
   364 fun getynext (Body_Pos(_,_,_,_,_,_,ynext)) = ynext;
   365 
   366 fun position_body (body as Body(kind, text, annot, id, bodies), ystart) =
   367   let fun max (x,y) = if x > y then x else y
   368     fun set_body_position (Body(kind, text, annot, id, bodies), ystart, ynext) =
   369           Body_Pos(kind, text, annot, id, bodies, ystart, ynext)
   370     fun pos_bodies_cat ([],_,ynext,liste) = (liste, ynext)
   371       | pos_bodies_cat (x::xs, ystart, ynext, liste) =
   372           if is_kind_of CR x then
   373               (case set_body_position(x, ystart, ynext+1) of
   374                 body as Body_Pos(_,_,_,_,_,_,ynext1) =>
   375                   pos_bodies_cat(xs, ynext1, max(ynext,ynext1), liste@[body])
   376               )
   377           else
   378               (case position_body(x, ystart) of
   379                 body as Body_Pos(_,_,_,_,_,_,ynext1) =>
   380                   pos_bodies_cat(xs, ystart, max(ynext,ynext1), liste@[body])
   381               )
   382     fun pos_bodies_bar_plus ([],_,ynext,liste) = (liste, ynext)
   383       | pos_bodies_bar_plus (x::xs, ystart, ynext, liste) =
   384           (case position_body(x, ystart) of
   385             body as Body_Pos(_,_,_,_,_,_,ynext1) =>
   386               pos_bodies_bar_plus(xs, ynext1, max(ynext,ynext1), liste@[body])
   387           )
   388   in
   389   (case kind of
   390     CAT => (case pos_bodies_cat(bodies,ystart,ystart+1,[]) of
   391               (bodiesPos, ynext) =>
   392                 Body_Pos(kind, text, annot, id, bodiesPos, ystart, ynext))
   393   | BAR => (case pos_bodies_bar_plus(bodies,ystart,ystart+1,[]) of
   394               (bodiesPos, ynext) =>
   395                 Body_Pos(kind, text, annot, id, bodiesPos, ystart, ynext))
   396   | PLUS => (case pos_bodies_bar_plus(bodies,ystart,ystart+1,[]) of
   397               (bodiesPos, ynext) =>
   398                 Body_Pos(kind, text, annot, id, bodiesPos, ystart, ynext))
   399   | CR => set_body_position(body, ystart, ystart+3)
   400   | EMPTY => set_body_position(body, ystart, ystart+1)
   401   | ANNOTE => set_body_position(body, ystart, ystart+1)
   402   | IDENT => set_body_position(body, ystart, ystart+1)
   403   | STRING => set_body_position(body, ystart, ystart+1)
   404   )
   405   end;
   406 
   407 fun format_body (Body_Pos(EMPTY,_,_,_,_,_,_), _) = ""
   408   | format_body (Body_Pos(CAT,_,_,_,bodies,_,_), cent) =
   409     let fun format_bodies([]) = ""
   410           | format_bodies(x::xs) = format_body (x, "") ^ format_bodies(xs)
   411     in
   412       format_bodies(bodies)
   413     end
   414   | format_body (Body_Pos(BAR,_,_,_,bodies,_,_),cent) =
   415     let fun format_bodies([]) = "\\rail@endbar\n"
   416           | format_bodies(x::xs) =
   417               "\\rail@nextbar{" ^ string_of_int(getystart(x)) ^"}\n" ^
   418               format_body(x, "") ^ format_bodies(xs)
   419     in
   420       "\\rail@bar\n" ^ format_body(hd(bodies), "") ^ format_bodies(tl(bodies))
   421     end
   422   | format_body (Body_Pos(PLUS,_,_,_,x::y::xs,_,_),cent) =
   423       "\\rail@plus\n" ^ format_body(x, cent) ^
   424       "\\rail@nextplus{" ^ string_of_int(getystart(y)) ^ "}\n" ^
   425       format_body(y, "c") ^
   426       "\\rail@endplus\n"
   427   | format_body (Body_Pos(ANNOTE,text,_,_,_,_,_),cent) =
   428       "\\rail@annote[" ^ text ^ "]\n"
   429   | format_body (Body_Pos(IDENT,_,annot,Id(name,TERM),_,_,_),cent) =
   430       "\\rail@" ^ cent ^ "token{" ^ name ^ "}" ^ "[" ^ annot ^ "]\n"
   431   | format_body (Body_Pos(IDENT,_,annot,Id(name,_),_,_,_),cent) =
   432       "\\rail@" ^ cent ^ "nont{" ^ name ^ "}" ^ "[" ^ annot ^ "]\n"
   433   | format_body (Body_Pos(CR,_,_,_,_,_,ynext),cent) =
   434       "\\rail@cr{" ^ string_of_int(ynext) ^ "}\n"
   435   | format_body (Body_Pos(STRING,text,annot,_,_,_,_),cent) =
   436       "\\rail@" ^ cent ^ "term{" ^ text ^ "}[" ^ annot ^ "]\n"
   437   | format_body _ =
   438       "\\rail@unknown\n";
   439 
   440 fun out_body (Id(name,_), body) =
   441   let val bodyPos as Body_Pos(_,_,_,_,_,_,ynext) = position_body(body,0)
   442   in
   443     "\\rail@begin{" ^ string_of_int(ynext) ^ "}{" ^ name ^ "}\n" ^
   444     format_body(bodyPos,"") ^
   445     "\\rail@end\n"
   446   end
   447   | out_body (Null_Id, body) = out_body (Id("", UNKNOWN), body);
   448 
   449 fun out_rule (Rule(id, body)) =
   450   if is_empty body then ""
   451   else out_body (id, body);
   452 
   453 fun out_rules ([]) = ""
   454   | out_rules (rule::rules) = out_rule rule ^ out_rules rules;
   455 
   456 val output_no_rules =
   457   "\\rail@begin{1}{}\n" ^
   458   "\\rail@setbox{\\bfseries ???}\n" ^
   459   "\\rail@oval\n" ^
   460   "\\rail@end\n";
   461 
   462 
   463 fun print (SOME rules) =
   464     "\\begin{railoutput}\n" ^
   465     out_rules rules ^
   466     "\\end{railoutput}\n"
   467   | print (NONE) =
   468     "\\begin{railoutput}\n" ^
   469     output_no_rules ^
   470     "\\end{railoutput}\n";
   471 
   472 fun process txt ctxt =
   473   lex txt ctxt
   474   |> parse
   475   |> print;
   476 
   477 val _ = Thy_Output.antiquotation "rail" (Scan.lift (Parse.position Args.name))
   478   (fn {context = ctxt,...} => fn txt => process txt ctxt);
   479 
   480 end;
   481