more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
authorwenzelm
Mon, 15 Jan 2018 22:46:04 +0100
changeset 67439 78759a7bd874
parent 67438 fdb7b995974d
child 67440 e5ba0ca1e465
more uniform support for formal comments in outer syntax, notably \<^cancel> and \<^latex>;
src/Pure/Isar/outer_syntax.ML
src/Pure/Isar/token.ML
src/Pure/Isar/token.scala
src/Pure/Thy/thy_output.ML
src/Tools/jEdit/src/jedit_rendering.scala
--- a/src/Pure/Isar/outer_syntax.ML	Mon Jan 15 14:31:57 2018 +0100
+++ b/src/Pure/Isar/outer_syntax.ML	Mon Jan 15 22:46:04 2018 +0100
@@ -273,12 +273,22 @@
 
 (* side-comments *)
 
+local
+
+fun is_cmt tok = Token.kind_of tok = Token.Comment (SOME Comment.Comment);
+
 fun cmts (t1 :: t2 :: toks) =
       if Token.keyword_with (fn s => s = "--" orelse s = Symbol.comment) t1 then t2 :: cmts toks
+      else if is_cmt t1 then t1 :: cmts (t2 :: toks)
       else cmts (t2 :: toks)
   | cmts _ = [];
 
-val side_comments = filter Token.is_proper #> cmts;
+in
+
+val side_comments =
+  cmts o filter_out (fn tok => Token.is_informal_comment tok orelse Token.is_space tok);
+
+end;
 
 
 (* check commands *)
--- a/src/Pure/Isar/token.ML	Mon Jan 15 14:31:57 2018 +0100
+++ b/src/Pure/Isar/token.ML	Mon Jan 15 22:46:04 2018 +0100
@@ -11,7 +11,7 @@
     Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
     Float | Space |
     (*delimited content*)
-    String | Alt_String | Verbatim | Cartouche | Comment |
+    String | Alt_String | Verbatim | Cartouche | Comment of Comment.kind option |
     (*special content*)
     Error of string | EOF
   val str_of_kind: kind -> string
@@ -44,6 +44,8 @@
   val is_proper: T -> bool
   val is_improper: T -> bool
   val is_comment: T -> bool
+  val is_informal_comment: T -> bool
+  val is_formal_comment: T -> bool
   val is_begin_ignore: T -> bool
   val is_end_ignore: T -> bool
   val is_error: T -> bool
@@ -116,7 +118,7 @@
   Command | Keyword | Ident | Long_Ident | Sym_Ident | Var | Type_Ident | Type_Var | Nat |
   Float | Space |
   (*delimited content*)
-  String | Alt_String | Verbatim | Cartouche | Comment |
+  String | Alt_String | Verbatim | Cartouche | Comment of Comment.kind option |
   (*special content*)
   Error of string | EOF;
 
@@ -136,7 +138,8 @@
   | Alt_String => "back-quoted string"
   | Verbatim => "verbatim text"
   | Cartouche => "text cartouche"
-  | Comment => "comment text"
+  | Comment NONE => "informal comment"
+  | Comment (SOME _) => "formal comment"
   | Error _ => "bad input"
   | EOF => "end-of-input";
 
@@ -144,7 +147,13 @@
   Vector.fromList
     [Command, Keyword, Ident, Long_Ident, Sym_Ident, Var, Type_Ident, Type_Var, Nat, Float, Space];
 
-val delimited_kind = member (op =) [String, Alt_String, Verbatim, Cartouche, Comment];
+val delimited_kind =
+  (fn String => true
+    | Alt_String => true
+    | Verbatim => true
+    | Cartouche => true
+    | Comment _ => true
+    | _ => false);
 
 
 (* datatype token *)
@@ -220,18 +229,24 @@
   | ident_with _ _ = false;
 
 fun is_proper (Token (_, (Space, _), _)) = false
-  | is_proper (Token (_, (Comment, _), _)) = false
+  | is_proper (Token (_, (Comment _, _), _)) = false
   | is_proper _ = true;
 
 val is_improper = not o is_proper;
 
-fun is_comment (Token (_, (Comment, _), _)) = true
+fun is_comment (Token (_, (Comment _, _), _)) = true
   | is_comment _ = false;
 
-fun is_begin_ignore (Token (_, (Comment, "<"), _)) = true
+fun is_informal_comment (Token (_, (Comment NONE, _), _)) = true
+  | is_informal_comment _ = false;
+
+fun is_formal_comment (Token (_, (Comment (SOME _), _), _)) = true
+  | is_formal_comment _ = false;
+
+fun is_begin_ignore (Token (_, (Comment NONE, "<"), _)) = true
   | is_begin_ignore _ = false;
 
-fun is_end_ignore (Token (_, (Comment, ">"), _)) = true
+fun is_end_ignore (Token (_, (Comment NONE, ">"), _)) = true
   | is_end_ignore _ = false;
 
 fun is_error (Token (_, (Error _, _), _)) = true
@@ -274,7 +289,7 @@
   | Alt_String => (Markup.alt_string, "")
   | Verbatim => (Markup.verbatim, "")
   | Cartouche => (Markup.cartouche, "")
-  | Comment => (Markup.comment, "")
+  | Comment NONE => (Markup.comment, "")
   | Error msg => (Markup.bad (), msg)
   | _ => (Markup.empty, "");
 
@@ -321,7 +336,7 @@
   | Alt_String => Symbol_Pos.quote_string_bq x
   | Verbatim => enclose "{*" "*}" x
   | Cartouche => cartouche x
-  | Comment => enclose "(*" "*)" x
+  | Comment NONE => enclose "(*" "*)" x
   | EOF => ""
   | _ => x);
 
@@ -608,7 +623,8 @@
     Symbol_Pos.scan_string_bq err_prefix >> token_range Alt_String ||
     scan_verbatim >> token_range Verbatim ||
     scan_cartouche >> token_range Cartouche ||
-    scan_comment >> token_range Comment ||
+    scan_comment >> token_range (Comment NONE) ||
+    (Comment.scan_cancel || Comment.scan_latex) >> (fn (k, ss) => token (Comment (SOME k)) ss) ||
     scan_space >> token Space ||
     (Scan.max token_leq
       (Scan.max token_leq
--- a/src/Pure/Isar/token.scala	Mon Jan 15 14:31:57 2018 +0100
+++ b/src/Pure/Isar/token.scala	Mon Jan 15 22:46:04 2018 +0100
@@ -34,7 +34,8 @@
     val ALT_STRING = Value("back-quoted string")
     val VERBATIM = Value("verbatim text")
     val CARTOUCHE = Value("text cartouche")
-    val COMMENT = Value("comment text")
+    val INFORMAL_COMMENT = Value("informal comment")
+    val FORMAL_COMMENT = Value("formal comment")
     /*special content*/
     val ERROR = Value("bad input")
     val UNPARSED = Value("unparsed input")
@@ -45,17 +46,21 @@
 
   object Parsers extends Parsers
 
-  trait Parsers extends Scan.Parsers
+  trait Parsers extends Scan.Parsers with Comment.Parsers
   {
+    private def comment_marker: Parser[Token] =
+      one(Symbol.is_comment) ^^ (x => Token(Token.Kind.KEYWORD, x))
+
     private def delimited_token: Parser[Token] =
     {
       val string = quoted("\"") ^^ (x => Token(Token.Kind.STRING, x))
       val alt_string = quoted("`") ^^ (x => Token(Token.Kind.ALT_STRING, x))
       val verb = verbatim ^^ (x => Token(Token.Kind.VERBATIM, x))
       val cart = cartouche ^^ (x => Token(Token.Kind.CARTOUCHE, x))
-      val cmt = comment ^^ (x => Token(Token.Kind.COMMENT, x))
+      val cmt = comment ^^ (x => Token(Token.Kind.INFORMAL_COMMENT, x))
+      val formal_cmt = comment_cartouche ^^ (x => Token(Token.Kind.FORMAL_COMMENT, x))
 
-      string | (alt_string | (verb | (cart | cmt)))
+      string | (alt_string | (verb | (cart | (cmt | formal_cmt))))
     }
 
     private def other_token(keywords: Keyword.Keywords): Parser[Token] =
@@ -106,7 +111,7 @@
     }
 
     def token(keywords: Keyword.Keywords): Parser[Token] =
-      delimited_token | other_token(keywords)
+      comment_marker | (delimited_token | other_token(keywords))
 
     def token_line(keywords: Keyword.Keywords, ctxt: Scan.Line_Context)
       : Parser[(Token, Scan.Line_Context)] =
@@ -117,10 +122,12 @@
         quoted_line("`", ctxt) ^^ { case (x, c) => (Token(Token.Kind.ALT_STRING, x), c) }
       val verb = verbatim_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.VERBATIM, x), c) }
       val cart = cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.CARTOUCHE, x), c) }
-      val cmt = comment_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.COMMENT, x), c) }
+      val cmt = comment_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.INFORMAL_COMMENT, x), c) }
+      val formal_cmt =
+        comment_cartouche_line(ctxt) ^^ { case (x, c) => (Token(Token.Kind.FORMAL_COMMENT, x), c) }
       val other = other_token(keywords) ^^ { case x => (x, Scan.Finished) }
 
-      string | (alt_string | (verb | (cart | (cmt | other))))
+      string | (alt_string | (verb | (cart | (cmt | (formal_cmt | other)))))
     }
   }
 
@@ -287,7 +294,7 @@
     kind == Token.Kind.TYPE_VAR
   def is_text: Boolean = is_embedded || kind == Token.Kind.VERBATIM
   def is_space: Boolean = kind == Token.Kind.SPACE
-  def is_comment: Boolean = kind == Token.Kind.COMMENT
+  def is_comment: Boolean = kind == Token.Kind.INFORMAL_COMMENT || kind == Token.Kind.FORMAL_COMMENT
   def is_improper: Boolean = is_space || is_comment
   def is_proper: Boolean = !is_space && !is_comment
   def is_error: Boolean = kind == Token.Kind.ERROR
@@ -313,7 +320,8 @@
     else if (kind == Token.Kind.ALT_STRING) Scan.Parsers.quoted_content("`", source)
     else if (kind == Token.Kind.VERBATIM) Scan.Parsers.verbatim_content(source)
     else if (kind == Token.Kind.CARTOUCHE) Scan.Parsers.cartouche_content(source)
-    else if (kind == Token.Kind.COMMENT) Scan.Parsers.comment_content(source)
+    else if (kind == Token.Kind.INFORMAL_COMMENT) Scan.Parsers.comment_content(source)
+    else if (kind == Token.Kind.FORMAL_COMMENT) Comment.content(source)
     else source
 
   def is_system_name: Boolean =
--- a/src/Pure/Thy/thy_output.ML	Mon Jan 15 14:31:57 2018 +0100
+++ b/src/Pure/Thy/thy_output.ML	Mon Jan 15 22:46:04 2018 +0100
@@ -117,7 +117,7 @@
       output_body ctxt antiq bg en (Input.source_explode (Token.input_of tok));
   in
     (case Token.kind_of tok of
-      Token.Comment => []
+      Token.Comment NONE => []
     | Token.Command => output false "\\isacommand{" "}"
     | Token.Keyword =>
         if Symbol.is_ascii_identifier (Token.content_of tok)
@@ -151,6 +151,12 @@
 
 (*NB: arranging white space around command spans is a black art*)
 
+val is_white = Token.is_space orf Token.is_informal_comment;
+val is_black = not o is_white;
+
+val is_white_comment = Token.is_informal_comment;
+val is_black_comment = Token.is_formal_comment;
+
 
 (* presentation tokens *)
 
@@ -164,8 +170,8 @@
 fun basic_token pred (Basic_Token tok) = pred tok
   | basic_token _ _ = false;
 
-val improper_token = basic_token Token.is_improper;
-val comment_token = basic_token Token.is_comment;
+val white_token = basic_token is_white;
+val white_comment_token = basic_token is_white_comment;
 val blank_token = basic_token Token.is_blank;
 val newline_token = basic_token Token.is_newline;
 
@@ -197,9 +203,9 @@
       | take_newline [] = ([], [], false);
     val (((src_prefix, src_main), src_suffix1), (src_suffix2, src_appendix, newline)) =
       src
-      |> take_prefix (improper_token o fst)
-      ||>> take_suffix (improper_token o fst)
-      ||>> take_prefix (comment_token o fst)
+      |> take_prefix (white_token o fst)
+      ||>> take_suffix (white_token o fst)
+      ||>> take_prefix (white_comment_token o fst)
       ||> take_newline;
   in Span (cmd, (src_prefix, src_main, src_suffix1 @ src_suffix2, src_appendix), newline) end;
 
@@ -296,9 +302,9 @@
 val markup_false = "\\isamarkupfalse%\n";
 
 val space_proper =
-  Scan.one Token.is_blank -- Scan.many Token.is_comment -- Scan.one Token.is_proper;
+  Scan.one Token.is_blank -- Scan.many is_white_comment -- Scan.one is_black;
 
-val is_improper = not o (Token.is_proper orf Token.is_begin_ignore orf Token.is_end_ignore);
+val is_improper = not o (is_black orf Token.is_begin_ignore orf Token.is_end_ignore);
 val improper = Scan.many is_improper;
 val improper_end = Scan.repeat (Scan.unless space_proper (Scan.one is_improper));
 val blank_end = Scan.repeat (Scan.unless space_proper (Scan.one Token.is_blank));
@@ -349,6 +355,8 @@
             (Basic_Token cmd, (markup_false, d)))]));
 
     val cmt = Scan.peek (fn d =>
+      Scan.one is_black_comment >>
+        (fn tok => (NONE, (Basic_Token tok, ("", d)))) ||
       (Parse.$$$ "--" || Parse.$$$ Symbol.comment) |--
         Parse.!!!! (improper |-- Parse.document_source) >>
         (fn source => (NONE, (Markup_Token ("cmt", source), ("", d)))));
@@ -373,13 +381,13 @@
     val cmd = Scan.one (is_some o fst);
     val non_cmd = Scan.one (is_none o fst andf not o is_eof) >> #2;
 
-    val comments = Scan.many (comment_token o fst o snd);
+    val white_comments = Scan.many (white_comment_token o fst o snd);
     val blank = Scan.one (blank_token o fst o snd);
     val newline = Scan.one (newline_token o fst o snd);
     val before_cmd =
-      Scan.option (newline -- comments) --
-      Scan.option (newline -- comments) --
-      Scan.option (blank -- comments) -- cmd;
+      Scan.option (newline -- white_comments) --
+      Scan.option (newline -- white_comments) --
+      Scan.option (blank -- white_comments) -- cmd;
 
     val span =
       Scan.repeat non_cmd -- cmd --
--- a/src/Tools/jEdit/src/jedit_rendering.scala	Mon Jan 15 14:31:57 2018 +0100
+++ b/src/Tools/jEdit/src/jedit_rendering.scala	Mon Jan 15 22:46:04 2018 +0100
@@ -60,7 +60,8 @@
       Token.Kind.ALT_STRING -> LITERAL2,
       Token.Kind.VERBATIM -> COMMENT3,
       Token.Kind.CARTOUCHE -> COMMENT4,
-      Token.Kind.COMMENT -> COMMENT1,
+      Token.Kind.INFORMAL_COMMENT -> COMMENT1,
+      Token.Kind.FORMAL_COMMENT -> COMMENT1,
       Token.Kind.ERROR -> INVALID
     ).withDefaultValue(NULL)
   }